'./', // string The path to check for $file in
        'element' => '', // string The XML element to return
        'type' => 'upload',
        'encoding' => 'UTF-8',
        'pointer' => 1, // read() skips matching elements until its counter ($loop) reaches this value
        'chunkSize' => 1024, // multiplied by the plugin's 'chunk_size' option in the constructor
        'filter' => true, // when true, get_file_path() may wrap the file in the 'preprocessxml' stream filter
        'get_cloud' => false // when true, the constructor always builds the element-count map ($cloud)
    );

    /**
     * file
     *
     * @var string The filename being read
     * @access public
     */
    public $file = '';

    /**
     * reader
     *
     * NOTE(review): the docblock that used to sit here described an integer
     * "pointer" (current read position), which does not match this property.
     *
     * @var XMLReader|XmlStringStreamer The reader instance created by the constructor
     * @access public
     */
    public $reader;

    // Map of element name (colons replaced by "_") => number of occurrences, filled by the constructor.
    public $cloud = array();

    // Counter compared against $options['pointer'] in read() to skip leading matches.
    public $loop = 1;

    // Set to true by the constructor branch that follows the truncated check below.
    public $is_404 = false;

    // Parser engine name: 'xmlreader' or 'xmlstreamer' (false until the constructor runs).
    public $parser_type = false;

    /**
     * handle
     *
     * NOTE(review): not referenced anywhere in the visible code.
     *
     * @var resource The fopen() resource
     * @access private
     */
    private $handle = null;

    /**
     * reading
     *
     * NOTE(review): orphaned docblock - no property follows it in this file.
     *
     * @var boolean Whether the script is currently reading the file
     * @access private
     */

    /**
     * __construct
     *
     * Builds the Chunk object: merges the options, picks the parser engine,
     * optionally counts the element names found in the file to auto-detect
     * the element to iterate over, and finally opens the reader on the file.
     *
     * @param string      $file        The filename to work with
     * @param array       $options     The options with which to parse the file
     * @param string|bool $parser_type Preferred parser engine ('xmlreader' or 'xmlstreamer');
     *                                 an empty value lets the constructor choose
     * @author Dom Hastings
     * @access public
     */
    public function __construct($file, $options = array(), $parser_type = false) {
        // merge the options together
        $this->options = array_merge($this->options, (is_array($options) ? $options : array()));
        $this->options['chunkSize'] *= PMXI_Plugin::getInstance()->getOption('chunk_size');
        // set the filename
        $this->file = $file;
        $this->parser_type = empty($parser_type) ? 'xmlreader' : $parser_type;
        // optional delay (microseconds) supplied through the 'wp_all_import_shard_delay' hook
        $sleep = apply_filters( 'wp_all_import_shard_delay', 0 );
        usleep($sleep);
        $is_html = false;
        $f = @fopen($file, "rb");
        while (!@feof($f)) {
            $chunk = @fread($f, 1024);
            // NOTE(review): the source text is truncated on the next line. The string literal
            // opened there is never closed, and the rest of this loop, any use of $is_html and
            // the opening of the branch that sets $is_404 are missing. The line is kept verbatim;
            // restore it from the upstream file before relying on this constructor.
            if (strpos($chunk, "get_file_path(); $this->is_404 = true; $this->reader = new XMLReader(); @$this->reader->open($path); @$this->reader->setParserProperty(XMLReader::VALIDATE, false); return; }

        // the import id is taken from the 'id' input, falling back to 'import_id'
        $input = new PMXI_Input();
        $import_id = $input->get('id', 0);
        if ( empty($import_id)) $import_id = $input->get('import_id', 0);

        // choose the parser engine: the plugin-wide 'force_stream_reader' option wins, then the
        // import's own 'xml_reader_engine' option, then the argument / stored 'wpai_parser_type'
        if ( PMXI_Plugin::getInstance()->getOption('force_stream_reader') ) {
            $this->parser_type = 'xmlstreamer';
        } else {
            if ( ! empty($import_id) ) {
                $this->parser_type = empty($parser_type) ? 
'xmlreader' : $parser_type;
                $import = new PMXI_Import_Record();
                $import->getById($import_id);
                if ( ! $import->isEmpty() ){
                    $this->parser_type = empty($import->options['xml_reader_engine']) ? 'xmlreader' : 'xmlstreamer';
                }
            } else {
                $this->parser_type = empty($parser_type) ? get_option('wpai_parser_type', 'xmlreader') : $parser_type;
            }
        }

        // no element configured (or the caller asked for the cloud): count every element name
        if (empty($this->options['element']) or $this->options['get_cloud']) {
            $path = $this->get_file_path();
            if ( $this->parser_type == 'xmlreader' ) {
                $reader = new XMLReader();
                $reader->open($path);
                $reader->setParserProperty(XMLReader::VALIDATE, false);
                while ( @$reader->read() ) {
                    switch ($reader->nodeType) {
                        case (XMLREADER::ELEMENT):
                            // "_colon_" is the placeholder the preprocessxml filter substitutes for ":";
                            // the cloud key uses "_" in place of the colon
                            $localName = str_replace("_colon_", ":", $reader->localName);
                            if (array_key_exists(str_replace(":", "_", $localName), $this->cloud))
                                $this->cloud[str_replace(":", "_", $localName)]++;
                            else
                                $this->cloud[str_replace(":", "_", $localName)] = 1;
                            break;
                        default:
                            break;
                    }
                }
                unset($reader);
            } else {
                $CHUNK_SIZE = 1024;
                $streamProvider = new Prewk\XmlStringStreamer\Stream\File($path, $CHUNK_SIZE);
                $parseroptions = array(
                    "extractContainer" => false, // Required option
                );
                // Works like an XmlReader, and walks the XML tree node by node. Captures by node depth setting.
                $parser = new Parser\StringWalker($parseroptions);
                // Create the streamer
                $streamer = new XmlStringStreamer($parser, $streamProvider);
                // walk the whole file; the loop body is intentionally empty
                while ($node = $streamer->getNode()) {
                    // $simpleXmlNode = simplexml_load_string($node);
                    // echo (string)$simpleXmlNode->firstName;
                }
                // NOTE(review): presumably the parser fills ->cloud while walking - confirm against Parser\StringWalker
                $this->cloud = $parser->cloud;
            }
            if ( ! 
empty($this->cloud) and empty($this->options['element']) ){
                // most frequent element names first
                arsort($this->cloud);
                $main_elements = array('node', 'product', 'job', 'deal', 'entry', 'item', 'property', 'listing', 'hotel', 'record', 'article', 'post', 'book', 'item_0');
                // prefer the first (most frequent) name that appears in the well-known list
                foreach ($this->cloud as $element_name => $value) {
                    if ( in_array(strtolower($element_name), $main_elements) ){
                        $this->options['element'] = $element_name;
                        break;
                    }
                }
                // otherwise fall back to the first entry of the sorted cloud
                if (empty($this->options['element'])){
                    foreach ($this->cloud as $el => $count) {
                        $this->options['element'] = $el;
                        break;
                    }
                }
                $this->options['element'] = apply_filters('wp_all_import_root_element', $this->options['element'], $import_id, $this->cloud);
            }
        }

        // open the reader that read() will iterate with
        $path = $this->get_file_path();
        if ( $this->parser_type == 'xmlreader' ) {
            $this->reader = new XMLReader();
            @$this->reader->open($path);
            @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
        } else {
            $parseroptions = array( "uniqueNode" => $this->options['element'] );
            $CHUNK_SIZE = 1024;
            $streamProvider = new Prewk\XmlStringStreamer\Stream\File($path, $CHUNK_SIZE);
            $parser = new Parser\UniqueNode($parseroptions);
            $this->reader = new XmlStringStreamer($parser, $streamProvider);
        }
    }

    /**
     * get_file_path
     *
     * Returns the path the reader should open. When stream filters are
     * available, $options['filter'] is truthy, the
     * 'wp_all_import_is_enabled_stream_filter' hook allows it and the parser
     * is 'xmlreader', the 'preprocessxml' filter is registered and (except on
     * HHVM) the path is wrapped in a php://filter URL; otherwise the plain
     * filename is returned.
     *
     * @return string
     */
    function get_file_path() {
        $is_enabled_stream_filter = apply_filters('wp_all_import_is_enabled_stream_filter', true);
        if ( function_exists('stream_filter_register') and $this->options['filter'] and $is_enabled_stream_filter and $this->parser_type == 'xmlreader' ) {
            stream_filter_register('preprocessxml', 'preprocessXml_filter');
            if (defined('HHVM_VERSION'))
                $path = $this->file;
            else
                $path = 'php://filter/read=preprocessxml/resource=' . 
$this->file;
        } else
            $path = $this->file;

        return $path;
    }

    /**
     * __destruct
     *
     * Cleans up
     *
     * @return void
     * @author Dom Hastings
     * @access public
     */
    public function __destruct() {
        // drop the reference to the reader instance created by the constructor
        unset($this->reader);
    }

    /**
     * read
     *
     * Reads the next available occurrence of the XML element $this->options['element'].
     * Matching elements are skipped (and $this->loop incremented) until the
     * counter reaches $this->options['pointer'].
     *
     * @param bool $debug Unused; kept for backward compatibility with callers
     * @return string|false The XML string for the element, or false when nothing
     *                      (more) could be read
     * @author Dom Hastings
     * @access public
     */
    public function read($debug = false) {
        // trim it
        $element = trim($this->options['element']);
        $xml = '';

        if ( $this->parser_type == 'xmlreader' ) {
            $wanted = strtolower($element);
            try {
                // Plain guard clauses replace the former switch: a bare "continue" inside a
                // switch behaves like "break" and raises a compile-time warning on PHP >= 7.3.
                while ( @$this->reader->read() ) {
                    if ( $this->reader->nodeType !== XMLReader::ELEMENT ) {
                        continue;
                    }
                    // "_colon_" is the placeholder the preprocessxml filter uses for ":";
                    // names are compared case-insensitively with ":" mapped to "_"
                    $localName = str_replace("_colon_", ":", $this->reader->localName);
                    if ( strtolower(str_replace(":", "_", $localName)) !== $wanted ) {
                        continue;
                    }
                    // still before the requested start position: count the match and move on
                    if ($this->loop < $this->options['pointer']) {
                        $this->loop++;
                        continue;
                    }
                    $xml = @$this->reader->readOuterXML();
                    break;
                }
            } catch (XmlImportException $e) {
                $xml = false;
            }
        } else {
            $is_preprocess_enabled = apply_filters('is_xml_preprocess_enabled', true);
            while ($xml = $this->reader->getNode()) {
                if ($this->loop < $this->options['pointer']) {
                    $this->loop++;
                    continue;
                }
                if ($is_preprocess_enabled) {
                    // the & symbol is not valid in XML, so replace it with temporary word _ampersand_
                    $xml = str_replace("&", "_ampersand_", $xml);
                    $escaped = str_replace(":", "_colon_", $xml);
                    $xml = preg_replace('/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+/u', ' ', $escaped);
                    if ($xml === null) {
                        // The /u pattern fails (returns null) on invalid UTF-8. Without a fallback the
                        // node would be reported as "nothing left to read", so retry byte-wise the
                        // same way preprocessXml_filter does.
                        $xml = preg_replace('/[^\x09\x0a\x0d\x20-\xFF]+/', ' ', $escaped);
                    }
                    if ($xml === null) {
                        // last resort: keep the node, only with the placeholders applied
                        $xml = $escaped;
                    }
                }
                break;
            }
        }

        if ( empty($xml) ) {
            return false;
        }

        // strip namespace declarations; keep the untouched node if the regex engine gives up
        $stripped = preg_replace('%xmlns.*=\s*([\'""]).*\1%sU', '', $xml);
        if ($stripped === null) {
            $stripped = $xml;
        }

        return self::removeColonsFromRSS($stripped);
    }

    /**
     * removeColonsFromRSS
     *
     * Restores the "_colon_" placeholders to ":" and then rewrites the colon
     * in element names (and, unless disabled through the
     * 'wp_all_import_replace_colons_in_attribute_names' hook, in attribute
     * names) to an underscore. When the 'is_xml_preprocess_enabled' hook
     * allows it, "_ampersand_" placeholders are turned back into "&".
     *
     * @param string $feed The XML fragment to normalise
     * @return string The normalised XML fragment
     */
    public static function removeColonsFromRSS($feed) {
        $feed = str_replace("_colon_", ":", $feed);
        // pull out colons from start tags
        // (<\w+):(\w+>)
        $pattern = '/(<\w+):([\w+|\.|-]+[ |>]{1})/i';
        $replacement = '$1_$2';
        $feed = preg_replace($pattern, $replacement, $feed);
        // pull out colons from end tags
        // (<\/\w+):(\w+>)
        $pattern = '/(<\/\w+):([\w+|\.|-]+>)/i';
        $replacement = '$1_$2';
        $feed = preg_replace($pattern, $replacement, $feed);
        $is_replace_colons = apply_filters('wp_all_import_replace_colons_in_attribute_names', true);
        if ( $is_replace_colons ) {
            // pull out colons from attributes
            $pattern = '/(\s+\w+):(\w+[=]{1})/i';
            $replacement = '$1_$2';
            $feed = preg_replace($pattern, $replacement, $feed);
        }
        // pull colons from single element
        // (<\w+):(\w+\/>)
        $pattern = '/(<\w+):([\w+|\.|-]+\/>)/i';
        $replacement = '$1_$2';
        $feed = preg_replace($pattern, $replacement, $feed);
        $is_preprocess_enabled = apply_filters('is_xml_preprocess_enabled', true);
        if ($is_preprocess_enabled) {
            // replace temporary word _ampersand_ back to & symbol
            $feed = str_replace("_ampersand_", "&", $feed);
        }
        // replace all standalone & symbols ( which is not in htmlentities e.q. &nbsp;
// and not wrapped in CDATA section ) to &amp;
        // NOTE(review): the return value is ignored, so preprocessXml() presumably
        // modifies $feed by reference - confirm against PMXI_Import_Record.
        PMXI_Import_Record::preprocessXml($feed);

        return $feed;
    }
}

/**
 * Stream filter registered under the name 'preprocessxml'.
 *
 * For every bucket it replaces "&" with "_ampersand_" and ":" with "_colon_",
 * then replaces runs of characters outside the XML 1.0 character range with a
 * single space. The placeholders are turned back by
 * PMXI_Chunk::removeColonsFromRSS(). The whole transformation can be switched
 * off through the 'is_xml_preprocess_enabled' hook.
 */
class preprocessXml_filter extends php_user_filter {

    /**
     * Filters the buckets of $in into $out.
     *
     * @param resource $in       Incoming bucket brigade
     * @param resource $out      Outgoing bucket brigade
     * @param int      $consumed Running count of consumed bytes (by reference)
     * @param bool     $closing  Whether the stream is closing (unused)
     * @return int Always PSFS_PASS_ON
     */
    #[\ReturnTypeWillChange]
    public function filter($in, $out, &$consumed, $closing) {
        // the hook result does not depend on the bucket, so ask once per call
        $is_preprocess_enabled = apply_filters('is_xml_preprocess_enabled', true);

        while ($bucket = stream_bucket_make_writeable($in)) {
            if ($is_preprocess_enabled) {
                // the & symbol is not valid in XML, so replace it with temporary word _ampersand_
                $bucket->data = str_replace("&", "_ampersand_", $bucket->data);
                $escaped = $this->replace_colons($bucket->data);

                // preferred: UTF-8 aware clean-up of characters that are illegal in XML 1.0
                $cleanXML = preg_replace('/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+/u', ' ', $escaped);
                if ($cleanXML === null) {
                    // The /u pattern returns null when the bucket is not valid UTF-8;
                    // fall back to a byte-wise clean-up of control characters.
                    $cleanXML = preg_replace('/[^\x09\x0a\x0d\x20-\xFF]+/', ' ', $escaped);
                }

                // if both attempts failed, pass the data on with only the placeholders applied
                $bucket->data = ($cleanXML === null) ? $escaped : $cleanXML;
            }
            $consumed += $bucket->datalen;
            stream_bucket_append($out, $bucket);
        }

        return PSFS_PASS_ON;
    }

    /**
     * Replaces every ":" with the "_colon_" placeholder.
     *
     * @param string $data
     * @return string
     */
    function replace_colons($data) {
        return str_replace(":", "_colon_", $data);
    }
}