* @copyright 2015 PrestaShow.pl * @license http://PrestaShow.pl/license */

/**
 * Streams an XML file with XMLReader and converts the matching elements to
 * SimpleXMLElement objects one at a time.
 *
 * Note that xpath() does not evaluate a real XPath expression: only the last
 * "/"-separated segment of the given path is used, as an element name.
 */
class PShow_XMLReaderToSimpleXmlObjects
{

    /**
     * Last instance created through getInstance().
     *
     * @var PShow_XMLReaderToSimpleXmlObjects
     */
    private static $instance = null;

    /**
     * Path of the XML file this reader works on.
     *
     * @var string
     */
    public $filepath;

    /**
     * Reader used by the most recent xpath() / getAllTags() call.
     *
     * @var XMLReader
     */
    private $xml;

    /**
     * Cache for function xpath(), keyed by the md5 of the call arguments.
     *
     * @var array
     */
    protected $cache_xpath = array();

    /**
     * Returns the shared reader, creating a new one when none exists yet or
     * when a different file is requested.
     *
     * Calling it without an argument returns the current instance; this only
     * fails when no instance has been created so far.
     *
     * @param string $filename
     * @return PShow_XMLReaderToSimpleXmlObjects
     * @throws Exception when no filename is given and no instance exists,
     *                   or when the file does not exist
     */
    public static function getInstance($filename = null)
    {
        if ($filename === null) {
            if (self::$instance === null) {
                throw new Exception("SimpleXMLParser: filename not specified");
            }

            return self::$instance;
        }

        // Strict comparison: loose "!=" treats numeric-looking names such as
        // "1e3" and "1000" as the same file.
        if (self::$instance === null || self::$instance->filepath !== $filename) {
            self::$instance = new PShow_XMLReaderToSimpleXmlObjects($filename);
        }

        return self::$instance;
    }

    /**
     * @param string $filename
     * @throws Exception when the file does not exist
     */
    public function __construct($filename)
    {
        if (!file_exists($filename)) {
            throw new Exception("SimpleXMLParser: file not exists");
        }

        $this->filepath = $filename;
    }

    /**
     * Collects (or counts) the elements whose name equals the last segment
     * of $xpath.
     *
     * The reader moves from one match to the next with XMLReader::next(), so
     * the subtree of a matched element is skipped while looking for the
     * following match. Results are cached per argument combination.
     *
     * @param string $xpath     "/"-separated path; only the last segment is used
     * @param int $offset       number of matches to skip first
     * @param int $limit        maximum number of matches, null for no limit
     * @param bool $onlyCount   when true only the number of matches is returned
     * @return SimpleXMLElement[]|int list of elements, or the match count
     *                                when $onlyCount is true
     */
    public function xpath($xpath, $offset = 0, $limit = null, $onlyCount = false)
    {
        $cache_key = "filepath:" . $this->filepath . ";xpath:" . $xpath . ";offset:" . $offset
            . ";limit:" . $limit . ";onlyCount:" . $onlyCount;
        $cache_key_md5 = md5($cache_key);

        // The cache is keyed by hash, so the lookup has to test the keys;
        // in_array() would search the cached values instead.
        if (array_key_exists($cache_key_md5, $this->cache_xpath)) {
            return $this->cache_xpath[$cache_key_md5];
        }

        $segments = explode('/', $xpath);
        $objectTag = end($segments);

        // An empty element name can never match a real node, but it does
        // equal the reader's name at end of document, which would keep the
        // main loop below spinning.
        if ($objectTag === '') {
            return $onlyCount ? 0 : array();
        }

        $logFile = pathinfo($this->filepath, PATHINFO_FILENAME) . '.log';

        $this->reEncodeIfConfigured('re-encoding file with UTF8');

        $opened = $this->openReader(
            LIBXML_PARSEHUGE | LIBXML_COMPACT | LIBXML_NOBLANKS | LIBXML_NOEMPTYTAG | LIBXML_NSCLEAN
            | LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NOCDATA
        );
        if (!$opened) {
            // Reading from a reader that failed to open is an Error on PHP 8.
            PShow_Log::add($logFile, "Error in XML reading: unable to open file");

            return $onlyCount ? 0 : array();
        }

        $results = array();
        $n = 0;

        // Advance to the first node carrying the requested name.
        while ($this->xml->read() && $this->xml->name !== $objectTag);

        // Skip $offset matches.
        while (--$offset >= 0 && $this->xml->next($objectTag));

        while ($this->xml->name == $objectTag) {
            ++$n;

            if (!$onlyCount) {
                $node = @$this->xml->expand();

                if (!$node instanceof DOMElement) {
                    // sys_getloadavg() is not available on every platform
                    // and may return false.
                    $load = function_exists('sys_getloadavg') ? sys_getloadavg() : false;
                    $cpu_usage = (is_array($load) && isset($load[0])) ? $load[0] : '';

                    // The value logged as "CPU usage" is the first entry of
                    // sys_getloadavg().
                    PShow_Log::add($logFile, "Error in expanding XML; Elements count " . count($results) . "; "
                        . "Tag: " . $this->xml->name . "; Memory usage: "
                        . $this->formatMemoryUsage(memory_get_usage()) . "; "
                        . "CPU usage: " . $cpu_usage . '%');
                    break;
                }

                // Import into a fresh document so the element stays valid
                // after the reader has moved on.
                $doc = new DomDocument();
                $node = $doc->importNode($node, true);
                $results[] = simplexml_import_dom($node);
                unset($node);
            }

            $hasNext = $this->xml->next($objectTag);

            if ($limit !== null && --$limit <= 0) {
                break;
            }

            // Stop explicitly when the reader cannot advance instead of
            // relying on the current node name having changed.
            if (!$hasNext) {
                break;
            }
        }

        $this->xml->close();

        if ($error = libxml_get_last_error()) {
            PShow_Log::add($logFile, "Error in XML reading: [" . $error->line . "] " . trim($error->message));
        }

        $this->cache_xpath[$cache_key_md5] = $onlyCount ? $n : $results;

        return $this->cache_xpath[$cache_key_md5];
    }

    /**
     * Asks the "file" command line tool for the charset of a file.
     *
     * @param string $filepath
     * @return string|null text following "charset=" in the tool's first
     *                     output line, or null when it cannot be determined
     */
    public function getXMLEncoding($filepath)
    {
        // A disabled exec() is a fatal error on PHP 8 that "@" cannot silence.
        if (!function_exists('exec') || !function_exists('escapeshellarg')) {
            return null;
        }

        $output = array();
        // escapeshellarg() keeps paths containing quotes or other shell
        // metacharacters from breaking out of the argument.
        @exec('file -i ' . escapeshellarg($filepath), $output);

        if (!isset($output[0])) {
            return null;
        }

        // Use the last occurrence: the file name is echoed at the start of
        // the line and could itself contain "charset=".
        $position = strrpos($output[0], 'charset=');
        if ($position === false) {
            return null;
        }

        $charset = trim(substr($output[0], $position + strlen('charset=')));

        return ($charset === '') ? null : $charset;
    }

    /**
     * Lists the distinct element paths found near the start of the file.
     *
     * Each entry is the chain of element names from the root down to the
     * element, every name followed by a comma (e.g. "shop,item,name,").
     * Scanning stops after 100 distinct paths or 100000 nodes.
     *
     * @return array
     */
    public function getAllTags()
    {
        $results = array();
        $seen = array();
        $limit = 100;
        $searchLimit = 100000;
        $actPath = array();

        $this->reEncodeIfConfigured('re-encoded file with utf-8');

        $opened = $this->openReader(
            LIBXML_PARSEHUGE | LIBXML_COMPACT | LIBXML_NOBLANKS | LIBXML_NOEMPTYTAG | LIBXML_NSCLEAN
            | LIBXML_NOCDATA | LIBXML_BIGLINES | LIBXML_PEDANTIC
        );
        if (!$opened) {
            return $results;
        }

        while ($this->xml->read()) {
            $nodeType = $this->xml->nodeType;

            if ($nodeType == XMLReader::ELEMENT) {
                $name = $this->xml->name;
                $prefix = count($actPath) ? implode(',', $actPath) . ',' : '';
                $tagPath = $prefix . $name . ',';

                if (!isset($seen[$tagPath])) {
                    $seen[$tagPath] = true;
                    $results[] = $tagPath;

                    if (--$limit <= 0) {
                        break;
                    }
                }

                // Self-closing elements produce no END_ELEMENT node, so they
                // must not be pushed onto the path stack.
                if (!$this->xml->isEmptyElement) {
                    $actPath[] = $name;
                }
            } elseif ($nodeType == XMLReader::END_ELEMENT) {
                array_pop($actPath);
            }

            if (--$searchLimit <= 0) {
                break;
            }
        }

        $this->xml->close();

        return $results;
    }

    /**
     * Re-encodes the file through PShow_File::re_encode_utf8_file() when the
     * file configuration has a truthy ['primary']['re_encode_utf8'] entry,
     * and logs the given message.
     *
     * @param string $logMessage
     * @return void
     */
    private function reEncodeIfConfigured($logMessage)
    {
        $basename = pathinfo($this->filepath, PATHINFO_FILENAME);
        $config = PShow_Config::getFileConfig($basename);

        // re-encode file to prevent encoding errors
        if (isset($config['primary']['re_encode_utf8']) && $config['primary']['re_encode_utf8']) {
            PShow_File::re_encode_utf8_file($this->filepath);
            PShow_Log::add($basename . '.log', $logMessage);
        }
    }

    /**
     * Creates a fresh XMLReader on the file, using the encoding reported by
     * getXMLEncoding().
     *
     * @param int $options bitmask of LIBXML_* constants
     * @return bool false when the file could not be opened
     */
    private function openReader($options)
    {
        $this->xml = new XMLReader();

        return (bool) $this->xml->open($this->filepath, $this->getXMLEncoding($this->filepath), $options);
    }

    /**
     * Formats a byte count with a binary unit suffix, e.g. "12.50MB".
     *
     * @param int $size
     * @return string
     */
    private function formatMemoryUsage($size)
    {
        $size = max(0, (int) $size);
        $units = array('B', 'KB', 'MB', 'GB', 'TB');
        $power = $size > 0 ? (int) floor(log($size, 1024)) : 0;
        $power = min($power, count($units) - 1);

        return number_format($size / pow(1024, $power), 2, '.', ',') . $units[$power];
    }
}