Files
interblue.pl/modules/pshowimporter/classes/PShow_XMLReaderToSimpleXmlObjects.php
2024-10-25 14:16:28 +02:00

236 lines
6.9 KiB
PHP

<?php
/**
* File from http://PrestaShow.pl
*
* DISCLAIMER
* Do not edit or add to this file if you wish to upgrade this module to newer
* versions in the future.
*
* @authors PrestaShow.pl <kontakt@prestashow.pl>
* @copyright 2015 PrestaShow.pl
* @license http://PrestaShow.pl/license
*/
/**
 * Streams an XML file with XMLReader and exposes matching elements as
 * SimpleXMLElement objects, so large import files do not have to be loaded
 * into memory as a whole.
 */
class PShow_XMLReaderToSimpleXmlObjects
{
    /**
     * Most recently created reader; reused while the same file is requested.
     *
     * @var PShow_XMLReaderToSimpleXmlObjects|null
     */
    private static $instance = null;

    /**
     * Path of the XML file this reader works on.
     *
     * @var string
     */
    public $filepath;

    /**
     * Reader used by the call currently in progress.
     *
     * @var XMLReader
     */
    private $xml;

    /**
     * Cache for function xpath(), keyed by the md5 of the call parameters.
     *
     * @var array
     */
    protected $cache_xpath = array();

    /**
     * Returns the shared reader, creating a new one when none exists yet or
     * when a different file is requested.
     *
     * Calling this without an argument returns the current instance; it only
     * fails when no instance has been created so far.
     *
     * @param string|null $filename
     * @return PShow_XMLReaderToSimpleXmlObjects
     * @throws Exception when no filename is given and no instance exists,
     *                   or when the file does not exist
     */
    public static function getInstance($filename = null)
    {
        $reusable = self::$instance !== null
            && ($filename === null || self::$instance->filepath === $filename);

        if (!$reusable) {
            if ($filename === null) {
                throw new Exception("SimpleXMLParser: filename not specified");
            }
            self::$instance = new self($filename);
        }

        return self::$instance;
    }

    /**
     * @param string $filename
     * @throws Exception when the file does not exist
     */
    public function __construct($filename)
    {
        if (!file_exists($filename)) {
            throw new Exception("SimpleXMLParser: file not exists");
        }
        $this->filepath = $filename;
    }

    /**
     * Collects (or counts) the elements named after the last segment of
     * $xpath. Only that last segment is used for matching; the preceding
     * segments are ignored.
     *
     * @param string $xpath slash-separated path; its last segment is the tag name
     * @param int $offset number of matching elements to skip first
     * @param int|null $limit maximum number of elements to visit, null for no limit
     * @param bool $onlyCount when true, elements are counted instead of expanded
     * @return SimpleXMLElement[]|int list of elements, or the count when $onlyCount is true
     */
    public function xpath($xpath, $offset = 0, $limit = null, $onlyCount = false)
    {
        $cache_key = "filepath:" . $this->filepath . ";xpath:" . $xpath . ";offset:" . $offset . ";limit:" . $limit . ";onlyCount:" . $onlyCount;
        $cache_key_md5 = md5($cache_key);
        // The cache is keyed by the hash, so look at the keys (not the values).
        if (array_key_exists($cache_key_md5, $this->cache_xpath)) {
            return $this->cache_xpath[$cache_key_md5];
        }

        $segments = explode('/', $xpath);
        $objectTag = end($segments);
        $results = array();
        $n = 0;

        $this->reEncodeIfConfigured('re-encoding file with UTF8');

        // Drop errors left over from earlier parsing so the check after the
        // loop only reports problems of this run.
        libxml_clear_errors();

        $flags = LIBXML_PARSEHUGE | LIBXML_COMPACT | LIBXML_NOBLANKS | LIBXML_NOEMPTYTAG
            | LIBXML_NSCLEAN | LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NOCDATA;
        if (!$this->openReader($flags)) {
            // Nothing could be read; do not cache so a later call can retry.
            return $onlyCount ? 0 : array();
        }

        // Advance to the first node carrying the requested tag name.
        while ($this->xml->read() && $this->xml->name !== $objectTag) {
            // intentionally empty
        }
        // Skip $offset matching elements.
        while (--$offset >= 0 && $this->xml->next($objectTag)) {
            // intentionally empty
        }

        while ($this->xml->name == $objectTag) {
            ++$n;

            if (!$onlyCount) {
                $node = @$this->xml->expand();
                if (!$node instanceof DOMElement) {
                    $this->logExpandFailure(count($results));
                    break;
                }
                // Import into a standalone document so the element stays
                // usable after the reader moves on.
                $doc = new DOMDocument();
                $results[] = simplexml_import_dom($doc->importNode($node, true));
                unset($node);
            }

            $advanced = $this->xml->next($objectTag);
            if ($limit !== null && --$limit <= 0) {
                break;
            }
            if (!$advanced) {
                // No further matching element; stop instead of relying on the
                // current node name changing.
                break;
            }
        }

        $this->xml->close();

        if ($error = libxml_get_last_error()) {
            PShow_Log::add($this->getLogName(), "Error in XML reading: [" . $error->line . "] " . trim($error->message));
        }

        $this->cache_xpath[$cache_key_md5] = $onlyCount ? $n : $results;

        return $this->cache_xpath[$cache_key_md5];
    }

    /**
     * Asks the `file` command for the charset of a file.
     *
     * @param string $filepath
     * @return string|null charset reported by `file -i`, or null when it cannot be determined
     */
    public function getXMLEncoding($filepath)
    {
        if (!function_exists('exec')) {
            return null;
        }

        $output = array();
        // escapeshellarg() keeps quotes or other shell metacharacters in the
        // path from being interpreted by the shell.
        @exec('file -i ' . escapeshellarg($filepath), $output);
        if (!isset($output[0])) {
            return null;
        }

        $parts = explode('charset=', $output[0]);
        if (count($parts) < 2) {
            return null;
        }

        // The charset is the last field of the line; taking the last segment
        // keeps a path that itself contains "charset=" from confusing the parse.
        $charset = trim(end($parts));

        return $charset !== '' ? $charset : null;
    }

    /**
     * Lists the distinct element paths found in the file.
     *
     * Each entry is the comma-separated chain of ancestor tag names followed
     * by the tag itself, with a trailing comma (e.g. "root,item,"). Scanning
     * stops after 100 distinct paths or 100000 visited nodes.
     *
     * @return array
     */
    public function getAllTags()
    {
        $results = array();
        $seen = array();
        $limit = 100;
        $searchLimit = 100000;
        $actPath = array();

        $this->reEncodeIfConfigured('re-encoded file with utf-8');

        $flags = LIBXML_PARSEHUGE | LIBXML_COMPACT | LIBXML_NOBLANKS | LIBXML_NOEMPTYTAG
            | LIBXML_NSCLEAN | LIBXML_NOCDATA | LIBXML_BIGLINES | LIBXML_PEDANTIC;
        if (!$this->openReader($flags)) {
            return $results;
        }

        while ($this->xml->read()) {
            $nodeType = $this->xml->nodeType;

            if ($nodeType == XMLReader::ELEMENT) {
                $prefix = count($actPath) ? implode(',', $actPath) . ',' : '';
                $tagPath = $prefix . $this->xml->name . ',';

                if (!isset($seen[$tagPath])) {
                    $seen[$tagPath] = true;
                    $results[] = $tagPath;
                    if (--$limit <= 0) {
                        break;
                    }
                }

                // Self-closing elements never produce an END_ELEMENT node, so
                // they must not be pushed onto the ancestor stack.
                if (!$this->xml->isEmptyElement) {
                    $actPath[] = $this->xml->name;
                }
            } elseif ($nodeType == XMLReader::END_ELEMENT) {
                array_pop($actPath);
            }

            if (--$searchLimit <= 0) {
                break;
            }
        }

        $this->xml->close();

        return $results;
    }

    /**
     * Name of the log file that belongs to the current XML file.
     *
     * @return string
     */
    private function getLogName()
    {
        return pathinfo($this->filepath, PATHINFO_FILENAME) . '.log';
    }

    /**
     * Re-encodes the file when the file configuration asks for it, to prevent
     * encoding errors while reading.
     *
     * @param string $logMessage message written to the log after re-encoding
     * @return void
     */
    private function reEncodeIfConfigured($logMessage)
    {
        $config = PShow_Config::getFileConfig(pathinfo($this->filepath, PATHINFO_FILENAME));
        if (isset($config['primary']['re_encode_utf8']) && $config['primary']['re_encode_utf8']) {
            PShow_File::re_encode_utf8_file($this->filepath);
            PShow_Log::add($this->getLogName(), $logMessage);
        }
    }

    /**
     * Creates a fresh XMLReader on the current file.
     *
     * @param int $flags bitmask of LIBXML_* options
     * @return bool false when the file could not be opened (the failure is logged)
     */
    private function openReader($flags)
    {
        $this->xml = new XMLReader();
        if (!$this->xml->open($this->filepath, $this->detectReaderEncoding(), $flags)) {
            PShow_Log::add($this->getLogName(), "Error in XML reading: unable to open file");
            return false;
        }

        return true;
    }

    /**
     * Encoding to hand to XMLReader, or null to let libxml detect it.
     *
     * `file -i` may answer with labels such as "binary" or "unknown-8bit";
     * those are not encoding names, so they are treated as "unknown".
     *
     * @return string|null
     */
    private function detectReaderEncoding()
    {
        $encoding = $this->getXMLEncoding($this->filepath);
        if ($encoding === null) {
            return null;
        }

        $label = strtolower($encoding);
        if ($label === 'binary' || $label === 'unknown-8bit') {
            return null;
        }

        return $encoding;
    }

    /**
     * Logs diagnostic details when the current node could not be expanded.
     *
     * @param int $collected number of elements collected so far
     * @return void
     */
    private function logExpandFailure($collected)
    {
        $size = max(0, (int) memory_get_usage());
        $units = array('B', 'KB', 'MB', 'GB', 'TB');
        $power = $size > 0 ? (int) floor(log($size, 1024)) : 0;
        $power = min($power, count($units) - 1);
        $memory_usage = number_format($size / pow(1024, $power), 2, '.', ',') . $units[$power];

        // sys_getloadavg() is not available on every platform.
        $load = function_exists('sys_getloadavg') ? sys_getloadavg() : false;
        $cpu_usage = (is_array($load) && isset($load[0])) ? $load[0] : 'n/a';

        PShow_Log::add($this->getLogName(), "Error in expanding XML; Elements count " . $collected . "; "
            . "Tag: " . $this->xml->name . "; Memory usage: " . $memory_usage . "; "
            . "CPU usage: " . $cpu_usage . '%');
    }
}