* @copyright 2019 Dalibor Stojcevski
 * @license Dalibor Stojcevski
 */

// Runtime configuration applied as soon as this file is included:
// no memory limit, errors displayed, everything except deprecations reported.
ini_set('memory_limit', '-1');
ini_set('display_startup_errors', 1);
ini_set('display_errors', 1);
error_reporting(E_ALL ^ E_DEPRECATED);

if (!function_exists('each')) {
    /**
     * Polyfill for each(), which was deprecated in PHP 7.2 and removed in PHP 8.0.
     *
     * The array is taken by reference so that next() advances the caller's
     * internal pointer, exactly like the native function did. Without the
     * reference every call would return the first element again.
     *
     * @param array $arr Array whose internal pointer is read and then advanced.
     *
     * @return array|false [0 => key, 1 => value, 'key' => key, 'value' => value],
     *                     or false once the pointer is past the last element.
     */
    function each(&$arr)
    {
        $key = key($arr);
        $result = ($key === null)
            ? false
            : [$key, current($arr), 'key' => $key, 'value' => current($arr)];
        next($arr);

        return $result;
    }
}

if (!function_exists('array_key_first')) {
    /**
     * Polyfill for array_key_first() (native since PHP 7.3).
     *
     * @param array $arr
     *
     * @return int|string|null First key of the array, or null when it is empty.
     */
    function array_key_first(array $arr)
    {
        foreach ($arr as $key => $unused) {
            return $key;
        }

        return null;
    }
}

/**
 * Downloads a remote feed (or opens an uploaded Excel file) and converts it
 * into a PHP array. JSON, XML, CSV and XLS/XLSX inputs are handled.
 */
class FileReader
{
    /** @var array Map of source type => MIME types that identify it. */
    public $types = array(
        'csv' => array(
            'application/octet-stream',
            'text/csv',
            'application/csv',
            'text/comma-separated-values',
            'application/excel',
            'application/vnd.ms-excel',
            'application/vnd.msexcel',
        ),
        'json' => array(
            'application/json',
            'text/json',
            'application/x-javascript',
            'application/javascript',
            'text/javascript',
            'text/x-javascript',
            'text/x-json',
        ),
        'xml' => array(
            'application/xml',
            'text/xml',
        ),
    );

    /** @var string MIME type reported by the most recent cURL download ('' when none). */
    public $mime = '';

    /**
     * Fetches the content behind a URL.
     *
     * Credentials embedded in the URL (user:pass@host) are removed from the
     * URL and sent as HTTP basic authentication instead.
     *
     * @param string $link URL to fetch.
     * @param string $post Optional request body; when non-empty the cURL request
     *                     is sent as POST with an application/xml content type.
     *                     It is not used by the non-cURL fallback.
     * @param bool   $curl Use cURL when true, file_get_contents() otherwise.
     *
     * @return array [content, error message]. Content is false when the
     *               transfer failed; the error message is only ever filled in
     *               by the cURL branch.
     */
    public function getFile($link, $post = '', $curl = true)
    {
        // Undo HTML escaping of "&amp;" in copied links, then escape spaces.
        $link = str_replace('amp;', '', $link);
        $link = str_replace(' ', '%20', $link);

        $link_parts = parse_url($link);
        $credentials = '';
        if (!empty($link_parts['user']) && !empty($link_parts['pass'])) {
            $credentials = $link_parts['user'] . ':' . $link_parts['pass'];
            $link = str_replace($credentials . '@', '', $link);
        }

        if (!$curl) {
            if ($credentials !== '') {
                $auth = base64_encode($credentials);
                $context = stream_context_create([
                    'http' => [
                        'header' => "Authorization: Basic $auth",
                    ],
                ]);
                $server_output = @file_get_contents($link, false, $context);
            } else {
                $server_output = @file_get_contents($link);
            }

            return array($server_output, '');
        }

        $error_msg = '';
        // Forget the content type of any previous download so a response
        // without a Content-Type header cannot inherit a stale value.
        $this->mime = '';

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $link);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // Required for HTTP error codes to be reported via curl_error().
        curl_setopt($ch, CURLOPT_FAILONERROR, true);
        // NOTE(review): certificate verification is disabled, which allows
        // man-in-the-middle tampering with the feed. Left unchanged because
        // existing feeds may rely on it - confirm before enabling.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        // Follow redirects; download URLs frequently redirect to the real file.
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

        if ($post) {
            curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
            curl_setopt($ch, CURLOPT_HTTPHEADER, array(
                'Content-Type: application/xml',
                'Content-Length: ' . strlen($post),
            ));
            curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
        }

        if ($credentials !== '') {
            curl_setopt($ch, CURLOPT_USERPWD, $credentials);
        }

        $server_output = curl_exec($ch);

        $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
        if ($content_type) {
            // Keep only the media type, dropping parameters such as "; charset=utf-8".
            $content_type_parts = explode(';', $content_type);
            $this->mime = trim($content_type_parts[0]);
        }

        if (curl_errno($ch)) {
            $error_msg = curl_error($ch);
        }

        curl_close($ch);
        unset($ch);

        return array($server_output, $error_msg);
    }

    /**
     * Loads the feed described by $file and converts it to a PHP array.
     *
     * Keys read from $file: 'link' (required), and optionally 'source'
     * ('json', 'xml', 'csv', 'excel' or '' for auto-detection), 'post',
     * 'delimiter', 'headers' and 'mime_type'.
     *
     * When no source is given it is detected from the first visible character
     * of the content ('<' => xml, '[' or '{' => json) and otherwise from the
     * MIME type. Content that produced no data is finally parsed as CSV.
     * If an explicitly requested source yields nothing, the whole process is
     * retried once with auto-detection.
     *
     * @param array $file Feed description, see above.
     *
     * @return array [data, error]. The error is '' on success, a string for
     *               download/Excel problems, or the array returned by
     *               libxml_get_errors() when XML parsing fails.
     */
    public function getArrayFromLink($file)
    {
        $php_array = array();

        $link = trim($file['link']);
        $source = isset($file['source']) ? trim($file['source']) : '';
        $post = isset($file['post']) ? trim($file['post']) : '';
        $delimiter = !empty($file['delimiter']) ? $file['delimiter'] : '';
        $headers = !empty($file['headers']) ? $file['headers'] : 0;

        // Lower-cased so that ".XLSX" style links are recognised as well.
        $file_ext = strtolower(pathinfo($link, PATHINFO_EXTENSION));
        if (in_array($file_ext, ['xlsx', 'xls'], true)) {
            $source = 'excel';
        }

        if ($source === 'excel') {
            // Excel files are opened from disk further down, nothing to download.
            $error = '';
            $external_string = '';
        } else {
            list($external_string, $error) = $this->getFile($link, $post);
            if (empty($external_string)) {
                // cURL delivered nothing; try again through file_get_contents().
                list($external_string, $error) = $this->getFile($link, $post, false);
            }
        }

        if ($error) {
            return array('', $error);
        }

        $mime = !empty($file['mime_type']) ? trim($file['mime_type']) : $this->mime;

        if ($external_string === false) {
            $error = 'External file not found. Check link in browser';

            return array('', $error);
        }

        $external_string = $this->removeBOM($external_string);
        $external_string = trim($external_string);

        if (!$source) {
            // Sniff the format from the first printable ASCII character.
            $substring = $external_string ? substr($external_string, 0, 200) : ' ';
            $substring = preg_replace('/[\x00-\x1F\x80-\xFF]/', '', $substring);
            if (!$substring) {
                $substring = ' ';
            }

            $first_char = $substring[0];
            if ($first_char === '<') {
                $source = 'xml';
            } elseif ($first_char === '[' || $first_char === '{') {
                $source = 'json';
            } elseif ($mime) {
                $source = $this->matchMimeType($mime);
            }
        }

        if ($source === 'json') {
            $php_array = json_decode($external_string, true);
        }

        if ($source === 'xml') {
            libxml_use_internal_errors(true);
            $ob = simplexml_load_string(
                $external_string,
                'SimpleXMLElement',
                LIBXML_NOCDATA | LIBXML_PARSEHUGE
            );
            if ($ob) {
                $php_array = $this->xmlToArray($ob);
            } else {
                $xml_errors = libxml_get_errors();
                // Empty libxml's internal buffer so errors do not pile up
                // across repeated calls.
                libxml_clear_errors();

                return array($php_array, $xml_errors);
            }
        }

        ini_set('auto_detect_line_endings', true);

        if ($source === 'excel') {
            $parts = explode('/modules/import_api/views/img/', $link);
            if (!isset($parts[1])) {
                return array([], 'File not found ( excel error )');
            }
            // NOTE(review): this directory is spelled "importer_api" while the
            // link marker above and the library paths below use "import_api".
            // Looks like a typo - confirm which directory holds the uploads.
            // NOTE(review): $parts[1] comes straight from the link and is not
            // checked for "../" segments.
            $file_path = 'modules/importer_api/views/img/' . $parts[1];

            if ($file_ext === 'xlsx') {
                require_once(_PS_MODULE_DIR_ . 'import_api/lib/SimpleXLSX.php');
                $xls = new SimpleXLSX($file_path);
            } elseif ($file_ext === 'xls') {
                require_once(_PS_MODULE_DIR_ . 'import_api/lib/SimpleXLS.php');
                $xls = new SimpleXLS($file_path);
            } else {
                // Source was forced to "excel" for a link without an Excel
                // extension; there is no reader to instantiate.
                return array([], 'Error reading Excel file. Unsupported file extension: ' . $file_ext);
            }

            if (!$xls->success()) {
                return array([], 'Error reading Excel file. ' . $xls->error());
            }

            $php_array = $this->rowsToRecords($xls->rows(), $headers);
        }

        if (!$php_array || $source === 'csv') {
            // fgetcsv() needs a stream, so the content goes through a temp file.
            $fp = tmpfile();
            if ($fp === false) {
                return array([], 'Unable to create a temporary file for CSV parsing');
            }

            try {
                fwrite($fp, $external_string);
                rewind($fp);

                if (!$delimiter) {
                    $delimiter = $this->detectDelimiter($fp);
                }

                $php_array = $this->rowsToRecords($this->csvRows($fp, $delimiter), $headers);
            } finally {
                fclose($fp);
            }
        }

        if (!$php_array && !empty($file['source'])) {
            // The requested source type produced nothing: retry with auto-detection.
            $file['source'] = '';

            return $this->getArrayFromLink($file);
        }

        return array($php_array, '');
    }

    /**
     * Yields the rows of a CSV stream one at a time.
     *
     * @param resource $fp        Readable stream positioned at the first row.
     * @param string   $delimiter Field delimiter.
     *
     * @return Generator
     */
    private function csvRows($fp, $delimiter)
    {
        while (($row = fgetcsv($fp, 0, $delimiter)) !== false) {
            yield $row;
        }
    }

    /**
     * Turns tabular rows into associative records keyed by column name.
     *
     * The first non-empty row supplies the column names. When $headers is
     * truthy that row is only used to count the columns and the names become
     * "COLUMN 1", "COLUMN 2", ...
     * NOTE(review): in that mode the first row is still not emitted as data -
     * confirm this is intended.
     *
     * @param array|Traversable $rows    Rows, each an array of cell values.
     * @param mixed             $headers Truthy to generate generic column names.
     *
     * @return array One entry per data row (see combineRow()).
     */
    private function rowsToRecords($rows, $headers)
    {
        $index = array();
        $records = array();

        foreach ($rows as $row) {
            if (empty($index)) {
                if ($headers) {
                    $column_count = count($row);
                    for ($c = 1; $c <= $column_count; $c++) {
                        $index[] = 'COLUMN ' . $c;
                    }
                } else {
                    $index = $row;
                }
                continue;
            }

            $records[] = $this->combineRow($index, $row);
        }

        return $records;
    }

    /**
     * array_combine() that never throws.
     *
     * PHP 8 raises a ValueError when the two arrays differ in length and the
     * "@" operator cannot suppress it. Such rows yield false, which is what
     * array_combine() returned for them before PHP 8.
     *
     * @param array $index Column names.
     * @param mixed $row   Cell values.
     *
     * @return array|false
     */
    private function combineRow($index, $row)
    {
        if (!is_array($index) || !is_array($row) || count($index) !== count($row)) {
            return false;
        }

        return array_combine($index, $row);
    }

    /**
     * Recursively converts a SimpleXMLElement into a nested array.
     *
     * @param SimpleXMLElement $xml     Node to convert.
     * @param array            $options Overrides for the defaults listed below.
     *
     * @return array Single-entry array: node name => properties. Properties are
     *               either the node's text, or an array of prefixed attributes,
     *               child tags and (optionally) the text content.
     */
    public function xmlToArray($xml, $options = array())
    {
        $defaults = array(
            'namespaceSeparator' => ':',  // you may want this to be something other than a colon
            'attributePrefix' => '@',     // to distinguish between attributes and nodes with the same name
            'alwaysArray' => array(),     // array of xml tag names which should always become arrays
            'autoArray' => true,          // only create arrays for tags which appear more than once
            'textContent' => 'VAL',       // key used for the text content of elements
            'autoText' => true,           // skip textContent key if node has no attributes or child nodes
            'keySearch' => false,         // optional search and replace on tag and attribute names
            'keyReplace' => false,        // replace values for above search values (as passed to str_replace())
        );
        $options = array_merge($defaults, $options);

        $namespaces = $xml->getDocNamespaces();
        $namespaces[''] = null; // add base (empty) namespace

        // Collect attributes from all namespaces.
        $attributesArray = array();
        foreach ($namespaces as $prefix => $namespace) {
            foreach ($xml->attributes($namespace) as $attributeName => $attribute) {
                if ($options['keySearch']) {
                    $attributeName = str_replace($options['keySearch'], $options['keyReplace'], $attributeName);
                }
                $attributeKey = $options['attributePrefix']
                    . ($prefix ? $prefix . $options['namespaceSeparator'] : '')
                    . $attributeName;
                $attributesArray[$attributeKey] = (string) $attribute;
            }
        }

        // Collect child nodes from all namespaces.
        $tagsArray = array();
        foreach ($namespaces as $prefix => $namespace) {
            foreach ($xml->children($namespace) as $childXml) {
                // The recursive call returns exactly one "name => properties" pair.
                $childArray = $this->xmlToArray($childXml, $options);
                $childTagName = key($childArray);
                $childProperties = current($childArray);

                if ($options['keySearch']) {
                    $childTagName = str_replace($options['keySearch'], $options['keyReplace'], $childTagName);
                }
                if ($prefix) {
                    $childTagName = $prefix . $options['namespaceSeparator'] . $childTagName;
                }

                if (!isset($tagsArray[$childTagName])) {
                    // First occurrence: wrap in a list only when arrays are forced.
                    $forceArray = in_array($childTagName, $options['alwaysArray']) || !$options['autoArray'];
                    $tagsArray[$childTagName] = $forceArray ? array($childProperties) : $childProperties;
                } elseif (
                    is_array($tagsArray[$childTagName])
                    && array_keys($tagsArray[$childTagName]) === range(0, count($tagsArray[$childTagName]) - 1)
                ) {
                    // Key already holds an integer indexed list: append.
                    $tagsArray[$childTagName][] = $childProperties;
                } else {
                    // Second occurrence: convert to a list with the previous value first.
                    $tagsArray[$childTagName] = array($tagsArray[$childTagName], $childProperties);
                }
            }
        }

        // Text content of the node itself.
        $textContentArray = array();
        $plainText = trim((string) $xml);
        if ($plainText !== '') {
            $textContentArray[$options['textContent']] = $plainText;
        }

        // A node with nothing but text collapses to that text when autoText is on.
        $needsArray = !$options['autoText'] || $attributesArray || $tagsArray || ($plainText === '');
        $propertiesArray = $needsArray
            ? array_merge($attributesArray, $tagsArray, $textContentArray)
            : $plainText;

        return array(
            $xml->getName() => $propertiesArray,
        );
    }

    /**
     * Maps a MIME type to one of the source types declared in $types.
     *
     * The comparison ignores case and surrounding whitespace.
     *
     * @param string $mime MIME type without parameters, e.g. "text/csv".
     *
     * @return string 'csv', 'json', 'xml', or '' when the type is unknown.
     */
    public function matchMimeType($mime)
    {
        $mime = strtolower(trim((string) $mime));

        foreach ($this->types as $type => $list) {
            if (in_array($mime, $list, true)) {
                return $type;
            }
        }

        return '';
    }

    /**
     * Guesses the CSV delimiter by parsing the first row with each candidate
     * and keeping the one that produces the most fields. On a tie the earlier
     * candidate (";", tab, "|", ",") wins. The stream is rewound afterwards.
     *
     * @param resource $fh Readable, seekable stream.
     *
     * @return string Detected delimiter; ";" when the stream yields no row.
     */
    public function detectDelimiter($fh)
    {
        $delimiters = [';', "\t", '|', ','];
        $delimiter = $delimiters[0];
        $best_count = 0;

        foreach ($delimiters as $candidate) {
            $fields = fgetcsv($fh, 4096, $candidate);
            // fgetcsv() returns false at end of file; counting that value
            // would raise a TypeError on PHP 8.
            $field_count = is_array($fields) ? count($fields) : 0;

            if ($field_count > $best_count) {
                $delimiter = $candidate;
                $best_count = $field_count;
            }

            rewind($fh);
        }

        return $delimiter;
    }

    /**
     * Strips a leading UTF-8 byte order mark.
     *
     * Only the first three bytes are inspected, so large payloads are not
     * copied just for the check.
     *
     * @param mixed $data Raw content; non-string values are returned untouched.
     *
     * @return mixed Content without the BOM.
     */
    public function removeBOM($data)
    {
        if (is_string($data) && strncmp($data, "\xEF\xBB\xBF", 3) === 0) {
            return substr($data, 3);
        }

        return $data;
    }
}