first commit
This commit is contained in:
106
modules/x13import/tools/Spout/Reader/XLSX/Helper/CellHelper.php
Normal file
106
modules/x13import/tools/Spout/Reader/XLSX/Helper/CellHelper.php
Normal file
@@ -0,0 +1,106 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX\Helper;
|
||||
|
||||
use Box\Spout\Common\Exception\InvalidArgumentException;
|
||||
|
||||
/**
|
||||
* Class CellHelper
|
||||
* This class provides helper functions when working with cells
|
||||
*
|
||||
* @package Box\Spout\Reader\XLSX\Helper
|
||||
*/
|
||||
class CellHelper
{
    // Using ord() is super slow... Using a pre-computed hash table instead.
    private static $columnLetterToIndexMapping = [
        'A' => 0, 'B' => 1, 'C' => 2, 'D' => 3, 'E' => 4, 'F' => 5, 'G' => 6,
        'H' => 7, 'I' => 8, 'J' => 9, 'K' => 10, 'L' => 11, 'M' => 12, 'N' => 13,
        'O' => 14, 'P' => 15, 'Q' => 16, 'R' => 17, 'S' => 18, 'T' => 19, 'U' => 20,
        'V' => 21, 'W' => 22, 'X' => 23, 'Y' => 24, 'Z' => 25,
    ];

    /**
     * Fills the missing indexes of an array with a given value.
     * For instance, given $dataArray = [1 => 1, 3 => 3],
     * calling fillMissingArrayIndexes($dataArray, 'FILL') will return this array: ['FILL', 1, 'FILL', 3]
     *
     * @param array $dataArray The array to fill
     * @param string|void $fillValue optional
     * @return array The array with every index from 0 to the highest existing index set, sorted by key
     */
    public static function fillMissingArrayIndexes($dataArray, $fillValue = '')
    {
        if (empty($dataArray)) {
            return [];
        }

        $existingIndexes = array_keys($dataArray);

        // The "+" operator keeps the values already present in $dataArray
        // and only adds the keys that are missing.
        $newIndexes = array_fill_keys(range(0, max($existingIndexes)), $fillValue);
        $dataArray += $newIndexes;

        ksort($dataArray);

        return $dataArray;
    }

    /**
     * Returns the base 10 column index associated to the cell index (base 26).
     * Excel uses A to Z letters for column indexing, where A is the 1st column,
     * Z is the 26th and AA is the 27th.
     * The mapping is zero based, so that A1 maps to 0, B2 maps to 1, Z13 to 25 and AA4 to 26.
     *
     * @param string $cellIndex The Excel cell index ('A1', 'BC13', ...)
     * @return int
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     */
    public static function getColumnIndexFromCellIndex($cellIndex)
    {
        if (!self::isValidCellIndex($cellIndex)) {
            throw new InvalidArgumentException('Cannot get column index from an invalid cell index.');
        }

        $columnIndex = 0;

        // Remove row information
        $columnLetters = preg_replace('/\d/', '', $cellIndex);

        // strlen() is super slow too... Using isset() is way faster and not too unreadable,
        // since we checked before that there are between 1 and 3 letters.
        $columnLength = isset($columnLetters[1]) ? (isset($columnLetters[2]) ? 3 : 2) : 1;

        // Looping over the different letters of the column is slower than this method.
        // Also, not using the pow() function because it's slooooow...
        switch ($columnLength) {
            case 1:
                $columnIndex = (self::$columnLetterToIndexMapping[$columnLetters]);
                break;
            case 2:
                $firstLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[0]] + 1) * 26;
                $secondLetterIndex = self::$columnLetterToIndexMapping[$columnLetters[1]];
                $columnIndex = $firstLetterIndex + $secondLetterIndex;
                break;
            case 3:
                $firstLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[0]] + 1) * 676;
                $secondLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[1]] + 1) * 26;
                $thirdLetterIndex = self::$columnLetterToIndexMapping[$columnLetters[2]];
                $columnIndex = $firstLetterIndex + $secondLetterIndex + $thirdLetterIndex;
                break;
        }

        return $columnIndex;
    }

    /**
     * Returns whether a cell index is valid, in an Excel world.
     * To be valid, the cell index should start with capital letters and be followed by numbers.
     * There can only be 3 letters, as there can only be 16,384 columns, the last one being 'XFD'.
     *
     * @param string $cellIndex The Excel cell index ('A1', 'BC13', ...)
     * @return bool
     */
    protected static function isValidCellIndex($cellIndex)
    {
        // The "D" modifier makes "$" match only at the very end of the subject.
        // Without it, a trailing line feed ("A1\n") would be accepted as valid.
        return (preg_match('/^[A-Z]{1,3}\d+$/D', $cellIndex) === 1);
    }
}
|
||||
@@ -0,0 +1,300 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX\Helper;
|
||||
|
||||
/**
|
||||
* Class CellValueFormatter
|
||||
* This class provides helper functions to format cell values
|
||||
*
|
||||
* @package Box\Spout\Reader\XLSX\Helper
|
||||
*/
|
||||
class CellValueFormatter
{
    /** Definition of all possible cell types */
    const CELL_TYPE_INLINE_STRING = 'inlineStr';
    const CELL_TYPE_STR = 'str';
    const CELL_TYPE_SHARED_STRING = 's';
    const CELL_TYPE_BOOLEAN = 'b';
    const CELL_TYPE_NUMERIC = 'n';
    const CELL_TYPE_DATE = 'd';
    const CELL_TYPE_ERROR = 'e';

    /** Definition of XML nodes names used to parse data */
    const XML_NODE_VALUE = 'v';
    const XML_NODE_INLINE_STRING_VALUE = 't';

    /** Definition of XML attributes used to parse data */
    const XML_ATTRIBUTE_TYPE = 't';
    const XML_ATTRIBUTE_STYLE_ID = 's';

    /** Constants used for date formatting */
    const NUM_SECONDS_IN_ONE_DAY = 86400;
    const NUM_SECONDS_IN_ONE_HOUR = 3600;
    const NUM_SECONDS_IN_ONE_MINUTE = 60;

    /**
     * February 29th, 1900 is NOT a leap year but Excel thinks it is...
     * @see https://en.wikipedia.org/wiki/Year_1900_problem#Microsoft_Excel
     */
    const ERRONEOUS_EXCEL_LEAP_YEAR_DAY = 60;

    /** @var SharedStringsHelper Helper to work with shared strings */
    protected $sharedStringsHelper;

    /** @var StyleHelper Helper to work with styles */
    protected $styleHelper;

    /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
    protected $shouldFormatDates;

    /** @var \Box\Spout\Common\Escaper\XLSX Used to unescape XML data */
    protected $escaper;

    /**
     * @param SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
     * @param StyleHelper $styleHelper Helper to work with styles
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
     */
    public function __construct($sharedStringsHelper, $styleHelper, $shouldFormatDates)
    {
        $this->sharedStringsHelper = $sharedStringsHelper;
        $this->styleHelper = $styleHelper;
        $this->shouldFormatDates = $shouldFormatDates;

        /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
        $this->escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
     */
    public function extractAndFormatNodeValue($node)
    {
        // Default cell type is "n"
        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
        $cellStyleId = intval($node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID));
        $vNodeValue = $this->getVNodeValue($node);

        // Inline strings store their value in a "t" node, not in a "v" node,
        // so an empty "v" value does not mean the cell is empty for them.
        if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
            return $vNodeValue;
        }

        switch ($cellType) {
            case self::CELL_TYPE_INLINE_STRING:
                return $this->formatInlineStringCellValue($node);
            case self::CELL_TYPE_SHARED_STRING:
                return $this->formatSharedStringCellValue($vNodeValue);
            case self::CELL_TYPE_STR:
                return $this->formatStrCellValue($vNodeValue);
            case self::CELL_TYPE_BOOLEAN:
                return $this->formatBooleanCellValue($vNodeValue);
            case self::CELL_TYPE_NUMERIC:
                return $this->formatNumericCellValue($vNodeValue, $cellStyleId);
            case self::CELL_TYPE_DATE:
                return $this->formatDateCellValue($vNodeValue);
            default:
                // Covers the error type ("e") as well as any unknown type
                return null;
        }
    }

    /**
     * Returns the cell's string value from a node's nested value node
     *
     * @param \DOMNode $node
     * @return string The value associated with the cell
     */
    protected function getVNodeValue($node)
    {
        // for cell types having a "v" tag containing the value.
        // if not, the returned value should be empty string.
        $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);

        return ($vNode !== null) ? $vNode->nodeValue : '';
    }

    /**
     * Returns the cell String value where string is inline.
     *
     * @param \DOMNode $node
     * @return string The value associated with the cell (empty string when the cell contains no "t" node)
     */
    protected function formatInlineStringCellValue($node)
    {
        // inline strings are formatted this way:
        // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t></is></c>
        $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0);

        // DOMNodeList::item() returns null when there is no such node (e.g. "<is/>").
        // Treat this as an empty string instead of dereferencing null.
        if ($tNode === null) {
            return '';
        }

        return $this->escaper->unescape($tNode->nodeValue);
    }

    /**
     * Returns the cell String value from shared-strings file using nodeValue index.
     *
     * @param string $nodeValue
     * @return string The value associated with the cell
     */
    protected function formatSharedStringCellValue($nodeValue)
    {
        // shared strings are formatted this way:
        // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
        $sharedStringIndex = intval($nodeValue);
        $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex);

        return $this->escaper->unescape($escapedCellValue);
    }

    /**
     * Returns the cell String value, where string is stored in value node.
     *
     * @param string $nodeValue
     * @return string The value associated with the cell
     */
    protected function formatStrCellValue($nodeValue)
    {
        $escapedCellValue = trim($nodeValue);

        return $this->escaper->unescape($escapedCellValue);
    }

    /**
     * Returns the cell Numeric value from string of nodeValue.
     * The value can also represent a timestamp and a DateTime will be returned.
     *
     * @param string $nodeValue
     * @param int $cellStyleId 0 being the default style
     * @return int|float|string|\DateTime|null The value associated with the cell
     */
    protected function formatNumericCellValue($nodeValue, $cellStyleId)
    {
        // Numeric values can represent numbers as well as timestamps.
        // We need to look at the style of the cell to determine whether it is one or the other.
        $shouldFormatAsDate = $this->styleHelper->shouldFormatNumericValueAsDate($cellStyleId);

        if ($shouldFormatAsDate) {
            return $this->formatExcelTimestampValue(floatval($nodeValue), $cellStyleId);
        }

        // Return an int when the value has no fractional part, a float otherwise
        $nodeIntValue = intval($nodeValue);

        return ($nodeIntValue == $nodeValue) ? $nodeIntValue : floatval($nodeValue);
    }

    /**
     * Returns a cell's PHP Date value, associated to the given timestamp.
     * NOTE: The timestamp is a float representing the number of days since January 1st, 1900.
     * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
     *
     * @param float $nodeValue
     * @param int $cellStyleId 0 being the default style
     * @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
     */
    protected function formatExcelTimestampValue($nodeValue, $cellStyleId)
    {
        // Fix for the erroneous leap year in Excel
        if (ceil($nodeValue) > self::ERRONEOUS_EXCEL_LEAP_YEAR_DAY) {
            --$nodeValue;
        }

        if ($nodeValue >= 1) {
            // Values greater than 1 represent "dates". The value 1.0 representing the "base" date: 1900-01-01.
            return $this->formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId);
        } elseif ($nodeValue >= 0) {
            // Values between 0 and 1 represent "times".
            return $this->formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId);
        }

        // invalid date
        return null;
    }

    /**
     * Returns a cell's PHP DateTime value, associated to the given timestamp.
     * Only the time value matters. The date part is set to Jan 1st, 1900 (base Excel date).
     *
     * @param float $nodeValue
     * @param int $cellStyleId 0 being the default style
     * @return \DateTime|string The value associated with the cell
     */
    protected function formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId)
    {
        $time = round($nodeValue * self::NUM_SECONDS_IN_ONE_DAY);
        $hours = floor($time / self::NUM_SECONDS_IN_ONE_HOUR);
        // NUM_SECONDS_IN_ONE_MINUTE is also the number of minutes in one hour (60)
        $minutes = floor($time / self::NUM_SECONDS_IN_ONE_MINUTE) - ($hours * self::NUM_SECONDS_IN_ONE_MINUTE);
        $seconds = $time - ($hours * self::NUM_SECONDS_IN_ONE_HOUR) - ($minutes * self::NUM_SECONDS_IN_ONE_MINUTE);

        // using the base Excel date (Jan 1st, 1900) - not relevant here
        $dateObj = new \DateTime('1900-01-01');
        // floor()/round() return floats; DateTime::setTime() expects integers
        $dateObj->setTime((int) $hours, (int) $minutes, (int) $seconds);

        if ($this->shouldFormatDates) {
            $styleNumberFormatCode = $this->styleHelper->getNumberFormatCode($cellStyleId);
            $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);

            return $dateObj->format($phpDateFormat);
        }

        return $dateObj;
    }

    /**
     * Returns a cell's PHP Date value, associated to the given timestamp.
     * NOTE: The timestamp is a float representing the number of days since January 1st, 1900.
     *
     * @param float $nodeValue
     * @param int $cellStyleId 0 being the default style
     * @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
     */
    protected function formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId)
    {
        // Do not use any unix timestamps for calculation to prevent
        // issues with numbers exceeding 2^31.
        $secondsRemainder = fmod($nodeValue, 1) * self::NUM_SECONDS_IN_ONE_DAY;
        $secondsRemainder = round($secondsRemainder, 0);

        try {
            // "|" resets all the fields that are not parsed (the time part) to zero
            $dateObj = \DateTime::createFromFormat('|Y-m-d', '1899-12-31');
            $dateObj->modify('+' . intval($nodeValue) . 'days');
            $dateObj->modify('+' . $secondsRemainder . 'seconds');

            if ($this->shouldFormatDates) {
                $styleNumberFormatCode = $this->styleHelper->getNumberFormatCode($cellStyleId);
                $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);

                return $dateObj->format($phpDateFormat);
            }

            return $dateObj;
        } catch (\Exception $e) {
            return null;
        }
    }

    /**
     * Returns the cell Boolean value from a specific node's Value.
     *
     * @param string $nodeValue
     * @return bool The value associated with the cell
     */
    protected function formatBooleanCellValue($nodeValue)
    {
        // !! is similar to boolval()
        return !!$nodeValue;
    }

    /**
     * Returns a cell's PHP Date value, associated to the given stored nodeValue.
     * When dates should be formatted, the stored string is returned untouched.
     * @see ECMA-376 Part 1 - §18.17.4
     *
     * @param string $nodeValue ISO 8601 Date string
     * @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
     */
    protected function formatDateCellValue($nodeValue)
    {
        // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
        try {
            return ($this->shouldFormatDates) ? $nodeValue : new \DateTime($nodeValue);
        } catch (\Exception $e) {
            return null;
        }
    }
}
|
||||
@@ -0,0 +1,124 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX\Helper;
|
||||
|
||||
/**
|
||||
* Class DateFormatHelper
|
||||
* This class provides helper functions to format Excel dates
|
||||
*
|
||||
* @package Box\Spout\Reader\XLSX\Helper
|
||||
*/
|
||||
class DateFormatHelper
{
    const KEY_GENERAL = 'general';
    const KEY_HOUR_12 = '12h';
    const KEY_HOUR_24 = '24h';

    /**
     * This map is used to replace Excel format characters by their PHP equivalent.
     * Keys should be ordered from longest to smallest.
     *
     * @var array Mapping between Excel format characters and PHP format characters
     */
    private static $excelDateFormatToPHPDateFormatMapping = [
        self::KEY_GENERAL => [
            // Time
            'am/pm' => 'A',  // Uppercase Ante meridiem and Post meridiem
            ':mm'   => ':i', // Minutes with leading zeros - if preceded by a ":" (otherwise month)
            'mm:'   => 'i:', // Minutes with leading zeros - if followed by a ":" (otherwise month)
            'ss'    => 's',  // Seconds, with leading zeros
            '.s'    => '',   // Ignore (fractional seconds format does not exist in PHP)

            // Date
            'e'     => 'Y',  // Full numeric representation of a year, 4 digits
            'yyyy'  => 'Y',  // Full numeric representation of a year, 4 digits
            'yy'    => 'y',  // Two digit representation of a year
            'mmmmm' => 'M',  // Short textual representation of a month, three letters ("mmmmm" should only contain the 1st letter...)
            'mmmm'  => 'F',  // Full textual representation of a month
            'mmm'   => 'M',  // Short textual representation of a month, three letters
            'mm'    => 'm',  // Numeric representation of a month, with leading zeros
            'm'     => 'n',  // Numeric representation of a month, without leading zeros
            'dddd'  => 'l',  // Full textual representation of the day of the week
            'ddd'   => 'D',  // Textual representation of a day, three letters
            'dd'    => 'd',  // Day of the month, 2 digits with leading zeros
            'd'     => 'j',  // Day of the month without leading zeros
        ],
        self::KEY_HOUR_12 => [
            'hh'    => 'h',  // 12-hour format of an hour with leading zeros
            'h'     => 'g',  // 12-hour format of an hour without leading zeros
        ],
        self::KEY_HOUR_24 => [
            'hh'    => 'H',  // 24-hour hours with leading zero
            'h'     => 'G',  // 24-hour format of an hour without leading zeros
        ],
    ];

    /**
     * Converts the given Excel date format to a format understandable by the PHP date function.
     *
     * @param string $excelDateFormat Excel date format
     * @return string PHP date format (as defined here: http://php.net/manual/en/function.date.php)
     */
    public static function toPHPDateFormat($excelDateFormat)
    {
        // Remove brackets potentially present at the beginning of the format string
        // and text portion of the format at the end of it (starting with ";")
        // See §18.8.31 of ECMA-376 for more detail.
        $dateFormat = preg_replace('/^(?:\[\$[^\]]+?\])?([^;]*).*/', '$1', $excelDateFormat);

        // Double quotes are used to escape characters that must not be interpreted.
        // For instance, ["Day " dd] should result in "Day 13" and we should not try to interpret "D", "a", "y"
        // By exploding the format string using double quote as a delimiter, we can get all parts
        // that must be transformed (even indexes) and all parts that must not be (odd indexes).
        $dateFormatParts = explode('"', $dateFormat);

        // The AM/PM marker applies to the hours of the whole format, even when some quoted text
        // separates the marker from the hours (as in [h:mm "o'clock" AM/PM]).
        // So the 12-hour vs 24-hour decision is taken once, looking at all the unquoted parts.
        $hourMappingKey = self::KEY_HOUR_24;
        foreach ($dateFormatParts as $partIndex => $dateFormatPart) {
            if ($partIndex % 2 === 0 && self::has12HourFormatMarker($dateFormatPart)) {
                $hourMappingKey = self::KEY_HOUR_12;
                break;
            }
        }

        foreach ($dateFormatParts as $partIndex => $dateFormatPart) {
            // do not look at odd indexes
            if ($partIndex % 2 === 1) {
                continue;
            }

            // Make sure all characters are lowercase, as the mapping table is using lowercase characters
            $transformedPart = strtolower($dateFormatPart);

            // Remove escapes related to non-format characters
            $transformedPart = str_replace('\\', '', $transformedPart);

            // Apply general transformation first...
            $transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_GENERAL]);

            // ... then apply hour transformation, for 12-hour or 24-hour format
            $transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[$hourMappingKey]);

            // overwrite the parts array with the new transformed part
            $dateFormatParts[$partIndex] = $transformedPart;
        }

        // Merge all transformed parts back together
        $phpDateFormat = implode('"', $dateFormatParts);

        // Finally, to have the date format compatible with the DateTime::format() function, we need to escape
        // all characters that are inside double quotes (and double quotes must be removed).
        // For instance, ["Day " dd] should become [\D\a\y\ dd]
        $phpDateFormat = preg_replace_callback('/"(.+?)"/', function ($matches) {
            $stringToEscape = $matches[1];
            // Split into characters (UTF-8 aware) so that each of them gets its own backslash
            $letters = preg_split('//u', $stringToEscape, -1, PREG_SPLIT_NO_EMPTY);

            return '\\' . implode('\\', $letters);
        }, $phpDateFormat);

        return $phpDateFormat;
    }

    /**
     * @param string $excelDateFormat Date format as defined by Excel
     * @return bool Whether the given date format has the 12-hour format marker
     */
    private static function has12HourFormatMarker($excelDateFormat)
    {
        return (stripos($excelDateFormat, 'am/pm') !== false);
    }
}
|
||||
@@ -0,0 +1,159 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX\Helper\SharedStringsCaching;
|
||||
|
||||
/**
|
||||
* Class CachingStrategyFactory
|
||||
*
|
||||
* @package Box\Spout\Reader\XLSX\Helper\SharedStringsCaching
|
||||
*/
|
||||
class CachingStrategyFactory
{
    /**
     * The memory amount needed to store a string was obtained empirically from this data:
     *
     *        ------------------------------------
     *        | Number of chars⁺ | Memory needed |
     *        ------------------------------------
     *        |           3,000  |         1 MB  |
     *        |          15,000  |         2 MB  |
     *        |          30,000  |         5 MB  |
     *        |          75,000  |        11 MB  |
     *        |         150,000  |        21 MB  |
     *        |         300,000  |        43 MB  |
     *        |         750,000  |       105 MB  |
     *        |       1,500,000  |       210 MB  |
     *        |       2,250,000  |       315 MB  |
     *        |       3,000,000  |       420 MB  |
     *        |       4,500,000  |       630 MB  |
     *        ------------------------------------
     *
     *        ⁺ All characters were 1 byte long
     *
     * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored.
     * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe.
     * Also, there is on average about 20 characters per cell (this is entirely empirical data...).
     *
     * This means that in order to store one shared string in memory, the memory amount needed is:
     *   => 20 * 600 ≈ 12KB
     */
    const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;

    /**
     * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files
     * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory
     * and the string will be quickly retrieved.
     * The performance bottleneck is not when creating these temporary files, but rather when loading their content.
     * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works
     * best when the indexes of the shared strings are sorted in the sheet data.
     * 10,000 was chosen because it creates small files that are fast to be loaded in memory.
     */
    const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;

    /** @var CachingStrategyFactory|null Singleton instance */
    protected static $instance = null;

    /**
     * Private constructor for singleton
     */
    private function __construct()
    {
    }

    /**
     * Returns the singleton instance of the factory
     *
     * @return CachingStrategyFactory
     */
    public static function getInstance()
    {
        if (self::$instance === null) {
            self::$instance = new CachingStrategyFactory();
        }

        return self::$instance;
    }

    /**
     * Returns the best caching strategy, given the number of unique shared strings
     * and the amount of memory available.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @param string|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     * @return CachingStrategyInterface The best caching strategy
     */
    public function getBestCachingStrategy($sharedStringsUniqueCount, $tempFolder = null)
    {
        if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) {
            return new InMemoryStrategy($sharedStringsUniqueCount);
        }

        return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE);
    }

    /**
     * Returns whether it is safe to use in-memory caching, given the number of unique shared strings
     * and the amount of memory available.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @return bool
     */
    protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
    {
        // if the number of shared strings in unknown, do not use "in memory" strategy
        if ($sharedStringsUniqueCount === null) {
            return false;
        }

        $memoryAvailable = $this->getMemoryLimitInKB();

        if ($memoryAvailable === -1) {
            // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe
            return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE);
        }

        $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;

        return ($memoryAvailable > $memoryNeeded);
    }

    /**
     * Returns the PHP "memory_limit" in Kilobytes.
     * The limit can be expressed with a unit ("128M", "1G", "512kb"...)
     * or as a plain number of bytes ("134217728").
     *
     * @return int|float The memory limit in KB or -1 if there is no limit or if it cannot be determined
     */
    protected function getMemoryLimitInKB()
    {
        $memoryLimitFormatted = $this->getMemoryLimitFromIni();
        $memoryLimitFormatted = strtolower(trim($memoryLimitFormatted));

        // No memory limit
        if ($memoryLimitFormatted === '-1') {
            return -1;
        }

        // The unit is optional: PHP interprets a value without unit as a number of bytes.
        // The pattern is anchored at the start so that the sign of a negative value is not silently dropped.
        if (preg_match('/^(\d+)([bkmgt]?)b?/', $memoryLimitFormatted, $matches)) {
            $amount = intval($matches[1]);
            $unit = $matches[2];

            switch ($unit) {
                case '':
                case 'b': return ($amount / 1024);
                case 'k': return $amount;
                case 'm': return ($amount * 1024);
                case 'g': return ($amount * 1024 * 1024);
                case 't': return ($amount * 1024 * 1024 * 1024);
            }
        }

        return -1;
    }

    /**
     * Returns the formatted "memory_limit" value
     *
     * @return string
     */
    protected function getMemoryLimitFromIni()
    {
        return ini_get('memory_limit');
    }
}
|
||||
@@ -0,0 +1,44 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX\Helper\SharedStringsCaching;
|
||||
|
||||
/**
|
||||
* Interface CachingStrategyInterface
|
||||
*
|
||||
* @package Box\Spout\Reader\XLSX\Helper\SharedStringsCaching
|
||||
*/
|
||||
interface CachingStrategyInterface
{
    /**
     * Stores a shared string in the cache, under the given index.
     *
     * @param string $sharedString The string to be added to the cache
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return void
     */
    public function addStringForIndex($sharedString, $sharedStringIndex);

    /**
     * Marks the cache as complete, once the last shared string has been added.
     * No additional string can be added to the cache after this call.
     *
     * @return void
     */
    public function closeCache();

    /**
     * Fetches from the cache the string stored at the given index.
     *
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return string The shared string at the given index
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
     */
    public function getStringAtIndex($sharedStringIndex);

    /**
     * Tears the cache down: frees the memory and removes any artifact that was created.
     *
     * @return void
     */
    public function clearCache();
}
|
||||
@@ -0,0 +1,193 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX\Helper\SharedStringsCaching;
|
||||
|
||||
use Box\Spout\Common\Helper\FileSystemHelper;
|
||||
use Box\Spout\Common\Helper\GlobalFunctionsHelper;
|
||||
use Box\Spout\Reader\Exception\SharedStringNotFoundException;
|
||||
|
||||
/**
|
||||
* Class FileBasedStrategy
|
||||
*
|
||||
* This class implements the file-based caching strategy for shared strings.
|
||||
* Shared strings are stored in small files (with a max number of strings per file).
|
||||
* This strategy is slower than an in-memory strategy but is used to avoid out of memory crashes.
|
||||
*
|
||||
* @package Box\Spout\Reader\XLSX\Helper\SharedStringsCaching
|
||||
*/
|
||||
class FileBasedStrategy implements CachingStrategyInterface
{
    /** Value to use to escape the line feed character ("\n") */
    const ESCAPED_LINE_FEED_CHARACTER = '_x000A_';

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
    protected $globalFunctionsHelper;

    /** @var \Box\Spout\Common\Helper\FileSystemHelper Helper to perform file system operations */
    protected $fileSystemHelper;

    /** @var string Temporary folder where the temporary files will be created */
    protected $tempFolder;

    /**
     * @var int Maximum number of strings that can be stored in one temp file
     * @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE
     */
    protected $maxNumStringsPerTempFile;

    /** @var resource|null Pointer to the last temp file a shared string was written to (null when no file is open) */
    protected $tempFilePointer;

    /** @var string Path of the temporary file whose contents is currently stored in memory */
    protected $inMemoryTempFilePath;

    /** @var array Contents (one escaped shared string per entry) of the temporary file that was last read */
    protected $inMemoryTempFileContents;

    /**
     * Creates a uniquely named sub-folder that will receive the temp files.
     *
     * @param string|null $tempFolder Temporary folder where the temporary files to store shared strings will be stored.
     *                                When empty, the system temp directory is used.
     * @param int $maxNumStringsPerTempFile Maximum number of strings that can be stored in one temp file
     */
    public function __construct($tempFolder, $maxNumStringsPerTempFile)
    {
        $rootTempFolder = ($tempFolder) ?: sys_get_temp_dir();
        $this->fileSystemHelper = new FileSystemHelper($rootTempFolder);
        $this->tempFolder = $this->fileSystemHelper->createFolder($rootTempFolder, uniqid('sharedstrings'));

        $this->maxNumStringsPerTempFile = $maxNumStringsPerTempFile;

        $this->globalFunctionsHelper = new GlobalFunctionsHelper();
        $this->tempFilePointer = null;
    }

    /**
     * Adds the given string to the cache.
     * The string is appended, on its own line, to the temp file associated to its index.
     *
     * @param string $sharedString The string to be added to the cache
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return void
     */
    public function addStringForIndex($sharedString, $sharedStringIndex)
    {
        $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex);

        // A file that does not exist yet means we just crossed into a new bucket of strings:
        // close the previous file (if any) and start writing to the new one.
        if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) {
            if ($this->tempFilePointer) {
                $this->globalFunctionsHelper->fclose($this->tempFilePointer);
            }
            $this->tempFilePointer = $this->globalFunctionsHelper->fopen($tempFilePath, 'w');
        }

        // The shared string retrieval logic expects each cell data to be on one line only
        // Encoding the line feed character allows to preserve this assumption
        $lineFeedEncodedSharedString = $this->escapeLineFeed($sharedString);

        $this->globalFunctionsHelper->fwrite($this->tempFilePointer, $lineFeedEncodedSharedString . PHP_EOL);
    }

    /**
     * Returns the path for the temp file that should contain the string for the given index
     *
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return string The temp file path for the given index
     */
    protected function getSharedStringTempFilePath($sharedStringIndex)
    {
        $numTempFile = intval($sharedStringIndex / $this->maxNumStringsPerTempFile);
        return $this->tempFolder . '/sharedstrings' . $numTempFile;
    }

    /**
     * Closes the cache after the last shared string was added.
     * This prevents any additional string from being added to the cache.
     * Calling it several times is safe.
     *
     * @return void
     */
    public function closeCache()
    {
        // close pointer to the last temp file that was written
        if ($this->tempFilePointer) {
            $this->globalFunctionsHelper->fclose($this->tempFilePointer);

            // Forget the handle: a closed resource is still truthy, so keeping it around
            // would make a later close attempt call fclose() on an already closed file.
            $this->tempFilePointer = null;
        }
    }

    /**
     * Returns the string located at the given index from the cache.
     * The whole temp file containing the string is loaded in memory and kept there
     * until a string stored in another temp file is requested.
     *
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return string The shared string at the given index
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
     */
    public function getStringAtIndex($sharedStringIndex)
    {
        $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex);
        $indexInFile = $sharedStringIndex % $this->maxNumStringsPerTempFile;

        if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) {
            throw new SharedStringNotFoundException("Shared string temp file not found: $tempFilePath ; for index: $sharedStringIndex");
        }

        if ($this->inMemoryTempFilePath !== $tempFilePath) {
            // free memory before loading the contents of another file
            $this->inMemoryTempFileContents = null;

            $escapedSharedStrings = explode(PHP_EOL, $this->globalFunctionsHelper->file_get_contents($tempFilePath));

            // Every string is written followed by PHP_EOL, so the split ends with an empty
            // residue that is not a shared string. Dropping it makes the index right after the
            // last string "not found" instead of silently resolving to an empty string.
            if (end($escapedSharedStrings) === '') {
                array_pop($escapedSharedStrings);
            }

            $this->inMemoryTempFileContents = $escapedSharedStrings;
            $this->inMemoryTempFilePath = $tempFilePath;
        }

        // Using isset here because it is way faster than array_key_exists...
        if (!isset($this->inMemoryTempFileContents[$indexInFile])) {
            throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex");
        }

        // The line terminator was already removed by the split above. The unescaped value is returned
        // untouched so that line feeds ending the original string are preserved.
        return $this->unescapeLineFeed($this->inMemoryTempFileContents[$indexInFile]);
    }

    /**
     * Escapes the line feed characters (\n)
     *
     * @param string $unescapedString
     * @return string The given string, where each "\n" is replaced by the escape sequence
     */
    private function escapeLineFeed($unescapedString)
    {
        return str_replace("\n", self::ESCAPED_LINE_FEED_CHARACTER, $unescapedString);
    }

    /**
     * Unescapes the line feed characters (\n)
     *
     * @param string $escapedString
     * @return string The given string, where each escape sequence is replaced by "\n"
     */
    private function unescapeLineFeed($escapedString)
    {
        return str_replace(self::ESCAPED_LINE_FEED_CHARACTER, "\n", $escapedString);
    }

    /**
     * Destroys the cache, freeing memory and removing any created artifacts
     *
     * @return void
     */
    public function clearCache()
    {
        // Make sure no handle stays open on a file that is about to be deleted
        // (the cache may be cleared without having been closed first).
        $this->closeCache();

        if ($this->tempFolder) {
            $this->fileSystemHelper->deleteFolderRecursively($this->tempFolder);
        }
    }
}
|
||||
@@ -0,0 +1,83 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX\Helper\SharedStringsCaching;
|
||||
|
||||
use Box\Spout\Reader\Exception\SharedStringNotFoundException;
|
||||
|
||||
/**
|
||||
* Class InMemoryStrategy
|
||||
*
|
||||
* This class implements the in-memory caching strategy for shared strings.
|
||||
* This strategy is used when the number of unique strings is low, compared to the memory available.
|
||||
*
|
||||
* @package Box\Spout\Reader\XLSX\Helper\SharedStringsCaching
|
||||
*/
|
||||
class InMemoryStrategy implements CachingStrategyInterface
{
    /** @var \SplFixedArray Fixed-size storage holding the shared strings, keyed by their index */
    protected $inMemoryCache;

    /** @var bool Set to true by closeCache(); additions are ignored while it is true */
    protected $isCacheClosed;

    /**
     * Allocates the fixed-size storage and marks the cache as open.
     *
     * @param int $sharedStringsUniqueCount Number of unique shared strings
     */
    public function __construct($sharedStringsUniqueCount)
    {
        $this->isCacheClosed = false;
        $this->inMemoryCache = new \SplFixedArray($sharedStringsUniqueCount);
    }

    /**
     * Stores the given string at the given index, unless the cache was closed.
     *
     * @param string $sharedString The string to be added to the cache
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return void
     */
    public function addStringForIndex($sharedString, $sharedStringIndex)
    {
        if ($this->isCacheClosed) {
            // additions are silently ignored once the cache is closed
            return;
        }

        $this->inMemoryCache->offsetSet($sharedStringIndex, $sharedString);
    }

    /**
     * Marks the cache as closed, once the last shared string was added.
     * Strings added afterwards are ignored.
     *
     * @return void
     */
    public function closeCache()
    {
        $this->isCacheClosed = true;
    }

    /**
     * Looks up the string stored at the given index.
     *
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return string The shared string at the given index
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If the lookup raises a \RuntimeException
     */
    public function getStringAtIndex($sharedStringIndex)
    {
        try {
            $cachedString = $this->inMemoryCache->offsetGet($sharedStringIndex);
        } catch (\RuntimeException $lookupFailure) {
            throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex");
        }

        return $cachedString;
    }

    /**
     * Destroys the cache: the storage is unset and the cache is flagged as open again.
     *
     * @return void
     */
    public function clearCache()
    {
        $this->isCacheClosed = false;
        unset($this->inMemoryCache);
    }
}
|
||||
@@ -0,0 +1,234 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX\Helper;
|
||||
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||
use Box\Spout\Reader\Wrapper\XMLReader;
|
||||
use Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory;
|
||||
use Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyInterface;
|
||||
|
||||
/**
|
||||
* Class SharedStringsHelper
|
||||
* This class provides helper functions for reading sharedStrings XML file
|
||||
*
|
||||
* @package Box\Spout\Reader\XLSX\Helper
|
||||
*/
|
||||
class SharedStringsHelper
{
    /** Path of sharedStrings XML file inside the XLSX file */
    const SHARED_STRINGS_XML_FILE_PATH = 'xl/sharedStrings.xml';

    /** Main namespace for the sharedStrings.xml file */
    const MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';

    /** Definition of XML nodes names used to parse data */
    const XML_NODE_SST = 'sst';
    const XML_NODE_SI = 'si';
    const XML_NODE_R = 'r';
    const XML_NODE_T = 't';

    /** Definition of XML attributes used to parse data */
    const XML_ATTRIBUTE_COUNT = 'count';
    const XML_ATTRIBUTE_UNIQUE_COUNT = 'uniqueCount';
    const XML_ATTRIBUTE_XML_SPACE = 'xml:space';
    const XML_ATTRIBUTE_VALUE_PRESERVE = 'preserve';

    /** @var string Path of the XLSX file being read */
    protected $filePath;

    /** @var string|null Temporary folder where the temporary files to store shared strings will be stored */
    protected $tempFolder;

    /** @var CachingStrategyInterface The best caching strategy for storing shared strings */
    protected $cachingStrategy;

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param string|null|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     */
    public function __construct($filePath, $tempFolder = null)
    {
        $this->filePath = $filePath;
        $this->tempFolder = $tempFolder;
    }

    /**
     * Returns whether the XLSX file contains a shared strings XML file.
     * If the XLSX file cannot be opened as a ZIP archive, false is returned.
     *
     * @return bool
     */
    public function hasSharedStrings()
    {
        $hasSharedStrings = false;
        $zip = new \ZipArchive();

        if ($zip->open($this->filePath) === true) {
            $hasSharedStrings = ($zip->locateName(self::SHARED_STRINGS_XML_FILE_PATH) !== false);
            $zip->close();
        }

        return $hasSharedStrings;
    }

    /**
     * Reads all the shared strings of the workbook and stores them using a caching strategy.
     * All the strings are stored in a XML file, located at 'xl/sharedStrings.xml'.
     * They are then accessed by the sheet data, via the string index (see getStringAtIndex()).
     *
     * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
     *
     * The XML file can be really big with sheets containing a lot of data. That is why
     * we need to use a XML reader that provides streaming like the XMLReader library.
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read
     */
    public function extractSharedStrings()
    {
        $xmlReader = new XMLReader();
        $sharedStringIndex = 0;

        if ($xmlReader->openFileInZip($this->filePath, self::SHARED_STRINGS_XML_FILE_PATH) === false) {
            throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".');
        }

        try {
            $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader);
            $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount);

            $xmlReader->readUntilNodeFound(self::XML_NODE_SI);

            while ($xmlReader->getCurrentNodeName() === self::XML_NODE_SI) {
                $this->processSharedStringsItem($xmlReader, $sharedStringIndex);
                $sharedStringIndex++;

                // jump to the next '<si>' tag
                $xmlReader->next(self::XML_NODE_SI);
            }

            $this->cachingStrategy->closeCache();
        } catch (XMLProcessingException $exception) {
            // Release the reader before reporting the failure, otherwise it would stay open
            $xmlReader->close();

            throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$exception->getMessage()}]");
        }

        $xmlReader->close();
    }

    /**
     * Returns the shared strings unique count, as specified in <sst> tag.
     * Falls back to the "count" attribute when "uniqueCount" is missing.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader instance
     * @return int|null Number of unique shared strings in the sharedStrings.xml file (NULL if neither attribute is present)
     * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read
     */
    protected function getSharedStringsUniqueCount($xmlReader)
    {
        $xmlReader->next(self::XML_NODE_SST);

        // Iterate over the "sst" elements to get the actual "sst ELEMENT" (skips any DOCTYPE)
        while ($xmlReader->getCurrentNodeName() === self::XML_NODE_SST && $xmlReader->nodeType !== XMLReader::ELEMENT) {
            $xmlReader->read();
        }

        $uniqueCount = $xmlReader->getAttribute(self::XML_ATTRIBUTE_UNIQUE_COUNT);

        // some software do not add the "uniqueCount" attribute but only use the "count" one
        // @see https://github.com/box/spout/issues/254
        if ($uniqueCount === null) {
            $uniqueCount = $xmlReader->getAttribute(self::XML_ATTRIBUTE_COUNT);
        }

        return ($uniqueCount !== null) ? intval($uniqueCount) : null;
    }

    /**
     * Returns the best shared strings caching strategy, as chosen by the caching strategy factory.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @return CachingStrategyInterface
     */
    protected function getBestSharedStringsCachingStrategy($sharedStringsUniqueCount)
    {
        return CachingStrategyFactory::getInstance()
                ->getBestCachingStrategy($sharedStringsUniqueCount, $this->tempFolder);
    }

    /**
     * Processes the shared strings item XML node which the given XML reader is positioned on.
     * The values of all the relevant "<t>" nodes are concatenated and the result is added to the cache.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on a "<si>" node
     * @param int $sharedStringIndex Index of the processed shared strings item
     * @return void
     */
    protected function processSharedStringsItem($xmlReader, $sharedStringIndex)
    {
        $sharedStringValue = '';

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $siNode = $xmlReader->expand();
        $textNodes = $siNode->getElementsByTagName(self::XML_NODE_T);

        foreach ($textNodes as $textNode) {
            if ($this->shouldExtractTextNodeValue($textNode)) {
                $textNodeValue = $textNode->nodeValue;
                $shouldPreserveWhitespace = $this->shouldPreserveWhitespace($textNode);

                // surrounding whitespace is dropped unless the node explicitly asks to keep it
                $sharedStringValue .= ($shouldPreserveWhitespace) ? $textNodeValue : trim($textNodeValue);
            }
        }

        $this->cachingStrategy->addStringForIndex($sharedStringValue, $sharedStringIndex);
    }

    /**
     * Not all text nodes' values must be extracted.
     * Some text nodes are part of a node describing the pronunciation for instance.
     * We'll only consider the nodes whose parents are "<si>" or "<r>".
     *
     * @param \DOMElement $textNode Text node to check
     * @return bool Whether the given text node's value must be extracted
     */
    protected function shouldExtractTextNodeValue($textNode)
    {
        $parentTagName = $textNode->parentNode->localName;
        return ($parentTagName === self::XML_NODE_SI || $parentTagName === self::XML_NODE_R);
    }

    /**
     * If the text node has the attribute 'xml:space="preserve"', then preserve whitespace.
     *
     * @param \DOMElement $textNode The text node element (<t>) whose whitespace may be preserved
     * @return bool Whether whitespace should be preserved
     */
    protected function shouldPreserveWhitespace($textNode)
    {
        $spaceValue = $textNode->getAttribute(self::XML_ATTRIBUTE_XML_SPACE);
        return ($spaceValue === self::XML_ATTRIBUTE_VALUE_PRESERVE);
    }

    /**
     * Returns the shared string at the given index, using the previously chosen caching strategy.
     *
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return string The shared string at the given index
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
     */
    public function getStringAtIndex($sharedStringIndex)
    {
        return $this->cachingStrategy->getStringAtIndex($sharedStringIndex);
    }

    /**
     * Destroys the cache, freeing memory and removing any created artifacts.
     * Does nothing if no caching strategy was chosen yet.
     *
     * @return void
     */
    public function cleanup()
    {
        if ($this->cachingStrategy) {
            $this->cachingStrategy->clearCache();
        }
    }
}
|
||||
156
modules/x13import/tools/Spout/Reader/XLSX/Helper/SheetHelper.php
Normal file
156
modules/x13import/tools/Spout/Reader/XLSX/Helper/SheetHelper.php
Normal file
@@ -0,0 +1,156 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX\Helper;
|
||||
|
||||
use Box\Spout\Reader\Wrapper\XMLReader;
|
||||
use Box\Spout\Reader\XLSX\Sheet;
|
||||
|
||||
/**
|
||||
* Class SheetHelper
|
||||
* This class provides helper functions related to XLSX sheets
|
||||
*
|
||||
* @package Box\Spout\Reader\XLSX\Helper
|
||||
*/
|
||||
class SheetHelper
{
    /** Paths of XML files relative to the XLSX file root */
    const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
    const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';

    /** Definition of XML node names used to parse data */
    const XML_NODE_WORKBOOK_VIEW = 'workbookView';
    const XML_NODE_SHEET = 'sheet';
    const XML_NODE_SHEETS = 'sheets';
    const XML_NODE_RELATIONSHIP = 'Relationship';

    /** Definition of XML attributes used to parse data */
    const XML_ATTRIBUTE_ACTIVE_TAB = 'activeTab';
    const XML_ATTRIBUTE_R_ID = 'r:id';
    const XML_ATTRIBUTE_NAME = 'name';
    const XML_ATTRIBUTE_ID = 'Id';
    const XML_ATTRIBUTE_TARGET = 'Target';

    /** @var string Path of the XLSX file being read */
    protected $filePath;

    /** @var \Box\Spout\Reader\XLSX\ReaderOptions Reader's current options */
    protected $options;

    /** @var \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper Helper to work with shared strings */
    protected $sharedStringsHelper;

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
    protected $globalFunctionsHelper;

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options
     * @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper Helper to work with global functions
     */
    public function __construct($filePath, $options, $sharedStringsHelper, $globalFunctionsHelper)
    {
        $this->filePath = $filePath;
        $this->options = $options;
        $this->sharedStringsHelper = $sharedStringsHelper;
        $this->globalFunctionsHelper = $globalFunctionsHelper;
    }

    /**
     * Returns the sheets metadata of the file located at the previously given file path.
     * The sheets are listed in the "xl/workbook.xml" file; an empty array is returned
     * if that file cannot be opened.
     *
     * @return Sheet[] Sheets within the XLSX file
     */
    public function getSheets()
    {
        $sheets = [];
        $sheetIndex = 0;
        $activeSheetIndex = 0; // By default, the first sheet is active

        $xmlReader = new XMLReader();
        if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) {
            while ($xmlReader->read()) {
                if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_WORKBOOK_VIEW)) {
                    // The "workbookView" node is located before "sheet" nodes, ensuring that
                    // the active sheet is known before parsing sheets data.
                    $activeSheetIndex = (int) $xmlReader->getAttribute(self::XML_ATTRIBUTE_ACTIVE_TAB);
                } elseif ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_SHEET)) {
                    $isSheetActive = ($sheetIndex === $activeSheetIndex);
                    $sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $sheetIndex, $isSheetActive);
                    $sheetIndex++;
                } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_SHEETS)) {
                    // stop reading once all sheets have been read
                    break;
                }
            }

            $xmlReader->close();
        }

        return $sheets;
    }

    /**
     * Returns an instance of a sheet, given the XML node describing the sheet - from "workbook.xml".
     * We can find the XML file path describing the sheet inside "workbook.xml.rels", by mapping with the sheet ID
     * ("r:id" in "workbook.xml", "Id" in "workbook.xml.rels").
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml"
     * @param int $sheetIndexZeroBased Index of the sheet, based on order of appearance in the workbook (zero-based)
     * @param bool $isSheetActive Whether this sheet was defined as active
     * @return \Box\Spout\Reader\XLSX\Sheet Sheet instance
     */
    protected function getSheetFromSheetXMLNode($xmlReaderOnSheetNode, $sheetIndexZeroBased, $isSheetActive)
    {
        $sheetId = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_R_ID);
        $escapedSheetName = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_NAME);

        /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
        $escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
        $sheetName = $escaper->unescape($escapedSheetName);

        $sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);

        return new Sheet(
            $this->filePath, $sheetDataXMLFilePath,
            $sheetIndexZeroBased, $sheetName, $isSheetActive,
            $this->options, $this->sharedStringsHelper
        );
    }

    /**
     * Looks for the "Relationship" node of "workbook.xml.rels" whose "Id" matches the given sheet ID
     * and returns its "Target", prefixed with "/xl/" when it does not already start with it.
     *
     * @param string $sheetId The sheet ID, as defined in "workbook.xml"
     * @return string The XML file path describing the sheet inside "workbook.xml.rels", for the given sheet ID
     *                (empty string if the file cannot be opened or no relationship matches)
     */
    protected function getSheetDataXMLFilePathForSheetId($sheetId)
    {
        $sheetDataXMLFilePath = '';

        // find the file path of the sheet, by looking at the "workbook.xml.rels" file
        $xmlReader = new XMLReader();
        if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) {
            while ($xmlReader->read()) {
                if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_RELATIONSHIP)) {
                    $relationshipSheetId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ID);

                    if ($relationshipSheetId === $sheetId) {
                        // In workbook.xml.rels, it is only "worksheets/sheet1.xml"
                        // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
                        $sheetDataXMLFilePath = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET);

                        // sometimes, the sheet data file path already contains "/xl/"...
                        if (strpos($sheetDataXMLFilePath, '/xl/') !== 0) {
                            $sheetDataXMLFilePath = '/xl/' . $sheetDataXMLFilePath;
                        }

                        // The relationship was found: stop scanning, whether or not the prefix had to be added
                        break;
                    }
                }
            }

            $xmlReader->close();
        }

        return $sheetDataXMLFilePath;
    }
}
|
||||
330
modules/x13import/tools/Spout/Reader/XLSX/Helper/StyleHelper.php
Normal file
330
modules/x13import/tools/Spout/Reader/XLSX/Helper/StyleHelper.php
Normal file
@@ -0,0 +1,330 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX\Helper;
|
||||
|
||||
use Box\Spout\Reader\Wrapper\XMLReader;
|
||||
|
||||
/**
|
||||
* Class StyleHelper
|
||||
* This class provides helper functions related to XLSX styles
|
||||
*
|
||||
* @package Box\Spout\Reader\XLSX\Helper
|
||||
*/
|
||||
class StyleHelper
|
||||
{
|
||||
/** Paths of XML files relative to the XLSX file root */
|
||||
const STYLES_XML_FILE_PATH = 'xl/styles.xml';
|
||||
|
||||
/** Nodes used to find relevant information in the styles XML file */
|
||||
const XML_NODE_NUM_FMTS = 'numFmts';
|
||||
const XML_NODE_NUM_FMT = 'numFmt';
|
||||
const XML_NODE_CELL_XFS = 'cellXfs';
|
||||
const XML_NODE_XF = 'xf';
|
||||
|
||||
/** Attributes used to find relevant information in the styles XML file */
|
||||
const XML_ATTRIBUTE_NUM_FMT_ID = 'numFmtId';
|
||||
const XML_ATTRIBUTE_FORMAT_CODE = 'formatCode';
|
||||
const XML_ATTRIBUTE_APPLY_NUMBER_FORMAT = 'applyNumberFormat';
|
||||
|
||||
/** By convention, default style ID is 0 */
|
||||
const DEFAULT_STYLE_ID = 0;
|
||||
|
||||
const NUMBER_FORMAT_GENERAL = 'General';
|
||||
|
||||
/**
|
||||
* @see https://msdn.microsoft.com/en-us/library/ff529597(v=office.12).aspx
|
||||
* @var array Mapping between built-in numFmtId and the associated format - for dates only
|
||||
*/
|
||||
protected static $builtinNumFmtIdToNumFormatMapping = [
|
||||
14 => 'm/d/yyyy', // @NOTE: ECMA spec is 'mm-dd-yy'
|
||||
15 => 'd-mmm-yy',
|
||||
16 => 'd-mmm',
|
||||
17 => 'mmm-yy',
|
||||
18 => 'h:mm AM/PM',
|
||||
19 => 'h:mm:ss AM/PM',
|
||||
20 => 'h:mm',
|
||||
21 => 'h:mm:ss',
|
||||
22 => 'm/d/yyyy h:mm', // @NOTE: ECMA spec is 'm/d/yy h:mm',
|
||||
45 => 'mm:ss',
|
||||
46 => '[h]:mm:ss',
|
||||
47 => 'mm:ss.0', // @NOTE: ECMA spec is 'mmss.0',
|
||||
];
|
||||
|
||||
/** @var string Path of the XLSX file being read */
|
||||
protected $filePath;
|
||||
|
||||
/** @var array Array containing the IDs of built-in number formats indicating a date */
|
||||
protected $builtinNumFmtIdIndicatingDates;
|
||||
|
||||
/** @var array Array containing a mapping NUM_FMT_ID => FORMAT_CODE */
|
||||
protected $customNumberFormats;
|
||||
|
||||
/** @var array Array containing a mapping STYLE_ID => [STYLE_ATTRIBUTES] */
|
||||
protected $stylesAttributes;
|
||||
|
||||
/** @var array Cache containing a mapping NUM_FMT_ID => IS_DATE_FORMAT. Used to avoid lots of recalculations */
|
||||
protected $numFmtIdToIsDateFormatCache = [];
|
||||
|
||||
/**
|
||||
* @param string $filePath Path of the XLSX file being read
|
||||
*/
|
||||
public function __construct($filePath)
|
||||
{
|
||||
$this->filePath = $filePath;
|
||||
$this->builtinNumFmtIdIndicatingDates = array_keys(self::$builtinNumFmtIdToNumFormatMapping);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the style with the given ID should consider
|
||||
* numeric values as timestamps and format the cell as a date.
|
||||
*
|
||||
* @param int $styleId Zero-based style ID
|
||||
* @return bool Whether the cell with the given cell should display a date instead of a numeric value
|
||||
*/
|
||||
public function shouldFormatNumericValueAsDate($styleId)
|
||||
{
|
||||
$stylesAttributes = $this->getStylesAttributes();
|
||||
|
||||
// Default style (0) does not format numeric values as timestamps. Only custom styles do.
|
||||
// Also if the style ID does not exist in the styles.xml file, format as numeric value.
|
||||
// Using isset here because it is way faster than array_key_exists...
|
||||
if ($styleId === self::DEFAULT_STYLE_ID || !isset($stylesAttributes[$styleId])) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$styleAttributes = $stylesAttributes[$styleId];
|
||||
|
||||
return $this->doesStyleIndicateDate($styleAttributes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the styles.xml file and extract the relevant information from the file.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
protected function extractRelevantInfo()
|
||||
{
|
||||
$this->customNumberFormats = [];
|
||||
$this->stylesAttributes = [];
|
||||
|
||||
$xmlReader = new XMLReader();
|
||||
|
||||
if ($xmlReader->openFileInZip($this->filePath, self::STYLES_XML_FILE_PATH)) {
|
||||
while ($xmlReader->read()) {
|
||||
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) {
|
||||
$this->extractNumberFormats($xmlReader);
|
||||
|
||||
} else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL_XFS)) {
|
||||
$this->extractStyleAttributes($xmlReader);
|
||||
}
|
||||
}
|
||||
|
||||
$xmlReader->close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts number formats from the "numFmt" nodes.
|
||||
* For simplicity, the styles attributes are kept in memory. This is possible thanks
|
||||
* to the reuse of formats. So 1 million cells should not use 1 million formats.
|
||||
*
|
||||
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "numFmts" node
|
||||
* @return void
|
||||
*/
|
||||
protected function extractNumberFormats($xmlReader)
|
||||
{
|
||||
while ($xmlReader->read()) {
|
||||
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMT)) {
|
||||
$numFmtId = intval($xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID));
|
||||
$formatCode = $xmlReader->getAttribute(self::XML_ATTRIBUTE_FORMAT_CODE);
|
||||
$this->customNumberFormats[$numFmtId] = $formatCode;
|
||||
} else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_NUM_FMTS)) {
|
||||
// Once done reading "numFmts" node's children
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts style attributes from the "xf" nodes, inside the "cellXfs" section.
|
||||
* For simplicity, the styles attributes are kept in memory. This is possible thanks
|
||||
* to the reuse of styles. So 1 million cells should not use 1 million styles.
|
||||
*
|
||||
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "cellXfs" node
|
||||
* @return void
|
||||
*/
|
||||
protected function extractStyleAttributes($xmlReader)
|
||||
{
|
||||
while ($xmlReader->read()) {
|
||||
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_XF)) {
|
||||
$numFmtId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID);
|
||||
$normalizedNumFmtId = ($numFmtId !== null) ? intval($numFmtId) : null;
|
||||
|
||||
$applyNumberFormat = $xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT);
|
||||
$normalizedApplyNumberFormat = ($applyNumberFormat !== null) ? !!$applyNumberFormat : null;
|
||||
|
||||
$this->stylesAttributes[] = [
|
||||
self::XML_ATTRIBUTE_NUM_FMT_ID => $normalizedNumFmtId,
|
||||
self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => $normalizedApplyNumberFormat,
|
||||
];
|
||||
} else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_CELL_XFS)) {
|
||||
// Once done reading "cellXfs" node's children
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array The custom number formats
|
||||
*/
|
||||
protected function getCustomNumberFormats()
|
||||
{
|
||||
if (!isset($this->customNumberFormats)) {
|
||||
$this->extractRelevantInfo();
|
||||
}
|
||||
|
||||
return $this->customNumberFormats;
|
||||
}
|
||||
|
||||
/**
 * Gives access to the styles attributes, triggering the extraction
 * step (extractRelevantInfo()) the first time they are requested.
 *
 * @return array The styles attributes
 */
protected function getStylesAttributes()
{
    $hasAlreadyBeenExtracted = isset($this->stylesAttributes);

    if (!$hasAlreadyBeenExtracted) {
        // nothing loaded yet: run the extraction before handing the attributes out
        $this->extractRelevantInfo();
    }

    return $this->stylesAttributes;
}
|
||||
|
||||
/**
 * Preliminary date check based on the attributes of a style.
 *
 * @param array $styleAttributes Array containing the style attributes (2 keys: "applyNumberFormat" and "numFmtId")
 * @return bool Whether the style with the given attributes indicates that the number is a date
 */
protected function doesStyleIndicateDate($styleAttributes)
{
    $isNumberFormatDisabled = ($styleAttributes[self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT] === false);
    $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];

    // Only a style that has a "numFmtId" and whose "applyNumberFormat" is not
    // explicitly false can apply a date format. Even then, having a "numFmtId"
    // only means that a specific number format is applied, not necessarily a
    // date one, hence the delegation to the number format check.
    if (!$isNumberFormatDisabled && $numFmtId !== null) {
        return $this->doesNumFmtIdIndicateDate($numFmtId);
    }

    return false;
}
|
||||
|
||||
/**
 * Tells whether a number format ID designates a date format, either because
 * it is one of the built-in date formats or because its custom format code
 * looks like a date format.
 * Answers are memoized per "numFmtId", since the same ID can be shared by many styles.
 *
 * @param int $numFmtId
 * @return bool Whether the number format ID indicates that the number is a date
 */
protected function doesNumFmtIdIndicateDate($numFmtId)
{
    // isset() is enough here: cached values are booleans, never null
    if (isset($this->numFmtIdToIsDateFormatCache[$numFmtId])) {
        return $this->numFmtIdToIsDateFormatCache[$numFmtId];
    }

    $formatCode = $this->getFormatCodeForNumFmtId($numFmtId);

    // the custom format code is only inspected when the ID is not a built-in date format
    $isDateFormat = $this->isNumFmtIdBuiltInDateFormat($numFmtId)
        ? true
        : (bool) $this->isFormatCodeCustomDateFormat($formatCode);

    $this->numFmtIdToIsDateFormatCache[$numFmtId] = $isDateFormat;

    return $isDateFormat;
}
|
||||
|
||||
/**
 * Looks up the custom format code registered for a number format ID.
 *
 * @param int $numFmtId
 * @return string|null The custom number format or NULL if none defined for the given numFmtId
 */
protected function getFormatCodeForNumFmtId($numFmtId)
{
    $customNumberFormats = $this->getCustomNumberFormats();

    // isset() is used rather than array_key_exists() because it is much faster
    if (!isset($customNumberFormats[$numFmtId])) {
        return null;
    }

    return $customNumberFormats[$numFmtId];
}
|
||||
|
||||
/**
 * Checks the given ID against the list of built-in number format IDs
 * known to represent dates (loose comparison).
 *
 * @param int $numFmtId
 * @return bool Whether the number format ID indicates that the number is a date
 */
protected function isNumFmtIdBuiltInDateFormat($numFmtId)
{
    $builtinDateNumFmtIds = $this->builtinNumFmtIdIndicatingDates;

    return in_array($numFmtId, $builtinDateNumFmtIds);
}
|
||||
|
||||
/**
 * Tells whether a custom format code describes a date.
 * A missing format code and the default "General" format (compared case
 * insensitively) are never considered date formats.
 *
 * @param string|null $formatCode
 * @return bool Whether the given format code indicates that the number is a date
 */
protected function isFormatCodeCustomDateFormat($formatCode)
{
    $hasFormatCode = ($formatCode !== null);

    // the null check comes first so that strcasecmp() only ever receives a string
    if ($hasFormatCode && strcasecmp($formatCode, self::NUMBER_FORMAT_GENERAL) !== 0) {
        return $this->isFormatCodeMatchingDateFormatPattern($formatCode);
    }

    return false;
}
|
||||
|
||||
/**
 * Tells whether the given format code contains at least one date/time token.
 *
 * Parts of the format code that cannot hold date tokens are removed first:
 *  - literal text between double quotes (e.g. the " meters" of 0.00" meters"),
 *  - extra formatting between square brackets (e.g. colors or locale codes).
 * Characters escaped with a "\" are ignored as well.
 *
 * @param string $formatCode
 * @return bool Whether the given format code matches a date format pattern
 */
protected function isFormatCodeMatchingDateFormatPattern($formatCode)
{
    // Remove literal text (what's between double quotes, the opening quote should not be preceded by a "\").
    // Without this, letters of the quoted text would wrongly be seen as date tokens.
    $formatCode = preg_replace('/(?<!\\\\)"[^"]*"/', '', $formatCode);

    // Remove extra formatting (what's between [ ], the brackets should not be preceded by a "\")
    $pattern = '((?<!\\\)\[.+?(?<!\\\)\])';
    $formatCode = preg_replace($pattern, '', $formatCode);

    // custom date formats contain specific characters to represent the date:
    // e - yy - m - d - h - s
    // and all of their variants (yyyy - mm - dd...)
    $dateFormatCharacters = ['e', 'yy', 'm', 'd', 'h', 's'];

    foreach ($dateFormatCharacters as $dateFormatCharacter) {
        // character not preceded by "\" (case insensitive)
        $pattern = '/(?<!\\\)' . $dateFormatCharacter . '/i';

        if (preg_match($pattern, $formatCode)) {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/**
 * Returns the format as defined in "styles.xml" of the given style.
 * Built-in date formats are resolved through the built-in mapping, any
 * other number format ID through the custom number formats.
 * NOTE: It is assumed that the style DOES have a number format associated to it.
 *
 * @param int $styleId Zero-based style ID
 * @return string The number format code associated with the given style
 */
public function getNumberFormatCode($styleId)
{
    $stylesAttributes = $this->getStylesAttributes();
    $numFmtId = $stylesAttributes[$styleId][self::XML_ATTRIBUTE_NUM_FMT_ID];

    if ($this->isNumFmtIdBuiltInDateFormat($numFmtId)) {
        return self::$builtinNumFmtIdToNumFormatMapping[$numFmtId];
    }

    $customNumberFormats = $this->getCustomNumberFormats();

    return $customNumberFormats[$numFmtId];
}
|
||||
}
|
||||
113
modules/x13import/tools/Spout/Reader/XLSX/Reader.php
Normal file
113
modules/x13import/tools/Spout/Reader/XLSX/Reader.php
Normal file
@@ -0,0 +1,113 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX;
|
||||
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Reader\AbstractReader;
|
||||
use Box\Spout\Reader\XLSX\Helper\SharedStringsHelper;
|
||||
|
||||
/**
 * Class Reader
 * Reader implementation for XLSX files: opens the archive, loads the shared
 * strings and exposes an iterator over the sheets.
 *
 * @package Box\Spout\Reader\XLSX
 */
class Reader extends AbstractReader
{
    /** @var \ZipArchive Archive handle on the XLSX file */
    protected $zip;

    /** @var \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper Helper to work with shared strings */
    protected $sharedStringsHelper;

    /** @var SheetIterator To iterate over the XLSX sheets */
    protected $sheetIterator;


    /**
     * Returns the reader's current options, creating them on first access.
     *
     * @return ReaderOptions
     */
    protected function getOptions()
    {
        if (isset($this->options)) {
            return $this->options;
        }

        $this->options = new ReaderOptions();

        return $this->options;
    }

    /**
     * Sets the folder in which temporary files get created.
     *
     * @param string $tempFolder Temporary folder where the temporary files will be created
     * @return Reader
     */
    public function setTempFolder($tempFolder)
    {
        $options = $this->getOptions();
        $options->setTempFolder($tempFolder);

        return $this;
    }

    /**
     * Returns whether stream wrappers are supported (they are not for XLSX).
     *
     * @return bool
     */
    protected function doesSupportStreamWrapper()
    {
        return false;
    }

    /**
     * Opens the file at the given file path to make it ready to be read.
     * When the file has shared strings, they are extracted up front so that they
     * can be accessed easily later on. The sheet iterator is created afterwards.
     *
     * @param string $filePath Path of the file to be read
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If the file at the given path or its content cannot be read
     * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
     */
    protected function openReader($filePath)
    {
        $this->zip = new \ZipArchive();

        $hasOpenedArchive = ($this->zip->open($filePath) === true);
        if (!$hasOpenedArchive) {
            throw new IOException("Could not open $filePath for reading.");
        }

        $this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->getOptions()->getTempFolder());

        if ($this->sharedStringsHelper->hasSharedStrings()) {
            // Extracts all the strings from the sheets for easy access in the future
            $this->sharedStringsHelper->extractSharedStrings();
        }

        $this->sheetIterator = new SheetIterator(
            $filePath,
            $this->getOptions(),
            $this->sharedStringsHelper,
            $this->globalFunctionsHelper
        );
    }

    /**
     * Returns an iterator to iterate over sheets.
     *
     * @return SheetIterator To iterate over sheets
     */
    protected function getConcreteSheetIterator()
    {
        return $this->sheetIterator;
    }

    /**
     * Closes the reader. To be used after reading the file.
     * Closes the archive and lets the shared strings helper clean up after itself.
     *
     * @return void
     */
    protected function closeReader()
    {
        if ($this->zip) {
            $this->zip->close();
        }

        if ($this->sharedStringsHelper) {
            $this->sharedStringsHelper->cleanup();
        }
    }
}
|
||||
33
modules/x13import/tools/Spout/Reader/XLSX/ReaderOptions.php
Normal file
33
modules/x13import/tools/Spout/Reader/XLSX/ReaderOptions.php
Normal file
@@ -0,0 +1,33 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX;
|
||||
|
||||
/**
 * Class ReaderOptions
 * Options specific to the XLSX reader, on top of the options shared by all readers.
 *
 * @package Box\Spout\Reader\XLSX
 */
class ReaderOptions extends \Box\Spout\Reader\Common\ReaderOptions
{
    /** @var string|null Folder receiving the temporary files (null when none was set) */
    protected $tempFolder = null;

    /**
     * Returns the folder in which temporary files get created.
     *
     * @return string|null Temporary folder where the temporary files will be created
     */
    public function getTempFolder()
    {
        return $this->tempFolder;
    }

    /**
     * Changes the folder in which temporary files get created.
     *
     * @param string|null $tempFolder Temporary folder where the temporary files will be created
     * @return ReaderOptions The current options, to allow chaining
     */
    public function setTempFolder($tempFolder)
    {
        $this->tempFolder = $tempFolder;

        return $this;
    }
}
|
||||
405
modules/x13import/tools/Spout/Reader/XLSX/RowIterator.php
Normal file
405
modules/x13import/tools/Spout/Reader/XLSX/RowIterator.php
Normal file
@@ -0,0 +1,405 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX;
|
||||
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||
use Box\Spout\Reader\IteratorInterface;
|
||||
use Box\Spout\Reader\Wrapper\XMLReader;
|
||||
use Box\Spout\Reader\XLSX\Helper\CellHelper;
|
||||
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
|
||||
use Box\Spout\Reader\XLSX\Helper\StyleHelper;
|
||||
use Box\Spout\Reader\Common\XMLProcessor;
|
||||
|
||||
/**
 * Class RowIterator
 * Iterates over the rows of a sheet by streaming the sheet's XML data.
 * XML nodes are dispatched to the process*Node() callbacks registered in the
 * constructor; reading stops each time a complete row is available.
 *
 * @package Box\Spout\Reader\XLSX
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_DIMENSION = 'dimension';
    const XML_NODE_WORKSHEET = 'worksheet';
    const XML_NODE_ROW = 'row';
    const XML_NODE_CELL = 'c';

    /** Definition of XML attributes used to parse data */
    const XML_ATTRIBUTE_REF = 'ref';
    const XML_ATTRIBUTE_SPANS = 'spans';
    const XML_ATTRIBUTE_ROW_INDEX = 'r';
    const XML_ATTRIBUTE_CELL_INDEX = 'r';

    /** @var string Path of the XLSX file being read */
    protected $filePath;

    /** @var string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml */
    protected $sheetDataXMLFilePath;

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
    protected $xmlProcessor;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var Helper\StyleHelper $styleHelper Helper to work with styles */
    protected $styleHelper;

    /**
     * TODO: This variable can be deleted when row indices get preserved
     * @var int Number of read rows
     */
    protected $numReadRows = 0;

    /** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */
    protected $currentlyProcessedRowData = [];

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var int The number of columns the sheet has (0 meaning undefined) */
    protected $numColumns = 0;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var int Last row index processed (one-based) */
    protected $lastRowIndexProcessed = 0;

    /** @var int Row index to be processed next (one-based) */
    protected $nextRowIndexToBeProcessed = 0;

    /** @var int Last column index processed (zero-based) */
    protected $lastColumnIndexProcessed = -1;

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options
     * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
     */
    public function __construct($filePath, $sheetDataXMLFilePath, $options, $sharedStringsHelper)
    {
        $this->filePath = $filePath;
        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);

        $this->xmlReader = new XMLReader();

        $this->styleHelper = new StyleHelper($filePath);
        $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $options->shouldFormatDates());

        $this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows();

        // Register all callbacks to process different nodes when reading the XML file
        $this->xmlProcessor = new XMLProcessor($this->xmlReader);
        $this->xmlProcessor->registerCallback(self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, [$this, 'processDimensionStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, [$this, 'processWorksheetEndingNode']);
    }

    /**
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @return string Path of the XML file containing the sheet data,
     *                without the leading slash.
     */
    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
    {
        return ltrim($sheetDataXMLFilePath, '/');
    }

    /**
     * Rewind the Iterator to the first element.
     * Initializes the XMLReader object that reads the associated sheet data.
     * The XMLReader is configured to be safe from billion laughs attack.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
     */
    public function rewind()
    {
        // close any previous read session before starting over
        $this->xmlReader->close();

        if ($this->xmlReader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath) === false) {
            throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
        }

        $this->numReadRows = 0;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 0;
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;
        $this->numColumns = 0;

        // position the iterator on the first row
        $this->next();
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Reads data describing the next unprocessed row.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        $this->nextRowIndexToBeProcessed++;

        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow();
        }
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We don't need to read data if:
     *   we have already read at least one row
     *     AND
     *   we need to preserve empty rows
     *     AND
     *   the last row that was read is not the row that need to be processed
     *   (i.e. if we need to return empty rows)
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

        return (
            !$hasReadAtLeastOneRow ||
            !$this->shouldPreserveEmptyRows ||
            $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed
        );
    }

    /**
     * Reads the XML until a callback asks to stop, then stores the data
     * gathered for the row in the buffer.
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    protected function readDataForNextRow()
    {
        $this->currentlyProcessedRowData = [];

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]");
        }

        $this->rowDataBuffer = $this->currentlyProcessedRowData;
    }

    /**
     * Reads the number of columns of the sheet from its dimension, when available.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processDimensionStartingNode($xmlReader)
    {
        // Read dimensions of the sheet
        $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)

        // the "ref" attribute may be missing, in which case the number of columns stays undefined
        if ($dimensionRef !== null && preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) {
            $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1;
        }

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Starts a new row: records its index and pre-fills the row data with
     * empty strings when the number of columns is known.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset index of the last processed column
        $this->lastColumnIndexProcessed = -1;

        // Mark the last processed row as the one currently being read
        $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);

        // Read spans info if present (it takes precedence over the sheet's dimension)
        $numberOfColumnsForRow = $this->numColumns;
        $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
        if ($spans) {
            $numberOfColumnsForRow = $this->getNumberOfColumnsFromSpans($spans);
        }

        $this->currentlyProcessedRowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Computes the number of columns described by a "spans" attribute value.
     * The value may hold several whitespace-separated "first:last" ranges
     * (e.g. "1:3 5:7"); the number of columns is the highest "last" bound.
     * Ranges that are not of the "first:last" form are ignored.
     *
     * @param string $spans Value of the "spans" attribute
     * @return int Number of columns (0 if no valid range was found)
     */
    protected function getNumberOfColumnsFromSpans($spans)
    {
        $numberOfColumns = 0;

        foreach (preg_split('/\s+/', trim($spans), -1, PREG_SPLIT_NO_EMPTY) as $span) {
            $bounds = explode(':', $span);
            if (count($bounds) !== 2) {
                continue;
            }

            // starting from 0 also prevents a negative bound from reaching array_fill()
            $numberOfColumns = max($numberOfColumns, intval($bounds[1]));
        }

        return $numberOfColumns;
    }

    /**
     * Stores the value of the current cell at its column index in the row data.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $currentColumnIndex = $this->getColumnIndex($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $node = $xmlReader->expand();
        $this->currentlyProcessedRowData[$currentColumnIndex] = $this->getCellValue($node);
        $this->lastColumnIndexProcessed = $currentColumnIndex;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Finalizes the current row and stops the processing so that the row can be returned,
     * unless the row is empty and empty rows should be skipped.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        // if the fetched row is empty and we don't want to preserve it..,
        if (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($this->currentlyProcessedRowData)) {
            // ... skip it
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        $this->numReadRows++;

        // If needed, we fill the empty cells
        if ($this->numColumns === 0) {
            $this->currentlyProcessedRowData = CellHelper::fillMissingArrayIndexes($this->currentlyProcessedRowData);
        }

        // at this point, we have all the data we need for the row
        // so that we can populate the buffer
        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processWorksheetEndingNode()
    {
        // The closing "</worksheet>" marks the end of the file
        $this->hasReachedEndOfFile = true;

        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * Returns the index of the current row, falling back to the index
     * following the last processed row when the "r" attribute is missing.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
     * @return int Row index
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     */
    protected function getRowIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <row r="3"...>
        $currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);

        return ($currentRowIndex !== null) ?
                intval($currentRowIndex) :
                $this->lastRowIndexProcessed + 1;
    }

    /**
     * Returns the index of the current cell's column, falling back to the index
     * following the last processed column when the "r" attribute is missing.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
     * @return int Column index
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     */
    protected function getColumnIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <c r="A1"...>
        $currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);

        return ($currentCellIndex !== null) ?
                CellHelper::getColumnIndexFromCellIndex($currentCellIndex) :
                $this->lastColumnIndexProcessed + 1;
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
     */
    protected function getCellValue($node)
    {
        return $this->cellValueFormatter->extractAndFormatNodeValue($node);
    }

    /**
     * NOTE(review): key() returns the key of the array's current element, not its value.
     * The rows built in this class appear to be indexed by integer column indexes,
     * so the strict comparison with '' looks like it can never be true - confirm
     * whether the value (current()) was meant to be compared instead.
     *
     * @param array $rowData
     * @return bool Whether the given row is empty
     */
    protected function isEmptyRow($rowData)
    {
        return (count($rowData) === 1 && key($rowData) === '');
    }

    /**
     * Return the current element, either an empty row or from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        $rowDataForRowToBeProcessed = $this->rowDataBuffer;

        if ($this->shouldPreserveEmptyRows) {
            // when we need to preserve empty rows, we will either return
            // an empty row or the last row read. This depends whether the
            // index of last row that was read matches the index of the last
            // row whose value should be returned.
            if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) {
                // return empty row if mismatch between last processed row
                // and the row that needs to be returned
                $rowDataForRowToBeProcessed = [''];
            }
        }

        return $rowDataForRowToBeProcessed;
    }

    /**
     * Return the key of the current element. Here, the row index.
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        // TODO: This should return $this->nextRowIndexToBeProcessed
        //       but to avoid a breaking change, the return value for
        //       this function has been kept as the number of rows read.
        return $this->shouldPreserveEmptyRows ?
                $this->nextRowIndexToBeProcessed :
                $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
|
||||
79
modules/x13import/tools/Spout/Reader/XLSX/Sheet.php
Normal file
79
modules/x13import/tools/Spout/Reader/XLSX/Sheet.php
Normal file
@@ -0,0 +1,79 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX;
|
||||
|
||||
use Box\Spout\Reader\SheetInterface;
|
||||
|
||||
/**
 * Class Sheet
 * Read-only description of one sheet of a XLSX file, giving access to its rows.
 *
 * @package Box\Spout\Reader\XLSX
 */
class Sheet implements SheetInterface
{
    /** @var \Box\Spout\Reader\XLSX\RowIterator To iterate over sheet's rows */
    protected $rowIterator;

    /** @var int Index of the sheet, based on order in the workbook (zero-based) */
    protected $index;

    /** @var string Name of the sheet */
    protected $name;

    /** @var bool Whether the sheet was the active one */
    protected $isActive;

    /**
     * Stores the sheet's properties and creates the iterator over its rows.
     *
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
     * @param string $sheetName Name of the sheet
     * @param bool $isSheetActive Whether the sheet was defined as active
     * @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options
     * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
     */
    public function __construct($filePath, $sheetDataXMLFilePath, $sheetIndex, $sheetName, $isSheetActive, $options, $sharedStringsHelper)
    {
        $this->index = $sheetIndex;
        $this->name = $sheetName;
        $this->isActive = $isSheetActive;

        $this->rowIterator = new RowIterator(
            $filePath,
            $sheetDataXMLFilePath,
            $options,
            $sharedStringsHelper
        );
    }

    /**
     * @api
     * @return \Box\Spout\Reader\XLSX\RowIterator Iterator over the rows of the sheet
     */
    public function getRowIterator()
    {
        return $this->rowIterator;
    }

    /**
     * @api
     * @return int Index of the sheet, based on order in the workbook (zero-based)
     */
    public function getIndex()
    {
        return $this->index;
    }

    /**
     * @api
     * @return string Name of the sheet
     */
    public function getName()
    {
        return $this->name;
    }

    /**
     * @api
     * @return bool Whether the sheet was defined as active
     */
    public function isActive()
    {
        return $this->isActive;
    }
}
|
||||
114
modules/x13import/tools/Spout/Reader/XLSX/SheetIterator.php
Normal file
114
modules/x13import/tools/Spout/Reader/XLSX/SheetIterator.php
Normal file
@@ -0,0 +1,114 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\XLSX;
|
||||
|
||||
use Box\Spout\Reader\IteratorInterface;
|
||||
use Box\Spout\Reader\XLSX\Helper\SheetHelper;
|
||||
use Box\Spout\Reader\Exception\NoSheetsFoundException;
|
||||
|
||||
/**
 * Class SheetIterator
 * Iterate over XLSX sheet.
 * The list of sheets is fetched once, at construction time.
 *
 * @package Box\Spout\Reader\XLSX
 */
class SheetIterator implements IteratorInterface
{
    /** @var \Box\Spout\Reader\XLSX\Sheet[] The list of sheet present in the file */
    protected $sheets;

    /**
     * Starts at 0 so that the iterator points at the first sheet even when
     * it is used before rewind() gets called.
     *
     * @var int The index of the sheet being read (zero-based)
     */
    protected $currentSheetIndex = 0;

    /**
     * @param string $filePath Path of the file to be read
     * @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options
     * @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
     * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
     */
    public function __construct($filePath, $options, $sharedStringsHelper, $globalFunctionsHelper)
    {
        // Fetch all available sheets
        $sheetHelper = new SheetHelper($filePath, $options, $sharedStringsHelper, $globalFunctionsHelper);
        $this->sheets = $sheetHelper->getSheets();

        if (count($this->sheets) === 0) {
            throw new NoSheetsFoundException('The file must contain at least one sheet.');
        }
    }

    /**
     * Rewind the Iterator to the first element
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     */
    public function rewind()
    {
        $this->currentSheetIndex = 0;
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        return ($this->currentSheetIndex < count($this->sheets));
    }

    /**
     * Move forward to next element.
     * The row iterator of the sheet being left is ended first.
     * Nothing happens when there is no sheet at the current index.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     */
    public function next()
    {
        // Using isset here because it is way faster than array_key_exists...
        if (isset($this->sheets[$this->currentSheetIndex])) {
            $currentSheet = $this->sheets[$this->currentSheetIndex];
            $currentSheet->getRowIterator()->end();

            $this->currentSheetIndex++;
        }
    }

    /**
     * Return the current element
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return \Box\Spout\Reader\XLSX\Sheet
     */
    public function current()
    {
        return $this->sheets[$this->currentSheetIndex];
    }

    /**
     * Return the key of the current element (one-based position of the sheet)
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->currentSheetIndex + 1;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        // make sure we are not leaking memory in case the iteration stopped before the end
        foreach ($this->sheets as $sheet) {
            $sheet->getRowIterator()->end();
        }
    }
}
|
||||
Reference in New Issue
Block a user