Files
drmaterac.pl/modules/pagecache/classes/PageCacheURLNormalizer.php
2025-03-21 20:24:43 +01:00

284 lines
9.0 KiB
PHP

<?php
/**
* Syntax based normalization of URI's
*
* This normalises URI's based on the specification RFC 3986
* www . apps . ietf . org/rfc/rfc3986.html
*
* Example usage:
* <code>
* require_once 'PageCacheURLNormalizer.php';
*
* $url = 'eXAMPLE://a/./b/../b/%63/%7bfoo%7d';
* $un = new PageCacheURLNormalizer();
* $un->setUrl( $url );
* echo $un->normalize();
*
* // result: "example://a/b/c/%7Bfoo%7D"
* </code>
*
* @author Glen Scott <glen_scott @ yahoo.co.uk>
* @copyright Glen Scott
* @license Open Source
*/
if (!defined('_PS_VERSION_')) {exit;}
// Check existence of the class to be able to handle compatibility problems in a friendly way
if (!class_exists('PageCacheURLNormalizer')) {
if (!function_exists('pagecacheRawurlencode')) {
function pagecacheRawurlencode($value)
{
if (is_array($value)) {
return array_map('pagecacheRawurlencode', array_values($value));
} else {
return rawurldecode($value);
}
}
}
class PageCacheURLNormalizer
{
private $url;
private $scheme;
private $host;
private $port;
private $user;
private $pass;
private $path;
private $query;
private $fragment;
private $default_scheme_ports = array('http:' => 80, 'https:' => 443,);
private $components = array('scheme', 'host', 'port', 'user', 'pass', 'path', 'query', 'fragment',);
public function __construct($url = null)
{
if ($url) {
$this->setUrl($url);
}
}
public function getUrl()
{
return $this->url;
}
public function setUrl($url)
{
$this->url = $url;
// parse URL into respective parts
$url_components = parse_url($this->url);
if (!$url_components) {
// Reset URL
$this->url = '';
// Flush properties
foreach ($this->components as $key) {
if (property_exists($this, $key)) {
$this->$key = '';
}
}
return false;
} else {
// Update properties
foreach ($url_components as $key => $value) {
if (property_exists($this, $key)) {
$this->$key = $value;
}
}
// Flush missing components
$missing_components = array_diff(
array_values($this->components),
array_keys($url_components)
);
foreach ($missing_components as $key) {
if (property_exists($this, $key)) {
$this->$key = '';
}
}
return true;
}
}
public function normalize()
{
// URI Syntax Components
// scheme authority path query fragment
// @link www.apps.ietf . org/rfc/rfc3986.html#sec-3
// Scheme
// @link www.apps.ietf . org/rfc/rfc3986.html#sec-3.1
if ($this->scheme) {
// Converting the scheme to lower case
$this->scheme = Tools::strtolower($this->scheme) . ':';
}
// Authority
// @link www.apps.ietf . org/rfc/rfc3986.html#sec-3.2
$authority = '';
if ($this->host) {
$authority .= '//';
// User Information
// @link www.apps.ietf . org/rfc/rfc3986.html#sec-3.2.1
if ($this->user) {
if ($this->pass) {
$authority .= $this->user . ':' . $this->pass . '@';
} else {
$authority .= $this->user . '@';
}
}
// Host
// @link www.apps.ietf . org/rfc/rfc3986.html#sec-3.2.2
// Converting the host to lower case
$authority .= Tools::strtolower($this->host);
// Port
// @link www.apps.ietf . org/rfc/rfc3986.html#sec-3.2.3
// Removing the default port
if (isset($this->default_scheme_ports[$this->scheme])
&& $this->port == $this->default_scheme_ports[$this->scheme]) {
$this->port = '';
}
if ($this->port) {
$authority .= ':' . $this->port;
}
}
// Path
// @link www.apps.ietf . org/rfc/rfc3986.html#sec-3.3
if ($this->path) {
// Removing dot-segments
$this->path = $this->removeDotSegments($this->path);
// Do not urlDecodeUnreservedChars to preserve Greek URLs for exemple
//$this->path = $this->urlDecodeUnreservedChars( $this->path );
$this->path = $this->urlDecodeReservedSubDelimChars($this->path);
} // Add default path only when valid URL is present
elseif ($this->url) {
// Adding trailing /
$this->path = '/';
}
// Query
// @link www.apps.ietf . org/rfc/rfc3986.html#sec-3.4
if ($this->query) {
parse_str($this->query, $query);
$keys = array_map('rawurldecode', array_keys($query));
$values = array_map('pagecacheRawurlencode', array_values($query));
$query = array_combine($keys, $values);
$this->query = '?' . str_replace('+', '%20', http_build_query($query, '', '&'));
// Fix http_build_query adding equals sign to empty keys
$this->query = str_replace('=&', '&', rtrim($this->query, '='));
}
// Fragment
// @link www.apps.ietf . org/rfc/rfc3986.html#sec-3.5
if ($this->fragment) {
$this->fragment = rawurldecode($this->fragment);
$this->fragment = rawurlencode($this->fragment);
$this->fragment = '#' . $this->fragment;
}
$this->setUrl($this->scheme . $authority . $this->path . $this->query . $this->fragment);
return $this->getUrl();
}
/**
* Path segment normalization
* www.apps.ietf . org/rfc/rfc3986.html#sec-5.2.4
*/
public function removeDotSegments($path)
{
$new_path = '';
while (!empty($path)) {
// A
$pattern_a = '!^(\.\./|\./)!x';
$pattern_b_1 = '!^(/\./)!x';
$pattern_b_2 = '!^(/\.)$!x';
$pattern_c = '!^(/\.\./|/\.\.)!x';
$pattern_d = '!^(\.|\.\.)$!x';
$pattern_e = '!(/*[^/]*)!x';
if (preg_match($pattern_a, $path)) {
// remove prefix from $path
$path = preg_replace($pattern_a, '', $path);
} elseif (preg_match($pattern_b_1, $path, $matches) || preg_match($pattern_b_2, $path, $matches)) {
$path = preg_replace("!^" . $matches[1] . "!", '/', $path);
} elseif (preg_match($pattern_c, $path, $matches)) {
$path = preg_replace('!^' . preg_quote($matches[1], '!') . '!x', '/', $path);
// remove the last segment and its preceding "/" (if any) from output buffer
$new_path = preg_replace('!/([^/]+)$!x', '', $new_path);
} elseif (preg_match($pattern_d, $path)) {
$path = preg_replace($pattern_d, '', $path);
} else {
if (preg_match($pattern_e, $path, $matches)) {
$first_path_segment = $matches[1];
$path = preg_replace('/^' . preg_quote($first_path_segment, '/') . '/', '', $path, 1);
$new_path .= $first_path_segment;
}
}
}
return $new_path;
}
public function getScheme()
{
return $this->scheme;
}
/**
* Decode unreserved characters
*
* @link www.apps.ietf . org/rfc/rfc3986.html#sec-2.3
*/
public function urlDecodeUnreservedChars($string)
{
$string = rawurldecode($string);
$string = rawurlencode($string);
$string = str_replace(array('%2F', '%3A', '%40'), array('/', ':', '@'), $string);
return $string;
}
/**
* Decode reserved sub-delims
*
* @link www.apps.ietf . org/rfc/rfc3986.html#sec-2.2
*/
public function urlDecodeReservedSubDelimChars($string)
{
return str_replace(array('%21', '%24', '%26', '%27', '%28', '%29', '%2A', '%2B', '%2C', '%3B', '%3D'),
array('!', '$', '&', "'", '(', ')', '*', '+', ',', ';', '='), $string);
}
}
}