wingedit.pl/wp-content/plugins/wordpress-importer/php-toolkit/DataLiberation/URL/functions.php

<?php

namespace WordPress\DataLiberation\URL;

use Rowbot\URL\URL;
use WordPress\DataLiberation\BlockMarkup\BlockMarkupUrlProcessor;


/**
 * Rewrites the URLs found in block markup according to a "from => to" mapping.
 *
 * Every URL reported by BlockMarkupUrlProcessor::next_url() is compared against
 * the mapping entries in the order they were given. The first entry whose "from"
 * URL is the same as, or a parent of, the found URL (see is_child_url_of()) wins:
 * its "to" URL is handed to BlockMarkupUrlProcessor::replace_base_url() and the
 * remaining entries are skipped for that URL. URLs that match no entry are left
 * untouched. See WPRewriteUrlsTests for specific examples.
 *
 * Example:
 *
 * ```php
 * php > wp_rewrite_urls([
 *   'block_markup' => '<!-- wp:image {"src": "http://legacy-blog.com/image.jpg"} -->',
 *   'url-mapping' => [
 *     'http://legacy-blog.com' => 'https://modern-webstore.org'
 *   ]
 * ])
 * <!-- wp:image {"src":"https:\/\/modern-webstore.org\/image.jpg"} -->
 * ```
 *
 * @TODO Use a proper JSON parser and encoder to:
 * * Support UTF-16 characters
 * * Gracefully handle recoverable encoding issues
 * * Avoid changing the whitespace in the same manner as
 *   we do in WP_HTML_Tag_Processor. e.g. if we start with:
 *
 * ```html
 * <!-- wp:block {"url":"https://w.org"}` -->
 *                     ^ no space here
 * ```
 *
 * then it would be nice to re-encode that block markup also without the space character. This is similar
 * to how the tag processor avoids changing parts of the tag it doesn't need to change.
 *
 * @param  array $options {
 *     @type string $block_markup  The markup to process. Passed as the first argument
 *                                 to BlockMarkupUrlProcessor.
 *     @type array  $url-mapping   Map of "from" URL string => "to" URL string. Each side
 *                                 is parsed with WPURL::parse().
 *     @type string $base_url      Optional. Passed as the second argument to
 *                                 BlockMarkupUrlProcessor (presumably used to resolve
 *                                 relative URLs - confirm against that class). When
 *                                 empty, the first key of `url-mapping` is used.
 * }
 *
 * @return mixed The value of BlockMarkupUrlProcessor::get_updated_html() once every
 *               URL has been visited (presumably the rewritten markup string).
 */
function wp_rewrite_urls( $options ) {
	// Without a usable base URL, fall back to the first "from" URL of the mapping.
	if ( empty( $options['base_url'] ) ) {
		$base_url = array_keys( $options['url-mapping'] )[0];
	} else {
		$base_url = $options['base_url'];
	}

	// Parse both sides of every mapping entry once, up front, keeping the given order.
	$rewrite_rules = array();
	foreach ( $options['url-mapping'] as $source => $target ) {
		$parsed_source   = WPURL::parse( $source );
		$parsed_target   = WPURL::parse( $target );
		$rewrite_rules[] = array( $parsed_source, $parsed_target );
	}

	$processor = new BlockMarkupUrlProcessor( $options['block_markup'], $base_url );
	while ( $processor->next_url() ) {
		$current_url = $processor->get_parsed_url();

		foreach ( $rewrite_rules as $rule ) {
			if ( ! is_child_url_of( $current_url, $rule[0] ) ) {
				continue;
			}

			// First matching rule wins; later rules are not consulted for this URL.
			$processor->replace_base_url( $rule[1] );
			break;
		}
	}

	return $processor->get_updated_html();
}

/**
 * Checks whether one URL is the same as, or nested below, another URL.
 *
 * Two URLs are related when their protocol and hostname are identical and the
 * percent-decoded path of `$child` equals the percent-decoded path of
 * `$parent_url` (a trailing slash is ignored) or lies below it. The comparison
 * respects path segment boundaries, so `/blogger/post` is NOT a child of `/blog`.
 *
 * Only the `protocol`, `hostname` and `pathname` properties are inspected; the
 * port, query string and fragment play no role in the decision.
 *
 * @param  URL|string|false $child       The URL to check. Strings are parsed with WPURL::parse().
 * @param  URL|string|false $parent_url  The URL to compare against. Strings are parsed with WPURL::parse().
 *
 * @return bool True when $child is at or below $parent_url. False otherwise, including
 *              when either argument is not a URL object after parsing (e.g. `false`).
 */
function is_child_url_of( $child, $parent_url ) {
	$parent_url = is_string( $parent_url ) ? WPURL::parse( $parent_url ) : $parent_url;
	$child      = is_string( $child ) ? WPURL::parse( $child ) : $child;

	// Bail out before touching any property: an unparsable URL is nobody's child.
	if ( ! is_object( $child ) || ! is_object( $parent_url ) ) {
		return false;
	}

	if ( $parent_url->hostname !== $child->hostname ) {
		return false;
	}

	if ( $parent_url->protocol !== $child->protocol ) {
		return false;
	}

	// Normalize the child path to end with exactly one added slash so that
	// "/blog" and "/blog/" are treated alike.
	$child_pathname = rtrim( urldecode( $child->pathname ), '/' ) . '/';

	// Make the parent path end on a segment boundary. Without the trailing slash
	// a plain prefix comparison would let "/blog" match "/blogger/post".
	// An empty parent path is left alone and therefore matches every child path.
	$parent_prefix = urldecode( $parent_url->pathname );
	if ( '' !== $parent_prefix && '/' !== substr( $parent_prefix, -1 ) ) {
		$parent_prefix .= '/';
	}

	// Covers both the direct match ("/blog/" vs "/blog/") and the nested case.
	return 0 === strncmp( $child_pathname, $parent_prefix, strlen( $parent_prefix ) );
}

/**
 * Percent-decodes the beginning of a URL-encoded string and leaves the rest as is.
 *
 * The input is walked from the left. Literal bytes are copied over, and every
 * `%XX` escape met on the way is replaced by the byte it stands for. Decoding
 * stops at the first point where, right after a run of literal bytes has been
 * copied, the output built so far is at least `$decode_n` bytes long. Whatever
 * remains of the input is then appended verbatim, escapes included.
 *
 * For example, `urldecode_n( '%22is 6 %3C 6?%22 – asked Achilles', 1 )` returns
 * '"is 6 %3C 6?%22 – asked Achilles': once `%22` is decoded and the literal
 * `is 6 ` is copied, the output is already longer than one byte, so `%3C` and
 * the second `%22` are left encoded.
 *
 * Notes:
 * * Escapes are decoded to raw bytes, not characters. The result is the original
 *   byte sequence and is not guaranteed to be valid UTF-8.
 * * A `%` that is not followed by two hexadecimal digits is kept as a literal `%`.
 * * Unlike urldecode(), a `+` is never turned into a space.
 *
 * @param  string $input     The string to decode.
 * @param  int    $decode_n  Minimum number of output bytes to produce before
 *                           decoding stops. Values <= 0 return $input unchanged.
 *
 * @return string The partially decoded string.
 */
function urldecode_n( $input, $decode_n ) {
	// Fast paths: nothing was requested, or there is nothing to decode.
	if ( $decode_n <= 0 ) {
		return $input;
	}
	if ( false === strpos( $input, '%' ) ) {
		return $input;
	}

	$input_length = strlen( $input );
	$decoded      = '';
	$cursor       = 0;

	// A complete escape needs three bytes ("%XX"); with fewer left, stop scanning.
	while ( $cursor + 3 <= $input_length ) {
		// Copy the literal run that precedes the next percent sign (may be empty).
		$literal_length = strcspn( $input, '%', $cursor );
		$decoded       .= substr( $input, $cursor, $literal_length );
		$cursor        += $literal_length;

		// Enough output has been produced; the remainder stays encoded.
		if ( strlen( $decoded ) >= $decode_n ) {
			break;
		}

		// Step over the percent sign. When the literal run reached the end of the
		// input there was no percent sign, and the cursor moves past the end.
		++$cursor;
		if ( $cursor > $input_length ) {
			break;
		}

		$hex_digits = strspn( $input, '0123456789ABCDEFabcdef', $cursor, 2 );
		if ( 2 !== $hex_digits ) {
			// Not a valid escape: keep the percent sign itself and carry on
			// scanning right after it.
			$decoded .= '%';
			continue;
		}

		// Valid escape: emit the single byte it encodes.
		$decoded .= chr( hexdec( substr( $input, $cursor, 2 ) ) );
		$cursor  += 2;
	}

	// Append the untouched tail of the input.
	return $decoded . substr( $input, $cursor );
}