Files
2026-04-28 15:13:50 +02:00

425 lines
11 KiB
PHP

<?php
namespace AIOSEO\Plugin\Common\Sitemap\Image;
// Exit if accessed directly.
if ( ! defined( 'ABSPATH' ) ) {
exit;
}
/**
* Determines which images are included in a post/term.
*
* @since 4.0.0
*/
class Image {
/**
* The image scan action name.
*
* @since 4.0.13
*
* @var string
*/
private $imageScanAction = 'aioseo_image_sitemap_scan';
/**
* The supported image extensions.
*
* @since 4.2.2
*
* @var array[string]
*/
public $supportedExtensions = [
'gif',
'heic',
'jpeg',
'jpg',
'png',
'svg',
'webp',
'ico',
'avif',
];
/**
* The post object.
*
* @since 4.2.7
*
* @var \WP_Post
*/
private $post = null;
/**
* Class constructor.
*
* @since 4.0.5
* @version 4.9.4.2 Remove is_admin() check to allow frontend scheduling and switch to recurring action.
*/
public function __construct() {
// Column may not have been created yet.
if ( ! aioseo()->core->db->columnExists( 'aioseo_posts', 'image_scan_date' ) ) {
return;
}
// After completion, the scan won't reschedule until someone is in the admin. That's fine for most cases since posts being created/updated happens in the admin.
add_action( 'admin_init', [ $this, 'scheduleScan' ], 3001 );
add_action( $this->imageScanAction, [ $this, 'scanPosts' ] );
}
/**
* Schedules the image sitemap scan as a recurring action.
*
* @since 4.0.5
* @version 4.9.4.2 Switch to recurring action with cache-based idle state.
*
* @return void
*/
public function scheduleScan() {
if (
! aioseo()->options->sitemap->general->enable ||
aioseo()->sitemap->helpers->excludeImages()
) {
return;
}
// If we're in idle mode (no posts to scan), unschedule and don't reschedule yet.
if ( aioseo()->core->cache->get( 'as_image_scan_idle' ) ) {
aioseo()->actionScheduler->unschedule( $this->imageScanAction );
return;
}
if ( aioseo()->actionScheduler->isScheduled( $this->imageScanAction ) ) {
return;
}
$scanInterval = apply_filters( 'aioseo_image_sitemap_scan_interval', MINUTE_IN_SECONDS );
aioseo()->actionScheduler->scheduleRecurrent( $this->imageScanAction, 10, $scanInterval );
}
/**
* Scans posts for images.
*
* @since 4.0.5
* @version 4.9.4.2 Use recurring action with runtime lock and idle state.
*
* @return void
*/
public function scanPosts() {
// Runtime lock: Prevent concurrent execution of this action.
$lockKey = 'as_image_scan_running';
if ( aioseo()->core->cache->get( $lockKey ) ) {
return;
}
// Set lock with a safety timeout in case the action fails mid-execution.
aioseo()->core->cache->update( $lockKey, true, 2 * MINUTE_IN_SECONDS );
if (
! aioseo()->options->sitemap->general->enable ||
aioseo()->sitemap->helpers->excludeImages()
) {
aioseo()->core->cache->delete( $lockKey );
return;
}
$postsPerScan = apply_filters( 'aioseo_image_sitemap_posts_per_scan', 10 );
$postTypes = aioseo()->helpers->getPublicPostTypes( true );
$query = aioseo()->core->db
->start( aioseo()->core->db->db->posts . ' as p', true )
->select( '`p`.`ID`, `p`.`post_type`, `p`.`post_content`, `p`.`post_excerpt`, `p`.`post_modified_gmt`' )
->leftJoin( 'aioseo_posts as ap', '`ap`.`post_id` = `p`.`ID`' )
->whereRaw( '( `ap`.`id` IS NULL OR `p`.`post_modified_gmt` > `ap`.`image_scan_date` OR `ap`.`image_scan_date` IS NULL )' )
->whereIn( 'p.post_status', [ 'publish', 'inherit' ] )
->whereIn( 'p.post_type', $postTypes )
->limit( $postsPerScan );
$orderByClause = $this->getScanPostsOrderByClause( $postTypes );
if ( $orderByClause ) {
$query->orderByRaw( $orderByClause );
}
$posts = $query->run()->result();
if ( ! $posts ) {
// No more posts to scan - set idle cache. The schedule method on the next init will unschedule.
aioseo()->core->cache->update( 'as_image_scan_idle', true, HOUR_IN_SECONDS );
aioseo()->core->cache->delete( $lockKey );
return;
}
foreach ( $posts as $post ) {
$this->scanPost( $post );
}
aioseo()->core->cache->delete( $lockKey );
}
/**
* Gets the ORDER BY clause for prioritizing included post types.
* Prioritizes included sitemap post types before non-included ones.
*
* @since 4.9.5
*
* @param array $publicPostTypes All public post types.
* @return string
*/
private function getScanPostsOrderByClause( $publicPostTypes ) {
if ( aioseo()->options->sitemap->general->postTypes->all ) {
return '';
}
$includedPostTypes = aioseo()->options->sitemap->general->postTypes->included;
if ( empty( $includedPostTypes ) ) {
return '';
}
// Filter out post types that are no longer registered.
$includedPostTypes = array_values( array_intersect( $includedPostTypes, $publicPostTypes ) );
if ( empty( $includedPostTypes ) ) {
return '';
}
$orderByClause = 'CASE';
foreach ( $includedPostTypes as $index => $postType ) {
$orderByClause .= " WHEN `p`.`post_type` = '" . esc_sql( $postType ) . "' THEN " . $index;
}
$orderByClause .= ' ELSE 9999 END ASC, `p`.`ID` ASC';
return $orderByClause;
}
/**
* Returns the image entries for a given post.
*
* @since 4.0.0
*
* @param \WP_Post|int $post The post object or ID.
* @return void
*/
public function scanPost( $post ) {
if ( is_numeric( $post ) ) {
$post = get_post( $post );
}
$this->post = $post;
if ( ! empty( $post->post_password ) ) {
$this->updatePost( $post->ID );
return;
}
if ( 'attachment' === $post->post_type ) {
if ( ! wp_attachment_is( 'image', $post->ID ) ) {
$this->updatePost( $post->ID );
return;
}
$image = $this->buildEntries( [ $post->ID ] );
$this->updatePost( $post->ID, $image );
return;
}
$images = $this->extract();
$images = $this->removeImageDimensions( $images );
$images = apply_filters( 'aioseo_sitemap_images', $images, $post );
// Limit to a 1,000 URLs, in accordance to Google's specifications.
$images = array_slice( $images, 0, 1000 );
$this->updatePost( $post->ID, $this->buildEntries( $images ) );
}
/**
* Returns the image entries for a given term.
*
* @since 4.0.0
*
* @param \WP_Term $term The term object.
* @return array The image entries.
*/
public function term( $term ) {
if ( aioseo()->sitemap->helpers->excludeImages() ) {
return [];
}
$id = get_term_meta( $term->term_id, 'thumbnail_id', true );
if ( ! $id ) {
return [];
}
return $this->buildEntries( [ $id ] );
}
/**
* Builds the image entries.
*
* @since 4.0.0
*
* @param array $images The images, consisting of attachment IDs or external URLs.
* @return array The image entries.
*/
private function buildEntries( $images ) {
$entries = [];
foreach ( $images as $image ) {
$idOrUrl = $this->getImageIdOrUrl( $image );
$imageUrl = is_numeric( $idOrUrl ) ? wp_get_attachment_url( $idOrUrl ) : $idOrUrl;
$imageUrl = aioseo()->sitemap->helpers->formatUrl( $imageUrl );
if ( ! $imageUrl || ! preg_match( $this->getImageExtensionRegexPattern(), (string) $imageUrl ) ) {
continue;
}
// If the image URL is not external, make it relative.
// This is important for users who scan their sites in a local/staging environment and then
// push the data to production.
if ( ! aioseo()->helpers->isExternalUrl( $imageUrl ) ) {
$imageUrl = aioseo()->helpers->makeUrlRelative( $imageUrl );
}
$entries[ $idOrUrl ] = [ 'image:loc' => $imageUrl ];
}
return array_values( $entries );
}
/**
* Returns the ID of the image if it's hosted on the site. Otherwise it returns the external URL.
*
* @since 4.1.3
*
* @param int|string $image The attachment ID or URL.
* @return int|string The attachment ID or URL.
*/
private function getImageIdOrUrl( $image ) {
if ( is_numeric( $image ) ) {
return $image;
}
$attachmentId = false;
if ( aioseo()->helpers->isValidAttachment( $image ) ) {
$attachmentId = aioseo()->helpers->attachmentUrlToPostId( $image );
}
return $attachmentId ? $attachmentId : $image;
}
/**
* Extracts all image URls and IDs from the post.
*
* @since 4.0.0
*
* @return array The image URLs and IDs.
*/
private function extract() {
$images = [];
if ( has_post_thumbnail( $this->post ) ) {
$images[] = get_the_post_thumbnail_url( $this->post );
}
// Get the galleries here before doShortcodes() runs below to prevent buggy behaviour.
// WordPress is supposed to only return the attached images but returns a different result if the shortcode has no valid attributes, so we need to grab them manually.
$images = array_merge( $images, $this->getPostGalleryImages() );
// Now, get the remaining images from image tags in the post content.
$parsedPostContent = do_blocks( $this->post->post_content );
$parsedPostContent = aioseo()->helpers->doShortcodes( $parsedPostContent, true, $this->post->ID );
$parsedPostContent = preg_replace( '/\s\s+/u', ' ', (string) trim( $parsedPostContent ) ); // Trim both internal and external whitespace.
// Get the images from any third-party plugins/themes that are active.
$thirdParty = new ThirdParty( $this->post, $parsedPostContent );
$images = array_merge( $images, $thirdParty->extract() );
preg_match_all( '#<(amp-)?img[^>]+src="([^">]+)"#', (string) $parsedPostContent, $matches );
foreach ( $matches[2] as $url ) {
$images[] = aioseo()->helpers->makeUrlAbsolute( $url );
}
return array_unique( $images );
}
/**
* Returns all images from WP Core post galleries.
*
* @since 4.2.2
*
* @return array[string] The image URLs.
*/
private function getPostGalleryImages() {
$images = [];
$galleries = get_post_galleries( $this->post, false );
foreach ( $galleries as $gallery ) {
foreach ( $gallery['src'] as $imageUrl ) {
$images[] = $imageUrl;
}
}
// Now, get rid of them so that we don't process the shortcodes again.
$regex = get_shortcode_regex( [ 'gallery' ] );
$this->post->post_content = preg_replace( "/$regex/i", '', (string) $this->post->post_content );
return $images;
}
/**
* Removes image dimensions from the slug.
*
* @since 4.0.0
*
* @param array $urls The image URLs.
* @return array $preparedUrls The formatted image URLs.
*/
private function removeImageDimensions( $urls ) {
$preparedUrls = [];
foreach ( $urls as $url ) {
$preparedUrls[] = aioseo()->helpers->removeImageDimensions( $url );
}
return array_unique( array_filter( $preparedUrls ) );
}
/**
* Stores the image data for a given post in our DB table.
*
* @since 4.0.5
*
* @param int $postId The post ID.
* @param array $images The images.
* @return void
*/
private function updatePost( $postId, $images = [] ) {
$post = \AIOSEO\Plugin\Common\Models\Post::getPost( $postId );
$meta = $post->exists() ? [] : aioseo()->migration->meta->getMigratedPostMeta( $postId );
$meta['post_id'] = $postId;
$meta['images'] = ! empty( $images ) ? $images : null;
$meta['image_scan_date'] = gmdate( 'Y-m-d H:i:s' );
$post->set( $meta );
$post->save();
}
/**
* Returns the image extension regex pattern.
*
* @since 4.2.2
*
* @return string
*/
public function getImageExtensionRegexPattern() {
static $pattern;
if ( null !== $pattern ) {
return $pattern;
}
$pattern = '/http.*\.(' . implode( '|', $this->supportedExtensions ) . ')$/i';
return $pattern;
}
}