core->db->columnExists( 'aioseo_posts', 'image_scan_date' ) ) {
			return;
		}

		// After completion, the scan won't reschedule until someone is in the admin.
		// That's fine for most cases since posts being created/updated happens in the admin.
		add_action( 'admin_init', [ $this, 'scheduleScan' ], 3001 );
		add_action( $this->imageScanAction, [ $this, 'scanPosts' ] );
	}

	/**
	 * Schedules the image sitemap scan as a recurring action.
	 *
	 * Runs on admin_init. Does nothing when the sitemap is disabled or images are excluded,
	 * unschedules the action while the idle flag is cached, and otherwise schedules the
	 * recurring action unless it is already scheduled.
	 *
	 * @since   4.0.5
	 * @version 4.9.4.2 Switch to recurring action with cache-based idle state.
	 *
	 * @return void
	 */
	public function scheduleScan() {
		if ( ! aioseo()->options->sitemap->general->enable || aioseo()->sitemap->helpers->excludeImages() ) {
			return;
		}

		// If we're in idle mode (no posts to scan), unschedule and don't reschedule yet.
		if ( aioseo()->core->cache->get( 'as_image_scan_idle' ) ) {
			aioseo()->actionScheduler->unschedule( $this->imageScanAction );

			return;
		}

		if ( aioseo()->actionScheduler->isScheduled( $this->imageScanAction ) ) {
			return;
		}

		// The interval defaults to one minute and can be changed through the filter.
		$scanInterval = apply_filters( 'aioseo_image_sitemap_scan_interval', MINUTE_IN_SECONDS );
		aioseo()->actionScheduler->scheduleRecurrent( $this->imageScanAction, 10, $scanInterval );
	}

	/**
	 * Scans posts for images.
	 *
	 * Picks a batch of published/inherited posts that have no row in aioseo_posts, were never
	 * scanned, or were modified after their last scan, and scans each of them. When no such
	 * post is left, an idle flag is cached for one hour so that scheduleScan() can unschedule
	 * the action.
	 *
	 * A cache entry acts as a best-effort runtime lock. It is released in a finally block so that
	 * an exception thrown while scanning does not keep later runs locked out until the timeout.
	 *
	 * @since   4.0.5
	 * @version 4.9.4.2 Use recurring action with runtime lock and idle state.
	 *
	 * @return void
	 */
	public function scanPosts() {
		// Runtime lock: Prevent concurrent execution of this action.
		$lockKey = 'as_image_scan_running';
		if ( aioseo()->core->cache->get( $lockKey ) ) {
			return;
		}

		// Set lock with a safety timeout in case the process dies before the lock can be released.
		aioseo()->core->cache->update( $lockKey, true, 2 * MINUTE_IN_SECONDS );

		try {
			if ( ! aioseo()->options->sitemap->general->enable || aioseo()->sitemap->helpers->excludeImages() ) {
				return;
			}

			$postsPerScan = apply_filters( 'aioseo_image_sitemap_posts_per_scan', 10 );
			$postTypes    = aioseo()->helpers->getPublicPostTypes( true );

			$query = aioseo()->core->db
				->start( aioseo()->core->db->db->posts . ' as p', true )
				->select( '`p`.`ID`, `p`.`post_type`, `p`.`post_content`, `p`.`post_excerpt`, `p`.`post_modified_gmt`' )
				->leftJoin( 'aioseo_posts as ap', '`ap`.`post_id` = `p`.`ID`' )
				->whereRaw( '( `ap`.`id` IS NULL OR `p`.`post_modified_gmt` > `ap`.`image_scan_date` OR `ap`.`image_scan_date` IS NULL )' )
				->whereIn( 'p.post_status', [ 'publish', 'inherit' ] )
				->whereIn( 'p.post_type', $postTypes )
				->limit( $postsPerScan );

			$orderByClause = $this->getScanPostsOrderByClause( $postTypes );
			if ( $orderByClause ) {
				$query->orderByRaw( $orderByClause );
			}

			$posts = $query->run()->result();
			if ( ! $posts ) {
				// No more posts to scan - set idle cache. The schedule method on the next init will unschedule.
				aioseo()->core->cache->update( 'as_image_scan_idle', true, HOUR_IN_SECONDS );

				return;
			}

			foreach ( $posts as $post ) {
				$this->scanPost( $post );
			}
		} finally {
			// Runs on every exit path above, including early returns and uncaught exceptions.
			aioseo()->core->cache->delete( $lockKey );
		}
	}

	/**
	 * Gets the ORDER BY clause for prioritizing included post types.
	 * Prioritizes included sitemap post types before non-included ones.
	 *
	 * An empty string is returned when all post types are included in the sitemap or when none
	 * of the included post types is among the given public post types.
	 *
	 * @since 4.9.5
	 *
	 * @param  array  $publicPostTypes All public post types.
	 * @return string                  The raw ORDER BY clause, or an empty string.
	 */
	private function getScanPostsOrderByClause( $publicPostTypes ) {
		if ( aioseo()->options->sitemap->general->postTypes->all ) {
			return '';
		}

		$includedPostTypes = aioseo()->options->sitemap->general->postTypes->included;
		if ( empty( $includedPostTypes ) ) {
			return '';
		}

		// Filter out post types that are no longer registered.
		$includedPostTypes = array_values( array_intersect( $includedPostTypes, $publicPostTypes ) );
		if ( empty( $includedPostTypes ) ) {
			return '';
		}

		// Included post types are ranked by their position in the list; everything else sorts last.
		$orderByClause = 'CASE';
		foreach ( $includedPostTypes as $index => $postType ) {
			$orderByClause .= " WHEN `p`.`post_type` = '" . esc_sql( $postType ) . "' THEN " . $index;
		}
		$orderByClause .= ' ELSE 9999 END ASC, `p`.`ID` ASC';

		return $orderByClause;
	}

	/**
	 * Scans a single post for images and stores the resulting image entries.
*
	 * Password-protected posts and attachments that are not images are stored without images.
	 * Image attachments are stored with themselves as the only entry. For every other post the
	 * images are extracted from the post, filtered through 'aioseo_sitemap_images' and capped
	 * at 1,000 entries. Nothing happens when the given ID does not resolve to a post.
	 *
	 * @since 4.0.0
	 *
	 * @param  \WP_Post|int $post The post object or ID.
	 * @return void
	 */
	public function scanPost( $post ) {
		if ( is_numeric( $post ) ) {
			$post = get_post( $post );
		}

		// get_post() returns null for an unknown ID; there is nothing to scan or store then.
		if ( ! is_object( $post ) || empty( $post->ID ) ) {
			return;
		}

		$this->post = $post;

		if ( ! empty( $post->post_password ) ) {
			$this->updatePost( $post->ID );

			return;
		}

		if ( 'attachment' === $post->post_type ) {
			if ( ! wp_attachment_is( 'image', $post->ID ) ) {
				$this->updatePost( $post->ID );

				return;
			}

			$image = $this->buildEntries( [ $post->ID ] );
			$this->updatePost( $post->ID, $image );

			return;
		}

		$images = $this->extract();
		$images = $this->removeImageDimensions( $images );

		$images = apply_filters( 'aioseo_sitemap_images', $images, $post );

		// Limit to a 1,000 URLs, in accordance to Google's specifications.
		$images = array_slice( $images, 0, 1000 );

		$this->updatePost( $post->ID, $this->buildEntries( $images ) );
	}

	/**
	 * Returns the image entries for a given term.
	 *
	 * The only image considered is the attachment referenced by the term's 'thumbnail_id' meta.
	 *
	 * @since 4.0.0
	 *
	 * @param  \WP_Term $term The term object.
	 * @return array          The image entries.
	 */
	public function term( $term ) {
		if ( aioseo()->sitemap->helpers->excludeImages() ) {
			return [];
		}

		$id = get_term_meta( $term->term_id, 'thumbnail_id', true );
		if ( ! $id ) {
			return [];
		}

		return $this->buildEntries( [ $id ] );
	}

	/**
	 * Builds the image entries.
	 *
	 * Images whose URL cannot be resolved or does not match the supported extension pattern are
	 * skipped. Entries are keyed by attachment ID or URL while building, so duplicates collapse
	 * into one entry.
	 *
	 * @since 4.0.0
	 *
	 * @param  array $images The images, consisting of attachment IDs or external URLs.
	 * @return array         The image entries.
	 */
	private function buildEntries( $images ) {
		$entries = [];
		foreach ( $images as $image ) {
			$idOrUrl  = $this->getImageIdOrUrl( $image );
			$imageUrl = is_numeric( $idOrUrl ) ? wp_get_attachment_url( $idOrUrl ) : $idOrUrl;
			$imageUrl = aioseo()->sitemap->helpers->formatUrl( $imageUrl );
			if ( ! $imageUrl || ! preg_match( $this->getImageExtensionRegexPattern(), (string) $imageUrl ) ) {
				continue;
			}

			// If the image URL is not external, make it relative.
			// This is important for users who scan their sites in a local/staging environment and then
			// push the data to production.
			if ( ! aioseo()->helpers->isExternalUrl( $imageUrl ) ) {
				$imageUrl = aioseo()->helpers->makeUrlRelative( $imageUrl );
			}

			$entries[ $idOrUrl ] = [ 'image:loc' => $imageUrl ];
		}

		return array_values( $entries );
	}

	/**
	 * Returns the ID of the image if it's hosted on the site. Otherwise it returns the external URL.
	 *
	 * @since 4.1.3
	 *
	 * @param  int|string $image The attachment ID or URL.
	 * @return int|string        The attachment ID or URL.
	 */
	private function getImageIdOrUrl( $image ) {
		if ( is_numeric( $image ) ) {
			return $image;
		}

		$attachmentId = false;
		if ( aioseo()->helpers->isValidAttachment( $image ) ) {
			$attachmentId = aioseo()->helpers->attachmentUrlToPostId( $image );
		}

		return $attachmentId ? $attachmentId : $image;
	}

	/**
	 * Extracts all image URLs and IDs from the post.
	 *
	 * Sources, in order: the featured image, core galleries, third-party integrations and the
	 * image tags found in the rendered post content.
	 *
	 * @since 4.0.0
	 *
	 * @return array The image URLs and IDs.
	 */
	private function extract() {
		$images = [];

		if ( has_post_thumbnail( $this->post ) ) {
			// The URL lookup returns false when no URL can be resolved; don't collect that.
			$thumbnailUrl = get_the_post_thumbnail_url( $this->post );
			if ( $thumbnailUrl ) {
				$images[] = $thumbnailUrl;
			}
		}

		// Get the galleries here before doShortcodes() runs below to prevent buggy behaviour.
		// WordPress is supposed to only return the attached images but returns a different result if the shortcode has no valid attributes, so we need to grab them manually.
		$images = array_merge( $images, $this->getPostGalleryImages() );

		// Now, get the remaining images from image tags in the post content.
		$parsedPostContent = do_blocks( $this->post->post_content );
		$parsedPostContent = aioseo()->helpers->doShortcodes( $parsedPostContent, true, $this->post->ID );
		$parsedPostContent = trim( (string) $parsedPostContent );

		// Trim both internal and external whitespace.
		// preg_replace() returns null on failure (e.g. invalid UTF-8 combined with the /u modifier).
		// In that case keep the uncollapsed content instead of throwing all of it away.
		$collapsedContent = preg_replace( '/\s\s+/u', ' ', $parsedPostContent );
		if ( null !== $collapsedContent ) {
			$parsedPostContent = $collapsedContent;
		}

		// Get the images from any third-party plugins/themes that are active.
		$thirdParty = new ThirdParty( $this->post, $parsedPostContent );
		$images     = array_merge( $images, $thirdParty->extract() );

		preg_match_all( '#<(amp-)?img[^>]+src="([^">]+)"#', (string) $parsedPostContent, $matches );
		foreach ( $matches[2] as $url ) {
			$images[] = aioseo()->helpers->makeUrlAbsolute( $url );
		}

		return array_unique( $images );
	}

	/**
	 * Returns all images from WP Core post galleries.
	 *
	 * Side effect: the gallery shortcodes are removed from the post content of the post that is
	 * currently being scanned, so that they are not processed a second time.
	 *
	 * @since 4.2.2
	 *
	 * @return string[] The image URLs.
	 */
	private function getPostGalleryImages() {
		$images    = [];
		$galleries = get_post_galleries( $this->post, false );
		foreach ( $galleries as $gallery ) {
			// Skip galleries that carry no usable list of sources.
			if ( empty( $gallery['src'] ) || ! is_array( $gallery['src'] ) ) {
				continue;
			}

			foreach ( $gallery['src'] as $imageUrl ) {
				$images[] = $imageUrl;
			}
		}

		// Now, get rid of them so that we don't process the shortcodes again.
		$regex           = get_shortcode_regex( [ 'gallery' ] );
		$strippedContent = preg_replace( "/$regex/i", '', (string) $this->post->post_content );

		// preg_replace() returns null on failure; leave the content untouched rather than wiping it.
		if ( null !== $strippedContent ) {
			$this->post->post_content = $strippedContent;
		}

		return $images;
	}

	/**
	 * Removes image dimensions from the slug.
	 *
	 * Empty results are dropped and duplicates are removed.
	 *
	 * @since 4.0.0
	 *
	 * @param  array $urls         The image URLs.
	 * @return array $preparedUrls The formatted image URLs.
	 */
	private function removeImageDimensions( $urls ) {
		$preparedUrls = [];
		foreach ( $urls as $url ) {
			$preparedUrls[] = aioseo()->helpers->removeImageDimensions( $url );
		}

		return array_unique( array_filter( $preparedUrls ) );
	}

	/**
	 * Stores the image data for a given post in our DB table.
	 *
	 * The scan date is always set to the current GMT time, also when no images were found, and
	 * an empty image list is stored as null. For posts without an existing row, the migrated
	 * post meta is used as the starting data.
	 *
	 * @since 4.0.5
	 *
	 * @param  int   $postId The post ID.
	 * @param  array $images The images.
	 * @return void
	 */
	private function updatePost( $postId, $images = [] ) {
		$post                    = \AIOSEO\Plugin\Common\Models\Post::getPost( $postId );
		$meta                    = $post->exists() ? [] : aioseo()->migration->meta->getMigratedPostMeta( $postId );
		$meta['post_id']         = $postId;
		$meta['images']          = ! empty( $images ) ? $images : null;
		$meta['image_scan_date'] = gmdate( 'Y-m-d H:i:s' );

		$post->set( $meta );
		$post->save();
	}

	/**
	 * Returns the image extension regex pattern.
*
	 * The pattern matches strings that contain "http" and end in one of the supported
	 * extensions, case-insensitively. It is kept in a static local variable, so it is only
	 * assembled on the first call.
	 *
	 * @since 4.2.2
	 *
	 * @return string The regex pattern.
	 */
	public function getImageExtensionRegexPattern() {
		static $pattern = null;

		if ( null === $pattern ) {
			$extensionGroup = implode( '|', $this->supportedExtensions );
			$pattern        = '/http.*\.(' . $extensionGroup . ')$/i';
		}

		return $pattern;
	}
}