Files
cmsPRO/autoload/class.Cron.php
2026-02-22 21:59:33 +01:00

515 lines
19 KiB
PHP

<?php
class Cron
{
/**
 * Resets the crawl state of one project whose weekly automatic update is due.
 *
 * Selects a single project with automatic_update = 1 whose last_update is at
 * least one week old, then:
 *  - deletes its internal links that have a parent (parent_id is not null),
 *  - deletes all of its external links,
 *  - marks its remaining internal links as not visited,
 *  - stamps the project's last_update with the current time.
 *
 * Only one project is handled per call.
 *
 * @return array [ 'status' => 'ok', 'msg' => string (HTML) ] when a project was reset,
 *               [ 'status' => 'empty' ] when no project is due or the query failed.
 */
public static function automatic_update_sites()
{
    global $mdb;

    // One timestamp is used for both the "is it due" check and the new last_update value.
    $now = date( 'Y-m-d H:i:s' );

    // Only the first row is ever processed, so there is no point in fetching more.
    $statement = $mdb -> query( "SELECT id, url FROM projects WHERE automatic_update = 1 AND DATE_ADD( last_update, INTERVAL 1 WEEK ) <= '" . $now . "' LIMIT 1" );

    // A failed query yields a falsy value; treat it like "nothing to do" instead of a fatal error.
    $results = $statement ? $statement -> fetchAll() : [];
    if ( !is_array( $results ) or empty( $results ) )
        return [ 'status' => 'empty' ];

    $row = reset( $results );

    $mdb -> delete( 'project_links_internal', [ 'AND' => [ 'project_id' => $row['id'], 'parent_id[!]' => null ] ] );
    $mdb -> delete( 'project_links_external', [ 'project_id' => $row['id'] ] );
    $mdb -> update( 'project_links_internal', [ 'visited' => 0 ], [ 'project_id' => $row['id'] ] );
    $mdb -> update( 'projects', [ 'last_update' => $now ], [ 'id' => $row['id'] ] );

    // The URL is placed inside HTML markup, so it has to be escaped for that context.
    $safe_url = htmlspecialchars( (string)$row['url'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' );

    return [ 'status' => 'ok', 'msg' => 'Ponawiam sprawdzanie strony <a href="' . $safe_url . '" target="_blank">' . $safe_url . '</a>' ];
}
/**
 * Seeds the crawl of one enabled project that has no internal links recorded yet.
 *
 * Downloads the project's main URL, inserts it as the root row (parent_id = null)
 * of project_links_internal and then inserts every internal link found on that
 * page which is a valid URL and is not stored for the project yet.
 *
 * Only one project is handled per call.
 *
 * @return array [ 'status' => 'ok', 'msg' => string (HTML) ] after a download attempt
 *               (the message tells whether it succeeded),
 *               [ 'status' => 'empty' ] when there is no project to seed or the query failed.
 */
public static function get_site_main_links()
{
    global $mdb;

    $statement = $mdb -> query( 'SELECT id, url FROM projects WHERE id NOT IN ( SELECT project_id FROM project_links_internal GROUP BY project_id ) AND enabled = 1 LIMIT 1' );

    // A failed query yields a falsy value; treat it like "nothing to do" instead of a fatal error.
    $results = $statement ? $statement -> fetchAll() : [];
    if ( !is_array( $results ) or empty( $results ) )
        return [ 'status' => 'empty' ];

    $row = reset( $results );

    // The URL is placed inside HTML markup, so it has to be escaped for that context.
    $safe_url = htmlspecialchars( (string)$row['url'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' );

    $ch = curl_init();
    curl_setopt( $ch, CURLOPT_URL, $row['url'] );
    curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
    curl_setopt( $ch, CURLOPT_VERBOSE, 1 );
    curl_setopt( $ch, CURLOPT_TIMEOUT, 60 );
    // CURLOPT_HEADER is intentionally not enabled: the response is only fed to the
    // HTML parser, and raw HTTP headers in front of the markup are just noise for it.
    curl_setopt( $ch, CURLOPT_CAINFO, 'cacert.pem' );
    // NOTE(review): peer verification is disabled, so TLS certificates are not checked.
    curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false );
    curl_setopt( $ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36' );
    $response = curl_exec( $ch );

    // The error state must be read BEFORE the handle is closed; on PHP < 8 a closed
    // handle can no longer be queried, which made every transfer look successful.
    $failed = ( $response === false or curl_errno( $ch ) );
    curl_close( $ch );

    if ( $failed )
        return [ 'status' => 'ok', 'msg' => 'Błąd podczas pobierania strony <a href="' . $safe_url . '" target="_blank">' . $safe_url . '</a>' ];

    // Root link of the project.
    $mdb -> insert( 'project_links_internal', [
        'project_id' => $row['id'],
        'url' => $row['url'],
        'parent_id' => null
    ] );

    // loadHTML() rejects an empty string (ValueError on PHP 8+), so skip parsing then.
    if ( (string)$response !== '' )
    {
        // Real-world markup is rarely valid; collect parser complaints instead of emitting warnings.
        $previous = libxml_use_internal_errors( true );
        $doc = new DOMDocument;
        $doc -> loadHTML( $response );
        libxml_clear_errors();
        libxml_use_internal_errors( $previous );

        foreach ( $doc -> getElementsByTagName( 'a' ) as $link )
        {
            $href = $link -> getAttribute( 'href' );
            if ( !\S::is_url_internal( $row['url'], $href ) )
                continue;

            // Drop the fragment (and any dangling separators left in front of it).
            $hash = strpos( $href, '#' );
            if ( $hash !== false )
                $href = rtrim( substr( $href, 0, $hash ), '?,#' );

            $href = \S::modify_internal_link( $row['url'], $href );

            if ( filter_var( $href, FILTER_VALIDATE_URL ) === false )
                continue;

            // Skip links already stored for this project.
            if ( $mdb -> count( 'project_links_internal', [ 'AND' => [ 'project_id' => $row['id'], 'url' => $href ] ] ) )
                continue;

            $mdb -> insert( 'project_links_internal', [
                'project_id' => $row['id'],
                'url' => $href
            ] );
        }
    }

    return [ 'status' => 'ok', 'msg' => 'Pobieram linki dla strony <a href="' . $safe_url . '" target="_blank">' . $safe_url . '</a>' ];
}
/**
 * Crawls one not yet visited internal link of an enabled project.
 *
 * Downloads the link and, depending on the outcome:
 *  - HTML page with code 200/301 and no transfer error: runs all per-page
 *    analysers (title, meta tags, lengths, canonical, table/iframe/h1/img checks),
 *    stores newly discovered internal links and all other links as external ones,
 *    then marks the row as visited together with the response;
 *  - code 404 or a non-HTML content type: marks the row as visited and deleted;
 *  - any other HTML response with a code different from 200: marks the row as
 *    visited and stores the response;
 *  - otherwise: reports a download error and leaves the row untouched.
 *
 * Only one link is handled per call.
 *
 * @return array [ 'status' => 'ok', 'msg' => string (HTML) ] after a download attempt,
 *               [ 'status' => 'empty' ] when nothing is left to visit or the query failed.
 */
public static function get_site_other_links()
{
    global $mdb;

    $statement = $mdb -> query( 'SELECT '
        . 'pli.id, project_id, pli.url, p.url AS project_url '
        . 'FROM '
        . 'project_links_internal AS pli '
        . 'INNER JOIN projects AS p ON p.id = pli.project_id '
        . 'WHERE '
        . 'visited = 0 AND enabled = 1 '
        . 'LIMIT 1' );

    // A failed query yields a falsy value; treat it like "nothing to do" instead of a fatal error.
    $results = $statement ? $statement -> fetchAll() : [];
    if ( !is_array( $results ) or empty( $results ) )
        return [ 'status' => 'empty' ];

    $row = reset( $results );

    // The URL is placed inside HTML markup, so it has to be escaped for that context.
    $safe_url = htmlspecialchars( (string)$row['url'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' );
    $msg_info = 'Pobieram informacje dla strony <a href="' . $safe_url . '" target="_blank">' . $safe_url . '</a>';
    $msg_error = 'Błąd podczas pobierania strony <a href="' . $safe_url . '" target="_blank">' . $safe_url . '</a>';

    // parse_url() may fail or return no host for malformed URLs.
    $host = parse_url( (string)$row['url'], PHP_URL_HOST );

    $ch = curl_init();
    curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
    curl_setopt( $ch, CURLOPT_VERBOSE, 1 );
    curl_setopt( $ch, CURLOPT_TIMEOUT, 60 );
    curl_setopt( $ch, CURLOPT_COOKIEFILE, 'temp/cookie.txt' );
    curl_setopt( $ch, CURLOPT_COOKIEJAR, 'temp/cookie.txt' );
    curl_setopt( $ch, CURLOPT_CAINFO, 'cacert.pem' );
    // NOTE(review): peer verification is disabled, so TLS certificates are not checked.
    curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false );
    curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
    curl_setopt( $ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36' );

    // Preliminary request to the site root; its response is discarded.
    // Presumably it exists to pick up cookies before the real request - TODO confirm.
    if ( $host )
    {
        curl_setopt( $ch, CURLOPT_URL, 'http://' . $host );
        curl_exec( $ch );
    }

    curl_setopt( $ch, CURLOPT_URL, $row['url'] );
    $response = curl_exec( $ch );
    $content_type = curl_getinfo( $ch, CURLINFO_CONTENT_TYPE );
    $code = curl_getinfo( $ch, CURLINFO_HTTP_CODE );

    // The error state must be read BEFORE the handle is closed; on PHP < 8 a closed
    // handle can no longer be queried, which made every transfer look successful.
    $failed = ( $response === false or curl_errno( $ch ) );
    curl_close( $ch );

    // The content type is missing when the server sent none or the transfer failed.
    $is_html = strpos( (string)$content_type, 'text/html' ) !== false;

    if ( !$failed and ( $code == 200 or $code == 301 ) and $is_html )
    {
        self::get_site_meta_title( $row['id'], $response );
        self::get_site_meta_keywords( $row['id'], $response );
        self::get_site_meta_description( $row['id'], $response );
        self::get_site_meta_robots( $row['id'], $response );
        self::get_site_meta_googlebot( $row['id'], $response );
        self::get_site_code_lenght( $row['id'], $response );
        self::get_site_text_lenght( $row['id'], $response );
        self::get_site_canonical( $row['id'], $response );
        self::get_table_exists( $row['id'], $response );
        self::get_iframe_exists( $row['id'], $response );
        self::get_h1_exists( $row['id'], $response );
        self::get_images_without_alt( $row['id'], $response );

        /* collect the links found on the page */
        // loadHTML() rejects an empty string (ValueError on PHP 8+), so skip parsing then.
        if ( (string)$response !== '' )
        {
            // Real-world markup is rarely valid; collect parser complaints instead of emitting warnings.
            $previous = libxml_use_internal_errors( true );
            $doc = new DOMDocument;
            $doc -> loadHTML( $response );
            libxml_clear_errors();
            libxml_use_internal_errors( $previous );

            foreach ( $doc -> getElementsByTagName( 'a' ) as $link )
            {
                $href = $link -> getAttribute( 'href' );

                /* internal links on this subpage */
                if ( \S::is_url_internal( $row['project_url'], $href ) )
                {
                    // Drop the fragment (and any dangling separators left in front of it).
                    $hash = strpos( $href, '#' );
                    if ( $hash !== false )
                        $href = rtrim( substr( $href, 0, $hash ), '?,#' );

                    $href = \S::modify_internal_link( $row['project_url'], $href, $row['url'] );

                    // URLs without a file extension have no 'extension' key in pathinfo().
                    $info = pathinfo( $href );
                    $extension = isset( $info['extension'] ) ? strtolower( $info['extension'] ) : '';

                    if ( filter_var( $href, FILTER_VALIDATE_URL ) !== false
                        and !in_array( $extension, \S::not_html_format() )
                        and !$mdb -> count( 'project_links_internal', [
                            'AND' => [
                                'project_id' => $row['project_id'],
                                'url' => $href
                            ]
                        ] ) )
                    {
                        // The response column is filled in when the new link itself gets visited;
                        // copying the parent's page source into every discovered row was wrong.
                        $mdb -> insert( 'project_links_internal', [
                            'project_id' => $row['project_id'],
                            'url' => $href,
                            'visited' => 0,
                            'parent_id' => $row['id']
                        ] );
                    }
                }
                /* external links on this subpage */
                else
                {
                    // rel is a space separated, case-insensitive token list (e.g. "nofollow noopener").
                    $rel_tokens = preg_split( '/\s+/', strtolower( trim( $link -> getAttribute( 'rel' ) ) ) );
                    $nofollow = in_array( 'nofollow', $rel_tokens, true ) ? 1 : 0;

                    $mdb -> insert( 'project_links_external', [
                        'project_id' => $row['project_id'],
                        'link_id' => $row['id'],
                        'url' => $href,
                        'nofollow' => $nofollow,
                        'title' => $link -> getAttribute( 'title' )
                    ] );
                }
            }
        }

        $mdb -> update( 'project_links_internal', [
            'visited' => 1,
            'content_type' => $content_type,
            'response_code' => $code,
            'response' => $response
        ], [
            'id' => $row['id']
        ] );
        return [ 'status' => 'ok', 'msg' => $msg_info ];
    }

    // Missing page or something that is not HTML: mark it so it is not crawled again.
    if ( $code == 404 or !$is_html )
    {
        $mdb -> update( 'project_links_internal', [
            'visited' => 1,
            'deleted' => 1,
            'content_type' => $content_type,
            'response_code' => $code
        ], [
            'id' => $row['id']
        ] );
        return [ 'status' => 'ok', 'msg' => $msg_info ];
    }

    // HTML answer with an unexpected status code: keep it for inspection.
    if ( $code !== 200 )
    {
        $mdb -> update( 'project_links_internal', [
            'visited' => 1,
            'content_type' => $content_type,
            'response_code' => $code,
            'response' => $response
        ], [
            'id' => $row['id']
        ] );
        return [ 'status' => 'ok', 'msg' => $msg_info ];
    }

    // Only reachable for a 200 HTML answer whose transfer reported an error.
    // NOTE(review): the row stays unvisited here, so it may be selected again on the next call.
    return [ 'status' => 'ok', 'msg' => $msg_error ];
}
/**
 * Records whether the page contains at least one <img> whose alt attribute is
 * missing or empty, in project_links_internal.have_images_without_alt (1 or 0).
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML source of the page
 * @return void
 */
public static function get_images_without_alt( $url_id, $response )
{
    global $mdb;

    $have_images_without_alt = 0;
    $response = (string)$response;

    // loadHTML() rejects an empty string (ValueError on PHP 8+), so skip parsing then.
    if ( $response !== '' )
    {
        // Real-world markup is rarely valid; collect parser complaints instead of emitting warnings.
        $previous = libxml_use_internal_errors( true );
        $doc = new DOMDocument;
        $doc -> loadHTML( $response );
        libxml_clear_errors();
        libxml_use_internal_errors( $previous );

        foreach ( $doc -> getElementsByTagName( 'img' ) as $img )
        {
            // getAttribute() returns '' for a missing attribute; compare strictly so
            // that a literal alt="0" is not mistaken for a missing one.
            if ( $img -> getAttribute( 'alt' ) === '' )
            {
                $have_images_without_alt = 1;
                break;
            }
        }
    }

    $mdb -> update( 'project_links_internal', [ 'have_images_without_alt' => $have_images_without_alt ], [ 'id' => $url_id ] );
}
/**
 * Records whether the page contains at least one <table> element, in
 * project_links_internal.have_table (1 or 0).
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML source of the page
 * @return void
 */
public static function get_table_exists( $url_id, $response )
{
    global $mdb;

    $have_table = 0;
    $response = (string)$response;

    // loadHTML() rejects an empty string (ValueError on PHP 8+), so skip parsing then.
    if ( $response !== '' )
    {
        // Real-world markup is rarely valid; collect parser complaints instead of emitting warnings.
        $previous = libxml_use_internal_errors( true );
        $doc = new DOMDocument;
        $doc -> loadHTML( $response );
        libxml_clear_errors();
        libxml_use_internal_errors( $previous );

        $have_table = $doc -> getElementsByTagName( 'table' ) -> length ? 1 : 0;
    }

    $mdb -> update( 'project_links_internal', [ 'have_table' => $have_table ], [ 'id' => $url_id ] );
}
/**
 * Records whether the page contains at least one <iframe> element, in
 * project_links_internal.have_iframe (1 or 0).
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML source of the page
 * @return void
 */
public static function get_iframe_exists( $url_id, $response )
{
    global $mdb;

    $have_iframe = 0;
    $response = (string)$response;

    // loadHTML() rejects an empty string (ValueError on PHP 8+), so skip parsing then.
    if ( $response !== '' )
    {
        // Real-world markup is rarely valid; collect parser complaints instead of emitting warnings.
        $previous = libxml_use_internal_errors( true );
        $doc = new DOMDocument;
        $doc -> loadHTML( $response );
        libxml_clear_errors();
        libxml_use_internal_errors( $previous );

        $have_iframe = $doc -> getElementsByTagName( 'iframe' ) -> length ? 1 : 0;
    }

    $mdb -> update( 'project_links_internal', [ 'have_iframe' => $have_iframe ], [ 'id' => $url_id ] );
}
/**
 * Records whether the page contains at least one <h1> element, in
 * project_links_internal.have_h1 (1 or 0).
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML source of the page
 * @return void
 */
public static function get_h1_exists( $url_id, $response )
{
    global $mdb;

    $have_h1 = 0;
    $response = (string)$response;

    // loadHTML() rejects an empty string (ValueError on PHP 8+), so skip parsing then.
    if ( $response !== '' )
    {
        // Real-world markup is rarely valid; collect parser complaints instead of emitting warnings.
        $previous = libxml_use_internal_errors( true );
        $doc = new DOMDocument;
        $doc -> loadHTML( $response );
        libxml_clear_errors();
        libxml_use_internal_errors( $previous );

        $have_h1 = $doc -> getElementsByTagName( 'h1' ) -> length ? 1 : 0;
    }

    $mdb -> update( 'project_links_internal', [ 'have_h1' => $have_h1 ], [ 'id' => $url_id ] );
}
/**
 * Extracts the page title and stores it in project_links_internal.title.
 *
 * The text of the first <title> element is used (with any nested tags stripped).
 * When it is missing or empty, the content of the last og:title meta tag is used
 * instead. An empty string is stored when neither is found.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML source of the page
 * @return void
 */
public static function get_site_meta_title( $url_id, $response )
{
    global $mdb;

    $response = (string)$response;
    $title = '';

    // Attributes on the opening tag and a '>' inside the text (e.g. "Home > Shop") are allowed.
    if ( preg_match( '/<title\b[^>]*>(.*?)<\/title>/si', $response, $match ) )
        $title = (string)strip_tags( $match[1] );

    if ( !$title )
    {
        // Open Graph tags are normally written with property="og:..."; name="og:..." is accepted too.
        if ( preg_match_all( '/<[\s]*meta[\s]*(?:name|property)="og:?([^>"]*)"?[\s]*content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si', $response, $match ) )
        {
            foreach ( $match[1] as $i => $name )
            {
                // No break: when the tag is repeated, the last occurrence wins.
                if ( strtolower( trim( $name ) ) === 'title' )
                    $title = (string)$match[2][$i];
            }
        }
    }

    $mdb -> update( 'project_links_internal', [ 'title' => $title ], [ 'id' => $url_id ] );
}
/**
 * Stores the href of the page's <link rel="canonical"> element in
 * project_links_internal.canonical.
 *
 * When several canonical links exist the last one wins; when there is none,
 * null is stored.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML source of the page
 * @return void
 */
public static function get_site_canonical( $url_id, $response )
{
    global $mdb;

    // Explicit default: previously the variable was undefined when no canonical link existed.
    $canonical = null;
    $response = (string)$response;

    // loadHTML() rejects an empty string (ValueError on PHP 8+), so skip parsing then.
    if ( $response !== '' )
    {
        // Real-world markup is rarely valid; collect parser complaints instead of emitting warnings.
        $previous = libxml_use_internal_errors( true );
        $doc = new DOMDocument;
        $doc -> loadHTML( $response );
        libxml_clear_errors();
        libxml_use_internal_errors( $previous );

        foreach ( $doc -> getElementsByTagName( 'link' ) as $link )
        {
            // Link types are case-insensitive in HTML.
            if ( strtolower( trim( $link -> getAttribute( 'rel' ) ) ) === 'canonical' )
                $canonical = $link -> getAttribute( 'href' );
        }
    }

    $mdb -> update( 'project_links_internal', [ 'canonical' => $canonical ], [ 'id' => $url_id ] );
}
/**
 * Extracts the meta keywords of a page and stores them in
 * project_links_internal.meta_keywords.
 *
 * Looks for <meta name="keywords" content="..."> first and falls back to
 * <meta property="og:keywords" content="...">. An empty string is stored when
 * neither is found. When a tag is repeated, the last occurrence wins.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML source of the page
 * @return void
 */
public static function get_site_meta_keywords( $url_id, $response )
{
    global $mdb;

    $response = (string)$response;

    // Returns the content of the last tag matched by $pattern whose captured name
    // is "keywords" (compared case-insensitively), or '' when there is none.
    $lookup = function ( $pattern ) use ( $response )
    {
        $found = '';
        if ( preg_match_all( $pattern, $response, $match ) )
        {
            foreach ( $match[1] as $i => $name )
            {
                if ( strtolower( trim( $name ) ) === 'keywords' )
                    $found = (string)$match[2][$i];
            }
        }
        return $found;
    };

    $meta_keywords = $lookup( '/<[\s]*meta[\s]*name="?([^>"]*)"?[\s]*content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si' );

    if ( !$meta_keywords )
        $meta_keywords = $lookup( '/<[\s]*meta[\s]*property="og:?([^>"]*)"?[\s]*content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si' );

    $mdb -> update( 'project_links_internal', [ 'meta_keywords' => $meta_keywords ], [ 'id' => $url_id ] );
}
/**
 * Extracts the meta description of a page and stores it in
 * project_links_internal.meta_description.
 *
 * Looks for <meta name="description" content="..."> first and falls back to
 * <meta property="og:description" content="...">. An empty string is stored
 * when neither is found. When a tag is repeated, the last occurrence wins.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML source of the page
 * @return void
 */
public static function get_site_meta_description( $url_id, $response )
{
    global $mdb;

    $response = (string)$response;

    // Returns the content of the last tag matched by $pattern whose captured name
    // is "description" (compared case-insensitively), or '' when there is none.
    $lookup = function ( $pattern ) use ( $response )
    {
        $found = '';
        if ( preg_match_all( $pattern, $response, $match ) )
        {
            foreach ( $match[1] as $i => $name )
            {
                if ( strtolower( trim( $name ) ) === 'description' )
                    $found = (string)$match[2][$i];
            }
        }
        return $found;
    };

    $meta_description = $lookup( '/<[\s]*meta[\s]*name="?([^>"]*)"?[\s]*content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si' );

    if ( !$meta_description )
        $meta_description = $lookup( '/<[\s]*meta[\s]*property="og:?([^>"]*)"?[\s]*content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si' );

    $mdb -> update( 'project_links_internal', [ 'meta_description' => $meta_description ], [ 'id' => $url_id ] );
}
/**
 * Extracts the content of <meta name="robots"> and stores it in
 * project_links_internal.meta_robots.
 *
 * An empty string is stored when the tag is not found. When the tag is
 * repeated, the last occurrence wins.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML source of the page
 * @return void
 */
public static function get_site_meta_robots( $url_id, $response )
{
    global $mdb;

    $meta_robots = '';

    if ( preg_match_all( '/<[\s]*meta[\s]*name="?([^>"]*)"?[\s]*content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si', (string)$response, $match ) )
    {
        foreach ( $match[1] as $i => $name )
        {
            // Meta names are matched case-insensitively; no break so that the last tag wins.
            if ( strtolower( trim( $name ) ) === 'robots' )
                $meta_robots = (string)$match[2][$i];
        }
    }

    $mdb -> update( 'project_links_internal', [ 'meta_robots' => $meta_robots ], [ 'id' => $url_id ] );
}
/**
 * Extracts the content of <meta name="googlebot"> and stores it in
 * project_links_internal.meta_googlebot.
 *
 * An empty string is stored when the tag is not found. When the tag is
 * repeated, the last occurrence wins.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML source of the page
 * @return void
 */
public static function get_site_meta_googlebot( $url_id, $response )
{
    global $mdb;

    $meta_googlebot = '';

    if ( preg_match_all( '/<[\s]*meta[\s]*name="?([^>"]*)"?[\s]*content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si', (string)$response, $match ) )
    {
        foreach ( $match[1] as $i => $name )
        {
            // Meta names are matched case-insensitively; no break so that the last tag wins.
            if ( strtolower( trim( $name ) ) === 'googlebot' )
                $meta_googlebot = (string)$match[2][$i];
        }
    }

    $mdb -> update( 'project_links_internal', [ 'meta_googlebot' => $meta_googlebot ], [ 'id' => $url_id ] );
}
/**
 * Stores the size of the page source, in bytes, in
 * project_links_internal.code_lenght.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response source of the page
 * @return void
 */
public static function get_site_code_lenght( $url_id, $response )
{
    global $mdb;

    $fields = [ 'code_lenght' => strlen( $response ) ];
    $where = [ 'id' => $url_id ];

    $mdb -> update( 'project_links_internal', $fields, $where );
}
/**
 * Stores the byte length of the page source after it has been passed through
 * \S::strip_html_tags(), in project_links_internal.text_lenght.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response source of the page
 * @return void
 */
public static function get_site_text_lenght( $url_id, $response )
{
    global $mdb;

    $plain_text = \S::strip_html_tags( $response );
    $fields = [ 'text_lenght' => strlen( $plain_text ) ];
    $where = [ 'id' => $url_id ];

    $mdb -> update( 'project_links_internal', $fields, $where );
}
}