515 lines
19 KiB
PHP
515 lines
19 KiB
PHP
<?php
|
|
class Cron
|
|
{
|
|
/**
 * Re-queues one project whose weekly automatic re-check is due.
 *
 * Selects projects with automatic_update = 1 whose last_update is at least
 * one week old. For the first row returned it deletes the project's internal
 * link rows matched by 'parent_id[!]' => null (presumably "parent_id IS NOT
 * NULL", i.e. discovered sub-pages - confirm against the DB wrapper), deletes
 * all of its external links, resets `visited` to 0 on the remaining internal
 * rows and stamps `last_update` with the current time.
 *
 * Only one project is handled per call; any further due projects are picked
 * up by later calls because their last_update is still old.
 *
 * @return array [ 'status' => 'ok', 'msg' => HTML message ] when a project was
 *               reset, [ 'status' => 'empty' ] when nothing is due.
 */
public static function automatic_update_sites()
{
    global $mdb;

    // One timestamp for both the due-check and the stamp written back.
    $now = date( 'Y-m-d H:i:s' );

    $due = $mdb -> query( "SELECT id, url FROM projects WHERE automatic_update = 1 AND DATE_ADD( last_update, INTERVAL 1 WEEK ) <= '" . $now . "'" ) -> fetchAll();

    if ( !is_array( $due ) || empty( $due ) )
        return [ 'status' => 'empty' ];

    $project = reset( $due );

    $mdb -> delete( 'project_links_internal', [ 'AND' => [ 'project_id' => $project['id'], 'parent_id[!]' => null ] ] );
    $mdb -> delete( 'project_links_external', [ 'project_id' => $project['id'] ] );
    $mdb -> update( 'project_links_internal', [ 'visited' => 0 ], [ 'project_id' => $project['id'] ] );

    $mdb -> update( 'projects', [ 'last_update' => $now ], [ 'id' => $project['id'] ] );

    // The URL is stored data and ends up inside HTML (attribute + text), so it
    // must be escaped to keep quotes/angle brackets from breaking the markup.
    $safe_url = htmlspecialchars( (string)$project['url'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' );

    return [ 'status' => 'ok', 'msg' => 'Ponawiam sprawdzanie strony <a href="' . $safe_url . '" target="_blank">' . $safe_url . '</a>' ];
}
|
|
|
|
/**
 * Seeds the crawl for one enabled project that has no internal links yet.
 *
 * Downloads the project's main URL, stores that URL as the root row
 * (parent_id = null) in project_links_internal and then stores every anchor
 * on the page that \S::is_url_internal() accepts, after stripping the
 * fragment, normalising it through \S::modify_internal_link() and skipping
 * URLs that are invalid or already stored for the project.
 *
 * @return array [ 'status' => 'ok', 'msg' => HTML message ] after an attempt
 *               (successful or failed), [ 'status' => 'empty' ] when no
 *               project is waiting.
 */
public static function get_site_main_links()
{
    global $mdb;

    $results = $mdb -> query( 'SELECT id, url FROM projects WHERE id NOT IN ( SELECT project_id FROM project_links_internal GROUP BY project_id ) AND enabled = 1 LIMIT 1' ) -> fetchAll();

    if ( !is_array( $results ) || empty( $results ) )
        return [ 'status' => 'empty' ];

    $row = reset( $results );

    // The URL is embedded in HTML below, so escape it once up front.
    $safe_url = htmlspecialchars( (string)$row['url'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' );
    $page_link = '<a href="' . $safe_url . '" target="_blank">' . $safe_url . '</a>';

    $ch = curl_init();
    curl_setopt( $ch, CURLOPT_URL, $row['url'] );
    curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
    curl_setopt( $ch, CURLOPT_VERBOSE, 1 );
    curl_setopt( $ch, CURLOPT_TIMEOUT, 60 );
    // Headers are kept in the response, so they are part of what gets parsed below.
    curl_setopt( $ch, CURLOPT_HEADER, true );
    curl_setopt( $ch, CURLOPT_CAINFO, 'cacert.pem' );
    // NOTE(review): peer verification is disabled, which makes CAINFO above moot - confirm this is intended.
    curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false );
    curl_setopt( $ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36' );
    $response = curl_exec( $ch );

    // The error state has to be read BEFORE the handle is closed; querying a
    // closed handle cannot report the failure on PHP versions where
    // curl_close() really releases it.
    $failed = ( $response === false || curl_errno( $ch ) );
    curl_close( $ch );

    if ( $failed )
        return [ 'status' => 'ok', 'msg' => 'Błąd podczas pobierania strony ' . $page_link ];

    $mdb -> insert( 'project_links_internal', [
        'project_id' => $row['id'],
        'url' => $row['url'],
        'parent_id' => null
    ] );

    // DOMDocument::loadHTML() rejects an empty source, so only parse real content.
    if ( $response !== '' )
    {
        $doc = new DOMDocument;
        $doc -> loadHTML( $response );

        foreach ( $doc -> getElementsByTagName( 'a' ) as $anchor )
        {
            $href = $anchor -> getAttribute( 'href' );

            if ( !\S::is_url_internal( $row['url'], $href ) )
                continue;

            // Drop the fragment and any trailing '?', ',' or '#' left in front of it.
            $hash_pos = strpos( $href, '#' );
            if ( $hash_pos !== false )
                $href = rtrim( substr( $href, 0, $hash_pos ), '?,#' );

            $href = \S::modify_internal_link( $row['url'], $href );

            if ( filter_var( $href, FILTER_VALIDATE_URL ) === false )
                continue;

            // Skip links that are already stored for this project.
            if ( $mdb -> count( 'project_links_internal', [ 'AND' => [ 'project_id' => $row['id'], 'url' => $href ] ] ) )
                continue;

            $mdb -> insert( 'project_links_internal', [
                'project_id' => $row['id'],
                'url' => $href
            ] );
        }
    }

    return [ 'status' => 'ok', 'msg' => 'Pobieram linki dla strony ' . $page_link ];
}
|
|
|
|
/**
 * Crawls one not-yet-visited internal link of an enabled project.
 *
 * The page is fetched with cURL (after a warm-up request to the host root
 * that shares the same cookie jar). Depending on the outcome:
 *  - no transfer error, HTTP 200/301 and an HTML content type: the page
 *    metrics are recorded through the get_* methods of this class, every
 *    anchor is stored as a new internal link or as an external link, and the
 *    row is marked visited together with its response;
 *  - HTTP 404 or a non-HTML content type (a failed transfer presumably lands
 *    here too, as it reports no HTML content type): the row is marked
 *    visited and deleted;
 *  - any other non-200 HTML response: the row is marked visited and the
 *    response is stored;
 *  - otherwise an error message is returned and the row stays unvisited.
 *
 * @return array [ 'status' => 'ok', 'msg' => HTML message ] after an attempt,
 *               [ 'status' => 'empty' ] when no link is waiting.
 */
public static function get_site_other_links()
{
    global $mdb;

    $results = $mdb -> query( 'SELECT '
        . 'pli.id, project_id, pli.url, p.url AS project_url '
        . 'FROM '
        . 'project_links_internal AS pli '
        . 'INNER JOIN projects AS p ON p.id = pli.project_id '
        . 'WHERE '
        . 'visited = 0 AND enabled = 1 '
        . 'LIMIT 1' ) -> fetchAll();

    if ( !is_array( $results ) || empty( $results ) )
        return [ 'status' => 'empty' ];

    $row = reset( $results );

    // The URL may come from third-party markup and is embedded in HTML below.
    $safe_url = htmlspecialchars( (string)$row['url'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' );
    $page_link = '<a href="' . $safe_url . '" target="_blank">' . $safe_url . '</a>';
    $msg_done = [ 'status' => 'ok', 'msg' => 'Pobieram informacje dla strony ' . $page_link ];

    $parts = parse_url( $row['url'] );
    $host = ( is_array( $parts ) && isset( $parts['host'] ) ) ? $parts['host'] : '';

    $ch = curl_init();
    curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
    curl_setopt( $ch, CURLOPT_VERBOSE, 1 );
    curl_setopt( $ch, CURLOPT_TIMEOUT, 60 );
    curl_setopt( $ch, CURLOPT_COOKIEFILE, 'temp/cookie.txt' );
    curl_setopt( $ch, CURLOPT_COOKIEJAR, 'temp/cookie.txt' );
    curl_setopt( $ch, CURLOPT_CAINFO, 'cacert.pem' );
    // NOTE(review): peer verification is disabled, which makes CAINFO above moot - confirm this is intended.
    curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false );
    curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
    curl_setopt( $ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36' );

    // Warm-up request to the host root; its body is discarded (presumably it
    // only serves to collect cookies). Skipped when the URL has no host.
    if ( $host !== '' )
    {
        curl_setopt( $ch, CURLOPT_URL, 'http://' . $host );
        curl_exec( $ch );
    }

    curl_setopt( $ch, CURLOPT_URL, $row['url'] );
    $response = curl_exec( $ch );
    $content_type = curl_getinfo( $ch, CURLINFO_CONTENT_TYPE );
    $code = curl_getinfo( $ch, CURLINFO_HTTP_CODE );

    // The error state has to be read BEFORE the handle is closed.
    $failed = ( $response === false || curl_errno( $ch ) );
    curl_close( $ch );

    // The content type can be missing; cast so strpos() always gets a string.
    $is_html = strpos( (string)$content_type, 'text/html' ) !== false;

    if ( !$failed && ( $code == 200 || $code == 301 ) && $is_html )
    {
        self::get_site_meta_title( $row['id'], $response );
        self::get_site_meta_keywords( $row['id'], $response );
        self::get_site_meta_description( $row['id'], $response );
        self::get_site_meta_robots( $row['id'], $response );
        self::get_site_meta_googlebot( $row['id'], $response );
        self::get_site_code_lenght( $row['id'], $response );
        self::get_site_text_lenght( $row['id'], $response );
        self::get_site_canonical( $row['id'], $response );
        self::get_table_exists( $row['id'], $response );
        self::get_iframe_exists( $row['id'], $response );
        self::get_h1_exists( $row['id'], $response );
        self::get_images_without_alt( $row['id'], $response );

        /* extract the links found on the page */
        // DOMDocument::loadHTML() rejects an empty source, so only parse real content.
        if ( $response !== '' )
        {
            $doc = new DOMDocument;
            $doc -> loadHTML( $response );

            foreach ( $doc -> getElementsByTagName( 'a' ) as $anchor )
            {
                $href = $anchor -> getAttribute( 'href' );

                /* internal links on this sub-page */
                if ( \S::is_url_internal( $row['project_url'], $href ) )
                {
                    // Drop the fragment and any trailing '?', ',' or '#' left in front of it.
                    $hash_pos = strpos( $href, '#' );
                    if ( $hash_pos !== false )
                        $href = rtrim( substr( $href, 0, $hash_pos ), '?,#' );

                    $target = \S::modify_internal_link( $row['project_url'], $href, $row['url'] );

                    // URLs without a file extension have no 'extension' key.
                    $info = pathinfo( $target );
                    $extension = isset( $info['extension'] ) ? strtolower( $info['extension'] ) : '';

                    if ( filter_var( $target, FILTER_VALIDATE_URL ) !== false
                        && !in_array( $extension, \S::not_html_format() )
                        && !$mdb -> count( 'project_links_internal', [
                            'AND' => [
                                'project_id' => $row['project_id'],
                                'url' => $target
                            ]
                        ] ) )
                    {
                        // NOTE(review): 'response' holds the PARENT page's body here;
                        // kept as in the original, but it looks unintended - confirm.
                        $mdb -> insert( 'project_links_internal', [
                            'project_id' => $row['project_id'],
                            'url' => $target,
                            'visited' => 0,
                            'parent_id' => $row['id'],
                            'response' => $response
                        ] );
                    }
                }
                /* external links on this sub-page */
                else
                {
                    // rel is a space-separated, case-insensitive token list,
                    // so "nofollow noopener" must count as nofollow too.
                    $rel_tokens = preg_split( '/\s+/', strtolower( $anchor -> getAttribute( 'rel' ) ), -1, PREG_SPLIT_NO_EMPTY );
                    $nofollow = in_array( 'nofollow', $rel_tokens, true ) ? 1 : 0;

                    $mdb -> insert( 'project_links_external', [
                        'project_id' => $row['project_id'],
                        'link_id' => $row['id'],
                        'url' => $href,
                        'nofollow' => $nofollow,
                        'title' => $anchor -> getAttribute( 'title' )
                    ] );
                }
            }
        }

        $mdb -> update( 'project_links_internal', [
            'visited' => 1,
            'content_type' => $content_type,
            'response_code' => $code,
            'response' => $response
        ], [
            'id' => $row['id']
        ] );

        return $msg_done;
    }

    if ( $code == 404 || !$is_html )
    {
        $mdb -> update( 'project_links_internal', [
            'visited' => 1,
            'deleted' => 1,
            'content_type' => $content_type,
            'response_code' => $code
        ], [
            'id' => $row['id']
        ] );

        return $msg_done;
    }

    // Only HTML responses reach this point ($is_html is true here).
    if ( $code !== 200 )
    {
        $mdb -> update( 'project_links_internal', [
            'visited' => 1,
            'content_type' => $content_type,
            'response_code' => $code,
            'response' => $response
        ], [
            'id' => $row['id']
        ] );

        return $msg_done;
    }

    return [ 'status' => 'ok', 'msg' => 'Błąd podczas pobierania strony ' . $page_link ];
}
|
|
|
|
/**
 * Records whether the page contains at least one <img> whose alt attribute
 * is missing or empty.
 *
 * Writes 1 or 0 to project_links_internal.have_images_without_alt for the
 * given row. An empty response is treated as "no images" instead of being
 * handed to DOMDocument::loadHTML(), which rejects an empty source.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML of the page
 */
static public function get_images_without_alt( $url_id, $response )
{
    global $mdb;

    $have_images_without_alt = 0;
    $html = (string)$response;

    if ( $html !== '' )
    {
        $doc = new DOMDocument;
        $doc -> loadHTML( $html );

        foreach ( $doc -> getElementsByTagName( 'img' ) as $img )
        {
            // getAttribute() returns '' for a missing attribute, so this
            // catches both absent and empty alt texts.
            if ( !$img -> getAttribute( 'alt' ) )
            {
                $have_images_without_alt = 1;
                break; // one offender is enough to set the flag
            }
        }
    }

    $mdb -> update( 'project_links_internal', [ 'have_images_without_alt' => $have_images_without_alt ], [ 'id' => $url_id ] );
}
|
|
|
|
/**
 * Records whether the page contains at least one <table> element.
 *
 * Writes 1 or 0 to project_links_internal.have_table for the given row. An
 * empty response counts as "no table" instead of being handed to
 * DOMDocument::loadHTML(), which rejects an empty source.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML of the page
 */
static public function get_table_exists( $url_id, $response )
{
    global $mdb;

    $have_table = 0;
    $html = (string)$response;

    if ( $html !== '' )
    {
        $doc = new DOMDocument;
        $doc -> loadHTML( $html );

        if ( $doc -> getElementsByTagName( 'table' ) -> length )
            $have_table = 1;
    }

    $mdb -> update( 'project_links_internal', [ 'have_table' => $have_table ], [ 'id' => $url_id ] );
}
|
|
|
|
/**
 * Records whether the page contains at least one <iframe> element.
 *
 * Writes 1 or 0 to project_links_internal.have_iframe for the given row. An
 * empty response counts as "no iframe" instead of being handed to
 * DOMDocument::loadHTML(), which rejects an empty source.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML of the page
 */
static public function get_iframe_exists( $url_id, $response )
{
    global $mdb;

    $have_iframe = 0;
    $html = (string)$response;

    if ( $html !== '' )
    {
        $doc = new DOMDocument;
        $doc -> loadHTML( $html );

        if ( $doc -> getElementsByTagName( 'iframe' ) -> length )
            $have_iframe = 1;
    }

    $mdb -> update( 'project_links_internal', [ 'have_iframe' => $have_iframe ], [ 'id' => $url_id ] );
}
|
|
|
|
/**
 * Records whether the page contains at least one <h1> element.
 *
 * Writes 1 or 0 to project_links_internal.have_h1 for the given row. An
 * empty response counts as "no h1" instead of being handed to
 * DOMDocument::loadHTML(), which rejects an empty source.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML of the page
 */
static public function get_h1_exists( $url_id, $response )
{
    global $mdb;

    $have_h1 = 0;
    $html = (string)$response;

    if ( $html !== '' )
    {
        $doc = new DOMDocument;
        $doc -> loadHTML( $html );

        if ( $doc -> getElementsByTagName( 'h1' ) -> length )
            $have_h1 = 1;
    }

    $mdb -> update( 'project_links_internal', [ 'have_h1' => $have_h1 ], [ 'id' => $url_id ] );
}
|
|
|
|
/**
 * Extracts the page title and stores it in project_links_internal.title.
 *
 * The text of the <title> element (tags stripped) is used when present and
 * non-empty. Otherwise the method falls back to an Open Graph style meta
 * tag whose name after the "og:" prefix is exactly "title"; the prefix may
 * sit in either a name="..." or a property="..." attribute. When neither
 * source yields a value an empty string is stored.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML of the page
 */
public static function get_site_meta_title( $url_id, $response )
{
    global $mdb;

    $html = (string)$response;
    $title = '';

    if ( preg_match( '/<title>([^>]*)<\/title>/si', $html, $match ) )
        $title = (string)strip_tags( $match[1] );

    if ( !$title )
    {
        // Open Graph tags are normally written with property="og:..."; the
        // name="og:..." spelling is still accepted as well.
        $found = preg_match_all( '/<[\s]*meta[\s]*(?:name|property)="og:?' . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si', $html, $match );

        if ( $found )
        {
            // When the tag occurs more than once the last occurrence wins.
            foreach ( $match[1] as $i => $name )
            {
                if ( $name === 'title' )
                    $title = (string)$match[2][$i];
            }
        }
    }

    $mdb -> update( 'project_links_internal', [ 'title' => $title ], [ 'id' => $url_id ] );
}
|
|
|
|
/**
 * Stores the page's canonical URL in project_links_internal.canonical.
 *
 * The href of the last <link> element whose rel attribute equals
 * "canonical" is used; null is stored when the page declares none. An
 * empty response is not handed to DOMDocument::loadHTML(), which rejects
 * an empty source.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML of the page
 */
public static function get_site_canonical( $url_id, $response )
{
    global $mdb;

    // Initialised up front so pages without a canonical link do not read an
    // undefined variable.
    $canonical = null;
    $html = (string)$response;

    if ( $html !== '' )
    {
        $doc = new DOMDocument;
        $doc -> loadHTML( $html );

        foreach ( $doc -> getElementsByTagName( 'link' ) as $link )
        {
            if ( $link -> getAttribute( 'rel' ) == 'canonical' )
                $canonical = $link -> getAttribute( 'href' );
        }
    }

    $mdb -> update( 'project_links_internal', [ 'canonical' => $canonical ], [ 'id' => $url_id ] );
}
|
|
|
|
/**
 * Extracts the meta keywords and stores them in
 * project_links_internal.meta_keywords.
 *
 * First looks for a <meta name="keywords" content="..."> tag; when that
 * yields nothing it falls back to <meta property="og:keywords" ...>. The
 * name comparison is exact (case-sensitive) and, if a tag is repeated, the
 * last occurrence wins. An empty string is stored when no value is found.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML of the page
 */
public static function get_site_meta_keywords( $url_id, $response )
{
    global $mdb;

    $html = (string)$response;
    $meta_keywords = '';

    // Tried in order; the second pattern is only consulted when the first
    // one produced no usable value.
    $patterns = [
        '/<[\s]*meta[\s]*name="?' . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si',
        '/<[\s]*meta[\s]*property="og:?' . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si'
    ];

    foreach ( $patterns as $pattern )
    {
        if ( $meta_keywords )
            break;

        if ( !preg_match_all( $pattern, $html, $match ) )
            continue;

        // Scan the captured names instead of indexing a lookup table, so a
        // page without the tag no longer reads an undefined array key.
        foreach ( $match[1] as $i => $name )
        {
            if ( $name === 'keywords' )
                $meta_keywords = (string)$match[2][$i];
        }
    }

    $mdb -> update( 'project_links_internal', [ 'meta_keywords' => $meta_keywords ], [ 'id' => $url_id ] );
}
|
|
|
|
/**
 * Extracts the meta description and stores it in
 * project_links_internal.meta_description.
 *
 * First looks for a <meta name="description" content="..."> tag; when that
 * yields nothing it falls back to <meta property="og:description" ...>.
 * The name comparison is exact (case-sensitive) and, if a tag is repeated,
 * the last occurrence wins. An empty string is stored when no value is
 * found.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML of the page
 */
public static function get_site_meta_description( $url_id, $response )
{
    global $mdb;

    $html = (string)$response;
    $meta_description = '';

    // Tried in order; the second pattern is only consulted when the first
    // one produced no usable value.
    $patterns = [
        '/<[\s]*meta[\s]*name="?' . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si',
        '/<[\s]*meta[\s]*property="og:?' . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si'
    ];

    foreach ( $patterns as $pattern )
    {
        if ( $meta_description )
            break;

        if ( !preg_match_all( $pattern, $html, $match ) )
            continue;

        // Scan the captured names instead of indexing a lookup table, so a
        // page without the tag no longer reads an undefined array key.
        foreach ( $match[1] as $i => $name )
        {
            if ( $name === 'description' )
                $meta_description = (string)$match[2][$i];
        }
    }

    $mdb -> update( 'project_links_internal', [ 'meta_description' => $meta_description ], [ 'id' => $url_id ] );
}
|
|
|
|
/**
 * Extracts the robots meta directive and stores it in
 * project_links_internal.meta_robots.
 *
 * Looks for a <meta name="robots" content="..."> tag. The name comparison
 * is exact (case-sensitive) and, if the tag is repeated, the last
 * occurrence wins. An empty string is stored when the tag is absent.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML of the page
 */
public static function get_site_meta_robots( $url_id, $response )
{
    global $mdb;

    $meta_robots = '';

    $found = preg_match_all( '/<[\s]*meta[\s]*name="?' . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si', (string)$response, $match );

    if ( $found )
    {
        // Scan the captured names instead of indexing a lookup table, so a
        // page without the tag no longer reads an undefined array key.
        foreach ( $match[1] as $i => $name )
        {
            if ( $name === 'robots' )
                $meta_robots = (string)$match[2][$i];
        }
    }

    $mdb -> update( 'project_links_internal', [ 'meta_robots' => $meta_robots ], [ 'id' => $url_id ] );
}
|
|
|
|
/**
 * Extracts the googlebot meta directive and stores it in
 * project_links_internal.meta_googlebot.
 *
 * Looks for a <meta name="googlebot" content="..."> tag. The name
 * comparison is exact (case-sensitive) and, if the tag is repeated, the
 * last occurrence wins. An empty string is stored when the tag is absent.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response HTML of the page
 */
public static function get_site_meta_googlebot( $url_id, $response )
{
    global $mdb;

    $meta_googlebot = '';

    $found = preg_match_all( '/<[\s]*meta[\s]*name="?' . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si', (string)$response, $match );

    if ( $found )
    {
        // Scan the captured names instead of indexing a lookup table, so a
        // page without the tag no longer reads an undefined array key.
        foreach ( $match[1] as $i => $name )
        {
            if ( $name === 'googlebot' )
                $meta_googlebot = (string)$match[2][$i];
        }
    }

    $mdb -> update( 'project_links_internal', [ 'meta_googlebot' => $meta_googlebot ], [ 'id' => $url_id ] );
}
|
|
|
|
/**
 * Stores the size of the raw response, in bytes as reported by strlen(),
 * in project_links_internal.code_lenght for the given row.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response raw page source
 */
public static function get_site_code_lenght( $url_id, $response )
{
    global $mdb;

    $byte_count = strlen( $response );
    $fields = [ 'code_lenght' => $byte_count ];
    $target_row = [ 'id' => $url_id ];

    $mdb -> update( 'project_links_internal', $fields, $target_row );
}
|
|
|
|
/**
 * Stores the length of the page text in project_links_internal.text_lenght
 * for the given row.
 *
 * The text is whatever \S::strip_html_tags() returns for the response; its
 * size is measured with strlen(), i.e. in bytes.
 *
 * @param mixed  $url_id   id of the project_links_internal row to update
 * @param string $response raw page source
 */
public static function get_site_text_lenght( $url_id, $response )
{
    global $mdb;

    $plain_text = \S::strip_html_tags( $response );
    $fields = [ 'text_lenght' => strlen( $plain_text ) ];
    $target_row = [ 'id' => $url_id ];

    $mdb -> update( 'project_links_internal', $fields, $target_row );
}
|
|
}
|