765 lines
28 KiB
PHP
765 lines
28 KiB
PHP
<?php
|
|
namespace front\factory;
|
|
class AuditSEO
|
|
{
|
|
// HTML <img> snippet for the "good" status icon; data02() returns it for checks that pass.
public static $good_img = '<img src="/templates/audit-seo/good.svg" class="status">';
|
|
// HTML <img> snippet for the "bad" status icon; data02() returns it for checks that fail.
public static $bad_img = '<img src="/templates/audit-seo/bad.svg" class="status">';
|
|
|
|
/**
 * Decide whether robots.txt rules allow a crawler to fetch a path.
 *
 * Only "User-agent" and "Disallow" lines are interpreted; Disallow values are
 * matched as literal path prefixes (no wildcard or "Allow" support).
 *
 * @param string|null|false $robots_txt Either the raw robots.txt content (any string
 *                                      containing a line break) or a file name / URL
 *                                      readable by file().
 * @param string|false      $useragent  Crawler name to match in addition to "*".
 * @param string            $path       Path to test; defaults to the site root.
 *
 * @return bool True when the path may be crawled or no usable rules were found.
 */
public static function robots_allowed($robots_txt, $useragent, $path = '/')
{
    // Quote with the delimiter so an agent name containing "/" cannot break the pattern.
    $agents = array(preg_quote('*', '/'));
    if ($useragent) {
        $agents[] = preg_quote($useragent, '/');
    }
    $agents = implode('|', $agents);

    // A single-line string is treated as a file name (the historical contract);
    // anything containing a line break is treated as the robots.txt content itself.
    $lines = false;
    $robots_txt = (string) $robots_txt;
    if ($robots_txt !== '' && !preg_match('/[\r\n]/', $robots_txt)) {
        $lines = @file($robots_txt);
    }
    if ($lines === false) {
        $lines = preg_split('/\r\n|\r|\n/', $robots_txt);
    }

    $rules = array();
    $ruleApplies = false;

    foreach ($lines as $line) {
        $line = trim($line);
        if ($line === '') {
            continue;
        }

        if (preg_match('/^\s*User-agent:\s*(.*)/i', $line, $match)) {
            $ruleApplies = (bool) preg_match("/($agents)/i", $match[1]);
        }

        if ($ruleApplies && preg_match('/^\s*Disallow:(.*)/i', $line, $regs)) {
            $rule = trim($regs[1]);

            // An empty Disallow means "everything is allowed" for this agent.
            if ($rule === '') {
                return true;
            }

            $rules[] = preg_quote($rule, '/');
        }
    }

    $path = (string) $path;
    if ($path === '') {
        $path = '/';
    }

    foreach ($rules as $rule) {
        if (preg_match("/^$rule/", $path)) {
            return false;
        }
    }

    return true;
}
|
|
|
|
/**
 * Tell whether a link points to the same site as $domain.
 *
 * Hosts are compared case-insensitively and with a leading "www." removed,
 * the same normalisation is_url_external() applies, so "www" variants of the
 * audited host count as internal. Links without a host (relative URLs) are
 * internal too. Bare "/", "#", empty links and mailto:/tel: links never count.
 *
 * @param string $domain Absolute URL of the audited site.
 * @param string $url    Link target (typically an href value).
 *
 * @return bool
 */
public static function is_url_internal($domain, $url)
{
    $domain_host = preg_replace('/^www\./', '', strtolower((string) parse_url($domain, PHP_URL_HOST)));
    $url_host = preg_replace('/^www\./', '', strtolower((string) parse_url($url, PHP_URL_HOST)));

    // A link that names a different host cannot be internal.
    if ($url_host !== '' && $url_host !== $domain_host) {
        return false;
    }

    return $url != '/'
        && $url != '#'
        && $url != ''
        && strpos($url, 'mailto:') === false
        && strpos($url, 'tel:') === false;
}
|
|
|
|
/**
 * Tell whether a link points to a different site than $domain.
 *
 * Hosts are compared case-insensitively after removing a leading "www."
 * only (a "www." in the middle of a host name is left alone). A link is
 * external only when both hosts are known and differ; mailto:/tel: links
 * never count.
 *
 * @param string $domain Absolute URL of the audited site.
 * @param string $url    Link target (typically an href value).
 *
 * @return bool
 */
public static function is_url_external($domain, $url)
{
    $domain_host = preg_replace('/^www\./', '', strtolower((string) parse_url($domain, PHP_URL_HOST)));
    $url_host = preg_replace('/^www\./', '', strtolower((string) parse_url($url, PHP_URL_HOST)));

    if ($domain_host === '' || $url_host === '' || $domain_host === $url_host) {
        return false;
    }

    return $url != '/'
        && $url != '#'
        && $url != ''
        && strpos($url, 'mailto:') === false
        && strpos($url, 'tel:') === false;
}
|
|
|
|
/**
 * Send one domain to the Semstorm position-distribution endpoint, print the
 * raw response and terminate the script.
 *
 * NOTE(review): the services token is embedded in the request URL and the call
 * goes over plain http — consider moving the token to configuration.
 *
 * @param string $url Domain to query.
 *
 * @return false Only returned when $url is empty; otherwise the script exits.
 */
public static function semstorm($url)
{
    if (!$url) {
        return false;
    }

    $payload = json_encode(array('domains' => array($url)));

    $request = curl_init();
    curl_setopt_array($request, array(
        CURLOPT_URL => 'http://api.semstorm.com/api-v3/explorer/explorer-keywords/position-distribution.json?services_token=ay_oMCvqro2DuTbG5EMayLUTYitOJC_Lf40gq2Rj_zE',
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_VERBOSE => 1,
        CURLOPT_TIMEOUT => 60,
        CURLOPT_HEADER => false,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => array(
            'Content-Type: application/json',
            'Content-Length: ' . strlen($payload),
        ),
    ));
    $body = curl_exec($request);
    curl_close($request);

    echo $body;
    exit;
}
|
|
|
|
/**
 * Summarise keyword positions for a site into top-3 / top-10 / top-50 buckets.
 *
 * The Semstorm response is read from the `semstorm` column of `as_sites`; when
 * the column is empty the API is queried and a usable response is cached.
 *
 * @param string $url Site key as stored in `as_sites.url`.
 *
 * @return array ['top3' => ['txt' => int], 'top10' => ['txt' => int], 'top50' => ['txt' => int]]
 */
public static function data08($url)
{
    global $mdb;

    $data08 = array(
        'top3' => array('txt' => 0),
        'top10' => array('txt' => 0),
        'top50' => array('txt' => 0),
    );

    $results = $mdb->get('as_sites', ['semstorm'], ['url' => $url]);
    $semstorm = isset($results['semstorm']) ? $results['semstorm'] : null;

    // The API is queried without a leading "www."; the database row keeps the
    // original key, so the two are tracked separately.
    $domain = strpos($url, 'www.') === 0 ? substr($url, 4) : $url;

    if (!$semstorm) {
        $data_string = json_encode(array('domains' => array($domain)));

        // NOTE(review): services token is hard-coded in the URL and sent over plain http.
        $ch = curl_init();
        curl_setopt_array($ch, array(
            CURLOPT_URL => 'http://api.semstorm.com/api-v3/explorer/explorer-keywords/position-distribution.json?services_token=ay_oMCvqro2DuTbG5EMayLUTYitOJC_Lf40gq2Rj_zE',
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_VERBOSE => 1,
            CURLOPT_TIMEOUT => 60,
            CURLOPT_HEADER => false,
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $data_string,
            CURLOPT_HTTPHEADER => array(
                'Content-Type: application/json',
                'Content-Length: ' . strlen($data_string),
            ),
        ));
        $response = curl_exec($ch);
        curl_close($ch);

        // Do not cache a failed request, otherwise it would never be retried.
        if (is_string($response) && $response !== '') {
            $mdb->update('as_sites', ['semstorm' => $response], ['url' => $url]);
            $semstorm = $response;
        }
    }

    $data_tmp = $semstorm ? json_decode($semstorm, true) : null;

    // Results are expected under the domain that was sent to the API; fall back
    // to the raw site key for rows cached under that name.
    $distribution = array();
    if (isset($data_tmp['results'][$domain]) && is_array($data_tmp['results'][$domain])) {
        $distribution = $data_tmp['results'][$domain];
    } elseif (isset($data_tmp['results'][$url]) && is_array($data_tmp['results'][$url])) {
        $distribution = $data_tmp['results'][$url];
    }

    foreach ($distribution as $key => $val) {
        if ($key <= 3) {
            $data08['top3']['txt'] += $val;
        } elseif ($key <= 10) {
            $data08['top10']['txt'] += $val;
        } else {
            $data08['top50']['txt'] += $val;
        }
    }

    return $data08;
}
|
|
|
|
/**
 * Collect the internal and external links found in a site's stored HTML.
 *
 * Each link is classified with is_url_internal() / is_url_external() against
 * the stored effective URL. The link targets come from a crawled page, so they
 * are HTML-escaped before being joined with <br>.
 *
 * @param string $url Site key as stored in `as_sites.url`.
 *
 * @return array ['audit_links_inside' => ['count' => int, 'txt' => string],
 *                'audit_links_outside' => ['count' => int, 'txt' => string]]
 */
public static function data07($url)
{
    global $mdb;

    $results = $mdb->get('as_sites', ['html', 'effective_url'], ['url' => $url]);

    $html = isset($results['html']) ? (string) $results['html'] : '';
    $base = isset($results['effective_url']) ? (string) $results['effective_url'] : '';

    // Always start from empty lists so implode() below is safe when a page has no links.
    $links_internal = array();
    $links_external = array();

    if ($html !== '') {
        $dom = new \DOMDocument();
        @$dom->loadHTML($html);

        foreach ($dom->getElementsByTagName('a') as $link) {
            $href = $link->getAttribute('href');
            $safe_href = htmlspecialchars($href, ENT_QUOTES, 'UTF-8');

            if (self::is_url_internal($base, $href)) {
                $links_internal[] = $safe_href;
            }

            if (self::is_url_external($base, $href)) {
                $links_external[] = $safe_href;
            }
        }
    }

    $data07 = array();
    $data07['audit_links_inside']['count'] = count($links_internal);
    $data07['audit_links_outside']['count'] = count($links_external);
    $data07['audit_links_inside']['txt'] = implode('<br>', $links_internal);
    $data07['audit_links_outside']['txt'] = implode('<br>', $links_external);

    return $data07;
}
|
|
|
|
/**
 * Build the technical section of the report: flash/iframe usage, robots.txt and
 * sitemap presence, images without alt text, inline CSS, doctype, language and
 * the number of W3C validator errors.
 *
 * The validator response is read from the `w3c` column; when empty the Nu HTML
 * Checker is queried and a usable response is cached.
 *
 * @param string $url Site key as stored in `as_sites.url`.
 *
 * @return array Map of check name => ['txt' => value].
 */
public static function data06($url)
{
    global $mdb;

    $results = $mdb->get('as_sites', ['effective_url', 'html', 'flash', 'iframe', 'file_robots_txt', 'file_sitemap_xml', 'inline_css', 'doctype', 'html_language', 'w3c'], ['url' => $url]);

    // Image URLs come from the crawled page, so escape them before emitting HTML.
    // imgs_without_alt() is only called for non-empty HTML and its result is cast
    // so a null return cannot reach implode().
    $images = array();
    if (isset($results['html']) && (string) $results['html'] !== '') {
        foreach ((array) self::imgs_without_alt($results['html']) as $src) {
            $images[] = htmlspecialchars($src, ENT_QUOTES, 'UTF-8');
        }
    }

    $data06 = array();
    $data06['flash']['txt'] = $results['flash'] ? 'tak' : 'nie';
    $data06['iframe']['txt'] = $results['iframe'] ? 'tak' : 'nie';
    $data06['file_robots_txt']['txt'] = $results['file_robots_txt'] ? 'tak' : 'nie';
    $data06['file_sitemap_xml']['txt'] = $results['file_sitemap_xml'] ? 'tak' : 'nie';
    $data06['imgs_without_alt']['txt'] = implode('<br/>', $images);
    $data06['inline_css']['txt'] = $results['inline_css'] ? 'tak' : 'nie';
    $data06['doctype']['txt'] = $results['doctype'];
    $data06['html_language']['txt'] = $results['html_language'];

    $raw = isset($results['w3c']) ? $results['w3c'] : null;

    if (!$raw) {
        $ch = curl_init();
        curl_setopt_array($ch, array(
            // NOTE(review): an encoded "/" is always appended to the audited URL — confirm this is intended.
            CURLOPT_URL => 'https://validator.w3.org/nu/?doc=' . urlencode($results['effective_url']) . '%2F&out=json',
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_VERBOSE => 1,
            CURLOPT_TIMEOUT => 60,
            CURLOPT_CAINFO => 'cacert.pem',
            // NOTE(review): peer verification is disabled although a CA bundle is configured.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HEADER => false,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36',
        ));
        $response = curl_exec($ch);
        curl_close($ch);

        // Do not cache a failed request, otherwise it would never be retried.
        if (is_string($response) && $response !== '') {
            $mdb->update('as_sites', ['w3c' => $response], ['url' => $url]);
            $raw = $response;
        }
    }

    $w3c = $raw ? json_decode($raw, true) : null;

    $data06['w3c_validator']['txt'] = 0;

    if (isset($w3c['messages']) && is_array($w3c['messages'])) {
        foreach ($w3c['messages'] as $message) {
            if (isset($message['type']) && $message['type'] == 'error') {
                $data06['w3c_validator']['txt']++;
            }
        }
    }

    return $data06;
}
|
|
|
|
/**
 * Report the mobile PageSpeed Insights performance score of a site.
 *
 * The API response is read from the `page_speed_insight_mobile` column; when
 * empty the PageSpeed API is queried and a usable response is cached.
 *
 * @param string $url Site key as stored in `as_sites.url`.
 *
 * @return array ['psid' => ['txt' => '<score>/100', 'score' => int|float]];
 *               the score is 0 when the response carries none.
 */
public static function data05($url)
{
    global $mdb;

    $results = $mdb->get('as_sites', ['effective_url', 'page_speed_insight_mobile'], ['url' => $url]);

    $raw = isset($results['page_speed_insight_mobile']) ? $results['page_speed_insight_mobile'] : null;

    if (!$raw) {
        $target = isset($results['effective_url']) ? (string) $results['effective_url'] : '';

        $ch = curl_init();
        curl_setopt_array($ch, array(
            // The audited URL is a query-string value and must be encoded, or any
            // "&" / "?" in it would corrupt the request.
            CURLOPT_URL => 'https://www.googleapis.com/pagespeedonline/v5/runPagespeed?url=' . urlencode($target) . '&category=performance&strategy=mobile',
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_VERBOSE => 1,
            CURLOPT_TIMEOUT => 60,
            CURLOPT_CAINFO => 'cacert.pem',
            // NOTE(review): peer verification is disabled although a CA bundle is configured.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HEADER => false,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36',
        ));
        $response = curl_exec($ch);
        curl_close($ch);

        // Do not cache a failed request, otherwise it would never be retried.
        if (is_string($response) && $response !== '') {
            $mdb->update('as_sites', ['page_speed_insight_mobile' => $response], ['url' => $url]);
            $raw = $response;
        }
    }

    $report = $raw ? json_decode($raw, true) : null;

    $score = 0;
    if (isset($report['lighthouseResult']['categories']['performance']['score'])) {
        $score = $report['lighthouseResult']['categories']['performance']['score'] * 100;
    }

    $data05 = array();
    $data05['psid']['txt'] = $score . '/100';
    $data05['psid']['score'] = $score;

    return $data05;
}
|
|
|
|
/**
 * Report desktop PageSpeed Insights figures for a site: performance score,
 * total page weight and the `network-requests` audit value.
 *
 * The API response is read from the `page_speed_insight_desktop` column; when
 * empty the PageSpeed API is queried and a usable response is cached.
 *
 * @param string $url Site key as stored in `as_sites.url`.
 *
 * @return array ['psid' => ['txt' => '<score>/100', 'score' => int|float],
 *                'site_size' => ['txt' => '<n> KB'],
 *                'site_http' => ['txt' => mixed|null]]
 */
public static function data04($url)
{
    global $mdb;

    $results = $mdb->get('as_sites', ['effective_url', 'page_speed_insight_desktop'], ['url' => $url]);

    $raw = isset($results['page_speed_insight_desktop']) ? $results['page_speed_insight_desktop'] : null;

    if (!$raw) {
        $target = isset($results['effective_url']) ? (string) $results['effective_url'] : '';

        $ch = curl_init();
        curl_setopt_array($ch, array(
            // The audited URL is a query-string value and must be encoded, or any
            // "&" / "?" in it would corrupt the request.
            CURLOPT_URL => 'https://www.googleapis.com/pagespeedonline/v5/runPagespeed?url=' . urlencode($target) . '&category=performance&strategy=desktop',
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_VERBOSE => 1,
            CURLOPT_TIMEOUT => 60,
            CURLOPT_CAINFO => 'cacert.pem',
            // NOTE(review): peer verification is disabled although a CA bundle is configured.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HEADER => false,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36',
        ));
        $response = curl_exec($ch);
        curl_close($ch);

        // Do not cache a failed request, otherwise it would never be retried.
        if (is_string($response) && $response !== '') {
            $mdb->update('as_sites', ['page_speed_insight_desktop' => $response], ['url' => $url]);
            $raw = $response;
        }
    }

    $report = $raw ? json_decode($raw, true) : null;

    $score = 0;
    if (isset($report['lighthouseResult']['categories']['performance']['score'])) {
        $score = $report['lighthouseResult']['categories']['performance']['score'] * 100;
    }

    $bytes = 0;
    if (isset($report['lighthouseResult']['audits']['total-byte-weight']['numericValue'])) {
        $bytes = $report['lighthouseResult']['audits']['total-byte-weight']['numericValue'];
    }

    $requests = null;
    if (isset($report['lighthouseResult']['audits']['network-requests']['numericValue'])) {
        $requests = $report['lighthouseResult']['audits']['network-requests']['numericValue'];
    }

    $data04 = array();
    $data04['psid']['txt'] = $score . '/100';
    $data04['psid']['score'] = $score;
    $data04['site_size']['txt'] = round($bytes / 1024) . ' KB';
    $data04['site_http']['txt'] = $requests;

    return $data04;
}
|
|
|
|
/**
 * Build the content section of the report: meta tags with their lengths, the
 * text-to-code ratio, word count and heading counts.
 *
 * @param string $url Site key as stored in `as_sites.url`.
 *
 * @return array Map of metric name => ['txt' => value, ...]; heading counts are
 *               nested under 'headers'.
 */
public static function data03($url)
{
    global $mdb;

    $results = $mdb->get('as_sites', [
        'meta_title', 'meta_description', 'meta_keywords', 'code_length', 'text_length', 'words_count',
        'h1_count', 'h2_count', 'h3_count', 'h4_count', 'h5_count', 'h6_count',
    ], [
        'url' => $url,
    ]);

    // Count characters rather than bytes when mbstring is available, so accented
    // letters count once. Assumes the stored text is UTF-8 — TODO confirm.
    $count_signs = function ($text) {
        $text = (string) $text;

        return function_exists('mb_strlen') ? mb_strlen($text, 'UTF-8') : strlen($text);
    };

    // An empty page has no text and no code; report 0 instead of dividing by zero.
    $total_length = $results['text_length'] + $results['code_length'];
    $ratio = $total_length > 0 ? round(($results['text_length'] / $total_length * 100), 0) : 0.0;

    $data03 = array();
    $data03['meta_title']['txt'] = $results['meta_title'];
    $data03['meta_title']['signs'] = $count_signs($results['meta_title']);
    $data03['meta_description']['txt'] = $results['meta_description'];
    $data03['meta_description']['signs'] = $count_signs($results['meta_description']);
    $data03['meta_keywords']['txt'] = $results['meta_keywords'] != '' ? $results['meta_keywords'] : 'brak';
    $data03['code_to_text_ratio']['txt'] = $ratio;
    $data03['words_count']['txt'] = $results['words_count'];

    foreach (array('h1', 'h2', 'h3', 'h4', 'h5', 'h6') as $tag) {
        $data03['headers'][$tag . '_count']['txt'] = $results[$tag . '_count'];
    }

    return $data03;
}
|
|
|
|
/**
 * Build the indexability section of the report: meta robots, robots.txt,
 * www redirect and https, each as a 'tak'/'nie' text plus a status icon.
 *
 * @param string $url Site key as stored in `as_sites.url`.
 *
 * @return array Map of check name => ['txt' => 'tak'|'nie', 'img' => string].
 */
public static function data02($url)
{
    global $mdb;

    $results = $mdb->get('as_sites', ['meta_robots', 'robots_txt', 'redirect_www', 'https'], ['url' => $url]);

    // A page is indexable unless its robots meta says otherwise. Testing for the
    // substring "index" would also match "noindex", so look for the blocking
    // directives instead.
    $meta_robots = strtolower(isset($results['meta_robots']) ? (string) $results['meta_robots'] : '');
    $indexable = strpos($meta_robots, 'noindex') === false && !preg_match('/\bnone\b/', $meta_robots);

    // The stored robots.txt body is read from the fetched row. A trailing line
    // break is appended so that the value is never a bare single-line string.
    $robots_body = isset($results['robots_txt']) ? (string) $results['robots_txt'] : '';
    $crawlable = self::robots_allowed($robots_body . "\n", 'GoogleBot');

    $checks = array(
        'meta_robots' => $indexable,
        'robots_txt' => $crawlable,
        'redirect_www' => !empty($results['redirect_www']),
        'https' => !empty($results['https']),
    );

    $data02 = array();
    foreach ($checks as $name => $passed) {
        $data02[$name]['txt'] = $passed ? 'tak' : 'nie';
        $data02[$name]['img'] = $passed ? self::$good_img : self::$bad_img;
    }

    return $data02;
}
|
|
|
|
/**
 * Build the general section of the report: domain, IP, server country,
 * favicon presence and detected CMS.
 *
 * @param string $url Site key as stored in `as_sites.url`.
 *
 * @return array The fetched row with 'location' replaced by the country taken from
 *               the stored JSON (null when unavailable), 'favicon' as 'tak'/'nie',
 *               'cms' defaulting to 'nieznany', plus 'domain' => ['txt' => host].
 */
public static function data01($url)
{
    global $mdb;

    $data01 = $mdb->get('as_sites', [
        'effective_url',
        'ip',
        'location',
        'favicon',
        'cms',
    ], [
        'url' => $url,
    ]);

    if (!is_array($data01)) {
        $data01 = array();
    }

    $effective_url = isset($data01['effective_url']) ? (string) $data01['effective_url'] : '';
    $host = parse_url($effective_url, PHP_URL_HOST);

    // The stored location may be missing or not valid JSON; only read the
    // country when decoding produced an object that has one.
    $location = isset($data01['location']) ? json_decode((string) $data01['location']) : null;
    $country = (is_object($location) && isset($location->country)) ? $location->country : null;

    $data01['domain']['txt'] = $host ? $host : null;
    $data01['location'] = $country;
    $data01['favicon'] = (isset($data01['favicon']) && $data01['favicon'] != null) ? 'tak' : 'nie';
    $data01['cms'] = (isset($data01['cms']) && $data01['cms'] != null) ? $data01['cms'] : 'nieznany';

    return $data01;
}
|
|
|
|
/**
 * List the `src` of every <img> whose `alt` attribute is missing or empty.
 *
 * @param string $html HTML document to inspect.
 *
 * @return array List of src values; empty when no such image exists or the
 *               HTML is empty.
 */
public static function imgs_without_alt($html)
{
    // Always return an array so callers can implode() the result.
    $imgs_output = array();

    $html = (string) $html;
    if ($html === '') {
        return $imgs_output;
    }

    $dom = new \DOMDocument();
    @$dom->loadHTML($html);

    foreach ($dom->getElementsByTagName('img') as $img) {
        if ($img->getAttribute('alt') == '') {
            $imgs_output[] = $img->getAttribute('src');
        }
    }

    return $imgs_output;
}
|
|
|
|
/**
 * Read the CMS name from the page's <meta name="generator"> tag.
 *
 * @param string $html HTML document to inspect.
 *
 * @return string Content of the last generator meta tag, or '' when there is none.
 */
public static function cms($html)
{
    $document = new \DOMDocument();
    @$document->loadHTML($html);

    $generator = '';

    // Later tags overwrite earlier ones, so the last match wins.
    foreach ($document->getElementsByTagName('meta') as $tag) {
        if ($tag->getAttribute('name') === 'generator') {
            $generator = $tag->getAttribute('content');
        }
    }

    return $generator;
}
|
|
|
|
/**
 * Read the text of the page's first <title> element.
 *
 * @param string $html HTML document to inspect.
 *
 * @return string Title text, or '' when the document has no <title>.
 */
public static function meta_title($html)
{
    $dom = new \DOMDocument();
    @$dom->loadHTML($html);

    $title = $dom->getElementsByTagName('title')->item(0);

    // item(0) is null when the page has no <title>; report an empty title then.
    if ($title === null) {
        return '';
    }

    return $title->textContent;
}
|
|
|
|
/**
 * Read the page's <meta name="description"> content.
 *
 * @param string $html HTML document to inspect.
 *
 * @return string Content of the last description meta tag, or '' when there is none.
 */
public static function meta_description($html)
{
    $document = new \DOMDocument();
    @$document->loadHTML($html);

    $found = array();

    foreach ($document->getElementsByTagName('meta') as $tag) {
        if ($tag->getAttribute('name') !== 'description') {
            continue;
        }

        $found[] = $tag->getAttribute('content');
    }

    // When several description tags exist the last one is reported.
    return $found ? end($found) : '';
}
|
|
|
|
/**
 * Read the page's <meta name="keywords"> content.
 *
 * @param string $html HTML document to inspect.
 *
 * @return string Content of the last keywords meta tag, or '' when there is none.
 */
public static function meta_keywords($html)
{
    $document = new \DOMDocument();
    @$document->loadHTML($html);

    $tags = $document->getElementsByTagName('meta');

    // Scan from the end: the first hit is the last keywords tag in the document.
    for ($index = $tags->length - 1; $index >= 0; $index--) {
        $tag = $tags->item($index);

        if ($tag->getAttribute('name') === 'keywords') {
            return $tag->getAttribute('content');
        }
    }

    return '';
}
|
|
|
|
/**
 * Read the page's robots directives from <meta name="robots"> or
 * <meta name="googlebot">.
 *
 * @param string $html HTML document to inspect.
 *
 * @return string Content of the last matching meta tag, or '' when there is none.
 */
public static function meta_robots($html)
{
    $document = new \DOMDocument();
    @$document->loadHTML($html);

    $directive_names = array('robots', 'googlebot');
    $directives = '';

    foreach ($document->getElementsByTagName('meta') as $tag) {
        if (in_array($tag->getAttribute('name'), $directive_names, true)) {
            $directives = $tag->getAttribute('content');
        }
    }

    return $directives;
}
|
|
|
|
/**
 * Count the elements with the given tag name in a document.
 *
 * @param string $html   HTML document to inspect.
 * @param string $header Tag name to count; defaults to 'h1'.
 *
 * @return int
 */
public static function header_count($html, $header = 'h1')
{
    $document = new \DOMDocument();
    @$document->loadHTML($html);

    return $document->getElementsByTagName($header)->length;
}
|
|
|
|
/**
 * Find the page's favicon URL from its <link rel="icon"> declarations.
 *
 * The rel attribute is matched case-insensitively as a space-separated token
 * list, so "icon", "Shortcut Icon", "SHORTCUT ICON" and similar spellings are
 * all recognised.
 *
 * @param string $html HTML document to inspect.
 *
 * @return string href of the last matching <link>, or '' when there is none.
 */
public static function favicon($html)
{
    $dom = new \DOMDocument();
    @$dom->loadHTML($html);

    $favicon = '';

    foreach ($dom->getElementsByTagName('link') as $link) {
        $rel_tokens = preg_split('/\s+/', strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);

        if (in_array('icon', $rel_tokens, true)) {
            $favicon = $link->getAttribute('href');
        }
    }

    return $favicon;
}
|
|
|
|
/**
 * Detect Flash usage by looking for the "Get Adobe Flash player" fallback text.
 *
 * @param string $html HTML document to inspect.
 *
 * @return bool True when the marker text occurs anywhere in $html.
 */
public static function flash_check($html)
{
    return strpos($html, 'Get Adobe Flash player') !== false;
}
|
|
|
|
/**
 * Tell whether the document contains at least one <iframe>.
 *
 * @param string $html HTML document to inspect.
 *
 * @return bool
 */
public static function iframe_check($html)
{
    $document = new \DOMDocument();
    @$document->loadHTML($html);

    return $document->getElementsByTagName('iframe')->length > 0;
}
|
|
|
|
/**
 * Name the document type from the public identifier found in the markup.
 *
 * Only the three HTML 4.01 identifiers are recognised; every other document
 * is reported as 'HTML 5'.
 *
 * @param string $html HTML document to inspect.
 *
 * @return string
 */
public static function doctype($html)
{
    // Checked in this order; the first identifier present decides the result.
    $known_doctypes = array(
        'HTML 4.01 Frameset//EN' => 'HTML 4.01 Frameset',
        'HTML 4.01 Transitional//EN' => 'HTML 4.01 Transitional',
        'HTML 4.01//EN' => 'HTML 4.01 Strict',
    );

    foreach ($known_doctypes as $marker => $label) {
        if (strpos($html, $marker) !== false) {
            return $label;
        }
    }

    return 'HTML 5';
}
|
|
|
|
/**
 * Read the `lang` attribute of the document's first <html> element.
 *
 * @param string $html HTML document to inspect.
 *
 * @return string|false The attribute value ('' when absent), or false when the
 *                      parsed document has no <html> element.
 */
public static function html_language($html)
{
    $document = new \DOMDocument();
    @$document->loadHTML($html);

    $root = $document->getElementsByTagName('html')->item(0);

    return $root === null ? false : $root->getAttribute('lang');
}
|
|
|
|
/**
 * Tell whether any <span>, <p> or <div> carries a non-empty `style` attribute.
 *
 * @param string $html HTML document to inspect.
 *
 * @return bool
 */
public static function inline_css($html)
{
    $document = new \DOMDocument();
    @$document->loadHTML($html);

    foreach (array('span', 'p', 'div') as $tag_name) {
        foreach ($document->getElementsByTagName($tag_name) as $element) {
            if ($element->getAttribute('style') != '') {
                return true;
            }
        }
    }

    return false;
}
|
|
|
|
/**
 * Crawl a site once and store the raw audit data in `as_sites`.
 *
 * The URL is normalised to "host[/path]" without a trailing slash and used as
 * the row key. When a row already exists nothing is fetched. Otherwise the
 * page, its robots.txt and sitemap.xml, and the www / non-www variant of the
 * final URL are requested, and the extracted values are inserted.
 *
 * NOTE(review): the target host is caller-supplied and fetched server-side
 * without restriction; confirm callers validate it.
 *
 * @param string $url Site address, with or without scheme.
 *
 * @return string|false Normalised site key, or false when $url is empty, has no
 *                      host, or the page could not be downloaded.
 */
public static function audit($url)
{
    global $mdb;

    if (!$url) {
        return false;
    }

    if (strpos($url, 'http://') === false and strpos($url, 'https://') === false) {
        $url = 'http://' . $url;
    }

    $url_tmp = parse_url($url);
    if (!is_array($url_tmp) || empty($url_tmp['host'])) {
        return false;
    }

    // "path" is absent for bare hosts such as "example.com".
    $url = strip_tags($url_tmp['host']) . (isset($url_tmp['path']) ? $url_tmp['path'] : '');
    if (substr($url, -1, 1) == '/') {
        $url = substr($url, 0, -1);
    }

    if ($mdb->count('as_sites', ['url' => $url])) {
        return $url;
    }

    $page = self::audit_fetch($url);
    $response = $page['body'];

    // Without a page body there is nothing to analyse; do not store a junk row.
    if (!is_string($response) || $response === '') {
        return false;
    }

    $effective_url = $page['effective_url'];
    $ip = $page['ip'];
    $location = file_get_contents("http://ipinfo.io/{$ip}/json");

    $plain_text = strip_tags($response);
    $text_length = strlen($plain_text);
    $code_length = strlen($response) - $text_length;
    $words_count = str_word_count($plain_text);

    // robots.txt and sitemap.xml live at the site root, so build their URLs from
    // scheme + host (+ port) rather than from the full effective URL, which may
    // carry a path or a trailing slash.
    $url_parse = parse_url($effective_url);
    $scheme = isset($url_parse['scheme']) ? $url_parse['scheme'] : 'http';
    $origin = $scheme . '://'
        . (isset($url_parse['host']) ? $url_parse['host'] : '')
        . (isset($url_parse['port']) ? ':' . $url_parse['port'] : '');

    $robots = self::audit_fetch($origin . '/robots.txt');
    $sitemap = self::audit_fetch($origin . '/sitemap.xml');

    // Request the opposite www / non-www variant; when it ends up on the same
    // final URL the site redirects between the two.
    $prefix = $scheme . '://';
    if (strpos($effective_url, $prefix . 'www.') !== false) {
        $alternate_url = str_replace($prefix . 'www.', $prefix, $effective_url);
    } else {
        $alternate_url = str_replace($prefix, $prefix . 'www.', $effective_url);
    }
    $alternate = self::audit_fetch($alternate_url);
    $redirect_www = $alternate['effective_url'] == $effective_url;

    $mdb->insert('as_sites', [
        'url' => $url,
        'html' => $response,
        'effective_url' => $effective_url,
        'ip' => $ip,
        'location' => $location,
        'favicon' => self::favicon($response),
        'cms' => self::cms($response),
        'meta_robots' => self::meta_robots($response),
        'robots_txt' => $robots['body'],
        'redirect_www' => $redirect_www ? 1 : 0,
        'https' => $scheme == 'https' ? 1 : 0,
        'meta_title' => self::meta_title($response),
        'meta_description' => self::meta_description($response),
        'meta_keywords' => self::meta_keywords($response),
        'code_length' => $code_length,
        'text_length' => $text_length,
        'words_count' => $words_count,
        'h1_count' => self::header_count($response, 'h1'),
        'h2_count' => self::header_count($response, 'h2'),
        'h3_count' => self::header_count($response, 'h3'),
        'h4_count' => self::header_count($response, 'h4'),
        'h5_count' => self::header_count($response, 'h5'),
        'h6_count' => self::header_count($response, 'h6'),
        'flash' => self::flash_check($response) ? 1 : 0,
        'iframe' => self::iframe_check($response) ? 1 : 0,
        'file_robots_txt' => $robots['http_code'] == 200 ? 1 : 0,
        'file_sitemap_xml' => $sitemap['http_code'] == 200 ? 1 : 0,
        'inline_css' => self::inline_css($response),
        'doctype' => self::doctype($response),
        'html_language' => self::html_language($response),
    ]);

    return $url;
}

/**
 * Download one URL with the cURL options shared by every audit request.
 *
 * @param string $target URL to request.
 *
 * @return array ['body' => string|false, 'effective_url' => string,
 *                'ip' => string, 'http_code' => int]
 */
private static function audit_fetch($target)
{
    $ch = curl_init();
    curl_setopt_array($ch, array(
        CURLOPT_URL => $target,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_VERBOSE => 1,
        CURLOPT_TIMEOUT => 60,
        CURLOPT_CAINFO => 'cacert.pem',
        // NOTE(review): peer verification is disabled although a CA bundle is configured.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_HEADER => false,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36',
    ));

    $result = array(
        'body' => curl_exec($ch),
        'effective_url' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL),
        'ip' => curl_getinfo($ch, CURLINFO_PRIMARY_IP),
        'http_code' => curl_getinfo($ch, CURLINFO_HTTP_CODE),
    );
    curl_close($ch);

    return $result;
}
|
|
} |