<?php
/**
 * Client for a pool of remote "proxy server" scripts that run Google queries
 * on our behalf, plus helpers for probing plain HTTP proxies and for parsing
 * Google result pages.
 *
 * The proxy pool lives in the `pro_proxy_servers` table, reached through the
 * global `$db` handle. Columns used here: `enabled`, `used` (last use),
 * `bg` (consecutive failure counter) and `bgd` (back-off deadline).
 */
class GoogleSite {

    const G_PATTERN = '/<a href="\/url\?q=([^"]+)"/';

    /*
     * NOTE(review): checkProxy() originally referenced two undefined local
     * variables ($pattern / $pattern2), so the result-count extraction could
     * never work. The two patterns below are a reconstruction of the apparent
     * intent (locate the result-stats container, then pull the first number
     * out of it) - confirm against the markup the scraper is expected to see.
     */
    const RESULT_STATS_PATTERN = '/id="?resultStats"?[^>]*>(.*?)<\/div>/s';
    const RESULT_COUNT_PATTERN = '/([0-9][0-9,]*)/';

    /**
     * Asks a proxy script for its version.
     *
     * @param string $proxy URL of the proxy script.
     * @return array 'result' => response body (false on cURL failure),
     *               'code'   => HTTP status code,
     *               'info'   => full curl_getinfo() array.
     */
    public function checkProxyServerVersion( $proxy )
    {
        $out = array();

        $curl = curl_init();
        curl_setopt( $curl, CURLOPT_URL, $proxy . '?action=check_version' );
        curl_setopt( $curl, CURLOPT_RETURNTRANSFER, 1 );
        curl_setopt( $curl, CURLOPT_VERBOSE, 1 );
        curl_setopt( $curl, CURLOPT_TIMEOUT, 15 );
        curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, 15 );
        curl_setopt( $curl, CURLOPT_ENCODING, "gzip,deflate" );

        $out['result'] = curl_exec( $curl );
        $out['code']   = curl_getinfo( $curl, CURLINFO_HTTP_CODE );
        $out['info']   = curl_getinfo( $curl );
        curl_close( $curl );

        return $out;
    }

    /**
     * Smoke-tests a proxy script by asking it to run a `check_site` lookup
     * for the fixed domain onet.pl.
     *
     * @param string $proxy URL of the proxy script.
     * @return string|false Raw response body, or false on cURL failure.
     */
    public function checkProxyServer( $proxy )
    {
        return $this -> postToProxy( $proxy, array( 'action' => 'check_site', 'url' => 'onet.pl' ) );
    }

    /**
     * Checks whether a plain HTTP proxy can reach Google by running a
     * `site:onet.pl` query through it.
     *
     * @param string $proxy Proxy address handed to CURLOPT_PROXY.
     * @return int|string -1 when nothing usable came back,
     *                    -2 when Google answered with a block / captcha page,
     *                     0 when no result count could be found,
     *                    otherwise the result count as a digit string.
     */
    public function checkProxy( $proxy )
    {
        $curl = curl_init();
        curl_setopt( $curl, CURLOPT_HEADER, 1 );
        curl_setopt( $curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; pl; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12" );
        curl_setopt( $curl, CURLOPT_RETURNTRANSFER, 1 );
        curl_setopt( $curl, CURLOPT_VERBOSE, 1 );
        curl_setopt( $curl, CURLOPT_REFERER, 'http://www.google.pl' );
        curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, 5 );
        curl_setopt( $curl, CURLOPT_TIMEOUT, 5 );
        curl_setopt( $curl, CURLOPT_PROXY, $proxy );
        curl_setopt( $curl, CURLOPT_URL, 'http://www.google.pl/search?q=site:' . urlencode( 'onet.pl' ) . '&num=10&start=0&hl=pl' );
        curl_setopt( $curl, CURLOPT_ENCODING, "gzip,deflate" );

        // Project helper defined outside this file; executes the request.
        $google = \S::curl_redir_exec( $curl );
        curl_close( $curl );

        // The domain check runs before the block-page check, as it always has.
        if ( !$google || strpos( $google, 'onet.pl' ) === false )
            return -1;

        $blockMarkers = array(
            'Aby kontynuować',
            'Our systems have detected unusual traffic',
            'sending automated queries',
        );
        if ( self::containsAny( $google, $blockMarkers ) )
            return -2;

        if ( strpos( $google, 'nie została odnaleziona.' ) !== false )
            return 0;

        // Any extraction miss yields 0, which is also what the previous code
        // effectively produced (with warnings) on PHP 5/7.
        if ( !preg_match( self::RESULT_STATS_PATTERN, $google, $stats ) )
            return 0;

        if ( !preg_match( self::RESULT_COUNT_PATTERN, $stats[1], $count ) )
            return 0;

        return str_replace( ',', '', $count[1] );
    }

    /**
     * Runs a `check_site` lookup for $url through the least recently used
     * proxy script that is enabled and not backed off.
     *
     * A proxy that fails (the script answers -1 or the request itself fails)
     * is backed off for one hour.
     *
     * @param string $url   Site to look up; sent form-encoded.
     * @param bool   $debug When true, prepend a line to data/gs-YYYY-MM-DD.txt.
     * @return int|string -1 when no proxy was available or the lookup failed,
     *                    otherwise the raw response of the proxy script.
     */
    public function getSite( $url, $debug = false )
    {
        global $db;

        $site = -1;

        // Filter on the back-off deadline (bgd). The previous `bg < NOW()`
        // compared the integer failure counter with a timestamp, so the
        // back-off written below was never honoured.
        $query = $db -> query( 'SELECT id, proxy, user_id FROM pro_proxy_servers WHERE enabled = 1 AND ( bgd < NOW() OR bgd IS NULL ) ORDER BY used ASC LIMIT 1' );

        while ( $row = $query -> fetch() )
        {
            $site = $this -> postToProxy( $row['proxy'], array( 'action' => 'check_site', 'url' => $url ) );

            // A transport failure counts as a failed lookup, not as a result.
            if ( $site === false )
                $site = -1;

            if ( $debug )
                $this -> prependDebugLine( $row['proxy'] . ' - ' . $site . ' - ' . $url );

            // Loose comparison on purpose: the proxy answers with text ("-1").
            if ( $site == -1 )
                $db -> query( 'UPDATE pro_proxy_servers SET used = NOW(), bgd = DATE_ADD( NOW(), INTERVAL 1 HOUR ) WHERE id = ' . (int) $row['id'] );
            else
                $db -> query( 'UPDATE pro_proxy_servers SET used = NOW() WHERE id = ' . (int) $row['id'] );
        }

        $query -> closeCursor();

        return $site;
    }

    /**
     * Fetches the Google results for $phrase through the least recently used
     * available proxy script and returns the result links.
     *
     * On an empty or blocked response the proxy's failure counter `bg` is
     * incremented and it is backed off for `bg * 15` minutes; on success the
     * counter is reset to 0.
     *
     * @param string $phrase Search phrase; sent form-encoded.
     * @return int|string -1 on any failure or when no links were found,
     *                    otherwise the hrefs concatenated, each followed by '|||'.
     */
    public function getSitesByKeywords( $phrase )
    {
        global $db;

        $query = $db -> query( 'SELECT id, proxy, bg FROM pro_proxy_servers WHERE enabled = 1 AND ( bgd < NOW() OR bgd IS NULL ) ORDER BY used ASC LIMIT 1' );
        $row   = $query -> fetch();
        // Release the result set before issuing the UPDATE statements below.
        $query -> closeCursor();

        if ( !$row )
            return -1;

        $result = $this -> postToProxy( $row['proxy'], array( 'action' => 'get_sites', 'phrase' => $phrase ), 10 );

        $blockMarkers = array(
            'Our systems have detected unusual traffic',
            'Aby kontynuowa',
            'sending automated queries',
            'That’s an error',
        );

        if ( !$result || self::containsAny( $result, $blockMarkers ) )
        {
            $failures = (int) $row['bg'] + 1;
            $db -> query( 'UPDATE pro_proxy_servers SET used = NOW(), bg = ' . $failures . ', bgd = DATE_ADD( NOW(), INTERVAL ' . ( $failures * 15 ) . ' MINUTE ) WHERE id = ' . (int) $row['id'] );

            return -1;
        }

        $db -> query( 'UPDATE pro_proxy_servers SET used = NOW(), bg = 0 WHERE id = ' . (int) $row['id'] );

        $sites = '';

        // Only parse pages that actually mention the phrase.
        if ( strpos( $result, $phrase ) !== false )
        {
            foreach ( self::parse_urls( $result ) as $link )
                $sites .= $link['href'] . '|||';
        }

        if ( $sites === '' )
            return -1;

        return $sites;
    }

    /**
     * Extracts result links from a Google result page.
     *
     * Three markup layouts are tried in order and the first one that yields
     * any link wins:
     *   1. <ol> children that are classed <div>s,
     *   2. <h3> headings inside <ol>,
     *   3. <div class="g"> / <div class="_gt"> blocks (added 12.02.2016).
     * All lookups are scoped to the element with id "res".
     *
     * @param string $result HTML of the result page.
     * @return array List of array( 'type' => 'organic'|'map', 'href' => string );
     *               empty when the page is empty, unparsable, has no "res"
     *               element, or contains no recognised links.
     */
    public static function parse_urls( $result )
    {
        if ( !is_string( $result ) || trim( $result ) === '' )
            return array();

        // Real-world HTML is rarely valid; collect libxml errors internally
        // instead of emitting a PHP warning for each of them.
        $previous = libxml_use_internal_errors( true );
        $doc      = new \DOMDocument();
        $loaded   = $doc -> loadHTML( $result );
        libxml_clear_errors();
        libxml_use_internal_errors( $previous );

        if ( !$loaded )
            return array();

        $container = $doc -> getElementById( 'res' );
        if ( $container === null )
            return array();

        $results = self::linksFromListDivs( $container );

        if ( empty( $results ) )
            $results = self::linksFromListHeadings( $container );

        if ( empty( $results ) )
            $results = self::linksFromResultDivs( $container );

        return $results;
    }

    /**
     * Layout 1: anchors inside classed <div>s that are direct children of an <ol>.
     */
    private static function linksFromListDivs( $container )
    {
        $results = array();

        foreach ( $container -> getElementsByTagName( 'ol' ) as $ol )
        {
            foreach ( $ol -> childNodes as $child )
            {
                // childNodes also yields text/comment nodes, which have no tagName.
                if ( !( $child instanceof \DOMElement ) )
                    continue;

                if ( $child -> tagName != 'div' || $child -> getAttribute( 'class' ) === '' )
                    continue;

                foreach ( $child -> getElementsByTagName( 'a' ) as $a )
                {
                    if ( $a -> getAttribute( 'onmousedown' ) === '' )
                        continue;

                    $class = $a -> getAttribute( 'class' );

                    if ( strpos( $class, 'rllt__action' ) !== false )
                        $results[] = self::linkRow( 'map', $a );
                    else if ( $class === '' )
                        $results[] = self::linkRow( 'organic', $a );
                }
            }
        }

        return $results;
    }

    /**
     * Layout 2: anchors inside <h3> headings anywhere below an <ol>.
     */
    private static function linksFromListHeadings( $container )
    {
        $results = array();

        foreach ( $container -> getElementsByTagName( 'ol' ) as $ol )
        {
            foreach ( $ol -> getElementsByTagName( 'h3' ) as $h3 )
            {
                foreach ( $h3 -> getElementsByTagName( 'a' ) as $a )
                {
                    if ( $a -> getAttribute( 'onmousedown' ) === '' )
                        continue;

                    $class = $a -> getAttribute( 'class' );

                    if ( $class === 'l' )
                        $results[] = self::linkRow( 'map', $a );
                    else if ( $class === '' )
                        $results[] = self::linkRow( 'organic', $a );
                }
            }
        }

        return $results;
    }

    /**
     * Layout 3: <div class="g"> (organic, via <h3>) and <div class="_gt"> (map).
     */
    private static function linksFromResultDivs( $container )
    {
        $results = array();

        foreach ( $container -> getElementsByTagName( 'div' ) as $block )
        {
            $blockClass = $block -> getAttribute( 'class' );

            if ( $blockClass === 'g' )
            {
                foreach ( $block -> getElementsByTagName( 'h3' ) as $h3 )
                {
                    foreach ( $h3 -> getElementsByTagName( 'a' ) as $a )
                    {
                        if ( $a -> getAttribute( 'class' ) === '' && $a -> getAttribute( 'onmousedown' ) !== '' )
                            $results[] = self::linkRow( 'organic', $a );
                    }
                }
            }
            else if ( $blockClass === '_gt' )
            {
                foreach ( $block -> getElementsByTagName( 'a' ) as $a )
                {
                    if ( $a -> getAttribute( 'class' ) === 'rllt__action-button _Jrh' && $a -> getAttribute( 'onmousedown' ) !== '' )
                        $results[] = self::linkRow( 'map', $a );
                }
            }
        }

        return $results;
    }

    /**
     * Builds one result row for an anchor element.
     */
    private static function linkRow( $type, $a )
    {
        return array(
            'type' => $type,
            'href' => $a -> getAttribute( 'href' ),
        );
    }

    /**
     * Tells whether $haystack contains at least one of the $needles.
     */
    private static function containsAny( $haystack, $needles )
    {
        foreach ( $needles as $needle )
        {
            if ( strpos( $haystack, $needle ) !== false )
                return true;
        }

        return false;
    }

    /**
     * POSTs form-encoded $fields to a proxy script and returns the raw body.
     *
     * @param string   $endpoint URL of the proxy script.
     * @param array    $fields   Form fields; encoded with http_build_query().
     * @param int|null $timeout  Connect and total timeout in seconds, or null
     *                           to leave cURL's defaults untouched.
     * @return string|false Response body, or false on cURL failure.
     */
    private function postToProxy( $endpoint, $fields, $timeout = null )
    {
        $curl = curl_init();
        curl_setopt( $curl, CURLOPT_URL, $endpoint );
        curl_setopt( $curl, CURLOPT_POST, 1 );
        curl_setopt( $curl, CURLOPT_POSTFIELDS, http_build_query( $fields, '', '&' ) );
        curl_setopt( $curl, CURLOPT_RETURNTRANSFER, 1 );
        curl_setopt( $curl, CURLOPT_ENCODING, "gzip,deflate" );

        if ( $timeout !== null )
        {
            curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, $timeout );
            curl_setopt( $curl, CURLOPT_TIMEOUT, $timeout );
        }

        $body = curl_exec( $curl );
        curl_close( $curl );

        return $body;
    }

    /**
     * Prepends $line (CRLF-terminated) to today's debug log, creating the
     * file when it does not exist yet.
     */
    private function prependDebugLine( $line )
    {
        $path = 'data/gs-' . date( 'Y-m-d' ) . '.txt';

        $existing = is_readable( $path ) ? file_get_contents( $path ) : '';
        if ( $existing === false )
            $existing = '';

        file_put_contents( $path, $line . chr( 13 ) . chr( 10 ) . $existing );
    }
}
?>