Files
rank24.pl/autoload/class.GoogleSite.php
2024-12-12 15:33:18 +01:00

315 lines
10 KiB
PHP
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
/**
 * Helpers for querying Google through remote proxy scripts / HTTP proxies
 * and for extracting result links from the returned HTML.
 *
 * Methods that touch the database rely on the global `$db` handle and on the
 * `pro_proxy_servers` table (columns used here: id, proxy, enabled, used,
 * bg, bgd).
 */
class GoogleSite {
    /** Captures the target of Google redirect links (`/url?q=...`). Not referenced inside this class. */
    const G_PATTERN = '/<a href="\/url\?q=([^"]+)"/';

    /**
     * Captures the inner HTML of the result-statistics container.
     *
     * NOTE(review): the original code used an undefined `$pattern` variable
     * here; this expression is a reconstruction for the classic
     * `<div id="resultStats">` markup - confirm against the markup actually
     * returned to this client.
     */
    const RESULT_STATS_PATTERN = '/<div[^>]*\bid=["\']?resultStats["\']?[^>]*>(.*?)<\/div>/s';

    /**
     * Captures the first number (with thousands separators) in the statistics text.
     *
     * NOTE(review): replaces the undefined `$pattern2` of the original code - confirm.
     */
    const RESULT_COUNT_PATTERN = '/[0-9][0-9,. ]*/';

    /**
     * Asks a proxy script for its version.
     *
     * @param string $proxy URL of the proxy script; `?action=check_version` is appended.
     * @return array `result` (response body or false), `code` (HTTP status), `info` (full curl_getinfo() array).
     */
    public function checkProxyServerVersion( $proxy )
    {
        $out = array();

        $curl = curl_init();
        curl_setopt( $curl, CURLOPT_URL, $proxy . '?action=check_version' );
        curl_setopt( $curl, CURLOPT_RETURNTRANSFER, 1 );
        curl_setopt( $curl, CURLOPT_VERBOSE, 1 );
        curl_setopt( $curl, CURLOPT_TIMEOUT, 15 );
        curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, 15 );
        curl_setopt( $curl, CURLOPT_ENCODING, "gzip,deflate" );

        $out['result'] = curl_exec( $curl );
        $out['code']   = curl_getinfo( $curl, CURLINFO_HTTP_CODE );
        $out['info']   = curl_getinfo( $curl );
        curl_close( $curl );

        return $out;
    }

    /**
     * Runs the `check_site` action for the fixed test domain `onet.pl` on a proxy script.
     *
     * @param string $proxy URL of the proxy script.
     * @return string|false Raw response body, or false when the transfer failed.
     */
    public function checkProxyServer( $proxy )
    {
        return $this->postToProxy( $proxy, array( 'action' => 'check_site', 'url' => 'onet.pl' ) );
    }

    /**
     * Checks an HTTP proxy by running a `site:onet.pl` query against google.pl through it.
     *
     * @param string $proxy Value passed to CURLOPT_PROXY.
     * @return int|string -1 when nothing usable came back, -2 when Google answered
     *                    with a block / captcha page, 0 when no result count could
     *                    be found, otherwise the result count as a string of digits.
     */
    public function checkProxy( $proxy )
    {
        $curl = curl_init();
        curl_setopt( $curl, CURLOPT_HEADER, 1 );
        curl_setopt( $curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; pl; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12" );
        curl_setopt( $curl, CURLOPT_RETURNTRANSFER, 1 );
        curl_setopt( $curl, CURLOPT_VERBOSE, 1 );
        curl_setopt( $curl, CURLOPT_REFERER, 'http://www.google.pl' );
        curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, 5 );
        curl_setopt( $curl, CURLOPT_TIMEOUT, 5 );
        curl_setopt( $curl, CURLOPT_PROXY, $proxy );
        curl_setopt( $curl, CURLOPT_URL, 'http://www.google.pl/search?q=site:' . urlencode( 'onet.pl' ) . '&num=10&start=0&hl=pl' );
        curl_setopt( $curl, CURLOPT_ENCODING, "gzip,deflate" );

        $html = \S::curl_redir_exec( $curl );
        curl_close( $curl );

        // No body at all, or a body that never mentions the queried domain.
        if ( !$html || strpos( $html, 'onet.pl' ) === false )
            return -1;

        $blockMarkers = array(
            'Aby kontynuować',
            'Our systems have detected unusual traffic',
            'sending automated queries',
        );
        if ( self::containsAny( $html, $blockMarkers ) )
            return -2;

        if ( strpos( $html, 'nie została odnaleziona.' ) !== false )
            return 0;

        if ( !preg_match( self::RESULT_STATS_PATTERN, $html, $stats ) )
            return 0;

        // Thousands separators may be non-breaking spaces in several encodings;
        // normalise them so the digit groups form one contiguous run.
        $text = str_replace( array( '&nbsp;', '&#160;', "\xC2\xA0", "\xA0" ), ' ', $stats[1] );
        if ( !preg_match( self::RESULT_COUNT_PATTERN, $text, $count ) )
            return 0;

        return preg_replace( '/[^0-9]/', '', $count[0] );
    }

    /**
     * Runs the `check_site` action for `$url` on the least recently used enabled proxy script.
     *
     * The chosen proxy always gets `used = NOW()`; when it answers `-1` it is
     * additionally backed off for one hour via `bgd`.
     *
     * @param string $url   Site to check; sent form-encoded.
     * @param bool   $debug When true, prepends a line to `data/gs-<Y-m-d>.txt`.
     * @return int|string|false -1 when no proxy is available, otherwise the raw
     *                          proxy response (false when the transfer failed).
     */
    public function getSite( $url, $debug = false )
    {
        global $db;

        // The back-off timestamp is `bgd` (it is what the UPDATE below writes and
        // what getSitesByKeywords() filters on); `bg` is the failure counter.
        $query = $db->query( 'SELECT id, proxy FROM pro_proxy_servers WHERE enabled = 1 AND ( bgd < NOW() OR bgd IS NULL ) ORDER BY used ASC LIMIT 1' );
        $row = $query->fetch();
        $query->closeCursor();

        if ( !$row )
            return -1;

        $site = $this->postToProxy( $row['proxy'], array( 'action' => 'check_site', 'url' => $url ) );

        if ( $debug )
        {
            // Newest entry goes first; the file may not exist yet on the first call of the day.
            $path     = 'data/gs-' . date( 'Y-m-d' ) . '.txt';
            $previous = is_file( $path ) ? (string) file_get_contents( $path ) : '';
            file_put_contents( $path, $row['proxy'] . ' - ' . $site . ' - ' . $url . chr( 13 ) . chr( 10 ) . $previous );
        }

        // Loose comparison on purpose: the proxy answers with the text "-1".
        if ( $site == -1 )
            $db->query( 'UPDATE pro_proxy_servers SET used = NOW(), bgd = DATE_ADD( NOW(), INTERVAL 1 HOUR ) WHERE id = ' . (int) $row['id'] );
        else
            $db->query( 'UPDATE pro_proxy_servers SET used = NOW() WHERE id = ' . (int) $row['id'] );

        return $site;
    }

    /**
     * Fetches result links for `$phrase` through the least recently used enabled proxy script.
     *
     * A proxy that fails or returns a block page has its `bg` counter increased
     * and is backed off for `bg * 15` minutes; a proxy that succeeds has `bg` reset to 0.
     *
     * @param string $phrase Search phrase; sent form-encoded and also required to
     *                       appear verbatim in the response.
     * @return int|string -1 on any failure or when no links were found, otherwise
     *                    the links concatenated, each followed by `|||`.
     */
    public function getSitesByKeywords( $phrase )
    {
        global $db;

        $query = $db->query( 'SELECT id, proxy, bg FROM pro_proxy_servers WHERE enabled = 1 AND ( bgd < NOW() OR bgd IS NULL ) ORDER BY used ASC LIMIT 1' );
        $row = $query->fetch();
        $query->closeCursor();

        if ( !$row )
            return -1;

        $result = $this->postToProxy( $row['proxy'], array( 'action' => 'get_sites', 'phrase' => $phrase ), 10 );

        $blockMarkers = array(
            'Our systems have detected unusual traffic',
            'Aby kontynuowa',
            'sending automated queries',
            'Thats an error',
        );
        if ( !$result || self::containsAny( $result, $blockMarkers ) )
        {
            $level = (int) $row['bg'] + 1;
            $db->query( 'UPDATE pro_proxy_servers SET used = NOW(), bg = ' . $level . ', bgd = DATE_ADD( NOW(), INTERVAL ' . ( $level * 15 ) . ' MINUTE ) WHERE id = ' . (int) $row['id'] );
            return -1;
        }

        $db->query( 'UPDATE pro_proxy_servers SET used = NOW(), bg = 0 WHERE id = ' . (int) $row['id'] );

        if ( strpos( $result, $phrase ) === false )
            return -1;

        $sites = '';
        foreach ( self::parse_urls( $result ) as $link )
            $sites .= $link['href'] . '|||';

        return $sites === '' ? -1 : $sites;
    }

    /**
     * Extracts result links from a Google results page.
     *
     * Three markup heuristics are tried in order inside the element with
     * id `res`; the first one that yields anything wins. Only anchors with a
     * non-empty `onmousedown` attribute are considered.
     *
     * @param string $result HTML of the results page.
     * @return array List of `array( 'type' => 'organic'|'map', 'href' => string )`;
     *               empty when the input is empty, unparsable or has no `#res` element.
     */
    public static function parse_urls( $result )
    {
        $result = (string) $result;
        if ( trim( $result ) === '' )
            return array();

        // Real-world HTML is rarely valid; collect libxml errors instead of emitting warnings.
        $previous = libxml_use_internal_errors( true );
        $doc      = new DOMDocument();
        $loaded   = $doc->loadHTML( $result );
        libxml_clear_errors();
        libxml_use_internal_errors( $previous );

        if ( !$loaded )
            return array();

        $container = $doc->getElementById( 'res' );
        if ( !$container )
            return array();

        $results = self::collectFromListBlocks( $container );
        if ( empty( $results ) )
            $results = self::collectFromListHeadings( $container );
        if ( empty( $results ) )
            $results = self::collectFromResultDivs( $container );

        return $results;
    }

    /**
     * Sends a form-encoded POST to a proxy script and returns the raw response.
     *
     * @param string $proxy   URL of the proxy script.
     * @param array  $fields  POST fields (name => value).
     * @param int    $timeout Connect and total timeout in seconds; 0 leaves cURL defaults.
     * @return string|false Response body, or false when the transfer failed.
     */
    private function postToProxy( $proxy, array $fields, $timeout = 0 )
    {
        $curl = curl_init();
        curl_setopt( $curl, CURLOPT_URL, $proxy );
        curl_setopt( $curl, CURLOPT_POST, 1 );
        curl_setopt( $curl, CURLOPT_POSTFIELDS, http_build_query( $fields, '', '&' ) );
        curl_setopt( $curl, CURLOPT_RETURNTRANSFER, 1 );
        curl_setopt( $curl, CURLOPT_ENCODING, "gzip,deflate" );
        if ( $timeout > 0 )
        {
            curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, $timeout );
            curl_setopt( $curl, CURLOPT_TIMEOUT, $timeout );
        }

        $response = curl_exec( $curl );
        curl_close( $curl );

        return $response;
    }

    /** Tells whether `$haystack` contains at least one of `$needles`. */
    private static function containsAny( $haystack, array $needles )
    {
        foreach ( $needles as $needle )
        {
            if ( strpos( $haystack, $needle ) !== false )
                return true;
        }
        return false;
    }

    /** Builds one result row from an anchor element. */
    private static function makeResult( $type, DOMElement $anchor )
    {
        return array( 'type' => $type, 'href' => $anchor->getAttribute( 'href' ) );
    }

    /** Heuristic 1: anchors inside classed `div` elements that are direct children of an `ol`. */
    private static function collectFromListBlocks( DOMElement $container )
    {
        $found = array();
        foreach ( $container->getElementsByTagName( 'ol' ) as $list )
        {
            foreach ( $list->childNodes as $child )
            {
                // childNodes also yields text/comment nodes, which have no tag name or attributes.
                if ( !( $child instanceof DOMElement ) || $child->tagName != 'div' || $child->getAttribute( 'class' ) === '' )
                    continue;

                foreach ( $child->getElementsByTagName( 'a' ) as $anchor )
                {
                    if ( $anchor->getAttribute( 'onmousedown' ) === '' )
                        continue;

                    $class = $anchor->getAttribute( 'class' );
                    if ( strpos( $class, 'rllt__action' ) !== false )
                        $found[] = self::makeResult( 'map', $anchor );
                    else if ( $class === '' )
                        $found[] = self::makeResult( 'organic', $anchor );
                }
            }
        }
        return $found;
    }

    /** Heuristic 2: anchors inside `h3` headings anywhere below an `ol`. */
    private static function collectFromListHeadings( DOMElement $container )
    {
        $found = array();
        foreach ( $container->getElementsByTagName( 'ol' ) as $list )
        {
            foreach ( $list->getElementsByTagName( 'h3' ) as $heading )
            {
                foreach ( $heading->getElementsByTagName( 'a' ) as $anchor )
                {
                    if ( $anchor->getAttribute( 'onmousedown' ) === '' )
                        continue;

                    $class = $anchor->getAttribute( 'class' );
                    if ( $class === 'l' )
                        $found[] = self::makeResult( 'map', $anchor );
                    else if ( $class === '' )
                        $found[] = self::makeResult( 'organic', $anchor );
                }
            }
        }
        return $found;
    }

    /** Heuristic 3 (markup dated 12.02.2016 in the original code): `div.g` for organic, `div._gt` for map results. */
    private static function collectFromResultDivs( DOMElement $container )
    {
        $found = array();
        foreach ( $container->getElementsByTagName( 'div' ) as $block )
        {
            $blockClass = $block->getAttribute( 'class' );

            if ( $blockClass === 'g' )
            {
                foreach ( $block->getElementsByTagName( 'h3' ) as $heading )
                {
                    foreach ( $heading->getElementsByTagName( 'a' ) as $anchor )
                    {
                        if ( $anchor->getAttribute( 'class' ) === '' && $anchor->getAttribute( 'onmousedown' ) !== '' )
                            $found[] = self::makeResult( 'organic', $anchor );
                    }
                }
            }
            else if ( $blockClass === '_gt' )
            {
                foreach ( $block->getElementsByTagName( 'a' ) as $anchor )
                {
                    if ( $anchor->getAttribute( 'class' ) === 'rllt__action-button _Jrh' && $anchor->getAttribute( 'onmousedown' ) !== '' )
                        $found[] = self::makeResult( 'map', $anchor );
                }
            }
        }
        return $found;
    }
}
?>