71 lines
4.1 KiB
PHP
71 lines
4.1 KiB
PHP
<?php
|
|
class GoogleScraper {
|
|
|
|
var $_header = array(
|
|
'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0',
|
|
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0',
|
|
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/29.0',
|
|
'Mozilla/5.0 (X11; OpenBSD amd64; rv:28.0) Gecko/20100101 Firefox/28.0',
|
|
'Mozilla/5.0 (X11; Linux x86_64; rv:28.0) Gecko/20100101 Firefox/28.0',
|
|
'Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3',
|
|
'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:27.0) Gecko/20121011 Firefox/27.0',
|
|
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.7 (KHTML, like Gecko) Comodo_Dragon/16.1.1.0 Chrome/16.0.912.63 Safari/535.7',
|
|
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.30 (KHTML, like Gecko) Comodo_Dragon/12.1.0.0 Chrome/12.0.742.91 Safari/534.30',
|
|
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.30 (KHTML, like Gecko) Comodo_Dragon/12.1.0.0 Chrome/12.0.742.91 Safari/534.30',
|
|
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1250.0 Iron/22.0.2150.0 Safari/537.4',
|
|
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1250.0 Iron/22.0.2150.0 Safari/537.4',
|
|
'Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0',
|
|
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)',
|
|
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
|
|
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)',
|
|
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/4.0; GTB7.4; InfoPath.3; SV1; .NET CLR 3.1.76908; WOW64; en-US)',
|
|
'Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20121202 Firefox/17.0 Iceweasel/17.0.1',
|
|
'Mozilla/5.0 (X11; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1 Iceweasel/15.0.1',
|
|
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36',
|
|
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36',
|
|
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36',
|
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36',
|
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
|
|
);
|
|
var $_cookie = '';
|
|
|
|
function __construct( $url, $proxy, $cookie = '' )
|
|
{
|
|
$this -> _url = $url;
|
|
$this -> _cookie = $cookie;
|
|
}
|
|
|
|
function getpagedata( $url )
|
|
{
|
|
$header = $this -> _header[ rand( 0, count( $this -> _header ) - 1 ) ];
|
|
|
|
$ch = curl_init();
|
|
curl_setopt( $ch, CURLOPT_HEADER, false );
|
|
curl_setopt( $ch, CURLOPT_URL, $url );
|
|
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
|
|
curl_setopt( $ch, CURLOPT_REFERER, 'www.google.pl' );
|
|
curl_setopt( $ch, CURLOPT_USERAGENT, $header );
|
|
curl_setopt( $ch, CURLOPT_PROXY, $this -> _proxy );
|
|
curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, 10 );
|
|
curl_setopt( $ch, CURLOPT_TIMEOUT, 10 );
|
|
curl_setopt( $ch, CURLOPT_COOKIE, $this -> _cookie );
|
|
curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
|
|
curl_setopt( $ch, CURLOPT_MAXREDIRS, 5 );
|
|
curl_setopt( $ch, CURLOPT_ENCODING, "gzip,deflate" );
|
|
$data = curl_exec( $ch );
|
|
curl_close( $ch );
|
|
return $data;
|
|
}
|
|
|
|
function pause()
|
|
{
|
|
usleep( rand( $this -> _time1, $this -> _time2 ) );
|
|
}
|
|
|
|
function initGoogle()
|
|
{
|
|
return $this -> getpagedata( $this -> _url );
|
|
return $google;
|
|
}
|
|
}
|