query( "SELECT id, url FROM projects WHERE automatic_update = 1 AND DATE_ADD( last_update, INTERVAL 1 WEEK ) <= '" . date( 'Y-m-d H:i:s' ) . "'" ) -> fetchAll();

        if ( is_array( $results ) and !empty( $results ) ) foreach ( $results as $row )
        {
            $mdb -> delete( 'project_links_internal', [ 'AND' => [ 'project_id' => $row['id'], 'parent_id[!]' => null ] ] );
            $mdb -> delete( 'project_links_external', [ 'project_id' => $row['id'] ] );
            $mdb -> update( 'project_links_internal', [ 'visited' => 0 ], [ 'project_id' => $row['id'] ] );
            $mdb -> update( 'projects', [ 'last_update' => date( 'Y-m-d H:i:s' ) ], [ 'id' => $row['id'] ] );
            return [ 'status' => 'ok', 'msg' => 'Ponawiam sprawdzanie strony ' . $row['url'] . '' ];
        }

        return [ 'status' => 'empty' ];
    }

    /**
     * Seeds the crawl for one enabled project that has no rows in
     * project_links_internal yet.
     *
     * Downloads the project's main URL, stores it as the root link
     * (parent_id = null) and stores every internal link found on it.
     *
     * @return array [ 'status' => 'ok', 'msg' => string ] when a project was
     *               processed (also when the download failed), or
     *               [ 'status' => 'empty' ] when the query returned no project.
     */
    public static function get_site_main_links()
    {
        global $mdb;

        $results = $mdb -> query( 'SELECT id, url FROM projects WHERE id NOT IN ( SELECT project_id FROM project_links_internal GROUP BY project_id ) AND enabled = 1 LIMIT 1' ) -> fetchAll();

        if ( !is_array( $results ) or empty( $results ) )
            return [ 'status' => 'empty' ];

        foreach ( $results as $row )
        {
            $ch = curl_init();
            curl_setopt( $ch, CURLOPT_URL, $row['url'] );
            curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
            curl_setopt( $ch, CURLOPT_VERBOSE, 1 );
            curl_setopt( $ch, CURLOPT_TIMEOUT, 60 );
            // NOTE(review): with CURLOPT_HEADER enabled the response headers are
            // part of $response and therefore also reach the HTML parser below.
            curl_setopt( $ch, CURLOPT_HEADER, true );
            curl_setopt( $ch, CURLOPT_CAINFO, 'cacert.pem' );
            // NOTE(review): peer verification is disabled, which makes the
            // CAINFO bundle above ineffective - confirm this is intended.
            curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false );
            curl_setopt( $ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36' );

            $response = curl_exec( $ch );
            // The error state must be read BEFORE the handle is closed; asking a
            // closed handle never reports the failure on PHP < 8.
            $failed = ( $response === false or curl_errno( $ch ) );
            curl_close( $ch );

            if ( $failed )
                return [ 'status' => 'ok', 'msg' => 'Błąd podczas pobierania strony ' . $row['url'] . '' ];

            // The main URL itself is the root of the project's link tree.
            $mdb -> insert( 'project_links_internal', [ 'project_id' => $row['id'], 'url' => $row['url'], 'parent_id' => null ] );

            foreach ( self::load_html_document( $response ) -> getElementsByTagName( 'a' ) as $link )
            {
                $url = $link -> getAttribute( 'href' );

                if ( !\S::is_url_internal( $row['url'], $url ) )
                    continue;

                // Drop the fragment so "page#a" and "page#b" become one link.
                if ( strpos( $url, '#' ) !== false )
                    $url = rtrim( substr( $url, 0, strpos( $url, '#' ) ), '?,#' );

                $url = \S::modify_internal_link( $row['url'], $url );

                if ( filter_var( $url, FILTER_VALIDATE_URL ) !== false
                    and !$mdb -> count( 'project_links_internal', [ 'AND' => [ 'project_id' => $row['id'], 'url' => $url ] ] ) )
                {
                    $mdb -> insert( 'project_links_internal', [ 'project_id' => $row['id'], 'url' => $url ] );
                }
            }

            return [ 'status' => 'ok', 'msg' => 'Pobieram linki dla strony ' . $row['url'] . '' ];
        }

        return [ 'status' => 'empty' ];
    }

    /**
     * Crawls one not-yet-visited internal link of an enabled project.
     *
     * For an HTML page answered with 200/301 it runs all page analysers,
     * stores newly discovered internal links and all other links as external
     * ones, then marks the row as visited. Other outcomes are recorded on the
     * row as described in the branches below.
     *
     * @return array [ 'status' => 'ok', 'msg' => string ] when a link was
     *               processed, [ 'status' => 'empty' ] when none was pending.
     */
    public static function get_site_other_links()
    {
        global $mdb;

        $results = $mdb -> query(
            'SELECT '
                . 'pli.id, project_id, pli.url, p.url AS project_url '
            . 'FROM '
                . 'project_links_internal AS pli '
                . 'INNER JOIN projects AS p ON p.id = pli.project_id '
            . 'WHERE '
                . 'visited = 0 AND enabled = 1 '
            . 'LIMIT 1'
        ) -> fetchAll();

        if ( !is_array( $results ) or empty( $results ) )
            return [ 'status' => 'empty' ];

        foreach ( $results as $row )
        {
            $url_parts = parse_url( $row['url'] );

            $ch = curl_init();
            curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
            curl_setopt( $ch, CURLOPT_VERBOSE, 1 );
            curl_setopt( $ch, CURLOPT_TIMEOUT, 60 );
            curl_setopt( $ch, CURLOPT_COOKIEFILE, 'temp/cookie.txt' );
            curl_setopt( $ch, CURLOPT_COOKIEJAR, 'temp/cookie.txt' );
            curl_setopt( $ch, CURLOPT_CAINFO, 'cacert.pem' );
            // NOTE(review): peer verification is disabled, which makes the
            // CAINFO bundle above ineffective - confirm this is intended.
            curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false );
            curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
            curl_setopt( $ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36' );

            // First request goes to the bare host on the same handle (its result
            // is discarded; presumably it only primes the cookie jar). Skipped
            // when the stored URL has no parsable host.
            if ( is_array( $url_parts ) and !empty( $url_parts['host'] ) )
            {
                curl_setopt( $ch, CURLOPT_URL, 'http://' . $url_parts['host'] );
                curl_exec( $ch );
            }

            curl_setopt( $ch, CURLOPT_URL, $row['url'] );
            $response = curl_exec( $ch );
            $content_type = curl_getinfo( $ch, CURLINFO_CONTENT_TYPE );
            $code = curl_getinfo( $ch, CURLINFO_HTTP_CODE );
            // The error state must be read BEFORE the handle is closed; asking a
            // closed handle never reports the failure on PHP < 8.
            $failed = ( $response === false or curl_errno( $ch ) );
            curl_close( $ch );

            // The content type may be null/false when the transfer failed.
            $is_html = strpos( (string) $content_type, 'text/html' ) !== false;

            if ( !$failed and ( $code == 200 or $code == 301 ) and $is_html )
            {
                self::get_site_meta_title( $row['id'], $response );
                self::get_site_meta_keywords( $row['id'], $response );
                self::get_site_meta_description( $row['id'], $response );
                self::get_site_meta_robots( $row['id'], $response );
                self::get_site_meta_googlebot( $row['id'], $response );
                self::get_site_code_lenght( $row['id'], $response );
                self::get_site_text_lenght( $row['id'], $response );
                self::get_site_canonical( $row['id'], $response );
                self::get_table_exists( $row['id'], $response );
                self::get_iframe_exists( $row['id'], $response );
                self::get_h1_exists( $row['id'], $response );
                self::get_images_without_alt( $row['id'], $response );

                /* collect the links found on the page */
                foreach ( self::load_html_document( $response ) -> getElementsByTagName( 'a' ) as $link )
                {
                    $href = $link -> getAttribute( 'href' );

                    /* internal links on this subpage */
                    if ( \S::is_url_internal( $row['project_url'], $href ) )
                    {
                        $url = $href;

                        // Drop the fragment so "page#a" and "page#b" become one link.
                        if ( strpos( $url, '#' ) !== false )
                            $url = rtrim( substr( $url, 0, strpos( $url, '#' ) ), '?,#' );

                        $url = \S::modify_internal_link( $row['project_url'], $url, $row['url'] );

                        // pathinfo() omits 'extension' for URLs without one.
                        $info = pathinfo( $url );
                        $extension = isset( $info['extension'] ) ? strtolower( $info['extension'] ) : '';

                        if ( filter_var( $url, FILTER_VALIDATE_URL ) !== false
                            and !in_array( $extension, \S::not_html_format() )
                            and !$mdb -> count( 'project_links_internal', [ 'AND' => [ 'project_id' => $row['project_id'], 'url' => $url ] ] ) )
                        {
                            // NOTE(review): 'response' here is the body of the PARENT
                            // page, not of the newly found link - looks accidental,
                            // kept unchanged; confirm before removing.
                            $mdb -> insert( 'project_links_internal', [ 'project_id' => $row['project_id'], 'url' => $url, 'visited' => 0, 'parent_id' => $row['id'], 'response' => $response ] );
                        }
                    }
                    /* external links on this subpage */
                    else
                    {
                        // rel is a space-separated token list ("nofollow noopener"),
                        // so look for the token instead of comparing the whole value.
                        $rel_tokens = preg_split( '/\s+/', strtolower( trim( $link -> getAttribute( 'rel' ) ) ) );
                        $nofollow = in_array( 'nofollow', $rel_tokens, true ) ? 1 : 0;

                        $mdb -> insert( 'project_links_external', [ 'project_id' => $row['project_id'], 'link_id' => $row['id'], 'url' => $href, 'nofollow' => $nofollow, 'title' => $link -> getAttribute( 'title' ) ] );
                    }
                }

                $mdb -> update( 'project_links_internal', [ 'visited' => 1, 'content_type' => $content_type, 'response_code' => $code, 'response' => $response ], [ 'id' => $row['id'] ] );
                return [ 'status' => 'ok', 'msg' => 'Pobieram informacje dla strony ' . $row['url'] . '' ];
            }
            else if ( $code == 404 or !$is_html )
            {
                // Missing page or non-HTML resource: flag the row as deleted.
                $mdb -> update( 'project_links_internal', [ 'visited' => 1, 'deleted' => 1, 'content_type' => $content_type, 'response_code' => $code ], [ 'id' => $row['id'] ] );
                return [ 'status' => 'ok', 'msg' => 'Pobieram informacje dla strony ' . $row['url'] . '' ];
            }
            else if ( $code !== 200 and $is_html )
            {
                // HTML answer with another status code: keep it, without analysis.
                $mdb -> update( 'project_links_internal', [ 'visited' => 1, 'content_type' => $content_type, 'response_code' => $code, 'response' => $response ], [ 'id' => $row['id'] ] );
                return [ 'status' => 'ok', 'msg' => 'Pobieram informacje dla strony ' . $row['url'] . '' ];
            }
            else
                return [ 'status' => 'ok', 'msg' => 'Błąd podczas pobierania strony ' . $row['url'] . '' ];
        }

        return [ 'status' => 'empty' ];
    }

    /**
     * Stores in have_images_without_alt whether the page has at least one
     * <img> whose alt attribute is missing or empty.
     *
     * @param mixed  $url_id   id of the project_links_internal row
     * @param string $response HTML of the page
     */
    static public function get_images_without_alt( $url_id, $response )
    {
        global $mdb;

        $have_images_without_alt = 0;

        foreach ( self::load_html_document( $response ) -> getElementsByTagName( "img" ) as $img )
        {
            if ( !$img -> getAttribute( 'alt' ) )
            {
                $have_images_without_alt = 1;
                break; // one hit settles the flag
            }
        }

        $mdb -> update( 'project_links_internal', [ 'have_images_without_alt' => $have_images_without_alt ], [ 'id' => $url_id ] );
    }

    /**
     * Stores in have_table whether the page contains a <table> element.
     *
     * @param mixed  $url_id   id of the project_links_internal row
     * @param string $response HTML of the page
     */
    static public function get_table_exists( $url_id, $response )
    {
        global $mdb;

        $count = self::load_html_document( $response ) -> getElementsByTagName( "table" );
        $mdb -> update( 'project_links_internal', [ 'have_table' => $count -> length ? 1 : 0 ], [ 'id' => $url_id ] );
    }

    /**
     * Stores in have_iframe whether the page contains an <iframe> element.
     *
     * @param mixed  $url_id   id of the project_links_internal row
     * @param string $response HTML of the page
     */
    static public function get_iframe_exists( $url_id, $response )
    {
        global $mdb;

        $count = self::load_html_document( $response ) -> getElementsByTagName( "iframe" );
        $mdb -> update( 'project_links_internal', [ 'have_iframe' => $count -> length ? 1 : 0 ], [ 'id' => $url_id ] );
    }

    /**
     * Stores in have_h1 whether the page contains an <h1> element.
     *
     * @param mixed  $url_id   id of the project_links_internal row
     * @param string $response HTML of the page
     */
    static public function get_h1_exists( $url_id, $response )
    {
        global $mdb;

        $count = self::load_html_document( $response ) -> getElementsByTagName( "h1" );
        $mdb -> update( 'project_links_internal', [ 'have_h1' => $count -> length ? 1 : 0 ], [ 'id' => $url_id ] );
    }

    /**
     * Stores the page title: the text in front of </title>, or - when that is
     * empty - the og:title meta tag (declared via name= or property=).
     *
     * @param mixed  $url_id   id of the project_links_internal row
     * @param string $response HTML of the page
     */
    public static function get_site_meta_title( $url_id, $response )
    {
        global $mdb;

        $title = '';

        if ( preg_match( '/([^>]*)<\/title>/si', $response, $match ) )
            $title = (string) strip_tags( $match[1] );

        if ( !$title )
            $title = self::find_meta_value( $response, 'title', [ 'name="og:?', 'property="og:?' ] );

        $mdb -> update( 'project_links_internal', [ 'title' => $title ], [ 'id' => $url_id ] );
    }

    /**
     * Stores the href of the page's <link rel="canonical"> (the last one when
     * there are several), or null when the page declares none.
     *
     * @param mixed  $url_id   id of the project_links_internal row
     * @param string $response HTML of the page
     */
    public static function get_site_canonical( $url_id, $response )
    {
        global $mdb;

        // Explicit default: pages without a canonical link used to read an
        // undefined variable here.
        $canonical = null;

        foreach ( self::load_html_document( $response ) -> getElementsByTagName( 'link' ) as $link )
        {
            if ( $link -> getAttribute( 'rel' ) == 'canonical' )
                $canonical = $link -> getAttribute( 'href' );
        }

        $mdb -> update( 'project_links_internal', [ 'canonical' => $canonical ], [ 'id' => $url_id ] );
    }

    /**
     * Stores the "keywords" meta tag, falling back to the og:keywords property.
     *
     * @param mixed  $url_id   id of the project_links_internal row
     * @param string $response HTML of the page
     */
    public static function get_site_meta_keywords( $url_id, $response )
    {
        global $mdb;

        $meta_keywords = self::find_meta_value( $response, 'keywords', [ 'name="?', 'property="og:?' ] );

        $mdb -> update( 'project_links_internal', [ 'meta_keywords' => $meta_keywords ], [ 'id' => $url_id ] );
    }

    /**
     * Stores the "description" meta tag, falling back to the og:description
     * property.
     *
     * @param mixed  $url_id   id of the project_links_internal row
     * @param string $response HTML of the page
     */
    public static function get_site_meta_description( $url_id, $response )
    {
        global $mdb;

        $meta_description = self::find_meta_value( $response, 'description', [ 'name="?', 'property="og:?' ] );

        $mdb -> update( 'project_links_internal', [ 'meta_description' => $meta_description ], [ 'id' => $url_id ] );
    }

    /**
     * Stores the "robots" meta tag ('' when absent).
     *
     * @param mixed  $url_id   id of the project_links_internal row
     * @param string $response HTML of the page
     */
    public static function get_site_meta_robots( $url_id, $response )
    {
        global $mdb;

        $meta_robots = self::find_meta_value( $response, 'robots', [ 'name="?' ] );

        $mdb -> update( 'project_links_internal', [ 'meta_robots' => $meta_robots ], [ 'id' => $url_id ] );
    }

    /**
     * Stores the "googlebot" meta tag ('' when absent).
     *
     * @param mixed  $url_id   id of the project_links_internal row
     * @param string $response HTML of the page
     */
    public static function get_site_meta_googlebot( $url_id, $response )
    {
        global $mdb;

        $meta_googlebot = self::find_meta_value( $response, 'googlebot', [ 'name="?' ] );

        $mdb -> update( 'project_links_internal', [ 'meta_googlebot' => $meta_googlebot ], [ 'id' => $url_id ] );
    }

    /**
     * Stores the byte length of the raw response in code_lenght.
     *
     * @param mixed  $url_id   id of the project_links_internal row
     * @param string $response HTML of the page
     */
    public static function get_site_code_lenght( $url_id, $response )
    {
        global $mdb;

        $mdb -> update( 'project_links_internal', [ 'code_lenght' => strlen( $response ) ], [ 'id' => $url_id ] );
    }

    /**
     * Stores in text_lenght the byte length of the response after it has been
     * passed through \S::strip_html_tags().
     *
     * @param mixed  $url_id   id of the project_links_internal row
     * @param string $response HTML of the page
     */
    public static function get_site_text_lenght( $url_id, $response )
    {
        global $mdb;

        $mdb -> update( 'project_links_internal', [ 'text_lenght' => strlen( \S::strip_html_tags( $response ) ) ], [ 'id' => $url_id ] );
    }

    /**
     * Parses an HTML string into a DOMDocument.
     *
     * Real-world markup is rarely valid, so libxml errors are collected
     * internally instead of being raised as PHP warnings; the previous libxml
     * setting is restored afterwards. An empty or non-string input yields an
     * empty document (DOMDocument::loadHTML() rejects an empty string).
     *
     * @param mixed $response HTML source
     * @return DOMDocument
     */
    private static function load_html_document( $response )
    {
        $doc = new DOMDocument;

        if ( !is_string( $response ) or $response === '' )
            return $doc;

        $previous = libxml_use_internal_errors( true );
        $doc -> loadHTML( $response );
        libxml_clear_errors();
        libxml_use_internal_errors( $previous );

        return $doc;
    }

    /**
     * Collects the content of every <meta> tag whose identifying attribute
     * starts with $name_pattern.
     *
     * @param mixed  $response     HTML source
     * @param string $name_pattern regex fragment opening the attribute, e.g.
     *                             'name="?' or 'property="og:?'
     * @return array captured name => content; a later duplicate overrides an
     *               earlier one
     */
    private static function collect_meta_values( $response, $name_pattern )
    {
        $values = [];

        if ( !is_string( $response ) or $response === '' )
            return $values;

        $found = preg_match_all( '/<[\s]*meta[\s]*' . $name_pattern . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si', $response, $match );

        if ( !$found )
            return $values;

        foreach ( $match[1] as $i => $name )
            $values[ $name ] = $match[2][ $i ];

        return $values;
    }

    /**
     * Returns the content of the meta tag $key, trying each attribute pattern
     * in order until one yields a non-empty value.
     *
     * @param mixed  $response      HTML source
     * @param string $key           meta name to look up, e.g. 'description'
     * @param array  $name_patterns patterns accepted by collect_meta_values()
     * @return string the value found, or '' when no pattern matched
     */
    private static function find_meta_value( $response, $key, array $name_patterns )
    {
        $value = '';

        foreach ( $name_patterns as $pattern )
        {
            $tags = self::collect_meta_values( $response, $pattern );
            $value = isset( $tags[ $key ] ) ? (string) $tags[ $key ] : '';

            if ( $value )
                break;
        }

        return $value;
    }
}