db = $db; }

    /**
     * Re-queues one project whose weekly automatic re-check is due.
     *
     * Selects projects with automatic_update = 1 whose last_update is at least
     * one week old, and for the first one found: deletes its internal links
     * that have a parent, deletes all its external links, resets `visited` on
     * the remaining internal links and stamps `last_update` with the current
     * time. Only one project is handled per call.
     *
     * @return array [ 'status' => 'ok', 'msg' => string ] when a project was
     *               re-queued, [ 'status' => 'empty' ] when none is due.
     */
    public function automaticUpdateSites()
    {
        $now = date( 'Y-m-d H:i:s' );

        $results = $this->db->query(
            "SELECT id, url FROM projects WHERE automatic_update = 1 AND DATE_ADD( last_update, INTERVAL 1 WEEK ) <= '" . $now . "'"
        )->fetchAll();

        if ( !is_array( $results ) || empty( $results ) )
            return [ 'status' => 'empty' ];

        // Only the first due project is processed; the rest wait for later calls.
        $row = reset( $results );

        // Drop everything discovered below the root link, keep the root itself.
        $this->db->delete( 'project_links_internal', [ 'AND' => [ 'project_id' => $row['id'], 'parent_id[!]' => null ] ] );
        $this->db->delete( 'project_links_external', [ 'project_id' => $row['id'] ] );

        // Mark what is left as unvisited so it gets crawled again.
        $this->db->update( 'project_links_internal', [ 'visited' => 0 ], [ 'project_id' => $row['id'] ] );
        $this->db->update( 'projects', [ 'last_update' => $now ], [ 'id' => $row['id'] ] );

        return [ 'status' => 'ok', 'msg' => 'Ponawiam sprawdzanie strony ' . $row['url'] ];
    }

    /**
     * Fetches the start page of one enabled project that has no internal links
     * yet and stores the internal links found on it.
     *
     * The project URL itself is inserted as the root link (parent_id = null);
     * every internal <a href> that validates as a URL and is not stored yet is
     * inserted for the same project.
     *
     * @return array [ 'status' => 'ok', 'msg' => string ] after a fetch attempt
     *               (the message tells success from failure),
     *               [ 'status' => 'empty' ] when no project qualifies.
     */
    public function getSiteMainLinks()
    {
        $results = $this->db->query(
            'SELECT id, url FROM projects WHERE id NOT IN ( SELECT project_id FROM project_links_internal GROUP BY project_id ) AND enabled = 1 LIMIT 1'
        )->fetchAll();

        if ( !is_array( $results ) || empty( $results ) )
            return [ 'status' => 'empty' ];

        $row = reset( $results );

        $ch = curl_init();
        curl_setopt( $ch, CURLOPT_URL, $row['url'] );
        curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
        curl_setopt( $ch, CURLOPT_VERBOSE, 1 );
        curl_setopt( $ch, CURLOPT_TIMEOUT, 60 );
        // NOTE(review): with CURLOPT_HEADER the response headers are part of
        // $response and are handed to the HTML parser together with the body.
        curl_setopt( $ch, CURLOPT_HEADER, true );
        curl_setopt( $ch, CURLOPT_CAINFO, 'cacert.pem' );
        // NOTE(review): peer verification is disabled, so the CA bundle above is not enforced.
        curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false );
        curl_setopt( $ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36' );

        $response = curl_exec( $ch );
        // The error code has to be read before the handle is closed; asking a
        // closed handle never reports the transfer error.
        $errno = curl_errno( $ch );
        curl_close( $ch );

        if ( $errno !== 0 || !is_string( $response ) )
            return [ 'status' => 'ok', 'msg' => 'Błąd podczas pobierania strony ' . $row['url'] ];

        // Root link of the project.
        $this->db->insert( 'project_links_internal', [ 'project_id' => $row['id'], 'url' => $row['url'], 'parent_id' => null ] );

        $doc = $this->loadHtmlDocument( $response );

        foreach ( $doc->getElementsByTagName( 'a' ) as $link ) {
            $href = $link->getAttribute( 'href' );

            if ( !\S::is_url_internal( $row['url'], $href ) )
                continue;

            // Cut the fragment, then any trailing '?', ',' or '#'.
            $hashPos = strpos( $href, '#' );
            if ( $hashPos !== false )
                $href = rtrim( substr( $href, 0, $hashPos ), '?,#' );

            $href = \S::modify_internal_link( $row['url'], $href );

            if (
                filter_var( $href, FILTER_VALIDATE_URL ) !== false
                && !$this->db->count( 'project_links_internal', [ 'AND' => [ 'project_id' => $row['id'], 'url' => $href ] ] )
            ) {
                $this->db->insert( 'project_links_internal', [ 'project_id' => $row['id'], 'url' => $href ] );
            }
        }

        return [ 'status' => 'ok', 'msg' => 'Pobieram linki dla strony ' . $row['url'] ];
    }

    /**
     * Crawls one not yet visited internal link of an enabled project.
     *
     * The page is downloaded (after a priming request to the host root that
     * shares the same cookie jar). Depending on the result the link row is:
     *  - analysed (meta data, structure flags), its links are collected and the
     *    row is marked visited, when the transfer succeeded with HTTP 200/301
     *    and a text/html content type;
     *  - marked visited and deleted, when the status is 404 or the content type
     *    is not text/html;
     *  - marked visited with the response stored, for any other non-200 HTML
     *    response.
     *
     * @return array [ 'status' => 'ok', 'msg' => string ] after a fetch attempt,
     *               [ 'status' => 'empty' ] when there is nothing to crawl.
     */
    public function getSiteOtherLinks()
    {
        $results = $this->db->query(
            'SELECT '
                . 'pli.id, project_id, pli.url, p.url AS project_url '
            . 'FROM '
                . 'project_links_internal AS pli '
                . 'INNER JOIN projects AS p ON p.id = pli.project_id '
            . 'WHERE '
                . 'visited = 0 AND enabled = 1 '
            . 'LIMIT 1'
        )->fetchAll();

        if ( !is_array( $results ) || empty( $results ) )
            return [ 'status' => 'empty' ];

        $row = reset( $results );

        $urlParts = parse_url( $row['url'] );
        $host = ( is_array( $urlParts ) && isset( $urlParts['host'] ) ) ? $urlParts['host'] : '';

        $ch = curl_init();
        curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
        curl_setopt( $ch, CURLOPT_VERBOSE, 1 );
        curl_setopt( $ch, CURLOPT_TIMEOUT, 60 );
        curl_setopt( $ch, CURLOPT_COOKIEFILE, 'temp/cookie.txt' );
        curl_setopt( $ch, CURLOPT_COOKIEJAR, 'temp/cookie.txt' );
        curl_setopt( $ch, CURLOPT_CAINFO, 'cacert.pem' );
        // NOTE(review): peer verification is disabled, so the CA bundle above is not enforced.
        curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false );
        curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
        curl_setopt( $ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36' );

        // Priming request to the host root; its body is discarded. It is
        // skipped when the stored URL has no host component to build it from.
        if ( $host !== '' ) {
            curl_setopt( $ch, CURLOPT_URL, 'http://' . $host );
            curl_exec( $ch );
        }

        curl_setopt( $ch, CURLOPT_URL, $row['url'] );
        $response = curl_exec( $ch );

        // Everything that needs the handle is read before it is closed.
        $content_type = curl_getinfo( $ch, CURLINFO_CONTENT_TYPE );
        $code = curl_getinfo( $ch, CURLINFO_HTTP_CODE );
        $errno = curl_errno( $ch );
        curl_close( $ch );

        // The content type may be null/false when the server sent none.
        $isHtml = strpos( (string) $content_type, 'text/html' ) !== false;

        if ( $errno === 0 && ( $code == 200 || $code == 301 ) && $isHtml ) {
            $this->getSiteMetaTitle( $row['id'], $response );
            $this->getSiteMetaKeywords( $row['id'], $response );
            $this->getSiteMetaDescription( $row['id'], $response );
            $this->getSiteMetaRobots( $row['id'], $response );
            $this->getSiteMetaGooglebot( $row['id'], $response );
            $this->getSiteCodeLenght( $row['id'], $response );
            $this->getSiteTextLenght( $row['id'], $response );
            $this->getSiteCanonical( $row['id'], $response );
            $this->getTableExists( $row['id'], $response );
            $this->getIframeExists( $row['id'], $response );
            $this->getH1Exists( $row['id'], $response );
            $this->getImagesWithoutAlt( $row['id'], $response );

            /* collect the links found on the page */
            $doc = $this->loadHtmlDocument( $response );

            foreach ( $doc->getElementsByTagName( 'a' ) as $link ) {
                $href = $link->getAttribute( 'href' );

                /* internal links on this subpage */
                if ( \S::is_url_internal( $row['project_url'], $href ) ) {
                    // Cut the fragment, then any trailing '?', ',' or '#'.
                    $hashPos = strpos( $href, '#' );
                    if ( $hashPos !== false )
                        $href = rtrim( substr( $href, 0, $hashPos ), '?,#' );

                    $href = \S::modify_internal_link( $row['project_url'], $href, $row['url'] );

                    // pathinfo() omits 'extension' for paths without one.
                    $info = pathinfo( $href );
                    $extension = isset( $info['extension'] ) ? strtolower( $info['extension'] ) : '';

                    if (
                        filter_var( $href, FILTER_VALIDATE_URL ) !== false
                        && !in_array( $extension, \S::not_html_format() )
                        && !$this->db->count( 'project_links_internal', [ 'AND' => [ 'project_id' => $row['project_id'], 'url' => $href ] ] )
                    ) {
                        // NOTE(review): 'response' stores the body of the page the
                        // link was found on, not of the link itself - confirm this
                        // is intended.
                        $this->db->insert( 'project_links_internal', [
                            'project_id' => $row['project_id'],
                            'url' => $href,
                            'visited' => 0,
                            'parent_id' => $row['id'],
                            'response' => $response
                        ] );
                    }
                }
                /* external links on this subpage */
                else {
                    // rel is a whitespace-separated, case-insensitive token list,
                    // e.g. "nofollow noopener".
                    $relTokens = preg_split( '/\s+/', strtolower( $link->getAttribute( 'rel' ) ), -1, PREG_SPLIT_NO_EMPTY );
                    $nofollow = in_array( 'nofollow', $relTokens, true ) ? 1 : 0;

                    $this->db->insert( 'project_links_external', [
                        'project_id' => $row['project_id'],
                        'link_id' => $row['id'],
                        'url' => $href,
                        'nofollow' => $nofollow,
                        'title' => $link->getAttribute( 'title' )
                    ] );
                }
            }

            $this->db->update( 'project_links_internal', [
                'visited' => 1,
                'content_type' => $content_type,
                'response_code' => $code,
                'response' => $response
            ], [ 'id' => $row['id'] ] );

            return [ 'status' => 'ok', 'msg' => 'Pobieram informacje dla strony ' . $row['url'] ];
        }

        if ( $code == 404 || !$isHtml ) {
            $this->db->update( 'project_links_internal', [
                'visited' => 1,
                'deleted' => 1,
                'content_type' => $content_type,
                'response_code' => $code
            ], [ 'id' => $row['id'] ] );

            return [ 'status' => 'ok', 'msg' => 'Pobieram informacje dla strony ' . $row['url'] ];
        }

        if ( $code !== 200 && $isHtml ) {
            $this->db->update( 'project_links_internal', [
                'visited' => 1,
                'content_type' => $content_type,
                'response_code' => $code,
                'response' => $response
            ], [ 'id' => $row['id'] ] );

            return [ 'status' => 'ok', 'msg' => 'Pobieram informacje dla strony ' . $row['url'] ];
        }

        // NOTE(review): the row is left unvisited here, so the same link may be
        // selected again on the next call.
        return [ 'status' => 'ok', 'msg' => 'Błąd podczas pobierania strony ' . $row['url'] ];
    }

    /**
     * Stores whether the page has at least one <img> whose alt attribute is
     * missing or evaluates as empty.
     *
     * @param mixed  $urlId    id of the project_links_internal row to update
     * @param string $response HTML of the page
     */
    private function getImagesWithoutAlt( $urlId, $response )
    {
        $doc = $this->loadHtmlDocument( $response );

        $have_images_without_alt = 0;
        foreach ( $doc->getElementsByTagName( 'img' ) as $img ) {
            if ( !$img->getAttribute( 'alt' ) ) {
                $have_images_without_alt = 1;
                break; // one offending image is enough
            }
        }

        $this->db->update( 'project_links_internal', [ 'have_images_without_alt' => $have_images_without_alt ], [ 'id' => $urlId ] );
    }

    /**
     * Stores whether the page contains at least one <table> element.
     *
     * @param mixed  $urlId    id of the project_links_internal row to update
     * @param string $response HTML of the page
     */
    private function getTableExists( $urlId, $response )
    {
        $tables = $this->loadHtmlDocument( $response )->getElementsByTagName( 'table' );

        $this->db->update( 'project_links_internal', [ 'have_table' => $tables->length ? 1 : 0 ], [ 'id' => $urlId ] );
    }

    /**
     * Stores whether the page contains at least one <iframe> element.
     *
     * @param mixed  $urlId    id of the project_links_internal row to update
     * @param string $response HTML of the page
     */
    private function getIframeExists( $urlId, $response )
    {
        $iframes = $this->loadHtmlDocument( $response )->getElementsByTagName( 'iframe' );

        $this->db->update( 'project_links_internal', [ 'have_iframe' => $iframes->length ? 1 : 0 ], [ 'id' => $urlId ] );
    }

    /**
     * Stores whether the page contains at least one <h1> element.
     *
     * @param mixed  $urlId    id of the project_links_internal row to update
     * @param string $response HTML of the page
     */
    private function getH1Exists( $urlId, $response )
    {
        $headings = $this->loadHtmlDocument( $response )->getElementsByTagName( 'h1' );

        $this->db->update( 'project_links_internal', [ 'have_h1' => $headings->length ? 1 : 0 ], [ 'id' => $urlId ] );
    }

    /**
     * Parses an HTML string into a DOMDocument.
     *
     * An empty or non-string input yields an empty document instead of being
     * passed to the parser. libxml warnings raised by malformed markup are
     * collected internally and discarded; the previous libxml error mode is
     * restored afterwards.
     *
     * @param mixed $html HTML source
     *
     * @return \DOMDocument
     */
    private function loadHtmlDocument( $html )
    {
        $doc = new \DOMDocument;

        if ( !is_string( $html ) || $html === '' )
            return $doc;

        $previousMode = libxml_use_internal_errors( true );
        $doc->loadHTML( $html );
        libxml_clear_errors();
        libxml_use_internal_errors( $previousMode );

        return $doc;
    }

    private function getSiteMetaTitle( $urlId, $response ) { $title = ''; preg_match('/