Files
adsPRO/autoload/services/class.XmlFeedImporter.php
2026-04-30 01:04:06 +02:00

399 lines
10 KiB
PHP

<?php
namespace services;
/**
 * Imports a client's Google Merchant XML feed into the `products` table.
 *
 * The feed is downloaded to a temporary file and parsed with a streaming
 * XMLReader, so memory use does not grow with the number of feed items.
 * Items are written to the database in batches of BATCH_SIZE, one
 * transaction per batch.
 */
class XmlFeedImporter
{
    /** Number of feed items written per database transaction. */
    const BATCH_SIZE = 200;

    /** Overall cURL transfer timeout, in seconds. */
    const HTTP_TIMEOUT = 300;

    /** XML namespace of the Google Merchant Center `g:` elements. */
    const GMC_NS = 'http://base.google.com/ns/1.0';

    /** Upper bound on the number of per-item / per-batch messages kept in the report. */
    const MAX_REPORTED_ERRORS = 20;

    /**
     * Imports the XML feed (Google Merchant) of a client and enriches the products table.
     *
     * Updated columns: title, description, custom_label_1, price (source data from the feed).
     * Columns edited by scripts/AI (title_gmc, description_gmc) are never touched by the
     * statements issued here. A feed item with no matching product row creates a new row.
     *
     * Malformed XML no longer ends the import silently: libxml errors are collected
     * into the report's `errors` list. The temporary download is removed on every
     * path, including when an exception propagates.
     *
     * @param int $client_id
     * @return array report with keys: feed_url, fetched, updated, inserted, skipped, errors,
     *               peak_memory_mb, duration_ms (plus skipped_reason = 'no_feed' when the
     *               client has no feed URL configured)
     */
    static public function import_for_client( $client_id )
    {
        global $mdb;

        $client_id = (int) $client_id;
        $report = [
            'feed_url' => '',
            'fetched' => 0,
            'updated' => 0,
            'inserted' => 0,
            'skipped' => 0,
            'errors' => [],
            'peak_memory_mb' => 0,
            'duration_ms' => 0,
        ];

        if ( $client_id <= 0 )
        {
            $report['errors'][] = 'Nieprawidlowy client_id';
            return $report;
        }

        $client = $mdb -> get( 'clients', [ 'id', 'xml_feed_url' ], [ 'id' => $client_id ] );
        $feed_url = trim( (string) ( $client['xml_feed_url'] ?? '' ) );
        if ( $feed_url === '' )
        {
            $report['skipped_reason'] = 'no_feed';
            return $report;
        }

        $report['feed_url'] = $feed_url;
        $started_at = microtime( true );
        @set_time_limit( 600 );
        @ini_set( 'memory_limit', '512M' );

        $tmp_file = tempnam( sys_get_temp_dir(), 'xmlfeed_' );
        if ( $tmp_file === false )
        {
            $report['errors'][] = 'Nie mozna utworzyc pliku tymczasowego';
        }
        else
        {
            try
            {
                $imported = self::download_feed( $feed_url, $tmp_file, $report )
                    && self::parse_feed_file( $client_id, $tmp_file, $report );
                if ( $imported )
                {
                    // Recorded whenever the feed was downloaded and opened, even if single items/batches failed.
                    $mdb -> update( 'clients', [ 'xml_feed_last_sync_at' => date( 'Y-m-d H:i:s' ) ], [ 'id' => $client_id ] );
                }
            }
            finally
            {
                // Runs on every path (including exceptions) so temp files never pile up.
                @unlink( $tmp_file );
            }
        }

        $report['peak_memory_mb'] = round( memory_get_peak_usage( true ) / 1024 / 1024, 1 );
        $report['duration_ms'] = (int) round( ( microtime( true ) - $started_at ) * 1000 );

        return $report;
    }

    /**
     * Streams the downloaded feed file and writes its items to the database in batches.
     *
     * Every element whose local name is `item` (RSS) or `entry` (Atom) is treated as a product.
     * libxml diagnostics are captured instead of being emitted as PHP warnings and end up
     * in $report['errors'].
     *
     * @param int    $client_id
     * @param string $tmp_file path of the downloaded feed
     * @param array  $report   import report, updated in place
     * @return bool false when the file could not be opened for parsing, true otherwise
     */
    static private function parse_feed_file( $client_id, $tmp_file, array &$report )
    {
        $previous_mode = libxml_use_internal_errors( true );
        libxml_clear_errors();

        $reader = new \XMLReader();
        $opened = false;

        try
        {
            // LIBXML_NONET: the parser itself must never fetch anything over the network.
            $opened = $reader -> open( $tmp_file, null, LIBXML_NONET );
            if ( !$opened )
            {
                $report['errors'][] = 'Nie mozna otworzyc feedu XML do parsowania';
                return false;
            }

            $batch = [];
            while ( $reader -> read() )
            {
                if ( $reader -> nodeType !== \XMLReader::ELEMENT )
                {
                    continue;
                }

                $local = $reader -> localName;
                if ( $local !== 'item' && $local !== 'entry' )
                {
                    continue;
                }

                try
                {
                    $node = $reader -> expand();
                    $item = $node ? self::extract_item_fields( $node ) : null;
                    if ( $item === null || $item['offer_id'] === '' )
                    {
                        $report['skipped']++;
                        continue;
                    }

                    $report['fetched']++;
                    $batch[] = $item;

                    if ( count( $batch ) >= self::BATCH_SIZE )
                    {
                        self::flush_batch( $client_id, $batch, $report );
                        $batch = [];
                        // Drain the libxml buffer per batch so it cannot grow with the feed size.
                        self::collect_libxml_errors( $report );
                        gc_collect_cycles();
                    }
                }
                catch ( \Throwable $e )
                {
                    $report['skipped']++;
                    self::add_error( $report, 'Item parse error: ' . $e -> getMessage() );
                }
            }

            if ( !empty( $batch ) )
            {
                self::flush_batch( $client_id, $batch, $report );
            }

            return true;
        }
        finally
        {
            if ( $opened )
            {
                $reader -> close();
            }
            // read() returns false both at EOF and on a fatal parse error; the collected
            // libxml errors are what tells the two apart in the report.
            self::collect_libxml_errors( $report );
            libxml_use_internal_errors( $previous_mode );
        }
    }

    /**
     * Moves pending libxml errors (level error/fatal, warnings are ignored) into the report
     * and clears the libxml error buffer.
     *
     * @param array $report import report, updated in place
     */
    static private function collect_libxml_errors( array &$report )
    {
        foreach ( libxml_get_errors() as $error )
        {
            if ( $error -> level === LIBXML_ERR_WARNING )
            {
                continue;
            }
            self::add_error( $report, 'XML parse error (line ' . $error -> line . '): ' . trim( $error -> message ) );
        }
        libxml_clear_errors();
    }

    /**
     * Appends a message to the report unless MAX_REPORTED_ERRORS messages are already stored.
     *
     * @param array  $report  import report, updated in place
     * @param string $message
     */
    static private function add_error( array &$report, $message )
    {
        if ( count( $report['errors'] ) < self::MAX_REPORTED_ERRORS )
        {
            $report['errors'][] = $message;
        }
    }

    /**
     * Downloads the feed to $tmp_file with cURL.
     *
     * @param string $url      feed URL
     * @param string $tmp_file destination path (overwritten)
     * @param array  $report   import report; a message is appended on failure
     * @return bool true when a non-empty file was downloaded
     */
    static private function download_feed( $url, $tmp_file, &$report )
    {
        $fp = fopen( $tmp_file, 'wb' );
        if ( $fp === false )
        {
            $report['errors'][] = 'Nie mozna otworzyc pliku tymczasowego do zapisu';
            return false;
        }

        $ch = curl_init( $url );
        if ( $ch === false )
        {
            fclose( $fp );
            $report['errors'][] = 'Nie mozna zainicjowac cURL';
            return false;
        }

        // The URL comes from client configuration: allow only remote transfer protocols
        // so that schemes such as file:// or gopher:// cannot be used, also after a redirect.
        $allowed_protocols = CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_FTP | CURLPROTO_FTPS;

        curl_setopt_array( $ch, [
            CURLOPT_FILE => $fp,
            CURLOPT_PROTOCOLS => $allowed_protocols,
            CURLOPT_REDIR_PROTOCOLS => $allowed_protocols,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS => 5,
            CURLOPT_TIMEOUT => self::HTTP_TIMEOUT,
            CURLOPT_CONNECTTIMEOUT => 30,
            CURLOPT_FAILONERROR => true,
            CURLOPT_USERAGENT => 'adsPRO XML Feed Importer/1.0',
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_ENCODING => '', // accept every encoding cURL supports (gzip, deflate, ...)
        ] );

        $ok = curl_exec( $ch );
        $http_code = (int) curl_getinfo( $ch, CURLINFO_HTTP_CODE );
        $err = curl_error( $ch );
        curl_close( $ch );
        fclose( $fp );

        if ( !$ok || ( $http_code >= 400 ) )
        {
            $report['errors'][] = 'Pobieranie feedu nieudane (HTTP ' . $http_code . '): ' . $err;
            return false;
        }

        // filesize() is served from PHP's stat cache; drop any stale entry for this path first.
        clearstatcache( true, $tmp_file );
        $size = filesize( $tmp_file );
        if ( $size === false || $size === 0 )
        {
            $report['errors'][] = 'Feed jest pusty';
            return false;
        }

        return true;
    }

    /**
     * Extracts the imported fields from a single expanded <item>/<entry> node.
     *
     * For id, title and description the `g:`-namespaced element is preferred and the
     * un-prefixed element of the same name is the fallback. The price is taken from
     * g:price, or from g:sale_price when g:price is missing or unparsable.
     *
     * @param \DOMNode $node
     * @return array|null array with keys offer_id, title, description, custom_label_1, price
     *                    (price is float|null); null when the node has no usable id
     */
    static private function extract_item_fields( \DOMNode $node )
    {
        // Re-home the node in a document of its own so SimpleXML can wrap it.
        $doc = new \DOMDocument();
        $doc -> appendChild( $doc -> importNode( $node, true ) );

        $sxe = simplexml_import_dom( $doc -> documentElement );
        // instanceof instead of a truthiness test: failure is signalled with null, and a
        // SimpleXMLElement without children or attributes evaluates to false.
        if ( !( $sxe instanceof \SimpleXMLElement ) )
        {
            return null;
        }

        $g = $sxe -> children( self::GMC_NS );

        $offer_id = self::first_text( $g, $sxe, 'id' );
        if ( $offer_id === '' )
        {
            return null;
        }

        $price = isset( $g -> price ) ? self::parse_price( trim( (string) $g -> price ) ) : null;
        if ( $price === null && isset( $g -> sale_price ) )
        {
            $price = self::parse_price( trim( (string) $g -> sale_price ) );
        }

        $custom_label_1 = isset( $g -> custom_label_1 ) ? trim( (string) $g -> custom_label_1 ) : '';

        return [
            'offer_id' => self::truncate( $offer_id, 255 ),
            'title' => self::truncate( self::first_text( $g, $sxe, 'title' ), 255 ),
            'description' => self::first_text( $g, $sxe, 'description' ),
            'custom_label_1' => self::truncate( $custom_label_1, 255 ),
            'price' => $price,
        ];
    }

    /**
     * Returns the trimmed text of child $name from $primary, or from $fallback when the
     * primary one is missing or empty; '' when neither has it.
     *
     * @param \SimpleXMLElement|null $primary
     * @param \SimpleXMLElement|null $fallback
     * @param string                 $name child element name
     * @return string
     */
    static private function first_text( $primary, $fallback, $name )
    {
        $text = isset( $primary -> {$name} ) ? trim( (string) $primary -> {$name} ) : '';
        if ( $text === '' && isset( $fallback -> {$name} ) )
        {
            $text = trim( (string) $fallback -> {$name} );
        }

        return $text;
    }

    /**
     * Parses a feed price such as "15.00 USD", "12,50", "1,299.00 USD" or "1 299,00 PLN".
     *
     * Separator rules:
     *  - both '.' and ',' present: the one occurring last is the decimal mark, the other is grouping;
     *  - a single kind occurring more than once ("1,299,000"): grouping;
     *  - a single separator occurring once: decimal mark;
     *  - whitespace (incl. no-break spaces) between digits is treated as grouping.
     *
     * @param string $raw
     * @return float|null price rounded to 2 decimals; null when empty, unparsable or not positive
     */
    static private function parse_price( $raw )
    {
        $raw = trim( (string) $raw );
        if ( $raw === '' )
        {
            return null;
        }

        // "1 299,00" -> "1299,00". preg_replace() yields null on invalid UTF-8: keep the raw text then.
        $compact = preg_replace( '/(?<=[0-9])[\s\x{00A0}\x{202F}]+(?=[0-9])/u', '', $raw );
        if ( $compact === null )
        {
            $compact = $raw;
        }

        if ( !preg_match( '/[0-9]+(?:[.,][0-9]+)*/', $compact, $m ) )
        {
            return null;
        }
        $number = $m[0];

        $last_dot = strrpos( $number, '.' );
        $last_comma = strrpos( $number, ',' );

        if ( $last_dot !== false && $last_comma !== false )
        {
            $decimal_mark = $last_dot > $last_comma ? '.' : ',';
            $grouping_mark = $decimal_mark === '.' ? ',' : '.';
            $number = str_replace( $grouping_mark, '', $number );
            if ( substr_count( $number, $decimal_mark ) > 1 )
            {
                // e.g. "1,2.3.4" - not a number in any locale
                return null;
            }
        }
        elseif ( $last_dot !== false || $last_comma !== false )
        {
            $mark = $last_dot !== false ? '.' : ',';
            if ( substr_count( $number, $mark ) > 1 )
            {
                $number = str_replace( $mark, '', $number );
            }
        }

        $value = (float) str_replace( ',', '.', $number );

        return $value > 0 ? round( $value, 2 ) : null;
    }

    /**
     * Cuts a string to at most $max characters (UTF-8 aware when mbstring is available,
     * byte-wise otherwise).
     *
     * @param mixed $value
     * @param int   $max
     * @return string
     */
    static private function truncate( $value, $max )
    {
        $value = (string) $value;

        return function_exists( 'mb_substr' )
            ? mb_substr( $value, 0, $max, 'UTF-8' )
            : substr( $value, 0, $max );
    }

    /**
     * Writes one batch of feed items to `products` inside a single transaction.
     *
     * Existing rows (matched by client_id + offer_id) are updated - all of them when legacy
     * duplicates exist; unknown offer ids are inserted. An offer id repeated inside the same
     * batch is inserted once and updated afterwards instead of creating a duplicate row.
     * On any failure the transaction is rolled back and the whole batch is counted as skipped.
     *
     * @param int   $client_id
     * @param array $batch  list of arrays as returned by extract_item_fields()
     * @param array $report import report, updated in place
     */
    static private function flush_batch( $client_id, array $batch, array &$report )
    {
        global $mdb;

        if ( empty( $batch ) )
        {
            return;
        }

        $pdo = $mdb -> pdo;

        try
        {
            $pdo -> beginTransaction();

            $existing = self::find_existing_ids( $pdo, $client_id, $batch );

            $update_stmt = self::prepare_statement(
                $pdo,
                'UPDATE products SET
                    title = :title,
                    description = :description,
                    custom_label_1 = COALESCE(:custom_label_1, custom_label_1),
                    price = COALESCE(:price, price)
                WHERE id = :id'
            );
            $insert_stmt = self::prepare_statement(
                $pdo,
                'INSERT INTO products (client_id, offer_id, title, description, custom_label_1, price)
                VALUES (:client_id, :offer_id, :title, :description, :custom_label_1, :price)'
            );

            $updated_count = 0;
            $inserted_count = 0;

            foreach ( $batch as $item )
            {
                $offer_id = $item['offer_id'];
                // Empty strings are stored as NULL.
                $values = [
                    ':title' => $item['title'] !== '' ? $item['title'] : null,
                    ':description' => $item['description'] !== '' ? $item['description'] : null,
                    ':custom_label_1' => $item['custom_label_1'] !== '' ? $item['custom_label_1'] : null,
                    ':price' => $item['price'],
                ];

                if ( !empty( $existing[ $offer_id ] ) )
                {
                    // update ALL legacy duplicates to keep the data consistent
                    foreach ( $existing[ $offer_id ] as $row_id )
                    {
                        self::execute_statement( $update_stmt, $values + [ ':id' => $row_id ] );
                        $updated_count++;
                    }
                    continue;
                }

                self::execute_statement( $insert_stmt, $values + [ ':client_id' => $client_id, ':offer_id' => $offer_id ] );
                $inserted_count++;

                // Remember the new row so a repeated offer_id later in this batch updates it
                // instead of inserting a second row.
                $new_id = (int) $pdo -> lastInsertId();
                if ( $new_id > 0 )
                {
                    $existing[ $offer_id ][] = $new_id;
                }
            }

            $pdo -> commit();

            // Counters are published only after a successful commit.
            $report['updated'] += $updated_count;
            $report['inserted'] += $inserted_count;
        }
        catch ( \Throwable $e )
        {
            if ( $pdo -> inTransaction() )
            {
                $pdo -> rollBack();
            }
            self::add_error( $report, 'Batch flush error: ' . $e -> getMessage() );
            $report['skipped'] += count( $batch );
        }
    }

    /**
     * Looks up the product row ids that already exist for the offer ids of a batch.
     *
     * @param object $pdo       PDO connection
     * @param int    $client_id
     * @param array  $batch     list of items, each with an 'offer_id' key
     * @return array map offer_id => list of row ids (one offer_id may have several legacy rows)
     */
    static private function find_existing_ids( $pdo, $client_id, array $batch )
    {
        // Used as array keys to de-duplicate the offer ids.
        $offer_ids = [];
        foreach ( $batch as $item )
        {
            $offer_ids[ $item['offer_id'] ] = true;
        }

        $placeholders = [];
        $params = [ ':client_id' => $client_id ];
        foreach ( array_keys( $offer_ids ) as $idx => $offer_id )
        {
            $placeholder = ':oid_' . $idx;
            $placeholders[] = $placeholder;
            $params[ $placeholder ] = $offer_id;
        }

        $existing = [];
        if ( empty( $placeholders ) )
        {
            return $existing;
        }

        $stmt = self::prepare_statement(
            $pdo,
            'SELECT id, offer_id FROM products WHERE client_id = :client_id AND offer_id IN (' . implode( ', ', $placeholders ) . ')'
        );
        self::execute_statement( $stmt, $params );

        while ( $row = $stmt -> fetch( \PDO::FETCH_ASSOC ) )
        {
            // one offer_id may have many rows (legacy duplicates) - collect all ids
            $existing[ (string) $row['offer_id'] ][] = (int) $row['id'];
        }

        return $existing;
    }

    /**
     * Prepares a statement; throws when the driver reports failure by returning false
     * (PDO::ERRMODE_SILENT / ERRMODE_WARNING).
     *
     * @param object $pdo PDO connection
     * @param string $sql
     * @return \PDOStatement
     * @throws \RuntimeException
     */
    static private function prepare_statement( $pdo, $sql )
    {
        $stmt = $pdo -> prepare( $sql );
        if ( !( $stmt instanceof \PDOStatement ) )
        {
            throw new \RuntimeException( 'SQL prepare failed' );
        }

        return $stmt;
    }

    /**
     * Executes a prepared statement; throws when execute() returns false so a failed
     * statement is never counted as a successful insert/update.
     *
     * @param \PDOStatement $stmt
     * @param array         $params
     * @throws \RuntimeException
     */
    static private function execute_statement( \PDOStatement $stmt, array $params )
    {
        if ( $stmt -> execute( $params ) === false )
        {
            $info = $stmt -> errorInfo();
            throw new \RuntimeException( 'SQL error: ' . ( $info[2] ?? 'unknown' ) );
        }
    }
}