This commit is contained in:
2026-05-06 23:19:35 +02:00
parent 34e6b6373f
commit b1b5e416ba
16 changed files with 1448 additions and 65 deletions

View File

@@ -0,0 +1,290 @@
<?php
/**
 * Migrator: merges duplicate rows in the `products` table created by repeated imports.
 *
 * Rules:
 *  - Duplicate grouping: (client_id, TRIM(offer_id)).
 *  - Winner per group: ORDER BY clicks_all_time DESC, impressions_all_time DESC,
 *    agg_rows DESC, id ASC.
 *  - Strategy: WINNER KEEPS ALL.
 *    Tables with a UNIQUE key containing product_id: the loser's rows that
 *    conflict with the winner are deleted, the rest are repointed with UPDATE.
 *    Tables without a UNIQUE key on product_id: plain UPDATE.
 *  - Everything is done per loser, inside a transaction (in --execute mode).
 *
 * Run modes:
 *   php merge_duplicate_products.php --dry-run             (default - only counts)
 *   php merge_duplicate_products.php --execute             (actually applies the changes)
 *   php merge_duplicate_products.php --execute --client=2  (restrict to a single client)
 *   php merge_duplicate_products.php --dry-run --limit=10  (process at most 10 duplicate groups)
 */
// --- Bootstrap: verbose errors, project root as CWD, config + DB library ---
error_reporting(E_ALL);
ini_set('display_errors', '1');
chdir(__DIR__ . '/..');
require 'config.php';
require 'libraries/medoo/medoo.php';

// --- Command-line options ---
// getopt() never reports malformed input, so everything is validated by hand.
// This matters because the script is destructive: an optional value written
// with a space ("--client 2") is parsed as "no value" (false), and casting
// that to int gives 0, which means "all clients".
$opts = getopt('', ['dry-run', 'execute', 'client::', 'limit::']);

// Every argument must be one of the supported forms; stray tokens and
// misspelled options abort the run instead of being ignored.
foreach (array_slice($_SERVER['argv'] ?? [], 1) as $cli_arg) {
    if (preg_match('/^--(?:dry-run|execute|client=\d+|limit=\d+)\z/', (string) $cli_arg) !== 1) {
        fwrite(STDERR, "BLAD: nieprawidlowy argument '$cli_arg'. Dozwolone: --dry-run, --execute, --client=<liczba>, --limit=<liczba>\n");
        exit(1);
    }
}

// Contradictory mode flags: refuse rather than guess which one was meant.
if (isset($opts['dry-run'], $opts['execute'])) {
    fwrite(STDERR, "BLAD: podaj tylko jedna z flag --dry-run / --execute\n");
    exit(1);
}

// Reads an optional integer option. Returns 0 when the option is absent
// (0 keeps its existing meaning: "all clients" / "no limit"). Aborts when the
// option is present without a value, repeated, or not a plain non-negative integer.
$read_int_opt = static function (string $name) use ($opts): int {
    if (!isset($opts[$name])) {
        return 0;
    }
    $raw = $opts[$name];
    if (!is_string($raw) || preg_match('/^\d+\z/', $raw) !== 1) {
        fwrite(STDERR, "BLAD: opcja --$name wymaga wartosci liczbowej w postaci --$name=<liczba>\n");
        exit(1);
    }
    return (int) $raw;
};

$DRY_RUN = !isset($opts['execute']);   // dry-run unless --execute is given
$CLIENT_ID = $read_int_opt('client');  // 0 = all clients
$LIMIT = $read_int_opt('limit');       // 0 = no limit on the number of groups

// --- Database connection (credentials come from config.php's $database) ---
$mdb = new medoo([
    'database_type' => 'mysql',
    'database_name' => $database['name'],
    'server' => $database['remote_host'],
    'username' => $database['user'],
    'password' => $database['password'],
    'charset' => 'utf8'
]);
$pdo = $mdb->pdo;
// Make every failed statement throw, so the per-loser try/catch can roll back.
$pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

// --- Banner with the effective run parameters ---
echo str_repeat('=', 70) . "\n";
echo " TRYB: " . ($DRY_RUN ? 'DRY-RUN (zadne zmiany nie beda zapisane)' : 'EXECUTE (zmiany zostana zapisane)') . "\n";
echo " KLIENT: " . ($CLIENT_ID > 0 ? $CLIENT_ID : 'WSZYSCY') . "\n";
echo " LIMIT GRUP: " . ($LIMIT > 0 ? $LIMIT : 'BRAK') . "\n";
echo str_repeat('=', 70) . "\n";
// === 1. Fetch all duplicate groups ===
// Rows with a NULL/blank offer_id are never considered duplicates of each other.
$where = "TRIM(COALESCE(offer_id,'')) <> ''";
if ($CLIENT_ID > 0) {
    $where .= " AND client_id = " . $CLIENT_ID;
}

// GROUP_CONCAT() output is silently cut off at group_concat_max_len (1024 bytes
// by default). A list cut in the middle of a number would yield a different,
// unrelated product id, which would then be merged and deleted. Raise the limit
// for this session and read back the value the server actually applied.
$pdo->exec("SET SESSION group_concat_max_len = 1048576");
$gc_max_len = (int) $pdo->query("SELECT @@SESSION.group_concat_max_len")->fetchColumn();

// One row per (client_id, trimmed offer_id) that occurs more than once,
// with the member product ids as a comma-separated list in `ids`.
$groups_sql = "
    SELECT client_id, TRIM(offer_id) AS oid, COUNT(*) AS cnt, GROUP_CONCAT(id ORDER BY id) AS ids
    FROM products
    WHERE $where
    GROUP BY client_id, TRIM(offer_id)
    HAVING cnt > 1
    ORDER BY client_id, oid
";
if ($LIMIT > 0) {
    $groups_sql .= " LIMIT " . $LIMIT;
}
$groups = $pdo->query($groups_sql)->fetchAll(PDO::FETCH_ASSOC);

// A list shorter than the limit cannot have been truncated; anything that
// reaches the limit is not trustworthy, so stop before touching any data.
foreach ($groups as $g) {
    if (strlen((string) $g['ids']) >= $gc_max_len) {
        fwrite(STDERR, "BLAD: lista id dla grupy (client {$g['client_id']}, offer '{$g['oid']}') mogla zostac obcieta przez GROUP_CONCAT - przerywam.\n");
        exit(1);
    }
}

echo "Grup duplikatow: " . count($groups) . "\n";
if (empty($groups)) {
    echo "Brak duplikatow do scalenia.\n";
    exit(0);
}
// === 2. Summary statistics ===
// Counters filled in by the merge loop and printed at the end of the run.
// All of them start at zero except `groups`, which is known up front.
// The key order is the order in which the report is printed.
$zeroed_counters = [
    'losers_total',
    'pa_conflicts_to_delete',
    'pa_to_repoint',
    'pt_conflicts_to_delete',
    'pt_to_repoint',
    'ph_conflicts_to_delete',
    'ph_to_repoint',
    'ph30_conflicts_to_delete',
    'ph30_to_repoint',
    'kpt_conflicts_to_delete',
    'kpt_to_repoint',
    'comments_to_repoint',
    'alerts_to_repoint',
    'sync_logs_to_repoint',
    'products_to_delete',
];
$stat = ['groups' => count($groups)] + array_fill_keys($zeroed_counters, 0);
// === 3. Iteration ===

// Child tables whose UNIQUE key contains product_id, as noted in the original
// per-table comments: stat-key prefix => [table, remaining UNIQUE key columns].
// For these, the loser's rows that would collide with a winner row are deleted
// and the rest are repointed to the winner.
$unique_tables = [
    'pa'   => ['products_aggregate', ['campaign_id', 'ad_group_id']],
    'pt'   => ['products_temp', ['campaign_id', 'ad_group_id']],
    'ph'   => ['products_history', ['campaign_id', 'ad_group_id', 'date_add']],
    'ph30' => ['products_history_30', ['campaign_id', 'ad_group_id', 'date_add']],
    'kpt'  => ['products_keyword_planner_terms', ['source_url', 'keyword_text']],
];

// Child tables without a UNIQUE key on product_id: stat key => table.
// These only need a plain UPDATE.
$plain_tables = [
    'comments_to_repoint'  => 'products_comments',
    'alerts_to_repoint'    => 'campaign_alerts',
    'sync_logs_to_repoint' => 'products_merchant_sync_log',
];

// Builds the shared "FROM ... JOIN ... WHERE" tail that selects the loser's rows
// (alias l) having a winner row (alias w) with identical key columns.
// It is used both for counting ("SELECT COUNT(*) ...") and deleting ("DELETE l ...").
$conflict_from = static function (string $table, array $key_cols, int $winner_id, int $loser_id): string {
    $on = ["w.product_id = $winner_id"];
    foreach ($key_cols as $col) {
        $on[] = "w.$col = l.$col";
    }
    return "FROM $table l INNER JOIN $table w ON " . implode(' AND ', $on)
        . " WHERE l.product_id = $loser_id";
};

$processed = 0;
foreach ($groups as $g) {
    $client_id = (int) $g['client_id'];
    $oid = $g['oid'];
    $ids = array_map('intval', explode(',', $g['ids']));

    // Pick the winner: most clicks, then most impressions, then most aggregate
    // rows, then the lowest id.
    $rank_sql = "
        SELECT p.id,
               COALESCE(SUM(pa.clicks_all_time),0) AS tot_clicks,
               COALESCE(SUM(pa.impressions_all_time),0) AS tot_impr,
               COUNT(pa.id) AS agg_rows
        FROM products p
        LEFT JOIN products_aggregate pa ON pa.product_id = p.id
        WHERE p.id IN (" . implode(',', $ids) . ")
        GROUP BY p.id
        ORDER BY tot_clicks DESC, tot_impr DESC, agg_rows DESC, p.id ASC
    ";
    $ranked = $pdo->query($rank_sql)->fetchAll(PDO::FETCH_ASSOC);

    if (empty($ranked)) {
        // The products disappeared between the group query and now. Without this
        // guard the winner would become id 0 and child rows would be repointed to it.
        echo "POMINIETO grupe (client $client_id, offer '$oid'): produkty juz nie istnieja\n";
        $loser_ids = [];
    } else {
        $winner_id = (int) $ranked[0]['id'];
        // Losers are processed in ascending id order (the order of $ids).
        $loser_ids = array_filter($ids, static fn (int $id): bool => $id !== $winner_id);
    }
    $stat['losers_total'] += count($loser_ids);

    foreach ($loser_ids as $loser_id) {
        try {
            // One transaction per loser; dry-run only counts and never writes.
            if (!$DRY_RUN) {
                $pdo->beginTransaction();
            }

            // --- Tables with a UNIQUE key containing product_id ---
            foreach ($unique_tables as $prefix => [$table, $key_cols]) {
                $from = $conflict_from($table, $key_cols, $winner_id, $loser_id);
                $conflicts = (int) $pdo->query("SELECT COUNT(*) $from")->fetchColumn();
                $total = (int) $pdo->query("SELECT COUNT(*) FROM $table WHERE product_id = $loser_id")->fetchColumn();
                $stat[$prefix . '_conflicts_to_delete'] += $conflicts;
                $stat[$prefix . '_to_repoint'] += ($total - $conflicts);
                if (!$DRY_RUN) {
                    // The colliding rows must go first, otherwise the UPDATE
                    // would violate the UNIQUE key.
                    $pdo->exec("DELETE l $from");
                    $pdo->exec("UPDATE $table SET product_id = $winner_id WHERE product_id = $loser_id");
                }
            }

            // --- Tables without a UNIQUE key on product_id: plain UPDATE ---
            foreach ($plain_tables as $stat_key => $table) {
                $stat[$stat_key] += (int) $pdo->query("SELECT COUNT(*) FROM $table WHERE product_id = $loser_id")->fetchColumn();
                if (!$DRY_RUN) {
                    $pdo->exec("UPDATE $table SET product_id = $winner_id WHERE product_id = $loser_id");
                }
            }

            // --- Finally: remove the loser from products ---
            $stat['products_to_delete']++;
            if (!$DRY_RUN) {
                $pdo->exec("DELETE FROM products WHERE id = $loser_id");
                $pdo->commit();
            }
        } catch (Throwable $e) {
            // Undo the partial merge of this loser, report it, and stop the run.
            if (!$DRY_RUN && $pdo->inTransaction()) {
                $pdo->rollBack();
            }
            echo "BLAD przy losere $loser_id (winner $winner_id, client $client_id, offer '$oid'): " . $e->getMessage() . "\n";
            throw $e;
        }
    }

    $processed++;
    if ($processed % 100 === 0) {
        echo " ... przetworzono $processed grup\n";
    }
}
// === Final report ===
// Prints every counter collected in $stat, one per line, framed by rules.
$rule = str_repeat('=', 70);
$mode_label = $DRY_RUN ? '(DRY-RUN)' : '(WYKONANO)';

echo "\n" . $rule . "\n";
echo " STATYSTYKI " . $mode_label . "\n";
echo $rule . "\n";

foreach ($stat as $counter => $value) {
    printf(" %-30s : %d\n", $counter, $value);
}

// After a dry run, remind the operator how to apply the changes for real.
if ($DRY_RUN) {
    echo "\nAby wykonac zmiany, uruchom z flaga --execute\n";
}