update
This commit is contained in:
290
tmp/merge_duplicate_products.php
Normal file
290
tmp/merge_duplicate_products.php
Normal file
@@ -0,0 +1,290 @@
|
||||
<?php
|
||||
/**
 * Migrator: merges duplicate rows in the `products` table that were created
 * by repeated imports.
 *
 * Rules:
 *  - Duplicate grouping: (client_id, TRIM(offer_id)).
 *  - Winner per group: ORDER BY clicks_all_time DESC, impressions_all_time DESC,
 *    agg_rows DESC, id ASC.
 *  - Strategy: WINNER KEEPS ALL.
 *      Tables with a UNIQUE key containing product_id: loser rows that conflict
 *      with the winner are deleted, the remaining rows are re-pointed with UPDATE.
 *      Tables without a UNIQUE key on product_id: plain UPDATE.
 *  - Everything is done per loser inside a transaction.
 *
 * Run modes:
 *   php merge_duplicate_products.php --dry-run              (default - only counts)
 *   php merge_duplicate_products.php --execute              (applies the changes)
 *   php merge_duplicate_products.php --execute --client=2   (restrict to one client)
 *   php merge_duplicate_products.php --dry-run --limit=10   (restrict the number of groups)
 *
 * Option values MUST be attached with "=" (--client=2, not "--client 2"):
 * getopt() does not read a space-separated value for optional-value options,
 * so a detached value would otherwise be dropped and the run would cover
 * every client.
 */

error_reporting(E_ALL);
ini_set('display_errors', '1');

// config.php and the medoo library are resolved relative to the project root.
chdir(__DIR__ . '/..');

require 'config.php';
require 'libraries/medoo/medoo.php';

$opts = getopt('', ['dry-run', 'execute', 'client::', 'limit::']) ?: [];

// Refuse ambiguous mode selection: this tool deletes data, so "both flags"
// must never silently resolve to EXECUTE.
if (isset($opts['dry-run'], $opts['execute'])) {
    echo "BLAD: podano jednoczesnie --dry-run i --execute; wybierz jeden tryb.\n";
    exit(1);
}

// Reads an optional non-negative integer option; 0 means "option not used".
// getopt() reports an option passed without "=value" as false and a repeated
// option as an array - both are rejected instead of being cast to 0, which
// would silently remove the restriction.
$read_int_option = static function (string $name) use ($opts): int {
    if (!isset($opts[$name])) {
        return 0;
    }

    $raw = $opts[$name];
    if (!is_string($raw) || preg_match('/^[0-9]+$/D', $raw) !== 1) {
        echo "BLAD: opcja --$name wymaga nieujemnej liczby calkowitej podanej jako --$name=N\n";
        exit(1);
    }

    return (int) $raw;
};

$DRY_RUN = !isset($opts['execute']);
$CLIENT_ID = $read_int_option('client');
$LIMIT = $read_int_option('limit');

$mdb = new medoo([
    'database_type' => 'mysql',
    'database_name' => $database['name'],
    'server' => $database['remote_host'],
    'username' => $database['user'],
    'password' => $database['password'],
    'charset' => 'utf8'
]);

// Work on the raw PDO handle and make every SQL error throw.
$pdo = $mdb->pdo;
$pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

echo str_repeat('=', 70) . "\n";
echo " TRYB: " . ($DRY_RUN ? 'DRY-RUN (zadne zmiany nie beda zapisane)' : 'EXECUTE (zmiany zostana zapisane)') . "\n";
echo " KLIENT: " . ($CLIENT_ID > 0 ? $CLIENT_ID : 'WSZYSCY') . "\n";
echo " LIMIT GRUP: " . ($LIMIT > 0 ? $LIMIT : 'BRAK') . "\n";
echo str_repeat('=', 70) . "\n";
|
||||
|
||||
// === 1. Fetch all duplicate groups ===

// GROUP_CONCAT() output is truncated at group_concat_max_len (1024 bytes by
// default). A truncated id list can end in a chopped id, which intval() would
// turn into the id of an unrelated product - so raise the limit for this
// session before building the per-group id lists.
$pdo->exec('SET SESSION group_concat_max_len = 1048576');

// Only rows with a non-empty offer_id can be matched as duplicates.
$where = "TRIM(COALESCE(offer_id,'')) <> ''";
if ($CLIENT_ID > 0) {
    $where .= ' AND client_id = ' . (int) $CLIENT_ID;
}

// One row per (client_id, trimmed offer_id) that occurs more than once,
// together with the ascending list of product ids in that group.
$groups_sql = "
    SELECT client_id, TRIM(offer_id) AS oid, COUNT(*) AS cnt, GROUP_CONCAT(id ORDER BY id) AS ids
    FROM products
    WHERE $where
    GROUP BY client_id, TRIM(offer_id)
    HAVING cnt > 1
    ORDER BY client_id, oid
";
if ($LIMIT > 0) {
    $groups_sql .= ' LIMIT ' . (int) $LIMIT;
}

$groups = $pdo->query($groups_sql)->fetchAll(PDO::FETCH_ASSOC);
echo "Grup duplikatow: " . count($groups) . "\n";

if (empty($groups)) {
    echo "Brak duplikatow do scalenia.\n";
    exit(0);
}
|
||||
|
||||
// === 2. Summary statistics ===
// Counters accumulated while iterating; the key order below is also the
// order in which the final report prints them.
$stat = ['groups' => count($groups)] + array_fill_keys(
    [
        'losers_total',
        'pa_conflicts_to_delete',
        'pa_to_repoint',
        'pt_conflicts_to_delete',
        'pt_to_repoint',
        'ph_conflicts_to_delete',
        'ph_to_repoint',
        'ph30_conflicts_to_delete',
        'ph30_to_repoint',
        'kpt_conflicts_to_delete',
        'kpt_to_repoint',
        'comments_to_repoint',
        'alerts_to_repoint',
        'sync_logs_to_repoint',
        'products_to_delete',
    ],
    0
);
|
||||
|
||||
// === 3. Iteration ===

// Tables whose UNIQUE key contains product_id.
// Map: statistics prefix => [table name, remaining columns of the UNIQUE key].
$unique_tables = [
    'pa'   => ['products_aggregate', ['campaign_id', 'ad_group_id']],
    'pt'   => ['products_temp', ['campaign_id', 'ad_group_id']],
    'ph'   => ['products_history', ['campaign_id', 'ad_group_id', 'date_add']],
    'ph30' => ['products_history_30', ['campaign_id', 'ad_group_id', 'date_add']],
    'kpt'  => ['products_keyword_planner_terms', ['source_url', 'keyword_text']],
];

// Tables without a UNIQUE key on product_id: rows are simply re-pointed.
// Map: statistics prefix => table name.
$plain_tables = [
    'comments'  => 'products_comments',
    'alerts'    => 'campaign_alerts',
    'sync_logs' => 'products_merchant_sync_log',
];

// Per unique table, a sprintf() template (winner id, loser id) for the shared
// FROM/JOIN/WHERE clause that selects the loser rows colliding with a winner
// row on the UNIQUE key. The same clause drives both the COUNT and the DELETE,
// so the reported number and the deleted rows cannot drift apart.
$conflict_from = [];
foreach ($unique_tables as $prefix => [$table, $key_columns]) {
    $key_match = implode(' AND ', array_map(
        static fn (string $col): string => "t_w.$col = t_l.$col",
        $key_columns
    ));
    $conflict_from[$prefix] = "FROM $table t_l INNER JOIN $table t_w"
        . " ON t_w.product_id = %d AND $key_match"
        . " WHERE t_l.product_id = %d";
}

// Used below to detect id lists that GROUP_CONCAT() cut off.
$group_concat_max_len = (int) $pdo->query('SELECT @@SESSION.group_concat_max_len')->fetchColumn();

$processed = 0;
foreach ($groups as $g) {
    $client_id = (int) $g['client_id'];
    $oid = $g['oid'];
    $ids = array_map('intval', explode(',', $g['ids']));

    // A truncated id list may end in a chopped id that intval() would turn
    // into the id of an unrelated product. Abort before touching any data.
    if (count($ids) !== (int) $g['cnt'] || strlen($g['ids']) >= $group_concat_max_len) {
        throw new RuntimeException(
            "Lista id grupy (client $client_id, offer '$oid') wyglada na obcieta przez"
            . " group_concat_max_len ($group_concat_max_len); zwieksz limit i uruchom ponownie."
        );
    }

    // Pick the winner: most summed clicks, then most summed impressions, then
    // most products_aggregate rows, then the lowest id.
    // Only integer-cast ids are interpolated into the SQL.
    $rank_sql = "
        SELECT p.id,
               COALESCE(SUM(pa.clicks_all_time),0) AS tot_clicks,
               COALESCE(SUM(pa.impressions_all_time),0) AS tot_impr,
               COUNT(pa.id) AS agg_rows
        FROM products p
        LEFT JOIN products_aggregate pa ON pa.product_id = p.id
        WHERE p.id IN (" . implode(',', $ids) . ")
        GROUP BY p.id
        ORDER BY tot_clicks DESC, tot_impr DESC, agg_rows DESC, p.id ASC
    ";
    $ranked = $pdo->query($rank_sql)->fetchAll(PDO::FETCH_ASSOC);

    // Without this guard an empty ranking would make winner_id 0 and turn
    // every product of the group into a loser.
    if ($ranked === []) {
        throw new RuntimeException(
            "Brak produktow do rankingu w grupie (client $client_id, offer '$oid')."
        );
    }

    $winner_id = (int) $ranked[0]['id'];
    $loser_ids = array_filter($ids, static fn (int $i): bool => $i !== $winner_id);

    $stat['losers_total'] += count($loser_ids);

    foreach ($loser_ids as $loser_id) {
        try {
            if (!$DRY_RUN) {
                $pdo->beginTransaction();
            }

            // --- Tables with a UNIQUE key containing product_id ---
            foreach ($unique_tables as $prefix => [$table, $key_columns]) {
                $from = sprintf($conflict_from[$prefix], $winner_id, $loser_id);

                $conflicts = (int) $pdo->query("SELECT COUNT(*) $from")->fetchColumn();
                $total = (int) $pdo->query(
                    "SELECT COUNT(*) FROM $table WHERE product_id = $loser_id"
                )->fetchColumn();

                $stat["{$prefix}_conflicts_to_delete"] += $conflicts;
                $stat["{$prefix}_to_repoint"] += ($total - $conflicts);

                if (!$DRY_RUN) {
                    // Drop the colliding loser rows first so the UPDATE below
                    // cannot violate the UNIQUE key.
                    $pdo->exec("DELETE t_l $from");
                    $pdo->exec("UPDATE $table SET product_id = $winner_id WHERE product_id = $loser_id");
                }
            }

            // --- Tables without a UNIQUE key on product_id: plain UPDATE ---
            foreach ($plain_tables as $prefix => $table) {
                $stat["{$prefix}_to_repoint"] += (int) $pdo->query(
                    "SELECT COUNT(*) FROM $table WHERE product_id = $loser_id"
                )->fetchColumn();

                if (!$DRY_RUN) {
                    $pdo->exec("UPDATE $table SET product_id = $winner_id WHERE product_id = $loser_id");
                }
            }

            // --- Finally: remove the loser from products ---
            $stat['products_to_delete']++;
            if (!$DRY_RUN) {
                $pdo->exec("DELETE FROM products WHERE id = $loser_id");
                $pdo->commit();
            }
        } catch (Throwable $e) {
            // Undo the partial merge of this loser, report it, then abort the run.
            if (!$DRY_RUN && $pdo->inTransaction()) {
                $pdo->rollBack();
            }
            echo "BLAD przy losere $loser_id (winner $winner_id, client $client_id, offer '$oid'): " . $e->getMessage() . "\n";
            throw $e;
        }
    }

    $processed++;
    if ($processed % 100 === 0) {
        echo " ... przetworzono $processed grup\n";
    }
}
|
||||
|
||||
// Final report: a framed header naming the run mode, then one line per counter.
$rule = str_repeat('=', 70);
$mode_label = $DRY_RUN ? '(DRY-RUN)' : '(WYKONANO)';

echo "\n", $rule, "\n";
echo " STATYSTYKI ", $mode_label, "\n";
echo $rule, "\n";

foreach ($stat as $name => $value) {
    printf(" %-30s : %d\n", $name, $value);
}

// In dry-run mode, remind the operator how to apply the changes.
if ($DRY_RUN) {
    echo "\nAby wykonac zmiany, uruchom z flaga --execute\n";
}
|
||||
Reference in New Issue
Block a user