Files
google-ads-ver-2/scripts/reports/fetch_semstorm_data.py
2026-05-15 09:28:11 +02:00

172 lines
5.4 KiB
Python

#!/usr/bin/env python3
"""
Pobiera dane SEO z Semstorm API (pozycje TOP 3/10/20/50, traffic).
Użycie:
python scripts/reports/fetch_semstorm_data.py --domain innsi.pl
python scripts/reports/fetch_semstorm_data.py --domain innsi.pl --month 2026-02
"""
import argparse
import json
import os
import sys
import io
from pathlib import Path
# Only when this file is executed as a script: replace sys.stdout with a
# UTF-8 text wrapper around the underlying byte stream. Characters that
# cannot be encoded are replaced instead of raising.
if __name__ == "__main__":
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
# When imported as module, don't touch stdout
import requests
# Directory two levels above the directory that contains this file.
ROOT = Path(__file__).parent.parent.parent
# Put ROOT first on sys.path so the `src` package imported below resolves
# from there.
sys.path.insert(0, str(ROOT))
from src.gads_v2.config import load_env
# Runs at import time. NOTE(review): presumably loads the variables from
# ROOT/.env into os.environ, which the functions below read - confirm
# against load_env.
load_env(ROOT / ".env")
def get_semstorm_token():
    """Log in to the Semstorm API and obtain a bearer token.

    Reads ``SEMSTORM_API_BASE`` (default ``https://api.semstorm.com``),
    ``SEMSTORM_LOGIN`` and ``SEMSTORM_PASSWORD`` from the environment and
    posts the credentials as form data to ``<base>/consumer/login``.

    Returns:
        A ``(base, token)`` tuple: the API base URL that was used and the
        ``token`` value taken from the JSON login response.

    Raises:
        ValueError: If either credential is missing or empty, or if the
            login response contains no non-empty ``token``.
        requests.HTTPError: If the login request returns an error status.
    """
    api_base = os.getenv("SEMSTORM_API_BASE", "https://api.semstorm.com")
    username = os.getenv("SEMSTORM_LOGIN", "")
    secret = os.getenv("SEMSTORM_PASSWORD", "")
    if not (username and secret):
        raise ValueError("Brak SEMSTORM_LOGIN / SEMSTORM_PASSWORD w .env")

    response = requests.post(
        f"{api_base}/consumer/login",
        data={"username": username, "password": secret},
        headers={"Accept": "application/json"},
        timeout=30,
    )
    response.raise_for_status()

    bearer = response.json().get("token", "")
    if bearer:
        return api_base, bearer
    raise ValueError("Semstorm: brak tokenu w odpowiedzi logowania")
def _history_path(domain):
    """Return the location of the cumulative Semstorm history file for *domain*.

    The file is ``semstorm_history.json`` inside ``clients/<domain>`` under
    ``ROOT``.
    """
    return ROOT.joinpath("clients", domain, "semstorm_history.json")
def _load_local_history(domain):
    """Return the Semstorm history stored on disk for *domain*.

    The history file is read as UTF-8 JSON. An empty list is returned when
    the file does not exist.
    """
    history_file = _history_path(domain)
    if not history_file.exists():
        return []
    return json.loads(history_file.read_text(encoding="utf-8"))
def _save_local_history(domain, entries):
    """Persist the Semstorm history for *domain* and return what was written.

    Entries are collapsed to one per ``month`` value; when several share a
    month, the one appearing last in *entries* is kept. The survivors are
    ordered by their ``date`` value and written as indented UTF-8 JSON. The
    parent directory is created if it is missing.
    """
    target = _history_path(domain)
    target.parent.mkdir(parents=True, exist_ok=True)

    # A later entry replaces an earlier one that has the same month.
    last_per_month = {entry["month"]: entry for entry in entries}
    ordered = list(last_per_month.values())
    ordered.sort(key=lambda entry: entry["date"])

    with open(target, "w", encoding="utf-8") as handle:
        json.dump(ordered, handle, indent=2, ensure_ascii=False)
    return ordered
def _get_report_start(domain):
    """Look up the report start configured for *domain*.

    Returns the value of the ``REPORT_START_DATE_<domain>`` environment
    variable, or ``None`` when it is not set.
    """
    return os.getenv(f"REPORT_START_DATE_{domain}")
def _parse_api_entries(data, domain):
    """Convert a domain-stats response payload into history entries.

    Returns an empty list when the payload is not flagged ``success`` or
    when it holds no per-date mapping for *domain*. This includes a null or
    non-mapping ``results`` value, so the caller can fall back to local
    history instead of crashing.
    """
    results = data.get("results")
    if not data.get("success") or not isinstance(results, dict):
        return []
    per_date = results.get(domain)
    if not isinstance(per_date, dict):
        return []

    parsed = []
    for date_key, metrics in per_date.items():
        # A null or empty "keywords" value is treated like a missing one.
        kw = metrics.get("keywords") or {}
        parsed.append({
            # The slicing treats date_key as a YYYYMMDD string.
            "date": f"{date_key[:4]}-{date_key[4:6]}-{date_key[6:]}",
            "month": f"{date_key[:4]}-{date_key[4:6]}",
            "top3": kw.get("top3", 0),
            "top10": kw.get("top10", 0),
            "top20": kw.get("top20", 0),
            "top50": kw.get("top50", 0),
            "top100": kw.get("top100", 0),
            "traffic": metrics.get("traffic", 0),
        })
    return parsed


def _filter_from_start(entries, start):
    """Keep only entries whose month is not before the report start.

    *start* may be ``YYYY-MM`` or a longer date such as ``YYYY-MM-DD``. Only
    its first seven characters are compared. Comparing the month string
    against a full date would drop the start month itself, because
    ``"2025-06" < "2025-06-01"``.
    """
    if not start:
        return entries
    start_month = start.strip()[:7]
    return [e for e in entries if e["month"] >= start_month]


def _month_over_month(current, previous):
    """Build the ``mom_change`` mapping of percentage changes."""
    # Keys are limited to the four metrics the existing output exposes.
    return {
        "top3_pct": _pct(current["top3"], previous["top3"]),
        "top10_pct": _pct(current["top10"], previous["top10"]),
        "top50_pct": _pct(current["top50"], previous["top50"]),
        "traffic_pct": _pct(current["traffic"], previous["traffic"]),
    }


def fetch_domain_stats(domain, month=None):
    """Fetch Semstorm domain stats and merge them with the local history.

    Logs in, requests ``/semstorm/v4/explorer/domain-stats`` for *domain*,
    appends the returned entries to the locally stored history, saves the
    deduplicated result, and then applies the optional
    ``REPORT_START_DATE_<domain>`` filter to what is returned.

    Args:
        domain: Domain name to query. It is also used as the key into the
            API ``results`` mapping and as the local history directory name.
        month: Optional ``YYYY-MM`` string selecting the month to report.

    Returns:
        A dict with ``current``, ``previous`` and ``history`` keys.

        With *month* given, ``current`` is the entry for that month (or
        ``None``) and ``previous`` is the last entry from an earlier month
        (or ``None``). ``mom_change`` is added only when both are present.

        Without *month*, ``current`` and ``previous`` are the last two
        history entries (or ``None``), and no ``mom_change`` is included.

    Raises:
        ValueError: Propagated from ``get_semstorm_token``.
        requests.HTTPError: If the stats request returns an error status.
    """
    base, token = get_semstorm_token()
    r = requests.post(
        f"{base}/semstorm/v4/explorer/domain-stats",
        json={"domains": [domain]},
        headers={
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": f"Bearer {token}",
        },
        timeout=30,
    )
    r.raise_for_status()
    api_entries = _parse_api_entries(r.json(), domain)

    # Local entries go first so that, for a month present in both, the API
    # entry is the one kept by the deduplication in _save_local_history.
    all_entries = _load_local_history(domain) + api_entries
    entries = _save_local_history(domain, all_entries)

    # The full history is saved above; the start filter only narrows what is
    # returned to the caller.
    entries = _filter_from_start(entries, _get_report_start(domain))

    if month:
        current = next((e for e in entries if e["month"] == month), None)
        earlier = [e for e in entries if e["month"] < month]
        previous = earlier[-1] if earlier else None
        result = {
            "current": current,
            "previous": previous,
            "history": entries,
        }
        if current and previous:
            result["mom_change"] = _month_over_month(current, previous)
        return result

    # No month requested: report the two most recent entries.
    return {
        "current": entries[-1] if entries else None,
        "previous": entries[-2] if len(entries) > 1 else None,
        "history": entries,
    }
def _pct(current, previous):
    """Return the percentage change from *previous* to *current*.

    The result is rounded to one decimal place. When *previous* is zero the
    change is reported as ``100.0`` if *current* is positive, else ``0.0``.
    """
    if previous != 0:
        delta = current - previous
        return round(delta / previous * 100, 1)
    if current > 0:
        return 100.0
    return 0.0
def main():
    """Command-line entry point.

    Parses ``--domain`` (required) and ``--month`` (optional), fetches the
    stats for them and prints the result as indented JSON.
    """
    cli = argparse.ArgumentParser(description="Pobierz dane Semstorm")
    cli.add_argument("--domain", required=True)
    cli.add_argument("--month", help="YYYY-MM")
    options = cli.parse_args()

    stats = fetch_domain_stats(options.domain, options.month)
    rendered = json.dumps(stats, indent=2, ensure_ascii=False)
    print(rendered)
# Run the command-line interface only when this file is executed directly.
if __name__ == "__main__":
    main()