#!/usr/bin/env python3
"""
Fetch SEO data from the Semstorm API (TOP 3/10/20/50 positions, traffic).

Usage:
    python scripts/reports/fetch_semstorm_data.py --domain innsi.pl
    python scripts/reports/fetch_semstorm_data.py --domain innsi.pl --month 2026-02
"""

import argparse
import json
import os
import sys
import io
from pathlib import Path

if __name__ == "__main__":
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
# When imported as module, don't touch stdout

import requests

# Repository root: three directory levels above this file.
ROOT = Path(__file__).parent.parent.parent
# The project package is imported by path, so ROOT must be on sys.path before
# the import below (this is why the import is not at the top of the file).
sys.path.insert(0, str(ROOT))

from src.gads_v2.config import load_env

load_env(ROOT / ".env")


def get_semstorm_token():
    """Authenticate against Semstorm and return ``(base_url, bearer_token)``.

    Reads ``SEMSTORM_API_BASE`` (defaults to ``https://api.semstorm.com``),
    ``SEMSTORM_LOGIN`` and ``SEMSTORM_PASSWORD`` from the environment.

    Raises:
        ValueError: if the credentials are missing or the login response
            carries no token.
        requests.HTTPError: if the login request returns an error status.
    """
    base = os.environ.get("SEMSTORM_API_BASE", "https://api.semstorm.com")
    login = os.environ.get("SEMSTORM_LOGIN", "")
    password = os.environ.get("SEMSTORM_PASSWORD", "")
    if not login or not password:
        raise ValueError("Brak SEMSTORM_LOGIN / SEMSTORM_PASSWORD w .env")

    r = requests.post(
        f"{base}/consumer/login",
        data={
            "username": login,
            "password": password,
        },
        headers={"Accept": "application/json"},
        timeout=30,
    )
    r.raise_for_status()
    token = r.json().get("token", "")
    if not token:
        raise ValueError("Semstorm: brak tokenu w odpowiedzi logowania")
    return base, token


def _history_path(domain):
    """Return the path of the local cumulative Semstorm history file."""
    return ROOT / "clients" / domain / "semstorm_history.json"


def _load_local_history(domain):
    """Load the locally stored Semstorm history; ``[]`` if no file exists."""
    path = _history_path(domain)
    if path.exists():
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    return []


def _save_local_history(domain, entries):
    """Persist the history de-duplicated by month and sorted by date.

    When several entries share a month, the one appearing last in *entries*
    wins (callers pass local entries first, then fresh API entries).

    Returns:
        The de-duplicated, date-sorted list that was written.
    """
    path = _history_path(domain)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Deduplicate by month; later entries overwrite earlier ones.
    by_month = {}
    for e in entries:
        by_month[e["month"]] = e
    sorted_entries = sorted(by_month.values(), key=lambda x: x["date"])

    # Write to a sibling temp file and swap it in, so a failure mid-write
    # cannot leave a truncated history file behind.
    tmp_path = path.with_name(path.name + ".tmp")
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(sorted_entries, f, indent=2, ensure_ascii=False)
    os.replace(tmp_path, path)
    return sorted_entries


def _get_report_start(domain):
    """Return ``REPORT_START_DATE_<domain>`` from the environment, or None."""
    key = f"REPORT_START_DATE_{domain}"
    return os.environ.get(key)


def _parse_api_entries(data, domain):
    """Convert a domain-stats API response into a list of history entries.

    Returns ``[]`` when the response is not flagged successful, does not
    contain *domain*, or the per-domain payload is not a mapping.
    """
    results = data.get("results") or {}
    if not data.get("success") or domain not in results:
        return []

    domain_data = results[domain]
    if not isinstance(domain_data, dict):
        return []

    api_entries = []
    for date_key, metrics in domain_data.items():
        if not isinstance(metrics, dict):
            continue
        kw = metrics.get("keywords") or {}
        api_entries.append({
            # date_key is sliced as YYYYMMDD.
            "date": f"{date_key[:4]}-{date_key[4:6]}-{date_key[6:]}",
            "month": f"{date_key[:4]}-{date_key[4:6]}",
            "top3": kw.get("top3", 0),
            "top10": kw.get("top10", 0),
            "top20": kw.get("top20", 0),
            "top50": kw.get("top50", 0),
            "top100": kw.get("top100", 0),
            "traffic": metrics.get("traffic", 0),
        })
    return api_entries


def _mom_change(current, previous):
    """Build the month-over-month percentage changes between two entries."""
    change = {
        "top3_pct": _pct(current["top3"], previous["top3"]),
        "top10_pct": _pct(current["top10"], previous["top10"]),
        "top50_pct": _pct(current["top50"], previous["top50"]),
        "traffic_pct": _pct(current["traffic"], previous["traffic"]),
    }
    # TOP 20 is collected alongside the other buckets; report it too, but
    # only when both snapshots actually carry the value.
    if "top20" in current and "top20" in previous:
        change["top20_pct"] = _pct(current["top20"], previous["top20"])
    return change


def fetch_domain_stats(domain, month=None):
    """Fetch Semstorm domain stats and merge them with the local history.

    Args:
        domain: Domain to query, e.g. ``"innsi.pl"``.
        month: Optional ``YYYY-MM`` month. When given, ``current`` is the
            entry for that month and ``previous`` the closest earlier one.

    Returns:
        A dict with ``current``, ``previous`` and ``history`` keys. When
        *month* is given and both entries exist, ``mom_change`` is added.

    Raises:
        ValueError: propagated from ``get_semstorm_token``.
        requests.HTTPError: if a Semstorm request returns an error status.
    """
    base, token = get_semstorm_token()

    r = requests.post(
        f"{base}/semstorm/v4/explorer/domain-stats",
        json={"domains": [domain]},
        headers={
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": f"Bearer {token}",
        },
        timeout=30,
    )
    r.raise_for_status()
    api_entries = _parse_api_entries(r.json(), domain)

    # Merge with local history (local + API, deduplicated)
    local_entries = _load_local_history(domain)
    entries = _save_local_history(domain, local_entries + api_entries)

    # Filter by REPORT_START_DATE if set. Month keys are "YYYY-MM", so the
    # configured value is truncated to the same width; otherwise a full date
    # such as "2025-06-01" would sort after "2025-06" and drop the start month.
    start = _get_report_start(domain)
    if start:
        start_month = start.strip()[:7]
        entries = [e for e in entries if e["month"] >= start_month]

    # If month specified, find that month + previous for MoM
    if month:
        current = next((e for e in entries if e["month"] == month), None)
        # entries are date-sorted, so the last earlier entry is the closest.
        prev_entries = [e for e in entries if e["month"] < month]
        previous = prev_entries[-1] if prev_entries else None

        result = {
            "current": current,
            "previous": previous,
            "history": entries,
        }
        if current and previous:
            result["mom_change"] = _mom_change(current, previous)
        return result

    # Return latest + history
    return {
        "current": entries[-1] if entries else None,
        "previous": entries[-2] if len(entries) > 1 else None,
        "history": entries,
    }


def _pct(current, previous):
    """Return the percentage change from *previous* to *current*.

    The result is rounded to one decimal place. When *previous* is 0 the
    change is reported as 100.0 if *current* is positive, otherwise 0.0.
    """
    if previous == 0:
        return 100.0 if current > 0 else 0.0
    return round(((current - previous) / previous) * 100, 1)


def main():
    """CLI entry point: fetch stats for ``--domain`` and print them as JSON."""
    parser = argparse.ArgumentParser(description="Pobierz dane Semstorm")
    parser.add_argument("--domain", required=True)
    parser.add_argument("--month", help="YYYY-MM")
    args = parser.parse_args()

    data = fetch_domain_stats(args.domain, args.month)
    print(json.dumps(data, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    main()