172 lines
5.4 KiB
Python
172 lines
5.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Fetch SEO data from the Semstorm API (TOP 3/10/20/50 positions, traffic).

Usage:
    python scripts/reports/fetch_semstorm_data.py --domain innsi.pl
    python scripts/reports/fetch_semstorm_data.py --domain innsi.pl --month 2026-02
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
import io
|
|
from pathlib import Path
|
|
|
|
# Script-only: re-wrap stdout as UTF-8, replacing characters that cannot be
# encoded instead of raising. When this file is imported as a module,
# stdout is deliberately left untouched.
if __name__ == "__main__":
    sys.stdout = io.TextIOWrapper(
        sys.stdout.buffer,
        encoding="utf-8",
        errors="replace",
    )
|
|
|
|
import requests
|
|
# Project root: the directory two levels above the one containing this file.
# resolve() makes the path absolute first, so ROOT stays correct even when
# __file__ is a relative path (as it can be for a directly executed script
# on Python < 3.9).
ROOT = Path(__file__).resolve().parent.parent.parent

# Put the project root on sys.path so the project-local ``src`` package
# below can be imported regardless of the current working directory.
sys.path.insert(0, str(ROOT))

from src.gads_v2.config import load_env

# NOTE(review): load_env presumably populates os.environ from the .env file;
# the functions in this module read their settings from os.environ.
load_env(ROOT / ".env")
|
|
|
|
|
|
def get_semstorm_token():
    """Log in to Semstorm and return ``(base_url, bearer_token)``.

    Reads ``SEMSTORM_API_BASE`` (default ``https://api.semstorm.com``),
    ``SEMSTORM_LOGIN`` and ``SEMSTORM_PASSWORD`` from the environment.

    Raises:
        ValueError: if the login or password is missing or empty, or if the
            login response carries no ``token``.
        requests.HTTPError: if the login endpoint answers with an error status.
    """
    env = os.environ
    api_base = env.get("SEMSTORM_API_BASE", "https://api.semstorm.com")
    credentials = {
        "username": env.get("SEMSTORM_LOGIN", ""),
        "password": env.get("SEMSTORM_PASSWORD", ""),
    }

    # Fail before any network traffic when either credential is absent.
    if not all(credentials.values()):
        raise ValueError("Brak SEMSTORM_LOGIN / SEMSTORM_PASSWORD w .env")

    response = requests.post(
        f"{api_base}/consumer/login",
        data=credentials,
        headers={"Accept": "application/json"},
        timeout=30,
    )
    response.raise_for_status()

    bearer = response.json().get("token", "")
    if bearer:
        return api_base, bearer
    raise ValueError("Semstorm: brak tokenu w odpowiedzi logowania")
|
|
|
|
|
|
def _history_path(domain):
    """Return the location of *domain*'s cumulative Semstorm history file."""
    client_dir = ROOT / "clients" / domain
    return client_dir / "semstorm_history.json"
|
|
|
|
|
|
def _load_local_history(domain):
    """Return the parsed history file for *domain*, or ``[]`` if none exists."""
    history_file = _history_path(domain)
    if not history_file.exists():
        return []
    with open(history_file, "r", encoding="utf-8") as fh:
        return json.load(fh)
|
|
|
|
|
|
def _save_local_history(domain, entries):
    """Persist Semstorm history for *domain*: one entry per month, sorted by date.

    Args:
        domain: Domain whose history file is (over)written; parent
            directories are created when missing.
        entries: Iterable of dicts carrying at least ``"month"`` and
            ``"date"`` keys.

    Returns:
        The deduplicated entries, sorted ascending by ``"date"``, exactly as
        written to disk.
    """
    path = _history_path(domain)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Keep the entry with the newest date for each month, independent of the
    # order of the input. ``>=`` lets a later entry replace an earlier one
    # carrying the same date, so callers that append fresh data after stored
    # data still see the fresh copy win.
    # Assumes "date" values are ISO ``YYYY-MM-DD`` strings, which compare
    # chronologically as plain strings.
    latest_by_month = {}
    for entry in entries:
        kept = latest_by_month.get(entry["month"])
        if kept is None or entry["date"] >= kept["date"]:
            latest_by_month[entry["month"]] = entry

    sorted_entries = sorted(latest_by_month.values(), key=lambda e: e["date"])
    with open(path, "w", encoding="utf-8") as f:
        json.dump(sorted_entries, f, indent=2, ensure_ascii=False)
    return sorted_entries
|
|
|
|
|
|
def _get_report_start(domain):
|
|
"""Get REPORT_START_DATE for domain from .env."""
|
|
key = f"REPORT_START_DATE_{domain}"
|
|
return os.environ.get(key)
|
|
|
|
|
|
def fetch_domain_stats(domain, month=None):
|
|
"""Fetch Semstorm domain stats. Merges API data with local history."""
|
|
base, token = get_semstorm_token()
|
|
|
|
r = requests.post(f"{base}/semstorm/v4/explorer/domain-stats",
|
|
json={"domains": [domain]},
|
|
headers={
|
|
"Accept": "application/json",
|
|
"Content-Type": "application/json",
|
|
"Authorization": f"Bearer {token}",
|
|
},
|
|
timeout=30,
|
|
)
|
|
r.raise_for_status()
|
|
data = r.json()
|
|
|
|
api_entries = []
|
|
if data.get("success") and domain in data.get("results", {}):
|
|
domain_data = data["results"][domain]
|
|
for date_key, metrics in domain_data.items():
|
|
kw = metrics.get("keywords", {})
|
|
api_entries.append({
|
|
"date": f"{date_key[:4]}-{date_key[4:6]}-{date_key[6:]}",
|
|
"month": f"{date_key[:4]}-{date_key[4:6]}",
|
|
"top3": kw.get("top3", 0),
|
|
"top10": kw.get("top10", 0),
|
|
"top20": kw.get("top20", 0),
|
|
"top50": kw.get("top50", 0),
|
|
"top100": kw.get("top100", 0),
|
|
"traffic": metrics.get("traffic", 0),
|
|
})
|
|
|
|
# Merge with local history (local + API, deduplicated)
|
|
local_entries = _load_local_history(domain)
|
|
all_entries = local_entries + api_entries
|
|
entries = _save_local_history(domain, all_entries)
|
|
|
|
# Filter by REPORT_START_DATE if set
|
|
start = _get_report_start(domain)
|
|
if start:
|
|
entries = [e for e in entries if e["month"] >= start]
|
|
|
|
# If month specified, find that month + previous for MoM
|
|
if month:
|
|
current = next((e for e in entries if e["month"] == month), None)
|
|
prev_entries = [e for e in entries if e["month"] < month]
|
|
previous = prev_entries[-1] if prev_entries else None
|
|
|
|
result = {
|
|
"current": current,
|
|
"previous": previous,
|
|
"history": entries,
|
|
}
|
|
if current and previous:
|
|
result["mom_change"] = {
|
|
"top3_pct": _pct(current["top3"], previous["top3"]),
|
|
"top10_pct": _pct(current["top10"], previous["top10"]),
|
|
"top50_pct": _pct(current["top50"], previous["top50"]),
|
|
"traffic_pct": _pct(current["traffic"], previous["traffic"]),
|
|
}
|
|
return result
|
|
|
|
# Return latest + history
|
|
return {
|
|
"current": entries[-1] if entries else None,
|
|
"previous": entries[-2] if len(entries) > 1 else None,
|
|
"history": entries,
|
|
}
|
|
|
|
|
|
def _pct(current, previous):
|
|
if previous == 0:
|
|
return 100.0 if current > 0 else 0.0
|
|
return round(((current - previous) / previous) * 100, 1)
|
|
|
|
|
|
def main():
    """Parse ``--domain`` / ``--month`` and print the fetched stats as JSON."""
    cli = argparse.ArgumentParser(description="Pobierz dane Semstorm")
    cli.add_argument("--domain", required=True)
    cli.add_argument("--month", help="YYYY-MM")
    opts = cli.parse_args()

    stats = fetch_domain_stats(opts.domain, opts.month)
    print(json.dumps(stats, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    main()
|