Files
google-ads-ver-2/scripts/reports/fetch_seo_links.py
2026-05-15 09:28:11 +02:00

115 lines
3.1 KiB
Python

#!/usr/bin/env python3
"""
Fetch SEO links from Google Sheets (public CSV export).

Usage:
    python scripts/reports/fetch_seo_links.py --domain innsi.pl --month 2026-02
"""
import argparse
import csv
import io
import json
import os
import sys
from pathlib import Path

# When executed as a script, re-wrap stdout so everything printed is encoded
# as UTF-8 and unencodable characters are replaced instead of raising.
# NOTE(review): presumably needed for consoles whose default encoding is not
# UTF-8 -- confirm.
if __name__ == "__main__":
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

import requests

# Directory two levels above the directory that contains this file.
# NOTE(review): presumably the repository root -- confirm.
ROOT = Path(__file__).parent.parent.parent
# ROOT must be on the import path before the `src.gads_v2` import below.
sys.path.insert(0, str(ROOT))

from src.gads_v2.config import load_env

# NOTE(review): presumably loads ROOT/.env into os.environ, which is where the
# GSHEET_SEO_* variables are looked up later -- confirm against load_env.
load_env(ROOT / ".env")
def fetch_seo_links(domain, month):
    """Fetch the SEO links recorded for a domain in a given month.

    The sheet is located through the ``GSHEET_SEO_LINKS_<domain>`` environment
    variable, whose value is either ``<spreadsheet_id>`` or
    ``<spreadsheet_id>:<gid>`` (``gid`` falls back to ``"0"`` when no colon is
    present). The sheet is downloaded through the Google Sheets CSV export URL
    and is read through its ``Data`` and ``URL`` columns.

    Args:
        domain: Domain name used as the suffix of the environment variable.
        month: Month to select, compared against the first seven characters
            of the ``Data`` column (``YYYY-MM``).

    Returns:
        ``None`` when the environment variable is unset or empty. Otherwise a
        list of dicts in sheet order, e.g.
        ``[{"date": "2026-02-01", "url": "https://..."}]``.

    Raises:
        requests.RequestException: If the download fails or the server
            answers with an error status.
    """
    sheet_config = os.environ.get(f"GSHEET_SEO_LINKS_{domain}", "")
    if not sheet_config:
        return None

    # "<id>:<gid>" selects a specific tab; a bare "<id>" means the first tab.
    spreadsheet_id, separator, gid = sheet_config.partition(":")
    if not separator:
        gid = "0"

    export_url = f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}/export?format=csv&gid={gid}"
    response = requests.get(export_url, timeout=30)
    response.raise_for_status()
    # Force UTF-8 instead of relying on the encoding guessed from the headers.
    response.encoding = "utf-8"

    links = []
    for row in csv.DictReader(io.StringIO(response.text)):
        # DictReader fills cells missing from a short row with None, so a
        # plain row.get(name, "") can still yield None; normalise to "".
        date = (row.get("Data") or "").strip()
        url = (row.get("URL") or "").strip()
        if not date or not url:
            continue
        # Match month (date format: YYYY-MM-DD)
        if date[:7] == month:
            links.append({"date": date, "url": url})
    return links
def fetch_seo_activities(domain, month):
    """Fetch the SEO activities description for a domain in a given month.

    The sheet is located through the ``GSHEET_SEO_ACTIVITIES_<domain>``
    environment variable, whose value is either ``<spreadsheet_id>`` or
    ``<spreadsheet_id>:<gid>`` (``gid`` falls back to ``"0"`` when no colon is
    present). The sheet is downloaded through the Google Sheets CSV export URL
    and is read through its ``Data`` and ``URL`` columns.

    Args:
        domain: Domain name used as the suffix of the environment variable.
        month: Month to select, compared against the first seven characters
            of the ``Data`` column (``YYYY-MM``).

    Returns:
        The text of the first row whose date falls in ``month``, or ``None``
        when the environment variable is unset or empty or no row matches.

    Raises:
        requests.RequestException: If the download fails or the server
            answers with an error status.
    """
    sheet_config = os.environ.get(f"GSHEET_SEO_ACTIVITIES_{domain}", "")
    if not sheet_config:
        return None

    # "<id>:<gid>" selects a specific tab; a bare "<id>" means the first tab.
    spreadsheet_id, separator, gid = sheet_config.partition(":")
    if not separator:
        gid = "0"

    export_url = f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}/export?format=csv&gid={gid}"
    response = requests.get(export_url, timeout=30)
    response.raise_for_status()
    # Force UTF-8 instead of relying on the encoding guessed from the headers.
    response.encoding = "utf-8"

    for row in csv.DictReader(io.StringIO(response.text)):
        # DictReader fills cells missing from a short row with None, so a
        # plain row.get(name, "") can still yield None; normalise to "".
        date = (row.get("Data") or "").strip()
        # Column is named URL but contains text
        text = (row.get("URL") or "").strip()
        if not date or not text:
            continue
        if date[:7] == month:
            return text
    return None
def main():
    """Command-line entry point: print one domain's SEO links for a month.

    Reads the required ``--domain`` and ``--month`` options, prints the
    matching links as indented JSON followed by a count line, and exits with
    status 1 when no sheet is configured for the domain.
    """
    cli = argparse.ArgumentParser(description="Pobierz linki SEO z Google Sheets")
    cli.add_argument("--domain", required=True)
    cli.add_argument("--month", required=True, help="YYYY-MM")
    opts = cli.parse_args()

    found = fetch_seo_links(opts.domain, opts.month)
    if found is None:
        # None (as opposed to an empty list) means the sheet is not configured.
        print(f"Brak konfiguracji GSHEET_SEO_LINKS_{opts.domain} w .env")
        sys.exit(1)

    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    rendered = json.dumps(found, indent=2, ensure_ascii=False)
    print(rendered)
    print(f"\nLiczba linkow w {opts.month}: {len(found)}")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()