115 lines
3.1 KiB
Python
115 lines
3.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Fetch SEO links from Google Sheets (public CSV export).

Usage:
    python scripts/reports/fetch_seo_links.py --domain innsi.pl --month 2026-02
|
|
"""
|
|
|
|
import argparse
|
|
import csv
|
|
import io
|
|
import json
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# When executed as a script, re-wrap stdout so output is encoded as UTF-8 and
# unencodable characters are replaced instead of raising.
# NOTE(review): presumably a workaround for consoles whose default encoding
# is not UTF-8 - confirm.
if __name__ == "__main__":
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

import requests

# Three `.parent` steps up from this file; per the usage example in the module
# docstring (scripts/reports/...), this is presumably the repository root.
ROOT = Path(__file__).parent.parent.parent
# Put ROOT first on sys.path so the `src` package import below resolves.
sys.path.insert(0, str(ROOT))
from src.gads_v2.config import load_env

# Runs at import time. NOTE(review): load_env presumably loads ROOT/.env into
# os.environ, which the GSHEET_* lookups in this module read - confirm
# against src.gads_v2.config.
load_env(ROOT / ".env")
|
|
|
|
|
|
def fetch_seo_links(domain, month):
    """Fetch SEO links for the given domain and month from Google Sheets.

    The sheet is located through the ``GSHEET_SEO_LINKS_<domain>`` environment
    variable, whose value is either ``<spreadsheet_id>`` or
    ``<spreadsheet_id>:<gid>``; a missing or empty ``gid`` defaults to ``"0"``.
    The sheet is downloaded through the CSV export URL and read by its
    ``Data`` (date) and ``URL`` columns.

    Args:
        domain: Domain name used as the suffix of the environment variable.
        month: Month to select, as ``YYYY-MM``. It is compared with the first
            seven characters of each row's ``Data`` value.

    Returns:
        ``None`` when the environment variable is unset or empty. Otherwise a
        list (possibly empty) of dicts in sheet order, e.g.
        ``[{"date": "2026-02-01", "url": "https://..."}]``.

    Raises:
        requests.HTTPError: If the export request returns an error status.
    """
    sheet_config = os.environ.get(f"GSHEET_SEO_LINKS_{domain}", "").strip()
    if not sheet_config:
        return None

    # partition() leaves gid empty when there is no ":" (or nothing after
    # it); both cases fall back to the first sheet tab, gid "0".
    spreadsheet_id, _, gid = sheet_config.partition(":")
    spreadsheet_id = spreadsheet_id.strip()
    gid = gid.strip() or "0"

    export_url = f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}/export?format=csv&gid={gid}"

    response = requests.get(export_url, timeout=30)
    response.raise_for_status()
    # Decode the body as UTF-8 regardless of what the response headers declare.
    response.encoding = "utf-8"

    links = []
    for row in csv.DictReader(io.StringIO(response.text)):
        # DictReader fills cells missing from short rows with None, so
        # row.get(key, "") can still return None; `or ""` keeps .strip() safe.
        date = (row.get("Data") or "").strip()
        url = (row.get("URL") or "").strip()
        if not date or not url:
            continue
        # Dates are expected as YYYY-MM-DD, so the first 7 chars are YYYY-MM.
        if date[:7] == month:
            links.append({"date": date, "url": url})

    return links
|
|
|
|
|
|
def fetch_seo_activities(domain, month):
    """Fetch the SEO activities description for the given domain and month.

    The sheet is located through the ``GSHEET_SEO_ACTIVITIES_<domain>``
    environment variable, whose value is either ``<spreadsheet_id>`` or
    ``<spreadsheet_id>:<gid>``; a missing or empty ``gid`` defaults to ``"0"``.
    The sheet is downloaded through the CSV export URL and read by its
    ``Data`` (date) and ``URL`` columns.

    Args:
        domain: Domain name used as the suffix of the environment variable.
        month: Month to select, as ``YYYY-MM``. It is compared with the first
            seven characters of each row's ``Data`` value.

    Returns:
        The text of the first row whose date falls in ``month``. ``None`` when
        the environment variable is unset or empty, and also when no row
        matches - callers cannot tell these two cases apart.

    Raises:
        requests.HTTPError: If the export request returns an error status.
    """
    sheet_config = os.environ.get(f"GSHEET_SEO_ACTIVITIES_{domain}", "").strip()
    if not sheet_config:
        return None

    # partition() leaves gid empty when there is no ":" (or nothing after
    # it); both cases fall back to the first sheet tab, gid "0".
    spreadsheet_id, _, gid = sheet_config.partition(":")
    spreadsheet_id = spreadsheet_id.strip()
    gid = gid.strip() or "0"

    export_url = f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}/export?format=csv&gid={gid}"

    response = requests.get(export_url, timeout=30)
    response.raise_for_status()
    # Decode the body as UTF-8 regardless of what the response headers declare.
    response.encoding = "utf-8"

    for row in csv.DictReader(io.StringIO(response.text)):
        # DictReader fills cells missing from short rows with None, so
        # row.get(key, "") can still return None; `or ""` keeps .strip() safe.
        date = (row.get("Data") or "").strip()
        # The column is named "URL" but holds the free-text description.
        text = (row.get("URL") or "").strip()
        if not date or not text:
            continue
        # Only the first matching row is returned.
        if date[:7] == month:
            return text

    return None
|
|
|
|
|
|
def main():
    """Command-line entry point: print one domain's SEO links for a month.

    Parses the required ``--domain`` and ``--month`` options, fetches the
    links, and prints them as indented JSON followed by a count line. Exits
    with status 1 when the fetch reports that the domain is not configured.
    """
    cli = argparse.ArgumentParser(description="Pobierz linki SEO z Google Sheets")
    cli.add_argument("--domain", required=True)
    cli.add_argument("--month", required=True, help="YYYY-MM")
    opts = cli.parse_args()
    domain, month = opts.domain, opts.month

    found = fetch_seo_links(domain, month)
    if found is None:
        # None means the GSHEET_SEO_LINKS_<domain> setting is missing or empty.
        print(f"Brak konfiguracji GSHEET_SEO_LINKS_{domain} w .env")
        sys.exit(1)

    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    rendered = json.dumps(found, indent=2, ensure_ascii=False)
    print(rendered)
    print(f"\nLiczba linkow w {month}: {len(found)}")
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, so importing the
# module for its fetch functions does not trigger argument parsing.
if __name__ == "__main__":
    main()
|