first commit
This commit is contained in:
114
scripts/reports/fetch_seo_links.py
Normal file
114
scripts/reports/fetch_seo_links.py
Normal file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Pobiera linki SEO z Google Sheets (publiczny CSV export).
|
||||
|
||||
Użycie:
|
||||
python scripts/reports/fetch_seo_links.py --domain innsi.pl --month 2026-02
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
if __name__ == "__main__":
    # When executed as a script, re-wrap stdout so output is always encoded
    # as UTF-8; characters that cannot be encoded are replaced instead of
    # raising an error.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

import requests

# Directory three levels above this file (expected to be the repository root
# for scripts/reports/fetch_seo_links.py).
ROOT = Path(__file__).parent.parent.parent
# Put ROOT first on sys.path so the project-local `src` import below resolves.
# This must happen before that import statement runs.
sys.path.insert(0, str(ROOT))
from src.gads_v2.config import load_env

# Load settings from ROOT/.env at import time.
# NOTE(review): presumably this populates os.environ, which the fetch
# functions read for GSHEET_SEO_* keys -- confirm in load_env.
load_env(ROOT / ".env")
|
||||
|
||||
|
||||
def fetch_seo_links(domain, month):
    """Fetch the SEO links recorded for ``domain`` in ``month``.

    The sheet is located through the ``GSHEET_SEO_LINKS_<domain>``
    environment variable. Its value is either ``<spreadsheet_id>`` or
    ``<spreadsheet_id>:<gid>``; without a colon the gid defaults to ``"0"``.
    The sheet is downloaded through the Google Sheets CSV export URL and the
    ``Data`` and ``URL`` columns are read from it.

    Args:
        domain: Domain name used as the suffix of the environment variable.
        month: Month to select, formatted ``YYYY-MM``. It is compared with
            the first seven characters of each row's ``Data`` cell.

    Returns:
        A list of dicts such as ``[{"date": "2026-02-01", "url": "https://..."}]``
        for the matching rows (possibly empty), or ``None`` when the
        environment variable is unset or empty.

    Raises:
        Whatever ``requests.get`` raises on a failed request, and whatever
        ``raise_for_status`` raises for an error HTTP status.
    """
    sheet_config = os.environ.get(f"GSHEET_SEO_LINKS_{domain}", "")
    if not sheet_config:
        return None

    # "<id>:<gid>" selects a specific tab; a bare "<id>" uses gid "0".
    spreadsheet_id, separator, gid = sheet_config.partition(":")
    if not separator:
        gid = "0"

    export_url = f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}/export?format=csv&gid={gid}"

    response = requests.get(export_url, timeout=30)
    response.raise_for_status()
    # Force UTF-8 decoding of the body regardless of what requests inferred.
    response.encoding = "utf-8"

    links = []
    for row in csv.DictReader(io.StringIO(response.text)):
        # DictReader fills cells missing from a short row with None, so
        # `row.get(key, "")` can still return None; `or ""` keeps .strip() safe.
        date = (row.get("Data") or "").strip()
        url = (row.get("URL") or "").strip()
        if not date or not url:
            continue
        # Match month (date format: YYYY-MM-DD)
        if date[:7] == month:
            links.append({"date": date, "url": url})

    return links
|
||||
|
||||
|
||||
def fetch_seo_activities(domain, month):
    """Fetch the SEO activities description for ``domain`` in ``month``.

    The sheet is located through the ``GSHEET_SEO_ACTIVITIES_<domain>``
    environment variable. Its value is either ``<spreadsheet_id>`` or
    ``<spreadsheet_id>:<gid>``; without a colon the gid defaults to ``"0"``.
    The sheet is downloaded through the Google Sheets CSV export URL.

    Args:
        domain: Domain name used as the suffix of the environment variable.
        month: Month to select, formatted ``YYYY-MM``. It is compared with
            the first seven characters of each row's ``Data`` cell.

    Returns:
        The text of the first row whose date falls in ``month``, or ``None``
        when the environment variable is unset or empty, or no row matches.

    Raises:
        Whatever ``requests.get`` raises on a failed request, and whatever
        ``raise_for_status`` raises for an error HTTP status.
    """
    sheet_config = os.environ.get(f"GSHEET_SEO_ACTIVITIES_{domain}", "")
    if not sheet_config:
        return None

    # "<id>:<gid>" selects a specific tab; a bare "<id>" uses gid "0".
    spreadsheet_id, separator, gid = sheet_config.partition(":")
    if not separator:
        gid = "0"

    export_url = f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}/export?format=csv&gid={gid}"

    response = requests.get(export_url, timeout=30)
    response.raise_for_status()
    # Force UTF-8 decoding of the body regardless of what requests inferred.
    response.encoding = "utf-8"

    for row in csv.DictReader(io.StringIO(response.text)):
        # DictReader fills cells missing from a short row with None, so
        # `row.get(key, "")` can still return None; `or ""` keeps .strip() safe.
        date = (row.get("Data") or "").strip()
        text = (row.get("URL") or "").strip()  # Column is named URL but contains text
        if not date or not text:
            continue
        if date[:7] == month:
            return text

    return None
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: print one month of SEO links as JSON.

    Parses the required ``--domain`` and ``--month`` (``YYYY-MM``) options
    and calls ``fetch_seo_links``. When that returns ``None`` (no sheet
    configured for the domain) it prints a message and exits with status 1.
    Otherwise it prints the links as indented JSON followed by a count line.
    """
    cli = argparse.ArgumentParser(description="Pobierz linki SEO z Google Sheets")
    cli.add_argument("--domain", required=True)
    cli.add_argument("--month", required=True, help="YYYY-MM")
    options = cli.parse_args()
    domain, month = options.domain, options.month

    found = fetch_seo_links(domain, month)
    if found is None:
        # No GSHEET_SEO_LINKS_<domain> entry was configured.
        print(f"Brak konfiguracji GSHEET_SEO_LINKS_{domain} w .env")
        sys.exit(1)

    payload = json.dumps(found, indent=2, ensure_ascii=False)
    print(payload)
    print(f"\nLiczba linkow w {month}: {len(found)}")
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user