From d203370870dc7ca6730d9171357a02e1e80d3da4 Mon Sep 17 00:00:00 2001 From: Jacek Pyziak Date: Tue, 5 May 2026 20:31:55 +0200 Subject: [PATCH] update --- .paul/PROJECT.md | 97 ++++++++ .paul/ROADMAP.md | 31 +++ .paul/STATE.md | 54 +++++ .paul/config.md | 33 +++ .paul/governance/governance_2026-05-05.jsonl | 16 ++ .../01-01-PLAN.md | 219 ++++++++++++++++++ .../01-01-SUMMARY.md | 38 +++ CLAUDE.md | 48 ++++ autoload/class.Cron.php | 188 +++++++++++---- 9 files changed, 682 insertions(+), 42 deletions(-) create mode 100644 .paul/PROJECT.md create mode 100644 .paul/ROADMAP.md create mode 100644 .paul/STATE.md create mode 100644 .paul/config.md create mode 100644 .paul/governance/governance_2026-05-05.jsonl create mode 100644 .paul/phases/01-dataforseo-cost-optimization/01-01-PLAN.md create mode 100644 .paul/phases/01-dataforseo-cost-optimization/01-01-SUMMARY.md create mode 100644 CLAUDE.md diff --git a/.paul/PROJECT.md b/.paul/PROJECT.md new file mode 100644 index 0000000..ecdd100 --- /dev/null +++ b/.paul/PROJECT.md @@ -0,0 +1,97 @@ +# rank24.pl + +## What This Is + +Aplikacja webowa do monitorowania pozycji stron internetowych w wyszukiwarce Google na wybrane frazy kluczowe. Umożliwia śledzenie zmian pozycji w czasie oraz zarządzanie monitorowanymi frazami i domenami. + +## Core Value + +Użytkownicy mogą monitorować pozycje swoich stron w Google dla wybranych fraz kluczowych i śledzić historię zmian. 
+ +## Current State + +| Attribute | Value | +|-----------|-------| +| Version | 0.1.0 | +| Status | Prototype | +| Last Updated | 2026-05-05 | + +## Requirements + +### Validated (Shipped) + +- [x] Monitorowanie pozycji w Google (scraping) +- [x] Obsługa proxy do zapytań Google +- [x] System cachowania wyników +- [x] Baza danych MySQL (medoo.php) +- [x] System szablonów (Savant3) +- [x] Harmonogram zadań cron + +### Active (In Progress) + +- [ ] [To be defined during planning] + +### Planned (Next) + +- [ ] [To be defined during planning] + +### Out of Scope + +- [To be identified during planning] + +## Target Users + +**Primary:** Właściciele stron internetowych i agencje SEO +- Chcą śledzić pozycje swoich stron w Google +- Potrzebują historii zmian pozycji +- Monitorują wiele fraz i domen jednocześnie + +## Context + +**Business Context:** +Narzędzie SEO do monitorowania rankingów w Google. Projekt rank24.pl — własna platforma do śledzenia pozycji. + +**Technical Context:** +PHP (bez frameworka), MySQL, własny system szablonów Savant3, medoo jako ORM, system proxy do scrappingu Google, harmonogram cron. 
+ +## Constraints + +### Technical Constraints +- PHP (vanilla, bez Composer/frameworka) +- MySQL jako baza danych +- Scraping Google przez proxy + +### Business Constraints +- [To be identified during planning] + +### Compliance Constraints +- [To be identified during planning] + +## Key Decisions + +| Decision | Rationale | Date | Status | +|----------|-----------|------|--------| +| PHP bez frameworka | Istniejąca architektura | - | Active | +| Scraping przez proxy | Omijanie limitów Google | - | Active | + +## Success Metrics + +| Metric | Target | Current | Status | +|--------|--------|---------|--------| +| Dokładność pozycji | 95%+ | TBD | TBD | +| Czas odświeżania | < 24h | TBD | TBD | + +## Tech Stack + +| Layer | Technology | Notes | +|-------|------------|-------| +| Backend | PHP (vanilla) | Brak frameworka | +| Frontend | HTML/JS/jQuery | Własne szablony | +| Database | MySQL | medoo.php jako ORM | +| Templates | Savant3 | Własny system szablonów | +| Scraping | GoogleScraper | Własna klasa, proxy | +| Cron | class.Cron.php | Harmonogram zadań | + +--- +*PROJECT.md — Updated when requirements or context change* +*Last updated: 2026-05-05* diff --git a/.paul/ROADMAP.md b/.paul/ROADMAP.md new file mode 100644 index 0000000..4ba7965 --- /dev/null +++ b/.paul/ROADMAP.md @@ -0,0 +1,31 @@ +# Roadmap: rank24.pl + +## Overview + +Rozwoj platformy rank24.pl do monitorowania pozycji stron w Google. Projekt przechodzi od istniejacego prototypu przez stabilizacje i nowe funkcjonalnosci az do pelnej platformy SEO. 
+ +## Current Milestone + +**v0.1 Initial Release** (v0.1.0) +Status: In progress +Phases: 0 of 1 complete + +## Phases + +**Phase Numbering:** +- Integer phases (1, 2, 3): Planned milestone work +- Decimal phases (2.1, 2.2): Urgent insertions (marked with [INSERTED]) + +| Phase | Name | Plans | Status | Completed | +|-------|------|-------|--------|-----------| +| 1 | DataForSEO cost optimization | 1 | Apply complete | - | + +## Phase Details + +### Phase 1: DataForSEO cost optimization + +Reduce Google rank-checking API spend by adapting cron frequency and DataForSEO `depth` to phrase stability. Manual `days_offset` values remain authoritative. DataForSEO checks are capped at top 50 positions. + +--- +*Roadmap created: 2026-05-05* +*Last updated: 2026-05-05* diff --git a/.paul/STATE.md b/.paul/STATE.md new file mode 100644 index 0000000..962785b --- /dev/null +++ b/.paul/STATE.md @@ -0,0 +1,54 @@ +# Project State + +## Project Reference + +See: .paul/PROJECT.md (updated 2026-05-05) + +**Core value:** Uzytkownicy moga monitorowac pozycje swoich stron w Google dla wybranych fraz kluczowych i sledzic historie zmian. +**Current focus:** DataForSEO cost optimization applied + +## Current Position + +Milestone: v0.1 Initial Release +Phase: 1 of 1 (DataForSEO cost optimization) - Apply complete +Plan: 01-01 applied, awaiting UNIFY +Status: APPLY complete, ready for UNIFY +Last activity: 2026-05-05 20:12:44 +02:00 - Applied .paul/phases/01-dataforseo-cost-optimization/01-01-PLAN.md + +Progress: +- Milestone: [#####-----] 50% +- Phase 1: [########--] 80% + +## Loop Position + +Current loop state: +``` +PLAN ---> APPLY ---> UNIFY + x x o [Apply complete, awaiting unify] +``` + +## Accumulated Context + +### Decisions +- DataForSEO rank checks will not go deeper than top 50. +- Manual `days_offset` values remain authoritative. +- Phrases without manual interval use adaptive backend scheduling based on recent stability. +- Scope is backend-only for this plan. 
+ +### Deferred Issues +- DataForSEO credentials remain hardcoded for now; secrets cleanup is deferred. +- No admin UI or cost dashboard in this plan. +- SonarQube scan skipped because only one source file changed. + +### Blockers/Concerns +None. + +## Session Continuity + +Last session: 2026-05-05 20:12:44 +02:00 +Stopped at: Plan 01-01 applied +Next action: Run $paul-unify .paul/phases/01-dataforseo-cost-optimization/01-01-PLAN.md +Resume file: .paul/phases/01-dataforseo-cost-optimization/01-01-SUMMARY.md + +--- +*STATE.md - Updated after every significant action* diff --git a/.paul/config.md b/.paul/config.md new file mode 100644 index 0000000..a59d468 --- /dev/null +++ b/.paul/config.md @@ -0,0 +1,33 @@ +# Project Config + +**Project:** rank24.pl +**Created:** 2026-05-05 + +## Project Settings + +```yaml +project: + name: rank24.pl + version: 0.0.0 +``` + +## Integrations + +### SonarQube + +```yaml +sonarqube: + enabled: true + project_key: rank24.pl +``` + +## Preferences + +```yaml +preferences: + auto_commit: false + verbose_output: false +``` + +--- +*Config created: 2026-05-05* diff --git a/.paul/governance/governance_2026-05-05.jsonl b/.paul/governance/governance_2026-05-05.jsonl new file mode 100644 index 0000000..3137423 --- /dev/null +++ b/.paul/governance/governance_2026-05-05.jsonl @@ -0,0 +1,16 @@ +{"ts":"2026-05-05T17:41:53Z","tool":"Bash","cmd":"mkdir -p \"C:/visual studio code/projekty/rank24.pl/.paul/phases\" \"C:/visual studio code/projekty/rank24.pl/.paul/codebase\" && echo \"Created\"\",\"description\":\"Create .paul directory structure\","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:43:22Z","tool":"Write","file":"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\\\\.paul\\\\PROJECT.md","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:43:27Z","tool":"Write","file":"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\\\\.paul\\\\ROADMAP.md","cwd":"/c/visual studio code/projekty/rank24.pl"} 
+{"ts":"2026-05-05T17:43:33Z","tool":"Write","file":"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\\\\.paul\\\\STATE.md","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:43:49Z","tool":"Write","file":"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\\\\CLAUDE.md","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:49:45Z","tool":"Write","file":"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\\\\.paul\\\\config.md","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:52:39Z","tool":"Bash","cmd":"Get-ChildItem -Path \"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\" -Recurse -Directory | Select-Object -ExpandProperty FullName | head -30\"},\"tool_response\":{\"stdout\":\"/usr/bin/bash: line","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:53:08Z","tool":"Bash","cmd":"cd \"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\" && find . -type f -name \"*.php\" | grep -E \"^./[^/]*\\\\.php$|autoload/|templates|functions|libraries/(medoo|grid)\" | head -100\"},\"tool_re","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:55:06Z","tool":"Write","file":"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\\\\.paul\\\\codebase\\\\stack.md","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:55:32Z","tool":"Write","file":"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\\\\.paul\\\\codebase\\\\architecture.md","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:55:47Z","tool":"Write","file":"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\\\\.paul\\\\codebase\\\\conventions.md","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:55:57Z","tool":"Write","file":"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\\\\.paul\\\\codebase\\\\testing.md","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:56:14Z","tool":"Write","file":"C:\\\\visual studio 
code\\\\projekty\\\\rank24.pl\\\\.paul\\\\codebase\\\\integrations.md","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:56:43Z","tool":"Write","file":"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\\\\.paul\\\\codebase\\\\concerns.md","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:56:55Z","tool":"Write","file":"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\\\\.paul\\\\codebase\\\\db_schema.md","cwd":"/c/visual studio code/projekty/rank24.pl"} +{"ts":"2026-05-05T17:57:08Z","tool":"Bash","cmd":"cd \"C:\\\\visual studio code\\\\projekty\\\\rank24.pl\" && git add .paul/codebase/ && git commit -m \"$(cat <<'EOF'\\nDodano mapę kodu w .paul/codebase/ (7 dokumentów)\\n\\nWygenerowano przez równ","cwd":"/c/visual studio code/projekty/rank24.pl"} diff --git a/.paul/phases/01-dataforseo-cost-optimization/01-01-PLAN.md b/.paul/phases/01-dataforseo-cost-optimization/01-01-PLAN.md new file mode 100644 index 0000000..6d390e1 --- /dev/null +++ b/.paul/phases/01-dataforseo-cost-optimization/01-01-PLAN.md @@ -0,0 +1,219 @@ +--- +phase: 01-dataforseo-cost-optimization +plan: 01 +type: execute +wave: 1 +depends_on: [] +files_modified: + - autoload/class.Cron.php +autonomous: true +delegation: off +--- + + +## Goal +Reduce DataForSEO spending for Google rank checks by making cron scheduling and SERP depth adaptive, with a hard maximum of top 50 results. + +## Purpose +DataForSEO Organic SERP pricing changed on 2025-09-19 so the base price covers only the first page of 10 organic results; deeper `depth` values now cost more. The current cron logic uses `depth` values 30, 50, and 100, which makes many checks substantially more expensive. This plan keeps rank tracking useful while cutting avoidable API spend. 
+ +## Output +Backend-only changes in `autoload/class.Cron.php`: +- manual `days_offset` remains an explicit override +- phrases without manual interval get an adaptive interval based on recent rank stability +- DataForSEO requests never use `depth` above 50 +- deep checks are reduced for stable or low-priority cases + + + + +- **Depth policy** - Jak agresywnie ciac glebokosc sprawdzania DataForSEO? + -> Odpowiedz: Uzytkownik potrzebowal rozpisania. Przyjeto adaptacyjna polityke kosztowa oraz twardy limit: nie sprawdzamy dalej niz do 50 pozycji. +- **Intervals** - Czy mozemy zmieniac czestotliwosc sprawdzania fraz przez `days_offset`? + -> Odpowiedz: Tak, jesli powstanie mechanizm automatycznie sprawdzajacy rzadziej stabilne frazy, a czesciej niestabilne, z uwzglednieniem fraz majacych na sztywno wpisany interwal. +- **Scope** - Czy plan ma obejmowac panel administracyjny? + -> Odpowiedz: Moze to byc backend-only. + + +## Project Context +@.paul/PROJECT.md +@.paul/ROADMAP.md +@.paul/STATE.md +@.paul/codebase/architecture.md +@.paul/codebase/db_schema.md +@.paul/codebase/concerns.md + +## Source Files +@autoload/class.Cron.php +@cron.php +@dsf.php +@autoload/factory/class.Ranker.php + +## External Pricing Context +- DataForSEO Organic SERP pricing/depth update FAQ: https://dataforseo.com/help-center/serp-api-pricing-depth-update-faq +- DataForSEO SERP additional cost explanation: https://dataforseo.com/help-center/serp-api-cost-explained +- DataForSEO Google Organic Task POST docs: https://docs.dataforseo.com/v3/serp/google/organic/task_post/ + + + + +## AC-1: Manual Intervals Stay Authoritative +```gherkin +Given a phrase has `days_offset` set to a positive value +When `Cron::post_phrases_positions_dfs3()` selects phrases for DataForSEO +Then the phrase is eligible only when its manual interval is due +And the adaptive interval does not shorten or override that manual setting +``` + +## AC-2: Stable Phrases Are Checked Less Often +```gherkin +Given a phrase has no 
manual `days_offset` +And its recent recorded positions are stable +When the DataForSEO posting cron runs +Then the phrase is not sent every day +And its next eligibility is delayed according to the adaptive stability rule +``` + +## AC-3: Unstable Or New Phrases Stay Fresh +```gherkin +Given a phrase has no manual `days_offset` +And it is new, missing recent history, or has volatile recent positions +When the DataForSEO posting cron runs +Then the phrase remains eligible more frequently than stable phrases +And rank tracking does not become stale for unstable keywords +``` + +## AC-4: DataForSEO Depth Is Capped At 50 +```gherkin +Given any active phrase selected for a DataForSEO v3 Google Organic task +When the request payload is built +Then the request `depth` is never greater than 50 +And no request attempts to check positions beyond top 50 +``` + +## AC-5: Current Data Flow Remains Compatible +```gherkin +Given DataForSEO returns a completed task through the existing postback flow +When `Cron::get_phrases_positions_dfs3()` processes the result +Then position rows continue to be inserted or updated as before +And `last_checked`, `ds_id`, `ds_ready`, and `filled_missing_positions` continue to be maintained +``` + + + + + + + Task 1: Add DataForSEO cost policy helpers + autoload/class.Cron.php + + Add small private/static helper methods inside `Cron` for the DataForSEO v3 flow: + - `getDfsRecentPositions($phrase_id, $limit)` to read the latest non-empty positions from `pro_rr_phrases_positions`. + - `getDfsPositionVolatility($positions)` to classify recent movement using simple absolute deltas. + - `getDfsAdaptiveIntervalDays($row, $positions)` to return the automatic interval for phrases where `days_offset` is empty. + - `getDfsDepth($last_position, $positions)` to return a capped depth. + + Policy to implement: + - Hard cap: `depth <= 50` always. 
+ - If no previous position exists: use `depth = 50`, because first discovery still needs a reasonable search window but must not exceed top 50. + - Last position 1-10: use `depth = 10` for stable phrases, `depth = 20` for volatile phrases. + - Last position 11-20: use `depth = 20` for stable phrases, `depth = 30` for volatile phrases. + - Last position 21-50: use `depth = 50`. + - Last position >50: use `depth = 50`; if not found again, store/handle as not found according to the existing result behavior. + + Adaptive interval policy: + - Missing or short history: 1 day. + - Volatile phrase, e.g. max movement in recent checks greater than 5 positions: 1 day. + - Mild movement, e.g. max movement 2-5 positions: 2 days. + - Stable top 10, e.g. max movement 0-1 position across at least 5 recent checks: 3 days. + - Stable positions 11-50 across at least 5 recent checks: 5 days. + - Keep thresholds as named local constants or clearly named helper variables, not scattered magic numbers. + + Avoid: + - Adding a new database column in this plan; use existing `days_offset`, `last_checked`, and position history. + - Introducing Composer or a new framework. + - Changing credentials handling in this plan; it is known debt but separate from cost control. + + php -l autoload/class.Cron.php + AC-2, AC-3, and AC-4 have clear helper logic available for the cron selection and payload code. + + + + Task 2: Apply adaptive eligibility in phrase selection + autoload/class.Cron.php + + Update `Cron::post_phrases_positions_dfs3()` so it no longer blindly selects the first active unchecked phrase and sends it daily. + + Required behavior: + - Keep all existing active date filters for phrase/site `date_start` and `date_end`. + - Keep `ds_id IS NULL` so already posted tasks are not duplicated. 
+ - Preserve manual `days_offset`: if present, the phrase is due only when `DATE_ADD(last_checked, INTERVAL days_offset DAY) <= CURRENT_DATE`, plus the existing `last_checked = '2012-01-01'` refresh behavior. + - For phrases with empty `days_offset`, compute automatic due status from recent positions and `last_checked`. + - Ensure an ineligible stable phrase cannot block later eligible phrases. If SQL cannot express the adaptive rule cleanly, select a bounded candidate pool ordered by site/name/phrase and iterate in PHP until the first due phrase is found. + - If no due phrase exists, return `[ 'status' => 'empty' ]` as before. + + The candidate-pool approach is acceptable and preferred over risky MySQL 5 window-function assumptions. Keep the pool bounded, e.g. 100-300 active candidates, so a cron hit remains cheap. + + php -l autoload/class.Cron.php + AC-1, AC-2, and AC-3 satisfied: manual intervals are respected, stable automatic phrases are skipped until due, and eligible later phrases are not blocked. + + + + Task 3: Build cheaper DataForSEO payloads + autoload/class.Cron.php + + Replace the current `depth` calculation in `Cron::post_phrases_positions_dfs3()`: + - Remove the current 30/50/100 ladder. + - Use `getDfsDepth()` from Task 1. + - Ensure both localization branches use the same payload policy. + - Keep `priority => 1`, `language_code => "pl"`, `postback_data => "advanced"`, and the existing postback URL flow unchanged. + - Include the computed interval/depth in the returned cron message so operations can see why a phrase was sent, e.g. depth and whether interval is manual or adaptive. + + Also harden result processing only where it directly protects the new policy: + - Initialize `$phrase_position` and `$site_url` before looping through result items. + - If the domain is not found in returned top 50, store position `0` or the existing "not found" representation used by the app, without PHP notices. 
+ - Do not expand result retrieval beyond the posted task result. + + php -l autoload/class.Cron.php + AC-4 and AC-5 satisfied: no request exceeds depth 50 and the existing DataForSEO post/get lifecycle still writes positions correctly. + + + + + + +## DO NOT CHANGE +- Do not add or run database migrations in this plan. +- Do not modify `dsf.php` unless implementation discovers the current postback marker is incompatible with the unchanged flow. +- Do not change DataForSEO credentials or move secrets in this plan. +- Do not replace DataForSEO with another provider in this plan. +- Do not change UI templates or admin panels. + +## SCOPE LIMITS +- This is backend cost control only. +- The maximum tracked position becomes top 50 for DataForSEO checks. +- Existing historical positions beyond top 50 are not rewritten. +- Security debt listed in `.paul/codebase/concerns.md` remains deferred unless it directly blocks this plan. + + + + +Before declaring plan complete: +- [ ] `php -l autoload/class.Cron.php` +- [ ] Review `Cron::post_phrases_positions_dfs3()` and confirm `depth` cannot exceed 50. +- [ ] Review manual `days_offset` path and confirm it remains authoritative. +- [ ] Review automatic interval path and confirm stable phrases cannot block other due candidates. +- [ ] Review `Cron::get_phrases_positions_dfs3()` and confirm no undefined-position notices are introduced for not-found-in-top-50 results. + + + +- DataForSEO v3 Google Organic task payloads never request more than top 50. +- Phrases with manual `days_offset` keep their configured schedule. +- Phrases without manual `days_offset` get adaptive scheduling based on recent stability. +- Cron still returns `ok` when a task is posted and `empty` when nothing is due. +- Existing DataForSEO postback/get result flow continues to update ranking tables. + + + +After completion, create `.paul/phases/01-dataforseo-cost-optimization/01-01-SUMMARY.md`. 
+ diff --git a/.paul/phases/01-dataforseo-cost-optimization/01-01-SUMMARY.md b/.paul/phases/01-dataforseo-cost-optimization/01-01-SUMMARY.md new file mode 100644 index 0000000..9934ed3 --- /dev/null +++ b/.paul/phases/01-dataforseo-cost-optimization/01-01-SUMMARY.md @@ -0,0 +1,38 @@ +# Summary: 01-01 DataForSEO Cost Optimization + +## Plan + +`.paul/phases/01-dataforseo-cost-optimization/01-01-PLAN.md` + +## Completed + +- Added DataForSEO cost-policy helpers in `autoload/class.Cron.php`. +- Added adaptive interval calculation for phrases without manual `days_offset`. +- Preserved manual `days_offset` as the authoritative schedule override. +- Changed DataForSEO candidate selection from a single SQL-selected phrase to a bounded candidate pool ordered by oldest `last_checked`, then filtered in PHP. +- Removed the old `30/50/100` depth ladder. +- Capped all DataForSEO v3 Google Organic requests at top 50. +- Hardened result processing so "not found in returned top 50" stores position `0` without relying on an undefined `$site`. +- Added depth and interval metadata to the cron success message. + +## Verification + +- `php -l autoload\class.Cron.php` - passed. +- Confirmed no `100` literal remains in `autoload/class.Cron.php`. +- Confirmed DataForSEO payloads use computed `$depth` from `getDfsDepth()`. +- Confirmed both localization branches use the same computed depth. + +## Deviations + +- No SonarQube scan was run because only one source file was modified. +- No codebase docs were updated because this plan changed one existing backend file and did not introduce schema, dependency, API, or multi-file architecture changes. + +## Files Modified + +- `autoload/class.Cron.php` +- `.paul/STATE.md` +- `.paul/phases/01-dataforseo-cost-optimization/01-01-SUMMARY.md` + +## Next + +Run `$paul-unify .paul/phases/01-dataforseo-cost-optimization/01-01-PLAN.md` to reconcile the implementation against the plan and close the loop. 
diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..93c3deb --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,48 @@ +# Projektowe zasady dla rank24.pl + +## Stack + +- **Język:** PHP (vanilla, bez frameworka) +- **Frontend:** HTML + JavaScript + jQuery +- **Baza danych:** MySQL +- **ORM:** medoo.php +- **Szablony:** Savant3 +- **Scraping:** Własna klasa GoogleScraper + proxy +- **Harmonogram:** class.Cron.php + +## Zasady kodu + +- Stosuj PSR-12 dla formatowania kodu PHP +- Nazewnictwo: PascalCase dla klas (class.NazwaKlasy.php), camelCase dla metod, snake_case dla zmiennych DB +- Unikaj zagnieżdżeń > 3 poziomy — wydzielaj do metod +- Komentarze tylko gdy wyjaśniają "dlaczego", nie "co" +- Klasy w katalogu `autoload/` ładowane automatycznie +- Szablony w katalogu `templates/` + +## Baza danych + +- Schemat dokumentowany w `.paul/codebase/db_schema.md` +- ORM: medoo.php (plik `libraries/medoo.php`) +- Baza: MySQL, host lokalny + produkcja + +## Testy + +- Brak automatycznych testów — testowanie manualne +- Sprawdzaj logi błędów PHP oraz output klasy Cron + +## Dokumentacja + +- Dokumentacja techniczna w `.paul/codebase/` +- Przy każdej zmianie aktualizuj odpowiednie pliki (db_schema.md, stack.md) + +## Wdrażanie + +- FTP/SFTP na serwer produkcyjny (konfiguracja w ftp-kr.json / sftp.json) +- Pliki konfiguracyjne: config.php (lokalne dane DB/proxy) + +## UI/Frontend + +- Szablony HTML w katalogu `templates/` +- Layouty w `layout/` +- Style CSS/SCSS w `libraries/style-css/` i `libraries/style-scss/` +- jQuery + własne pluginy JS diff --git a/autoload/class.Cron.php b/autoload/class.Cron.php index 336c6ba..dfd3f20 100644 --- a/autoload/class.Cron.php +++ b/autoload/class.Cron.php @@ -1,6 +1,118 @@ select( 'pro_rr_phrases_positions', [ 'position', 'date' ], [ + 'AND' => [ + 'phrase_id' => $phrase_id, + 'position[!]' => null + ], + 'ORDER' => [ 'date' => 'DESC' ], + 'LIMIT' => $limit + ] ); + } + + private static function getDfsPositionVolatility( $positions ) + 
{ + if ( !is_array( $positions ) or count( $positions ) < 2 ) + return 'short'; + + $max_delta = 0; + $last = null; + + foreach ( $positions as $position ) + { + $current = (int)$position['position']; + + if ( $current == 0 ) + return 'volatile'; + + if ( $last !== null ) + $max_delta = max( $max_delta, abs( $last - $current ) ); + + $last = $current; + } + + if ( $max_delta > 5 ) + return 'volatile'; + + if ( $max_delta > 1 ) + return 'mild'; + + return 'stable'; + } + + private static function getDfsAdaptiveIntervalDays( $row, $positions ) + { + $volatility = self::getDfsPositionVolatility( $positions ); + + if ( !is_array( $positions ) or count( $positions ) < 3 ) + return 1; + + if ( $volatility == 'volatile' ) + return 1; + + if ( $volatility == 'mild' ) + return 2; + + if ( count( $positions ) >= 5 ) + { + $last_position = (int)$positions[0]['position']; + + if ( $last_position > 0 and $last_position <= 10 ) + return 3; + + if ( $last_position > 10 and $last_position <= 50 ) + return 5; + } + + return 1; + } + + private static function getDfsDepth( $last_position, $positions ) + { + $last_position = (int)$last_position; + $volatility = self::getDfsPositionVolatility( $positions ); + + if ( !$last_position ) + return 50; + + if ( $last_position <= 10 ) + return $volatility == 'stable' ? 10 : 20; + + if ( $last_position <= 20 ) + return $volatility == 'stable' ? 20 : 30; + + return 50; + } + + private static function isDfsPhraseDue( $row, $positions ) + { + if ( $row['last_checked'] == '2012-01-01' or !$row['last_checked'] ) + return true; + + if ( date( 'Y-m-d', strtotime( $row['last_checked'] ) ) == date( 'Y-m-d' ) ) + return false; + + if ( $row['days_offset'] and (int)$row['days_offset'] > 0 ) + $interval_days = (int)$row['days_offset']; + else + $interval_days = self::getDfsAdaptiveIntervalDays( $row, $positions ); + + return date( 'Y-m-d', strtotime( '+' . $interval_days . 
' days', strtotime( $row['last_checked'] ) ) ) <= date( 'Y-m-d' ); + } + + private static function getDfsIntervalLabel( $row, $positions ) + { + if ( $row['days_offset'] and (int)$row['days_offset'] > 0 ) + return 'manual:' . (int)$row['days_offset'] . 'd'; + + return 'auto:' . self::getDfsAdaptiveIntervalDays( $row, $positions ) . 'd'; + } + public static function fill_missing_positions() { global $mdb; @@ -161,7 +273,9 @@ class Cron $result = $client->get('/v3/serp/google/organic/task_get/advanced/' . $row['ds_id'] ); if ( $result['status_code'] == '20000' ) { - $sites = $result['tasks'][0]['result'][0]['items']; + $sites = isset( $result['tasks'][0]['result'][0]['items'] ) ? $result['tasks'][0]['result'][0]['items'] : []; + $phrase_position = 0; + $site_url = ''; foreach ( $sites as $site ) { @@ -201,13 +315,11 @@ class Cron $mdb -> insert( 'phrase_positions_statistic', [ 'phrase_id' => $row['id'], - 'position' => $site['rank_group'], + 'position' => (int)$phrase_position, 'date' => date( 'Y-m-d' ) ] ); } - $phrase_position = $site['rank_group']; - $mdb -> update( 'pro_rr_phrases', [ 'last_checked' => date( 'Y-m-d' ), 'filled_missing_positions' => 0, 'ds_id' => null, 'ds_ready' => 0 ], [ 'id' => $row['id'] ] ); return [ @@ -227,48 +339,40 @@ class Cron { global $mdb; - $sql = 'SELECT * FROM ( ' - . 'SELECT ' - . 'prp.id, phrase, url, localization, last_checked, days_offset, prs.name ' - . 'FROM ' - . 'pro_rr_phrases AS prp, pro_rr_sites AS prs ' - . 'WHERE ' - . 'prs.id = prp.site_id ' - . 'AND ' - . 'last_checked != \'' . date( 'Y-m-d' ) . '\' ' - . 'AND ' - . '( prp.date_end >= \'' . date( 'Y-m-d' ) . '\' OR prp.date_end IS NULL ) ' - . 'AND ' - . '( prp.date_start <= \'' . date( 'Y-m-d' ) . '\' OR prp.date_start IS NULL ) ' - . 'AND ' - . '( prs.date_start <= \'' . date( 'Y-m-d' ) . '\' OR prs.date_start IS NULL ) ' - . 'AND ' - . '( prs.date_end >= \'' . date( 'Y-m-d' ) . '\' OR prs.date_end IS NULL ) ' - . 'AND ' - . 'ds_id IS NULL ' - . ') AS q1 ' - . 
'WHERE ' - . '( ' - . 'days_offset IS NULL ' - . 'OR ' - . 'days_offset IS NOT NULL AND DATE( DATE_ADD( last_checked, INTERVAL +days_offset DAY ) ) <= CURRENT_DATE ' - . ') OR last_checked = \'2012-01-01\' ' - . 'ORDER BY ' - . 'name ASC, phrase ASC ' - . 'LIMIT 1'; + $sql = 'SELECT ' + . 'prp.id, phrase, url, localization, last_checked, days_offset, prs.name ' + . 'FROM ' + . 'pro_rr_phrases AS prp, pro_rr_sites AS prs ' + . 'WHERE ' + . 'prs.id = prp.site_id ' + . 'AND ' + . '( last_checked != \'' . date( 'Y-m-d' ) . '\' OR last_checked IS NULL ) ' + . 'AND ' + . '( prp.date_end >= \'' . date( 'Y-m-d' ) . '\' OR prp.date_end IS NULL ) ' + . 'AND ' + . '( prp.date_start <= \'' . date( 'Y-m-d' ) . '\' OR prp.date_start IS NULL ) ' + . 'AND ' + . '( prs.date_start <= \'' . date( 'Y-m-d' ) . '\' OR prs.date_start IS NULL ) ' + . 'AND ' + . '( prs.date_end >= \'' . date( 'Y-m-d' ) . '\' OR prs.date_end IS NULL ) ' + . 'AND ' + . 'ds_id IS NULL ' + . 'ORDER BY ' + . 'last_checked ASC, name ASC, phrase ASC ' + . 'LIMIT 200'; $results = $mdb -> query( $sql ) -> fetchAll( \PDO::FETCH_ASSOC ); if ( is_array( $results ) and !empty( $results ) ) foreach ( $results as $row ) { + $positions = self::getDfsRecentPositions( $row['id'], 5 ); + + if ( !self::isDfsPhraseDue( $row, $positions ) ) + continue; + $client = new RestClient( 'https://api.dataforseo.com/', null, 'pyziak84@gmail.com', '0p4rYWDNoK63eUUw' ); - $last_position = $mdb -> get( 'pro_rr_phrases_positions', 'position', [ 'AND' => [ 'phrase_id' => $row['id'] ], 'ORDER' => [ 'date' => 'DESC' ] ] ); - if ( $last_position <= 10 ) { - $depth = 30; - } elseif ( $last_position <= 30 ) { - $depth = 50; - } else { - $depth = 100; - } + $last_position = ( is_array( $positions ) && !empty( $positions ) ) ? 
(int)$positions[0]['position'] : 0; + $depth = self::getDfsDepth( $last_position, $positions ); + $interval_label = self::getDfsIntervalLabel( $row, $positions ); if ( $row['localization'] and (int)$row['localization'] ) { @@ -303,7 +407,7 @@ class Cron return [ 'status' => 'ok', - 'msg' => 'Wysyłam do sprawdzenia frazę: ' . $row['phrase'] . ' - ' . $row['url'] . ' - (API 3.0) - ' . $task_post_result['tasks'][0]['id'] + 'msg' => 'Wysylam do sprawdzenia fraze: ' . $row['phrase'] . ' - ' . $row['url'] . ' - depth: ' . $depth . ', interval: ' . $interval_label . ' - (API 3.0) - ' . $task_post_result['tasks'][0]['id'] ]; } }