This commit is contained in:
2026-04-09 11:44:45 +02:00
parent 7ff7ff3a92
commit 61c66bfd55
79 changed files with 13667 additions and 144 deletions

View File

@@ -137,6 +137,7 @@ class ArticleController extends Controller
}
$content = (string) ($post['content']['rendered'] ?? '');
$postUrl = trim((string) ($post['link'] ?? ''));
$postDate = (string) ($post['date'] ?? '');
$publishedAt = $postDate !== '' ? date('Y-m-d H:i:s', strtotime($postDate)) : date('Y-m-d H:i:s');
if ($publishedAt === '1970-01-01 00:00:00') {
@@ -150,6 +151,7 @@ class ArticleController extends Controller
'title' => $title,
'content' => $content,
'wp_post_id' => $wpPostId,
'wp_post_url' => $postUrl !== '' ? $postUrl : null,
'status' => 'published',
'published_at' => $publishedAt,
]);

View File

@@ -5,6 +5,8 @@ namespace App\Controllers;
use App\Core\Auth;
use App\Core\Config;
use App\Core\Controller;
use App\Core\Database;
use App\Core\Database\Migrator;
use App\Services\ImageService;
use App\Services\OpenAIService;
@@ -20,6 +22,10 @@ class SettingsController extends Controller
'article_min_words',
'article_max_words',
'article_generation_prompt',
'internal_linking_enabled',
'internal_link_min_count',
'internal_link_max_count',
'openai_embedding_model',
'image_generation_prompt',
'semstorm_login',
'semstorm_password',
@@ -36,9 +42,13 @@ class SettingsController extends Controller
private array $settingDefaults = [
'openai_model' => 'gpt-4o',
'image_provider' => 'freepik',
'article_min_words' => '800',
'article_max_words' => '1200',
'article_min_words' => '900',
'article_max_words' => '1400',
'article_generation_prompt' => OpenAIService::DEFAULT_ARTICLE_PROMPT_TEMPLATE,
'internal_linking_enabled' => '1',
'internal_link_min_count' => '2',
'internal_link_max_count' => '4',
'openai_embedding_model' => 'text-embedding-3-small',
'image_generation_prompt' => ImageService::DEFAULT_FREEPIK_PROMPT_TEMPLATE,
'semstorm_api_base' => 'https://api.semstorm.com',
'semstorm_timeout_seconds' => '30',
@@ -76,4 +86,48 @@ class SettingsController extends Controller
$this->flash('success', 'Ustawienia zostały zapisane.');
$this->redirect('/settings');
}
/**
 * Show the database maintenance page with the current migration status.
 *
 * A `migration_logs` entry found in the session is passed to the view once
 * and removed from the session in the same request.
 */
public function database(): void
{
    Auth::requireLogin();

    $migrationsDir = dirname(__DIR__, 2) . '/migrations';
    $migrator = new Migrator(Database::getInstance(), $migrationsDir);
    $migrationStatus = $migrator->status();

    // Read-and-forget: the log is handed to the view and then dropped from the session.
    $lastRunLogs = $_SESSION['migration_logs'] ?? null;
    unset($_SESSION['migration_logs']);

    $viewData = [
        'status' => $migrationStatus,
        'runLogs' => $lastRunLogs,
    ];
    $this->view('settings/database', $viewData);
}
/**
 * Run all pending migrations and redirect back to the database page.
 *
 * The per-file log of a successful run is stored in the session under
 * `migration_logs`; any failure is reported through a "danger" flash message.
 */
public function migrate(): void
{
    Auth::requireLogin();

    $migrator = new Migrator(Database::getInstance(), dirname(__DIR__, 2) . '/migrations');

    try {
        $outcome = $migrator->runPending();
        $_SESSION['migration_logs'] = $outcome['logs'];

        $executedCount = $outcome['executed'];
        if ($executedCount > 0) {
            $this->flash('success', "Wykonano {$executedCount} migracji.");
        } else {
            $this->flash('info', 'Brak oczekujacych migracji.');
        }
    } catch (\Throwable $failure) {
        $this->flash('danger', 'Blad migracji: ' . $failure->getMessage());
    }

    $this->redirect('/settings/database');
}
}

View File

@@ -0,0 +1,118 @@
<?php
namespace App\Controllers;
use App\Core\Auth;
use App\Core\Config;
use App\Core\Controller;
use App\Core\Database;
use App\Helpers\Logger;
use App\Services\StatLinkService;
class StatLinkController extends Controller
{
/**
 * List StatLink links (20 per page) together with per-status counters.
 *
 * The requested `page` input is clamped to the range 1..totalPages, so an
 * out-of-range value shows the last page instead of an empty list with an
 * arbitrarily large OFFSET.
 */
public function index(): void
{
    Auth::requireLogin();

    $db = Database::getInstance();
    $perPage = 20;

    // Per-status counters; their sum is the total number of links.
    $stats = [];
    $statsStmt = $db->query(
        "SELECT status, COUNT(*) as cnt FROM statlink_links GROUP BY status"
    );
    foreach ($statsStmt->fetchAll() as $row) {
        $stats[$row['status']] = (int) $row['cnt'];
    }

    $totalLinks = array_sum($stats);
    $totalPages = max(1, (int) ceil($totalLinks / $perPage));

    // Totals are computed first so the requested page can be clamped before the offset is derived.
    $page = min($totalPages, max(1, (int) $this->input('page', 1)));
    $offset = ($page - 1) * $perPage;

    // Links with article and site info
    $stmt = $db->prepare(
        "SELECT sl.*, a.title as article_title, s.name as site_name
        FROM statlink_links sl
        LEFT JOIN articles a ON sl.article_id = a.id
        LEFT JOIN sites s ON sl.site_id = s.id
        ORDER BY sl.added_at DESC
        LIMIT :limit OFFSET :offset"
    );
    $stmt->bindValue('limit', $perPage, \PDO::PARAM_INT);
    $stmt->bindValue('offset', $offset, \PDO::PARAM_INT);
    $stmt->execute();
    $links = $stmt->fetchAll();

    $this->view('statlink/index', [
        'links' => $links,
        'stats' => $stats,
        'page' => $page,
        'totalPages' => $totalPages,
        'totalLinks' => $totalLinks,
    ]);
}
/**
 * Token-protected trigger that performs one StatLink maintenance action per call.
 *
 * The token is read from the `token` input or, when that is empty, from the
 * `X-StatLink-Token` request header, and compared with `SEO_TRIGGER_TOKEN`.
 * Responds 503 when no token is configured and 403 on a missing or wrong token.
 * Action priority: remove expired links, then retry failed links, then add new ones.
 */
public function runByToken(): void
{
    $expectedToken = (string) Config::get('SEO_TRIGGER_TOKEN', '');

    $givenToken = (string) $this->input('token', '');
    if ($givenToken === '') {
        $givenToken = (string) ($_SERVER['HTTP_X_STATLINK_TOKEN'] ?? '');
    }

    if ($expectedToken === '') {
        $this->json(['success' => false, 'message' => 'Token trigger is disabled.'], 503);
        return;
    }

    $authorized = $givenToken !== '' && hash_equals($expectedToken, $givenToken);
    if (!$authorized) {
        $this->json(['success' => false, 'message' => 'Forbidden'], 403);
        return;
    }

    @set_time_limit(300);

    $statLink = new StatLinkService();

    // 1) Expired links: any removal or removal error ends this run.
    $removal = $statLink->removeExpiredLinks();
    if ($removal['removed'] > 0 || $removal['errors'] > 0) {
        $payload = [
            'success' => true,
            'action' => 'remove',
            'removed' => $removal['removed'],
            'remove_errors' => $removal['errors'],
        ];
        $this->json($payload);
        return;
    }

    // 2) Previously failed links are retried before any new link is added.
    $retry = $statLink->retryFailedLinks();
    if ($retry['retried'] > 0 || $retry['errors'] > 0) {
        $payload = [
            'success' => true,
            'action' => 'retry',
            'retried' => $retry['retried'],
            'still_failed' => $retry['still_failed'],
            'retry_errors' => $retry['errors'],
            'diagnostics' => $statLink->getLastDiagnostics(),
        ];
        $this->json($payload);
        return;
    }

    // 3) Nothing to clean up: process new articles.
    $addition = $statLink->processNewArticles();
    $payload = [
        'success' => true,
        'action' => 'add',
        'added' => $addition['added'],
        'skipped' => $addition['skipped'],
        'add_errors' => $addition['errors'],
        'diagnostics' => $statLink->getLastDiagnostics(),
    ];
    $this->json($payload);
}
}

View File

@@ -19,7 +19,7 @@ abstract class Controller
{
http_response_code($statusCode);
header('Content-Type: application/json');
echo json_encode($data);
echo json_encode($data, JSON_INVALID_UTF8_SUBSTITUTE | JSON_UNESCAPED_UNICODE);
exit;
}

View File

@@ -0,0 +1,155 @@
<?php
declare(strict_types=1);
namespace App\Core\Database;
use PDO;
use RuntimeException;
use Throwable;
final class Migrator
{
/**
 * @param PDO    $pdo            Connection used for the `migrations` bookkeeping table,
 *                               the named lock and for executing the migration SQL.
 * @param string $migrationsPath Directory that is scanned for `*.sql` migration files.
 */
public function __construct(
private readonly PDO $pdo,
private readonly string $migrationsPath
) {
}
/**
 * Compare the migration files on disk with the filenames recorded in the
 * `migrations` table (which is created first if it does not exist).
 *
 * @return array{total:int, applied:int, pending:int, pending_files:array<int, string>}
 */
public function status(): array
{
    $this->ensureMigrationsTable();

    $onDisk = $this->migrationFiles();
    $recorded = $this->appliedFilenames();

    // Files present on disk that have no row in the bookkeeping table yet.
    $outstanding = array_values(array_diff($onDisk, $recorded));

    return [
        'total' => count($onDisk),
        'applied' => count($recorded),
        'pending' => count($outstanding),
        'pending_files' => $outstanding,
    ];
}
/**
 * Execute every migration file that is not yet recorded in the `migrations`
 * table, in ascending filename order, while holding the migration lock.
 *
 * Files that cannot be read are logged as `[skip-unreadable]` and empty files
 * as `[skip-empty]`; neither is recorded as applied. The run stops at the
 * first failing migration.
 *
 * @return array{executed:int, skipped:int, logs:array<int, string>}
 *
 * @throws RuntimeException When the lock cannot be acquired, or when a migration
 *                          fails. In the latter case the message names the file and
 *                          the original error is attached as the previous exception.
 */
public function runPending(): array
{
    $this->acquireLock();

    try {
        $this->ensureMigrationsTable();

        $pendingFiles = array_values(array_diff($this->migrationFiles(), $this->appliedFilenames()));
        $directory = rtrim($this->migrationsPath, '/\\');

        $insert = $this->pdo->prepare(
            'INSERT INTO migrations (filename, executed_at) VALUES (:filename, :executed_at)'
        );

        $executed = 0;
        $skipped = 0;
        $logs = [];

        foreach ($pendingFiles as $filename) {
            $fullPath = $directory . DIRECTORY_SEPARATOR . $filename;

            // Checked up front so an unreadable file is reported as such instead of
            // raising a warning and being mislabelled as empty.
            $sql = is_file($fullPath) && is_readable($fullPath)
                ? file_get_contents($fullPath)
                : false;

            if ($sql === false) {
                $skipped++;
                $logs[] = '[skip-unreadable] ' . $filename;
                continue;
            }

            if (trim($sql) === '') {
                $skipped++;
                $logs[] = '[skip-empty] ' . $filename;
                continue;
            }

            try {
                $this->pdo->beginTransaction();
                $this->pdo->exec($sql);
                $insert->execute([
                    'filename' => $filename,
                    'executed_at' => date('Y-m-d H:i:s'),
                ]);

                // The migration SQL may already have ended the transaction
                // (e.g. DDL with an implicit commit), hence the guard.
                if ($this->pdo->inTransaction()) {
                    $this->pdo->commit();
                }

                $executed++;
                $logs[] = '[ok] ' . $filename;
            } catch (Throwable $exception) {
                if ($this->pdo->inTransaction()) {
                    $this->pdo->rollBack();
                }

                // The log array never reaches the caller on failure, so the file name
                // travels in the exception message instead.
                throw new RuntimeException(
                    '[error] ' . $filename . ' - ' . $exception->getMessage(),
                    0,
                    $exception
                );
            }
        }

        return [
            'executed' => $executed,
            'skipped' => $skipped,
            'logs' => $logs,
        ];
    } finally {
        $this->releaseLock();
    }
}
/**
 * Create the `migrations` bookkeeping table when it does not exist yet.
 */
private function ensureMigrationsTable(): void
{
    $createTableSql = 'CREATE TABLE IF NOT EXISTS migrations (
id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
filename VARCHAR(190) NOT NULL,
executed_at DATETIME NOT NULL,
UNIQUE KEY migrations_filename_unique (filename)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci';

    $this->pdo->exec($createTableSql);
}
/**
 * Take the named database lock `backpro_migrations_lock` (GET_LOCK with a 10 second timeout).
 *
 * @throws RuntimeException When the lock query does not return 1.
 */
private function acquireLock(): void
{
    $result = $this->pdo->query("SELECT GET_LOCK('backpro_migrations_lock', 10)");
    $granted = $result === false ? false : $result->fetchColumn();

    if ((string) $granted === '1') {
        return;
    }

    throw new RuntimeException('Nie mozna uzyskac blokady migracji. Sprobuj ponownie za chwile.');
}
/**
 * Release the named database lock `backpro_migrations_lock`.
 *
 * Called from the `finally` block of runPending(), so it runs after both
 * successful and failed runs.
 */
private function releaseLock(): void
{
$this->pdo->query("DO RELEASE_LOCK('backpro_migrations_lock')");
}
/**
 * Base names of all `*.sql` files found directly in the migrations directory,
 * sorted ascending. Returns an empty list when glob() fails.
 *
 * @return array<int, string>
 */
private function migrationFiles(): array
{
    $pattern = rtrim($this->migrationsPath, '/\\') . DIRECTORY_SEPARATOR . '*.sql';

    $matches = glob($pattern);
    if (!is_array($matches)) {
        return [];
    }

    $names = [];
    foreach ($matches as $path) {
        $names[] = basename($path);
    }
    sort($names);

    return $names;
}
/**
 * Filenames recorded in the `migrations` table, ordered by filename.
 *
 * @return array<int, string>
 */
private function appliedFilenames(): array
{
    $rows = $this->pdo
        ->query('SELECT filename FROM migrations ORDER BY filename')
        ->fetchAll(PDO::FETCH_COLUMN);

    if (!is_array($rows)) {
        return [];
    }

    $names = [];
    foreach ($rows as $value) {
        $names[] = (string) $value;
    }

    return $names;
}
}

View File

@@ -90,6 +90,22 @@ class Article extends Model
return $stmt->fetchAll(\PDO::FETCH_COLUMN);
}
/**
 * Fetch published articles of a site that can serve as internal-link targets:
 * status "published", a WordPress post id and non-empty content, newest first
 * (by published_at, falling back to created_at, then by id).
 *
 * @param int $siteId Site whose articles are searched.
 * @param int $limit  Maximum number of rows; negative values are treated as 0.
 *
 * @return array Rows with id, title, content, wp_post_id, wp_post_url, published_at, created_at.
 */
public static function findPublishedLinkCandidatesBySite(int $siteId, int $limit = 60): array
{
    // A negative LIMIT is a SQL syntax error; clamp it instead of letting the query fail.
    $limit = max(0, $limit);

    $stmt = self::db()->prepare(
        "SELECT a.id, a.title, a.content, a.wp_post_id, a.wp_post_url, a.published_at, a.created_at
        FROM articles a
        WHERE a.site_id = :site_id
        AND a.status = 'published'
        AND a.wp_post_id IS NOT NULL
        AND a.content <> ''
        ORDER BY COALESCE(a.published_at, a.created_at) DESC, a.id DESC
        LIMIT :limit"
    );

    // LIMIT must be bound as an integer; a quoted string is rejected when prepares are emulated.
    $stmt->bindValue('site_id', $siteId, \PDO::PARAM_INT);
    $stmt->bindValue('limit', $limit, \PDO::PARAM_INT);
    $stmt->execute();

    return $stmt->fetchAll();
}
public static function existsBySiteAndWpPostId(int $siteId, int $wpPostId): bool
{
$stmt = self::db()->prepare(

View File

@@ -667,8 +667,10 @@ PHP;
);
if ($wpPostId) {
$wpPostUrl = $wp->getPostLink($site, (int) $wpPostId);
Article::update((int) $article['id'], [
'wp_post_id' => (int) $wpPostId,
'wp_post_url' => $wpPostUrl,
'status' => 'published',
'error_message' => null,
]);

View File

@@ -0,0 +1,479 @@
<?php
namespace App\Services;
use App\Core\Config;
use App\Helpers\Logger;
use App\Models\Article;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
class InternalLinkService
{
private const DEFAULT_MIN_LINKS = 2;
private const DEFAULT_MAX_LINKS = 4;
private const DEFAULT_EMBEDDING_MODEL = 'text-embedding-3-small';
private const EMBEDDING_INPUT_MAX_CHARS = 1800;
private const MIN_PARAGRAPH_LENGTH = 120;
private Client $client;
/**
 * Create the HTTP client used for the embeddings endpoint
 * (base URI https://api.openai.com/v1/, 45 second timeout).
 */
public function __construct()
{
    $clientOptions = [
        'base_uri' => 'https://api.openai.com/v1/',
        'timeout' => 45,
    ];

    $this->client = new Client($clientOptions);
}
/**
 * Append links to related, already published articles of the same site to the
 * paragraphs of the given HTML.
 *
 * @param array  $site        Site row; `id` is required here.
 * @param string $title       Title of the article being published.
 * @param string $htmlContent Article body as an HTML fragment.
 *
 * @return array{content:string, links_added:int, targets:array, mode:string}
 *         `content` is the original HTML whenever nothing could be linked; `mode`
 *         names the reason (disabled, invalid_input, no_candidates, ranking_empty,
 *         or the mode reported by the injection step).
 */
public function enrichContentWithInternalLinks(array $site, string $title, string $htmlContent): array
{
    // Result shape for every path that hands the content back untouched.
    $unchanged = static fn (string $mode): array => [
        'content' => $htmlContent,
        'links_added' => 0,
        'targets' => [],
        'mode' => $mode,
    ];

    if (!$this->isEnabled()) {
        return $unchanged('disabled');
    }

    $siteId = (int) ($site['id'] ?? 0);
    if ($siteId <= 0 || trim($htmlContent) === '') {
        return $unchanged('invalid_input');
    }

    $maxLinks = $this->sanitizeLimit(
        Config::getDbSetting('internal_link_max_count', (string) self::DEFAULT_MAX_LINKS),
        self::DEFAULT_MAX_LINKS,
        1,
        8
    );

    $candidates = Article::findPublishedLinkCandidatesBySite($siteId, 80);
    if (count($candidates) === 0) {
        return $unchanged('no_candidates');
    }

    $ranked = $this->rankCandidates($site, $title, $htmlContent, $candidates);
    if (count($ranked) === 0) {
        return $unchanged('ranking_empty');
    }

    // NOTE(review): the `internal_link_min_count` setting was clamped to <= $maxLinks, so the
    // former "top up to the minimum" branch could never run and was removed. The setting
    // therefore has no effect on the selection; confirm the intended semantics.
    $selected = array_slice($ranked, 0, $maxLinks);

    $injected = $this->injectLinksIntoParagraphs($htmlContent, $selected);

    return [
        'content' => $injected['content'],
        'links_added' => $injected['links_added'],
        'targets' => $injected['targets'],
        'mode' => $injected['mode'],
    ];
}
/**
 * Internal linking is on unless the `internal_linking_enabled` setting is exactly "0".
 */
private function isEnabled(): bool
{
    return (string) Config::getDbSetting('internal_linking_enabled', '1') !== '0';
}
/**
 * Order link candidates by relevance to the article being published.
 *
 * Candidates without a positive id, positive WordPress post id, title or
 * resolvable URL are dropped. Embedding similarity is used when available
 * (only scores above 0.15 are kept); otherwise the lexical ranking is returned.
 *
 * @return array<int, array{id:int, title:string, url:string, score:float|int}>
 */
private function rankCandidates(array $site, string $title, string $htmlContent, array $candidates): array
{
    $sourceText = $this->buildEmbeddingText($title, $htmlContent);

    $payload = [];
    foreach ($candidates as $row) {
        $rowId = (int) ($row['id'] ?? 0);
        $rowWpPostId = (int) ($row['wp_post_id'] ?? 0);
        $rowTitle = trim((string) ($row['title'] ?? ''));

        $isUsable = $rowId > 0 && $rowWpPostId > 0 && $rowTitle !== '';
        $url = $isUsable ? $this->resolveCandidateUrl($site, $row) : null;
        if ($url === null) {
            continue;
        }

        $payload[] = [
            'id' => $rowId,
            'title' => $rowTitle,
            'url' => $url,
            'text' => $this->buildEmbeddingText($rowTitle, (string) ($row['content'] ?? '')),
        ];
    }

    if ($payload === []) {
        return [];
    }

    $scored = $this->calculateSemanticScores($sourceText, $payload);
    if ($scored === null) {
        Logger::warning('Internal linking fallback to lexical ranking (embeddings unavailable).', 'publish');

        return $this->calculateLexicalRanking($sourceText, $payload);
    }

    usort($scored, static fn (array $a, array $b) => $b['score'] <=> $a['score']);

    // Keep only candidates that are similar enough to be worth linking.
    $relevant = [];
    foreach ($scored as $row) {
        if ($row['score'] > 0.15) {
            $relevant[] = $row;
        }
    }

    return $relevant;
}
/**
 * Score each candidate by cosine similarity between its embedding and the
 * embedding of the source text, using one batched request to the `embeddings` endpoint.
 *
 * @param string $sourceText Text of the article being published.
 * @param array  $candidates Rows with `id`, `title`, `url` and `text`.
 *
 * @return array|null Unsorted rows with `id`, `title`, `url`, `score`; null when no API key
 *                    is configured, the request fails or the response is malformed.
 */
private function calculateSemanticScores(string $sourceText, array $candidates): ?array
{
    $apiKey = Config::getDbSetting('openai_api_key', Config::get('OPENAI_API_KEY'));
    if (!is_string($apiKey) || trim($apiKey) === '') {
        return null;
    }

    $model = (string) Config::getDbSetting('openai_embedding_model', self::DEFAULT_EMBEDDING_MODEL);

    // Input 0 is the source text, inputs 1..n are the candidates in their given order.
    $inputs = [$sourceText];
    foreach ($candidates as $row) {
        $inputs[] = $row['text'];
    }

    $requestOptions = [
        'headers' => [
            'Authorization' => 'Bearer ' . $apiKey,
            'Content-Type' => 'application/json',
        ],
        'json' => [
            'model' => $model,
            'input' => $inputs,
        ],
    ];

    try {
        $response = $this->client->post('embeddings', $requestOptions);
    } catch (GuzzleException $e) {
        Logger::warning('Embeddings API error for internal linking: ' . $e->getMessage(), 'publish');

        return null;
    }

    $decoded = json_decode($response->getBody()->getContents(), true);
    $items = is_array($decoded) ? ($decoded['data'] ?? null) : null;
    if (!is_array($items) || count($items) !== count($inputs)) {
        Logger::warning('Embeddings response invalid for internal linking.', 'publish');

        return null;
    }

    $vectors = [];
    foreach ($items as $item) {
        $embedding = $item['embedding'] ?? null;
        if (!is_array($embedding)) {
            return null;
        }
        $vectors[] = array_map('floatval', $embedding);
    }

    $sourceVector = $vectors[0];

    $scored = [];
    foreach ($candidates as $position => $row) {
        // Offset by one because slot 0 holds the source text.
        $vector = $vectors[$position + 1] ?? null;
        if (!is_array($vector)) {
            continue;
        }

        $scored[] = [
            'id' => $row['id'],
            'title' => $row['title'],
            'url' => $row['url'],
            'score' => $this->cosineSimilarity($sourceVector, $vector),
        ];
    }

    return $scored;
}
/**
 * Fallback ranking without embeddings: the score is the share of a candidate's
 * distinct tokens that also occur in the source text, so it is always in 0..1.
 *
 * @param string $sourceText Text of the article being published.
 * @param array  $candidates Rows with `id`, `title`, `url` and `text`.
 *
 * @return array<int, array{id:mixed, title:mixed, url:mixed, score:float|int}> Sorted by score, best first.
 */
private function calculateLexicalRanking(string $sourceText, array $candidates): array
{
    $sourceSet = array_fill_keys($this->tokenize($sourceText), true);

    $ranked = [];
    foreach ($candidates as $candidate) {
        // Distinct tokens only: counting repeats against a unique-token denominator
        // pushed scores above 1 and favoured repetitive texts.
        $targetTokens = array_unique($this->tokenize($candidate['text']));
        if (count($targetTokens) === 0) {
            continue;
        }

        $shared = 0;
        foreach ($targetTokens as $token) {
            if (isset($sourceSet[$token])) {
                $shared++;
            }
        }

        $ranked[] = [
            'id' => $candidate['id'],
            'title' => $candidate['title'],
            'url' => $candidate['url'],
            'score' => $shared / count($targetTokens),
        ];
    }

    usort($ranked, static fn (array $a, array $b) => $b['score'] <=> $a['score']);

    return $ranked;
}
/**
 * Append " Sprawdz tez: <a>title</a>." to one paragraph per selected candidate.
 *
 * Only paragraphs with at least MIN_PARAGRAPH_LENGTH characters of text are
 * used, and each paragraph receives at most one link.
 *
 * @param string $html     Article body as an HTML fragment.
 * @param array  $selected Candidate rows with `title`, `url` and optionally `score`.
 *
 * @return array{content:string, links_added:int, targets:array, mode:string}
 *         The original HTML is returned with `links_added` 0 when the DOM extension
 *         is missing, parsing fails or no suitable paragraph exists.
 */
private function injectLinksIntoParagraphs(string $html, array $selected): array
{
    // Result shape for every path that returns the HTML untouched.
    $unchanged = static fn (string $mode): array => [
        'content' => $html,
        'links_added' => 0,
        'targets' => [],
        'mode' => $mode,
    ];

    if (!class_exists(\DOMDocument::class) || !class_exists(\DOMXPath::class)) {
        return $unchanged('dom_extension_missing');
    }

    $doc = new \DOMDocument('1.0', 'UTF-8');
    $wrappedHtml = '<div id="backpro-root">' . $html . '</div>';

    // Silence parser warnings for this call only, then restore the previous libxml mode
    // so the rest of the request is not left with error reporting switched off.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $loaded = $doc->loadHTML(
            '<?xml encoding="utf-8" ?>' . $wrappedHtml,
            LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
        );
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    if (!$loaded) {
        return $unchanged('dom_parse_failed');
    }

    $xpath = new \DOMXPath($doc);
    $paragraphNodes = $xpath->query('//div[@id="backpro-root"]//p');
    if ($paragraphNodes === false || $paragraphNodes->length === 0) {
        return $unchanged('no_paragraphs');
    }

    $availableParagraphIndexes = [];
    for ($i = 0; $i < $paragraphNodes->length; $i++) {
        $paragraphText = trim((string) $paragraphNodes->item($i)?->textContent);
        if (mb_strlen($paragraphText) >= self::MIN_PARAGRAPH_LENGTH) {
            $availableParagraphIndexes[] = $i;
        }
    }

    if (count($availableParagraphIndexes) === 0) {
        return $unchanged('paragraphs_too_short');
    }

    $usedParagraphs = [];
    $targets = [];
    $linksAdded = 0;

    foreach ($selected as $candidate) {
        $linkTitle = (string) $candidate['title'];
        $linkUrl = (string) $candidate['url'];

        $index = $this->pickBestParagraphIndex($paragraphNodes, $availableParagraphIndexes, $usedParagraphs, $linkTitle);
        if ($index === null) {
            continue;
        }

        $paragraph = $paragraphNodes->item($index);
        if (!$paragraph instanceof \DOMElement) {
            continue;
        }

        // createElement() does not escape its value argument, so a title containing "&"
        // would be cut off with a warning. A text node is escaped on serialisation.
        $anchor = $doc->createElement('a');
        $anchor->appendChild($doc->createTextNode($linkTitle));
        $anchor->setAttribute('href', $linkUrl);
        $anchor->setAttribute('title', $linkTitle);

        $paragraph->appendChild($doc->createTextNode(' Sprawdz tez: '));
        $paragraph->appendChild($anchor);
        $paragraph->appendChild($doc->createTextNode('.'));

        $usedParagraphs[$index] = true;
        $targets[] = [
            'title' => $linkTitle,
            'url' => $linkUrl,
            'score' => (float) ($candidate['score'] ?? 0.0),
        ];
        $linksAdded++;
    }

    $root = $xpath->query('//div[@id="backpro-root"]')->item(0);
    if (!$root instanceof \DOMElement) {
        return $unchanged('root_missing');
    }

    // Serialise only the children so the temporary wrapper div is not part of the output.
    $newHtml = '';
    foreach ($root->childNodes as $child) {
        $newHtml .= $doc->saveHTML($child);
    }

    return [
        'content' => $newHtml !== '' ? $newHtml : $html,
        'links_added' => $linksAdded,
        'targets' => $targets,
        'mode' => 'ok',
    ];
}
/**
 * Choose the unused paragraph whose text contains the most title tokens
 * (tokens shorter than 4 characters are ignored). On a tie the earliest
 * paragraph wins, and a paragraph with no match at all can still be chosen.
 *
 * @param \DOMNodeList $paragraphNodes   All paragraph nodes of the document.
 * @param array        $candidateIndexes Indexes into $paragraphNodes that may receive a link.
 * @param array        $usedParagraphs   Map of already used indexes (index => true).
 * @param string       $title            Title of the link target.
 *
 * @return int|null Index of the chosen paragraph, or null when none is left.
 */
private function pickBestParagraphIndex(\DOMNodeList $paragraphNodes, array $candidateIndexes, array $usedParagraphs, string $title): ?int
{
    // Short tokens are filtered once here instead of on every paragraph.
    $needles = array_filter(
        array_unique($this->tokenize($title)),
        static fn (string $token): bool => mb_strlen($token) >= 4
    );

    $winner = null;
    $winnerHits = -1;

    foreach ($candidateIndexes as $index) {
        if (isset($usedParagraphs[$index])) {
            continue;
        }

        $node = $paragraphNodes->item($index);
        if (!$node instanceof \DOMElement) {
            continue;
        }

        $haystack = mb_strtolower(trim((string) $node->textContent));
        if ($haystack === '') {
            continue;
        }

        $hits = 0;
        foreach ($needles as $needle) {
            if (str_contains($haystack, $needle)) {
                $hits++;
            }
        }

        // Strictly greater, so the first paragraph with a given score keeps the lead.
        if ($hits > $winnerHits) {
            $winnerHits = $hits;
            $winner = $index;
        }
    }

    return $winner;
}
/**
 * Build the plain-text input for ranking: the trimmed title, a blank line and
 * the first EMBEDDING_INPUT_MAX_CHARS characters of the tag-free content.
 *
 * @param string $title       Article title.
 * @param string $htmlContent Article body as HTML.
 */
private function buildEmbeddingText(string $title, string $htmlContent): string
{
    // strip_tags() joins neighbouring elements ("</h2><p>" gives "HeadingText"), so a space
    // is inserted in front of block-level tags before the markup is removed.
    $spaced = preg_replace(
        '/<\/?(?:p|div|h[1-6]|ul|ol|li|br|tr|td|th|table|blockquote)\b/i',
        ' $0',
        $htmlContent
    ) ?? $htmlContent;

    $plain = trim((string) preg_replace('/\s+/u', ' ', strip_tags($spaced)));

    $text = trim($title);
    if ($plain !== '') {
        $text .= "\n\n" . mb_substr($plain, 0, self::EMBEDDING_INPUT_MAX_CHARS);
    }

    return trim($text);
}
/**
 * Determine the public URL of a candidate article.
 *
 * The stored `wp_post_url` wins when it is non-empty; otherwise the URL is built
 * as `<site url>/?p=<wp_post_id>`.
 *
 * @return string|null Null when neither a stored URL nor site URL plus post id is available.
 */
private function resolveCandidateUrl(array $site, array $candidate): ?string
{
    $stored = trim((string) ($candidate['wp_post_url'] ?? ''));
    if ($stored !== '') {
        return $stored;
    }

    $base = rtrim((string) ($site['url'] ?? ''), '/');
    $postId = (int) ($candidate['wp_post_id'] ?? 0);

    $canBuild = $base !== '' && $postId > 0;

    return $canBuild ? $base . '/?p=' . $postId : null;
}
/**
 * Cosine similarity of two numeric vectors, computed over their common leading
 * length. Returns 0.0 for empty input or when either vector has zero magnitude.
 *
 * @param array $a List of numbers.
 * @param array $b List of numbers.
 */
private function cosineSimilarity(array $a, array $b): float
{
    $length = min(count($a), count($b));
    if ($length === 0) {
        return 0.0;
    }

    $dotProduct = 0.0;
    $squaredA = 0.0;
    $squaredB = 0.0;

    $position = 0;
    while ($position < $length) {
        $left = $a[$position];
        $right = $b[$position];

        $dotProduct += $left * $right;
        $squaredA += $left * $left;
        $squaredB += $right * $right;

        $position++;
    }

    if ($squaredA <= 0.0 || $squaredB <= 0.0) {
        return 0.0;
    }

    return $dotProduct / (sqrt($squaredA) * sqrt($squaredB));
}
/**
 * Split text into lower-cased word tokens for lexical matching.
 *
 * Punctuation is replaced by spaces; tokens shorter than 3 characters and
 * stop words are dropped. Duplicates are kept.
 *
 * @return array<int, string>
 */
private function tokenize(string $text): array
{
    $text = mb_strtolower($text);
    $text = preg_replace('/[^\p{L}\p{N}\s]/u', ' ', $text) ?? $text;
    $parts = preg_split('/\s+/u', $text);
    if (!is_array($parts)) {
        return [];
    }

    // Real Polish text carries diacritics, so each affected stop word is listed in both its
    // accented and its ASCII spelling; the ASCII-only list never matched such input.
    $stopWords = [
        'oraz', 'ktory', 'ktora', 'ktore', 'ktorych', 'przez', 'takze', 'bardzo', 'mozna', 'mozesz',
        'który', 'która', 'które', 'których', 'także', 'można', 'możesz',
        'czyli', 'jest', 'sa', 'ten', 'ta', 'to', 'dla', 'pod', 'nad', 'sie', 'jako', 'aby', 'albo', 'lub',
        'są', 'się',
        'or', 'and', 'the', 'with', 'from', 'that', 'this', 'tego', 'tych', 'tym', 'juz', 'wiecej', 'mniej',
        'już', 'więcej',
    ];
    $stopSet = array_fill_keys($stopWords, true);

    $tokens = [];
    foreach ($parts as $part) {
        $token = trim($part);
        if ($token === '' || mb_strlen($token) < 3) {
            continue;
        }
        if (isset($stopSet[$token])) {
            continue;
        }
        $tokens[] = $token;
    }

    return $tokens;
}
/**
 * Convert a raw setting value to an integer within [$min, $max].
 *
 * Values that are not numeric fall back to $default. An explicit zero is kept
 * when the range allows it ($min <= 0); otherwise zero also falls back to
 * $default. The result is clamped to the range in every case.
 *
 * @param mixed $value   Raw setting value (typically a string from the settings store).
 * @param int   $default Fallback for missing or unusable values.
 * @param int   $min     Lowest allowed result.
 * @param int   $max     Highest allowed result.
 */
private function sanitizeLimit(mixed $value, int $default, int $min, int $max): int
{
    $intValue = (int) $value;

    // (int) turns unparsable input into 0, so zero is only trusted when the caller really
    // supplied a number and zero lies inside the allowed range.
    if ($intValue === 0 && (!is_numeric($value) || $min > 0)) {
        $intValue = $default;
    }

    if ($intValue < $min) {
        return $min;
    }
    if ($intValue > $max) {
        return $max;
    }

    return $intValue;
}
}

View File

@@ -2,14 +2,16 @@
namespace App\Services;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use App\Core\Config;
use App\Helpers\Logger;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
class OpenAIService
{
public const DEFAULT_ARTICLE_PROMPT_TEMPLATE = 'Jesteś doświadczonym copywriterem SEO. Pisz artykuły w języku polskim, optymalizowane pod SEO. Artykuł powinien mieć {min_words}-{max_words} słów, zawierać nagłówki H2 i H3, być angażujący i merytoryczny. Formatuj treść w HTML (bez tagów <html>, <body>, <head>). Zwróć odpowiedź WYŁĄCZNIE w formacie JSON: {"title": "tytuł artykułu", "content": "treść HTML artykułu"}';
public const DEFAULT_ARTICLE_PROMPT_TEMPLATE = 'Jestes doswiadczonym redaktorem SEO i ghostwriterem. Pisz po polsku naturalnie, konkretnie i bez AI-owych klisz. Artykul ma miec od {min_words} do {max_words} slow. Formatuj tresc tylko jako HTML (bez <html>, <body>, <head>) i zachowaj strukture: lead, sekcje H2/H3, listy praktyczne, sekcja FAQ. Uzywaj realnych przykladow i jezyka, ktory brzmi jak tekst napisany przez czlowieka z doswiadczeniem. Nie naduzywaj pytan retorycznych i unikaj fraz typu: "w dzisiejszych czasach", "podsumowujac", "warto zauwazyc". Zwracaj odpowiedz WYLACZNIE jako JSON: {"title":"...","content":"..."}';
private const MIN_ACCEPTABLE_H2 = 3;
private Client $client;
@@ -25,8 +27,13 @@ class OpenAIService
{
$apiKey = Config::getDbSetting('openai_api_key', Config::get('OPENAI_API_KEY'));
$model = Config::getDbSetting('openai_model', Config::get('OPENAI_MODEL', 'gpt-4o'));
$minWords = Config::getDbSetting('article_min_words', '800');
$maxWords = Config::getDbSetting('article_max_words', '1200');
$minWords = $this->sanitizeWordLimit(Config::getDbSetting('article_min_words', '900'), 900);
$maxWords = $this->sanitizeWordLimit(Config::getDbSetting('article_max_words', '1400'), 1400);
if ($maxWords < $minWords) {
$maxWords = $minWords;
}
$systemPromptTemplate = Config::getDbSetting('article_generation_prompt', self::DEFAULT_ARTICLE_PROMPT_TEMPLATE);
if (!is_string($systemPromptTemplate) || trim($systemPromptTemplate) === '') {
@@ -40,22 +47,70 @@ class OpenAIService
$existingList = !empty($existingTitles)
? implode("\n- ", $existingTitles)
: '(brak - to pierwszy artykuł z tego tematu)';
: '(brak - to pierwszy artykul z tego tematu)';
$systemPrompt = strtr($systemPromptTemplate, [
'{min_words}' => (string) $minWords,
'{max_words}' => (string) $maxWords,
]);
$userPrompt = "Napisz artykuł na temat: {$topicName}\n";
$userPrompt .= "Tytul ma byc samodzielny i nie moze zaczynac sie od nazwy tematu ani kategorii.\n";
if (!empty($topicDescription)) {
$userPrompt .= "Wytyczne: {$topicDescription}\n";
$qualityFeedback = '';
$lastPrompt = '';
for ($attempt = 1; $attempt <= 2; $attempt++) {
$userPrompt = $this->buildUserPrompt(
$topicName,
$topicDescription,
$existingList,
$qualityFeedback,
$minWords,
$maxWords
);
$fullPrompt = $systemPrompt . "\n\n" . $userPrompt;
$lastPrompt = $fullPrompt;
$content = $this->requestStructuredArticle($apiKey, (string) $model, $systemPrompt, $userPrompt);
if ($content === null) {
continue;
}
$article = json_decode($content, true);
if (!isset($article['title']) || !isset($article['content'])) {
Logger::error('Invalid JSON structure from OpenAI: ' . $content, 'openai');
$qualityFeedback = 'Zwroc poprawny JSON z polami "title" i "content".';
continue;
}
$title = trim((string) ($article['title'] ?? ''));
$htmlContent = trim((string) ($article['content'] ?? ''));
$quality = $this->validateQuality($title, $htmlContent, $minWords);
if (!$quality['ok']) {
$qualityFeedback = 'Poprzednia wersja nie spelnila wymagan: ' . implode('; ', $quality['issues']) . '. Napisz nowa, lepsza wersje.';
Logger::warning(
'Generated article quality retry for topic "' . $topicName . '" (attempt ' . $attempt . '): ' . implode('; ', $quality['issues']),
'openai'
);
continue;
}
Logger::info('Generated article: ' . $title, 'openai');
return [
'title' => $title,
'content' => $htmlContent,
'model' => (string) $model,
'prompt' => $fullPrompt,
];
}
$userPrompt .= "\nWAŻNE - NIE pisz o następujących tematach, bo artykuły o nich już istnieją na stronie:\n- {$existingList}";
$fullPrompt = $systemPrompt . "\n\n" . $userPrompt;
Logger::error('OpenAI generation failed after quality retries', 'openai');
return null;
}
private function requestStructuredArticle(string $apiKey, string $model, string $systemPrompt, string $userPrompt): ?string
{
try {
$response = $this->client->post('chat/completions', [
'headers' => [
@@ -68,38 +123,125 @@ class OpenAIService
['role' => 'system', 'content' => $systemPrompt],
['role' => 'user', 'content' => $userPrompt],
],
'temperature' => 0.8,
'max_tokens' => 4000,
'temperature' => 0.9,
'max_tokens' => 5000,
'response_format' => ['type' => 'json_object'],
],
]);
$data = json_decode($response->getBody()->getContents(), true);
$content = $data['choices'][0]['message']['content'] ?? null;
if (!$content) {
Logger::error('Empty response from OpenAI', 'openai');
return null;
}
$article = json_decode($content, true);
if (!isset($article['title']) || !isset($article['content'])) {
Logger::error('Invalid JSON structure from OpenAI: ' . $content, 'openai');
return null;
}
Logger::info("Generated article: {$article['title']}", 'openai');
return [
'title' => $article['title'],
'content' => $article['content'],
'model' => $model,
'prompt' => $fullPrompt,
];
} catch (GuzzleException $e) {
Logger::error('OpenAI API error: ' . $e->getMessage(), 'openai');
return null;
}
$data = json_decode($response->getBody()->getContents(), true);
$content = $data['choices'][0]['message']['content'] ?? null;
if (!is_string($content) || trim($content) === '') {
Logger::error('Empty response from OpenAI', 'openai');
return null;
}
return $content;
}
/**
 * Assemble the user prompt for article generation.
 *
 * @param string $topicName        Topic the article is about.
 * @param string $topicDescription Optional editorial guidelines; omitted when empty.
 * @param string $existingList     Already used titles, pre-formatted for the "- " list.
 * @param string $qualityFeedback  Corrections requested after a rejected attempt; omitted when empty.
 * @param int    $minWords         Lower bound of the target length.
 * @param int    $maxWords         Upper bound of the target length.
 */
private function buildUserPrompt(
    string $topicName,
    string $topicDescription,
    string $existingList,
    string $qualityFeedback,
    int $minWords,
    int $maxWords
): string {
    // Each entry already carries its own line break; the pieces are joined without a separator.
    $pieces = [
        "Napisz artykul na temat: {$topicName}\n",
        "Docelowa dlugosc: {$minWords}-{$maxWords} slow.\n",
        "Tytul ma byc samodzielny i nie moze zaczynac sie od nazwy tematu ani kategorii.\n",
        "Tresc ma byc konkretna, praktyczna i naturalna. Bez ogolnikow.\n",
        "Wstep: 2-3 krotkie akapity i jasna obietnica, czego czytelnik sie dowie.\n",
        "Srodek: minimum 3 sekcje H2, w kazdej przynajmniej jeden konkret (przyklad, liczba, scenariusz, checklista).\n",
        "Wstaw jedna sekcje H2 o nazwie \"Najczestsze bledy\" i jedna H2 \"FAQ\" z 3 pytaniami i odpowiedziami.\n",
        "Zakonczenie ma byc praktyczne: \"Co warto zapamietac\" jako lista punktowana.\n",
        "Uzywaj tylko HTML: <p>, <h2>, <h3>, <ul>, <ol>, <li>, <strong>, <em>, <blockquote>, <table>, <tr>, <th>, <td>.\n",
    ];

    if ($topicDescription !== '') {
        $pieces[] = "Wytyczne redakcyjne od klienta: {$topicDescription}\n";
    }

    $pieces[] = "\nNie powielaj tematow, bo te tytuly juz istnieja:\n- {$existingList}\n";

    if ($qualityFeedback !== '') {
        $pieces[] = "\nKrytyczne poprawki do wdrozenia: {$qualityFeedback}\n";
    }

    return implode('', $pieces);
}
/**
 * Check a generated article against the structural requirements of the prompt.
 *
 * Verified: non-empty title and content, at least $minWords words, at least
 * MIN_ACCEPTABLE_H2 <h2> headings, an <h2> whose text is "FAQ", and the
 * closing "Co warto zapamietac" section.
 *
 * @param string $title    Generated title (already trimmed by the caller).
 * @param string $content  Generated HTML body.
 * @param int    $minWords Required minimum word count.
 *
 * @return array{ok:bool, issues:array<int, string>}
 */
private function validateQuality(string $title, string $content, int $minWords): array
{
    $issues = [];
    $wordCount = $this->countWords($content);

    if ($title === '') {
        $issues[] = 'brak tytulu';
    }

    if ($content === '') {
        $issues[] = 'brak tresci';
    }

    if ($wordCount < $minWords) {
        $issues[] = 'za malo slow (' . $wordCount . ')';
    }

    $h2Count = preg_match_all('/<h2\b[^>]*>/i', $content);
    if ($h2Count < self::MIN_ACCEPTABLE_H2) {
        $issues[] = 'za malo naglowkow H2';
    }

    if (preg_match('/<h2\b[^>]*>\s*faq\s*<\/h2>/iu', $content) !== 1) {
        $issues[] = 'brak sekcji FAQ';
    }

    // Articles are written in Polish, so the heading normally arrives with diacritics
    // ("Co warto zapamiętać"). Matching only the ASCII spelling rejected such articles.
    if (preg_match('/co warto zapami[eę]ta[cć]/iu', $content) !== 1) {
        $issues[] = 'brak sekcji koncowej z konkretami';
    }

    return [
        'ok' => empty($issues),
        'issues' => $issues,
    ];
}
/**
 * Count the words in an HTML fragment, ignoring markup.
 *
 * @param string $html HTML fragment.
 *
 * @return int Number of whitespace-separated words in the tag-free text.
 */
private function countWords(string $html): int
{
    // strip_tags() glues neighbouring elements together ("</h2><p>" gives "HeadingText"),
    // which under-counts words; a space is inserted in front of block-level tags first.
    $spaced = preg_replace(
        '/<\/?(?:p|div|h[1-6]|ul|ol|li|br|tr|td|th|table|blockquote)\b/i',
        ' $0',
        $html
    ) ?? $html;

    $plain = trim(strip_tags($spaced));
    if ($plain === '') {
        return 0;
    }

    $parts = preg_split('/\s+/u', $plain);
    if (!is_array($parts)) {
        return 0;
    }

    return count(array_filter($parts, static fn ($item) => $item !== ''));
}
/**
 * Convert a raw word-limit setting to an integer between 400 and 4000.
 *
 * A value that casts to 0 (missing, zero or non-numeric) yields $default unchanged.
 *
 * @param mixed $value   Raw setting value.
 * @param int   $default Fallback returned as-is when the value casts to 0.
 */
private function sanitizeWordLimit(mixed $value, int $default): int
{
    $words = (int) $value;

    if ($words === 0) {
        return $default;
    }

    return max(400, min(4000, $words));
}
}

View File

@@ -11,6 +11,7 @@ class PublisherService
{
private TopicBalancer $topicBalancer;
private OpenAIService $openAI;
private InternalLinkService $internalLinkService;
private ImageService $imageService;
private WordPressService $wordpress;
@@ -18,6 +19,7 @@ class PublisherService
{
$this->topicBalancer = new TopicBalancer();
$this->openAI = new OpenAIService();
$this->internalLinkService = new InternalLinkService();
$this->imageService = new ImageService();
$this->wordpress = new WordPressService();
}
@@ -103,6 +105,18 @@ class PublisherService
private function publishPreparedArticle(array $site, array $topic, array $article, ?int $existingArticleId = null): array
{
$linkingResult = $this->internalLinkService->enrichContentWithInternalLinks(
$site,
(string) ($article['title'] ?? ''),
(string) ($article['content'] ?? '')
);
$article['content'] = (string) ($linkingResult['content'] ?? (string) ($article['content'] ?? ''));
Logger::info(
'Internal linking mode=' . (string) ($linkingResult['mode'] ?? 'unknown')
. ', links_added=' . (int) ($linkingResult['links_added'] ?? 0),
'publish'
);
$imageUrl = null;
$mediaId = null;
$image = $this->imageService->generate((string) $article['title'], (string) $topic['name']);
@@ -122,7 +136,8 @@ class PublisherService
(string) $article['title'],
(string) $article['content'],
$topic['wp_category_id'],
$mediaId
$mediaId,
$this->buildExcerpt((string) $article['content'])
);
if (!$wpPostId) {
@@ -137,12 +152,14 @@ class PublisherService
}
Logger::info("Opublikowano post: wp_post_id={$wpPostId}", 'publish');
$wpPostUrl = $this->wordpress->getPostLink($site, (int) $wpPostId);
if ($existingArticleId !== null) {
Article::update($existingArticleId, [
'title' => (string) $article['title'],
'content' => (string) $article['content'],
'wp_post_id' => $wpPostId,
'wp_post_url' => $wpPostUrl,
'image_url' => $imageUrl,
'status' => 'published',
'ai_model' => $article['model'] ?? null,
@@ -157,6 +174,7 @@ class PublisherService
'title' => (string) $article['title'],
'content' => (string) $article['content'],
'wp_post_id' => $wpPostId,
'wp_post_url' => $wpPostUrl,
'image_url' => $imageUrl,
'status' => 'published',
'ai_model' => $article['model'] ?? null,
@@ -281,4 +299,27 @@ class PublisherService
return ['success' => true, 'message' => $message];
}
/**
 * Builds a short plain-text excerpt from HTML content.
 *
 * Tags are stripped and whitespace is collapsed to single spaces. A space is
 * inserted in front of block-level tags before stripping, because
 * strip_tags() leaves no separator and "<h2>Title</h2><p>Text</p>" would
 * otherwise become "TitleText".
 *
 * Text of up to 155 characters is returned unchanged. Longer text is cut at
 * the last space within the first 156 characters when that space lies beyond
 * position 80, otherwise hard-cut at 155 characters; trailing whitespace and
 * punctuation are then removed and a single "." is appended.
 *
 * @param string $htmlContent Article HTML.
 * @return string Excerpt text, or '' when the content has no visible text.
 */
private function buildExcerpt(string $htmlContent): string
{
    $separated = preg_replace(
        '#</?(?:address|article|aside|blockquote|br|dd|div|dl|dt|figcaption|figure|footer|h[1-6]|header|hr|li|ol|p|pre|section|table|td|th|tr|ul)\b[^>]*>#i',
        ' $0',
        $htmlContent
    );

    // Fall back to the raw input if the regex engine reported an error.
    $plain = trim((string) preg_replace('/\s+/u', ' ', strip_tags($separated ?? $htmlContent)));
    if ($plain === '') {
        return '';
    }

    $maxLength = 155;
    if (mb_strlen($plain) <= $maxLength) {
        return $plain;
    }

    // Take one extra character so a space directly after the limit still counts as a word boundary.
    $cut = mb_substr($plain, 0, $maxLength + 1);
    $lastSpace = mb_strrpos($cut, ' ');
    if ($lastSpace !== false && $lastSpace > 80) {
        $cut = mb_substr($cut, 0, $lastSpace);
    } else {
        $cut = mb_substr($cut, 0, $maxLength);
    }

    return rtrim($cut, " \t\n\r\0\x0B.,;:!?") . '.';
}
}

View File

@@ -0,0 +1,720 @@
<?php
declare(strict_types=1);
namespace App\Services;
use App\Core\Config;
use App\Core\Database;
use App\Helpers\Logger;
use GuzzleHttp\Client;
use GuzzleHttp\Cookie\CookieJar;
class StatLinkService
{
private Client $http;
private CookieJar $cookies;
private bool $loggedIn = false;
private string $baseUrl;
private string $login;
private string $password;
private const MAX_LINKS_PER_RUN = 1;
private const LINK_LIFETIME_DAYS = 60;
/**
 * Reads the StatLink connection settings from Config and prepares an HTTP
 * client that shares one cookie jar across all requests of this instance.
 *
 * NOTE(review): 'verify' => false turns off TLS certificate verification for
 * every request, including the one that posts the account password. Confirm
 * this is intentional before relying on it.
 */
public function __construct()
{
    $this->baseUrl = rtrim((string) Config::get('statlink_url', 'https://statlink.pl'), '/');
    $this->login = (string) Config::get('statlink_login', '');
    $this->password = (string) Config::get('statlink_password', '');

    // Browser-like request headers sent with every call.
    $browserHeaders = [
        'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language' => 'pl-PL,pl;q=0.9,en;q=0.8',
    ];

    $this->cookies = new CookieJar();

    $clientOptions = [
        'cookies' => $this->cookies,
        'connect_timeout' => 60,
        'timeout' => 120,
        'verify' => false,
        'headers' => $browserHeaders,
    ];
    $this->http = new Client($clientOptions);
}
/**
 * Logs in to StatLink once per instance.
 *
 * Fetches the home page first (for session cookies and the hidden
 * "postuj_form" field), posts the login form, and treats the login as
 * successful when the response body contains one of the known marker
 * strings. The outcome is recorded in $this->loginDiagnostic.
 *
 * NOTE(review): the marker 'zalogowany' is also a substring of
 * 'niezalogowany', and 'Vampirius' looks like an account-specific string;
 * verify both against the real response pages.
 *
 * @return bool True when already logged in or the login was confirmed;
 *              false on missing credentials, missing confirmation, or any exception.
 */
public function login(): bool
{
    if ($this->loggedIn) {
        return true;
    }

    $hasCredentials = $this->login !== '' && $this->password !== '';
    if (!$hasCredentials) {
        $this->loginDiagnostic = 'FAIL: brak danych logowania w .env';
        Logger::error('StatLink: brak danych logowania w .env', 'statlink');
        return false;
    }

    try {
        // Home page first: picks up session cookies and the hidden form field.
        $homeBody = (string) $this->http->get($this->baseUrl . '/')->getBody();
        $postujForm = preg_match('/name="postuj_form"\s+value="([^"]*)"/', $homeBody, $pfm)
            ? $pfm[1]
            : '';

        $credentials = [
            'email' => $this->login,
            'haslo' => $this->password,
            'zaloguj' => 'ZALOGUJ',
            'postuj_form' => $postujForm,
        ];
        $response = $this->http->post($this->baseUrl . '/20,zaloguj', [
            'form_params' => $credentials,
            'allow_redirects' => true,
        ]);
        $body = (string) $response->getBody();

        $confirmed = false;
        foreach (['Zalogowano', 'zalogowany', 'Vampirius'] as $marker) {
            if (str_contains($body, $marker)) {
                $confirmed = true;
                break;
            }
        }

        if (!$confirmed) {
            $this->loginDiagnostic = 'FAIL: brak potwierdzenia logowania. Fragment: ' . mb_substr(strip_tags($body), 0, 300);
            Logger::error('StatLink: login nie powiodl sie — brak potwierdzenia zalogowania', 'statlink');
            return false;
        }

        $this->loggedIn = true;
        $this->loginDiagnostic = 'OK: zalogowano';
        Logger::info('StatLink: zalogowano pomyslnie', 'statlink');
        return true;
    } catch (\Throwable $e) {
        $this->loginDiagnostic = 'EXCEPTION: ' . $e->getMessage();
        Logger::error('StatLink: blad logowania — ' . $e->getMessage(), 'statlink');
        return false;
    }
}
/** @var array Diagnostic lines collected by the most recent addLink() call. */
private array $lastDiagnostics = [];

/** @var string|null Outcome of the most recent login attempt; null until login() records one. */
private ?string $loginDiagnostic = null;

/**
 * Returns the diagnostic lines of the last operation.
 *
 * When a login outcome has been recorded it comes first, prefixed with
 * "LOGIN: ", followed by the lines stored in $lastDiagnostics.
 *
 * @return array List of diagnostic strings.
 */
public function getLastDiagnostics(): array
{
    $loginLines = $this->loginDiagnostic === null
        ? []
        : ['LOGIN: ' . $this->loginDiagnostic];

    return array_merge($loginLines, $this->lastDiagnostics);
}
/**
 * Reduces an anchor text to the character set accepted by StatLink.
 *
 * Polish diacritics are transliterated to their ASCII base letters, every
 * remaining character outside [a-zA-Z0-9 .,+-_?!&\:=] is removed, and runs
 * of spaces left behind by removed characters (e.g. "a – b" -> "a  b") are
 * collapsed to a single space.
 *
 * @param string $anchor Raw anchor text.
 * @return string Sanitized, trimmed anchor; may be empty.
 */
private function sanitizeAnchor(string $anchor): string
{
    // Transliterate Polish diacritics to ASCII equivalents.
    $map = [
        'ą' => 'a', 'ć' => 'c', 'ę' => 'e', 'ł' => 'l', 'ń' => 'n',
        'ó' => 'o', 'ś' => 's', 'ź' => 'z', 'ż' => 'z',
        'Ą' => 'A', 'Ć' => 'C', 'Ę' => 'E', 'Ł' => 'L', 'Ń' => 'N',
        'Ó' => 'O', 'Ś' => 'S', 'Ź' => 'Z', 'Ż' => 'Z',
    ];
    $anchor = strtr($anchor, $map);

    // Drop everything outside the allowed set. The cast keeps the value a
    // string even if preg_replace() returns null on an engine error.
    $anchor = (string) preg_replace('/[^a-zA-Z0-9 .,+\-_?!&\\\\:=]/', '', $anchor);

    // Removing a character that stood between two spaces leaves a double space.
    $anchor = (string) preg_replace('/ {2,}/', ' ', $anchor);

    return trim($anchor);
}
/**
 * Submits a link to StatLink through the "twoje-linki" web form.
 *
 * Steps: sanitize the anchor, make sure a session is logged in, load the
 * links page to scrape the CSRF token ("niepozwol") and the category ids,
 * post the form, then look for the "Dodano Link" confirmation. Without a
 * confirmation the link is searched for in the account, and an existing
 * entry is treated as success. Every step appends a line to
 * $this->lastDiagnostics, which is reset at the start of each call.
 *
 * @param string $url    Full article URL; the scheme is sent separately via the "https" field.
 * @param string $anchor Anchor text; sanitized before use.
 * @return int|null StatLink id of the link, -1 when the link was confirmed
 *                  as added but its id could not be determined, or null on failure.
 */
public function addLink(string $url, string $anchor): ?int
{
    $this->lastDiagnostics = [];

    // Sanitize anchor for StatLink's character restrictions.
    $anchor = $this->sanitizeAnchor($anchor);
    if (mb_strlen($anchor) < 2) {
        $this->lastDiagnostics[] = 'FAIL: anchor za krotki po sanityzacji: "' . $anchor . '"';
        return null;
    }

    if (!$this->loggedIn && !$this->login()) {
        $this->lastDiagnostics[] = 'FAIL: login nie powiodl sie';
        return null;
    }
    $this->lastDiagnostics[] = 'OK: zalogowano';

    try {
        // GET the links page to scrape the CSRF token and categories.
        $pageResponse = $this->http->get($this->baseUrl . '/148,twoje-linki');
        $pageHtml = (string) $pageResponse->getBody();
        $this->lastDiagnostics[] = 'OK: pobrano strone linkow (HTTP ' . $pageResponse->getStatusCode() . ', ' . strlen($pageHtml) . ' bytes)';

        $csrfToken = $this->scrapeCsrfToken($pageHtml);
        if ($csrfToken === null) {
            $this->lastDiagnostics[] = 'FAIL: brak tokenu CSRF w HTML (szukam name="niepozwol")';
            // Keep a snippet of the page for debugging. mb_substr() so a
            // multi-byte character is never cut in half.
            $this->lastDiagnostics[] = 'DEBUG: fragment HTML (500 znakow): ' . mb_substr(strip_tags($pageHtml), 0, 500);
            Logger::error('StatLink: nie udalo sie pobrac tokenu CSRF', 'statlink');
            return null;
        }
        $this->lastDiagnostics[] = 'OK: CSRF token pobrany (' . strlen($csrfToken) . ' znakow)';

        $categories = $this->scrapeCategories($pageHtml);
        $this->lastDiagnostics[] = 'OK: kategorie: ' . (count($categories) > 0 ? implode(',', $categories) : 'brak');

        // The form takes the scheme as a flag and the URL without scheme and trailing slash.
        $isHttps = str_starts_with($url, 'https://');
        $cleanUrl = preg_replace('#^https?://#', '', $url);
        $cleanUrl = rtrim($cleanUrl, '/');
        $this->lastDiagnostics[] = "INFO: wysylam link={$cleanUrl}, anchor={$anchor}, https=" . ($isHttps ? '1' : '0');

        // Form fields as posted to the site.
        $formData = [
            'editadd' => '',
            'niepozwol' => $csrfToken,
            'https' => $isHttps ? '1' : '0',
            'link' => $cleanUrl,
            'anchor' => $anchor,
            'fraza_kluczowa1' => '',
            'fraza_kluczowa2' => '',
            'fraza_kluczowa3' => '',
            'wylacznosc' => '',
            'frazowy' => '',
            'tylko_https' => '',
            'min_ilosc_znakow' => '0',
            'statrank_min' => '0',
            'reaguj_statrank_min' => '',
            'statrank_max' => '10',
            'semstorm_keywords_top_min' => '0',
            'ilosc_dziennie' => '0.02',
            'ilosc_max' => '10',
            'ilosc_max_powiadom' => '',
            'ilosc_nofollow' => '0',
            'max_ilosc_domena' => '5',
            'skanuj_pos' => '',
            'pos_limit' => '',
            'pos_limit_powiadom' => '',
            'wstrzymaj' => '',
            'id_grupy' => '0',
            'nowa_grupa' => '',
            'pomin_wlasne' => '',
            'zapisz' => 'DODAJ',
        ];

        // Categories repeat the same key (id_kategorie_multiple[]), so the
        // urlencoded body is assembled by hand instead of via form_params.
        $bodyParts = [];
        foreach ($formData as $key => $value) {
            $bodyParts[] = urlencode($key) . '=' . urlencode((string) $value);
        }
        foreach ($categories as $catId) {
            $bodyParts[] = 'id_kategorie_multiple%5B%5D=' . urlencode((string) $catId);
        }
        $bodyString = implode('&', $bodyParts);

        $response = $this->http->post($this->baseUrl . '/148,twoje-linki#lista', [
            'headers' => [
                'Content-Type' => 'application/x-www-form-urlencoded',
            ],
            'body' => $bodyString,
            'allow_redirects' => true,
        ]);
        $responseHtml = (string) $response->getBody();
        $this->lastDiagnostics[] = 'OK: formularz wyslany (HTTP ' . $response->getStatusCode() . ', ' . strlen($responseHtml) . ' bytes)';

        // Look for common error/warning wording in the response.
        // mb_stripos() is case-insensitive, so one spelling per phrase is enough.
        $errorPatterns = ['błąd', 'error', 'nieprawidłow', 'nie można', 'istnieje', 'limit', 'za dużo'];
        $foundMessages = [];
        $strippedHtml = strip_tags($responseHtml);
        foreach ($errorPatterns as $pat) {
            $pos = mb_stripos($strippedHtml, $pat);
            if ($pos !== false) {
                $start = max(0, $pos - 40);
                $foundMessages[] = mb_substr($strippedHtml, $start, 100);
            }
        }
        if (!empty($foundMessages)) {
            $this->lastDiagnostics[] = 'WARN: znalezione komunikaty w odpowiedzi: ' . implode(' | ', array_unique($foundMessages));
        }

        // "Dodano Link" success message; stripos() already ignores case.
        if (stripos($responseHtml, 'Dodano Link') !== false) {
            // Try to extract the id directly from the response HTML first.
            $statlinkId = $this->findLinkIdInHtml($responseHtml, $cleanUrl);
            if ($statlinkId === null) {
                // Fallback: search via a separate request.
                $statlinkId = $this->findLinkIdBySearch($cleanUrl);
            }
            $this->lastDiagnostics[] = 'OK: potwierdzenie "Dodano Link" znalezione, statlink_id=' . ($statlinkId ?? 'nieznany');
            Logger::info("StatLink: dodano link" . ($statlinkId ? " ID {$statlinkId}" : '') . " dla {$cleanUrl}", 'statlink');
            return $statlinkId ?: -1; // -1 means added but id unknown
        }

        $this->lastDiagnostics[] = 'WARN: brak "Dodano Link" w odpowiedzi';

        // Capture the text after the "Twoje Linki" heading to skip navigation/header noise.
        $strippedFull = strip_tags($responseHtml);
        $formAreaPos = mb_strpos($strippedFull, 'Twoje Linki');
        if ($formAreaPos !== false) {
            $formArea = mb_substr($strippedFull, $formAreaPos, 2000);
            // Collapse whitespace for readability.
            $formArea = trim(preg_replace('/\s+/', ' ', $formArea));
            $this->lastDiagnostics[] = 'DEBUG: sekcja formularza: ' . mb_substr($formArea, 0, 1500);
        } else {
            $this->lastDiagnostics[] = 'DEBUG: fragment odpowiedzi (1500 znakow): ' . mb_substr(preg_replace('/\s+/', ' ', $strippedFull), 0, 1500);
        }

        // Also record the text of alert/message divs found in the raw HTML.
        if (preg_match_all('/<div[^>]*class="[^"]*(?:alert|message|info|error|warning|komunikat)[^"]*"[^>]*>(.*?)<\/div>/si', $responseHtml, $alertMatches)) {
            $alerts = array_map(function($m) { return trim(strip_tags($m)); }, $alertMatches[1]);
            $alerts = array_filter($alerts);
            if (!empty($alerts)) {
                $this->lastDiagnostics[] = 'ALERT_DIVS: ' . implode(' | ', $alerts);
            }
        }

        // No confirmation: check whether the link already exists in StatLink.
        $existingId = $this->findLinkIdBySearch($cleanUrl);
        if ($existingId !== null) {
            $this->lastDiagnostics[] = 'OK: link juz istnieje w StatLink (ID ' . $existingId . ')';
            Logger::info("StatLink: link {$cleanUrl} juz istnieje w systemie (ID {$existingId})", 'statlink');
            return $existingId; // treat as success: the link is there
        }

        $this->lastDiagnostics[] = 'FAIL: link nie znaleziony w StatLink po wysylce';
        Logger::warning("StatLink: wyslano link {$cleanUrl}, ale brak potwierdzenia dodania", 'statlink');
        return null;
    } catch (\Throwable $e) {
        $this->lastDiagnostics[] = 'EXCEPTION: ' . $e->getMessage();
        // Separator added: the URL and the exception text used to run together.
        Logger::error("StatLink: blad dodawania linku {$url} — " . $e->getMessage(), 'statlink');
        return null;
    }
}
/**
 * Removes a link from StatLink by its id.
 *
 * Posts the delete form and considers the removal successful when the id no
 * longer appears among the statlink_id fields of the response page.
 *
 * NOTE(review): a response that contains no statlink_id fields at all (for
 * example an error or login page) also passes this check, because the
 * scraped id list is then empty.
 *
 * @param int $statlinkId Id of the link on StatLink.
 * @return bool True when the id is absent from the response; false when
 *              login fails, the id is still listed, or an exception occurs.
 */
public function removeLink(int $statlinkId): bool
{
    if (!$this->loggedIn && !$this->login()) {
        return false;
    }

    try {
        $response = $this->http->post($this->baseUrl . '/148,twoje-linki#lista0', [
            'form_params' => [
                'statlink_id' => (string) $statlinkId,
                'usun' => 'Usuń',
            ],
            'allow_redirects' => true,
        ]);
        $body = (string) $response->getBody();

        // Verify the link was removed by checking it no longer appears.
        // Both sides are ints, so the comparison is strict.
        $remainingIds = $this->scrapeExistingLinkIds($body);
        if (!in_array($statlinkId, $remainingIds, true)) {
            Logger::info("StatLink: usunieto link ID {$statlinkId}", 'statlink');
            return true;
        }

        Logger::warning("StatLink: link ID {$statlinkId} nadal widoczny po probie usuniecia", 'statlink');
        return false;
    } catch (\Throwable $e) {
        // Separator added: the id and the exception text used to run together.
        Logger::error("StatLink: blad usuwania linku ID {$statlinkId} — " . $e->getMessage(), 'statlink');
        return false;
    }
}
/**
 * Submits published articles that have no statlink_links row yet.
 *
 * Selects up to MAX_LINKS_PER_RUN articles (oldest published first),
 * resolves each article URL (stored wp_post_url, otherwise fetched from
 * WordPress and saved back), submits it through addLink(), and records the
 * outcome in statlink_links as 'active' or 'failed'.
 *
 * NOTE(review): the anchor alternation is driven by a counter that restarts
 * at 0 on every call, so while MAX_LINKS_PER_RUN is 1 the article title is
 * always used and the topic name never is.
 * NOTE(review): the anchor stored in statlink_links is the unsanitized one;
 * addLink() sanitizes its own copy before sending.
 *
 * @return array Counters with keys 'added', 'skipped' and 'errors'.
 */
public function processNewArticles(): array
{
    $db = Database::getInstance();
    $stats = ['added' => 0, 'skipped' => 0, 'errors' => 0];

    // Published articles with a wp_post_id and no statlink_links row of any
    // status (a 'failed' or 'removed' row excludes the article as well).
    $stmt = $db->prepare(
        "SELECT a.id, a.title, a.wp_post_id, a.wp_post_url, a.site_id, a.topic_id,
        t.name as topic_name, s.url as site_url,
        s.api_user as site_api_user, s.api_token as site_api_token
        FROM articles a
        JOIN topics t ON a.topic_id = t.id
        JOIN sites s ON a.site_id = s.id
        WHERE a.status = 'published'
        AND a.wp_post_id IS NOT NULL
        AND a.wp_post_id > 0
        AND a.id NOT IN (
        SELECT sl.article_id FROM statlink_links sl
        )
        ORDER BY a.published_at ASC
        LIMIT :limit"
    );
    $stmt->bindValue('limit', self::MAX_LINKS_PER_RUN, \PDO::PARAM_INT);
    $stmt->execute();
    $articles = $stmt->fetchAll();

    if (empty($articles)) {
        Logger::info('StatLink: brak nowych artykulow do dodania', 'statlink');
        return $stats;
    }

    if (!$this->login()) {
        Logger::error('StatLink: nie mozna zalogowac — przerywam processNewArticles', 'statlink');
        $stats['errors'] = count($articles);
        return $stats;
    }

    $wordpress = new WordPressService();
    $counter = 0;

    foreach ($articles as $article) {
        // Alternate anchor within one run: even = title, odd = topic keyword.
        $anchor = ($counter % 2 === 0)
            ? (string) $article['title']
            : (string) $article['topic_name'];

        // Resolve the article URL: use wp_post_url if available, otherwise ask the WP API.
        $url = (string) ($article['wp_post_url'] ?? '');
        if ($url === '') {
            $site = [
                'id' => $article['site_id'],
                'url' => $article['site_url'],
                'api_user' => $article['site_api_user'],
                'api_token' => $article['site_api_token'],
            ];
            $url = (string) ($wordpress->getPostLink($site, (int) $article['wp_post_id']) ?? '');

            // Save the resolved URL back to the article for future use.
            if ($url !== '') {
                $db->prepare("UPDATE articles SET wp_post_url = :url WHERE id = :id")
                    ->execute(['url' => $url, 'id' => $article['id']]);
                Logger::info("StatLink: uzupelniono wp_post_url dla artykulu ID {$article['id']}: {$url}", 'statlink');
            }
        }

        if ($url === '') {
            // No row is written here, so the article is picked up again on a later run.
            Logger::warning("StatLink: brak URL dla artykulu ID {$article['id']} — pomijam", 'statlink');
            $stats['skipped']++;
            $counter++;
            continue;
        }

        try {
            $statlinkId = $this->addLink($url, $anchor);
            $now = date('Y-m-d H:i:s');
            $expiresAt = date('Y-m-d H:i:s', strtotime('+' . self::LINK_LIFETIME_DAYS . ' days'));

            if ($statlinkId !== null) {
                $db->prepare(
                    "INSERT INTO statlink_links (article_id, site_id, statlink_id, anchor, link_url, added_at, expires_at, status)
                    VALUES (:article_id, :site_id, :statlink_id, :anchor, :link_url, :added_at, :expires_at, 'active')"
                )->execute([
                    'article_id' => $article['id'],
                    'site_id' => $article['site_id'],
                    'statlink_id' => $statlinkId,
                    'anchor' => $anchor,
                    'link_url' => $url,
                    'added_at' => $now,
                    'expires_at' => $expiresAt,
                ]);
                $stats['added']++;
                Logger::info("StatLink: dodano artykul ID {$article['id']} jako link {$statlinkId}", 'statlink');
            } else {
                // Save as failed so this method does not pick the article again;
                // retryFailedLinks() handles rows with status 'failed'.
                $db->prepare(
                    "INSERT INTO statlink_links (article_id, site_id, anchor, link_url, added_at, expires_at, status, error_message)
                    VALUES (:article_id, :site_id, :anchor, :link_url, :added_at, :expires_at, 'failed', 'StatLink nie zaakceptowal linku')"
                )->execute([
                    'article_id' => $article['id'],
                    'site_id' => $article['site_id'],
                    'anchor' => $anchor,
                    'link_url' => $url,
                    'added_at' => $now,
                    'expires_at' => $expiresAt,
                ]);
                $stats['skipped']++;
                Logger::warning("StatLink: link {$url} nie zostal dodany — oznaczono jako failed", 'statlink');
            }
        } catch (\Throwable $e) {
            $stats['errors']++;
            // Separator added: the article id and the exception text used to run together.
            Logger::error("StatLink: blad przetwarzania artykulu ID {$article['id']} — " . $e->getMessage(), 'statlink');

            // Save the failed attempt together with the exception message.
            $now = date('Y-m-d H:i:s');
            $expiresAt = date('Y-m-d H:i:s', strtotime('+' . self::LINK_LIFETIME_DAYS . ' days'));
            $db->prepare(
                "INSERT INTO statlink_links (article_id, site_id, anchor, link_url, added_at, expires_at, status, error_message)
                VALUES (:article_id, :site_id, :anchor, :link_url, :added_at, :expires_at, 'failed', :error)"
            )->execute([
                'article_id' => $article['id'],
                'site_id' => $article['site_id'],
                'anchor' => $anchor,
                'link_url' => $url,
                'added_at' => $now,
                'expires_at' => $expiresAt,
                'error' => $e->getMessage(),
            ]);
        }

        $counter++;

        // Small delay between requests to avoid rate limiting; skipped after the last article.
        if ($counter < count($articles)) {
            sleep(2);
        }
    }

    Logger::info("StatLink processNewArticles: added={$stats['added']}, skipped={$stats['skipped']}, errors={$stats['errors']}", 'statlink');
    return $stats;
}
/**
 * Retries links previously stored with status 'failed'.
 *
 * Picks up to MAX_LINKS_PER_RUN failed rows, oldest added_at first, and
 * submits each through addLink(). A success turns the row 'active' with
 * fresh added_at/expires_at. A failed attempt stores the diagnostics in
 * error_message and moves added_at to the time of the attempt: rows are
 * selected by ascending added_at, so without this a permanently failing
 * link would be selected again on every run and block all other failed rows.
 *
 * There is no retry limit; a row stays 'failed' until a retry succeeds.
 *
 * @return array Counters with keys 'retried', 'still_failed' and 'errors'.
 */
public function retryFailedLinks(): array
{
    $db = Database::getInstance();
    $stats = ['retried' => 0, 'still_failed' => 0, 'errors' => 0];

    // Failed links, least recently attempted first.
    $stmt = $db->prepare(
        "SELECT sl.id, sl.article_id, sl.site_id, sl.link_url, sl.anchor
        FROM statlink_links sl
        WHERE sl.status = 'failed'
        ORDER BY sl.added_at ASC
        LIMIT :limit"
    );
    $stmt->bindValue('limit', self::MAX_LINKS_PER_RUN, \PDO::PARAM_INT);
    $stmt->execute();
    $failedLinks = $stmt->fetchAll();

    if (empty($failedLinks)) {
        return $stats;
    }

    if (!$this->login()) {
        Logger::error('StatLink: nie mozna zalogowac — przerywam retryFailedLinks', 'statlink');
        $stats['errors'] = count($failedLinks);
        // Store the login diagnostic on the rows so the reason is visible.
        $loginDiag = $this->loginDiagnostic ?? 'LOGIN FAIL: nieznany powod';
        foreach ($failedLinks as $link) {
            $db->prepare("UPDATE statlink_links SET error_message = :error WHERE id = :id")
                ->execute(['error' => $loginDiag, 'id' => $link['id']]);
        }
        return $stats;
    }

    foreach ($failedLinks as $link) {
        $url = (string) $link['link_url'];
        $anchor = (string) $link['anchor'];

        if ($url === '') {
            // No URL: delete the failed record so processNewArticles can pick the article again.
            $db->prepare("DELETE FROM statlink_links WHERE id = :id")
                ->execute(['id' => $link['id']]);
            $stats['still_failed']++;
            continue;
        }

        try {
            $statlinkId = $this->addLink($url, $anchor);

            if ($statlinkId !== null) {
                $now = date('Y-m-d H:i:s');
                $expiresAt = date('Y-m-d H:i:s', strtotime('+' . self::LINK_LIFETIME_DAYS . ' days'));
                $db->prepare(
                    "UPDATE statlink_links
                    SET statlink_id = :statlink_id, status = 'active',
                    added_at = :added_at, expires_at = :expires_at, error_message = NULL
                    WHERE id = :id"
                )->execute([
                    'statlink_id' => $statlinkId,
                    'added_at' => $now,
                    'expires_at' => $expiresAt,
                    'id' => $link['id'],
                ]);
                $stats['retried']++;
                Logger::info("StatLink: retry OK — artykul ID {$link['article_id']} dodany jako link {$statlinkId}", 'statlink');
            } else {
                $stats['still_failed']++;
                $diag = implode(' | ', $this->lastDiagnostics);
                // Bump added_at so this row goes to the back of the retry queue.
                $db->prepare(
                    "UPDATE statlink_links SET error_message = :error, added_at = :attempted_at WHERE id = :id"
                )->execute([
                    'error' => 'Retry failed: ' . mb_substr($diag, 0, 500),
                    'attempted_at' => date('Y-m-d H:i:s'),
                    'id' => $link['id'],
                ]);
                Logger::warning("StatLink: retry FAIL — artykul ID {$link['article_id']}, link {$url}", 'statlink');
            }
        } catch (\Throwable $e) {
            $stats['errors']++;
            // Same queue rotation as for a plain failed retry.
            $db->prepare(
                "UPDATE statlink_links SET error_message = :error, added_at = :attempted_at WHERE id = :id"
            )->execute([
                'error' => 'Retry exception: ' . $e->getMessage(),
                'attempted_at' => date('Y-m-d H:i:s'),
                'id' => $link['id'],
            ]);
            // Separator added: the article id and the exception text used to run together.
            Logger::error("StatLink: retry blad — artykul ID {$link['article_id']} — " . $e->getMessage(), 'statlink');
        }
    }

    Logger::info("StatLink retryFailedLinks: retried={$stats['retried']}, still_failed={$stats['still_failed']}, errors={$stats['errors']}", 'statlink');
    return $stats;
}
/**
 * Removes links whose expires_at has passed.
 *
 * Selects up to MAX_LINKS_PER_RUN 'active' rows with expires_at earlier
 * than the database's NOW(), removes each from StatLink via removeLink()
 * and marks the row 'removed'. Rows without a positive statlink_id are
 * marked 'removed' without contacting StatLink. When removal fails the row
 * is set to 'expired' with an error message.
 *
 * NOTE(review): rows set to 'expired' are no longer matched by the
 * status = 'active' filter, so this method never retries them.
 * NOTE(review): expires_at is written elsewhere with PHP date() and compared
 * here against the database NOW(); confirm both use the same time zone.
 *
 * @return array Counters with keys 'removed' and 'errors'.
 */
public function removeExpiredLinks(): array
{
    $db = Database::getInstance();
    $stats = ['removed' => 0, 'errors' => 0];

    $stmt = $db->prepare(
        "SELECT id, statlink_id, link_url, article_id
        FROM statlink_links
        WHERE status = 'active'
        AND expires_at < NOW()
        ORDER BY expires_at ASC
        LIMIT :limit"
    );
    $stmt->bindValue('limit', self::MAX_LINKS_PER_RUN, \PDO::PARAM_INT);
    $stmt->execute();
    $expiredLinks = $stmt->fetchAll();

    if (empty($expiredLinks)) {
        Logger::info('StatLink: brak wygaslych linkow do usuniecia', 'statlink');
        return $stats;
    }

    if (!$this->login()) {
        Logger::error('StatLink: nie mozna zalogowac — przerywam removeExpiredLinks', 'statlink');
        $stats['errors'] = count($expiredLinks);
        return $stats;
    }

    foreach ($expiredLinks as $i => $link) {
        $statlinkId = (int) $link['statlink_id'];

        if ($statlinkId <= 0) {
            // No usable statlink_id (NULL, 0 or the -1 "id unknown" marker): just mark as removed.
            $db->prepare(
                "UPDATE statlink_links SET status = 'removed', removed_at = NOW() WHERE id = :id"
            )->execute(['id' => $link['id']]);
            $stats['removed']++;
            continue;
        }

        try {
            $removed = $this->removeLink($statlinkId);

            if ($removed) {
                $db->prepare(
                    "UPDATE statlink_links SET status = 'removed', removed_at = NOW() WHERE id = :id"
                )->execute(['id' => $link['id']]);
                $stats['removed']++;
                Logger::info("StatLink: usunieto wygasly link ID {$statlinkId} (artykul {$link['article_id']})", 'statlink');
            } else {
                $db->prepare(
                    "UPDATE statlink_links SET status = 'expired', error_message = 'Nie udalo sie usunac ze StatLink' WHERE id = :id"
                )->execute(['id' => $link['id']]);
                $stats['errors']++;
            }
        } catch (\Throwable $e) {
            $stats['errors']++;
            $db->prepare(
                "UPDATE statlink_links SET status = 'expired', error_message = :error WHERE id = :id"
            )->execute(['id' => $link['id'], 'error' => $e->getMessage()]);
            // Separator added: the id and the exception text used to run together.
            Logger::error("StatLink: blad usuwania linku ID {$statlinkId} — " . $e->getMessage(), 'statlink');
        }

        // Small delay between requests; skipped after the last link.
        if ($i < count($expiredLinks) - 1) {
            sleep(2);
        }
    }

    Logger::info("StatLink removeExpiredLinks: removed={$stats['removed']}, errors={$stats['errors']}", 'statlink');
    return $stats;
}
/**
 * Extracts the statlink_id belonging to a URL from HTML that lists links
 * (e.g. the form response or a search result page).
 *
 * For every name="statlink_id" value="N" field, a window of 6000 bytes
 * starting up to 3000 bytes before the field is searched, case-insensitively,
 * for the URL. The first field whose window contains the URL wins.
 *
 * The window is anchored at the actual offset of each matched field.
 * Looking the id up again with strpos('value="N"') would land on the first
 * attribute with that value anywhere in the page, which may belong to an
 * unrelated input.
 *
 * NOTE(review): this is a proximity heuristic. A URL that is a prefix of
 * another listed URL, or neighbouring rows inside the same window, can
 * yield the wrong id.
 *
 * @param string $html     Page HTML.
 * @param string $cleanUrl URL without scheme; a trailing slash is ignored.
 * @return int|null The matching id, or null when none is found or the URL is empty.
 */
private function findLinkIdInHtml(string $html, string $cleanUrl): ?int
{
    // Every scheme/trailing-slash variant of the URL contains this string,
    // so searching for it alone covers all of them.
    $needle = rtrim($cleanUrl, '/');
    if ($needle === '') {
        // An empty needle would match every window.
        return null;
    }

    if (!preg_match_all('/name="statlink_id"\s+value="(\d+)"/', $html, $idMatches, PREG_OFFSET_CAPTURE)) {
        return null;
    }

    $valueAttrLength = strlen('value="');
    foreach ($idMatches[1] as [$candidateId, $digitsOffset]) {
        // Offset of the value=" attribute of this very field.
        $idPos = $digitsOffset - $valueAttrLength;

        // Check a wide region around the id for our URL.
        $regionStart = max(0, $idPos - 3000);
        $region = substr($html, $regionStart, 6000);

        if (stripos($region, $needle) !== false) {
            return (int) $candidateId;
        }
    }

    return null;
}
/**
 * Looks up a link id by running the StatLink search form.
 *
 * The search term is the part of the URL before the first "/" (the host),
 * with the page size set to 100; the result page is then scanned with
 * findLinkIdInHtml().
 *
 * NOTE(review): only the one result page returned by this request is
 * scanned; a link beyond the first 100 results would not be found.
 *
 * @param string $cleanUrl URL without scheme.
 * @return int|null The id found on the result page, or null when not found
 *                  or when the request throws.
 */
private function findLinkIdBySearch(string $cleanUrl): ?int
{
    // Everything before the first slash is the search term.
    [$searchTerm] = explode('/', $cleanUrl);

    $searchForm = [
        'statlink_szukaj' => $searchTerm,
        'ilosc_na_stronie_linki' => '100',
        'statlink_szukaj_go' => '1',
    ];

    try {
        $response = $this->http->post($this->baseUrl . '/148,twoje-linki#lista0', [
            'form_params' => $searchForm,
            'allow_redirects' => true,
        ]);

        return $this->findLinkIdInHtml((string) $response->getBody(), $cleanUrl);
    } catch (\Throwable $e) {
        Logger::warning("StatLink: blad wyszukiwania ID linku — " . $e->getMessage(), 'statlink');
        return null;
    }
}
/**
 * Extracts the CSRF token (the value of the "niepozwol" field) from a page.
 *
 * First tries the attribute order name="niepozwol" value="...". Otherwise
 * it locates the tag that carries name="niepozwol" and reads the value
 * attribute from inside that same tag, whatever the attribute order. The
 * lookup is confined to one tag so that the value of a different input
 * earlier on the same line can never be returned.
 *
 * @param string $html Page HTML.
 * @return string|null The non-empty token, or null when no such field is found.
 */
private function scrapeCsrfToken(string $html): ?string
{
    if (preg_match('/name="niepozwol"\s+value="([^"]+)"/', $html, $matches) === 1) {
        return $matches[1];
    }

    // [^<>]* keeps the match inside a single tag.
    $hasTag = preg_match('/<[^<>]*(?<![\w-])name="niepozwol"[^<>]*>/', $html, $tag) === 1;
    if ($hasTag && preg_match('/(?<![\w-])value="([^"]+)"/', $tag[0], $matches) === 1) {
        return $matches[1];
    }

    return null;
}
/**
 * Collects the numeric values of all id_kategorie_multiple[] fields on a page.
 *
 * Every field with a numeric value is returned, in document order; the
 * pattern does not look at whether a field is checked or selected.
 *
 * @param string $html Page HTML.
 * @return array List of category ids as ints; empty when none are found.
 */
private function scrapeCategories(string $html): array
{
    $found = preg_match_all('/name="id_kategorie_multiple\[\]"\s+value="(\d+)"/', $html, $matches);

    return $found
        ? array_map('intval', $matches[1])
        : [];
}
/**
 * Collects the distinct statlink_id values present on a page.
 *
 * Duplicates are dropped with array_unique(), which keeps the key of each
 * first occurrence, so the returned array may have gaps in its keys.
 *
 * @param string $html Page HTML.
 * @return array Distinct ids as ints; empty when the page has no statlink_id fields.
 */
private function scrapeExistingLinkIds(string $html): array
{
    // The pattern targets statlink_id fields written as name="statlink_id" value="N".
    if (!preg_match_all('/name="statlink_id"\s+value="(\d+)"/', $html, $found)) {
        return [];
    }

    $distinct = array_unique($found[1]);

    return array_map('intval', $distinct);
}
}

View File

@@ -132,7 +132,8 @@ class WordPressService
string $title,
string $content,
?int $categoryId = null,
?int $mediaId = null
?int $mediaId = null,
?string $excerpt = null
): ?int {
$auth = $this->requireAuthOption($site, 'createPost');
if ($auth === null) {
@@ -155,6 +156,10 @@ class WordPressService
$postData['featured_media'] = $mediaId;
}
if (is_string($excerpt) && trim($excerpt) !== '') {
$postData['excerpt'] = trim($excerpt);
}
$response = $this->requestWp($site, 'POST', 'wp/v2/posts', [
'auth' => $auth,
'json' => $postData,
@@ -171,7 +176,7 @@ class WordPressService
}
// Fall back to XML-RPC.
return $this->createPostXmlRpc($site, $auth, $title, $content, $categoryId, $mediaId);
return $this->createPostXmlRpc($site, $auth, $title, $content, $categoryId, $mediaId, $excerpt);
}
public function getPublishedPosts(array $site, int $perPage = 100): array|false
@@ -188,7 +193,7 @@ class WordPressService
'status' => 'publish',
'per_page' => $perPage,
'page' => $page,
'_fields' => 'id,title,content,date,categories',
'_fields' => 'id,title,content,date,categories,link',
];
$options = ['query' => $query];
if ($auth !== null) {
@@ -434,6 +439,29 @@ class WordPressService
return ['success' => false, 'message' => (string) ($retry['message'] ?? 'Blad zmiany permalink.')];
}
/**
 * Fetches the public link of a WordPress post through the REST API.
 *
 * Requests wp/v2/posts/{id} limited to the "link" field, adding
 * authentication only when buildAuthOption() returns one.
 *
 * @param array $site     Site record; its 'url' key is used in the warning message.
 * @param int   $wpPostId WordPress post id.
 * @return string|null The trimmed link, or null when the id is not
 *                     positive, the response carries no link, or the
 *                     request throws a GuzzleException.
 */
public function getPostLink(array $site, int $wpPostId): ?string
{
    if ($wpPostId <= 0) {
        return null;
    }

    $auth = $this->buildAuthOption($site);
    $options = ['query' => ['_fields' => 'link']];
    if ($auth !== null) {
        $options['auth'] = $auth;
    }

    try {
        $response = $this->requestWp($site, 'GET', 'wp/v2/posts/' . $wpPostId, $options);
        $payload = json_decode($response->getBody()->getContents(), true);
        $link = trim((string) ($payload['link'] ?? ''));

        if ($link === '') {
            return null;
        }

        return $link;
    } catch (GuzzleException $e) {
        Logger::warning("WP getPostLink failed for {$site['url']}: " . $e->getMessage(), 'wordpress');
        return null;
    }
}
public function enableSearchEngineIndexing(array $site): array
{
$result = $this->callRemoteService($site, 'set_blog_public', ['blog_public' => '1']);
@@ -619,12 +647,24 @@ class WordPressService
// ── XML-RPC fallback methods ──────────────────────────────────────
private function createPostXmlRpc(array $site, array $auth, string $title, string $content, ?int $categoryId, ?int $mediaId): ?int
private function createPostXmlRpc(
array $site,
array $auth,
string $title,
string $content,
?int $categoryId,
?int $mediaId,
?string $excerpt
): ?int
{
$fields = '<member><name>post_title</name><value><string>' . $this->xmlEsc($title) . '</string></value></member>'
. '<member><name>post_content</name><value><string>' . $this->xmlEsc($content) . '</string></value></member>'
. '<member><name>post_status</name><value><string>publish</string></value></member>';
if (is_string($excerpt) && trim($excerpt) !== '') {
$fields .= '<member><name>mt_excerpt</name><value><string>' . $this->xmlEsc(trim($excerpt)) . '</string></value></member>';
}
if ($categoryId) {
$fields .= '<member><name>terms</name><value><struct>'
. '<member><name>category</name><value><array><data>'