/**
 * Load the page at $url through getSimpleHTMLDOMCached() and return the
 * inner HTML of its first <article> element after some clean-up.
 *
 * Clean-up consists of blanking every element that matches a fixed list of
 * selectors (scripts, styles and several site-specific div classes) and of
 * copying each image's data-src attribute, when non-empty, into src.
 *
 * @param string $url Address of the article page.
 *
 * @return string Inner HTML of the article element, or a short placeholder
 *                message when the page yields no document or no <article>.
 */
private function fetchFullArticle(string $url): string
{
    $dom = getSimpleHTMLDOMCached($url);
    if (!$dom) {
        return 'Unable to fetch article content';
    }

    $articleNode = $dom->find('article', 0);
    if (!$articleNode) {
        return 'Unable to parse article content';
    }

    // Markup that should not end up in the feed item.
    $unwantedSelectors = [
        'script',
        'style',
        'div.links-bar',
        'div.google-news-cta',
        'div.a-wrapper',
        'div.embed_youtube',
    ];

    foreach ($unwantedSelectors as $css) {
        $matches = $articleNode->find($css);

        foreach ($matches as $node) {
            // Emptying outertext drops the node from the parent's rendered HTML.
            $node->outertext = '';
        }
    }

    // Lazy-loaded images keep their address in data-src; promote it to src.
    foreach ($articleNode->find('img') as $image) {
        if (empty($image->{'data-src'})) {
            continue;
        }

        $image->src = $image->{'data-src'};
        unset($image->{'data-src'});
    }

    return $articleNode->innertext;
}