[CybernewsBridge] add bridge (#4665)
Some checks are pending
Build Image on Commit and Release / bake (push) Waiting to run
Lint / phpcs (7.4) (push) Waiting to run
Lint / phpcompatibility (7.4) (push) Waiting to run
Lint / executable_php_files_check (push) Waiting to run
Tests / phpunit8 (7.4) (push) Waiting to run
Tests / phpunit8 (8.0) (push) Waiting to run
Tests / phpunit8 (8.1) (push) Waiting to run
Tests / phpunit8 (8.2) (push) Waiting to run
Tests / phpunit8 (8.3) (push) Waiting to run
Tests / phpunit8 (8.4) (push) Waiting to run

* [CybernewsBridge] add bridge

* [CybernewsBridge] fix lint

* [CybernewsBridge] add header

* [CybernewsBridge] fix url

* [CybernewsBridge] fix url 2

* [CybernewsBridge] revert header

* [CybernewsBridge] refactor

* [CybernewsBridge] final

* [CybernewsBridge] lint
This commit is contained in:
tillcash 2025-08-14 18:01:47 +05:30 committed by GitHub
parent 3a9e398228
commit 1b584b4551
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

114
bridges/CybernewsBridge.php Normal file
View file

@ -0,0 +1,114 @@
<?php
declare(strict_types=1);
/**
 * Bridge that builds a feed from the Cybernews news sitemap.
 *
 * The sitemap at `/news-sitemap.xml` supplies each article's URL, last
 * modification date and (through the `news` XML namespace) its title. The
 * article page itself is then fetched to obtain the full content.
 */
class CybernewsBridge extends BridgeAbstract
{
    const NAME = 'Cybernews';
    const URI = 'https://cybernews.com';
    const DESCRIPTION = 'Fetches the latest news from Cybernews';
    const MAINTAINER = 'tillcash';
    const CACHE_TIMEOUT = 3600; // 1 hour

    // Upper bound on collected items; every item costs one extra article page fetch.
    const MAX_ARTICLES = 5;

    /**
     * Reads the news sitemap and fills `$this->items` with at most
     * MAX_ARTICLES entries.
     *
     * Entries are skipped when they have no URL, when their first path
     * segment is `nl` or `de`, or when no title is found in the `news`
     * namespace. A `timestamp` is only attached when `<lastmod>` can be parsed.
     */
    public function collectData()
    {
        $sitemapXml = getContents(self::URI . '/news-sitemap.xml');
        if (!$sitemapXml) {
            throwServerException('Unable to retrieve Cybernews sitemap');
        }

        $sitemap = simplexml_load_string($sitemapXml, SimpleXMLElement::class, LIBXML_NOCDATA);
        if (!$sitemap) {
            throwServerException('Unable to parse Cybernews sitemap');
        }

        foreach ($sitemap->url as $entry) {
            $url = trim((string) $entry->loc);
            if ($url === '') {
                continue;
            }

            $category = $this->extractCategory($url);

            // Skip non-English versions
            if (in_array($category, ['nl', 'de'], true)) {
                continue;
            }

            $title = $this->extractNewsTitle($entry);
            if ($title === '') {
                continue;
            }

            $item = [
                'title' => $title,
                'uri' => $url,
                'uid' => $url,
                'categories' => $category !== '' ? [$category] : [],
                'content' => $this->fetchFullArticle($url),
            ];

            // strtotime() returns false for an empty or unparseable date; leave the
            // key out in that case rather than storing a boolean as the timestamp.
            $lastmod = trim((string) $entry->lastmod);
            $timestamp = $lastmod !== '' ? strtotime($lastmod) : false;
            if ($timestamp !== false) {
                $item['timestamp'] = $timestamp;
            }

            $this->items[] = $item;

            if (count($this->items) >= self::MAX_ARTICLES) {
                break;
            }
        }
    }

    /**
     * Returns the first path segment of a URL, used as the item category.
     *
     * @param string $url Absolute article URL taken from the sitemap.
     * @return string The first path segment, or '' when the URL has no usable path.
     */
    private function extractCategory(string $url): string
    {
        // parse_url() yields null when the component is absent and false when the
        // URL is seriously malformed; neither may be handed to trim() under strict_types.
        $path = parse_url($url, PHP_URL_PATH);
        if (!is_string($path)) {
            return '';
        }

        return explode('/', trim($path, '/'), 2)[0];
    }

    /**
     * Reads the title from the `news`-namespaced child of a sitemap `<url>` entry.
     *
     * @param SimpleXMLElement $entry One `<url>` element of the sitemap.
     * @return string The trimmed title, or '' when the namespace or element is missing.
     */
    private function extractNewsTitle(SimpleXMLElement $entry): string
    {
        $namespaces = $entry->getNamespaces(true);
        if (!isset($namespaces['news'])) {
            return '';
        }

        $news = $entry->children($namespaces['news'])->news;
        if (!$news) {
            return '';
        }

        return trim((string) $news->title);
    }

    /**
     * Loads an article page and returns the cleaned inner HTML of its `<article>` element.
     *
     * @param string $url Absolute article URL.
     * @return string Article HTML, or a short placeholder message when the page
     *                yields no document or contains no `<article>` element.
     */
    private function fetchFullArticle(string $url): string
    {
        $html = getSimpleHTMLDOMCached($url);
        if (!$html) {
            return 'Unable to fetch article content';
        }

        $article = $html->find('article', 0);
        if (!$article) {
            return 'Unable to parse article content';
        }

        // Remove unnecessary elements
        $removeSelectors = [
            'script',
            'style',
            'div.links-bar',
            'div.google-news-cta',
            'div.a-wrapper',
            'div.embed_youtube',
        ];
        foreach ($removeSelectors as $selector) {
            foreach ($article->find($selector) as $element) {
                $element->outertext = '';
            }
        }

        // Handle lazy-loaded images: promote data-src to src so the image is
        // referenced directly in the feed content.
        foreach ($article->find('img') as $img) {
            if (!empty($img->{'data-src'})) {
                $img->src = $img->{'data-src'};
                unset($img->{'data-src'});
            }
        }

        return $article->innertext;
    }
}