mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-08-16 15:31:26 +02:00
[CybernewsBridge] add bridge (#4665)
Some checks are pending
Build Image on Commit and Release / bake (push) Waiting to run
Lint / phpcs (7.4) (push) Waiting to run
Lint / phpcompatibility (7.4) (push) Waiting to run
Lint / executable_php_files_check (push) Waiting to run
Tests / phpunit8 (7.4) (push) Waiting to run
Tests / phpunit8 (8.0) (push) Waiting to run
Tests / phpunit8 (8.1) (push) Waiting to run
Tests / phpunit8 (8.2) (push) Waiting to run
Tests / phpunit8 (8.3) (push) Waiting to run
Tests / phpunit8 (8.4) (push) Waiting to run
Some checks are pending
Build Image on Commit and Release / bake (push) Waiting to run
Lint / phpcs (7.4) (push) Waiting to run
Lint / phpcompatibility (7.4) (push) Waiting to run
Lint / executable_php_files_check (push) Waiting to run
Tests / phpunit8 (7.4) (push) Waiting to run
Tests / phpunit8 (8.0) (push) Waiting to run
Tests / phpunit8 (8.1) (push) Waiting to run
Tests / phpunit8 (8.2) (push) Waiting to run
Tests / phpunit8 (8.3) (push) Waiting to run
Tests / phpunit8 (8.4) (push) Waiting to run
* [CybernewsBridge] add bridge
* [CybernewsBridge] fix lint
* [CybernewsBridge] add header
* [CybernewsBridge] fix url
* [CybernewsBridge] fix url 2
* [CybernewsBridge] revert header
* [CybernewsBridge] refactor
* [CybernewsBridge] final
* [CybernewsBridge] lint
This commit is contained in:
parent
3a9e398228
commit
1b584b4551
1 changed file with 114 additions and 0 deletions
114
bridges/CybernewsBridge.php
Normal file
114
bridges/CybernewsBridge.php
Normal file
|
@ -0,0 +1,114 @@
|
|||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
/**
 * Bridge that builds a feed from the Cybernews news sitemap.
 *
 * The sitemap at URI . '/news-sitemap.xml' is read entry by entry; each entry
 * that carries a <news:title> becomes one feed item whose content is the
 * cleaned-up <article> element of the linked page.
 */
class CybernewsBridge extends BridgeAbstract
{
    const NAME = 'Cybernews';
    const URI = 'https://cybernews.com';
    const DESCRIPTION = 'Fetches the latest news from Cybernews';
    const MAINTAINER = 'tillcash';
    const CACHE_TIMEOUT = 3600; // 1 hour

    // Upper bound on feed items; every item costs one extra page fetch.
    const MAX_ARTICLES = 5;

    // First path segments that mark translated (non-English) articles.
    const SKIPPED_LOCALES = ['nl', 'de'];

    /**
     * Populate $this->items with up to MAX_ARTICLES entries from the sitemap.
     *
     * Entries without a URL, without a news title, or whose first path
     * segment is listed in SKIPPED_LOCALES are skipped.
     */
    public function collectData()
    {
        $sitemapXml = getContents(self::URI . '/news-sitemap.xml');

        if (!$sitemapXml) {
            throwServerException('Unable to retrieve Cybernews sitemap');
        }

        $sitemap = simplexml_load_string($sitemapXml, SimpleXMLElement::class, LIBXML_NOCDATA);

        // Compare against false explicitly: a well-formed but empty <urlset/>
        // is a falsy SimpleXMLElement and must not be reported as a parse error.
        if ($sitemap === false) {
            throwServerException('Unable to parse Cybernews sitemap');
        }

        foreach ($sitemap->url as $entry) {
            $url = trim((string) $entry->loc);

            if ($url === '') {
                continue;
            }

            $category = $this->extractCategory($url);

            // Skip non-English versions
            if (in_array($category, self::SKIPPED_LOCALES, true)) {
                continue;
            }

            $title = $this->extractTitle($entry);

            if ($title === '') {
                continue;
            }

            $item = [
                'title' => $title,
                'uri' => $url,
                'uid' => $url,
                'categories' => $category !== '' ? [$category] : [],
                'content' => $this->fetchFullArticle($url),
            ];

            // strtotime() yields false for a missing or unparsable <lastmod>;
            // leave the timestamp unset in that case instead of storing false.
            $timestamp = strtotime(trim((string) $entry->lastmod));

            if ($timestamp !== false) {
                $item['timestamp'] = $timestamp;
            }

            $this->items[] = $item;

            if (count($this->items) >= self::MAX_ARTICLES) {
                break;
            }
        }
    }

    /**
     * Return the first path segment of a URL, or '' when there is none.
     *
     * parse_url() returns null for a URL without a path and false for a
     * malformed one; both are mapped to '' so that trim() never receives a
     * non-string under strict_types.
     */
    private function extractCategory(string $url): string
    {
        $path = parse_url($url, PHP_URL_PATH);

        if (!is_string($path)) {
            return '';
        }

        // explode() always returns at least one element, so index 0 is safe.
        return explode('/', trim($path, '/'))[0];
    }

    /**
     * Return the trimmed <news:title> of a sitemap entry, or '' when absent.
     *
     * The namespace URI is looked up under the "news" prefix among the
     * namespaces used by the entry and its descendants.
     */
    private function extractTitle(SimpleXMLElement $entry): string
    {
        $namespaces = $entry->getNamespaces(true);

        if (!isset($namespaces['news'])) {
            return '';
        }

        $news = $entry->children($namespaces['news'])->news;

        if (!$news) {
            return '';
        }

        return trim((string) $news->title);
    }

    /**
     * Fetch an article page and return the inner HTML of its <article> element.
     *
     * Scripts, styles and a fixed list of promotional/embed containers are
     * removed, and lazy-loaded images get their real source restored.
     * A plain-text placeholder is returned when the page or the <article>
     * element is not available.
     *
     * NOTE(review): getSimpleHTMLDOMCached() may throw on HTTP errors rather
     * than return a falsy value - confirm whether the placeholder branch is
     * reachable or whether failures should be caught here.
     */
    private function fetchFullArticle(string $url): string
    {
        $html = getSimpleHTMLDOMCached($url);

        if (!$html) {
            return 'Unable to fetch article content';
        }

        $article = $html->find('article', 0);

        if (!$article) {
            return 'Unable to parse article content';
        }

        // Remove unnecessary elements
        $removeSelectors = [
            'script',
            'style',
            'div.links-bar',
            'div.google-news-cta',
            'div.a-wrapper',
            'div.embed_youtube',
        ];

        foreach ($removeSelectors as $selector) {
            foreach ($article->find($selector) as $element) {
                $element->outertext = '';
            }
        }

        // Handle lazy-loaded images: promote data-src to src so the image
        // renders without the site's JavaScript.
        foreach ($article->find('img') as $img) {
            if (!empty($img->{'data-src'})) {
                $img->src = $img->{'data-src'};
                unset($img->{'data-src'});
            }
        }

        return $article->innertext;
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue