mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-08-30 22:59:46 +02:00
Fix GolemBridge pagination and avoid repeating identical content (especially images) across multiple pages
Some checks are pending
Build Image on Commit and Release / bake (push) Waiting to run
Lint / phpcs (7.4) (push) Waiting to run
Lint / phpcompatibility (7.4) (push) Waiting to run
Lint / executable_php_files_check (push) Waiting to run
Tests / phpunit8 (8.1) (push) Waiting to run
Tests / phpunit8 (8.2) (push) Waiting to run
Tests / phpunit8 (8.3) (push) Waiting to run
Tests / phpunit8 (8.4) (push) Waiting to run
Tests / phpunit8 (7.4) (push) Waiting to run
Tests / phpunit8 (8.0) (push) Waiting to run
Some checks are pending
Build Image on Commit and Release / bake (push) Waiting to run
Lint / phpcs (7.4) (push) Waiting to run
Lint / phpcompatibility (7.4) (push) Waiting to run
Lint / executable_php_files_check (push) Waiting to run
Tests / phpunit8 (8.1) (push) Waiting to run
Tests / phpunit8 (8.2) (push) Waiting to run
Tests / phpunit8 (8.3) (push) Waiting to run
Tests / phpunit8 (8.4) (push) Waiting to run
Tests / phpunit8 (7.4) (push) Waiting to run
Tests / phpunit8 (8.0) (push) Waiting to run
This commit is contained in:
parent
32f324dbb5
commit
3f33d0e312
1 changed file with 31 additions and 17 deletions
|
@ -82,6 +82,7 @@ class GolemBridge extends FeedExpander
|
||||||
// URI without RSS feed reference
|
// URI without RSS feed reference
|
||||||
$item['uri'] = $articlePage->find('head meta[name="twitter:url"]', 0)->content;
|
$item['uri'] = $articlePage->find('head meta[name="twitter:url"]', 0)->content;
|
||||||
|
|
||||||
|
if (!array_key_exists('categories', $item)) {
|
||||||
$categories = $articlePage->find('div.go-tag-list__tags a.go-tag');
|
$categories = $articlePage->find('div.go-tag-list__tags a.go-tag');
|
||||||
foreach ($categories as $category) {
|
foreach ($categories as $category) {
|
||||||
$trimmedcategories[] = trim(html_entity_decode($category->plaintext));
|
$trimmedcategories[] = trim(html_entity_decode($category->plaintext));
|
||||||
|
@ -89,18 +90,27 @@ class GolemBridge extends FeedExpander
|
||||||
if (isset($trimmedcategories)) {
|
if (isset($trimmedcategories)) {
|
||||||
$item['categories'] = array_unique($trimmedcategories);
|
$item['categories'] = array_unique($trimmedcategories);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$item['content'] .= $this->extractContent($articlePage);
|
$item['content'] .= $this->extractContent($articlePage, $item['content']);
|
||||||
|
|
||||||
// next page
|
// next page
|
||||||
$nextUri = $articlePage->find('link[rel="next"]', 0);
|
$nextUri = $articlePage->find('li.go-pagination__item--next>a', 0);
|
||||||
$uri = $nextUri ? static::URI . $nextUri->href : null;
|
if ($nextUri) {
|
||||||
|
$nextUri = $nextUri->href;
|
||||||
|
if (str_starts_with($nextUri, '/')) {
|
||||||
|
$nextUri = substr($nextUri, 1);
|
||||||
|
}
|
||||||
|
$uri = static::URI . $nextUri;
|
||||||
|
} else {
|
||||||
|
$uri = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function extractContent($page)
|
private function extractContent($page, $prevcontent)
|
||||||
{
|
{
|
||||||
$item = '';
|
$item = '';
|
||||||
|
|
||||||
|
@ -150,6 +160,7 @@ class GolemBridge extends FeedExpander
|
||||||
}
|
}
|
||||||
|
|
||||||
$header = $article->find('header', 0);
|
$header = $article->find('header', 0);
|
||||||
|
if (isset($header)) {
|
||||||
foreach ($header->find('p, figure') as $element) {
|
foreach ($header->find('p, figure') as $element) {
|
||||||
$item .= $element;
|
$item .= $element;
|
||||||
}
|
}
|
||||||
|
@ -158,10 +169,13 @@ class GolemBridge extends FeedExpander
|
||||||
foreach ($article->find('img[data-src-full][src*="."]') as $img) {
|
foreach ($article->find('img[data-src-full][src*="."]') as $img) {
|
||||||
$img->src = $img->getAttribute('data-src-full');
|
$img->src = $img->getAttribute('data-src-full');
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
foreach ($article->find('div.go-article-header__intro, p, h1, h2, h3, pre, img[src*="."], div[class*="golem_tablediv"], iframe, video') as $element) {
|
foreach ($article->find('div.go-article-header__intro, p, h1, h2, h3, pre, img[src*="."], div[class*="golem_tablediv"], iframe, video') as $element) {
|
||||||
|
if (!str_contains($prevcontent, $element)) {
|
||||||
$item .= $element;
|
$item .= $element;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue