mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-08-03 09:35:57 +02:00
Reformat codebase v4 (#2872)
Reformat code base to PSR12 Co-authored-by: rssbridge <noreply@github.com>
This commit is contained in:
parent
66568e3a39
commit
4f75591060
398 changed files with 58607 additions and 56442 deletions
|
@ -1,352 +1,368 @@
|
|||
<?php
|
||||
class JustETFBridge extends BridgeAbstract {
|
||||
const NAME = 'justETF Bridge';
|
||||
const URI = 'https://www.justetf.com';
|
||||
const DESCRIPTION = 'Currently only supports the news feed';
|
||||
const MAINTAINER = 'logmanoriginal';
|
||||
const PARAMETERS = array(
|
||||
'News' => array(
|
||||
'full' => array(
|
||||
'name' => 'Full Article',
|
||||
'type' => 'checkbox',
|
||||
'title' => 'Enable to load full articles'
|
||||
)
|
||||
),
|
||||
'Profile' => array(
|
||||
'isin' => array(
|
||||
'name' => 'ISIN',
|
||||
'type' => 'text',
|
||||
'required' => true,
|
||||
'exampleValue' => 'IE00B4X9L533',
|
||||
'pattern' => '[a-zA-Z]{2}[a-zA-Z0-9]{10}',
|
||||
'title' => 'ISIN, consisting of 2-letter country code, 9-character identifier, check character'
|
||||
),
|
||||
'strategy' => array(
|
||||
'name' => 'Include Strategy',
|
||||
'type' => 'checkbox',
|
||||
'defaultValue' => 'checked'
|
||||
),
|
||||
'description' => array(
|
||||
'name' => 'Include Description',
|
||||
'type' => 'checkbox',
|
||||
'defaultValue' => 'checked'
|
||||
)
|
||||
),
|
||||
'global' => array(
|
||||
'lang' => array(
|
||||
'name' => 'Language',
|
||||
'type' => 'list',
|
||||
'values' => array(
|
||||
'Englisch' => 'en',
|
||||
'Deutsch' => 'de',
|
||||
'Italiano' => 'it'
|
||||
),
|
||||
'defaultValue' => 'Englisch'
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
public function collectData() {
|
||||
$html = getSimpleHTMLDOM($this->getURI());
|
||||
|
||||
defaultLinkTo($html, static::URI);
|
||||
|
||||
switch($this->queriedContext) {
|
||||
case 'News':
|
||||
$this->collectNews($html);
|
||||
break;
|
||||
case 'Profile':
|
||||
$this->collectProfile($html);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
public function getURI() {
|
||||
$uri = static::URI;
|
||||
|
||||
if($this->getInput('lang')) {
|
||||
$uri .= '/' . $this->getInput('lang');
|
||||
}
|
||||
|
||||
switch($this->queriedContext) {
|
||||
case 'News':
|
||||
$uri .= '/news';
|
||||
break;
|
||||
case 'Profile':
|
||||
$uri .= '/etf-profile.html?' . http_build_query(array(
|
||||
'isin' => strtoupper($this->getInput('isin'))
|
||||
));
|
||||
break;
|
||||
}
|
||||
|
||||
return $uri;
|
||||
}
|
||||
|
||||
public function getName() {
|
||||
$name = static::NAME;
|
||||
|
||||
$name .= ($this->queriedContext) ? ' - ' . $this->queriedContext : '';
|
||||
|
||||
switch($this->queriedContext) {
|
||||
case 'News': break;
|
||||
case 'Profile':
|
||||
if($this->getInput('isin')) {
|
||||
$name .= ' ISIN ' . strtoupper($this->getInput('isin'));
|
||||
}
|
||||
}
|
||||
|
||||
if($this->getInput('lang')) {
|
||||
$name .= ' (' . strtoupper($this->getInput('lang')) . ')';
|
||||
}
|
||||
|
||||
return $name;
|
||||
}
|
||||
|
||||
#region Common
|
||||
|
||||
/**
|
||||
* Fixes dates depending on the chosen language:
|
||||
*
|
||||
* de : dd.mm.yy
|
||||
* en : dd.mm.yy
|
||||
* it : dd/mm/yy
|
||||
*
|
||||
* Basically strtotime doesn't convert dates correctly due to formats
|
||||
* being hard to interpret. So we use the DateTime object, manually
|
||||
* fixing dates and times (set to 00:00:00.000).
|
||||
*
|
||||
* We don't know the timezone, so just assume +00:00 (or whatever
|
||||
* DateTime chooses)
|
||||
*/
|
||||
private function fixDate($date) {
|
||||
switch($this->getInput('lang')) {
|
||||
case 'en':
|
||||
case 'de':
|
||||
$df = date_create_from_format('d.m.y', $date);
|
||||
break;
|
||||
case 'it':
|
||||
$df = date_create_from_format('d/m/y', $date);
|
||||
break;
|
||||
}
|
||||
|
||||
date_time_set($df, 0, 0);
|
||||
class JustETFBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'justETF Bridge';
|
||||
const URI = 'https://www.justetf.com';
|
||||
const DESCRIPTION = 'Currently only supports the news feed';
|
||||
const MAINTAINER = 'logmanoriginal';
|
||||
const PARAMETERS = [
|
||||
'News' => [
|
||||
'full' => [
|
||||
'name' => 'Full Article',
|
||||
'type' => 'checkbox',
|
||||
'title' => 'Enable to load full articles'
|
||||
]
|
||||
],
|
||||
'Profile' => [
|
||||
'isin' => [
|
||||
'name' => 'ISIN',
|
||||
'type' => 'text',
|
||||
'required' => true,
|
||||
'exampleValue' => 'IE00B4X9L533',
|
||||
'pattern' => '[a-zA-Z]{2}[a-zA-Z0-9]{10}',
|
||||
'title' => 'ISIN, consisting of 2-letter country code, 9-character identifier, check character'
|
||||
],
|
||||
'strategy' => [
|
||||
'name' => 'Include Strategy',
|
||||
'type' => 'checkbox',
|
||||
'defaultValue' => 'checked'
|
||||
],
|
||||
'description' => [
|
||||
'name' => 'Include Description',
|
||||
'type' => 'checkbox',
|
||||
'defaultValue' => 'checked'
|
||||
]
|
||||
],
|
||||
'global' => [
|
||||
'lang' => [
|
||||
'name' => 'Language',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Englisch' => 'en',
|
||||
'Deutsch' => 'de',
|
||||
'Italiano' => 'it'
|
||||
],
|
||||
'defaultValue' => 'Englisch'
|
||||
]
|
||||
]
|
||||
];
|
||||
|
||||
/**
 * Loads the page for the active context and passes it to the matching collector.
 *
 * The URL is taken from getURI(). The loaded document is run through
 * defaultLinkTo() with the bridge base URI before anything is collected.
 * Only the 'News' and 'Profile' contexts have a collector; for any other
 * context no items are added.
 */
public function collectData()
{
    $dom = getSimpleHTMLDOM($this->getURI());
    defaultLinkTo($dom, static::URI);

    $context = $this->queriedContext;

    if ($context === 'News') {
        $this->collectNews($dom);
    } elseif ($context === 'Profile') {
        $this->collectProfile($dom);
    }
}
|
||||
|
||||
/**
 * Builds the URL that belongs to the current context and inputs.
 *
 * The bridge base URI is extended by the language segment when a
 * language input is present, then by the context-specific path:
 *
 * News    : <base>/news
 * Profile : <base>/etf-profile.html?isin=<ISIN in upper case>
 *
 * For any other context the (possibly language-prefixed) base is returned.
 *
 * @return string
 */
public function getURI()
{
    $lang = $this->getInput('lang');
    $base = $lang ? static::URI . '/' . $lang : static::URI;

    if ($this->queriedContext === 'News') {
        return $base . '/news';
    }

    if ($this->queriedContext === 'Profile') {
        $query = http_build_query([
            'isin' => strtoupper($this->getInput('isin'))
        ]);

        return $base . '/etf-profile.html?' . $query;
    }

    return $base;
}
|
||||
|
||||
/**
 * Builds the feed name from the bridge name, context and inputs.
 *
 * Pieces are appended in this order, each only when available:
 *
 * - ' - <context>'      when a context is set
 * - ' ISIN <ISIN>'      in the 'Profile' context when an ISIN is given (upper-cased)
 * - ' (<LANG>)'         when a language is given (upper-cased)
 *
 * @return string
 */
public function getName()
{
    $context = $this->queriedContext;
    $label = static::NAME;

    if ($context) {
        $label .= ' - ' . $context;
    }

    // The ISIN is only relevant (and only read) for profile feeds
    $isin = $context === 'Profile' ? $this->getInput('isin') : null;
    if ($isin) {
        $label .= ' ISIN ' . strtoupper($isin);
    }

    $lang = $this->getInput('lang');
    if ($lang) {
        $label .= ' (' . strtoupper($lang) . ')';
    }

    return $label;
}
|
||||
|
||||
#region Common
|
||||
|
||||
/**
 * Converts a date string from the site into a Unix timestamp at midnight.
 *
 * The expected layout depends on the chosen language:
 *
 * de : dd.mm.yy
 * en : dd.mm.yy
 * it : dd/mm/yy
 *
 * Basically strtotime doesn't convert dates correctly due to formats
 * being hard to interpret. So the string is parsed with an explicit
 * format and the time is manually set to 00:00:00.
 *
 * We don't know the timezone, so just assume +00:00 (or whatever
 * DateTime chooses)
 *
 * An unsupported language or a string that does not match the expected
 * layout is reported through returnServerError() instead of being passed
 * on to the date functions.
 *
 * @param string $date Date text as found on the page
 * @return string Unix timestamp in seconds (date format 'U')
 */
private function fixDate($date)
{
    $formats = [
        'en' => 'd.m.y',
        'de' => 'd.m.y',
        'it' => 'd/m/y',
    ];

    $lang = $this->getInput('lang');

    // Without a known language there is no format to parse with
    if (!is_string($lang) || !isset($formats[$lang])) {
        returnServerError('Unsupported language for date conversion!');
    }

    $df = date_create_from_format($formats[$lang], $date);

    // date_create_from_format() yields false when $date does not match the format
    if ($df === false) {
        returnServerError('Unable to parse date "' . $date . '"!');
    }

    // Parsing without '!' keeps the current time of day, so reset it explicitly
    date_time_set($df, 0, 0);

    return date_format($df, 'U');
}
|
||||
|
||||
/**
 * Collects the source URLs of the images inside the given element.
 *
 * Notice: We can have zero or more images (though it should mostly be 1).
 * The site logo (mostly provided as part of a hidden div) is left out, and
 * so are <img> elements that carry no usable src value.
 *
 * @param object $article Element to search; must provide find() as used below
 * @return array List of image URLs in the order find() yields them
 */
private function extractImages($article)
{
    $images = [];

    foreach ($article->find('img') as $img) {
        $src = $img->src;

        // An image without a source cannot serve as an enclosure
        if (!is_string($src) || $src === '') {
            continue;
        }

        // The file name is whatever follows the last slash. Without any
        // slash the whole value is the file name (strrpos() returns false
        // then, which must not be used in arithmetic).
        $slash = strrpos($src, '/');
        $file = $slash === false ? $src : substr($src, $slash + 1);

        // Skip the logo
        if ($file === 'logo.png') {
            continue;
        }

        $images[] = $src;
    }

    return $images;
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region News
|
||||
|
||||
private function collectNews($html)
|
||||
{
|
||||
$articles = $html->find('div.newsTopArticle')
|
||||
or returnServerError('No articles found! Layout might have changed!');
|
||||
|
||||
foreach ($articles as $article) {
|
||||
$item = [];
|
||||
|
||||
// Common data
|
||||
|
||||
$item['uri'] = $this->extractNewsUri($article);
|
||||
$item['timestamp'] = $this->extractNewsDate($article);
|
||||
$item['title'] = $this->extractNewsTitle($article);
|
||||
|
||||
// Debug::log(date_format($df, 'U'));
|
||||
if ($this->getInput('full')) {
|
||||
$uri = $this->extractNewsUri($article);
|
||||
|
||||
return date_format($df, 'U');
|
||||
}
|
||||
$html = getSimpleHTMLDOMCached($uri)
|
||||
or returnServerError('Failed loading full article from ' . $uri);
|
||||
|
||||
private function extractImages($article) {
|
||||
// Notice: We can have zero or more images (though it should mostly be 1)
|
||||
$elements = $article->find('img');
|
||||
$fullArticle = $html->find('div.article', 0)
|
||||
or returnServerError('No content found! Layout might have changed!');
|
||||
|
||||
$images = array();
|
||||
defaultLinkTo($fullArticle, static::URI);
|
||||
|
||||
$item['author'] = $this->extractFullArticleAuthor($fullArticle);
|
||||
$item['content'] = $this->extractFullArticleContent($fullArticle);
|
||||
$item['enclosures'] = $this->extractImages($fullArticle);
|
||||
} else {
|
||||
$item['content'] = $this->extractNewsDescription($article);
|
||||
$item['enclosures'] = $this->extractImages($article);
|
||||
}
|
||||
|
||||
foreach($elements as $img) {
|
||||
// Skip the logo (mostly provided part of a hidden div)
|
||||
if(substr($img->src, strrpos($img->src, '/') + 1) === 'logo.png')
|
||||
continue;
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
||||
$images[] = $img->src;
|
||||
}
|
||||
private function extractNewsUri($article)
|
||||
{
|
||||
$element = $article->find('a', 0)
|
||||
or returnServerError('Anchor not found!');
|
||||
|
||||
return $images;
|
||||
}
|
||||
return $element->href;
|
||||
}
|
||||
|
||||
#endregion
|
||||
private function extractNewsDate($article)
|
||||
{
|
||||
$element = $article->find('div.subheadline', 0)
|
||||
or returnServerError('Date not found!');
|
||||
|
||||
#region News
|
||||
// Debug::log($element->plaintext);
|
||||
|
||||
private function collectNews($html) {
|
||||
$articles = $html->find('div.newsTopArticle')
|
||||
or returnServerError('No articles found! Layout might have changed!');
|
||||
$date = trim(explode('|', $element->plaintext)[0]);
|
||||
|
||||
foreach($articles as $article) {
|
||||
return $this->fixDate($date);
|
||||
}
|
||||
|
||||
$item = array();
|
||||
private function extractNewsDescription($article)
|
||||
{
|
||||
$element = $article->find('span.newsText', 0)
|
||||
or returnServerError('Description not found!');
|
||||
|
||||
// Common data
|
||||
$element->find('a', 0)->onclick = '';
|
||||
|
||||
$item['uri'] = $this->extractNewsUri($article);
|
||||
$item['timestamp'] = $this->extractNewsDate($article);
|
||||
$item['title'] = $this->extractNewsTitle($article);
|
||||
// Debug::log($element->innertext);
|
||||
|
||||
if($this->getInput('full')) {
|
||||
return $element->innertext;
|
||||
}
|
||||
|
||||
$uri = $this->extractNewsUri($article);
|
||||
private function extractNewsTitle($article)
|
||||
{
|
||||
$element = $article->find('h3', 0)
|
||||
or returnServerError('Title not found!');
|
||||
|
||||
$html = getSimpleHTMLDOMCached($uri)
|
||||
or returnServerError('Failed loading full article from ' . $uri);
|
||||
return $element->plaintext;
|
||||
}
|
||||
|
||||
$fullArticle = $html->find('div.article', 0)
|
||||
or returnServerError('No content found! Layout might have changed!');
|
||||
private function extractFullArticleContent($article)
|
||||
{
|
||||
$element = $article->find('div.article_body', 0)
|
||||
or returnServerError('Article body not found!');
|
||||
|
||||
defaultLinkTo($fullArticle, static::URI);
|
||||
// Remove teaser image
|
||||
$element->find('img.teaser-img', 0)->outertext = '';
|
||||
|
||||
$item['author'] = $this->extractFullArticleAuthor($fullArticle);
|
||||
$item['content'] = $this->extractFullArticleContent($fullArticle);
|
||||
$item['enclosures'] = $this->extractImages($fullArticle);
|
||||
// Remove self advertisements
|
||||
foreach ($element->find('.call-action') as $adv) {
|
||||
$adv->outertext = '';
|
||||
}
|
||||
|
||||
} else {
|
||||
// Remove tips
|
||||
foreach ($element->find('.panel-edu') as $tip) {
|
||||
$tip->outertext = '';
|
||||
}
|
||||
|
||||
$item['content'] = $this->extractNewsDescription($article);
|
||||
$item['enclosures'] = $this->extractImages($article);
|
||||
// Remove inline scripts (used for i.e. interactive graphs) as they are
|
||||
// rendered as a long series of strings
|
||||
foreach ($element->find('script') as $script) {
|
||||
$script->outertext = '[Content removed! Visit site to see full contents!]';
|
||||
}
|
||||
|
||||
}
|
||||
return $element->innertext;
|
||||
}
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
private function extractFullArticleAuthor($article)
|
||||
{
|
||||
$element = $article->find('span[itemprop=name]', 0)
|
||||
or returnServerError('Author not found!');
|
||||
|
||||
private function extractNewsUri($article) {
|
||||
$element = $article->find('a', 0)
|
||||
or returnServerError('Anchor not found!');
|
||||
return $element->plaintext;
|
||||
}
|
||||
|
||||
return $element->href;
|
||||
}
|
||||
#endregion
|
||||
|
||||
private function extractNewsDate($article) {
|
||||
$element = $article->find('div.subheadline', 0)
|
||||
or returnServerError('Date not found!');
|
||||
#region Profile
|
||||
|
||||
// Debug::log($element->plaintext);
|
||||
private function collectProfile($html)
|
||||
{
|
||||
$item = [];
|
||||
|
||||
$date = trim(explode('|', $element->plaintext)[0]);
|
||||
$item['uri'] = $this->getURI();
|
||||
$item['timestamp'] = $this->extractProfileDate($html);
|
||||
$item['title'] = $this->extractProfiletitle($html);
|
||||
$item['author'] = $this->extractProfileAuthor($html);
|
||||
$item['content'] = $this->extractProfileContent($html);
|
||||
|
||||
return $this->fixDate($date);
|
||||
}
|
||||
$this->items[] = $item;
|
||||
}
|
||||
|
||||
private function extractNewsDescription($article) {
|
||||
$element = $article->find('span.newsText', 0)
|
||||
or returnServerError('Description not found!');
|
||||
private function extractProfileDate($html)
|
||||
{
|
||||
$element = $html->find('div.infobox div.vallabel', 0)
|
||||
or returnServerError('Date not found!');
|
||||
|
||||
$element->find('a', 0)->onclick = '';
|
||||
// Debug::log($element->plaintext);
|
||||
|
||||
// Debug::log($element->innertext);
|
||||
$date = trim(explode("\r\n", $element->plaintext)[1]);
|
||||
|
||||
return $element->innertext;
|
||||
}
|
||||
return $this->fixDate($date);
|
||||
}
|
||||
|
||||
private function extractNewsTitle($article) {
|
||||
$element = $article->find('h3', 0)
|
||||
or returnServerError('Title not found!');
|
||||
private function extractProfileTitle($html)
|
||||
{
|
||||
$element = $html->find('span.h1', 0)
|
||||
or returnServerError('Title not found!');
|
||||
|
||||
return $element->plaintext;
|
||||
}
|
||||
return $element->plaintext;
|
||||
}
|
||||
|
||||
private function extractFullArticleContent($article) {
|
||||
$element = $article->find('div.article_body', 0)
|
||||
or returnServerError('Article body not found!');
|
||||
private function extractProfileContent($html)
|
||||
{
|
||||
// There are a few thins we are interested:
|
||||
// - Investment Strategy
|
||||
// - Description
|
||||
// - Quote
|
||||
|
||||
// Remove teaser image
|
||||
$element->find('img.teaser-img', 0)->outertext = '';
|
||||
$strategy = $html->find('div.tab-container div.col-sm-6 p', 0)
|
||||
or returnServerError('Investment Strategy not found!');
|
||||
|
||||
// Remove self advertisements
|
||||
foreach($element->find('.call-action') as $adv) {
|
||||
$adv->outertext = '';
|
||||
}
|
||||
// Description requires a bit of cleanup due to lack of propper identification
|
||||
|
||||
// Remove tips
|
||||
foreach($element->find('.panel-edu') as $tip) {
|
||||
$tip->outertext = '';
|
||||
}
|
||||
$description = $html->find('div.headline', 5)
|
||||
or returnServerError('Description container not found!');
|
||||
|
||||
// Remove inline scripts (used for i.e. interactive graphs) as they are
|
||||
// rendered as a long series of strings
|
||||
foreach($element->find('script') as $script) {
|
||||
$script->outertext = '[Content removed! Visit site to see full contents!]';
|
||||
}
|
||||
$description = $description->parent();
|
||||
|
||||
return $element->innertext;
|
||||
}
|
||||
foreach ($description->find('div') as $div) {
|
||||
$div->outertext = '';
|
||||
}
|
||||
|
||||
private function extractFullArticleAuthor($article) {
|
||||
$element = $article->find('span[itemprop=name]', 0)
|
||||
or returnServerError('Author not found!');
|
||||
$quote = $html->find('div.infobox div.val', 0)
|
||||
or returnServerError('Quote not found!');
|
||||
|
||||
return $element->plaintext;
|
||||
}
|
||||
$quote_html = '<strong>Quote</strong><br><p>' . $quote . '</p>';
|
||||
$strategy_html = '';
|
||||
$description_html = '';
|
||||
|
||||
#endregion
|
||||
if ($this->getInput('strategy') === true) {
|
||||
$strategy_html = '<strong>Strategy</strong><br><p>' . $strategy . '</p><br>';
|
||||
}
|
||||
|
||||
#region Profile
|
||||
if ($this->getInput('description') === true) {
|
||||
$description_html = '<strong>Description</strong><br><p>' . $description . '</p><br>';
|
||||
}
|
||||
|
||||
private function collectProfile($html) {
|
||||
$item = array();
|
||||
return $strategy_html . $description_html . $quote_html;
|
||||
}
|
||||
|
||||
$item['uri'] = $this->getURI();
|
||||
$item['timestamp'] = $this->extractProfileDate($html);
|
||||
$item['title'] = $this->extractProfiletitle($html);
|
||||
$item['author'] = $this->extractProfileAuthor($html);
|
||||
$item['content'] = $this->extractProfileContent($html);
|
||||
private function extractProfileAuthor($html)
|
||||
{
|
||||
// Use ISIN + WKN as author
|
||||
// Notice: "identfier" is not a typo [sic]!
|
||||
$element = $html->find('span.identfier', 0)
|
||||
or returnServerError('Author not found!');
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
return $element->plaintext;
|
||||
}
|
||||
|
||||
private function extractProfileDate($html) {
|
||||
$element = $html->find('div.infobox div.vallabel', 0)
|
||||
or returnServerError('Date not found!');
|
||||
|
||||
// Debug::log($element->plaintext);
|
||||
|
||||
$date = trim(explode("\r\n", $element->plaintext)[1]);
|
||||
|
||||
return $this->fixDate($date);
|
||||
}
|
||||
|
||||
private function extractProfileTitle($html) {
|
||||
$element = $html->find('span.h1', 0)
|
||||
or returnServerError('Title not found!');
|
||||
|
||||
return $element->plaintext;
|
||||
}
|
||||
|
||||
private function extractProfileContent($html) {
|
||||
// There are a few thins we are interested:
|
||||
// - Investment Strategy
|
||||
// - Description
|
||||
// - Quote
|
||||
|
||||
$strategy = $html->find('div.tab-container div.col-sm-6 p', 0)
|
||||
or returnServerError('Investment Strategy not found!');
|
||||
|
||||
// Description requires a bit of cleanup due to lack of propper identification
|
||||
|
||||
$description = $html->find('div.headline', 5)
|
||||
or returnServerError('Description container not found!');
|
||||
|
||||
$description = $description->parent();
|
||||
|
||||
foreach($description->find('div') as $div) {
|
||||
$div->outertext = '';
|
||||
}
|
||||
|
||||
$quote = $html->find('div.infobox div.val', 0)
|
||||
or returnServerError('Quote not found!');
|
||||
|
||||
$quote_html = '<strong>Quote</strong><br><p>' . $quote . '</p>';
|
||||
$strategy_html = '';
|
||||
$description_html = '';
|
||||
|
||||
if($this->getInput('strategy') === true) {
|
||||
$strategy_html = '<strong>Strategy</strong><br><p>' . $strategy . '</p><br>';
|
||||
}
|
||||
|
||||
if($this->getInput('description') === true) {
|
||||
$description_html = '<strong>Description</strong><br><p>' . $description . '</p><br>';
|
||||
}
|
||||
|
||||
return $strategy_html . $description_html . $quote_html;
|
||||
}
|
||||
|
||||
private function extractProfileAuthor($html) {
|
||||
// Use ISIN + WKN as author
|
||||
// Notice: "identfier" is not a typo [sic]!
|
||||
$element = $html->find('span.identfier', 0)
|
||||
or returnServerError('Author not found!');
|
||||
|
||||
return $element->plaintext;
|
||||
}
|
||||
|
||||
#endregion
|
||||
#endregion
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue