[bridges] make them FeedExpander

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
Pierre Mazière 2016-09-14 13:33:38 +02:00
parent 244516f0a2
commit 8f58c9f86b
7 changed files with 190 additions and 306 deletions

View file

@ -1,54 +1,55 @@
<?php
class NeuviemeArtBridge extends BridgeAbstract {
class NeuviemeArtBridge extends FeedExpander {
const MAINTAINER = "ORelio";
const NAME = '9ème Art Bridge';
const URI = "http://www.9emeart.fr/";
const DESCRIPTION = "Returns the newest articles.";
const MAINTAINER = "ORelio";
const NAME = '9ème Art Bridge';
const URI = "http://www.9emeart.fr/";
const DESCRIPTION = "Returns the newest articles.";
public function collectData(){
/**
 * Remove every section of $string that starts with $start and ends with
 * the first following $end, delimiters included.
 *
 * If a start delimiter has no matching end delimiter, everything from that
 * start delimiter to the end of the string is dropped, so an unterminated
 * section (e.g. a dangling "<script") never leaks into the result.
 *
 * @param string $string Text to clean up.
 * @param string $start  Opening delimiter of the sections to remove.
 * @param string $end    Closing delimiter of the sections to remove.
 * @return string The text without the delimited sections.
 */
private function StripWithDelimiters($string, $start, $end) {
    // Empty delimiters cannot delimit anything; looping on them would
    // never make progress, so return the input untouched.
    if ($start === '' || $end === '') {
        return $string;
    }

    while (($start_pos = strpos($string, $start)) !== false) {
        $end_pos = strpos($string, $end, $start_pos);
        if ($end_pos === false) {
            // Unterminated section: cut it off instead of treating the
            // failed lookup as offset 0 and deleting an arbitrary prefix.
            return substr($string, 0, $start_pos);
        }

        // Splice out exactly this section, end delimiter included.
        $section_length = $end_pos + strlen($end) - $start_pos;
        $string = substr_replace($string, '', $start_pos, $section_length);
    }

    return $string;
}
/**
 * Remove every section of $string that starts with $start and ends with
 * the first following $end, delimiters included.
 *
 * If a start delimiter has no matching end delimiter, everything from that
 * start delimiter to the end of the string is dropped, so an unterminated
 * section (e.g. a dangling "<script") never leaks into the result.
 *
 * @param string $string Text to clean up.
 * @param string $start  Opening delimiter of the sections to remove.
 * @param string $end    Closing delimiter of the sections to remove.
 * @return string The text without the delimited sections.
 */
function StripWithDelimiters($string, $start, $end) {
    // Empty delimiters cannot delimit anything; looping on them would
    // never make progress, so return the input untouched.
    if ($start === '' || $end === '') {
        return $string;
    }

    while (($start_pos = strpos($string, $start)) !== false) {
        $end_pos = strpos($string, $end, $start_pos);
        if ($end_pos === false) {
            // Unterminated section: cut it off instead of treating the
            // failed lookup as offset 0 and deleting an arbitrary prefix.
            return substr($string, 0, $start_pos);
        }

        // Splice out exactly this section, end delimiter included.
        $section_length = $end_pos + strlen($end) - $start_pos;
        $string = substr_replace($string, '', $start_pos, $section_length);
    }

    return $string;
}
/**
 * Expand a feed item with the full article body taken from the article page.
 *
 * The item is first built by parent::parseItem(); its 'content' entry is then
 * replaced by the article markup (optionally preceded by a lead image), or by
 * an error message when the article page cannot be loaded.
 *
 * NOTE(review): this span appears to interleave the removed collectData()
 * loop body with the new parseItem() body (diff rendering without +/-
 * markers) - confirm against the actual file before relying on line order.
 *
 * @param mixed $item Feed item as handed to parent::parseItem().
 * @return mixed The item from parent::parseItem() with 'content' set.
 */
protected function parseItem($item){
$item = parent::parseItem($item);
$feedUrl = self::URI.'9emeart.rss';
$html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request 9eme Art: '.$feedUrl);
$limit = 0;
// Load the article page the feed item links to.
$article_html = $this->getSimpleHTMLDOMCached($item['uri']);
// On failure, report the problem in the item content instead of aborting.
if(!$article_html){
$item['content'] = 'Could not request 9eme Art: '.$item['uri'];
return $item;
}
foreach ($html->find('item') as $element) {
if ($limit < 5) {
// Use the first full-size image whose alt text equals the item title, if any.
$article_image = '';
foreach ($article_html->find('img.img_full') as $img){
if ($img->alt == $item['title']){
$article_image = self::URI.$img->src;
break;
}
}
//Retrieve article Uri and get that page
$article_uri = $element->find('guid', 0)->plaintext;
$article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request 9eme Art: '.$article_uri);
$article_content='';
if($article_image){
$article_content = '<p><img src="'.$article_image.'" /></p>';
}
// Turn root-relative src attributes into absolute URLs on the site.
$article_content .= str_replace(
'src="/', 'src="'.self::URI,
$article_html->find('div.newsGenerique_con', 0)->innertext
);
// Drop script, style and link markup from the article body.
$article_content = $this->StripWithDelimiters($article_content, '<script', '</script>');
$article_content = $this->StripWithDelimiters($article_content, '<style', '</style>');
$article_content = $this->StripWithDelimiters($article_content, '<link', '>');
//Build article contents from corresponding elements
$article_title = trim($element->find('title', 0)->plaintext);
$article_image = $element->find('enclosure', 0)->url;
foreach ($article_html->find('img.img_full') as $img)
if ($img->alt == $article_title)
$article_image = self::URI.$img->src;
$article_content = '<p><img src="'.$article_image.'" /></p>'
.str_replace('src="/', 'src="'.self::URI, $article_html->find('div.newsGenerique_con', 0)->innertext);
$article_content = StripWithDelimiters($article_content, '<script', '</script>');
$article_content = StripWithDelimiters($article_content, '<style', '</style>');
$article_content = StripWithDelimiters($article_content, '<link', '>');
$item['content'] = $article_content;
//Build and add final item
$item = array();
$item['uri'] = $article_uri;
$item['title'] = $article_title;
$item['author'] = $article_html->find('a[class=upp transition_fast upp]', 0)->plaintext;
$item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext);
$item['content'] = $article_content;
$this->items[] = $item;
$limit++;
}
}
}
return $item;
}
/**
 * Hand the site's RSS feed URL to collectExpandableDatas().
 */
public function collectData(){
    $this->collectExpandableDatas(self::URI.'9emeart.rss');
}
}