mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-08-05 02:24:35 +02:00
Merge branch 'FeedExpander' of https://github.com/logmanoriginal/rss-bridge
This commit is contained in:
commit
671703cd37
6 changed files with 225 additions and 126 deletions
|
@ -1,40 +1,26 @@
|
|||
<?php
|
||||
class AcrimedBridge extends RssExpander{
|
||||
class AcrimedBridge extends FeedExpander {
|
||||
|
||||
const MAINTAINER = "qwertygc";
|
||||
const NAME = "Acrimed Bridge";
|
||||
const URI = "http://www.acrimed.org/";
|
||||
const DESCRIPTION = "Returns the newest articles.";
|
||||
|
||||
const MAINTAINER = "qwertygc";
|
||||
const NAME = "Acrimed Bridge";
|
||||
const URI = "http://www.acrimed.org/";
|
||||
const DESCRIPTION = "Returns the newest articles.";
|
||||
public function collectData(){
|
||||
$this->collectExpandableDatas("http://www.acrimed.org/spip.php?page=backend");
|
||||
}
|
||||
|
||||
public function collectData(){
|
||||
protected function parseItem($newsItem){
|
||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
||||
|
||||
$this->collectExpandableDatas(static::URI.'spip.php?page=backend');
|
||||
$hs = new HTMLSanitizer();
|
||||
$articlePage = $this->getSimpleHTMLDOM($newsItem->link);
|
||||
$article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext);
|
||||
$article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/");
|
||||
$item['content'] = $article;
|
||||
|
||||
}
|
||||
|
||||
protected function parseRSSItem($newsItem) {
|
||||
|
||||
$hs = new HTMLSanitizer();
|
||||
|
||||
$namespaces = $newsItem->getNameSpaces(true);
|
||||
$dc = $newsItem->children($namespaces['dc']);
|
||||
|
||||
$item = array();
|
||||
$item['uri'] = trim($newsItem->link);
|
||||
$item['title'] = trim($newsItem->title);
|
||||
$item['timestamp'] = strtotime($dc->date);
|
||||
|
||||
$articlePage = $this->getSimpleHTMLDOM($newsItem->link);
|
||||
$article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext);
|
||||
$article = HTMLSanitizer::defaultImageSrcTo($article, static::URI);
|
||||
|
||||
$item['content'] = $article;
|
||||
|
||||
|
||||
return $item;
|
||||
|
||||
}
|
||||
return $item;
|
||||
}
|
||||
|
||||
public function getCacheDuration(){
|
||||
return 4800; // 80 minutes (NOTE: 2 hours would be 7200)
|
||||
|
|
62
bridges/FeedExpanderExampleBridge.php
Normal file
62
bridges/FeedExpanderExampleBridge.php
Normal file
|
@ -0,0 +1,62 @@
|
|||
<?php
|
||||
/**
 * Example bridge used to exercise FeedExpander.
 *
 * The user selects a feed format through the 'version' parameter. Each format
 * is tied to one fixed sample feed URL (see collectData()) and to the item
 * parser method for that format (see parseItem()).
 */
class FeedExpanderExampleBridge extends FeedExpander {

    const MAINTAINER = 'logmanoriginal';
    const NAME = 'FeedExpander Example';
    const URI = '#';
    const DESCRIPTION = 'Example bridge to test FeedExpander';

    const PARAMETERS = array(
        'Feed' => array(
            'version' => array(
                'name' => 'Version',
                'type' => 'list',
                'required' => true,
                'title' => 'Select your feed format/version',
                'defaultValue' => 'RSS 2.0',
                'values' => array(
                    'RSS 0.91' => 'rss_0_9_1',
                    'RSS 1.0' => 'rss_1_0',
                    'RSS 2.0' => 'rss_2_0',
                    'ATOM 1.0' => 'atom_1_0'
                )
            )
        )
    );

    /**
     * Loads the sample feed that matches the selected 'version' input.
     *
     * An unrecognised version is reported through returnClientError() and no
     * feed is loaded.
     */
    public function collectData(){
        // Read the input once so the switch and the error message agree.
        $version = $this->getInput('version');

        switch($version){
            case 'rss_0_9_1':
                $feedUrl = 'http://static.userland.com/gems/backend/sampleRss.xml';
                break;
            case 'rss_1_0':
                $feedUrl = 'http://feeds.nature.com/nature/rss/current?format=xml';
                break;
            case 'rss_2_0':
                $feedUrl = 'http://feeds.rssboard.org/rssboard?format=xml';
                break;
            case 'atom_1_0':
                $feedUrl = 'http://segfault.linuxmint.com/feed/atom/';
                break;
            default:
                // NOTE(review): returnClientError() presumably aborts the
                // request; its implementation is not visible here. The
                // explicit return keeps the "load nothing" outcome either way.
                $this->returnClientError('Unknown version ' . $version . '!');
                return;
        }

        // Single call site; the class does not override this method, so
        // $this-> resolves to the same implementation parent:: did.
        $this->collectExpandableDatas($feedUrl);
    }

    /**
     * Parses one feed item with the parser for the selected 'version' input.
     *
     * @param mixed $newsItem Feed item handed in by FeedExpander; passed
     *                        through unchanged to the format-specific parser.
     * @return mixed Whatever the format-specific parser returns, or null when
     *               the version is not recognised.
     */
    protected function parseItem($newsItem) {
        $version = $this->getInput('version');

        // Each case returns directly, so no break statements are needed.
        switch($version){
            case 'rss_0_9_1':
                return $this->parseRSS_0_9_1_Item($newsItem);
            case 'rss_1_0':
                return $this->parseRSS_1_0_Item($newsItem);
            case 'rss_2_0':
                return $this->parseRSS_2_0_Item($newsItem);
            case 'atom_1_0':
                return $this->parseATOMItem($newsItem);
            default:
                $this->returnClientError('Unknown version ' . $version . '!');
                // Makes the previously implicit fall-through result explicit.
                return null;
        }
    }
}
|
|
@ -1,34 +1,22 @@
|
|||
<?php
|
||||
define("FREENEWS_RSS", 'http://feeds.feedburner.com/Freenews-Freebox?format=xml');
|
||||
class FreenewsBridge extends RssExpander {
|
||||
class FreenewsBridge extends FeedExpander {
|
||||
|
||||
const MAINTAINER = "mitsukarenai";
|
||||
const NAME = "Freenews";
|
||||
const URI = "http://freenews.fr";
|
||||
const DESCRIPTION = "Un site d'actualité pour les freenautes (mais ne parlant pas que de la freebox). Ne rentrez pas d'id si vous voulez accéder aux actualités générales.";
|
||||
const MAINTAINER = "mitsukarenai";
|
||||
const NAME = "Freenews";
|
||||
const URI = "http://freenews.fr";
|
||||
const DESCRIPTION = "Un site d'actualité pour les freenautes (mais ne parlant pas que de la freebox). Ne rentrez pas d'id si vous voulez accéder aux actualités générales.";
|
||||
|
||||
public function collectData(){
|
||||
parent::collectExpandableDatas(FREENEWS_RSS);
|
||||
parent::collectExpandableDatas('http://feeds.feedburner.com/Freenews-Freebox?format=xml');
|
||||
}
|
||||
|
||||
protected function parseRSSItem($newsItem) {
|
||||
$item = array();
|
||||
$item['title'] = trim($newsItem->title);
|
||||
$this->debugMessage("item has for title \"".$item['title']."\"");
|
||||
if(empty($newsItem->guid)) {
|
||||
$item['uri'] = (string) $newsItem->link;
|
||||
} else {
|
||||
$item['uri'] = (string) $newsItem->guid;
|
||||
}
|
||||
// now load that uri from cache
|
||||
$this->debugMessage("now loading page ".$item['uri']);
|
||||
protected function parseItem($newsItem) {
|
||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
||||
|
||||
$articlePage = $this->get_cached($item['uri']);
|
||||
|
||||
$content = $articlePage->find('.post-container', 0);
|
||||
$item['content'] = $content->innertext;
|
||||
$item['author'] = $articlePage->find('a[rel=author]', 0)->innertext;
|
||||
// format should parse 2014-03-25T16:21:20Z. But, according to http://stackoverflow.com/a/10478469, it is not that simple
|
||||
$item['timestamp'] = $this->RSS_2_0_time_to_timestamp($newsItem);
|
||||
|
||||
return $item;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,35 +1,19 @@
|
|||
<?php
|
||||
class Les400CulsBridge extends RssExpander{
|
||||
|
||||
const MAINTAINER = "unknown";
|
||||
const NAME = "Les 400 Culs";
|
||||
const URI = "http://sexes.blogs.liberation.fr/";
|
||||
const DESCRIPTION = "La planete sexe vue par Agnes Girard via rss-bridge";
|
||||
class Les400CulsBridge extends FeedExpander{
|
||||
|
||||
const MAINTAINER = "unknown";
|
||||
const NAME = "Les 400 Culs";
|
||||
const URI = "http://sexes.blogs.liberation.fr/";
|
||||
const DESCRIPTION = "La planete sexe vue par Agnes Girard via rss-bridge";
|
||||
|
||||
public function collectData(){
|
||||
$this->collectExpandableDatas(self::URI.'feeds/');
|
||||
$this->collectExpandableDatas(self::URI . 'feeds/');
|
||||
}
|
||||
|
||||
protected function parseRSSItem($newsItem) {
|
||||
$item = array();
|
||||
$item['title'] = trim((string) $newsItem->title);
|
||||
$this->debugMessage("browsing item ".var_export($newsItem, true));
|
||||
if(empty($newsItem->guid)) {
|
||||
$item['uri'] = (string) $newsItem->link;
|
||||
} else {
|
||||
$item['uri'] = (string) $newsItem->guid;
|
||||
}
|
||||
// now load that uri from cache
|
||||
$this->debugMessage("now loading page ".$item['uri']);
|
||||
// $articlePage = $this->get_cached($item['uri']);
|
||||
|
||||
// $content = $articlePage->find('.post-container', 0);
|
||||
$item['content'] = (string) $newsItem->description;
|
||||
$item['author'] = (string) $newsItem->author;
|
||||
$item['timestamp'] = $this->RSS_2_0_time_to_timestamp($newsItem);
|
||||
return $item;
|
||||
protected function parseItem($newsItem){
|
||||
return $this->parseRSS_2_0_Item($newsItem);
|
||||
}
|
||||
|
||||
/**
 * Cache duration reported by this bridge.
 *
 * @return int 7200 (2 hours when read as seconds)
 */
public function getCacheDuration(){
    $twoHours = 2 * 60 * 60; // = 7200
    return $twoHours;
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
<?php
|
||||
class TheOatmealBridge extends RssExpander{
|
||||
class TheOatmealBridge extends FeedExpander{
|
||||
|
||||
const MAINTAINER = "Riduidel";
|
||||
const NAME = "The Oatmeal";
|
||||
|
@ -10,44 +10,17 @@ class TheOatmealBridge extends RssExpander{
|
|||
$this->collectExpandableDatas('http://feeds.feedburner.com/oatmealfeed');
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem) {
|
||||
$item = $this->parseRSS_1_0_Item($newsItem);
|
||||
|
||||
/**
|
||||
* Since The Oatmeal produces a weird RSS feed, I have to fix it by loading the items separately from the feed info
|
||||
*/
|
||||
protected function collect_RSS_2_0_data($rssContent) {
|
||||
$rssContent->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/");
|
||||
$rssHeaderContent = $rssContent->channel[0];
|
||||
$this->debugMessage("RSS content is ===========\n".var_export($rssHeaderContent, true)."===========");
|
||||
$this->load_RSS_2_0_feed_data($rssHeaderContent);
|
||||
foreach($rssContent->item as $item) {
|
||||
$this->debugMessage("parsing item ".var_export($item, true));
|
||||
$this->items[] = $this->parseRSSItem($item);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected function parseRSSItem($newsItem) {
|
||||
$namespaces = $newsItem->getNameSpaces(true);
|
||||
$dc = $newsItem->children($namespaces['dc']);
|
||||
$rdf = $newsItem->children($namespaces['rdf']);
|
||||
$item = array();
|
||||
$item['title'] = trim($newsItem->title);
|
||||
$this->debugMessage("browsing Oatmeal item ".var_export($newsItem, true));
|
||||
$item['uri']=(string) $newsItem->attributes($namespaces['rdf'])->about;
|
||||
// now load that uri from cache
|
||||
$this->debugMessage("now loading page ".$item['uri']);
|
||||
$articlePage = $this->get_cached($item['uri']);
|
||||
|
||||
$content = $articlePage->find('#comic', 0);
|
||||
if($content==null) {
|
||||
$content = $articlePage->find('#blog');
|
||||
}
|
||||
$item['content'] = $content->innertext;
|
||||
if(is_null($content)) // load alternative
|
||||
$content = $articlePage->find('#blog', 0);
|
||||
|
||||
if(!is_null($content))
|
||||
$item['content'] = $content->innertext;
|
||||
|
||||
$this->debugMessage("dc content is ".var_export($dc, true));
|
||||
$item['author'] = (string) $dc->creator;
|
||||
$item['timestamp'] = DateTime::createFromFormat(DateTime::ISO8601, $dc->date)->getTimestamp();
|
||||
$this->debugMessage("writtem by ".$item['author']." on ".$item['timestamp']);
|
||||
return $item;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue