[core] extract BridgeAbstract methods to make them functions

- returnError, returnServerError, returnClientError, debugMessage are
  moved to lib/error.php

- getContents, getSimpleHTMLDOM, getSimpleHTMLDOMCached are moved to
  lib/contents.php

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
Pierre Mazière 2016-09-25 23:22:33 +02:00
parent ad534444fa
commit f1fb95b257
111 changed files with 485 additions and 477 deletions

View file

@ -8,25 +8,11 @@ abstract class BridgeAbstract implements BridgeInterface {
const MAINTAINER = 'No maintainer';
const PARAMETERS = array();
public $useProxy = true;
protected $cache;
protected $items = array();
protected $inputs = array();
protected $queriedContext = '';
protected function returnError($message, $code){
throw new \HttpException($message, $code);
}
protected function returnClientError($message){
$this->returnError($message, 400);
}
protected function returnServerError($message){
$this->returnError($message, 500);
}
/**
* Return items stored in the bridge
* @return mixed
@ -260,7 +246,7 @@ abstract class BridgeAbstract implements BridgeInterface {
if(empty(static::PARAMETERS)){
if(!empty($inputs)){
$this->returnClientError('Invalid parameters value(s)');
returnClientError('Invalid parameters value(s)');
}
$this->collectData();
@ -271,15 +257,15 @@ abstract class BridgeAbstract implements BridgeInterface {
}
if(!$this->validateData($inputs)){
$this->returnClientError('Invalid parameters value(s)');
returnClientError('Invalid parameters value(s)');
}
// Guess the parameter context from input data
$this->queriedContext = $this->getQueriedContext($inputs);
if(is_null($this->queriedContext)){
$this->returnClientError('Required parameter(s) missing');
returnClientError('Required parameter(s) missing');
} elseif($this->queriedContext === false){
$this->returnClientError('Mixed context parameters');
returnClientError('Mixed context parameters');
}
$this->setInputs($inputs, $this->queriedContext);
@ -313,157 +299,4 @@ abstract class BridgeAbstract implements BridgeInterface {
public function setCache(\CacheAbstract $cache){
$this->cache = $cache;
}
public function debugMessage($text){
if(!file_exists('DEBUG')) {
return;
}
$backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3);
$calling = $backtrace[2];
$message = $calling['file'] . ':'
. $calling['line'] . ' class '
. get_class($this) . '->'
. $calling['function'] . ' - '
. $text;
error_log($message);
}
protected function getContents($url
, $use_include_path = false
, $context = null
, $offset = 0
, $maxlen = null){
$contextOptions = array(
'http' => array(
'user_agent' => ini_get('user_agent')
)
);
if(defined('PROXY_URL') && $this->useProxy){
$contextOptions['http']['proxy'] = PROXY_URL;
$contextOptions['http']['request_fulluri'] = true;
if(is_null($context)){
$context = stream_context_create($contextOptions);
} else {
$prevContext = $context;
if(!stream_context_set_option($context, $contextOptions)){
$context = $prevContext;
}
}
}
if(is_null($maxlen)){
$content = @file_get_contents($url, $use_include_path, $context, $offset);
} else {
$content = @file_get_contents($url, $use_include_path, $context, $offset, $maxlen);
}
if($content === false)
$this->debugMessage('Cant\'t download ' . $url);
// handle compressed data
foreach($http_response_header as $header){
if(stristr($header, 'content-encoding')){
switch(true){
case stristr($header, 'gzip'):
$content = gzinflate(substr($content, 10, -8));
break;
case stristr($header, 'compress'):
//TODO
case stristr($header, 'deflate'):
//TODO
case stristr($header, 'brotli'):
//TODO
$this->returnServerError($header . '=> Not implemented yet');
break;
case stristr($header, 'identity'):
break;
default:
$this->returnServerError($header . '=> Unknown compression');
}
}
}
return $content;
}
protected function getSimpleHTMLDOM($url
, $use_include_path = false
, $context = null
, $offset = 0
, $maxLen = null
, $lowercase = true
, $forceTagsClosed = true
, $target_charset = DEFAULT_TARGET_CHARSET
, $stripRN = true
, $defaultBRText = DEFAULT_BR_TEXT
, $defaultSpanText = DEFAULT_SPAN_TEXT){
$content = $this->getContents($url, $use_include_path, $context, $offset, $maxLen);
return str_get_html($content
, $lowercase
, $forceTagsClosed
, $target_charset
, $stripRN
, $defaultBRText
, $defaultSpanText);
}
/**
* Maintain locally cached versions of pages to avoid multiple downloads.
* @param url url to cache
* @param duration duration of the cache file in seconds (default: 24h/86400s)
* @return content of the file as string
*/
public function getSimpleHTMLDOMCached($url
, $duration = 86400
, $use_include_path = false
, $context = null
, $offset = 0
, $maxLen = null
, $lowercase = true
, $forceTagsClosed = true
, $target_charset = DEFAULT_TARGET_CHARSET
, $stripRN = true
, $defaultBRText = DEFAULT_BR_TEXT
, $defaultSpanText = DEFAULT_SPAN_TEXT){
$this->debugMessage('Caching url ' . $url . ', duration ' . $duration);
$filepath = __DIR__ . '/../cache/pages/' . sha1($url) . '.cache';
$this->debugMessage('Cache file ' . $filepath);
if(file_exists($filepath) && filectime($filepath) < time() - $duration){
unlink ($filepath);
$this->debugMessage('Cached file deleted: ' . $filepath);
}
if(file_exists($filepath)){
$this->debugMessage('Loading cached file ' . $filepath);
touch($filepath);
$content = file_get_contents($filepath);
} else {
$this->debugMessage('Caching ' . $url . ' to ' . $filepath);
$dir = substr($filepath, 0, strrpos($filepath, '/'));
if(!is_dir($dir)){
$this->debugMessage('Creating directory ' . $dir);
mkdir($dir, 0777, true);
}
$content = $this->getContents($url, $use_include_path, $context, $offset, $maxLen);
if($content !== false){
file_put_contents($filepath, $content);
}
}
return str_get_html($content
, $lowercase
, $forceTagsClosed
, $target_charset
, $stripRN
, $defaultBRText
, $defaultSpanText);
}
}

View file

@ -9,46 +9,46 @@ abstract class FeedExpander extends BridgeAbstract {
public function collectExpandableDatas($url, $maxItems = -1){
if(empty($url)){
$this->returnServerError('There is no $url for this RSS expander');
returnServerError('There is no $url for this RSS expander');
}
$this->debugMessage('Loading from ' . $url);
debugMessage('Loading from ' . $url);
/* Notice we do not use cache here on purpose:
* we want a fresh view of the RSS stream each time
*/
$content = $this->getContents($url)
or $this->returnServerError('Could not request ' . $url);
$content = getContents($url)
or returnServerError('Could not request ' . $url);
$rssContent = simplexml_load_string($content);
$this->debugMessage('Detecting feed format/version');
debugMessage('Detecting feed format/version');
switch(true){
case isset($rssContent->item[0]):
$this->debugMessage('Detected RSS 1.0 format');
debugMessage('Detected RSS 1.0 format');
$this->feedType = "RSS_1_0";
break;
case isset($rssContent->channel[0]):
$this->debugMessage('Detected RSS 0.9x or 2.0 format');
debugMessage('Detected RSS 0.9x or 2.0 format');
$this->feedType = "RSS_2_0";
break;
case isset($rssContent->entry[0]):
$this->debugMessage('Detected ATOM format');
debugMessage('Detected ATOM format');
$this->feedType = "ATOM_1_0";
break;
default:
$this->debugMessage('Unknown feed format/version');
$this->returnServerError('The feed format is unknown!');
debugMessage('Unknown feed format/version');
returnServerError('The feed format is unknown!');
break;
}
$this->debugMessage('Calling function "collect_' . $this->feedType . '_data"');
debugMessage('Calling function "collect_' . $this->feedType . '_data"');
$this->{'collect_' . $this->feedType . '_data'}($rssContent, $maxItems);
}
protected function collect_RSS_1_0_data($rssContent, $maxItems){
$this->load_RSS_2_0_feed_data($rssContent->channel[0]);
foreach($rssContent->item as $item){
$this->debugMessage('parsing item ' . var_export($item, true));
debugMessage('parsing item ' . var_export($item, true));
$this->items[] = $this->parseItem($item);
if($maxItems !== -1 && count($this->items) >= $maxItems) break;
}
@ -56,13 +56,13 @@ abstract class FeedExpander extends BridgeAbstract {
protected function collect_RSS_2_0_data($rssContent, $maxItems){
$rssContent = $rssContent->channel[0];
$this->debugMessage('RSS content is ===========\n'
debugMessage('RSS content is ===========\n'
. var_export($rssContent, true)
. '===========');
$this->load_RSS_2_0_feed_data($rssContent);
foreach($rssContent->item as $item){
$this->debugMessage('parsing item ' . var_export($item, true));
debugMessage('parsing item ' . var_export($item, true));
$this->items[] = $this->parseItem($item);
if($maxItems !== -1 && count($this->items) >= $maxItems) break;
}
@ -71,7 +71,7 @@ abstract class FeedExpander extends BridgeAbstract {
protected function collect_ATOM_1_0_data($content, $maxItems){
$this->load_ATOM_feed_data($content);
foreach($content->entry as $item){
$this->debugMessage('parsing item ' . var_export($item, true));
debugMessage('parsing item ' . var_export($item, true));
$this->items[] = $this->parseItem($item);
if($maxItems !== -1 && count($this->items) >= $maxItems) break;
}
@ -190,7 +190,7 @@ abstract class FeedExpander extends BridgeAbstract {
case 'ATOM_1_0':
return $this->parseATOMItem($item);
break;
default: $this->returnClientError('Unknown version ' . $this->getInput('version') . '!');
default: returnClientError('Unknown version ' . $this->getInput('version') . '!');
}
}

View file

@ -16,6 +16,8 @@ require __DIR__ . '/FeedExpander.php';
require __DIR__ . '/Cache.php';
require __DIR__ . '/CacheAbstract.php';
require __DIR__ . '/HTMLUtils.php';
require __DIR__ . '/error.php';
require __DIR__ . '/contents.php';
$vendorLibSimpleHtmlDom = __DIR__ . PATH_VENDOR . '/simplehtmldom/simple_html_dom.php';
if( !file_exists($vendorLibSimpleHtmlDom) ){

142
lib/contents.php Normal file
View file

@ -0,0 +1,142 @@
<?php
/**
 * Read the content located at $url through file_get_contents().
 *
 * When the PROXY_URL constant is defined and NOPROXY is not, the request is
 * routed through that proxy. For HTTP(S) responses the Content-Encoding of
 * the final response is inspected and gzip-encoded bodies are decoded.
 *
 * @param string $url URL (or path) to read
 * @param bool $use_include_path forwarded to file_get_contents()
 * @param resource|null $context optional stream context to use
 * @param int $offset forwarded to file_get_contents()
 * @param int|null $maxlen maximum number of bytes to read, null for no limit
 * @return string|false the (decoded) content, or false if it could not be read
 * @throws \HttpException through returnServerError() when the response uses
 *         an unsupported or unknown Content-Encoding
 */
function getContents($url
, $use_include_path = false
, $context = null
, $offset = 0
, $maxlen = null
){
    $contextOptions = array(
        'http' => array(
            'user_agent' => ini_get('user_agent')
        )
    );

    if(defined('PROXY_URL') && !defined('NOPROXY')){
        $contextOptions['http']['proxy'] = PROXY_URL;
        $contextOptions['http']['request_fulluri'] = true;

        if(is_null($context)){
            $context = stream_context_create($contextOptions);
        } elseif(!stream_context_set_option($context, $contextOptions)){
            // The caller's context is used unchanged; make the failure visible.
            debugMessage('Could not apply proxy settings to the given stream context');
        }
    }

    // The length argument is only passed when the caller asked for a limit.
    if(is_null($maxlen)){
        $content = @file_get_contents($url, $use_include_path, $context, $offset);
    } else {
        $content = @file_get_contents($url, $use_include_path, $context, $offset, $maxlen);
    }

    if($content === false){
        debugMessage('Cant\'t download ' . $url);
        // Nothing to decode; callers test the result against false.
        return false;
    }

    // $http_response_header is only populated by the HTTP(S) stream wrapper.
    if(!isset($http_response_header) || !is_array($http_response_header)){
        return $content;
    }

    // The header list holds every response of a redirect chain; only the
    // Content-Encoding of the last response describes the body we received.
    $encodingHeader = null;
    foreach($http_response_header as $header){
        if(stripos($header, 'HTTP/') === 0){
            $encodingHeader = null;
        } elseif(stripos($header, 'content-encoding:') === 0){
            // Match the header NAME only, not any header mentioning it.
            $encodingHeader = $header;
        }
    }

    if(is_null($encodingHeader)){
        return $content;
    }

    $encoding = strtolower(trim(substr($encodingHeader, strlen('content-encoding:'))));

    switch(true){
    case strpos($encoding, 'gzip') !== false:
        // gzdecode() parses the gzip header itself, including optional fields.
        $content = gzdecode($content);
        if($content === false){
            debugMessage('Could not decode gzip content of ' . $url);
        }
        break;
    case strpos($encoding, 'compress') !== false:
        //TODO
    case strpos($encoding, 'deflate') !== false:
        //TODO
    case $encoding === 'br':
    case strpos($encoding, 'brotli') !== false:
        //TODO
        returnServerError($encodingHeader . '=> Not implemented yet');
        break;
    case strpos($encoding, 'identity') !== false:
        break;
    default:
        returnServerError($encodingHeader . '=> Unknown compression');
    }

    return $content;
}
/**
 * Fetch $url with getContents() and hand the result to str_get_html().
 *
 * The first five parameters are forwarded to getContents(); the remaining
 * ones are forwarded, in order, to str_get_html().
 *
 * @return mixed whatever str_get_html() returns (presumably a
 *         simple_html_dom object - confirm against the vendor library)
 */
function getSimpleHTMLDOM(
    $url,
    $use_include_path = false,
    $context = null,
    $offset = 0,
    $maxLen = null,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT
){
    return str_get_html(
        getContents($url, $use_include_path, $context, $offset, $maxLen),
        $lowercase,
        $forceTagsClosed,
        $target_charset,
        $stripRN,
        $defaultBRText,
        $defaultSpanText
    );
}
/**
 * Same as getSimpleHTMLDOM(), but keeps a local copy of the downloaded page
 * so that repeated requests for the same URL do not trigger a new download.
 *
 * The copy is stored under ../cache/pages/ (relative to this file) in a file
 * named after the SHA-1 of the URL. A copy whose filectime() is older than
 * $duration seconds is deleted and downloaded again.
 *
 * NOTE(review): every cache hit touch()es the file, which presumably also
 * refreshes the timestamp used for the expiry check - confirm that a
 * frequently requested page is meant to stay cached.
 *
 * @param url url to cache
 * @param duration duration of the cache file in seconds (default: 24h/86400s)
 * @return mixed whatever str_get_html() returns for the page content
 */
function getSimpleHTMLDOMCached(
    $url,
    $duration = 86400,
    $use_include_path = false,
    $context = null,
    $offset = 0,
    $maxLen = null,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT
){
    debugMessage('Caching url ' . $url . ', duration ' . $duration);

    $cacheFile = __DIR__ . '/../cache/pages/' . sha1($url) . '.cache';
    debugMessage('Cache file ' . $cacheFile);

    // Drop the cached copy once it has outlived the requested duration.
    $isExpired = file_exists($cacheFile)
        && filectime($cacheFile) < time() - $duration;
    if($isExpired){
        unlink($cacheFile);
        debugMessage('Cached file deleted: ' . $cacheFile);
    }

    if(!file_exists($cacheFile)){
        debugMessage('Caching ' . $url . ' to ' . $cacheFile);

        $cacheDir = dirname($cacheFile);
        if(!is_dir($cacheDir)){
            debugMessage('Creating directory ' . $cacheDir);
            mkdir($cacheDir, 0777, true);
        }

        $content = getContents($url, $use_include_path, $context, $offset, $maxLen);
        // A failed download is not written to the cache.
        if($content !== false){
            file_put_contents($cacheFile, $content);
        }
    } else {
        debugMessage('Loading cached file ' . $cacheFile);
        touch($cacheFile);
        $content = file_get_contents($cacheFile);
    }

    return str_get_html(
        $content,
        $lowercase,
        $forceTagsClosed,
        $target_charset,
        $stripRN,
        $defaultBRText,
        $defaultSpanText
    );
}
?>

30
lib/error.php Normal file
View file

@ -0,0 +1,30 @@
<?php
/**
 * Abort the current operation by throwing an HttpException.
 *
 * @param string $message text carried by the exception
 * @param int $code code carried by the exception (callers in this file pass
 *        HTTP status codes)
 * @throws \HttpException always
 */
function returnError($message, $code){
    $exception = new \HttpException($message, $code);
    throw $exception;
}
/**
 * Report an error attributed to the client: delegates to returnError()
 * with code 400.
 *
 * @param string $message text describing the problem
 */
function returnClientError($message){
    $clientErrorCode = 400;
    returnError($message, $clientErrorCode);
}
/**
 * Report an error attributed to the server: delegates to returnError()
 * with code 500.
 *
 * @param string $message text describing the problem
 */
function returnServerError($message){
    $serverErrorCode = 500;
    returnError($message, $serverErrorCode);
}
/**
 * Write a debug line to the PHP error log.
 *
 * Does nothing unless a file named "DEBUG" exists (the path is relative, so
 * it is resolved against the current working directory). The line is
 * prefixed with file, line, class and function taken from the third frame of
 * the call stack, or from the deepest available frame when the stack is
 * shallower than that.
 *
 * @param string $text message to log
 * @return void
 */
function debugMessage($text){
    if(!file_exists('DEBUG')) {
        return;
    }

    $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3);

    // Frame 2 does not exist when called close to the top level of a script;
    // frame 0 (this very call) is always present.
    $calling = isset($backtrace[2]) ? $backtrace[2] : end($backtrace);

    // None of these keys is guaranteed: plain functions have no 'class',
    // frames originating from internal calls have no 'file'/'line'.
    $file = isset($calling['file']) ? $calling['file'] : '<no-file>';
    $line = isset($calling['line']) ? $calling['line'] : '<no-line>';
    $class = isset($calling['class']) ? $calling['class'] : '<no-class>';
    $function = isset($calling['function']) ? $calling['function'] : '<no-function>';

    $message = $file . ':'
        . $line . ' class '
        . $class . '->'
        . $function . ' - '
        . $text;

    error_log($message);
}
?>