all: Use ->remove() instead of ->outertext = ''

simplehtmldom 1.9 introduced new functions to recursively remove
nodes from the DOM. This allows removing elements without having to
reload the document via $html->load($html->save()), which is very
inefficient.

Find more information about remove() at
https://simplehtmldom.sourceforge.io/docs/1.9/api/simple_html_dom_node/remove/
This commit is contained in:
logmanoriginal 2019-06-01 21:15:30 +02:00
parent 014b698f67
commit 052844f5e1
21 changed files with 44 additions and 49 deletions

View file

@@ -50,18 +50,18 @@ class AsahiShimbunAJWBridge extends BridgeAbstract {
$e_lead = $element->find('span.Lead', 0);
if ($e_lead) {
$item['content'] = $e_lead->innertext;
$e_lead->outertext = '';
$e_lead->remove();
} else {
$item['content'] = $element->innertext;
}
$e_date = $element->find('span.EnDate', 0);
if ($e_date) {
$item['timestamp'] = strtotime($e_date->innertext);
$e_date->outertext = '';
$e_date->remove();
}
$e_video = $element->find('span.EnVideo', 0);
if ($e_video) {
$e_video->outertext = '';
$e_video->remove();
$element->innertext = "VIDEO: $element->innertext";
}
$item['title'] = $element->innertext;