Revert "all: Use ->remove() instead of ->outertext = ''"

This reverts commit 052844f5e1.

There is a bug in ->remove() that causes the parser to incorrectly
identify elements in the DOM tree that shouldn't exist anymore.

References #1151
This commit is contained in:
logmanoriginal 2019-06-02 13:03:26 +02:00
parent 468d8be72d
commit 6c4098d655
21 changed files with 49 additions and 44 deletions

View file

@@ -50,7 +50,7 @@ class DauphineLibereBridge extends FeedExpander {
/**
 * Loads the page at $url through getSimpleHTMLDOMCached() and returns the
 * inner HTML of the first `div.content` element, after blanking out the
 * elements matched by the clean-up selector below.
 *
 * @param mixed $url Passed unchanged to getSimpleHTMLDOMCached();
 *                   presumably the article URL - confirm against callers.
 * @return mixed The `innertext` of the first `div.content` match.
 */
private function extractContent($url){
$html2 = getSimpleHTMLDOMCached($url);
// Elements to drop from the page before the content is extracted:
// .noprint, <link>, <script>, <iframe>, .shareTool and .contentInfo.
foreach ($html2->find('.noprint, link, script, iframe, .shareTool, .contentInfo') as $remove) {
// NOTE(review): this listing is a diff hunk whose +/- markers appear to have
// been lost. Going by the commit title (a revert of "Use ->remove() instead
// of ->outertext = ''"), the ->remove() call below is the line being taken
// out and the ->outertext assignment after it is its replacement - confirm
// against the actual file before treating both as live statements.
$remove->remove();
// Sets the matched element's outer HTML to an empty string.
$remove->outertext = '';
}
// NOTE(review): the result of find('div.content', 0) is dereferenced without
// a check here - confirm what happens when the page has no such element.
return $html2->find('div.content', 0)->innertext;
}