Revert "all: Use ->remove() instead of ->outertext = ''"

This reverts commit 052844f5e1.

There is a bug in ->remove() that causes the parser to incorrectly
identify elements that should no longer exist in the DOM tree.

References #1151
This commit is contained in:
logmanoriginal 2019-06-02 13:03:26 +02:00
parent 468d8be72d
commit 6c4098d655
21 changed files with 49 additions and 44 deletions

View file

@@ -141,7 +141,7 @@ class WikipediaBridge extends BridgeAbstract {
$anchorFallbackIndex = 0){
// Clean the bottom of the featured article
if ($element->find('div', -1))
-$element->find('div', -1)->remove();
+$element->find('div', -1)->outertext = '';
// The title and URI of the article can be found in an anchor containing
// the string '...' in most wikis ('full article ...')
@@ -202,10 +202,10 @@ class WikipediaBridge extends BridgeAbstract {
// Let's remove a couple of things from the article
$table = $content->find('#toc', 0); // Table of contents
if(!$table === false)
-$table->remove();
+$table->outertext = '';
foreach($content->find('ol.references') as $reference) // References
-$reference->remove();
+$reference->outertext = '';
return str_replace('href="/', 'href="' . $this->getURI() . '/', $content->innertext);
}