[Core] Add html/convertLazyLoading (+ document stripRecursiveHTMLSection) (#3157)

* [core] Add html/convertLazyLoading($dom)

Looks for lazy-loading attributes such as 'data-src' and converts
them back to regular ones such as 'src', which are easier for RSS readers to handle.
It also converts <picture> elements to plain <img> elements.

* [core] Document html/stripRecursiveHTMLSection()

Add documentation for that function (no code changes).

* [WordPressBridge] Use convertLazyLoading()

* [WordPressBridge] Unwrap image figures

An <img> inside a <figure> may not display in RSS readers.
This unwraps such figures, leaving the plain <img> and keeping the caption if present.

* [ZDNet] Convert lazy loading images

* [core] html/stripRecursiveHTMLSection: Fix typo
This commit is contained in:
ORelio 2022-11-20 12:41:59 +01:00 committed by GitHub
parent 2f7f13d9fe
commit d592e2cb15
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 91 additions and 15 deletions

View file

@ -74,20 +74,8 @@ class WordPressBridge extends FeedExpander
}
}
// Convert lazy-loading images and iframes (videos...)
foreach ($article->find('img, iframe') as $img) {
if (!empty($img->getAttribute('data-src'))) {
$img->src = $img->getAttribute('data-src');
} elseif (!empty($img->getAttribute('data-srcset'))) {
$img->src = explode(' ', $img->getAttribute('data-srcset'))[0];
} elseif (!empty($img->getAttribute('data-lazy-src'))) {
$img->src = $img->getAttribute('data-lazy-src');
} elseif (!empty($img->getAttribute('srcset'))) {
$img->src = explode(' ', $img->getAttribute('srcset'))[0];
}
}
// Find article main image
$article = convertLazyLoading($article);
$article_image = $article_html->find('img.wp-post-image', 0);
if (!empty($item['content']) && (!is_object($article_image) || empty($article_image->src))) {
$article_image = str_get_html($item['content'])->find('img.wp-post-image', 0);
@ -105,6 +93,11 @@ class WordPressBridge extends FeedExpander
}
}
// Unwrap image figures
foreach ($article->find('figure.wp-block-image') as $figure) {
$figure->outertext = $figure->innertext;
}
if (!is_null($article)) {
$item['content'] = $this->cleanContent($article->innertext);
$item['content'] = defaultLinkTo($item['content'], $item['uri']);