RSS-Bridge.rss-bridge/bridges/AutoPodcasterBridge.php

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

140 lines
4.6 KiB
PHP
Raw Normal View History

<?php
2024-03-16 23:18:36 +01:00
/**
 * Turns an ordinary feed into a podcast-like feed.
 *
 * Every item of the source feed is scanned for audio, either in the item
 * content shipped with the feed or (when "feed_only" is unchecked) in the
 * page the item links to. Items for which no audio is found are dropped;
 * the others get one enclosure per audio found.
 */
class AutoPodcasterBridge extends FeedExpander
{
    const MAINTAINER = 'boyska';
    const NAME = 'Auto Podcaster';
    const URI = '';
    const CACHE_TIMEOUT = 300; // 5 minutes
    const DESCRIPTION = 'Make a "multimedia" podcast out of a normal feed';
    const PARAMETERS = ['url' => [
        'url' => [
            'name' => 'URL',
            'exampleValue' => 'https://lorem-rss.herokuapp.com/feed?unit=day',
            'required' => true
        ],
        'feed_only' => [
            'name' => 'Only look at the content of the feed, don\'t check on the website',
            'type' => 'checkbox',
            'defaultValue' => 'checked',
            'required' => false,
        ]
    ]];

    /**
     * Tells whether an archive.org "format" label describes an audio file.
     *
     * A label matches when it contains "MP3" anywhere or starts with "Ogg".
     *
     * @param mixed $formatString Format label taken from the file listing.
     * @return bool False for anything that is not a matching string.
     */
    private function archiveIsAudioFormat($formatString)
    {
        if (!is_string($formatString)) {
            return false;
        }

        return strpos($formatString, 'MP3') !== false ||
            strpos($formatString, 'Ogg') === 0;
    }

    /**
     * Collects the audio referenced by <audio> tags.
     *
     * @param object $dom Parsed HTML document exposing find().
     * @return array Map "first source URL" => ['sources' => list of URLs],
     *               one entry per <audio> element that has at least one source.
     */
    private function extractAudio($dom)
    {
        $audios = [];

        foreach ($dom->find('audio') as $audioEl) {
            $sources = [];

            if ($audioEl->src !== false) {
                $sources[] = $audioEl->src;
            }

            foreach ($audioEl->find('source') as $sourceEl) {
                // Same guard as for the <audio> element itself: a <source>
                // without "src" must not contribute a bogus `false` entry.
                if ($sourceEl->src !== false) {
                    $sources[] = $sourceEl->src;
                }
            }

            if ($sources) {
                $audios[$sources[0]] = ['sources' => $sources];
            }
        }

        return $audios;
    }

    /**
     * Collects the audio files behind archive.org embed iframes.
     *
     * For each iframe whose src starts with https://archive.org/embed/, the
     * matching ".../details/...?output=json" listing is downloaded. "original"
     * audio files become entries; "derivative" audio files are appended as
     * alternative sources of the original they refer to.
     *
     * @param object $dom Parsed HTML document exposing find().
     * @return array Map "download URL" => ['sources' => list of URLs],
     *               accumulated over every matching iframe.
     */
    private function extractIframeArchive($dom)
    {
        $audios = [];

        foreach ($dom->find('iframe') as $iframeEl) {
            $src = $iframeEl->src;
            if (!is_string($src) || strpos($src, 'https://archive.org/embed/') !== 0) {
                continue;
            }

            $listURL = preg_replace('/\/embed\//', '/details/', $src, 1) . '?output=json';
            $baseURL = preg_replace('/\/embed\//', '/download/', $src, 1);

            // A listing that cannot be fetched or decoded only disables this
            // iframe; it must not abort the whole item.
            $json = file_get_contents($listURL);
            if ($json === false) {
                continue;
            }
            $list = json_decode($json);
            if (
                !is_object($list) ||
                !isset($list->files) ||
                !(is_object($list->files) || is_array($list->files))
            ) {
                continue;
            }

            // NOTE: $audios is deliberately NOT reset here, so that several
            // archive.org embeds in the same document all contribute.

            // First pass: original uploads. The file key is appended to the
            // base URL as is, so it is presumably a "/"-prefixed path (the
            // second pass adds the "/" itself) - verify against the listing.
            foreach ($list->files as $name => $data) {
                if (
                    isset($data->source, $data->format) &&
                    $data->source === 'original' &&
                    $this->archiveIsAudioFormat($data->format)
                ) {
                    $audios[$baseURL . $name] = ['sources' => [$baseURL . $name]];
                }
            }

            // Second pass: derivatives, attached to an original kept above.
            foreach ($list->files as $name => $data) {
                if (
                    isset($data->source, $data->format, $data->original) &&
                    $data->source === 'derivative' &&
                    $this->archiveIsAudioFormat($data->format) &&
                    isset($audios[$baseURL . '/' . $data->original])
                ) {
                    $audios[$baseURL . '/' . $data->original]['sources'][] = $baseURL . $name;
                }
            }
        }

        return $audios;
    }

    /**
     * Parses one feed item and attaches the audio found for it as enclosures.
     *
     * @param mixed $newItem Raw feed item, handed over to the parent parser.
     * @return array|null The parsed item with its "enclosures" replaced by
     *                    the first source of each audio, or null when no
     *                    audio was found.
     */
    protected function parseItem($newItem)
    {
        $item = parent::parseItem($newItem);

        $dom = false;
        if (! $this->getInput('feed_only')) {
            // $dom will be false in case of errors
            $dom = getSimpleHTMLDOMCached($item['uri']);
        }

        if ($dom === false && isset($item['content'])) {
            // Fall back to the content shipped in the feed. str_get_html()
            // can itself yield false (e.g. on empty content), which is why
            // the result is checked below before find() is called on it.
            $dom = str_get_html($item['content']);
        }

        $audios = [];
        if ($dom !== false) {
            /* 1st extraction method: by "audio" tag */
            $audios = array_merge($audios, $this->extractAudio($dom));
            /* 2nd extraction method: by "iframe" tag */
            $audios = array_merge($audios, $this->extractIframeArchive($dom));
        }

        if (count($audios) === 0) {
            return null;
        }

        $item['enclosures'] = [];
        foreach ($audios as $audio) {
            $item['enclosures'][] = $audio['sources'][0];
        }

        return $item;
    }

    /**
     * Validates the "url" input and expands the feed it points to.
     */
    public function collectData()
    {
        $url = $this->getInput('url');

        // Just in case someone finds a way to access local files by playing
        // with the url: require a real http:// or https:// scheme, not merely
        // a string that begins with the letters "http".
        if ($url && !preg_match('#^https?://#', $url)) {
            returnClientError('The url parameter must either refer to http or https protocol.');
        }

        $this->collectExpandableDatas($this->getURI());
    }

    /**
     * Bridge name, suffixed with the feed URL once one has been provided.
     *
     * @return string
     */
    public function getName()
    {
        if (!is_null($this->getInput('url'))) {
            return self::NAME . ' : ' . $this->getInput('url');
        }

        return parent::getName();
    }

    /**
     * The URI of this bridge instance is the "url" input itself.
     */
    public function getURI()
    {
        return $this->getInput('url');
    }
}