diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index faec9f09..15297cc3 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,8 +1,21 @@ FROM rssbridge/rss-bridge:latest -RUN apt-get update && \ - apt-get install --yes --no-install-recommends \ - git && \ - pecl install xdebug && \ - pear install PHP_CodeSniffer && \ - docker-php-ext-enable xdebug \ No newline at end of file +COPY --chmod=755 post-create-command.sh /usr/local/bin/post-create-command + +ADD https://raw.githubusercontent.com/docker-library/php/master/docker-php-ext-enable /usr/local/bin/docker-php-ext-enable +RUN chmod u+x /usr/local/bin/docker-php-ext-enable + +ADD https://getcomposer.org/installer /usr/local/bin/composer-installer.php +RUN chmod u+x /usr/local/bin/composer-installer.php +RUN php /usr/local/bin/composer-installer.php --check && \ + php /usr/local/bin/composer-installer.php --filename=composer --install-dir=/usr/local/bin + +RUN apt-get update && \ + apt-get install -y \ + git \ + php-dev \ + make \ + unzip + +RUN pecl install xdebug && \ + PHP_INI_DIR=/etc/php/8.2/fpm docker-php-ext-enable xdebug diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 6e625b8a..564da38a 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -6,9 +6,9 @@ "vscode": { // Set *default* container specific settings.json values on container create. 
"settings": { - "php.validate.executablePath": "/usr/local/bin/php", - "phpSniffer.executablesFolder": "/usr/local/bin/", - "phpcs.executablePath": "/usr/local/bin/phpcs", + "php.validate.executablePath": "/usr/bin/php", + "phpSniffer.executablesFolder": "/root/.config/composer/vendor/bin", + "phpcs.executablePath": "/root/.config/composer/vendor/bin/phpcs", "phpcs.lintOnType": false }, @@ -22,6 +22,9 @@ ] } }, + "remoteEnv": { + "PATH": "${containerEnv:PATH}:/root/.config/composer/vendor/bin", + }, "forwardPorts": [3100, 9000, 9003], - "postCreateCommand": "cp .devcontainer/nginx.conf /etc/nginx/conf.d/default.conf && cp .devcontainer/xdebug.ini /usr/local/etc/php/conf.d/xdebug.ini && mkdir .vscode && cp .devcontainer/launch.json .vscode && echo '*' > whitelist.txt && chmod a+x \"$(pwd)\" && rm -rf /var/www/html && ln -s \"$(pwd)\" /var/www/html && nginx && php-fpm -D" + "postCreateCommand": "/usr/local/bin/post-create-command" } \ No newline at end of file diff --git a/.devcontainer/launch.json b/.devcontainer/launch.json index e1b473b8..7fe617e9 100644 --- a/.devcontainer/launch.json +++ b/.devcontainer/launch.json @@ -9,7 +9,8 @@ "type": "php", "request": "launch", "port": 9003, - "auto": true + "auto": true, + "log": true }, { "name": "Launch currently open script", diff --git a/.devcontainer/nginx.conf b/.devcontainer/nginx.conf index 46502cb4..0e5db6dc 100644 --- a/.devcontainer/nginx.conf +++ b/.devcontainer/nginx.conf @@ -12,6 +12,6 @@ server { location ~ \.php$ { include snippets/fastcgi-php.conf; - fastcgi_pass 127.0.0.1:9000; + fastcgi_pass unix:/var/run/php/php8.2-fpm.sock; } -} \ No newline at end of file +} diff --git a/.devcontainer/post-create-command.sh b/.devcontainer/post-create-command.sh new file mode 100755 index 00000000..aa3819eb --- /dev/null +++ b/.devcontainer/post-create-command.sh @@ -0,0 +1,27 @@ +#/bin/sh + +cp .devcontainer/nginx.conf /etc/nginx/conf.d/default.conf +cp .devcontainer/xdebug.ini /etc/php/8.2/fpm/conf.d/xdebug.ini + +# 
This should download some dev-dependencies, like phpunit and the PHP code sniffers +composer global require "phpunit/phpunit:^9" +composer global require "squizlabs/php_codesniffer:^3.6" +composer global require "phpcompatibility/php-compatibility:^9.3" + +# We need to this manually for running the PHPCompatibility ruleset +phpcs --config-set installed_paths /root/.config/composer/vendor/phpcompatibility/php-compatibility + +mkdir -p .vscode +cp .devcontainer/launch.json .vscode + +echo '*' > whitelist.txt + +chmod a+x $(pwd) +rm -rf /var/www/html +ln -s $(pwd) /var/www/html + +# Solves possible issue of cache directory not being accessible +chown www-data:www-data -R $(pwd)/cache + +nginx +php-fpm8.2 -D \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index 5981fb34..28053256 100644 --- a/.gitattributes +++ b/.gitattributes @@ -47,8 +47,6 @@ phpcs.xml export-ignore phpcompatibility.xml export-ignore tests/ export-ignore cache/.gitkeep export-ignore -bridges/DemoBridge.php export-ignore -bridges/FeedExpanderExampleBridge.php export-ignore ## Composer # diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 00000000..7ebb4030 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1,7 @@ +# Visual Studio Code +.vscode/* + +# Generated files +comment*.md +comment*.txt +*.html diff --git a/.github/ISSUE_TEMPLATE/bridge-request.md b/.github/ISSUE_TEMPLATE/bridge-request.md index 174dc095..088cc3d6 100644 --- a/.github/ISSUE_TEMPLATE/bridge-request.md +++ b/.github/ISSUE_TEMPLATE/bridge-request.md @@ -49,9 +49,9 @@ Please describe what you expect from the bridge. 
Whenever possible provide sampl - _Default limit_: 5 - [ ] Load full articles - _Cache articles_ (articles are stored in a local cache on first request): yes - - _Cache timeout_ (max = 24 hours): 24 hours + - _Cache timeout_ : 24 hours - [X] Balance requests (RSS-Bridge uses cached versions to reduce bandwith usage) - - _Timeout_ (default = 5 minutes, max = 24 hours): 5 minutes + - _Timeout_ (default = 5 minutes): 5 minutes diff --git a/.github/prtester.py b/.github/prtester.py index df6cc1ff..c5c5be22 100644 --- a/.github/prtester.py +++ b/.github/prtester.py @@ -1,113 +1,208 @@ +import argparse import requests -import itertools +import re from bs4 import BeautifulSoup from datetime import datetime -import os.path +from typing import Iterable +import os +import glob +import urllib # This script is specifically written to be used in automation for https://github.com/RSS-Bridge/rss-bridge # # This will scrape the whitelisted bridges in the current state (port 3000) and the PR state (port 3001) of # RSS-Bridge, generate a feed for each of the bridges and save the output as html files. -# It also replaces the default static CSS link with a hardcoded link to @em92's public instance, so viewing +# It also add a tag with the url of em's public instance, so viewing # the HTML file locally will actually work as designed. 
-def testBridges(bridges,status): - for bridge in bridges: - if bridge.get('data-ref'): # Some div entries are empty, this ignores those - bridgeid = bridge.get('id') - bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata - print(bridgeid + "\n") - bridgestring = '/?action=display&bridge=' + bridgeid + '&format=Html' - forms = bridge.find_all("form") - formid = 1 - for form in forms: - # a bridge can have multiple contexts, named 'forms' in html - # this code will produce a fully working formstring that should create a working feed when called - # this will create an example feed for every single context, to test them all - formstring = '' - errormessages = [] - parameters = form.find_all("input") - lists = form.find_all("select") - # this for/if mess cycles through all available input parameters, checks if it required, then pulls - # the default or examplevalue and then combines it all together into the formstring - # if an example or default value is missing for a required attribute, it will throw an error - # any non-required fields are not tested!!! 
- for parameter in parameters: - if parameter.get('type') == 'hidden' and parameter.get('name') == 'context': - cleanvalue = parameter.get('value').replace(" ","+") - formstring = formstring + '&' + parameter.get('name') + '=' + cleanvalue - if parameter.get('type') == 'number' or parameter.get('type') == 'text': - if parameter.has_attr('required'): - if parameter.get('placeholder') == '': - if parameter.get('value') == '': - errormessages.append(parameter.get('name')) - else: - formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('value') +ARTIFACT_FILE_EXTENSION = '.html' + +class Instance: + name = '' + url = '' + +def main(instances: Iterable[Instance], with_upload: bool, with_reduced_upload: bool, title: str, output_file: str): + start_date = datetime.now() + + prid = os.getenv('PR') + artifact_base_url = f'https://rss-bridge.github.io/rss-bridge-tests/prs/{prid}' + artifact_directory = os.getcwd() + for file in glob.glob(f'*{ARTIFACT_FILE_EXTENSION}', root_dir=artifact_directory): + os.remove(file) + + table_rows = [] + for instance in instances: + page = requests.get(instance.url) # Use python requests to grab the rss-bridge main page + soup = BeautifulSoup(page.content, "html.parser") # use bs4 to turn the page into soup + bridge_cards = soup.select('.bridge-card') # get a soup-formatted list of all bridges on the rss-bridge page + table_rows += testBridges( + instance=instance, + bridge_cards=bridge_cards, + with_upload=with_upload, + with_reduced_upload=with_reduced_upload, + artifact_directory=artifact_directory, + artifact_base_url=artifact_base_url) # run the main scraping code with the list of bridges + with open(file=output_file, mode='w+', encoding='utf-8') as file: + table_rows_value = '\n'.join(sorted(table_rows)) + file.write(f''' +## {title} +| Bridge | Context | Status | +| - | - | - | +{table_rows_value} + +*last change: {start_date.strftime("%A %Y-%m-%d %H:%M:%S")}* + '''.strip()) + +def testBridges(instance: Instance, 
bridge_cards: Iterable, with_upload: bool, with_reduced_upload: bool, artifact_directory: str, artifact_base_url: str) -> Iterable: + instance_suffix = '' + if instance.name: + instance_suffix = f' ({instance.name})' + table_rows = [] + for bridge_card in bridge_cards: + bridgeid = bridge_card.get('id') + bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata + print(f'{bridgeid}{instance_suffix}') + bridge_name = bridgeid.replace('Bridge', '') + context_forms = bridge_card.find_all("form") + form_number = 1 + for context_form in context_forms: + # a bridge can have multiple contexts, named 'forms' in html + # this code will produce a fully working url that should create a working feed when called + # this will create an example feed for every single context, to test them all + context_parameters = {} + error_messages = [] + context_name = '*untitled*' + context_name_element = context_form.find_previous_sibling('h5') + if context_name_element and context_name_element.text.strip() != '': + context_name = context_name_element.text + parameters = context_form.find_all("input") + lists = context_form.find_all("select") + # this for/if mess cycles through all available input parameters, checks if it required, then pulls + # the default or examplevalue and then combines it all together into the url parameters + # if an example or default value is missing for a required attribute, it will throw an error + # any non-required fields are not tested!!! 
+ for parameter in parameters: + parameter_type = parameter.get('type') + parameter_name = parameter.get('name') + if parameter_type == 'hidden': + context_parameters[parameter_name] = parameter.get('value') + if parameter_type == 'number' or parameter_type == 'text': + if parameter.has_attr('required'): + if parameter.get('placeholder') == '': + if parameter.get('value') == '': + error_messages.append(f'Missing example or default value for parameter "{parameter_name}"') else: - formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('placeholder') - # same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the formstring - if parameter.get('type') == 'checkbox': - if parameter.has_attr('checked'): - formstring = formstring + '&' + parameter.get('name') + '=on' - for listing in lists: - selectionvalue = '' - listname = listing.get('name') - cleanlist = [] - for option in listing.contents: - if 'optgroup' in option.name: - cleanlist.extend(option) + context_parameters[parameter_name] = parameter.get('value') else: - cleanlist.append(option) - firstselectionentry = 1 - for selectionentry in cleanlist: - if firstselectionentry: + context_parameters[parameter_name] = parameter.get('placeholder') + # same thing, just for checkboxes. 
If a checkbox is checked per default, it gets added to the url parameters + if parameter_type == 'checkbox': + if parameter.has_attr('checked'): + context_parameters[parameter_name] = 'on' + for listing in lists: + selectionvalue = '' + listname = listing.get('name') + cleanlist = [] + options = listing.find_all('option') + for option in options: + if 'optgroup' in option.name: + cleanlist.extend(option) + else: + cleanlist.append(option) + firstselectionentry = 1 + for selectionentry in cleanlist: + if firstselectionentry: + selectionvalue = selectionentry.get('value') + firstselectionentry = 0 + else: + if 'selected' in selectionentry.attrs: selectionvalue = selectionentry.get('value') - firstselectionentry = 0 - else: - if 'selected' in selectionentry.attrs: - selectionvalue = selectionentry.get('value') - break - formstring = formstring + '&' + listname + '=' + selectionvalue - if not errormessages: - # if all example/default values are present, form the full request string, run the request, replace the static css - # file with the url of em's public instance and then upload it to termpad.com, a pastebin-like-site. - r = requests.get(URL + bridgestring + formstring) - pagetext = r.text.replace('static/style.css','https://rss-bridge.org/bridge01/static/style.css') - pagetext = pagetext.encode("utf_8") - termpad = requests.post(url="https://termpad.com/", data=pagetext) - termpadurl = termpad.text - termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/') - termpadurl = termpadurl.replace('\n','') - with open(os.getcwd() + '/comment.txt', 'a+') as file: - file.write("\n") - file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |") + break + context_parameters[listname] = selectionvalue + artifact_url = 'about:blank' + if error_messages: + status = '
'.join(map(lambda m: f'❌ `{m}`', error_messages)) + else: + # if all example/default values are present, form the full request url, run the request, add a tag with + # the url of em's public instance to the response text (so that relative paths work, e.g. to the static css file) and + # then save it to a html file. + context_parameters.update({ + 'action': 'display', + 'bridge': bridgeid, + 'format': 'Html', + }) + request_url = f'{instance.url}/?{urllib.parse.urlencode(context_parameters)}' + response = requests.get(request_url) + page_text = response.text.replace('','') + page_text = page_text.encode("utf_8") + soup = BeautifulSoup(page_text, "html.parser") + status_messages = [] + if response.status_code != 200: + status_messages += [f'❌ `HTTP status {response.status_code} {response.reason}`'] else: - # if there are errors (which means that a required value has no example or default value), log out which error appeared - termpad = requests.post(url="https://termpad.com/", data=str(errormessages)) - termpadurl = termpad.text - termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/') - termpadurl = termpadurl.replace('\n','') - with open(os.getcwd() + '/comment.txt', 'a+') as file: - file.write("\n") - file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |") - formid += 1 + feed_items = soup.select('.feeditem') + feed_items_length = len(feed_items) + if feed_items_length <= 0: + status_messages += [f'⚠️ `The feed has no items`'] + elif feed_items_length == 1 and len(soup.select('.error')) > 0: + status_messages += [f'❌ `{getFirstLine(feed_items[0].text)}`'] + status_messages += map(lambda e: f'❌ `{getFirstLine(e.text)}`', soup.select('.error .error-type') + soup.select('.error .error-message')) + for item_element in soup.select('.feeditem'): # remove all feed items to not accidentally selected
 <pre> tags from item content
+                    item_element.decompose()
+                status_messages += map(lambda e: f'⚠️ `{getFirstLine(e.text)}`', soup.find_all('pre'))
+                status_messages = list(dict.fromkeys(status_messages)) # remove duplicates
+                status = '
'.join(status_messages) + status_is_ok = status == ''; + if status_is_ok: + status = '✔️' + if with_upload and (not with_reduced_upload or not status_is_ok): + filename = f'{bridge_name} {form_number}{instance_suffix}{ARTIFACT_FILE_EXTENSION}' + filename = re.sub(r'[^a-z0-9 \_\-\.]', '', filename, flags=re.I).replace(' ', '_') + with open(file=f'{artifact_directory}/{filename}', mode='wb') as file: + file.write(page_text) + artifact_url = f'{artifact_base_url}/{filename}' + table_rows.append(f'| {bridge_name} | [{form_number} {context_name}{instance_suffix}]({artifact_url}) | {status} |') + form_number += 1 + return table_rows -gitstatus = ["current", "pr"] -now = datetime.now() -date_time = now.strftime("%Y-%m-%d, %H:%M:%S") +def getFirstLine(value: str) -> str: + # trim whitespace and remove text that can break the table or is simply unnecessary + clean_value = re.sub(r'^\[[^\]]+\]\s*rssbridge\.|[\|`]', '', value.strip()) + first_line = next(iter(clean_value.splitlines()), '') + max_length = 250 + if (len(first_line) > max_length): + first_line = first_line[:max_length] + '...' 
+ return first_line -with open(os.getcwd() + '/comment.txt', 'w+') as file: - file.write(''' ## Pull request artifacts -| file | last change | -| ---- | ------ |''') - -for status in gitstatus: # run this twice, once for the current version, once for the PR version - if status == "current": - port = "3000" # both ports are defined in the corresponding workflow .yml file - elif status == "pr": - port = "3001" - URL = "http://localhost:" + port - page = requests.get(URL) # Use python requests to grab the rss-bridge main page - soup = BeautifulSoup(page.content, "html.parser") # use bs4 to turn the page into soup - bridges = soup.find_all("section") # get a soup-formatted list of all bridges on the rss-bridge page - testBridges(bridges,status) # run the main scraping code with the list of bridges and the info if this is for the current version or the pr version +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--instances', nargs='+') + parser.add_argument('--no-upload', action='store_true') + parser.add_argument('--reduced-upload', action='store_true') + parser.add_argument('--title', default='Pull request artifacts') + parser.add_argument('--output-file', default=os.getcwd() + '/comment.txt') + args = parser.parse_args() + instances = [] + if args.instances: + for instance_arg in args.instances: + instance_arg_parts = instance_arg.split('::') + instance = Instance() + instance.name = instance_arg_parts[1].strip() if len(instance_arg_parts) >= 2 else '' + instance.url = instance_arg_parts[0].strip().rstrip("/") + instances.append(instance) + else: + instance = Instance() + instance.name = 'current' + instance.url = 'http://localhost:3000' + instances.append(instance) + instance = Instance() + instance.name = 'pr' + instance.url = 'http://localhost:3001' + instances.append(instance) + main( + instances=instances, + with_upload=not args.no_upload, + with_reduced_upload=args.reduced_upload and not args.no_upload, + title=args.title, 
+ output_file=args.output_file + ); diff --git a/.github/workflows/dockerbuild.yml b/.github/workflows/dockerbuild.yml index 82d8611a..39645558 100644 --- a/.github/workflows/dockerbuild.yml +++ b/.github/workflows/dockerbuild.yml @@ -21,7 +21,7 @@ jobs: - name: Docker meta id: docker_meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5 with: images: | ${{ env.DOCKERHUB_SLUG }} @@ -33,26 +33,26 @@ jobs: type=raw,value=stable,enable=${{ startsWith(github.ref, 'refs/tags/20') }} - name: Set up QEMU - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 - name: Login to DockerHub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - name: Login to GitHub Container Registry - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Build and push - uses: docker/bake-action@v2 + uses: docker/bake-action@v5 with: files: | ./docker-bake.hcl diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index b00c898a..e0201022 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -9,7 +9,7 @@ jobs: documentation: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: persist-credentials: false - name: Setup PHP diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 83911ab6..79201edd 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -8,12 +8,12 @@ on: jobs: phpcs: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 strategy: matrix: php-versions: ['7.4'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: shivammathur/setup-php@v2 with: php-version: ${{ 
matrix.php-versions }} @@ -21,12 +21,12 @@ jobs: - run: phpcs . --standard=phpcs.xml --warning-severity=0 --extensions=php -p phpcompatibility: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 strategy: matrix: php-versions: ['7.4'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: shivammathur/setup-php@v2 with: php-version: ${{ matrix.php-versions }} @@ -36,9 +36,9 @@ jobs: - run: ~/.composer/vendor/bin/phpcs . --standard=phpcompatibility.xml --warning-severity=0 --extensions=php -p executable_php_files_check: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - run: | if find -name "*.php" -executable -type f -print -exec false {} + then diff --git a/.github/workflows/prhtmlgenerator.yml b/.github/workflows/prhtmlgenerator.yml index cacb6642..bfc89e7a 100644 --- a/.github/workflows/prhtmlgenerator.yml +++ b/.github/workflows/prhtmlgenerator.yml @@ -5,24 +5,41 @@ on: branches: [ master ] jobs: + check-bridges: + name: Check if bridges were changed + runs-on: ubuntu-latest + outputs: + BRIDGES: ${{ steps.check1.outputs.BRIDGES }} + steps: + - name: Check number of bridges + id: check1 + run: | + PR=${{github.event.number}}; + wget https://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$PR.patch; + bridgeamount=$(cat $PR.patch | grep "\bbridges/[A-Za-z0-9]*Bridge\.php\b" | sed "s=.*\bbridges/\([A-Za-z0-9]*\)Bridge\.php\b.*=\1=g" | sort | uniq | wc -l); + echo "BRIDGES=$bridgeamount" >> "$GITHUB_OUTPUT" test-pr: name: Generate HTML runs-on: ubuntu-latest + needs: check-bridges + if: needs.check-bridges.outputs.BRIDGES > 0 + env: + PYTHONUNBUFFERED: 1 # Needs additional permissions https://github.com/actions/first-interaction/issues/10#issuecomment-1041402989 steps: - name: Check out self - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{github.event.pull_request.head.ref}} repository: ${{github.event.pull_request.head.repo.full_name}} - name: 
Check out rss-bridge run: | PR=${{github.event.number}}; - wget -O requirements.txt https://raw.githubusercontent.com/RSS-Bridge/rss-bridge/master/.github/prtester-requirements.txt; - wget https://raw.githubusercontent.com/RSS-Bridge/rss-bridge/master/.github/prtester.py; + wget -O requirements.txt https://raw.githubusercontent.com/$GITHUB_REPOSITORY/${{ github.event.pull_request.base.ref }}/.github/prtester-requirements.txt; + wget https://raw.githubusercontent.com/$GITHUB_REPOSITORY/${{ github.event.pull_request.base.ref }}/.github/prtester.py; wget https://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$PR.patch; touch DEBUG; - cat $PR.patch | grep " bridges/.*\.php" | sed "s= bridges/\(.*\)Bridge.php.*=\1=g" | sort | uniq > whitelist.txt + cat $PR.patch | grep "\bbridges/[A-Za-z0-9]*Bridge\.php\b" | sed "s=.*\bbridges/\([A-Za-z0-9]*\)Bridge\.php\b.*=\1=g" | sort | uniq > whitelist.txt - name: Start Docker - Current run: | docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 3000:80 ghcr.io/rss-bridge/rss-bridge:latest @@ -31,9 +48,9 @@ jobs: docker build -t prbuild .; docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 3001:80 prbuild - name: Setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: '3.7' + python-version: '3.13' cache: 'pip' - name: Install requirements run: | @@ -49,9 +66,17 @@ jobs: body="${body//$'\n'/'%0A'}"; body="${body//$'\r'/'%0D'}"; echo "bodylength=${#body}" >> $GITHUB_OUTPUT + env: + PR: ${{ github.event.number }} + - name: Upload generated tests + uses: actions/upload-artifact@v4 + id: upload-generated-tests + with: + name: tests + path: '*.html' - name: Find Comment if: ${{ steps.testrun.outputs.bodylength > 130 }} - uses: peter-evans/find-comment@v2 + uses: peter-evans/find-comment@v3 id: fc with: issue-number: ${{ github.event.pull_request.number }} @@ -59,9 
+84,43 @@ jobs: body-includes: Pull request artifacts - name: Create or update comment if: ${{ steps.testrun.outputs.bodylength > 130 }} - uses: peter-evans/create-or-update-comment@v2 + uses: peter-evans/create-or-update-comment@v4 with: comment-id: ${{ steps.fc.outputs.comment-id }} issue-number: ${{ github.event.pull_request.number }} body-file: comment.txt edit-mode: replace + upload_tests: + name: Upload tests + runs-on: ubuntu-latest + needs: test-pr + steps: + - uses: actions/checkout@v4 + with: + repository: 'RSS-Bridge/rss-bridge-tests' + ref: 'main' + token: ${{ secrets.RSSTESTER_ACTION }} + + - name: Setup git config + run: | + git config --global user.name "GitHub Actions" + git config --global user.email "<>" + + - name: Download tests + uses: actions/download-artifact@v4 + with: + name: tests + + - name: Move tests + run: | + cd prs + mkdir -p ${{github.event.number}} + cd ${{github.event.number}} + mv -f $GITHUB_WORKSPACE/*.html . + + - name: Commit and push generated tests + run: | + export COMMIT_MESSAGE="Added tests for PR ${{github.event.number}}" + git add . 
+ git commit -m "$COMMIT_MESSAGE" || exit 0 + git push diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e7684e6b..435369fa 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,14 +8,16 @@ on: jobs: phpunit8: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 strategy: matrix: - php-versions: ['7.4', '8.0', '8.1'] + php-versions: ['7.4', '8.0', '8.1', '8.2', '8.3', '8.4'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: shivammathur/setup-php@v2 with: php-version: ${{ matrix.php-versions }} + env: + update: true - run: composer install - run: composer test diff --git a/.gitignore b/.gitignore index f574992d..6ed95489 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,6 @@ data/ *.pydevproject .project .metadata -bin/ tmp/ *.tmp *.bak @@ -230,6 +229,9 @@ pip-log.txt DEBUG config.ini.php config/* +!config/nginx.conf +!config/php-fpm.conf +!config/php.ini ###################### ## VisualStudioCode ## diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index f1080743..d27421aa 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -15,7 +15,7 @@ * [Astalaseven](https://github.com/Astalaseven) * [Astyan-42](https://github.com/Astyan-42) * [austinhuang0131](https://github.com/austinhuang0131) -* [AxorPL](https://github.com/AxorPL) +* [axor-mst](https://github.com/axor-mst) * [ayacoo](https://github.com/ayacoo) * [az5he6ch](https://github.com/az5he6ch) * [b1nj](https://github.com/b1nj) @@ -23,6 +23,7 @@ * [Binnette](https://github.com/Binnette) * [BoboTiG](https://github.com/BoboTiG) * [Bockiii](https://github.com/Bockiii) +* [brtsos](https://github.com/brtsos) * [captn3m0](https://github.com/captn3m0) * [chemel](https://github.com/chemel) * [Chouchen](https://github.com/Chouchen) @@ -144,6 +145,7 @@ * [Niehztog](https://github.com/Niehztog) * [NikNikYkt](https://github.com/NikNikYkt) * [Nono-m0le](https://github.com/Nono-m0le) +* [NotsoanoNimus](https://github.com/NotsoanoNimus) * 
[obsiwitch](https://github.com/obsiwitch) * [Ololbu](https://github.com/Ololbu) * [ORelio](https://github.com/ORelio) diff --git a/Dockerfile b/Dockerfile index 8157dc12..fb783344 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,36 +1,75 @@ -FROM lwthiker/curl-impersonate:0.5-ff-slim-buster AS curlimpersonate - -FROM php:8.0.27-fpm-buster AS rssbridge +FROM debian:12-slim AS rssbridge LABEL description="RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one." LABEL repository="https://github.com/RSS-Bridge/rss-bridge" LABEL website="https://github.com/RSS-Bridge/rss-bridge" -RUN apt-get update && \ +ARG DEBIAN_FRONTEND=noninteractive +RUN set -xe && \ + apt-get update && \ apt-get install --yes --no-install-recommends \ + ca-certificates \ nginx \ - zlib1g-dev \ - libzip-dev \ - libmemcached-dev \ nss-plugin-pem \ - libicu-dev && \ - docker-php-ext-install zip && \ - docker-php-ext-install intl && \ - pecl install memcached && \ - docker-php-ext-enable memcached && \ - docker-php-ext-enable opcache && \ - mv "$PHP_INI_DIR/php.ini-production" "$PHP_INI_DIR/php.ini" + php-curl \ + php-fpm \ + php-intl \ + # php-json is enabled by default with PHP 8.2 in Debian 12 + php-mbstring \ + php-memcached \ + # php-opcache is enabled by default with PHP 8.2 in Debian 12 + # php-openssl is enabled by default with PHP 8.2 in Debian 12 + php-sqlite3 \ + php-xml \ + php-zip \ + # php-zlib is enabled by default with PHP 8.2 in Debian 12 + # for downloading libcurl-impersonate + curl \ + # for patching libcurl-impersonate + patchelf \ + && \ + # install curl-impersonate library + curlimpersonate_version=1.0.0rc2 && \ + { \ + { \ + [ $(arch) = 'aarch64' ] && \ + archive="libcurl-impersonate-v${curlimpersonate_version}.aarch64-linux-gnu.tar.gz" && \ + sha512sum="c8add80e7a0430a074edea1a11f73d03044c48e848e164af2d6f362866623e29bede207a50f18f95b1bc5ab3d33f5c31408be60a6da66b74a0d176eebe299116" \ + ; } \ + || { \ + [ $(arch) = 'armv7l' ] && \ + 
archive="libcurl-impersonate-v${curlimpersonate_version}.arm-linux-gnueabihf.tar.gz" && \ + sha512sum="d0403ca4ad55a8d499b120e5675c7b5a0dc4946af49c933e91fc24455ffe5e122aa21ee95554612ff5d1bd6faea1556e1e1b9c821918e2644cc9bcbddc05747a" \ + ; } \ + || { \ + [ $(arch) = 'x86_64' ] && \ + archive="libcurl-impersonate-v${curlimpersonate_version}.x86_64-linux-gnu.tar.gz" && \ + sha512sum="35cafda2b96df3218a6d8545e0947a899837ede51c90f7ef2980bd2d99dbd67199bc620000df28b186727300b8c7046d506807fb48ee0fbc068dc8ae01986339" \ + ; } \ + } && \ + curl -LO "https://github.com/lexiforest/curl-impersonate/releases/download/v${curlimpersonate_version}/${archive}" && \ + echo "$sha512sum $archive" | sha512sum -c - && \ + mkdir -p /usr/local/lib/curl-impersonate && \ + tar xaf "$archive" -C /usr/local/lib/curl-impersonate && \ + patchelf --set-soname libcurl.so.4 /usr/local/lib/curl-impersonate/libcurl-impersonate.so && \ + rm "$archive" && \ + apt-get purge --assume-yes curl patchelf && \ + rm -rf /var/lib/apt/lists/* -COPY ./config/nginx.conf /etc/nginx/sites-enabled/default +ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate.so +ENV CURL_IMPERSONATE chrome131 + +# logs should go to stdout / stderr +RUN ln -sfT /dev/stderr /var/log/nginx/error.log; \ + ln -sfT /dev/stdout /var/log/nginx/access.log; \ + chown -R --no-dereference www-data:adm /var/log/nginx/ + +COPY ./config/nginx.conf /etc/nginx/sites-available/default +COPY ./config/php-fpm.conf /etc/php/8.2/fpm/pool.d/rss-bridge.conf +COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.ini COPY --chown=www-data:www-data ./ /app/ -COPY --from=curlimpersonate /usr/local/lib/libcurl-impersonate-ff.so /usr/local/lib/curl-impersonate/ - -ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate-ff.so - -ENV CURL_IMPERSONATE ff91esr - EXPOSE 80 ENTRYPOINT ["/app/docker-entrypoint.sh"] diff --git a/README.md b/README.md index e9f7360d..05042630 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,25 @@ 
![RSS-Bridge](static/logo_600px.png) -RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one. +RSS-Bridge is a PHP web application. + +It generates web feeds for websites that don't have one. + +Officially hosted instance: https://rss-bridge.org/bridge01/ + +IRC channel #rssbridge at https://libera.chat/ + +[Full documentation](https://rss-bridge.github.io/rss-bridge/index.html) + +Alternatively find another +[public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html). + +Requires minimum PHP 7.4. + [![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![irc.libera.chat](https://img.shields.io/badge/irc.libera.chat-%23rssbridge-blue.svg)](https://web.libera.chat/#rssbridge) -[![Chat on Matrix](https://matrix.to/img/matrix-badge.svg)](https://matrix.to/#/#rssbridge:libera.chat) [![Actions Status](https://img.shields.io/github/actions/workflow/status/RSS-Bridge/rss-bridge/tests.yml?branch=master&label=GitHub%20Actions&logo=github)](https://github.com/RSS-Bridge/rss-bridge/actions) ||| @@ -15,116 +28,219 @@ RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for website |![Screenshot #1](/static/screenshot-1.png?raw=true)|![Screenshot #2](/static/screenshot-2.png?raw=true)| |![Screenshot #3](/static/screenshot-3.png?raw=true)|![Screenshot #4](/static/screenshot-4.png?raw=true)| |![Screenshot #5](/static/screenshot-5.png?raw=true)|![Screenshot #6](/static/screenshot-6.png?raw=true)| -|![Screenshot #7](/static/twitter-form.png?raw=true)|![Screenshot #8](/static/twitter-rasmus.png?raw=true)| -## A subset of bridges +## A subset of bridges (15/447) -* `YouTube` : YouTube user channel, playlist or search -* `Twitter` : Return keyword/hashtag search or user timeline -* `Telegram` : Return the latest posts 
from a public group -* `Reddit` : Return the latest posts from a subreddit or user -* `Filter` : Filter an existing feed url -* `Vk` : Latest posts from a user or group -* `FeedMerge` : Merge two or more existing feeds into one -* `Twitch` : Fetch the latest videos from a channel -* `ThePirateBay` : Returns the newest indexed torrents from [The Pirate Bay](https://thepiratebay.se/) with keywords - -And [many more](bridges/), thanks to the community! - -[Full documentation](https://rss-bridge.github.io/rss-bridge/index.html) - -Check out RSS-Bridge right now on https://rss-bridge.org/bridge01 or find another -[public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html). +* `CssSelectorBridge`: [Scrape out a feed using CSS selectors](https://rss-bridge.org/bridge01/#bridge-CssSelectorBridge) +* `FeedMergeBridge`: [Combine multiple feeds into one](https://rss-bridge.org/bridge01/#bridge-FeedMergeBridge) +* `FeedReducerBridge`: [Reduce a noisy feed by some percentage](https://rss-bridge.org/bridge01/#bridge-FeedReducerBridge) +* `FilterBridge`: [Filter a feed by excluding/including items by keyword](https://rss-bridge.org/bridge01/#bridge-FilterBridge) +* `GettrBridge`: [Fetches the latest posts from a GETTR user](https://rss-bridge.org/bridge01/#bridge-GettrBridge) +* `MastodonBridge`: [Fetches statuses from a Mastodon (ActivityPub) instance](https://rss-bridge.org/bridge01/#bridge-MastodonBridge) +* `RedditBridge`: [Fetches posts from a user/subredit (with filtering options)](https://rss-bridge.org/bridge01/#bridge-RedditBridge) +* `RumbleBridge`: [Fetches channel/user videos](https://rss-bridge.org/bridge01/#bridge-RumbleBridge) +* `SoundcloudBridge`: [Fetches music by username](https://rss-bridge.org/bridge01/#bridge-SoundcloudBridge) +* `TelegramBridge`: [Fetches posts from a public channel](https://rss-bridge.org/bridge01/#bridge-TelegramBridge) +* `ThePirateBayBridge:` [Fetches torrents by 
search/user/category](https://rss-bridge.org/bridge01/#bridge-ThePirateBayBridge) +* `TikTokBridge`: [Fetches posts by username](https://rss-bridge.org/bridge01/#bridge-TikTokBridge) +* `TwitchBridge`: [Fetches videos from channel](https://rss-bridge.org/bridge01/#bridge-TwitchBridge) +* `XPathBridge`: [Scrape out a feed using XPath expressions](https://rss-bridge.org/bridge01/#bridge-XPathBridge) +* `YoutubeBridge`: [Fetches videos by username/channel/playlist/search](https://rss-bridge.org/bridge01/#bridge-YoutubeBridge) +* `YouTubeCommunityTabBridge`: [Fetches posts from a channel's Posts tab](https://rss-bridge.org/bridge01/#bridge-YouTubeCommunityTabBridge) ## Tutorial -RSS-Bridge requires php 7.4 (or higher). +### How to install on traditional shared web hosting -### Install with composer or git +RSS-Bridge can basically be unzipped into a web folder. Should be working instantly. + +Latest zip: +https://github.com/RSS-Bridge/rss-bridge/archive/refs/heads/master.zip (2MB) + +### How to install on Debian 12 (nginx + php-fpm) + +These instructions have been tested on a fresh Debian 12 VM from Digital Ocean (1vcpu-512mb-10gb, 5 USD/month). 
```shell +timedatectl set-timezone Europe/Oslo + +apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl php-intl + +# Create a user account +useradd --shell /bin/bash --create-home rss-bridge + cd /var/www -composer create-project --no-dev rss-bridge/rss-bridge + +# Create folder and change its ownership to rss-bridge +mkdir rss-bridge && chown rss-bridge:rss-bridge rss-bridge/ + +# Become rss-bridge +su rss-bridge + +# Clone master branch into existing folder +git clone https://github.com/RSS-Bridge/rss-bridge.git rss-bridge/ +cd rss-bridge + +# Copy over the default config (OPTIONAL) +cp -v config.default.ini.php config.ini.php + +# Recursively give full permissions to user/owner +chmod 700 --recursive ./ + +# Give read and execute to others on folder ./static +chmod o+rx ./ ./static + +# Recursively give give read to others on folder ./static +chmod o+r --recursive ./static ``` -```shell -cd /var/www -git clone https://github.com/RSS-Bridge/rss-bridge.git -``` - -Config: - -```shell -# Give the http user write permission to the cache folder -chown www-data:www-data /var/www/rss-bridge/cache - -# Optionally copy over the default config file -cp config.default.ini.php config.ini.php -``` - -Example config for nginx: +Nginx config: ```nginx -# /etc/nginx/sites-enabled/rssbridge +# /etc/nginx/sites-enabled/rss-bridge.conf + server { listen 80; - server_name example.com; - root /var/www/rss-bridge; - index index.php; - location ~ \.php$ { + # TODO: change to your own server name + server_name example.com; + + access_log /var/log/nginx/rss-bridge.access.log; + error_log /var/log/nginx/rss-bridge.error.log; + log_not_found off; + + # Intentionally not setting a root folder + + # Static content only served here + location /static/ { + alias /var/www/rss-bridge/static/; + } + + # Pass off to php-fpm only when location is EXACTLY == / + location = / { + root /var/www/rss-bridge/; include snippets/fastcgi-php.conf; - fastcgi_read_timeout 60s; - fastcgi_pass 
unix:/run/php/php-fpm.sock; + fastcgi_read_timeout 45s; + fastcgi_pass unix:/run/php/rss-bridge.sock; + } + + # Reduce log noise + location = /favicon.ico { + access_log off; + } + + # Reduce log noise + location = /robots.txt { + access_log off; } } ``` -### Install with Docker: +PHP FPM pool config: +```ini +; /etc/php/8.2/fpm/pool.d/rss-bridge.conf -Install by using docker image from Docker Hub: +[rss-bridge] + +user = rss-bridge +group = rss-bridge + +listen = /run/php/rss-bridge.sock + +listen.owner = www-data +listen.group = www-data + +; Create 10 workers standing by to serve requests +pm = static +pm.max_children = 10 + +; Respawn worker after 500 requests (workaround for memory leaks etc.) +pm.max_requests = 500 +``` + +PHP ini config: +```ini +; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini + +max_execution_time = 15 +memory_limit = 64M +``` + +Restart fpm and nginx: + +```shell +# Lint and restart php-fpm +php-fpm8.2 -t && systemctl restart php8.2-fpm + +# Lint and restart nginx +nginx -t && systemctl restart nginx +``` + +### How to install from Composer + +Install the latest release. + +```shell +cd /var/www +composer create-project -v --no-dev --no-scripts rss-bridge/rss-bridge +``` + +### How to install with Caddy + +TODO. See https://github.com/RSS-Bridge/rss-bridge/issues/3785 + +### Install from Docker Hub: + +Install by downloading the docker image from Docker Hub: ```bash # Create container -docker create --name=rss-bridge --publish 3000:80 rssbridge/rss-bridge +docker create --name=rss-bridge --publish 3000:80 --volume $(pwd)/config:/config rssbridge/rss-bridge +``` +You can put custom `config.ini.php` and bridges into `./config`. + +**You must restart container for custom changes to take effect.** + +See `docker-entrypoint.sh` for details. 
+ +```bash # Start container docker start rss-bridge ``` Browse http://localhost:3000/ -Install by locally building the image: +### Install by locally building from Dockerfile ```bash # Build image from Dockerfile docker build -t rss-bridge . # Create container -docker create --name rss-bridge --publish 3000:80 rss-bridge +docker create --name rss-bridge --publish 3000:80 --volume $(pwd)/config:/config rss-bridge +``` -# Start the container +You can put custom `config.ini.php` and bridges into `./config`. + +**You must restart container for custom changes to take effect.** + +See `docker-entrypoint.sh` for details. + +```bash +# Start container docker start rss-bridge ``` Browse http://localhost:3000/ -#### Install with docker-compose +### Install with docker-compose (using Docker Hub) -Create a `docker-compose.yml` file locally with with the following content: -```yml -version: '2' -services: - rss-bridge: - image: rssbridge/rss-bridge:latest - volumes: - - :/config - ports: - - 3000:80 - restart: unless-stopped -``` +You can put custom `config.ini.php` and bridges into `./config`. -Then launch with `docker-compose`: +**You must restart container for custom changes to take effect.** + +See `docker-entrypoint.sh` for details. ```bash docker-compose up @@ -132,13 +248,14 @@ docker-compose up Browse http://localhost:3000/ -### Alternative installation methods +### Other installation methods [![Deploy on Scalingo](https://cdn.scalingo.com/deploy/button.svg)](https://my.scalingo.com/deploy?source=https://github.com/sebsauvage/rss-bridge) [![Deploy to Heroku](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy) [![Deploy to Cloudron](https://cloudron.io/img/button.svg)](https://www.cloudron.io/store/com.rssbridgeapp.cloudronapp.html) +[![Run on PikaPods](https://www.pikapods.com/static/run-button.svg)](https://www.pikapods.com/pods?run=rssbridge) -The Heroku quick deploy currently does not work. 
It might possibly work if you fork this repo and +The Heroku quick deploy currently does not work. It might work if you fork this repo and modify the `repository` in `scalingo.json`. See https://github.com/RSS-Bridge/rss-bridge/issues/2688 Learn more in @@ -146,13 +263,81 @@ Learn more in ## How-to -### How to create a new bridge from scratch +### How to fix "Access denied." + +Output is from php-fpm. It is unable to read index.php. + + chown rss-bridge:rss-bridge /var/www/rss-bridge/index.php + +### How to password-protect the instance (token) + +Modify `config.ini.php`: + + [authentication] + + token = "hunter2" + +### How to remove all cache items + +As current user: + + bin/cache-clear + +As user rss-bridge: + + sudo -u rss-bridge bin/cache-clear + +As root: + + sudo bin/cache-clear + +### How to remove all expired cache items + + bin/cache-prune + +### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable" + +```shell +# Give rss-bridge ownership +chown rss-bridge:rss-bridge -R /var/www/rss-bridge/cache + +# Or, give www-data ownership +chown www-data:www-data -R /var/www/rss-bridge/cache + +# Or, give everyone write permission +chmod 777 -R /var/www/rss-bridge/cache + +# Or last ditch effort (CAREFUL) +rm -rf /var/www/rss-bridge/cache/ && mkdir /var/www/rss-bridge/cache/ +``` + +### How to fix "attempt to write a readonly database" + +The sqlite files (db, wal and shm) are not writeable. 
+ + chown -v rss-bridge:rss-bridge cache/* + +### How to fix "Unable to prepare statement: 1, no such table: storage" + + rm cache/* + +### How to create a completely new bridge + +New code files MUST have `declare(strict_types=1);` at the top of file: + +```php +bridgeFactory = new BridgeFactory(); + public function __construct( + BridgeFactory $bridgeFactory + ) { + $this->bridgeFactory = $bridgeFactory; } - public function execute(array $request) + public function __invoke(Request $request): Response { - if (!Debug::isEnabled()) { - throw new \Exception('This action is only available in debug mode!'); + if (Configuration::getConfig('system', 'env') !== 'dev') { + return new Response('This action is only available in dev environment!', 403); } - if (!isset($request['bridge'])) { - return render_template('connectivity.html.php'); + $bridgeName = $request->get('bridge'); + if (!$bridgeName) { + return new Response(render_template('connectivity.html.php')); + } + $bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName); + if (!$bridgeClassName) { + return new Response('Bridge not found', 404); } - - $bridgeClassName = $this->bridgeFactory->createBridgeClassName($request['bridge']); - return $this->reportBridgeConnectivity($bridgeClassName); } @@ -52,29 +43,25 @@ class ConnectivityAction implements ActionInterface throw new \Exception('Bridge is not whitelisted!'); } - $retVal = [ - 'bridge' => $bridgeClassName, - 'successful' => false, - 'http_code' => 200, - ]; - $bridge = $this->bridgeFactory->create($bridgeClassName); $curl_opts = [ - CURLOPT_CONNECTTIMEOUT => 5 + CURLOPT_CONNECTTIMEOUT => 5, + CURLOPT_FOLLOWLOCATION => true, + ]; + $result = [ + 'bridge' => $bridgeClassName, + 'successful' => false, + 'http_code' => null, ]; try { - $reply = getContents($bridge::URI, [], $curl_opts, true); - - if ($reply['code'] === 200) { - $retVal['successful'] = true; - if (strpos(implode('', $reply['status_lines']), '301 Moved Permanently')) { - 
$retVal['http_code'] = 301; - } + $response = getContents($bridge::URI, [], $curl_opts, true); + $result['http_code'] = $response->getCode(); + if (in_array($result['http_code'], [200])) { + $result['successful'] = true; } } catch (\Exception $e) { - $retVal['successful'] = false; } - return new Response(Json::encode($retVal), 200, ['Content-Type' => 'text/json']); + return new Response(Json::encode($result), 200, ['content-type' => 'text/json']); } } diff --git a/actions/DetectAction.php b/actions/DetectAction.php index 6c9fa22d..8d3d6263 100644 --- a/actions/DetectAction.php +++ b/actions/DetectAction.php @@ -1,53 +1,51 @@ bridgeFactory = $bridgeFactory; + } + + public function __invoke(Request $request): Response + { + $url = $request->get('url'); + $format = $request->get('format'); + + if (!$url) { + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a url'])); } if (!$format) { - throw new \Exception('You must specify a format!'); + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format'])); } - $bridgeFactory = new BridgeFactory(); - - foreach ($bridgeFactory->getBridgeClassNames() as $bridgeClassName) { - if (!$bridgeFactory->isEnabled($bridgeClassName)) { + foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) { + if (!$this->bridgeFactory->isEnabled($bridgeClassName)) { continue; } - $bridge = $bridgeFactory->create($bridgeClassName); + $bridge = $this->bridgeFactory->create($bridgeClassName); - $bridgeParams = $bridge->detectParameters($targetURL); + $bridgeParams = $bridge->detectParameters($url); - if (is_null($bridgeParams)) { + if (!$bridgeParams) { continue; } - $bridgeParams['bridge'] = $bridgeClassName; - $bridgeParams['format'] = $format; - - $url = '?action=display&' . 
http_build_query($bridgeParams); - return new Response('', 301, ['Location' => $url]); + $query = [ + 'action' => 'display', + 'bridge' => $bridgeClassName, + 'format' => $format, + ]; + $query = array_merge($query, $bridgeParams); + return new Response('', 301, ['location' => '?' . http_build_query($query)]); } - throw new \Exception('No bridge found for given URL: ' . $targetURL); + return new Response(render(__DIR__ . '/../templates/error.html.php', [ + 'message' => 'No bridge found for given URL: ' . $url, + ])); } } diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 66acd4cc..97f53caa 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -1,44 +1,43 @@ cache = $cache; + $this->logger = $logger; + $this->bridgeFactory = $bridgeFactory; + } + + public function __invoke(Request $request): Response { - if (Configuration::getConfig('system', 'enable_maintenance_mode')) { - return new Response('503 Service Unavailable', 503); + $bridgeName = $request->get('bridge'); + $format = $request->get('format'); + $noproxy = $request->get('_noproxy'); + + if (!$bridgeName) { + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Missing bridge name parameter']), 400); + } + $bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName); + if (!$bridgeClassName) { + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Bridge not found']), 404); } - $bridgeFactory = new BridgeFactory(); - - $bridgeClassName = $bridgeFactory->createBridgeClassName($request['bridge'] ?? ''); - - $format = $request['format'] ?? null; if (!$format) { - throw new \Exception('You must specify a format!'); + return new Response(render(__DIR__ . 
'/../templates/error.html.php', ['message' => 'You must specify a format']), 400); } - if (!$bridgeFactory->isEnabled($bridgeClassName)) { - throw new \Exception('This bridge is not whitelisted'); + if (!$this->bridgeFactory->isEnabled($bridgeClassName)) { + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'This bridge is not whitelisted']), 400); } - $formatFactory = new FormatFactory(); - $format = $formatFactory->create($format); - - $bridge = $bridgeFactory->create($bridgeClassName); - $bridge->loadConfiguration(); - - $noproxy = $request['_noproxy'] ?? null; + // Disable proxy (if enabled and per user's request) if ( Configuration::getConfig('proxy', 'url') && Configuration::getConfig('proxy', 'by_bridge') @@ -48,176 +47,134 @@ class DisplayAction implements ActionInterface define('NOPROXY', true); } - $cacheTimeout = $request['_cache_timeout'] ?? null; - if (Configuration::getConfig('cache', 'custom_timeout') && $cacheTimeout) { - $cacheTimeout = (int) $cacheTimeout; - } else { - // At this point the query argument might still be in the url but it won't be used - $cacheTimeout = $bridge->getCacheTimeout(); + $cacheKey = 'http_' . 
json_encode($request->toArray()); + + $bridge = $this->bridgeFactory->create($bridgeClassName); + + $response = $this->createResponse($request, $bridge, $format); + + if ($response->getCode() === 200) { + $ttl = $request->get('_cache_timeout'); + if (Configuration::getConfig('cache', 'custom_timeout') && $ttl) { + $ttl = (int) $ttl; + } else { + $ttl = $bridge->getCacheTimeout(); + } + $this->cache->set($cacheKey, $response, $ttl); } - // Remove parameters that don't concern bridges - $bridge_params = array_diff_key( - $request, - array_fill_keys( - [ - 'action', - 'bridge', - 'format', - '_noproxy', - '_cache_timeout', - '_error_time' - ], - '' - ) - ); - - // Remove parameters that don't concern caches - $cache_params = array_diff_key( - $request, - array_fill_keys( - [ - 'action', - 'format', - '_noproxy', - '_cache_timeout', - '_error_time' - ], - '' - ) - ); - - $cache = RssBridge::getCache(); - $cache->setScope(''); - $cache->setKey($cache_params); - // This cache purge will basically delete all cache items older than 24h, regardless of scope and key - $cache->purgeCache(86400); - - $items = []; - $infos = []; - $mtime = $cache->getTime(); - - if ( - $mtime - && (time() - $cacheTimeout < $mtime) - && !Debug::isEnabled() - ) { - // At this point we found the feed in the cache and debug mode is disabled - - if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])) { - // The client wants to know if the feed has changed since its last check - $stime = strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']); - if ($mtime <= $stime) { - $lastModified2 = gmdate('D, d M Y H:i:s ', $mtime) . 
'GMT'; - return new Response('', 304, ['Last-Modified' => $lastModified2]); - } - } - - // Load the feed from cache and prepare it - $cached = $cache->loadData(); - if (isset($cached['items']) && isset($cached['extraInfos'])) { - foreach ($cached['items'] as $item) { - $items[] = new FeedItem($item); - } - $infos = $cached['extraInfos']; - } - } else { - // At this point we did NOT find the feed in the cache or debug mode is enabled. - try { - $bridge->setDatas($bridge_params); - $bridge->collectData(); - - $items = $bridge->getItems(); - - if (isset($items[0]) && is_array($items[0])) { - $feedItems = []; - foreach ($items as $item) { - $feedItems[] = new FeedItem($item); - } - $items = $feedItems; - } - $infos = [ - 'name' => $bridge->getName(), - 'uri' => $bridge->getURI(), - 'donationUri' => $bridge->getDonationURI(), - 'icon' => $bridge->getIcon() - ]; - } catch (\Throwable $e) { - if ($e instanceof HttpException) { - // Produce a smaller log record for http exceptions - Logger::warning(sprintf('Exception in %s: %s', $bridgeClassName, create_sane_exception_message($e))); - } else { - // Log the exception - Logger::error(sprintf('Exception in %s', $bridgeClassName), ['e' => $e]); - } - - // Emit error only if we are passed the error report limit - $errorCount = self::logBridgeError($bridge->getName(), $e->getCode()); - if ($errorCount >= Configuration::getConfig('error', 'report_limit')) { - if (Configuration::getConfig('error', 'output') === 'feed') { - // Emit the error as a feed item in a feed so that feed readers can pick it up - $items[] = $this->createFeedItemFromException($e, $bridge); - } elseif (Configuration::getConfig('error', 'output') === 'http') { - // Emit as a regular web response - throw $e; - } - } - } - - // Unfortunately need to set scope and key again because they might be modified - $cache->setScope(''); - $cache->setKey($cache_params); - $cache->saveData([ - 'items' => array_map(function (FeedItem $item) { - return $item->toArray(); - }, 
$items), - 'extraInfos' => $infos - ]); - } - - $format->setItems($items); - $format->setExtraInfos($infos); - $lastModified = $cache->getTime(); - $format->setLastModified($lastModified); - $headers = []; - if ($lastModified) { - $headers['Last-Modified'] = gmdate('D, d M Y H:i:s ', $lastModified) . 'GMT'; - } - $headers['Content-Type'] = $format->getMimeType() . '; charset=' . $format->getCharset(); - return new Response($format->stringify(), 200, $headers); + return $response; } - private function createFeedItemFromException($e, BridgeInterface $bridge): FeedItem + private function createResponse(Request $request, BridgeAbstract $bridge, string $format) { - $item = new FeedItem(); + $items = []; + + try { + $bridge->loadConfiguration(); + // Remove parameters that don't concern bridges + $remove = [ + 'token', + 'action', + 'bridge', + 'format', + '_noproxy', + '_cache_timeout', + '_error_time', + '_', // Some RSS readers add a cache-busting parameter (_=) to feed URLs, detect and ignore them. + ]; + $requestArray = $request->toArray(); + $input = array_diff_key($requestArray, array_fill_keys($remove, '')); + $bridge->setInput($input); + $bridge->collectData(); + $items = $bridge->getItems(); + } catch (\Throwable $e) { + if ($e instanceof ClientException) { + $this->logger->debug(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e))); + } elseif ($e instanceof RateLimitException) { + $this->logger->debug(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e))); + return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 429); + } elseif ($e instanceof HttpException) { + if (in_array($e->getCode(), [429, 503])) { + // Log with debug, immediately reproduce and return + $this->logger->debug(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e))); + return new Response(render(__DIR__ . 
'/../templates/exception.html.php', ['e' => $e]), $e->getCode()); + } + // Some other status code which we let fail normally (but don't log it) + } else { + $this->logger->error(sprintf('Exception in DisplayAction(%s)', $bridge->getShortName()), ['e' => $e]); + } + $errorOutput = Configuration::getConfig('error', 'output'); + $reportLimit = Configuration::getConfig('error', 'report_limit'); + $errorCount = 1; + if ($reportLimit > 1) { + $errorCount = $this->logBridgeError($bridge->getName(), $e->getCode()); + } + // Let clients know about the error if we are passed the report limit + if ($errorCount >= $reportLimit) { + if ($errorOutput === 'feed') { + // Render the exception as a feed item + $items = [$this->createFeedItemFromException($e, $bridge)]; + } elseif ($errorOutput === 'http') { + return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 500); + } elseif ($errorOutput === 'none') { + // Do nothing (produces an empty feed) + } + } + } + + $formatFactory = new FormatFactory(); + $format = $formatFactory->create($format); + + $format->setItems($items); + $format->setFeed($bridge->getFeed()); + $now = time(); + $format->setLastModified($now); + $headers = [ + 'last-modified' => gmdate('D, d M Y H:i:s ', $now) . 'GMT', + 'content-type' => $format->getMimeType() . '; charset=UTF-8', + ]; + $body = $format->render(); + + // This is supposed to remove non-utf8 byte sequences, but I'm unsure if it works + ini_set('mbstring.substitute_character', 'none'); + $body = mb_convert_encoding($body, 'UTF-8', 'UTF-8'); + + return new Response($body, 200, $headers); + } + + private function createFeedItemFromException($e, BridgeAbstract $bridge): array + { + $item = []; // Create a unique identifier every 24 hours $uniqueIdentifier = urlencode((int)(time() / 86400)); - $itemTitle = sprintf('Bridge returned error %s! 
(%s)', $e->getCode(), $uniqueIdentifier); - $item->setTitle($itemTitle); - $item->setURI(get_current_url()); - $item->setTimestamp(time()); + $title = sprintf('Bridge returned error %s! (%s)', $e->getCode(), $uniqueIdentifier); + + $item['title'] = $title; + $item['uri'] = get_current_url(); + $item['timestamp'] = time(); // Create an item identifier for feed readers e.g. "staysafetv twitch videos_19389" - $item->setUid($bridge->getName() . '_' . $uniqueIdentifier); + $item['uid'] = $bridge->getName() . '_' . $uniqueIdentifier; $content = render_template(__DIR__ . '/../templates/bridge-error.html.php', [ - 'error' => render_template(__DIR__ . '/../templates/error.html.php', ['e' => $e]), + 'error' => render_template(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 'searchUrl' => self::createGithubSearchUrl($bridge), - 'issueUrl' => self::createGithubIssueUrl($bridge, $e, create_sane_exception_message($e)), + 'issueUrl' => self::createGithubIssueUrl($bridge, $e), 'maintainer' => $bridge->getMaintainer(), ]); - $item->setContent($content); + $item['content'] = $content; + return $item; } - private static function logBridgeError($bridgeName, $code) + private function logBridgeError($bridgeName, $code) { - $cache = RssBridge::getCache(); - $cache->setScope('error_reporting'); - $cache->setkey([$bridgeName . '_' . $code]); - - if ($report = $cache->loadData()) { + // todo: it's not really necessary to json encode $report + $cacheKey = 'error_reporting_' . $bridgeName . '_' . 
$code; + $report = $this->cache->get($cacheKey); + if ($report) { $report = Json::decode($report); $report['time'] = time(); $report['count']++; @@ -228,26 +185,39 @@ class DisplayAction implements ActionInterface 'count' => 1, ]; } - $cache->saveData(Json::encode($report)); + $ttl = 86400 * 5; + $this->cache->set($cacheKey, Json::encode($report), $ttl); return $report['count']; } - private static function createGithubIssueUrl($bridge, $e, string $message): string + private static function createGithubIssueUrl(BridgeAbstract $bridge, \Throwable $e): string { - return sprintf('https://github.com/RSS-Bridge/rss-bridge/issues/new?%s', http_build_query([ - 'title' => sprintf('%s failed with error %s', $bridge->getName(), $e->getCode()), + $maintainer = $bridge->getMaintainer(); + if (str_contains($maintainer, ',')) { + $maintainers = explode(',', $maintainer); + } else { + $maintainers = [$maintainer]; + } + $maintainers = array_map('trim', $maintainers); + + $queryString = $_SERVER['QUERY_STRING'] ?? ''; + $query = [ + 'title' => $bridge->getName() . ' failed with: ' . $e->getMessage(), 'body' => sprintf( - "```\n%s\n\n%s\n\nQuery string: %s\nVersion: %s\nOs: %s\nPHP version: %s\n```", - $message, + "```\n%s\n\n%s\n\nQuery string: %s\nVersion: %s\nOs: %s\nPHP version: %s\n```\nMaintainer: @%s", + create_sane_exception_message($e), implode("\n", trace_to_call_points(trace_from_exception($e))), - $_SERVER['QUERY_STRING'] ?? '', + $queryString, Configuration::getVersion(), PHP_OS_FAMILY, - phpversion() ?: 'Unknown' + phpversion() ?: 'Unknown', + implode(', @', $maintainers), ), 'labels' => 'Bridge-Broken', - 'assignee' => $bridge->getMaintainer(), - ])); + 'assignee' => $maintainer[0], + ]; + + return 'https://github.com/RSS-Bridge/rss-bridge/issues/new?' . 
http_build_query($query); } private static function createGithubSearchUrl($bridge): string diff --git a/actions/FindfeedAction.php b/actions/FindfeedAction.php new file mode 100644 index 00000000..e18c3e1d --- /dev/null +++ b/actions/FindfeedAction.php @@ -0,0 +1,95 @@ +bridgeFactory = $bridgeFactory; + } + + public function __invoke(Request $request): Response + { + $url = $request->get('url'); + $format = $request->get('format'); + + if (!$url) { + return new Response('You must specify a url', 400); + } + if (!$format) { + return new Response('You must specify a format', 400); + } + + $results = []; + foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) { + if (!$this->bridgeFactory->isEnabled($bridgeClassName)) { + continue; + } + + $bridge = $this->bridgeFactory->create($bridgeClassName); + + $bridgeParams = $bridge->detectParameters($url); + + if ($bridgeParams === null) { + continue; + } + + // It's allowed to have no 'context' in a bridge (only a default context without any name) + // In this case, the reference to the parameters are found in the first element of the PARAMETERS array + + $context = $bridgeParams['context'] ?? 0; + + $bridgeData = []; + // Construct the array of parameters + foreach ($bridgeParams as $key => $value) { + // 'context' is a special case : it's a bridge parameters, there is no "name" for this parameter + if ($key == 'context') { + $bridgeData[$key]['name'] = 'Context'; + $bridgeData[$key]['value'] = $value; + } else { + $bridgeData[$key]['name'] = $this->getParameterName($bridge, $context, $key); + $bridgeData[$key]['value'] = $value; + } + } + + $bridgeParams['bridge'] = $bridgeClassName; + $bridgeParams['format'] = $format; + $content = [ + 'url' => './?action=display&' . 
http_build_query($bridgeParams), + 'bridgeParams' => $bridgeParams, + 'bridgeData' => $bridgeData, + 'bridgeMeta' => [ + 'name' => $bridge::NAME, + 'description' => $bridge::DESCRIPTION, + 'parameters' => $bridge::PARAMETERS, + 'icon' => $bridge->getIcon(), + ], + ]; + $results[] = $content; + } + if ($results === []) { + return new Response(Json::encode(['message' => 'No bridge found for given url']), 404, ['content-type' => 'application/json']); + } + return new Response(Json::encode($results), 200, ['content-type' => 'application/json']); + } + + // Get parameter name in the actual context, or in the global parameter + private function getParameterName($bridge, $context, $key) + { + if (isset($bridge::PARAMETERS[$context][$key]['name'])) { + $name = $bridge::PARAMETERS[$context][$key]['name']; + } else if (isset($bridge::PARAMETERS['global'][$key]['name'])) { + $name = $bridge::PARAMETERS['global'][$key]['name']; + } else { + $name = 'Variable "' . $key . '" (No name provided)'; + } + return $name; + } +} diff --git a/actions/FrontpageAction.php b/actions/FrontpageAction.php index 40d25ea4..79ffb4f5 100644 --- a/actions/FrontpageAction.php +++ b/actions/FrontpageAction.php @@ -2,35 +2,48 @@ final class FrontpageAction implements ActionInterface { - public function execute(array $request) + private BridgeFactory $bridgeFactory; + + public function __construct( + BridgeFactory $bridgeFactory + ) { + $this->bridgeFactory = $bridgeFactory; + } + + public function __invoke(Request $request): Response { - $showInactive = (bool) ($request['show_inactive'] ?? 
null); + $token = $request->getAttribute('token'); + + $messages = []; $activeBridges = 0; - $bridgeFactory = new BridgeFactory(); - $bridgeClassNames = $bridgeFactory->getBridgeClassNames(); + $bridgeClassNames = $this->bridgeFactory->getBridgeClassNames(); - $formatFactory = new FormatFactory(); - $formats = $formatFactory->getFormatNames(); + foreach ($this->bridgeFactory->getMissingEnabledBridges() as $missingEnabledBridge) { + $messages[] = [ + 'body' => sprintf('Warning : Bridge "%s" not found', $missingEnabledBridge), + 'level' => 'warning' + ]; + } $body = ''; foreach ($bridgeClassNames as $bridgeClassName) { - if ($bridgeFactory->isEnabled($bridgeClassName)) { - $body .= BridgeCard::displayBridgeCard($bridgeClassName, $formats); + if ($this->bridgeFactory->isEnabled($bridgeClassName)) { + $body .= BridgeCard::render($this->bridgeFactory, $bridgeClassName, $token); $activeBridges++; - } elseif ($showInactive) { - $body .= BridgeCard::displayBridgeCard($bridgeClassName, $formats, false) . PHP_EOL; } } - return render(__DIR__ . '/../templates/frontpage.html.php', [ - 'messages' => [], - 'admin_email' => Configuration::getConfig('admin', 'email'), - 'admin_telegram' => Configuration::getConfig('admin', 'telegram'), - 'bridges' => $body, - 'active_bridges' => $activeBridges, - 'total_bridges' => count($bridgeClassNames), - 'show_inactive' => $showInactive, - ]); + $response = new Response(render(__DIR__ . 
'/../templates/frontpage.html.php', [ + 'messages' => $messages, + 'admin_email' => Configuration::getConfig('admin', 'email'), + 'admin_telegram' => Configuration::getConfig('admin', 'telegram'), + 'bridges' => $body, + 'active_bridges' => $activeBridges, + 'total_bridges' => count($bridgeClassNames), + ])); + + // TODO: The rendered template could be cached, but beware config changes that changes the html + return $response; } } diff --git a/actions/HealthAction.php b/actions/HealthAction.php index 8ae5df1b..13365a3c 100644 --- a/actions/HealthAction.php +++ b/actions/HealthAction.php @@ -4,7 +4,7 @@ declare(strict_types=1); class HealthAction implements ActionInterface { - public function execute(array $request) + public function __invoke(Request $request): Response { $response = [ 'code' => 200, diff --git a/actions/ListAction.php b/actions/ListAction.php index 6ce7e33e..f6347f9c 100644 --- a/actions/ListAction.php +++ b/actions/ListAction.php @@ -1,42 +1,36 @@ bridgeFactory = $bridgeFactory; + } + + public function __invoke(Request $request): Response { $list = new \stdClass(); $list->bridges = []; $list->total = 0; - $bridgeFactory = new BridgeFactory(); - - foreach ($bridgeFactory->getBridgeClassNames() as $bridgeClassName) { - $bridge = $bridgeFactory->create($bridgeClassName); + foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) { + $bridge = $this->bridgeFactory->create($bridgeClassName); $list->bridges[$bridgeClassName] = [ - 'status' => $bridgeFactory->isEnabled($bridgeClassName) ? 'active' : 'inactive', - 'uri' => $bridge->getURI(), - 'donationUri' => $bridge->getDonationURI(), - 'name' => $bridge->getName(), - 'icon' => $bridge->getIcon(), - 'parameters' => $bridge->getParameters(), - 'maintainer' => $bridge->getMaintainer(), - 'description' => $bridge->getDescription() + 'status' => $this->bridgeFactory->isEnabled($bridgeClassName) ? 
'active' : 'inactive', + 'uri' => $bridge->getURI(), + 'donationUri' => $bridge->getDonationURI(), + 'name' => $bridge->getName(), + 'icon' => $bridge->getIcon(), + 'parameters' => $bridge->getParameters(), + 'maintainer' => $bridge->getMaintainer(), + 'description' => $bridge->getDescription() ]; } $list->total = count($list->bridges); - return new Response(Json::encode($list), 200, ['Content-Type' => 'application/json']); + return new Response(Json::encode($list), 200, ['content-type' => 'application/json']); } } diff --git a/actions/SetBridgeCacheAction.php b/actions/SetBridgeCacheAction.php deleted file mode 100644 index a9a598bd..00000000 --- a/actions/SetBridgeCacheAction.php +++ /dev/null @@ -1,50 +0,0 @@ -createBridgeClassName($request['bridge'] ?? ''); - - // whitelist control - if (!$bridgeFactory->isEnabled($bridgeClassName)) { - throw new \Exception('This bridge is not whitelisted', 401); - die; - } - - $bridge = $bridgeFactory->create($bridgeClassName); - $bridge->loadConfiguration(); - $value = $request['value']; - - $cache = RssBridge::getCache(); - $cache->setScope(get_class($bridge)); - if (!is_array($key)) { - // not sure if $key is an array when it comes in from request - $key = [$key]; - } - $cache->setKey($key); - $cache->saveData($value); - - header('Content-Type: text/plain'); - echo 'done'; - } -} diff --git a/bin/cache-clear b/bin/cache-clear new file mode 100755 index 00000000..2ca84ce6 --- /dev/null +++ b/bin/cache-clear @@ -0,0 +1,16 @@ +#!/usr/bin/env php +clear(); diff --git a/bin/cache-prune b/bin/cache-prune new file mode 100755 index 00000000..bb72c4ac --- /dev/null +++ b/bin/cache-prune @@ -0,0 +1,24 @@ +#!/usr/bin/env php +prune(); diff --git a/bin/test b/bin/test new file mode 100755 index 00000000..74692410 --- /dev/null +++ b/bin/test @@ -0,0 +1,20 @@ +#!/usr/bin/env php +debug('This is a test debug message'); + +$logger->info('This is a test info message'); + +$logger->error('This is a test error message'); diff --git 
a/bridges/ABCNewsBridge.php b/bridges/ABCNewsBridge.php index c00fed1c..154eb489 100644 --- a/bridges/ABCNewsBridge.php +++ b/bridges/ABCNewsBridge.php @@ -31,17 +31,17 @@ class ABCNewsBridge extends BridgeAbstract { $url = sprintf('https://www.abc.net.au/news/%s', $this->getInput('topic')); $dom = getSimpleHTMLDOM($url); - $dom = $dom->find('div[data-component="CardList"]', 0); + $dom = $dom->find('div[data-component="PaginationList"]', 0); if (!$dom) { throw new \Exception(sprintf('Unable to find css selector on `%s`', $url)); } $dom = defaultLinkTo($dom, $this->getURI()); - foreach ($dom->find('div[data-component="GenericCard"]') as $article) { + foreach ($dom->find('article[data-component="DetailCard"]') as $article) { $a = $article->find('a', 0); $this->items[] = [ 'title' => $a->plaintext, 'uri' => $a->href, - 'content' => $article->find('[data-component="CardDescription"]', 0)->plaintext, + 'content' => $article->find('p', 0)->plaintext, 'timestamp' => strtotime($article->find('time', 0)->datetime), ]; } diff --git a/bridges/AO3Bridge.php b/bridges/AO3Bridge.php index 6ca59cc5..2697dbc7 100644 --- a/bridges/AO3Bridge.php +++ b/bridges/AO3Bridge.php @@ -12,9 +12,29 @@ class AO3Bridge extends BridgeAbstract 'url' => [ 'name' => 'url', 'required' => true, - // Example: F/F tag, complete works only - 'exampleValue' => 'https://archiveofourown.org/works?work_search[complete]=T&tag_id=F*s*F', + // Example: F/F tag + 'exampleValue' => 'https://archiveofourown.org/tags/F*s*F/works', ], + 'range' => [ + 'name' => 'Chapter Content', + 'title' => 'Chapter(s) to include in each work\'s feed entry', + 'defaultValue' => null, + 'type' => 'list', + 'values' => [ + 'None' => null, + 'First' => 'first', + 'Latest' => 'last', + 'Entire work' => 'all', + ], + ], + 'unique' => [ + 'name' => 'Make separate entries for new fic chapters', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Make separate entries for new fic chapters', + 'defaultValue' => 'checked', + ], + 
'limit' => self::LIMIT, ], 'Bookmarks' => [ 'user' => [ @@ -33,23 +53,19 @@ class AO3Bridge extends BridgeAbstract ], ] ]; + private $title; public function collectData() { switch ($this->queriedContext) { case 'Bookmarks': - $user = $this->getInput('user'); - $this->title = $user; - $url = self::URI - . '/users/' . $user - . '/bookmarks?bookmark_search[sort_column]=bookmarkable_date'; - $this->collectList($url); + $this->collectList($this->getURI()); break; case 'List': - $this->collectList($this->getInput('url')); + $this->collectList($this->getURI()); break; case 'Work': - $this->collectWork($this->getInput('id')); + $this->collectWork($this->getURI()); break; } } @@ -60,9 +76,24 @@ class AO3Bridge extends BridgeAbstract */ private function collectList($url) { - $html = getSimpleHTMLDOM($url); + $version = 'v0.0.1'; + $headers = [ + "useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)" + ]; + $response = getContents($url, $headers); + + $html = \str_get_html($response); $html = defaultLinkTo($html, self::URI); + // Get list title. Will include page range + count in some cases + $heading = ($html->find('#main h2', 0)); + if ($heading->find('a.tag')) { + $heading = $heading->find('a.tag', 0); + } + $this->title = $heading->plaintext; + + $limit = $this->getInput('limit') ?? 
3; + $count = 0; foreach ($html->find('.index.group > li') as $element) { $item = []; @@ -71,16 +102,70 @@ class AO3Bridge extends BridgeAbstract continue; // discard deleted works } $item['title'] = $title->plaintext; - $item['content'] = $element; $item['uri'] = $title->href; $strdate = $element->find('div p.datetime', 0)->plaintext; $item['timestamp'] = strtotime($strdate); + // detach from rest of page because remove() is buggy + $element = str_get_html($element->outertext()); + $tags = $element->find('ul.required-tags', 0); + foreach ($tags->childNodes() as $tag) { + $item['categories'][] = html_entity_decode($tag->plaintext); + } + $tags->remove(); + $tags = $element->find('ul.tags', 0); + foreach ($tags->childNodes() as $tag) { + $item['categories'][] = html_entity_decode($tag->plaintext); + } + $tags->remove(); + + $item['content'] = implode('', $element->childNodes()); + $chapters = $element->find('dl dd.chapters', 0); // bookmarked series and external works do not have a chapters count $chapters = (isset($chapters) ? $chapters->plaintext : 0); - $item['uid'] = $item['uri'] . "/$strdate/$chapters"; + if ($this->getInput('unique')) { + $item['uid'] = $item['uri'] . 
"/$strdate/$chapters"; + } else { + $item['uid'] = $item['uri']; + } + + + // Fetch workskin of desired chapter(s) in list + if ($this->getInput('range') && ($limit == 0 || $count++ < $limit)) { + $url = $item['uri']; + switch ($this->getInput('range')) { + case ('all'): + $url .= '?view_full_work=true'; + break; + case ('first'): + break; + case ('last'): + // only way to get this is using the navigate page unfortunately + $url .= '/navigate'; + $response = getContents($url, $headers); + $html = \str_get_html($response); + $html = defaultLinkTo($html, self::URI); + $url = $html->find('ol.index.group > li > a', -1)->href; + break; + } + $response = getContents($url, $headers); + + $html = \str_get_html($response); + $html = defaultLinkTo($html, self::URI); + // remove duplicate fic summary + if ($ficsum = $html->find('#workskin > .preface > .summary', 0)) { + $ficsum->remove(); + } + $item['content'] .= $html->find('#workskin', 0); + } + + // Use predictability of download links to generate enclosures + $wid = explode('/', $item['uri'])[4]; + foreach (['azw3', 'epub', 'mobi', 'pdf', 'html'] as $ext) { + $item['enclosures'][] = 'https://archiveofourown.org/downloads/' . $wid . '/work.' . $ext; + } $this->items[] = $item; } @@ -89,20 +174,31 @@ class AO3Bridge extends BridgeAbstract /** * Feed for recent chapters of a specific work. */ - private function collectWork($id) + private function collectWork($url) { - $url = self::URI . "/works/$id/navigate"; - $response = _http_request($url, ['useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)']); - $html = \str_get_html($response['body']); + $version = 'v0.0.1'; + $headers = [ + "useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)" + ]; + $response = getContents($url . '/navigate', $headers); + + $html = \str_get_html($response); $html = defaultLinkTo($html, self::URI); + $response = getContents($url . 
'?view_full_work=true', $headers); + + $workhtml = \str_get_html($response); + $workhtml = defaultLinkTo($workhtml, self::URI); + $this->title = $html->find('h2 a', 0)->plaintext; - foreach ($html->find('ol.index.group > li') as $element) { + $nav = $html->find('ol.index.group > li'); + for ($i = 0; $i < count($nav); $i++) { $item = []; + $element = $nav[$i]; $item['title'] = $element->find('a', 0)->plaintext; - $item['content'] = $element; + $item['content'] = $workhtml->find('#chapter-' . ($i + 1), 0); $item['uri'] = $element->find('a', 0)->href; $strdate = $element->find('span.datetime', 0)->plaintext; @@ -131,4 +227,24 @@ class AO3Bridge extends BridgeAbstract { return self::URI . '/favicon.ico'; } + + public function getURI() + { + $url = parent::getURI(); + switch ($this->queriedContext) { + case 'Bookmarks': + $user = $this->getInput('user'); + $url = self::URI + . '/users/' . $user + . '/bookmarks?bookmark_search[sort_column]=bookmarkable_date'; + break; + case 'List': + $url = $this->getInput('url'); + break; + case 'Work': + $url = self::URI . '/works/' . 
$this->getInput('id'); + break; + } + return $url; + } } diff --git a/bridges/ARDAudiothekBridge.php b/bridges/ARDAudiothekBridge.php index 07a3cc68..e6b82775 100644 --- a/bridges/ARDAudiothekBridge.php +++ b/bridges/ARDAudiothekBridge.php @@ -63,13 +63,15 @@ class ARDAudiothekBridge extends BridgeAbstract public function collectData() { - $oldTz = date_default_timezone_get(); + $path = $this->getInput('path'); + $limit = $this->getInput('limit'); + $oldTz = date_default_timezone_get(); date_default_timezone_set('Europe/Berlin'); - $pathComponents = explode('/', $this->getInput('path')); + $pathComponents = explode('/', $path); if (empty($pathComponents)) { - returnClientError('Path may not be empty'); + throwClientException('Path may not be empty'); } if (count($pathComponents) < 2) { $showID = $pathComponents[0]; @@ -82,17 +84,21 @@ class ARDAudiothekBridge extends BridgeAbstract } $url = self::APIENDPOINT . 'programsets/' . $showID . '/'; - $rawJSON = getContents($url); - $processedJSON = json_decode($rawJSON)->data->programSet; + $json1 = getContents($url); + $data1 = Json::decode($json1, false); + $processedJSON = $data1->data->programSet; + if (!$processedJSON) { + throw new \Exception('Unable to find show id: ' . $showID); + } - $limit = $this->getInput('limit'); $answerLength = 1; $offset = 0; $numberOfElements = 1; while ($answerLength != 0 && $offset < $numberOfElements && (is_null($limit) || $offset < $limit)) { - $rawJSON = getContents($url . '?offset=' . $offset); - $processedJSON = json_decode($rawJSON)->data->programSet; + $json2 = getContents($url . '?offset=' . 
$offset); + $data2 = Json::decode($json2, false); + $processedJSON = $data2->data->programSet; $answerLength = count($processedJSON->items->nodes); $offset = $offset + $answerLength; @@ -113,7 +119,16 @@ class ARDAudiothekBridge extends BridgeAbstract $item['timestamp'] = $audio->publicationStartDateAndTime; $item['uid'] = $audio->id; $item['author'] = $audio->programSet->publicationService->title; - $item['categories'] = [ $audio->programSet->editorialCategories->title ]; + + $category = $audio->programSet->editorialCategories->title ?? null; + if ($category) { + $item['categories'] = [$category]; + } + + $item['itunes'] = [ + 'duration' => $audio->duration, + ]; + $this->items[] = $item; } } diff --git a/bridges/ARDMediathekBridge.php b/bridges/ARDMediathekBridge.php index 6de8dad7..6f7272dc 100644 --- a/bridges/ARDMediathekBridge.php +++ b/bridges/ARDMediathekBridge.php @@ -40,6 +40,11 @@ class ARDMediathekBridge extends BridgeAbstract * @const IMAGEWIDTHPLACEHOLDER */ const IMAGEWIDTHPLACEHOLDER = '{width}'; + /** + * Title of the current show + * @var string + */ + private $title; const PARAMETERS = [ [ @@ -60,7 +65,7 @@ class ARDMediathekBridge extends BridgeAbstract $pathComponents = explode('/', $this->getInput('path')); if (empty($pathComponents)) { - returnClientError('Path may not be empty'); + throwClientException('Path may not be empty'); } if (count($pathComponents) < 2) { $showID = $pathComponents[0]; @@ -72,7 +77,7 @@ class ARDMediathekBridge extends BridgeAbstract } } - $url = self::APIENDPOINT . $showID . '/?pageSize=' . self::PAGESIZE; + $url = self::APIENDPOINT . $showID . '?pageSize=' . 
self::PAGESIZE; $rawJSON = getContents($url); $processedJSON = json_decode($rawJSON); @@ -93,6 +98,17 @@ class ARDMediathekBridge extends BridgeAbstract $this->items[] = $item; } + $this->title = $processedJSON->title; + date_default_timezone_set($oldTz); } + + /** {@inheritdoc} */ + public function getName() + { + if (!empty($this->title)) { + return $this->title; + } + return parent::getName(); + } } diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index d37f3ce4..f7bbd58e 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -20,17 +20,14 @@ class AcrimedBridge extends FeedExpander public function collectData() { - $this->collectExpandableDatas( - static::URI . 'spip.php?page=backend', - $this->getInput('limit') - ); + $url = 'https://www.acrimed.org/spip.php?page=backend'; + $limit = $this->getInput('limit'); + $this->collectExpandableDatas($url, $limit); } - protected function parseItem($newsItem) + protected function parseItem(array $item) { - $item = parent::parseItem($newsItem); - - $articlePage = getSimpleHTMLDOM($newsItem->link); + $articlePage = getSimpleHTMLDOM($item['uri']); $article = sanitize($articlePage->find('article.article1', 0)->innertext); $article = defaultLinkTo($article, static::URI); $item['content'] = $article; diff --git a/bridges/ActivisionResearchBridge.php b/bridges/ActivisionResearchBridge.php new file mode 100644 index 00000000..88af4b46 --- /dev/null +++ b/bridges/ActivisionResearchBridge.php @@ -0,0 +1,45 @@ +find('div[id="home-blog-feed"]', 0); + if (!$dom) { + throw new \Exception(sprintf('Unable to find css selector on `%s`', $url)); + } + $dom = defaultLinkTo($dom, $this->getURI()); + foreach ($dom->find('div[class="blog-entry"]') as $article) { + $a = $article->find('a', 0); + + $blogimg = extractFromDelimiters($article->find('div[class="blog-img"]', 0)->style, 'url(', ')'); + + $title = htmlspecialchars_decode($article->find('div[class="title"]', 0)->plaintext); + $author = 
htmlspecialchars_decode($article->find('div[class="author]', 0)->plaintext); + $date = $article->find('div[class="pubdate"]', 0)->plaintext; + + $entry = getSimpleHTMLDOMCached($a->href, static::CACHE_TIMEOUT * 7 * 4); + $entry = defaultLinkTo($entry, $this->getURI()); + + $content = $entry->find('div[class="blog-body"]', 0); + $tagsremove = ['script', 'iframe', 'input', 'form']; + $content = sanitize($content, $tagsremove); + $content = '' . $content; + + $this->items[] = [ + 'title' => $title, + 'author' => $author, + 'uri' => $a->href, + 'content' => $content, + 'timestamp' => strtotime($date), + ]; + } + } +} diff --git a/bridges/AirBreizhBridge.php b/bridges/AirBreizhBridge.php index a822625f..272c74ee 100644 --- a/bridges/AirBreizhBridge.php +++ b/bridges/AirBreizhBridge.php @@ -32,8 +32,7 @@ class AirBreizhBridge extends BridgeAbstract public function collectData() { $html = ''; - $html = getSimpleHTMLDOM(static::URI . 'publications/?fwp_publications_thematiques=' . $this->getInput('theme')) - or returnClientError('No results for this query.'); + $html = getSimpleHTMLDOM(static::URI . 'publications/?fwp_publications_thematiques=' . 
$this->getInput('theme')); foreach ($html->find('article') as $article) { $item = []; diff --git a/bridges/AllegroBridge.php b/bridges/AllegroBridge.php index 55457416..3509afa1 100644 --- a/bridges/AllegroBridge.php +++ b/bridges/AllegroBridge.php @@ -13,26 +13,42 @@ class AllegroBridge extends BridgeAbstract 'exampleValue' => 'https://allegro.pl/kategoria/swieze-warzywa-cebula-318660', 'required' => true, ], - 'sessioncookie' => [ - 'name' => 'The \'wdctx\' session cookie', - 'title' => 'Paste the value of the \'wdctx\' cookie from your browser if you want to prevent Allegro imposing rate limits', - 'pattern' => '^.{250,};?$', - // phpcs:ignore - 'exampleValue' => 'v4.1-oCrmXTMqv2ppC21GTUCKLmUwRPP1ssQVALKuqwsZ1VXjcKgL2vO5TTRM5xMxS9GiyqxF1gAeyc-63dl0coUoBKXCXi_nAmr95yyqGpq2RAFoneZ4L399E8n6iYyemcuGARjAoSfjvLHJCEwvvHHynSgaxlFBu7hUnKfuy39zo9sSQdyTUjotJg3CAZ53q9v2raAnPCyGOAR4ytRILd9p24EJnxp7_oR0XbVPIo1hDa4WmjXFOxph8rHaO5tWd', - 'required' => false, + 'cookie' => [ + 'name' => 'The complete cookie value', + 'title' => 'Paste the cookie value from your browser, otherwise 403 gets returned', + 'required' => true, ], 'includeSponsoredOffers' => [ 'type' => 'checkbox', - 'name' => 'Include Sponsored Offers' + 'name' => 'Include Sponsored Offers', + 'defaultValue' => 'checked' + ], + 'includePromotedOffers' => [ + 'type' => 'checkbox', + 'name' => 'Include Promoted Offers', + 'defaultValue' => 'checked' ] ]]; public function getName() { - parse_str(parse_url($this->getInput('url'), PHP_URL_QUERY), $fields); + $url = $this->getInput('url'); + if (!$url) { + return parent::getName(); + } + $parsedUrl = parse_url($url, PHP_URL_QUERY); + if (!$parsedUrl) { + return parent::getName(); + } + parse_str($parsedUrl, $fields); - if ($query = array_key_exists('string', $fields) ? 
urldecode($fields['string']) : false) { - return $query; + if (array_key_exists('string', $fields)) { + $f = urldecode($fields['string']); + } else { + $f = false; + } + if ($f) { + return $f; } return parent::getName(); @@ -49,96 +65,56 @@ class AllegroBridge extends BridgeAbstract $url = preg_replace('/([?&])order=[^&]+(&|$)/', '$1', $this->getInput('url')); $url .= (parse_url($url, PHP_URL_QUERY) ? '&' : '?') . 'order=n'; - $opts = []; + $html = getContents($url, [], [CURLOPT_COOKIE => $this->getInput('cookie')]); - // If a session cookie is provided - if ($sessioncookie = $this->getInput('sessioncookie')) { - $opts[CURLOPT_COOKIE] = 'wdctx=' . $sessioncookie; + $storeData = null; + if (preg_match('/]*>\s*(\{\s*?"__listing_StoreState".*\})\s*<\/script>/i', $html, $match)) { + $data = json_decode($match[1], true); + $storeData = $data['__listing_StoreState'] ?? null; } - $html = getSimpleHTMLDOM($url, [], $opts); + foreach ($storeData['items']['elements'] as $elements) { + if (!array_key_exists('offerId', $elements)) { + continue; + } + if (!$this->getInput('includeSponsoredOffers') && $elements['isSponsored']) { + continue; + } + if (!$this->getInput('includePromotedOffers') && $elements['promoted']) { + continue; + } - # if no results found - if ($html->find('.mzmg_6m.m9qz_yo._6a66d_-fJr5')) { - return; - } - - $results = $html->find('._6a66d_V7Lel article'); - - if (!$this->getInput('includeSponsoredOffers')) { - $results = array_filter($results, function ($node) { - return $node->{'data-analytics-view-label'} != 'showSponsoredItems'; - }); - } - - foreach ($results as $post) { $item = []; + $item['uid'] = $elements['offerId']; + $item['uri'] = $elements['url']; + $item['title'] = $elements['alt']; - $item['uri'] = $post->find('._6a66d_LX75-', 0)->href; - -//TODO: port this over, whatever it does, from https://github.com/MK-PL/AllegroRSS -// if (arrayLinks.includes('events/clicks?')) { -// let sponsoredLink = new URL(arrayLinks).searchParams.get('redirect') 
-// arrayLinks = sponsoredLink.slice(0, sponsoredLink.indexOf('?')) -// } - - $item['title'] = $post->find('._6a66d_LX75-', 0)->innertext; - - $item['uid'] = $post->{'data-analytics-view-value'}; - - $descriptionPatterns = ['/<\s*dt[^>]*>\b/', '/<\/dt>/', '/<\s*dd[^>]*>\b/', '/<\/dd>/']; - $descriptionReplacements = ['', ': ', '', '  ']; - $description = $post->find('.m7er_k4.mpof_5r.mpof_z0_s', 0)->innertext; - $descriptionPretty = preg_replace($descriptionPatterns, $descriptionReplacements, $description); - - $buyNowAuction = $post->find('.mqu1_g3.mvrt_0.mgn2_12', 0)->innertext ?? ''; - $buyNowAuction = str_replace(' find('._6a66d_ImOzU', 0)->innertext ?? ''; - - $price = $post->find('._6a66d_6R3iN', 0)->plaintext; - $price = empty($auctionTimeLeft) ? $price : $price . '- kwota licytacji'; - - $image = $post->find('._6a66d_44ioA img', 0)->{'data-src'} ?: $post->find('._6a66d_44ioA img', 0)->src ?? false; + $image = $elements['photos'][0]['medium']; if ($image) { $item['enclosures'] = [$image . '#.image']; } - $offerExtraInfo = array_filter($post->find('.mqu1_g3.mgn2_12'), function ($node) { - return empty($node->find('.mvrt_0')); - }); + $price = $elements['price']['mainPrice']['amount']; + $currency = $elements['price']['mainPrice']['currency']; + $sellerType = $elements['seller']['title']; - $offerExtraInfo = $offerExtraInfo[0]->plaintext ?? ''; + $item['categories'] = [$sellerType]; - $isSmart = $post->find('._6a66d_TC2Zk', 0)->innertext ?? ''; - if (str_contains($isSmart, 'z kurierem')) { - $offerExtraInfo .= ', Smart z kurierem'; - } else { - $offerExtraInfo .= ', Smart'; + $description = ''; + foreach ($elements['parameters'] as $parameter) { + $item['categories'] = array_merge($item['categories'], $parameter['values']); + $description .= '
' . $parameter['name'] . ': ' . implode(',', $parameter['values']) . '
'; } - $item['categories'] = []; - $parameters = $post->find('dd'); - foreach ($parameters as $parameter) { - if (in_array(strtolower($parameter->innertext), ['brak', 'nie'])) { - continue; - } - - $item['categories'][] = $parameter->innertext; - } - - $item['content'] = $descriptionPretty - . '
' - . $price - . '
' - . $auctionTimeLeft - . '
' - . $buyNowAuction - . '
' - . $offerExtraInfo + $item['content'] = '
' + . $price . ' ' . $currency + . '
' + . $sellerType . '
' + . $description . '

'; $this->items[] = $item; } } } + diff --git a/bridges/AllocineFRBridge.php b/bridges/AllocineFRBridge.php index e7b2adb2..f5a5b33a 100644 --- a/bridges/AllocineFRBridge.php +++ b/bridges/AllocineFRBridge.php @@ -57,7 +57,7 @@ class AllocineFRBridge extends BridgeAbstract if (array_key_exists($category, $categories)) { return static::URI . $this->getLastSeasonURI($categories[$category]); } else { - returnClientError('Emission inconnue'); + throwClientException('Emission inconnue'); } } diff --git a/bridges/AllocineFRSortiesBridge.php b/bridges/AllocineFRSortiesBridge.php index b77c2f9b..a75187be 100644 --- a/bridges/AllocineFRSortiesBridge.php +++ b/bridges/AllocineFRSortiesBridge.php @@ -24,6 +24,7 @@ class AllocineFRSortiesBridge extends BridgeAbstract $thumb = $element->find('figure.thumbnail', 0); $meta = $element->find('div.meta-body', 0); $synopsis = $element->find('div.synopsis', 0); + $date = $element->find('span.date', 0); $title = $element->find('a[class*=meta-title-link]', 0); $content = trim(defaultLinkTo($thumb->outertext . $meta->outertext . $synopsis->outertext, static::URI)); @@ -34,8 +35,32 @@ class AllocineFRSortiesBridge extends BridgeAbstract $item['content'] = $content; $item['title'] = trim($title->innertext); + $item['timestamp'] = $this->frenchPubDateToTimestamp($date->plaintext); $item['uri'] = static::BASE_URI . '/' . 
substr($title->href, 1); $this->items[] = $item; } } + + private function frenchPubDateToTimestamp($date) + { + return strtotime( + strtr( + strtolower($date), + [ + 'janvier' => 'jan', + 'février' => 'feb', + 'mars' => 'march', + 'avril' => 'apr', + 'mai' => 'may', + 'juin' => 'jun', + 'juillet' => 'jul', + 'août' => 'aug', + 'septembre' => 'sep', + 'octobre' => 'oct', + 'novembre' => 'nov', + 'décembre' => 'dec' + ] + ) + ); + } } diff --git a/bridges/AmazonPriceTrackerBridge.php b/bridges/AmazonPriceTrackerBridge.php index 6de451f1..45eab097 100644 --- a/bridges/AmazonPriceTrackerBridge.php +++ b/bridges/AmazonPriceTrackerBridge.php @@ -2,7 +2,7 @@ class AmazonPriceTrackerBridge extends BridgeAbstract { - const MAINTAINER = 'captn3m0, sal0max'; + const MAINTAINER = 'captn3m0, sal0max, bagnacauda'; const NAME = 'Amazon Price Tracker'; const URI = 'https://www.amazon.com/'; const CACHE_TIMEOUT = 3600; // 1h @@ -13,7 +13,7 @@ class AmazonPriceTrackerBridge extends BridgeAbstract 'asin' => [ 'name' => 'ASIN', 'required' => true, - 'exampleValue' => 'B071GB1VMQ', + 'exampleValue' => 'B0923XT6K7', // https://stackoverflow.com/a/12827734 'pattern' => 'B[\dA-Z]{9}|\d{9}(X|\d)', ], @@ -125,14 +125,13 @@ class AmazonPriceTrackerBridge extends BridgeAbstract */ private function getImage($html) { + $image = 'https://placekitten.com/200/300'; $imageSrc = $html->find('#main-image-container img', 0); - if ($imageSrc) { $hiresImage = $imageSrc->getAttribute('data-old-hires'); $dynamicImageAttribute = $imageSrc->getAttribute('data-a-dynamic-image'); $image = $hiresImage ?: $this->parseDynamicImage($dynamicImageAttribute); } - $image = $image ?: 'https://placekitten.com/200/300'; return << @@ -147,7 +146,7 @@ EOT; { $uri = $this->getURI(); - return getSimpleHTMLDOM($uri) ?: returnServerError('Could not request Amazon.'); + return getSimpleHTMLDOM($uri); } private function scrapePriceFromMetrics($html) @@ -170,19 +169,23 @@ EOT; private function scrapePriceTwister($html) { - $str 
= $html->find('.twister-plus-buying-options-price-data', 0); + $json = $html->find('.twister-plus-buying-options-price-data', 0); + if ($json == null) { + return null; + } - $data = json_decode($str->innertext, true); - if (count($data) === 1) { - $data = $data[0]; + $data = json_decode($json->innertext, true); + foreach ($data as $key => $value) { + $value = $value[0]; return [ - 'displayPrice' => $data['displayPrice'], - 'currency' => $data['currency'], - 'shipping' => '0', + 'displayPrice' => $value['displayPrice'], + 'price' => $value['priceAmount'], + 'currency' => $value['currencySymbol'], + 'shipping' => null, ]; } - return false; + return null; } private function scrapePriceGeneric($html) @@ -207,9 +210,21 @@ EOT; } $priceString = str_replace(str_split(self::WHITESPACE), '', $priceDiv->plaintext); - preg_match('/(\d+\.\d{0,2})/', $priceString, $matches); + $price = null; + $priceFound = false; + + // find longest repeated string + for ($offset = 0; $offset < strlen($priceString); $offset++) { + for ($length = 1; substr_count($priceString, substr($priceString, $offset, $length + 1)) >= 2; $length++) { + $priceFound = true; + } + + if ($priceFound) { + $price = substr($priceString, $offset, $length); + break; + } + } - $price = $matches[0] ?? null; $currency = str_replace($price, '', $priceString); if ($price != null && $currency != null) { @@ -217,7 +232,7 @@ EOT; 'price' => $price, 'displayPrice' => null, 'currency' => $currency, - 'shipping' => '0' + 'shipping' => null ]; } return $default; @@ -228,7 +243,7 @@ EOT; $html = $this->getHtml(); $this->title = $this->getTitle($html); $image = $this->getImage($html); - $data = $this->scrapePriceGeneric($html); + $data = $this->scrapePriceTwister($html) ?? $this->scrapePriceGeneric($html); // render $content = ''; @@ -237,7 +252,7 @@ EOT; $price = sprintf('%s %s', $data['price'], $data['currency']); } $content .= sprintf('%s
Price: %s', $image, $price); - if ($data['shipping'] !== '0') { + if ($data['shipping'] !== null) { $content .= sprintf('
Shipping: %s %s
', $data['shipping'], $data['currency']); } diff --git a/bridges/AnfrBridge.php b/bridges/AnfrBridge.php new file mode 100644 index 00000000..391fde77 --- /dev/null +++ b/bridges/AnfrBridge.php @@ -0,0 +1,278 @@ + [ + 'departement' => [ + 'name' => 'Département', + 'type' => 'list', + 'values' => [ + 'Tous' => null, + 'Ain' => '001', + 'Aisne' => '002', + 'Allier' => '003', + 'Alpes-de-Haute-Provence' => '004', + 'Hautes-Alpes' => '005', + 'Alpes-Maritimes' => '006', + 'Ardèche' => '007', + 'Ardennes' => '008', + 'Ariège' => '009', + 'Aube' => '010', + 'Aude' => '011', + 'Aveyron' => '012', + 'Bouches-du-Rhône' => '013', + 'Calvados' => '014', + 'Cantal' => '015', + 'Charente' => '016', + 'Charente-Maritime' => '017', + 'Cher' => '018', + 'Corrèze' => '019', + 'Corse-du-Sud' => '02A', + 'Haute-Corse' => '02B', + 'Côte-d\'Or' => '021', + 'Côtes-d\'Armor' => '022', + 'Creuse' => '023', + 'Dordogne' => '024', + 'Doubs' => '025', + 'Drôme' => '026', + 'Eure' => '027', + 'Eure-et-Loir' => '028', + 'Finistère' => '029', + 'Gard' => '030', + 'Haute-Garonne' => '031', + 'Gers' => '032', + 'Gironde' => '033', + 'Hérault' => '034', + 'Ille-et-Vilaine' => '035', + 'Indre' => '036', + 'Indre-et-Loire' => '037', + 'Isère' => '038', + 'Jura' => '039', + 'Landes' => '040', + 'Loir-et-Cher' => '041', + 'Loire' => '042', + 'Haute-Loire' => '043', + 'Loire-Atlantique' => '044', + 'Loiret' => '045', + 'Lot' => '046', + 'Lot-et-Garonne' => '047', + 'Lozère' => '048', + 'Maine-et-Loire' => '049', + 'Manche' => '050', + 'Marne' => '051', + 'Haute-Marne' => '052', + 'Mayenne' => '053', + 'Meurthe-et-Moselle' => '054', + 'Meuse' => '055', + 'Morbihan' => '056', + 'Moselle' => '057', + 'Nièvre' => '058', + 'Nord' => '059', + 'Oise' => '060', + 'Orne' => '061', + 'Pas-de-Calais' => '062', + 'Puy-de-Dôme' => '063', + 'Pyrénées-Atlantiques' => '064', + 'Hautes-Pyrénées' => '065', + 'Pyrénées-Orientales' => '066', + 'Bas-Rhin' => '067', + 'Haut-Rhin' => '068', + 'Rhône' => '069', + 
'Haute-Saône' => '070', + 'Saône-et-Loire' => '071', + 'Sarthe' => '072', + 'Savoie' => '073', + 'Haute-Savoie' => '074', + 'Paris' => '075', + 'Seine-Maritime' => '076', + 'Seine-et-Marne' => '077', + 'Yvelines' => '078', + 'Deux-Sèvres' => '079', + 'Somme' => '080', + 'Tarn' => '081', + 'Tarn-et-Garonne' => '082', + 'Var' => '083', + 'Vaucluse' => '084', + 'Vendée' => '085', + 'Vienne' => '086', + 'Haute-Vienne' => '087', + 'Vosges' => '088', + 'Yonne' => '089', + 'Territoire de Belfort' => '090', + 'Essonne' => '091', + 'Hauts-de-Seine' => '092', + 'Seine-Saint-Denis' => '093', + 'Val-de-Marne' => '094', + 'Val-d\'Oise' => '095', + 'Guadeloupe' => '971', + 'Martinique' => '972', + 'Guyane' => '973', + 'La Réunion' => '974', + 'Saint-Pierre-et-Miquelon' => '975', + 'Mayotte' => '976', + 'Saint-Barthélemy' => '977', + 'Saint-Martin' => '978', + 'Terres australes et antarctiques françaises' => '984', + 'Wallis-et-Futuna' => '986', + 'Polynésie française' => '987', + 'Nouvelle-Calédonie' => '988', + 'Île de Clipperton' => '989' + ] + ], + 'generation' => [ + 'name' => 'Génération', + 'type' => 'list', + 'values' => [ + 'Tous' => null, + '2G' => '2G', + '3G' => '3G', + '4G' => '4G', + '5G' => '5G', + ] + ], + 'operateur' => [ + 'name' => 'Opérateur', + 'type' => 'list', + 'values' => [ + 'Tous' => null, + 'Bouygues Télécom' => 'BOUYGUES TELECOM', + 'Dauphin Télécom' => 'DAUPHIN TELECOM', + 'Digiciel' => 'DIGICEL', + 'Free Caraïbes' => 'FREE CARAIBES', + 'Free Mobile' => 'FREE MOBILE', + 'GLOBALTEL' => 'GLOBALTEL', + 'Office des postes et télécommunications de Nouvelle Calédonie' => 'Gouv Nelle Calédonie (OPT)', + 'Maore Mobile' => 'MAORE MOBILE', + 'ONATi' => 'ONATI', + 'Orange' => 'ORANGE', + 'Outremer Telecom' => 'OUTREMER TELECOM', + 'Vodafone polynésie' => 'PMT/VODAPHONE', + 'SFR' => 'SFR', + 'SPM Télécom' => 'SPM TELECOM', + 'Service des Postes et Télécommunications de Polynésie Française' => 'Gouv Nelle Calédonie (OPT)', + 'SRR' => 'SRR', + 'Station étrangère' 
=> 'Station étrangère', + 'Telco OI' => 'TELCO IO', + 'United Telecommunication Services Caraïbes' => 'UTS Caraibes', + 'Ora Mobile' => 'VITI SAS', + 'Zeop' => 'ZEOP' + ] + ], + 'statut' => [ + 'name' => 'Statut', + 'type' => 'list', + 'values' => [ + 'Tous' => null, + 'En service' => 'En service', + 'Projet approuvé' => 'Projet approuvé', + 'Techniquement opérationnel' => 'Techniquement opérationnel', + ] + ] + ] + ]; + + public function collectData() + { + $urlParts = [ + 'id' => 'observatoire_2g_3g_4g', + 'resource_id' => '88ef0887-6b0f-4d3f-8545-6d64c8f597da', + 'fields' => 'id,adm_lb_nom,sta_nm_dpt,emr_lb_systeme,generation,date_maj,sta_nm_anfr,adr_lb_lieu,adr_lb_add1,adr_lb_add2,adr_lb_add3,adr_nm_cp,statut', + 'rows' => 10000 + ]; + + if (!empty($this->getInput('departement'))) { + $urlParts['refine.sta_nm_dpt'] = urlencode($this->getInput('departement')); + } + + if (!empty($this->getInput('generation'))) { + $urlParts['refine.generation'] = $this->getInput('generation'); + } + + if (!empty($this->getInput('operateur'))) { + // http_build_query() already does urlencoding so this call is redundant + $urlParts['refine.adm_lb_nom'] = urlencode($this->getInput('operateur')); + } + + if (!empty($this->getInput('statut'))) { + $urlParts['refine.statut'] = urlencode($this->getInput('statut')); + } + + // API seems to not play well with urlencoded data + $url = urljoin(static::URI, '/d4c/api/records/1.0/download/?' . urldecode(http_build_query($urlParts))); + + $json = getContents($url); + $data = Json::decode($json, false); + $records = $data->records; + $frequenciesByStation = []; + foreach ($records as $record) { + if (!isset($frequenciesByStation[$record->fields->sta_nm_anfr])) { + $street = sprintf( + '%s %s %s', + $record->fields->adr_lb_add1 ?? '', + $record->fields->adr_lb_add2 ?? '', + $record->fields->adr_lb_add3 ?? 
'' + ); + $frequenciesByStation[$record->fields->sta_nm_anfr] = [ + 'id' => $record->fields->sta_nm_anfr, + 'operator' => $record->fields->adm_lb_nom, + 'frequencies' => [], + 'lastUpdate' => 0, + 'address' => [ + 'street' => trim($street), + 'postCode' => $record->fields->adr_nm_cp, + 'city' => $record->fields->adr_lb_lieu + ] + ]; + } + + $frequenciesByStation[$record->fields->sta_nm_anfr]['frequencies'][] = [ + 'generation' => $record->fields->generation, + 'frequency' => $record->fields->emr_lb_systeme, + 'status' => $record->fields->statut, + 'updatedAt' => strtotime($record->fields->date_maj), + ]; + + $frequenciesByStation[$record->fields->sta_nm_anfr]['lastUpdate'] = max( + $frequenciesByStation[$record->fields->sta_nm_anfr]['lastUpdate'], + strtotime($record->fields->date_maj) + ); + } + + usort($frequenciesByStation, static fn ($a, $b) => $b['lastUpdate'] <=> $a['lastUpdate']); + + foreach ($frequenciesByStation as $station) { + $title = sprintf( + '[%s] Mise à jour de la station n°%s à %s (%s)', + $station['operator'], + $station['id'], + $station['address']['city'], + $station['address']['postCode'] + ); + + $array_reduce = array_reduce($station['frequencies'], static function ($carry, $frequency) { + return sprintf('%s
  • %s : %s
  • ', $carry, $frequency['frequency'], $frequency['status']); + }, ''); + + $content = sprintf( + '

    Adresse complète

    %s
    %s
    %s

    Fréquences

      %s

    ', + $station['address']['street'], + $station['address']['postCode'], + $station['address']['city'], + $array_reduce + ); + + $this->items[] = [ + 'uid' => $station['id'], + 'timestamp' => $station['lastUpdate'], + 'title' => $title, + 'content' => $content, + ]; + } + } +} \ No newline at end of file diff --git a/bridges/AnidexBridge.php b/bridges/AnidexBridge.php index 6d41365b..391ee4f8 100644 --- a/bridges/AnidexBridge.php +++ b/bridges/AnidexBridge.php @@ -152,7 +152,7 @@ class AnidexBridge extends BridgeAbstract } } if (empty($results) && empty($this->getInput('q'))) { - returnServerError('No results from Anidex: ' . $search_url); + throwServerException('No results from Anidex: ' . $search_url); } //Process each item individually diff --git a/bridges/AnisearchBridge.php b/bridges/AnisearchBridge.php new file mode 100644 index 00000000..c6f3d291 --- /dev/null +++ b/bridges/AnisearchBridge.php @@ -0,0 +1,87 @@ + [ + 'name' => 'Dub', + 'type' => 'list', + 'values' => [ + 'DE' + => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=de&sort=date&order=desc&view=4', + 'EN' + => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=en&sort=date&order=desc&view=4', + 'JP' + => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=ja&sort=date&order=desc&view=4' + ] + ], + 'trailers' => [ + 'name' => 'Trailers', + 'type' => 'checkbox', + 'title' => 'Will include trailes', + 'defaultValue' => false + ] + ]]; + + public function collectData() + { + $baseurl = 'https://www.anisearch.de/'; + $trailers = false; + $trailers = $this->getInput('trailers'); + $limit = 10; + if ($trailers) { + $limit = 5; + } + + $dom = getSimpleHTMLDOM($this->getInput('category')); + + foreach ($dom->find('li.btype0') as $key => $li) { + if ($key >= $limit) { + break; + } + + $a = $li->find('a', 0); + $title = $a->find('span.title', 0); + $url = $baseurl . 
$a->href; + + //get article + $domarticle = getSimpleHTMLDOM($url); + $content = $domarticle->find('div.details-text', 0); + + //get header-image and set absolute src + $headerimage = $domarticle->find('img#details-cover', 0); + $src = $headerimage->src; + + foreach ($content->find('.hidden') as $element) { + $element->remove(); + } + + //get trailer + $ytlink = ''; + if ($trailers) { + $trailerlink = $domarticle->find('section#trailers > div > div.swiper > ul.swiper-wrapper > li.swiper-slide > a', 0); + if (isset($trailerlink)) { + $trailersite = getSimpleHTMLDOM($baseurl . $trailerlink->href); + $trailer = $trailersite->find('div#video > iframe', 0); + $trailer = $trailer->{'data-xsrc'}; + $ytlink = << + EOT; + } + } + + $this->items[] = [ + 'title' => $title->plaintext, + 'uri' => $url, + 'content' => $headerimage . '
    ' . $content . $ytlink + ]; + } + } +} diff --git a/bridges/AnnasArchiveBridge.php b/bridges/AnnasArchiveBridge.php new file mode 100644 index 00000000..4a1c3e91 --- /dev/null +++ b/bridges/AnnasArchiveBridge.php @@ -0,0 +1,183 @@ + [ + 'name' => 'Query', + 'exampleValue' => 'apothecary diaries', + 'required' => true, + ], + 'ext' => [ + 'name' => 'Extension', + 'type' => 'list', + 'values' => [ + 'Any' => null, + 'azw3' => 'azw3', + 'cbr' => 'cbr', + 'cbz' => 'cbz', + 'djvu' => 'djvu', + 'epub' => 'epub', + 'fb2' => 'fb2', + 'fb2.zip' => 'fb2.zip', + 'mobi' => 'mobi', + 'pdf' => 'pdf', + ] + ], + 'lang' => [ + 'name' => 'Language', + 'type' => 'list', + 'values' => [ + 'Any' => null, + 'Afrikaans [af]' => 'af', + 'Arabic [ar]' => 'ar', + 'Bangla [bn]' => 'bn', + 'Belarusian [be]' => 'be', + 'Bulgarian [bg]' => 'bg', + 'Catalan [ca]' => 'ca', + 'Chinese [zh]' => 'zh', + 'Church Slavic [cu]' => 'cu', + 'Croatian [hr]' => 'hr', + 'Czech [cs]' => 'cs', + 'Danish [da]' => 'da', + 'Dongxiang [sce]' => 'sce', + 'Dutch [nl]' => 'nl', + 'English [en]' => 'en', + 'French [fr]' => 'fr', + 'German [de]' => 'de', + 'Greek [el]' => 'el', + 'Hebrew [he]' => 'he', + 'Hindi [hi]' => 'hi', + 'Hungarian [hu]' => 'hu', + 'Indonesian [id]' => 'id', + 'Irish [ga]' => 'ga', + 'Italian [it]' => 'it', + 'Japanese [ja]' => 'ja', + 'Kazakh [kk]' => 'kk', + 'Korean [ko]' => 'ko', + 'Latin [la]' => 'la', + 'Latvian [lv]' => 'lv', + 'Lithuanian [lt]' => 'lt', + 'Luxembourgish [lb]' => 'lb', + 'Ndolo [ndl]' => 'ndl', + 'Norwegian [no]' => 'no', + 'Persian [fa]' => 'fa', + 'Polish [pl]' => 'pl', + 'Portuguese [pt]' => 'pt', + 'Romanian [ro]' => 'ro', + 'Russian [ru]' => 'ru', + 'Serbian [sr]' => 'sr', + 'Spanish [es]' => 'es', + 'Swedish [sv]' => 'sv', + 'Tamil [ta]' => 'ta', + 'Traditional Chinese [zh‑Hant]' => 'zh‑Hant', + 'Turkish [tr]' => 'tr', + 'Ukrainian [uk]' => 'uk', + 'Unknown language' => '_empty', + 'Unknown language [und]' => 'und', + 'Unknown language [urdu]' => 'urdu', + 'Urdu 
[ur]' => 'ur', + 'Vietnamese [vi]' => 'vi', + 'Welsh [cy]' => 'cy', + ] + ], + 'content' => [ + 'name' => 'Type', + 'type' => 'list', + 'values' => [ + 'Any' => null, + 'Book (fiction)' => 'book_fiction', + 'Book (non‑fiction)' => 'book_nonfiction', + 'Book (unknown)' => 'book_unknown', + 'Comic book' => 'book_comic', + 'Journal article' => 'journal_article', + 'Magazine' => 'magazine', + 'Standards document' => 'standards_document', + ] + ], + 'src' => [ + 'name' => 'Source', + 'type' => 'list', + 'values' => [ + 'Any' => null, + 'Internet Archive' => 'ia', + 'Libgen.li' => 'lgli', + 'Libgen.rs' => 'lgrs', + 'Sci‑Hub' => 'scihub', + 'Z‑Library' => 'zlib', + ] + ], + ] + ]; + + public function collectData() + { + $url = $this->getURI(); + $list = getSimpleHTMLDOMCached($url); + $list = defaultLinkTo($list, self::URI); + + // Don't attempt to do anything if not found message is given + if ($list->find('.js-not-found-additional')) { + return; + } + + $elements = $list->find('#aarecord-list > div'); + foreach ($elements as $element) { + // stop added entries once partial match list starts + if (str_contains($element->innertext, 'partial match')) { + break; + } + if ($element = $element->find('a', 0)) { + $item = []; + $item['title'] = $element->find('h3', 0)->plaintext; + $item['author'] = $element->find('div.italic', 0)->plaintext; + $item['uri'] = $element->href; + $item['content'] = $element->plaintext; + $item['uid'] = $item['uri']; + + $item_html = getSimpleHTMLDOMCached($item['uri'], 86400 * 20); + if ($item_html) { + $item_html = defaultLinkTo($item_html, self::URI); + $item['content'] .= $item_html->find('main img', 0); + $item['content'] .= $item_html->find('main .mt-4', 0); // Summary + foreach ($item_html->find('main ul.mb-4 > li > a.js-download-link') as $file) { + if (!str_contains($file->href, 'fast_download')) { + $item['enclosures'][] = $file->href; + } + } + // Remove bulk torrents from enclosures list + $item['enclosures'] = 
array_diff($item['enclosures'], [self::URI . 'datasets']); + } + + $this->items[] = $item; + } + } + } + + public function getName() + { + $name = parent::getName(); + if ($this->getInput('q') != null) { + $name .= ' - ' . $this->getInput('q'); + } + return $name; + } + + public function getURI() + { + $params = array_filter([ // Filter to remove non-provided parameters + 'q' => $this->getInput('q'), + 'ext' => $this->getInput('ext'), + 'lang' => $this->getInput('lang'), + 'src' => $this->getInput('src'), + 'content' => $this->getInput('content'), + ]); + $url = parent::getURI() . 'search?sort=newest&' . http_build_query($params); + return $url; + } +} diff --git a/bridges/AppleAppStoreBridge.php b/bridges/AppleAppStoreBridge.php index a5fab59b..d6872a11 100644 --- a/bridges/AppleAppStoreBridge.php +++ b/bridges/AppleAppStoreBridge.php @@ -52,120 +52,183 @@ class AppleAppStoreBridge extends BridgeAbstract ], 'defaultValue' => 'US', ], + 'debug' => [ + 'name' => 'Debug Mode', + 'type' => 'checkbox', + 'defaultValue' => false + ] ]]; const PLATFORM_MAPPING = [ - 'iphone' => 'ios', - 'ipad' => 'ios', + 'iphone' => 'ios', + 'ipad' => 'ios', + 'mac' => 'osx' ]; - private function makeHtmlUrl($id, $country) + private $name; + + private function makeHtmlUrl() { - return 'https://apps.apple.com/' . $country . '/app/id' . $id; + $id = $this->getInput('id'); + $country = $this->getInput('country'); + return sprintf('https://apps.apple.com/%s/app/id%s', $country, $id); } - private function makeJsonUrl($id, $platform, $country) - { - return "https://amp-api.apps.apple.com/v1/catalog/$country/apps/$id?platform=$platform&extend=versionHistory"; - } - - public function getName() - { - if (isset($this->name)) { - return $this->name . 
' - AppStore Updates'; - } - - return parent::getName(); - } - - /** - * In case of some platforms, the data is present in the initial response - */ - private function getDataFromShoebox($id, $platform, $country) - { - $uri = $this->makeHtmlUrl($id, $country); - $html = getSimpleHTMLDOMCached($uri, 3600); - $script = $html->find('script[id="shoebox-ember-data-store"]', 0); - - $json = json_decode($script->innertext, true); - return $json['data']; - } - - private function getJWTToken($id, $platform, $country) - { - $uri = $this->makeHtmlUrl($id, $country); - - $html = getSimpleHTMLDOMCached($uri, 3600); - - $meta = $html->find('meta[name="web-experience-app/config/environment"]', 0); - - $json = urldecode($meta->content); - - $json = json_decode($json); - - return $json->MEDIA_API->token; - } - - private function getAppData($id, $platform, $country, $token) - { - $uri = $this->makeJsonUrl($id, $platform, $country); - - $headers = [ - "Authorization: Bearer $token", - 'Origin: https://apps.apple.com', - ]; - - $json = json_decode(getContents($uri, $headers), true); - - return $json['data'][0]; - } - - /** - * Parses the version history from the data received - * @return array list of versions with details on each element - */ - private function getVersionHistory($data, $platform) - { - switch ($platform) { - case 'mac': - return $data['relationships']['platforms']['data'][0]['attributes']['versionHistory']; - default: - $os = self::PLATFORM_MAPPING[$platform]; - return $data['attributes']['platformAttributes'][$os]['versionHistory']; - } - } - - public function collectData() + private function makeJsonUrl() { $id = $this->getInput('id'); $country = $this->getInput('country'); $platform = $this->getInput('p'); - switch ($platform) { - case 'mac': - $data = $this->getDataFromShoebox($id, $platform, $country); - break; + $platform_param = ($platform === 'mac') ? 
'mac' : $platform; - default: - $token = $this->getJWTToken($id, $platform, $country); - $data = $this->getAppData($id, $platform, $country, $token); + return sprintf( + 'https://amp-api-edge.apps.apple.com/v1/catalog/%s/apps/%s?platform=%s&extend=versionHistory', + $country, + $id, + $platform_param + ); + } + + public function getName() + { + if (isset($this->name)) { + return sprintf('%s - AppStore Updates', $this->name); } - $versionHistory = $this->getVersionHistory($data, $platform); - $name = $this->name = $data['attributes']['name']; - $author = $data['attributes']['artistName']; + return parent::getName(); + } + + private function debugLog($message) + { + if ($this->getInput('debug')) { + $this->logger->info(sprintf('[AppleAppStoreBridge] %s', $message)); + } + } + + private function getHtml() + { + $url = $this->makeHtmlUrl(); + $this->debugLog(sprintf('Fetching HTML from: %s', $url)); + + return getSimpleHTMLDOM($url); + } + + private function getJWTToken() + { + $html = $this->getHtml(); + $meta = $html->find('meta[name="web-experience-app/config/environment"]', 0); + + if (!$meta || !isset($meta->content)) { + throw new \Exception('JWT token not found in page content'); + } + + $decoded_content = urldecode($meta->content); + $this->debugLog('Found meta tag content'); + + try { + $decoded_json = Json::decode($decoded_content); + } catch (\Exception $e) { + throw new \Exception(sprintf('Failed to parse JSON from meta tag: %s', $e->getMessage())); + } + + if (!isset($decoded_json['MEDIA_API']['token'])) { + throw new \Exception('Token field not found in JSON structure'); + } + + $token = $decoded_json['MEDIA_API']['token']; + $this->debugLog('Successfully extracted JWT token'); + return $token; + } + + private function getAppData() + { + $token = $this->getJWTToken(); + + $url = $this->makeJsonUrl(); + $this->debugLog(sprintf('Fetching data from API: %s', $url)); + + $headers = [ + 'Authorization: Bearer ' . 
$token, + 'Origin: https://apps.apple.com', + 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + ]; + + $content = getContents($url, $headers); + + try { + $json = Json::decode($content); + } catch (\Exception $e) { + throw new \Exception(sprintf('Failed to parse API response: %s', $e->getMessage())); + } + + if (!isset($json['data']) || empty($json['data'])) { + throw new \Exception('No app data found in API response'); + } + + $this->debugLog('Successfully retrieved app data from API'); + return $json['data'][0]; + } + + private function extractAppDetails($data) + { + if (isset($data['attributes'])) { + $this->name = $data['attributes']['name'] ?? null; + $author = $data['attributes']['artistName'] ?? null; + $this->debugLog(sprintf('Found app details in attributes: %s by %s', $this->name, $author)); + return [$this->name, $author]; + } + + // Fallback to default values if not found + $this->name = sprintf('App %s', $this->getInput('id')); + $this->debugLog(sprintf('App details not found, using default: %s', $this->name)); + return [$this->name, 'Unknown Developer']; + } + + private function getVersionHistory($data) + { + $platform = $this->getInput('p'); + $this->debugLog(sprintf('Extracting version history for platform: %s', $platform)); + + // Get the mapped platform key (ios for iPhone/iPad, osx for Mac) + $platform_key = self::PLATFORM_MAPPING[$platform] ?? $platform; + + $version_history = $data['attributes']['platformAttributes'][$platform_key]['versionHistory'] ?? 
[]; + + if (empty($version_history)) { + $this->debugLog(sprintf('No version history found for %s', $platform)); + } + + return $version_history; + } + + public function collectData() + { + $this->debugLog(sprintf('Getting data for %s app', $this->getInput('p'))); + $data = $this->getAppData(); + + // Get app name and author using array destructuring + [$name, $author] = $this->extractAppDetails($data); + + // Get version history + $version_history = $this->getVersionHistory($data); + $this->debugLog(sprintf('Found %d versions for %s', count($version_history), $name)); + + foreach ($version_history as $entry) { + $version = $entry['versionDisplay'] ?? 'Unknown Version'; + $release_notes = $entry['releaseNotes'] ?? 'No release notes available'; + $release_date = $entry['releaseDate'] ?? 'Unknown Date'; - foreach ($versionHistory as $row) { $item = []; - - $item['content'] = nl2br($row['releaseNotes']); - $item['title'] = $name . ' - ' . $row['versionDisplay']; - $item['timestamp'] = $row['releaseDate']; + $item['title'] = sprintf('%s - %s', $name, $version); + $item['content'] = nl2br($release_notes) ?: 'No release notes available'; + $item['timestamp'] = $release_date; $item['author'] = $author; - - $item['uri'] = $this->makeHtmlUrl($id, $country); + $item['uri'] = $this->makeHtmlUrl(); $this->items[] = $item; } + + $this->debugLog(sprintf('Successfully collected %d items', count($this->items))); } -} +} \ No newline at end of file diff --git a/bridges/AppleMusicBridge.php b/bridges/AppleMusicBridge.php index 4c3e0e2f..81558b6d 100644 --- a/bridges/AppleMusicBridge.php +++ b/bridges/AppleMusicBridge.php @@ -18,9 +18,45 @@ class AppleMusicBridge extends BridgeAbstract 'required' => true, ], ]]; - const CACHE_TIMEOUT = 21600; // 6 hours + const CACHE_TIMEOUT = 60 * 60 * 6; // 6 hours + + private $title; public function collectData() + { + $items = $this->getJson(); + $artist = $this->getArtist($items); + + $this->title = $artist->artistName; + + foreach ($items as 
$item) { + if ($item->wrapperType === 'collection') { + $copyright = $item->copyright ?? ''; + $artworkUrl500 = str_replace('/100x100', '/500x500', $item->artworkUrl100); + $artworkUrl2000 = str_replace('/100x100', '/2000x2000', $item->artworkUrl100); + $escapedCollectionName = htmlspecialchars($item->collectionName); + + $this->items[] = [ + 'title' => $item->collectionName, + 'uri' => $item->collectionViewUrl, + 'timestamp' => $item->releaseDate, + 'enclosures' => $artworkUrl500, + 'author' => $item->artistName, + 'content' => "
    + artworkUrl60 60w, $item->artworkUrl100 100w, $artworkUrl500 500w, $artworkUrl2000 2000w\" + sizes=\"100%\" src=\"$artworkUrl2000\" + alt=\"Cover of $escapedCollectionName\" + style=\"display: block; margin: 0 auto;\" /> +
    + from artistLinkUrl\">$item->artistName
    $copyright +
    +
    ", + ]; + } + } + } + + private function getJson() { # Limit the amount of releases to 50 if ($this->getInput('limit') > 50) { @@ -29,29 +65,53 @@ class AppleMusicBridge extends BridgeAbstract $limit = $this->getInput('limit'); } - $url = 'https://itunes.apple.com/lookup?id=' - . $this->getInput('artist') - . '&entity=album&limit=' - . $limit . - '&sort=recent'; + $url = 'https://itunes.apple.com/lookup?id=' . $this->getInput('artist') . '&entity=album&limit=' . $limit . '&sort=recent'; $html = getSimpleHTMLDOM($url); - $json = json_decode($html); + $result = $json->results; - foreach ($json->results as $obj) { - if ($obj->wrapperType === 'collection') { - $this->items[] = [ - 'title' => $obj->artistName . ' - ' . $obj->collectionName, - 'uri' => $obj->collectionViewUrl, - 'timestamp' => $obj->releaseDate, - 'enclosures' => $obj->artworkUrl100, - 'content' => '

    ' - . $obj->artistName . ' - ' . $obj->collectionName - . '
    ' - . $obj->copyright, - ]; - } + if (!is_array($result) || count($result) == 0) { + throwServerException('There is no artist with id "' . $this->getInput('artist') . '".'); } + + return $result; + } + + private function getArtist($json) + { + $nameArray = array_filter($json, function ($obj) { + return $obj->wrapperType == 'artist'; + }); + + if (count($nameArray) === 1) { + return $nameArray[0]; + } + + return parent::getName(); + } + + public function getName() + { + if (isset($this->title)) { + return $this->title; + } + + return parent::getName(); + } + + public function getIcon() + { + if (empty($this->getInput('artist'))) { + return parent::getIcon(); + } + + // it isn't necessary to set the correct artist name into the url + $url = 'https://music.apple.com/us/artist/jon-bellion/' . $this->getInput('artist'); + $html = getSimpleHTMLDOMCached($url); + $image = $html->find('meta[property="og:image"]', 0)->content; + + $imageUpdatedSize = preg_replace('/\/\d*x\d*cw/i', '/144x144-999', $image); + + return $imageUpdatedSize; } } diff --git a/bridges/ArsTechnicaBridge.php b/bridges/ArsTechnicaBridge.php new file mode 100644 index 00000000..ac722dc9 --- /dev/null +++ b/bridges/ArsTechnicaBridge.php @@ -0,0 +1,118 @@ + [ + 'name' => 'Site section', + 'type' => 'list', + 'defaultValue' => 'index', + 'values' => [ + 'All' => 'index', + 'Apple' => 'apple', + 'Board Games' => 'cardboard', + 'Cars' => 'cars', + 'Features' => 'features', + 'Gaming' => 'gaming', + 'Information Technology' => 'technology-lab', + 'Science' => 'science', + 'Staff Blogs' => 'staff-blogs', + 'Tech Policy' => 'tech-policy', + 'Tech' => 'gadgets', + ] + ] + ]]; + + public function collectData() + { + $url = 'https://feeds.arstechnica.com/arstechnica/' . 
$this->getInput('section'); + $this->collectExpandableDatas($url, 10); + } + + protected function parseItem(array $item) + { + $item_html = getSimpleHTMLDOMCached($item['uri']); + $item_html = defaultLinkTo($item_html, self::URI); + + $content = ''; + $header = $item_html->find('article header', 0); + $leading = $header->find('p[class*=leading]', 0); + if ($leading != null) { + $content .= '

    ' . $leading->innertext . '

    '; + } + $intro_image = $header->find('img.intro-image', 0); + if ($intro_image != null) { + $content .= '
    ' . $intro_image; + + $image_caption = $header->find('.caption .caption-content', 0); + if ($image_caption != null) { + $content .= '
    ' . $image_caption->innertext . '
    '; + } + $content .= '
    '; + } + + foreach ($item_html->find('.post-content') as $content_tag) { + $content .= $content_tag->innertext; + } + + $item['content'] = str_get_html($content); + + $parsely = $item_html->find('[name="parsely-page"]', 0); + $parsely_json = json_decode(html_entity_decode($parsely->content), true); + $item['categories'] = $parsely_json['tags']; + + // Some lightboxes are nested in figures. I'd guess that's a + // bug in the website + foreach ($item['content']->find('figure div div.ars-lightbox') as $weird_lightbox) { + $weird_lightbox->parent->parent->outertext = $weird_lightbox; + } + + // It's easier to reconstruct the whole thing than remove + // duplicate reactive tags + foreach ($item['content']->find('.ars-lightbox') as $lightbox) { + $lightbox_content = ''; + foreach ($lightbox->find('.ars-lightbox-item') as $lightbox_item) { + $img = $lightbox_item->find('img', 0); + if ($img != null) { + $lightbox_content .= '
    ' . $img; + $caption = $lightbox_item->find('div.pswp-caption-content', 0); + if ($caption != null) { + $credit = $lightbox_item->find('div.ars-gallery-caption-credit', 0); + if ($credit != null) { + $credit->innertext = 'Credit: ' . $credit->innertext; + } + $lightbox_content .= '
    ' . $caption->innertext . '
    '; + } + $lightbox_content .= '
    '; + } + } + $lightbox->innertext = $lightbox_content; + } + + // remove various ars advertising + foreach ($item['content']->find('.ars-interlude-container') as $ad) { + $ad->remove(); + } + foreach ($item['content']->find('.toc-container') as $toc) { + $toc->remove(); + } + + // Mostly YouTube videos + $iframes = $item['content']->find('iframe'); + foreach ($iframes as $iframe) { + $iframe->outertext = '' . $iframe->src . ''; + } + // This fixed padding around the former iframes and actual inline videos + foreach ($item['content']->find('div[style*=aspect-ratio]') as $styled) { + $styled->removeAttribute('style'); + } + + $item['content'] = backgroundToImg($item['content']); + $item['uid'] = strval($parsely_json['post_id']); + return $item; + } +} diff --git a/bridges/Arte7Bridge.php b/bridges/Arte7Bridge.php index 239fc6ad..5898e881 100644 --- a/bridges/Arte7Bridge.php +++ b/bridges/Arte7Bridge.php @@ -156,6 +156,10 @@ class Arte7Bridge extends BridgeAbstract . $element['mainImage']['url'] . 
'" />'; + $item['itunes'] = [ + 'duration' => $durationSeconds, + ]; + $this->items[] = $item; } } diff --git a/bridges/AsahiShimbunAJWBridge.php b/bridges/AsahiShimbunAJWBridge.php index 03bee6ba..873eb351 100644 --- a/bridges/AsahiShimbunAJWBridge.php +++ b/bridges/AsahiShimbunAJWBridge.php @@ -45,7 +45,6 @@ class AsahiShimbunAJWBridge extends BridgeAbstract foreach ($html->find('#MainInner li a') as $element) { if ($element->parent()->class == 'HeadlineTopImage-S') { - Debug::log('Skip Headline, it is repeated below'); continue; } $item = []; diff --git a/bridges/AskfmBridge.php b/bridges/AskfmBridge.php deleted file mode 100644 index 0a326417..00000000 --- a/bridges/AskfmBridge.php +++ /dev/null @@ -1,79 +0,0 @@ - [ - 'u' => [ - 'name' => 'Username', - 'required' => true, - 'exampleValue' => 'ApprovedAndReal' - ] - ] - ]; - - public function collectData() - { - $html = getSimpleHTMLDOM($this->getURI()); - - $html = defaultLinkTo($html, self::URI); - - foreach ($html->find('article.streamItem-answer') as $element) { - $item = []; - $item['uri'] = $element->find('a.streamItem_meta', 0)->href; - $question = trim($element->find('header.streamItem_header', 0)->innertext); - - $item['title'] = trim( - htmlspecialchars_decode( - $element->find('header.streamItem_header', 0)->plaintext, - ENT_QUOTES - ) - ); - - $item['timestamp'] = strtotime($element->find('time', 0)->datetime); - - $answer = trim($element->find('div.streamItem_content', 0)->innertext); - - // This probably should be cleaned up, especially for YouTube embeds - if ($visual = $element->find('div.streamItem_visual', 0)) { - $visual = $visual->innertext; - } - - // Fix tracking links, also doesn't work - foreach ($element->find('a') as $link) { - if (strpos($link->href, 'l.ask.fm') !== false) { - $link->href = $link->plaintext; - } - } - - $item['content'] = '

    ' . $question - . '

    ' . $answer - . '

    ' . $visual . '

    '; - - $this->items[] = $item; - } - } - - public function getName() - { - if (!is_null($this->getInput('u'))) { - return self::NAME . ' : ' . $this->getInput('u'); - } - - return parent::getName(); - } - - public function getURI() - { - if (!is_null($this->getInput('u'))) { - return self::URI . urlencode($this->getInput('u')); - } - - return parent::getURI(); - } -} diff --git a/bridges/AssociatedPressNewsBridge.php b/bridges/AssociatedPressNewsBridge.php index 0f8846eb..37dff8b9 100644 --- a/bridges/AssociatedPressNewsBridge.php +++ b/bridges/AssociatedPressNewsBridge.php @@ -66,10 +66,10 @@ class AssociatedPressNewsBridge extends BridgeAbstract { switch ($this->getInput('topic')) { case 'Podcasts': - returnClientError('Podcasts topic feed is not supported'); + throwClientException('Podcasts topic feed is not supported'); break; case 'PressReleases': - returnClientError('PressReleases topic feed is not supported'); + throwClientException('PressReleases topic feed is not supported'); break; default: $this->collectCardData(); @@ -105,13 +105,12 @@ class AssociatedPressNewsBridge extends BridgeAbstract private function collectCardData() { - $json = getContents($this->getTagURI()) - or returnServerError('Could not request: ' . $this->getTagURI()); + $json = getContents($this->getTagURI()); $tagContents = json_decode($json, true); if (empty($tagContents['tagObjs'])) { - returnClientError('Topic not found: ' . $this->getInput('topic')); + throwClientException('Topic not found: ' . $this->getInput('topic')); } $this->feedName = $tagContents['tagObjs'][0]['name']; diff --git a/bridges/AtmoNouvelleAquitaineBridge.php b/bridges/AtmoNouvelleAquitaineBridge.php index d4244fa9..d2621b9a 100644 --- a/bridges/AtmoNouvelleAquitaineBridge.php +++ b/bridges/AtmoNouvelleAquitaineBridge.php @@ -30,6 +30,9 @@ class AtmoNouvelleAquitaineBridge extends BridgeAbstract public function collectData() { + // this bridge is broken and unmaintained + return; + $uri = self::URI . 
'/monair/commune/' . $this->getInput('cities'); $html = getSimpleHTMLDOM($uri); diff --git a/bridges/AuctionetBridge.php b/bridges/AuctionetBridge.php new file mode 100644 index 00000000..fbdee441 --- /dev/null +++ b/bridges/AuctionetBridge.php @@ -0,0 +1,344 @@ + [ + 'name' => 'Category', + 'type' => 'list', + 'values' => [ + 'All categories' => '', + 'Art' => [ + 'All' => '25-art', + 'Drawings' => '119-drawings', + 'Engravings & Prints' => '27-engravings-prints', + 'Other' => '30-other', + 'Paintings' => '28-paintings', + 'Photography' => '26-photography', + 'Sculptures & Bronzes' => '29-sculptures-bronzes', + ], + 'Asiatica' => [ + 'All' => '117-asiatica', + ], + 'Books, Maps & Manuscripts' => [ + 'All' => '50-books-maps-manuscripts', + 'Autographs & Manuscripts' => '206-autographs-manuscripts', + 'Books' => '204-books', + 'Maps' => '205-maps', + 'Other' => '207-other', + ], + 'Carpets & Textiles' => [ + 'All' => '35-carpets-textiles', + 'Carpets' => '36-carpets', + 'Textiles' => '37-textiles', + ], + 'Ceramics & Porcelain' => [ + 'All' => '9-ceramics-porcelain', + 'European' => '10-european', + 'Oriental' => '11-oriental', + 'Rest of the world' => '12-rest-of-the-world', + 'Tableware' => '210-tableware', + ], + 'Clocks & Watches' => [ + 'All' => '31-clocks-watches', + 'Carriage & Miniature Clocks' => '258-carriage-miniature-clocks', + 'Longcase clocks' => '32-longcase-clocks', + 'Mantel clocks' => '33-mantel-clocks', + 'Other clocks' => '34-other-clocks', + 'Pocket & Stop Watches' => '110-pocket-stop-watches', + 'Wall Clocks' => '127-wall-clocks', + 'Wristwatches' => '15-wristwatches', + ], + 'Coins, Medals & Stamps' => [ + 'All' => '46-coins-medals-stamps', + 'Coins' => '128-coins', + 'Orders & Medals' => '135-orders-medals', + 'Other' => '131-other', + 'Stamps' => '136-stamps', + ], + 'Folk art' => [ + 'All' => '58-folk-art', + 'Bowls & Boxes' => '121-bowls-boxes', + 'Furniture' => '122-furniture', + 'Other' => '123-other', + 'Tools & Gears' => 
'120-tools-gears', + ], + 'Furniture' => [ + 'All' => '16-furniture', + 'Armchairs & Chairs' => '18-armchairs-chairs', + 'Chests of drawers' => '24-chests-of-drawers', + 'Cupboards, Cabinets & Shelves' => '23-cupboards-cabinets-shelves', + 'Dining room furniture' => '22-dining-room-furniture', + 'Garden' => '21-garden', + 'Other' => '17-other', + 'Sofas & seatings' => '20-sofas-seatings', + 'Tables' => '19-tables', + ], + 'Glass' => [ + 'All' => '6-glass', + 'Art glass' => '208-art-glass', + 'Other' => '8-other', + 'Tableware' => '7-tableware', + 'Utility glass' => '209-utility-glass', + ], + 'Jewellery & Gemstones' => [ + 'All' => '13-jewellery-gemstones', + 'Alliance rings' => '113-alliance-rings', + 'Bracelets' => '106-bracelets', + 'Brooches & Pendants' => '107-brooches-pendants', + 'Costume Jewellery' => '259-costume-jewellery', + 'Cufflinks & Tie Pins' => '111-cufflinks-tie-pins', + 'Ear studs' => '116-ear-studs', + 'Earrings' => '115-earrings', + 'Gemstones' => '48-gemstones', + 'Jewellery' => '14-jewellery', + 'Jewellery Suites' => '109-jewellery-suites', + 'Necklace' => '104-necklace', + 'Other' => '118-other', + 'Rings' => '112-rings', + 'Signet rings' => '105-signet-rings', + 'Solitaire rings' => '114-solitaire-rings', + ], + 'Licence weapons' => [ + 'All' => '59-licence-weapons', + 'Combi/Combo' => '63-combi-combo', + 'Double express rifles' => '60-double-express-rifles', + 'Rifles' => '61-rifles', + 'Shotguns' => '62-shotguns', + ], + 'Lighting & Lamps' => [ + 'All' => '1-lighting-lamps', + 'Candlesticks' => '4-candlesticks', + 'Ceiling lights' => '3-ceiling-lights', + 'Chandeliers' => '203-chandeliers', + 'Floor lights' => '2-floor-lights', + 'Other lighting' => '5-other-lighting', + 'Table Lamps' => '125-table-lamps', + 'Wall Lights' => '124-wall-lights', + ], + 'Mirrors' => [ + 'All' => '42-mirrors', + ], + 'Miscellaneous' => [ + 'All' => '43-miscellaneous', + 'Fishing equipment' => '54-fishing-equipment', + 'Miscellaneous' => '47-miscellaneous', + 
'Modern Tools' => '133-modern-tools', + 'Modern consumer electronics' => '52-modern-consumer-electronics', + 'Musical instruments' => '51-musical-instruments', + 'Technica & Nautica' => '45-technica-nautica', + ], + 'Photo, Cameras & Lenses' => [ + 'All' => '57-photo-cameras-lenses', + 'Cameras & accessories' => '71-cameras-accessories', + 'Optics' => '66-optics', + 'Other' => '72-other', + ], + 'Silver & Metals' => [ + 'All' => '38-silver-metals', + 'Other metals' => '40-other-metals', + 'Pewter, Brass & Copper' => '41-pewter-brass-copper', + 'Silver' => '39-silver', + 'Silver plated' => '213-silver-plated', + ], + 'Toys' => [ + 'All' => '44-toys', + 'Comics' => '211-comics', + 'Toys' => '212-toys', + ], + 'Tribal art' => [ + 'All' => '134-tribal-art', + ], + 'Vehicles, Boats & Parts' => [ + 'All' => '249-vehicles-boats-parts', + 'Automobilia & Transport' => '255-automobilia-transport', + 'Bicycles' => '132-bicycles', + 'Boats & Accessories' => '250-boats-accessories', + 'Car parts' => '253-car-parts', + 'Cars' => '215-cars', + 'Moped parts' => '254-moped-parts', + 'Mopeds' => '216-mopeds', + 'Motorcycle parts' => '252-motorcycle-parts', + 'Motorcycles' => '251-motorcycles', + 'Other' => '256-other', + ], + 'Vintage & Designer Fashion' => [ + 'All' => '49-vintage-designer-fashion', + ], + 'Weapons & Militaria' => [ + 'All' => '137-weapons-militaria', + 'Airguns' => '257-airguns', + 'Armour & Uniform' => '138-armour-uniform', + 'Edged weapons' => '130-edged-weapons', + 'Guns & Rifles' => '129-guns-rifles', + 'Other' => '214-other', + ], + 'Wine, Port & Spirits' => [ + 'All' => '170-wine-port-spirits', + ], + ] + ], + 'sort_order' => [ + 'name' => 'Sort order', + 'type' => 'list', + 'values' => [ + 'Most bids' => 'bids_count_desc', + 'Lowest bid' => 'bid_asc', + 'Highest bid' => 'bid_desc', + 'Last bid on' => 'bid_on', + 'Ending soonest' => 'end_asc_active', + 'Lowest estimate' => 'estimate_asc', + 'Highest estimate' => 'estimate_desc', + 'Recently added' => 
'recent'
+            ],
+        ],
+        'country' => [
+            'name' => 'Country',
+            'type' => 'list',
+            'values' => [
+                'All' => '',
+                'Denmark' => 'DK',
+                'Finland' => 'FI',
+                'Germany' => 'DE',
+                'Spain' => 'ES',
+                'Sweden' => 'SE',
+                'United Kingdom' => 'GB'
+            ]
+        ],
+        'language' => [
+            'name' => 'Language',
+            'type' => 'list',
+            'values' => [
+                'English' => 'en',
+                'Español' => 'es',
+                'Deutsch' => 'de',
+                'Svenska' => 'sv',
+                'Dansk' => 'da',
+                'Suomi' => 'fi',
+            ],
+        ],
+    ]];
+
+    const CACHE_TIMEOUT = 3600; // 1 hour
+
+    // Document title of the first result page; getName() uses it as the feed name when it is non-empty
+    private $title;
+
+    /**
+     * Fetches result pages 1 to 10 and appends every parsed auction to $this->items.
+     * The document title of the first page is remembered for getName().
+     */
+    public function collectData()
+    {
+        // Each page contains 48 auctions
+        // So we fetch 10 pages so we decrease the likelihood
+        // of missing auctions between feed refreshes
+        for ($page = 1; $page <= 10; $page++) {
+            $data = getContents($this->getUrl($page));
+
+            if ($page === 1) {
+                // Must be stored on the instance: getName() reads $this->title,
+                // a local variable would leave the feed name permanently unset
+                $this->title = $this->getDocumentTitle($data);
+            }
+
+            $this->items = array_merge($this->items, $this->parsePageData($data));
+        }
+    }
+
+    /**
+     * Returns the title extracted by collectData(), or the parent's name while none is available.
+     */
+    public function getName()
+    {
+        return $this->title ?: parent::getName();
+    }
+
+
+    /* HELPERS */
+
+    /**
+     * Builds the search URL for one result page from the bridge inputs.
+     *
+     * @param int $page page number, sent as the `page` query parameter
+     * @return string
+     */
+    private function getUrl($page)
+    {
+        $category = $this->getInput('category');
+        $language = $this->getInput('language');
+        $sort_order = $this->getInput('sort_order');
+        $country = $this->getInput('country');
+
+        $url = self::URI . '/' . $language . '/search';
+
+        if ($category) {
+            $url = $url . '/' . $category;
+        }
+
+        $query = [];
+        $query['page'] = $page;
+
+        if ($sort_order) {
+            $query['order'] = $sort_order;
+        }
+
+        if ($country) {
+            $query['country_code'] = $country;
+        }
+
+        // 'page' is always present, so the query string is never empty
+        return $url . '?' . http_build_query($query);
+    }
+
+    /**
+     * Extracts the text between the first <title> tag and the following </title> tag.
+     *
+     * NOTE(review): the tag literals were empty strings in this copy of the file
+     * (apparently stripped); '<title>' / '</title>' are restored here - confirm.
+     *
+     * @param string $data raw HTML document
+     * @return string the title text, or '' when no complete title element is found
+     */
+    private function getDocumentTitle($data)
+    {
+        $open_tag = '<title>';
+        $close_tag = '</title>';
+
+        $open_pos = strpos($data, $open_tag);
+        if ($open_pos === false) {
+            return '';
+        }
+
+        $title_start = $open_pos + strlen($open_tag);
+        $title_end = strpos($data, $close_tag, $title_start);
+        if ($title_end === false) {
+            return '';
+        }
+
+        // Length is the distance between the end of the opening tag and the start
+        // of the closing tag; adding the tag length instead would overshoot
+        return substr($data, $title_start, $title_end - $title_start);
+    }
+
+    /**
+     * The auction items data is included in the HTML document
+     * as a HTML entities encoded JSON structure
+     * which is used to hydrate the React component for the list of auctions
+     */
+    private function parsePageData($data)
+    {
+        $key = 'data-react-props="';
+        $keyLength = strlen($key);
+
+        // The attribute value runs from the end of $key to the next double quote
+        $start = strpos($data, $key);
+        $end = strpos($data, '"', $start + strlen($key));
+        $length = $end - ($start + $keyLength);
+
+        $jsonString = substr($data, $start + $keyLength, $length);
+
+        $jsonData = json_decode(htmlspecialchars_decode($jsonString), false);
+
+        $items = [];
+
+        foreach ($jsonData->{'items'} as $item) {
+            $title = $item->{'longTitle'};
+            $relative_url = $item->{'url'};
+            $images = $item->{'imageUrls'};
+            $id = $item->{'auctionId'};
+
+            $items[] = [
+                'title' => $title,
+                'uri' => self::URI . $relative_url,
+                'uid' => $id,
+                'content' => count($images) > 0 ? "
    $title" : $title, + 'enclosures' => array_slice($images, 1), + ]; + } + + return $items; + } +} diff --git a/bridges/AutoJMBridge.php b/bridges/AutoJMBridge.php index c9aaa660..98a86fcc 100644 --- a/bridges/AutoJMBridge.php +++ b/bridges/AutoJMBridge.php @@ -13,12 +13,20 @@ class AutoJMBridge extends BridgeAbstract 'type' => 'text', 'required' => true, 'title' => 'URL d\'une recherche avec filtre de véhicules sans le http://www.autojm.fr/', - 'exampleValue' => 'recherche?brands[]=peugeot&ranges[]=peugeot-nouvelle-308-2021-5p' + 'exampleValue' => 'recherche?brands[]=PEUGEOT&ranges[]=PEUGEOT 308' ], ] ]; + const CACHE_TIMEOUT = 3600; + const TEST_DETECT_PARAMETERS = [ + 'https://www.autojm.fr/recherche?brands%5B%5D=PEUGEOT&ranges%5B%5D=PEUGEOT%20308' + => ['url' => 'recherche?brands%5B%5D=PEUGEOT&ranges%5B%5D=PEUGEOT%20308', + 'context' => 'Afficher les offres de véhicules disponible sur la recheche AutoJM' + ] + ]; + public function getIcon() { return self::URI . 'favicon.ico'; @@ -35,6 +43,17 @@ class AutoJMBridge extends BridgeAbstract } } + public function getURI() + { + switch ($this->queriedContext) { + case 'Afficher les offres de véhicules disponible sur la recheche AutoJM': + return self::URI . $this->getInput('url'); + break; + default: + return self::URI; + } + } + public function collectData() { // Get the number of result for this search @@ -52,7 +71,7 @@ class AutoJMBridge extends BridgeAbstract $data = json_decode($json); $nb_results = $data->nbResults; - $total_pages = ceil($nb_results / 15); + $total_pages = ceil($nb_results / 14); // Limit the number of page to analyse to 10 for ($page = 1; $page <= $total_pages && $page <= 10; $page++) { @@ -66,8 +85,8 @@ class AutoJMBridge extends BridgeAbstract $image = $car->find('div[class=card-car__header__img]', 0)->find('img', 0)->src; // Decode HTML attribute JSON data $car_data = json_decode(html_entity_decode($car->{'data-layer'})); - $car_model = $car->{'data-title'} . ' ' . 
$car->{'data-suptitle'}; - $availability = $car->find('div[class=card-car__modalites]', 0)->find('div[class=col]', 0)->plaintext; + $car_model = $car_data->title; + $availability = $car->find('div[class*=card-car__modalites]', 0)->find('div[class=col]', 0)->plaintext; $warranty = $car->find('div[data-type=WarrantyCard]', 0)->plaintext; $discount_html = $car->find('div[class=subtext vehicle_reference_element]', 0); // Check if there is any discount info displayed @@ -132,4 +151,18 @@ class AutoJMBridge extends BridgeAbstract return $html; } + + public function detectParameters($url) + { + $params = []; + $regex = '/^(https?:\/\/)?(www\.|)autojm.fr\/(recherche\?.*|recherche\/[0-9]{1,10}\?.*)$/m'; + if (preg_match($regex, $url, $matches) > 0) { + $url = preg_replace('#(recherche|recherche/[0-9]{1,10})#', 'recherche', $matches[3]); + + $params['url'] = $url; + $params['context'] = 'Afficher les offres de véhicules disponible sur la recheche AutoJM'; + + return $params; + } + } } diff --git a/bridges/AwwwardsBridge.php b/bridges/AwwwardsBridge.php index a1adfede..849d04d5 100644 --- a/bridges/AwwwardsBridge.php +++ b/bridges/AwwwardsBridge.php @@ -14,29 +14,10 @@ class AwwwardsBridge extends BridgeAbstract private $sites = []; - public function getIcon() - { - return 'https://www.awwwards.com/favicon.ico'; - } - - private function fetchSites() - { - Debug::log('Fetching all sites'); - $sites = getSimpleHTMLDOM(self::SITESURI); - - Debug::log('Parsing all JSON data'); - foreach ($sites->find('.grid-sites li') as $site) { - $decode = html_entity_decode($site->attr['data-collectable-model-value'], ENT_QUOTES, 'utf-8'); - $decode = json_decode($decode, true); - $this->sites[] = $decode; - } - } - public function collectData() { $this->fetchSites(); - Debug::log('Building RSS feed'); foreach ($this->sites as $site) { $item = []; $item['title'] = $site['title']; @@ -56,4 +37,23 @@ class AwwwardsBridge extends BridgeAbstract } } } + + public function getIcon() + { + return 
'https://www.awwwards.com/favicon.ico'; + } + + private function fetchSites() + { + $sites = getSimpleHTMLDOM(self::SITESURI); + foreach ($sites->find('.grid-sites li') as $li) { + $encodedJson = $li->attr['data-collectable-model-value'] ?? null; + if (!$encodedJson) { + continue; + } + $json = html_entity_decode($encodedJson, ENT_QUOTES, 'utf-8'); + $site = Json::decode($json); + $this->sites[] = $site; + } + } } diff --git a/bridges/BAEBridge.php b/bridges/BAEBridge.php index 6807d548..a0e4c536 100644 --- a/bridges/BAEBridge.php +++ b/bridges/BAEBridge.php @@ -29,7 +29,7 @@ class BAEBridge extends BridgeAbstract public function collectData() { $url = $this->getURI(); - $html = getSimpleHTMLDOM($url) or returnClientError('No results for this query.'); + $html = getSimpleHTMLDOM($url); $annonces = $html->find('main article'); foreach ($annonces as $annonce) { diff --git a/bridges/BMDSystemhausBlogBridge.php b/bridges/BMDSystemhausBlogBridge.php new file mode 100644 index 00000000..98fb2d63 --- /dev/null +++ b/bridges/BMDSystemhausBlogBridge.php @@ -0,0 +1,254 @@ + '
    {data_img}{data_content}
    ', + 'clir' => '
    {data_content}{data_img}
    ', + 'itcb' => '
    {data_img}
    {data_content}
    ', + 'ctib' => '
    {data_content}
    {data_img}
    ', + 'co' => '{data_content}', + 'io' => '{data_img}' + ]; + + const PARAMETERS = [ + 'Blog' => [ + 'country' => [ + 'name' => 'Country', + 'type' => 'list', + 'values' => [ + 'Österreich' => 'at', + 'Deutschland' => 'de', + 'Schweiz' => 'ch', + 'Slovensko' => 'sk', + 'Cesko' => 'cz', + 'Hungary' => 'hu', + ], + 'defaultValue' => 'at', + ], + 'style' => [ + 'name' => 'Style', + 'type' => 'list', + 'values' => [ + 'Image left, content right' => 'ilcr', + 'Content left, image right' => 'clir', + 'Image top, content bottom' => 'itcb', + 'Content top, image bottom' => 'ctib', + 'Content only' => 'co', + 'Image only' => 'io', + ], + 'defaultValue' => 'ilcr', + ] + ] + ]; + + //----------------------------------------------------- + public function collectData() + { + // get website content + $html = getSimpleHTMLDOM($this->getURI()); + + // Convert relative links in HTML into absolute links + $html = defaultLinkTo($html, self::URI); + + // Convert lazy-loading images and frames (video embeds) into static elements + $html = convertLazyLoading($html); + + foreach ($html->find('div#bmdNewsList div#bmdNewsList-Item') as $element) { + $itemScope = $element->find('div[itemscope=itemscope]', 0); + + $item = []; + + // set base article data + $item['title'] = $this->getMetaItemPropContent($itemScope, 'headline'); + $item['timestamp'] = strtotime($this->getMetaItemPropContent($itemScope, 'datePublished')); + $item['author'] = $this->getMetaItemPropContent($itemScope->find('div[itemprop=author]', 0), 'name'); + + // find article image + $imageTag = ''; + $image = $element->find('div.mediaelement.mediaelement-image img', 0); + if ((!is_null($image)) and ($image->src != '')) { + $item['enclosures'] = [$image->src]; + $imageTag = ''; + } + + // begin with right style + $content = self::ITEMSTYLE[$this->getInput('style')]; + + // render placeholder + $content = str_replace('{data_content}', $this->getMetaItemPropContent($itemScope, 'description'), $content); + $content = 
str_replace('{data_img}', $imageTag, $content);
+
+            // set finished content
+            $item['content'] = $content;
+
+            // get link to article
+            $link = $element->find('div#bmdNewsList-Text div#bmdNewsList-Title a', 0);
+            if (!is_null($link)) {
+                $item['uri'] = $link->href;
+            }
+
+            // init categories
+            $categories = [];
+            $tmpTwo = [];
+
+            // search first category span; an entry without one simply gets no categories
+            $catElem = $element->find('div#bmdNewsList-Text div#bmdNewsList-Category span.news-list-category', 0);
+            $txt = is_null($catElem) ? '' : trim($catElem->innertext);
+            $tmpOne = explode('/', $txt);
+
+            // split by 2 (or more) spaces; the explicit quantifier keeps the
+            // separator unambiguous and leaves multi-word categories intact
+            foreach ($tmpOne as $tmpElem) {
+                $tmpElem = trim($tmpElem);
+                $tmpData = preg_split('/ {2,}/', $tmpElem);
+                $tmpTwo = array_merge($tmpTwo, $tmpData);
+            }
+
+            // split by tabulator
+            foreach ($tmpTwo as $tmpElem) {
+                $tmpElem = trim($tmpElem);
+                $tmpData = preg_split('/\t+/', $tmpElem);
+                $categories = array_merge($categories, $tmpData);
+            }
+
+            // trim each category entry
+            $categories = array_map('trim', $categories);
+
+            // remove empty entries
+            $categories = array_filter($categories, function ($value) {
+                return !is_null($value) && $value !== '';
+            });
+
+            // set categories
+            if (count($categories) > 0) {
+                $item['categories'] = $categories;
+            }
+
+            // add item only when title, content and link are all present
+            // ('uri' is unset when no link element was found)
+            if (($item['title'] != '') and ($item['content'] != '') and (($item['uri'] ?? '') != '')) {
+                $this->items[] = $item;
+            }
+        }
+    }
+
+    //-----------------------------------------------------
+    /**
+     * Derives the 'country' parameter from a bmd.com URL.
+     *
+     * The country is taken from the first path segment; if that is not a known
+     * country, the Accept-Language request header is consulted, and 'at' is the
+     * final fallback.
+     *
+     * @param string $url
+     * @return array|null ['country' => ...], or null when the URL cannot be parsed
+     *                    or its host is not bmd.com / www.bmd.com
+     */
+    public function detectParameters($url)
+    {
+        try {
+            $parsedUrl = Url::fromString($url);
+        } catch (UrlException $e) {
+            return null;
+        }
+
+        if (!in_array($parsedUrl->getHost(), ['www.bmd.com', 'bmd.com'])) {
+            return null;
+        }
+
+        $lang = '';
+
+        // extract language from url
+        $path = explode('/', $parsedUrl->getPath());
+        if (count($path) > 1) {
+            $lang = $path[1];
+
+            // validate data
+            if ($this->getURIbyCountry($lang) == '') {
+                $lang = '';
+            }
+        }
+
+        // if no country available, find language by browser
+        if ($lang == '') {
+            // the header is absent for non-browser clients, so default to an empty string
+            $acceptLanguage = $_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? '';
+
+            // only the entries in front of the first ';' are considered
+            $srvLanguages = explode(';', $acceptLanguage);
+            $languages = explode(',', $srvLanguages[0]);
+
+            foreach ($languages as $language) {
+                $language = trim($language);
+                $langDetails = explode('-', $language);
+
+                // 'de-AT' yields the region part, a bare 'de' yields the entry itself
+                if (count($langDetails) > 1) {
+                    $candidate = $langDetails[1];
+                } else {
+                    $candidate = substr($language, 0, 2);
+                }
+
+                // validate data, take the first entry that maps to a known country
+                if ($this->getURIbyCountry($candidate) != '') {
+                    $lang = $candidate;
+                    break;
+                }
+            }
+        }
+
+        // if no URL found by language, use AT as default
+        if ($this->getURIbyCountry($lang) == '') {
+            $lang = 'at';
+        }
+
+        $params = [];
+        $params['country'] = strtolower($lang);
+
+        return $params;
+    }
+
+    //-----------------------------------------------------
+    /**
+     * Returns the blog URL of the selected country, or the parent's URI when
+     * no (known) country is selected.
+     */
+    public function getURI()
+    {
+        $country = $this->getInput('country') ?? '';
+        $lURI = $this->getURIbyCountry($country);
+        return $lURI != '' ? $lURI : parent::getURI();
+    }
+
+    //-----------------------------------------------------
+    /**
+     * Returns the fixed favicon URL of this bridge.
+     */
+    public function getIcon()
+    {
+        return self::BMD_FAV_ICON;
+    }
+
+    //-----------------------------------------------------
+    /**
+     * Returns the 'content' attribute of the meta[itemprop=$key] element below
+     * $elem, or '' when $key is empty, $elem is null or no such element exists.
+     */
+    private function getMetaItemPropContent($elem, $key)
+    {
+        if (($key != '') and (!is_null($elem))) {
+            $metaElem = $elem->find('meta[itemprop=' . $key . 
']', 0); + if (!is_null($metaElem)) { + return $metaElem->getAttribute('content'); + } + } + + return ''; + } + + //----------------------------------------------------- + private function getURIbyCountry($country) + { + switch (strtolower($country)) { + case 'at': + return 'https://www.bmd.com/at/ueber-bmd/blog-ohne-filter.html'; + case 'de': + return 'https://www.bmd.com/de/das-ist-bmd/blog.html'; + case 'ch': + return 'https://www.bmd.com/ch/das-ist-bmd/blog.html'; + case 'sk': + return 'https://www.bmd.com/sk/firma/blog.html'; + case 'cz': + return 'https://www.bmd.com/cz/firma/news-blog.html'; + case 'hu': + return 'https://www.bmd.com/hu/rolunk/hirek.html'; + default: + return ''; + } + } +} diff --git a/bridges/BadDragonBridge.php b/bridges/BadDragonBridge.php index 2260bbd6..2249d6f7 100644 --- a/bridges/BadDragonBridge.php +++ b/bridges/BadDragonBridge.php @@ -138,6 +138,7 @@ class BadDragonBridge extends BridgeAbstract // Sale $regex = '/^(https?:\/\/)?bad-dragon\.com\/sales/'; if (preg_match($regex, $url, $matches) > 0) { + $params['context'] = 'Sales'; return $params; } @@ -192,6 +193,7 @@ class BadDragonBridge extends BridgeAbstract isset($urlParams['noAccessories']) && $urlParams['noAccessories'] === '1' && $params['noAccessories'] = 'on'; + $params['context'] = 'Clearance'; return $params; } @@ -282,8 +284,7 @@ class BadDragonBridge extends BridgeAbstract case 'Clearance': $toyData = json_decode(getContents($this->inputToURL(true))); - $productList = json_decode(getContents(self::URI - . 'api/inventory-toy/product-list')); + $productList = json_decode(getContents(self::URI . 
'api/inventory-toy/product-list')); foreach ($toyData->toys as $toy) { $item = []; diff --git a/bridges/BakaUpdatesMangaReleasesBridge.php b/bridges/BakaUpdatesMangaReleasesBridge.php index 10d59c83..8ba5fe98 100644 --- a/bridges/BakaUpdatesMangaReleasesBridge.php +++ b/bridges/BakaUpdatesMangaReleasesBridge.php @@ -94,7 +94,7 @@ class BakaUpdatesMangaReleasesBridge extends BridgeAbstract // content is an unstructured pile of divs, ugly to parse $cols = $html->find('div#main_content div.row > div.text'); if (!$cols) { - returnServerError('No releases'); + throwServerException('No releases'); } $rows = array_slice( diff --git a/bridges/BandcampBridge.php b/bridges/BandcampBridge.php index c041b2b3..814317fb 100644 --- a/bridges/BandcampBridge.php +++ b/bridges/BandcampBridge.php @@ -111,19 +111,19 @@ class BandcampBridge extends BridgeAbstract $url = self::URI . 'api/hub/1/dig_deeper'; $data = $this->buildRequestJson(); $header = [ - 'Content-Type: application/json', - 'Content-Length: ' . strlen($data) + 'Content-Type: application/json', + 'Content-Length: ' . strlen($data), ]; $opts = [ - CURLOPT_CUSTOMREQUEST => 'POST', - CURLOPT_POSTFIELDS => $data + CURLOPT_CUSTOMREQUEST => 'POST', + CURLOPT_POSTFIELDS => $data, ]; $content = getContents($url, $header, $opts); $json = json_decode($content); if ($json->ok !== true) { - returnServerError('Invalid response'); + throwServerException('Invalid response'); } foreach ($json->items as $entry) { @@ -165,7 +165,7 @@ class BandcampBridge extends BridgeAbstract $regex = '/band_id=(\d+)/'; if (preg_match($regex, $html, $matches) == false) { - returnServerError('Unable to find band ID on: ' . $this->getURI()); + throwServerException('Unable to find band ID on: ' . $this->getURI()); } $band_id = $matches[1]; @@ -196,7 +196,7 @@ class BandcampBridge extends BridgeAbstract case 'By album': $regex = '/album=(\d+)/'; if (preg_match($regex, $html, $matches) == false) { - returnServerError('Unable to find album ID on: ' . 
$this->getURI()); + throwServerException('Unable to find album ID on: ' . $this->getURI()); } $album_id = $matches[1]; @@ -314,7 +314,8 @@ class BandcampBridge extends BridgeAbstract { $url = self::URI . 'api/' . $endpoint . '?' . http_build_query($query_data); // todo: 429 Too Many Requests happens a lot - $data = json_decode(getContents($url)); + $response = getContents($url); + $data = json_decode($response); return $data; } @@ -397,6 +398,7 @@ class BandcampBridge extends BridgeAbstract // By tag $regex = '/^(https?:\/\/)?bandcamp\.com\/tag\/([^\/.&?\n]+)/'; if (preg_match($regex, $url, $matches) > 0) { + $params['context'] = 'By tag'; $params['tag'] = urldecode($matches[2]); return $params; } @@ -404,6 +406,7 @@ class BandcampBridge extends BridgeAbstract // By band $regex = '/^(https?:\/\/)?([^\/.&?\n]+?)\.bandcamp\.com/'; if (preg_match($regex, $url, $matches) > 0) { + $params['context'] = 'By band'; $params['band'] = urldecode($matches[2]); return $params; } @@ -411,6 +414,7 @@ class BandcampBridge extends BridgeAbstract // By album $regex = '/^(https?:\/\/)?([^\/.&?\n]+?)\.bandcamp\.com\/album\/([^\/.&?\n]+)/'; if (preg_match($regex, $url, $matches) > 0) { + $params['context'] = 'By album'; $params['band'] = urldecode($matches[2]); $params['album'] = urldecode($matches[3]); return $params; diff --git a/bridges/BandcampDailyBridge.php b/bridges/BandcampDailyBridge.php index 57299a17..1f9a031d 100644 --- a/bridges/BandcampDailyBridge.php +++ b/bridges/BandcampDailyBridge.php @@ -93,8 +93,7 @@ class BandcampDailyBridge extends BridgeAbstract public function collectData() { - $html = getSimpleHTMLDOM($this->getURI()) - or returnServerError('Could not request: ' . 
$this->getURI()); + $html = getSimpleHTMLDOM($this->getURI()); $html = defaultLinkTo($html, self::URI); @@ -105,8 +104,7 @@ class BandcampDailyBridge extends BridgeAbstract $articlePath = $article->find('a.title', 0)->href; - $articlePageHtml = getSimpleHTMLDOMCached($articlePath, 3600) - or returnServerError('Could not request: ' . $articlePath); + $articlePageHtml = getSimpleHTMLDOMCached($articlePath, 3600); $item['uri'] = $articlePath; $item['title'] = $articlePageHtml->find('article-title', 0)->innertext; diff --git a/bridges/BazarakiBridge.php b/bridges/BazarakiBridge.php new file mode 100644 index 00000000..44e4eb84 --- /dev/null +++ b/bridges/BazarakiBridge.php @@ -0,0 +1,139 @@ + [ + 'name' => 'URL', + 'type' => 'text', + 'required' => true, + 'title' => 'Enter the URL of the Bazaraki page to fetch adverts from.', + 'exampleValue' => 'https://www.bazaraki.com/real-estate-for-sale/houses/?lat=0&lng=0&radius=100000', + ], + 'limit' => [ + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'title' => 'Enter the number of adverts to fetch. (max 50)', + 'exampleValue' => '10', + 'defaultValue' => 10, + ] + ] + ]; + + public function collectData() + { + $url = $this->getInput('url'); + if (! str_starts_with($url, 'https://www.bazaraki.com/')) { + throw new \Exception('Nope'); + } + + $html = getSimpleHTMLDOM($url); + + $i = 0; + foreach ($html->find('div.advert') as $element) { + $i++; + if ($i > $this->getInput('limit') || $i > 50) { + break; + } + + $item = []; + + $item['uri'] = 'https://www.bazaraki.com' . $element->find('a.advert__content-title', 0)->href; + + # Get the content + $advert = getSimpleHTMLDOM($item['uri']); + + $price = trim($advert->find('div.announcement-price__cost', 0)->plaintext); + $name = trim($element->find('a.advert__content-title', 0)->plaintext); + + $item['title'] = $name . ' - ' . 
$price; + + $time = trim($advert->find('span.date-meta', 0)->plaintext); + $time = str_replace('Posted: ', '', $time); + + + $item['content'] = $this->processAdvertContent($advert); + $item['timestamp'] = $this->convertRelativeTime($time); + $item['author'] = trim($advert->find('div.author-name', 0)->plaintext); + $item['uid'] = $advert->find('span.number-announcement', 0)->plaintext; + + $this->items[] = $item; + } + } + + /** + * Process the advert content to clean up HTML + * + * @param simple_html_dom $advert The SimpleHTMLDOM object for the advert page + * @return string Processed HTML content + */ + private function processAdvertContent($advert) + { + // Get the content sections + $header = $advert->find('div.announcement-content-header', 0); + $characteristics = $advert->find('div.announcement-characteristics', 0); + $description = $advert->find('div.js-description', 0); + $images = $advert->find('div.announcement__images', 0); + + // Remove all favorites divs + foreach ($advert->find('div.announcement-meta__favorites') as $favorites) { + $favorites->outertext = ''; + } + + // Replace all tags with their text content + foreach ($advert->find('a') as $a) { + $a->outertext = $a->innertext; + } + + // Format the content with section headers and dividers + $formattedContent = ''; + + // Add header section + $formattedContent .= $header->innertext; + $formattedContent .= '
    '; + + // Add characteristics section with header + $formattedContent .= '

    Details

    '; + $formattedContent .= $characteristics->innertext; + $formattedContent .= '
    '; + + // Add description section with header + $formattedContent .= '

    Description

    '; + $formattedContent .= $description->innertext; + $formattedContent .= '
    '; + + // Add images section with header + $formattedContent .= '

    Images

    '; + $formattedContent .= $images->innertext; + + return $formattedContent; + } + + /** + * Convert relative time strings like "Yesterday 12:32" to proper timestamps + * + * @param string $timeString The relative time string from the website + * @return string Timestamp in a format compatible with strtotime() + */ + private function convertRelativeTime($timeString) + { + if (strpos($timeString, 'Yesterday') !== false) { + // Replace "Yesterday" with actual date + $time = str_replace('Yesterday', date('Y-m-d', strtotime('-1 day')), $timeString); + return date('Y-m-d H:i:s', strtotime($time)); + } elseif (strpos($timeString, 'Today') !== false) { + // Replace "Today" with actual date + $time = str_replace('Today', date('Y-m-d'), $timeString); + return date('Y-m-d H:i:s', strtotime($time)); + } else { + // For other formats, return as is and let strtotime handle it + return $timeString; + } + } +} diff --git a/bridges/BinanceBridge.php b/bridges/BinanceBridge.php index 03449fb1..5aec0604 100644 --- a/bridges/BinanceBridge.php +++ b/bridges/BinanceBridge.php @@ -8,48 +8,27 @@ class BinanceBridge extends BridgeAbstract const MAINTAINER = 'thefranke'; const CACHE_TIMEOUT = 3600; // 1h + public function collectData() + { + $url = 'https://www.binance.com/bapi/composite/v1/public/content/blog/list?category=&tag=&page=1&size=12'; + $json = getContents($url); + $data = Json::decode($json, false); + foreach ($data->data->blogList as $post) { + $item = []; + $item['title'] = $post->title; + // Url slug not in json + //$item['uri'] = $uri; + $item['timestamp'] = $post->postTimeUTC / 1000; + $item['author'] = 'Binance'; + $item['content'] = $post->brief; + //$item['categories'] = $category; + $item['uid'] = $post->idStr; + $this->items[] = $item; + } + } + public function getIcon() { return 'https://bin.bnbstatic.com/static/images/common/favicon.ico'; } - - public function collectData() - { - $html = getSimpleHTMLDOM(self::URI) - or returnServerError('Could not fetch Binance 
blog data.'); - - $appData = $html->find('script[id="__APP_DATA"]'); - $appDataJson = json_decode($appData[0]->innertext); - $allposts = $appDataJson->routeProps->f3ac->blogListRes->list; - - foreach ($allposts as $element) { - $date = $element->releasedTime; - $title = $element->title; - $category = $element->category->name; - - $suburl = strtolower($category); - $suburl = str_replace(' ', '_', $suburl); - - $uri = self::URI . '/' . $suburl . '/' . $element->idStr; - - $contentHTML = getSimpleHTMLDOMCached($uri); - $contentAppData = $contentHTML->find('script[id="__APP_DATA"]'); - $contentAppDataJson = json_decode($contentAppData[0]->innertext); - $content = $contentAppDataJson->routeProps->a106->blogDetail->content; - - $item = []; - $item['title'] = $title; - $item['uri'] = $uri; - $item['timestamp'] = substr($date, 0, -3); - $item['author'] = 'Binance'; - $item['content'] = $content; - $item['categories'] = $category; - - $this->items[] = $item; - - if (count($this->items) >= 10) { - break; - } - } - } } diff --git a/bridges/BleepingComputerBridge.php b/bridges/BleepingComputerBridge.php index c1d3d568..79d84176 100644 --- a/bridges/BleepingComputerBridge.php +++ b/bridges/BleepingComputerBridge.php @@ -7,10 +7,14 @@ class BleepingComputerBridge extends FeedExpander const URI = 'https://www.bleepingcomputer.com/'; const DESCRIPTION = 'Returns the newest articles.'; - protected function parseItem($item) + public function collectData() { - $item = parent::parseItem($item); + $feed = static::URI . 'feed/'; + $this->collectExpandableDatas($feed); + } + protected function parseItem(array $item) + { $article_html = getSimpleHTMLDOMCached($item['uri']); if (!$article_html) { $item['content'] .= '

    Could not request ' . $this->getName() . ': ' . $item['uri'] . '

    '; @@ -23,10 +27,4 @@ class BleepingComputerBridge extends FeedExpander return $item; } - - public function collectData() - { - $feed = static::URI . 'feed/'; - $this->collectExpandableDatas($feed); - } } diff --git a/bridges/BlizzardNewsBridge.php b/bridges/BlizzardNewsBridge.php index 3930e0a4..4d82b318 100644 --- a/bridges/BlizzardNewsBridge.php +++ b/bridges/BlizzardNewsBridge.php @@ -1,6 +1,6 @@ getInput('locale'); if ('zh-cn' === $locale) { - return 'https://cn.news.blizzard.com'; + $baseUrl = 'https://cn.news.blizzard.com' . self::API_PATH; + } else { + $baseUrl = 'https://news.blizzard.com/' . $locale . self::API_PATH; } - return 'https://news.blizzard.com/' . $locale; + return $baseUrl .= http_build_query([ + 'feedCxpProductIds' => self::PRODUCT_IDS + ]); + } + + public function collectData() + { + $feedContent = json_decode(getContents($this->getSourceUrl()), true); + + foreach ($feedContent['feed']['contentItems'] as $entry) { + $properties = $entry['properties']; + + $item = []; + + $item['title'] = $this->filterChars($properties['title']); + $item['content'] = $this->filterChars($properties['summary']); + $item['uri'] = $properties['newsUrl']; + $item['author'] = $this->filterChars($properties['author']); + $item['timestamp'] = strtotime($properties['lastUpdated']); + $item['enclosures'] = [$properties['staticAsset']['imageUrl']]; + $item['categories'] = [$this->filterChars($properties['cxpProduct']['title'])]; + + $this->items[] = $item; + } + } + + private function filterChars($content) + { + return htmlspecialchars($content, ENT_XML1); + } + + public function getIcon() + { + return << [ + 'name' => 'Bluesky Data Source', + 'type' => 'list', + 'defaultValue' => 'Profile', + 'values' => [ + 'Profile' => 'getAuthorFeed', + ], + 'title' => 'Select the type of data source to fetch from Bluesky.' 
+ ], + 'user_id' => [ + 'name' => 'User Handle or DID', + 'type' => 'text', + 'required' => true, + 'exampleValue' => 'did:plc:z72i7hdynmk6r22z27h6tvur', + 'title' => 'ATProto / Bsky.app handle or DID' + ], + 'feed_filter' => [ + 'name' => 'Feed type', + 'type' => 'list', + 'defaultValue' => 'posts_and_author_threads', + 'values' => [ + 'Posts feed' => 'posts_and_author_threads', + 'All posts and replies' => 'posts_with_replies', + 'Root posts only' => 'posts_no_replies', + 'Media only' => 'posts_with_media', + ] + ], + + 'include_reposts' => [ + 'name' => 'Include Reposts?', + 'type' => 'checkbox', + 'defaultValue' => 'checked' + ], + + 'include_reply_context' => [ + 'name' => 'Include Reply context?', + 'type' => 'checkbox' + ], + + 'verbose_title' => [ + 'name' => 'Use verbose feed item titles?', + 'type' => 'checkbox' + ] + ] + ]; + + private $profile; + + public function getName() + { + if (isset($this->profile)) { + if ($this->profile['handle'] === 'handle.invalid') { + return sprintf('Bluesky - %s', $this->profile['displayName']); + } else { + return sprintf('Bluesky - %s (@%s)', $this->profile['displayName'], $this->profile['handle']); + } + } + return parent::getName(); + } + + public function getURI() + { + if (isset($this->profile)) { + if ($this->profile['handle'] === 'handle.invalid') { + return self::URI . '/profile/' . $this->profile['did']; + } else { + return self::URI . '/profile/' . 
$this->profile['handle']; + } + } + return parent::getURI(); + } + + public function getIcon() + { + if (isset($this->profile)) { + return $this->profile['avatar']; + } + return parent::getIcon(); + } + + public function getDescription() + { + if (isset($this->profile)) { + return $this->profile['description']; + } + return parent::getDescription(); + } + + private function parseExternal($external, $did) + { + $description = ''; + $externalUri = $external['uri']; + $externalTitle = e($external['title']); + $externalDescription = e($external['description']); + $thumb = $external['thumb'] ?? null; + + if (preg_match('/http(|s):\/\/media\.tenor\.com/', $externalUri)) { + //tenor gif embed + $tenorInterstitial = str_replace('media.tenor.com', 'media1.tenor.com/m', $externalUri); + $description .= "
    $externalTitle
    "; + } else { + //link embed preview + $host = parse_url($externalUri)['host']; + $thumbDesc = $thumb ? ('') : ''; + $externalDescription = strlen($externalDescription) > 0 ? "
    ($host) $externalDescription
    " : ''; + $description .= '
    ' . $externalTitle . ''; + $description .= '
    ' . $thumbDesc . $externalDescription . '
    '; + } + return $description; + } + + private function textToDescription($record) + { + if (isset($record['value'])) { + $record = $record['value']; + } + $text = $record['text']; + $text_copy = $text; + $text = nl2br(e($text)); + if (isset($record['facets'])) { + $facets = $record['facets']; + foreach ($facets as $facet) { + if ($facet['features'][0]['$type'] === 'app.bsky.richtext.facet#link') { + $substring = substr($text_copy, $facet['index']['byteStart'], $facet['index']['byteEnd'] - $facet['index']['byteStart']); + $text = str_replace($substring, '' . $substring . '', $text); + } + } + } + return $text; + } + + public function collectData() + { + $user_id = $this->getInput('user_id'); + $handle_match = preg_match('/(?:[a-zA-Z]*\.)+([a-zA-Z](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)/', $user_id, $handle_res); //gets the TLD in $handle_res[1] + $did_match = preg_match('/did:plc:[a-z2-7]{24}/', $user_id); //https://github.com/did-method-plc/did-method-plc#identifier-syntax + $exclude = ['alt', 'arpa', 'example', 'internal', 'invalid', 'local', 'localhost', 'onion']; //https://en.wikipedia.org/wiki/Top-level_domain#Reserved_domains + if ($handle_match == true && !in_array($handle_res[1], $exclude, true)) { + //valid bsky handle + $did = $this->resolveHandle($user_id); + } elseif ($did_match == true) { + //valid DID + $did = $user_id; + } else { + throwClientException('Invalid ATproto handle or DID provided.'); + } + + $filter = $this->getInput('feed_filter') ?: 'posts_and_author_threads'; + $replyContext = $this->getInput('include_reply_context'); + + $this->profile = $this->getProfile($did); + $authorFeed = $this->getAuthorFeed($did, $filter); + + foreach ($authorFeed['feed'] as $post) { + $postRecord = $post['post']['record']; + + $item = []; + $item['uri'] = self::URI . '/profile/' . $this->fallbackAuthor($post['post']['author'], 'url') . '/post/' . 
explode('app.bsky.feed.post/', $post['post']['uri'])[1]; + $item['title'] = $this->getInput('verbose_title') ? $this->generateVerboseTitle($post) : strtok($postRecord['text'], "\n"); + $item['timestamp'] = strtotime($postRecord['createdAt']); + $item['author'] = $this->fallbackAuthor($post['post']['author'], 'display'); + + $postAuthorDID = $post['post']['author']['did']; + $postAuthorHandle = $post['post']['author']['handle'] !== 'handle.invalid' ? '@' . $post['post']['author']['handle'] . ' ' : ''; + $postDisplayName = $post['post']['author']['displayName'] ?? ''; + $postDisplayName = e($postDisplayName); + $postUri = $item['uri']; + + $url = explode('/', $post['post']['uri']); + $this->logger->debug('https://bsky.app/profile/' . $url[2] . '/post/' . $url[4]); + + $description = ''; + $description .= '

    '; + //post + $description .= $this->getPostDescription( + $postDisplayName, + $postAuthorHandle, + $postUri, + $postRecord, + 'post' + ); + + if (isset($postRecord['embed']['$type'])) { + //post link embed + if ($postRecord['embed']['$type'] === 'app.bsky.embed.external') { + $description .= $this->parseExternal($postRecord['embed']['external'], $postAuthorDID); + } elseif ( + $postRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia' && + $postRecord['embed']['media']['$type'] === 'app.bsky.embed.external' + ) { + $description .= $this->parseExternal($postRecord['embed']['media']['external'], $postAuthorDID); + } + + //post images + if ( + $postRecord['embed']['$type'] === 'app.bsky.embed.images' || + ( + $postRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia' && + $postRecord['embed']['media']['$type'] === 'app.bsky.embed.images' + ) + ) { + $images = $post['post']['embed']['images'] ?? $post['post']['embed']['media']['images']; + foreach ($images as $image) { + $description .= $this->getPostImageDescription($image); + } + } + + //post video + if ( + $postRecord['embed']['$type'] === 'app.bsky.embed.video' || + ( + $postRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia' && + $postRecord['embed']['media']['$type'] === 'app.bsky.embed.video' + ) + ) { + $description .= $this->getPostVideoDescription( + $postRecord['embed']['video'] ?? $postRecord['embed']['media']['video'], + $postAuthorDID + ); + } + } + $description .= '

    '; + + //quote post + if ( + isset($postRecord['embed']) && + ( + $postRecord['embed']['$type'] === 'app.bsky.embed.record' || + $postRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia' + ) && + isset($post['post']['embed']['record']) + ) { + $description .= '

    '; + $quotedRecord = $post['post']['embed']['record']['record'] ?? $post['post']['embed']['record']; + + if (isset($quotedRecord['notFound']) && $quotedRecord['notFound']) { //deleted post + $description .= 'Quoted post deleted.'; + } elseif (isset($quotedRecord['detached']) && $quotedRecord['detached']) { //detached quote + $uri_explode = explode('/', $quotedRecord['uri']); + $uri_reconstructed = self::URI . '/profile/' . $uri_explode[2] . '/post/' . $uri_explode[4]; + $description .= 'Quoted post detached.'; + } elseif (isset($quotedRecord['blocked']) && $quotedRecord['blocked']) { //blocked by quote author + $description .= 'Author of quoted post has blocked OP.'; + } elseif ( + ($quotedRecord['$type'] ?? '') === 'app.bsky.feed.defs#generatorView' || + ($quotedRecord['$type'] ?? '') === 'app.bsky.graph.defs#listView' + ) { + $description .= $this->getListFeedDescription($quotedRecord); + } elseif ( + ($quotedRecord['$type'] ?? '') === 'app.bsky.graph.starterpack' || + ($quotedRecord['$type'] ?? '') === 'app.bsky.graph.defs#starterPackViewBasic' + ) { + $description .= $this->getStarterPackDescription($post['post']['embed']['record']); + } else { + $quotedAuthorDid = $quotedRecord['author']['did']; + $quotedDisplayName = $quotedRecord['author']['displayName'] ?? ''; + $quotedDisplayName = e($quotedDisplayName); + $quotedAuthorHandle = $quotedRecord['author']['handle'] !== 'handle.invalid' ? '@' . $quotedRecord['author']['handle'] . '' : ''; + + $parts = explode('/', $quotedRecord['uri']); + $quotedPostId = end($parts); + $quotedPostUri = self::URI . '/profile/' . $this->fallbackAuthor($quotedRecord['author'], 'url') . '/post/' . 
$quotedPostId; + + //quoted post - post + $description .= $this->getPostDescription( + $quotedDisplayName, + $quotedAuthorHandle, + $quotedPostUri, + $quotedRecord, + 'quote' + ); + + if (isset($quotedRecord['value']['embed']['$type'])) { + //quoted post - post link embed + if ($quotedRecord['value']['embed']['$type'] === 'app.bsky.embed.external') { + $description .= $this->parseExternal($quotedRecord['value']['embed']['external'], $quotedAuthorDid); + } + + //quoted post - post video + if ( + $quotedRecord['value']['embed']['$type'] === 'app.bsky.embed.video' || + ( + $quotedRecord['value']['embed']['$type'] === 'app.bsky.embed.recordWithMedia' && + $quotedRecord['value']['embed']['media']['$type'] === 'app.bsky.embed.video' + ) + ) { + $description .= $this->getPostVideoDescription( + $quotedRecord['value']['embed']['video'] ?? $quotedRecord['value']['embed']['media']['video'], + $quotedAuthorDid + ); + } + + //quoted post - post images + if ( + $quotedRecord['value']['embed']['$type'] === 'app.bsky.embed.images' || + ( + $quotedRecord['value']['embed']['$type'] === 'app.bsky.embed.recordWithMedia' && + $quotedRecord['value']['embed']['media']['$type'] === 'app.bsky.embed.images' + ) + ) { + foreach ($quotedRecord['embeds'] as $embed) { + if ( + $embed['$type'] === 'app.bsky.embed.images#view' || + ($embed['$type'] === 'app.bsky.embed.recordWithMedia#view' && $embed['media']['$type'] === 'app.bsky.embed.images#view') + ) { + $images = $embed['images'] ?? $embed['media']['images']; + foreach ($images as $image) { + $description .= $this->getPostImageDescription($image); + } + } + } + } + } + } + $description .= '

    '; + } + + //reply + if ($replyContext && isset($post['reply']) && isset($post['reply']['parent'])) { + $replyPost = $post['reply']['parent']; + $description .= '
    '; + $description .= '

    '; + + if (isset($replyPost['notFound']) && $replyPost['notFound']) { //deleted post + $description .= 'Replied to post was deleted.'; + } elseif (isset($replyPost['blocked']) && $replyPost['blocked']) { //blocked by quote author + $description .= 'Author of replied to post has blocked OP.'; + } else { + $replyPostRecord = $replyPost['record']; + $replyPostAuthorDID = $replyPost['author']['did']; + $replyPostAuthorHandle = $replyPost['author']['handle'] !== 'handle.invalid' ? '@' . $replyPost['author']['handle'] . ' ' : ''; + $replyPostDisplayName = $replyPost['author']['displayName'] ?? ''; + $replyPostDisplayName = e($replyPostDisplayName); + $replyPostUri = self::URI . '/profile/' . $this->fallbackAuthor($replyPost['author'], 'url') . '/post/' . explode('app.bsky.feed.post/', $replyPost['uri'])[1]; + + // reply post + $description .= $this->getPostDescription( + $replyPostDisplayName, + $replyPostAuthorHandle, + $replyPostUri, + $replyPostRecord, + 'reply' + ); + + if (isset($replyPostRecord['embed']['$type'])) { + //post link embed + if ($replyPostRecord['embed']['$type'] === 'app.bsky.embed.external') { + $description .= $this->parseExternal($replyPostRecord['embed']['external'], $replyPostAuthorDID); + } elseif ( + $replyPostRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia' && + $replyPostRecord['embed']['media']['$type'] === 'app.bsky.embed.external' + ) { + $description .= $this->parseExternal($replyPostRecord['embed']['media']['external'], $replyPostAuthorDID); + } + + //post images + if ( + $replyPostRecord['embed']['$type'] === 'app.bsky.embed.images' || + ( + $replyPostRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia' && + $replyPostRecord['embed']['media']['$type'] === 'app.bsky.embed.images' + ) + ) { + $images = $replyPost['embed']['images'] ?? 
$replyPost['embed']['media']['images']; + foreach ($images as $image) { + $description .= $this->getPostImageDescription($image); + } + } + + //post video + if ( + $replyPostRecord['embed']['$type'] === 'app.bsky.embed.video' || + ( + $replyPostRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia' && + $replyPostRecord['embed']['media']['$type'] === 'app.bsky.embed.video' + ) + ) { + $description .= $this->getPostVideoDescription( + $replyPostRecord['embed']['video'] ?? $replyPostRecord['embed']['media']['video'], + $replyPostAuthorDID + ); + } + } + $description .= '

    '; + + //quote post + if ( + isset($replyPostRecord['embed']) && + ($replyPostRecord['embed']['$type'] === 'app.bsky.embed.record' || $replyPostRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia') && + isset($replyPost['embed']['record']) + ) { + $description .= '

    '; + $replyQuotedRecord = $replyPost['embed']['record']['record'] ?? $replyPost['embed']['record']; + + if (isset($replyQuotedRecord['notFound']) && $replyQuotedRecord['notFound']) { //deleted post + $description .= 'Quoted post deleted.'; + } elseif (isset($replyQuotedRecord['detached']) && $replyQuotedRecord['detached']) { //detached quote + $uri_explode = explode('/', $replyQuotedRecord['uri']); + $uri_reconstructed = self::URI . '/profile/' . $uri_explode[2] . '/post/' . $uri_explode[4]; + $description .= 'Quoted post detached.'; + } elseif (isset($replyQuotedRecord['blocked']) && $replyQuotedRecord['blocked']) { //blocked by quote author + $description .= 'Author of quoted post has blocked OP.'; + } elseif ( + ($replyQuotedRecord['$type'] ?? '') === 'app.bsky.feed.defs#generatorView' || + ($replyQuotedRecord['$type'] ?? '') === 'app.bsky.graph.defs#listView' + ) { + $description .= $this->getListFeedDescription($replyQuotedRecord); + } elseif ( + ($replyQuotedRecord['$type'] ?? '') === 'app.bsky.graph.starterpack' || + ($replyQuotedRecord['$type'] ?? '') === 'app.bsky.graph.defs#starterPackViewBasic' + ) { + $description .= $this->getStarterPackDescription($replyPost['embed']['record']); + } else { + $quotedAuthorDid = $replyQuotedRecord['author']['did']; + $quotedDisplayName = $replyQuotedRecord['author']['displayName'] ?? ''; + $quotedDisplayName = e($quotedDisplayName); + $quotedAuthorHandle = $replyQuotedRecord['author']['handle'] !== 'handle.invalid' ? '@' . $replyQuotedRecord['author']['handle'] . '' : ''; + + $parts = explode('/', $replyQuotedRecord['uri']); + $quotedPostId = end($parts); + $quotedPostUri = self::URI . '/profile/' . $this->fallbackAuthor($replyQuotedRecord['author'], 'url') . '/post/' . 
$quotedPostId; + + //quoted post - post + $description .= $this->getPostDescription( + $quotedDisplayName, + $quotedAuthorHandle, + $quotedPostUri, + $replyQuotedRecord, + 'quote' + ); + + if (isset($replyQuotedRecord['value']['embed']['$type'])) { + //quoted post - post link embed + if ($replyQuotedRecord['value']['embed']['$type'] === 'app.bsky.embed.external') { + $description .= $this->parseExternal($replyQuotedRecord['value']['embed']['external'], $quotedAuthorDid); + } + + //quoted post - post video + if ( + $replyQuotedRecord['value']['embed']['$type'] === 'app.bsky.embed.video' || + ( + $replyQuotedRecord['value']['embed']['$type'] === 'app.bsky.embed.recordWithMedia' && + $replyQuotedRecord['value']['embed']['media']['$type'] === 'app.bsky.embed.video' + ) + ) { + $description .= $this->getPostVideoDescription( + $replyQuotedRecord['value']['embed']['video'] ?? $replyQuotedRecord['value']['embed']['media']['video'], + $quotedAuthorDid + ); + } + + //quoted post - post images + if ( + $replyQuotedRecord['value']['embed']['$type'] === 'app.bsky.embed.images' || + ( + $replyQuotedRecord['value']['embed']['$type'] === 'app.bsky.embed.recordWithMedia' && + $replyQuotedRecord['value']['embed']['media']['$type'] === 'app.bsky.embed.images' + ) + ) { + foreach ($replyQuotedRecord['embeds'] as $embed) { + if ( + $embed['$type'] === 'app.bsky.embed.images#view' || + ($embed['$type'] === 'app.bsky.embed.recordWithMedia#view' && $embed['media']['$type'] === 'app.bsky.embed.images#view') + ) { + $images = $embed['images'] ?? $embed['media']['images']; + foreach ($images as $image) { + $description .= $this->getPostImageDescription($image); + } + } + } + } + } + } + $description .= '

    '; + } + } + } + + $item['content'] = $description; + $this->items[] = $item; + } + } + + private function getPostVideoDescription(array $video, $authorDID) + { + //https://video.bsky.app/watch/$did/$cid/thumbnail.jpg + $videoCID = $video['ref']['$link']; + $videoMime = $video['mimeType']; + $thumbnail = "poster=\"https://video.bsky.app/watch/$authorDID/$videoCID/thumbnail.jpg\""; + $videoURL = "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=$authorDID&cid=$videoCID"; + return "
    "; + } + + private function getPostImageDescription(array $image) + { + $thumbnailUrl = $image['thumb']; + $fullsizeUrl = $image['fullsize']; + $alt = strlen($image['alt']) > 0 ? '
    ' . e($image['alt']) . '
    ' : ''; + return "
    $alt
    "; + } + + private function getPostDescription( + string $postDisplayName, + string $postAuthorHandle, + string $postUri, + array $postRecord, + string $type + ) { + $description = ''; + if ($type === 'quote') { + // Quoted post/reply from bbb @bbb.com: + $postType = isset($postRecord['reply']) ? 'reply' : 'post'; + $description .= "Quoted $postType from $postDisplayName $postAuthorHandle:
    "; + } elseif ($type === 'reply') { + // Replying to aaa @aaa.com's post/reply: + $postType = isset($postRecord['reply']) ? 'reply' : 'post'; + $description .= "Replying to $postDisplayName $postAuthorHandle's $postType:
    "; + } else { + // aaa @aaa.com posted: + $description .= "$postDisplayName $postAuthorHandle posted:
    "; + } + $description .= $this->textToDescription($postRecord); + return $description; + } + + //used if handle verification fails, fallsback to displayName or DID depending on context. + private function fallbackAuthor($author, $reason) + { + if ($author['handle'] === 'handle.invalid') { + switch ($reason) { + case 'url': + return $author['did']; + case 'display': + $displayName = $author['displayName'] ?? ''; + return e($displayName); + } + } + return $author['handle']; + } + + private function generateVerboseTitle($post) + { + //use "Post by A, replying to B, quoting C" instead of post contents + $title = ''; + if (isset($post['reason']) && str_contains($post['reason']['$type'], 'reasonRepost')) { + $title .= 'Repost by ' . $this->fallbackAuthor($post['reason']['by'], 'display') . ', post by ' . $this->fallbackAuthor($post['post']['author'], 'display'); + } else { + $title .= 'Post by ' . $this->fallbackAuthor($post['post']['author'], 'display'); + } + + if (isset($post['reply'])) { + if (isset($post['reply']['parent']['blocked'])) { + $replyAuthor = 'blocked user'; + } elseif (isset($post['reply']['parent']['notFound'])) { + $replyAuthor = 'deleted post'; + } else { + $replyAuthor = $this->fallbackAuthor($post['reply']['parent']['author'], 'display'); + } + $title .= ', replying to ' . $replyAuthor; + } + + if ( + isset($post['post']['embed']) && + isset($post['post']['embed']['record']) && + //if not starter pack, feed or list + ($post['post']['embed']['record']['$type'] ?? '') !== 'app.bsky.feed.defs#generatorView' && + ($post['post']['embed']['record']['$type'] ?? '') !== 'app.bsky.graph.defs#listView' && + ($post['post']['embed']['record']['$type'] ?? 
'') !== 'app.bsky.graph.defs#starterPackViewBasic' + ) { + if (isset($post['post']['embed']['record']['blocked'])) { + $quotedAuthor = 'blocked user'; + } elseif (isset($post['post']['embed']['record']['notFound'])) { + $quotedAuthor = 'deleted post'; + } elseif (isset($post['post']['embed']['record']['detached'])) { + $quotedAuthor = 'detached post'; + } else { + $quotedAuthor = $this->fallbackAuthor($post['post']['embed']['record']['record']['author'] ?? $post['post']['embed']['record']['author'], 'display'); + } + $title .= ', quoting ' . $quotedAuthor; + } + return $title; + } + + private function resolveHandle($handle) + { + $uri = 'https://public.api.bsky.app/xrpc/com.atproto.identity.resolveHandle?handle=' . urlencode($handle); + $response = json_decode(getContents($uri), true); + return $response['did']; + } + + private function getProfile($did) + { + $uri = 'https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor=' . urlencode($did); + $response = json_decode(getContents($uri), true); + return $response; + } + + private function getAuthorFeed($did, $filter) + { + $uri = 'https://public.api.bsky.app/xrpc/app.bsky.feed.getAuthorFeed?actor=' . urlencode($did) . '&filter=' . urlencode($filter) . '&limit=30'; + + $this->logger->debug($uri); + + $response = json_decode(getContents($uri), true); + return $response; + } + + //Embed for generated feeds and lists + private function getListFeedDescription(array $record): string + { + $feedViewAvatar = isset($record['avatar']) ? '' : ''; + $feedViewName = e($record['displayName'] ?? $record['name']); + $feedViewDescription = e($record['description'] ?? ''); + $authorDisplayName = e($record['creator']['displayName']); + $authorHandle = e($record['creator']['handle']); + $likeCount = isset($record['likeCount']) ? '
    Liked by ' . e($record['likeCount']) . ' users' : ''; + preg_match('/\/([^\/]+)$/', $record['uri'], $matches); + if (($record['purpose'] ?? '') === 'app.bsky.graph.defs#modlist') { + $typeURL = '/lists/'; + $typeDesc = 'moderation list'; + } elseif (($record['purpose'] ?? '') === 'app.bsky.graph.defs#curatelist') { + $typeURL = '/lists/'; + $typeDesc = 'list'; + } else { + $typeURL = '/feed/'; + $typeDesc = 'feed'; + } + $uri = e('https://bsky.app/profile/' . $record['creator']['did'] . $typeURL . $matches[1]); + + return << +{$feedViewName}
    +Bluesky {$typeDesc} by {$authorDisplayName} @{$authorHandle} +
    +{$feedViewAvatar} +
    {$feedViewDescription}{$likeCount}
    +
    + +END; + } + + private function getStarterPackDescription(array $record): string + { + if (!isset($record['record'])) { + return 'Failed to get starter pack information.'; + } + $starterpackRecord = $record['record']; + $starterpackName = e($starterpackRecord['name']); + $starterpackDescription = e($starterpackRecord['description']); + $creatorDisplayName = e($record['creator']['displayName']); + $creatorHandle = e($record['creator']['handle']); + preg_match('/\/([^\/]+)$/', $starterpackRecord['list'], $matches); + $uri = e('https://bsky.app/starter-pack/' . $record['creator']['did'] . '/' . $matches[1]); + return << +{$starterpackName}
    +Bluesky starter pack by {$creatorDisplayName} @{$creatorHandle}
    +{$starterpackDescription} + +END; + } +} diff --git a/bridges/BodaccBridge.php b/bridges/BodaccBridge.php new file mode 100644 index 00000000..38e5856a --- /dev/null +++ b/bridges/BodaccBridge.php @@ -0,0 +1,218 @@ + [ + 'departement' => [ + 'name' => 'Département', + 'type' => 'list', + 'values' => [ + 'Tous' => null, + 'Ain' => '01', + 'Aisne' => '02', + 'Allier' => '03', + 'Alpes-de-Haute-Provence' => '04', + 'Hautes-Alpes' => '05', + 'Alpes-Maritimes' => '06', + 'Ardèche' => '07', + 'Ardennes' => '08', + 'Ariège' => '09', + 'Aube' => '10', + 'Aude' => '11', + 'Aveyron' => '12', + 'Bouches-du-Rhône' => '13', + 'Calvados' => '14', + 'Cantal' => '15', + 'Charente' => '16', + 'Charente-Maritime' => '17', + 'Cher' => '18', + 'Corrèze' => '19', + 'Corse-du-Sud' => '2A', + 'Haute-Corse' => '2B', + 'Côte-d\'Or' => '21', + 'Côtes-d\'Armor' => '22', + 'Creuse' => '23', + 'Dordogne' => '24', + 'Doubs' => '25', + 'Drôme' => '26', + 'Eure' => '27', + 'Eure-et-Loir' => '28', + 'Finistère' => '29', + 'Gard' => '30', + 'Haute-Garonne' => '31', + 'Gers' => '32', + 'Gironde' => '33', + 'Hérault' => '34', + 'Ille-et-Vilaine' => '35', + 'Indre' => '36', + 'Indre-et-Loire' => '37', + 'Isère' => '38', + 'Jura' => '39', + 'Landes' => '40', + 'Loir-et-Cher' => '41', + 'Loire' => '42', + 'Haute-Loire' => '43', + 'Loire-Atlantique' => '44', + 'Loiret' => '45', + 'Lot' => '46', + 'Lot-et-Garonne' => '47', + 'Lozère' => '48', + 'Maine-et-Loire' => '49', + 'Manche' => '50', + 'Marne' => '51', + 'Haute-Marne' => '52', + 'Mayenne' => '53', + 'Meurthe-et-Moselle' => '54', + 'Meuse' => '55', + 'Morbihan' => '56', + 'Moselle' => '57', + 'Nièvre' => '58', + 'Nord' => '59', + 'Oise' => '60', + 'Orne' => '61', + 'Pas-de-Calais' => '62', + 'Puy-de-Dôme' => '63', + 'Pyrénées-Atlantiques' => '64', + 'Hautes-Pyrénées' => '65', + 'Pyrénées-Orientales' => '66', + 'Bas-Rhin' => '67', + 'Haut-Rhin' => '68', + 'Rhône' => '69', + 'Haute-Saône' => '70', + 'Saône-et-Loire' => '71', + 'Sarthe' => '72', + 
'Savoie' => '73', + 'Haute-Savoie' => '74', + 'Paris' => '75', + 'Seine-Maritime' => '76', + 'Seine-et-Marne' => '77', + 'Yvelines' => '78', + 'Deux-Sèvres' => '79', + 'Somme' => '80', + 'Tarn' => '81', + 'Tarn-et-Garonne' => '82', + 'Var' => '83', + 'Vaucluse' => '84', + 'Vendée' => '85', + 'Vienne' => '86', + 'Haute-Vienne' => '87', + 'Vosges' => '88', + 'Yonne' => '89', + 'Territoire de Belfort' => '90', + 'Essonne' => '91', + 'Hauts-de-Seine' => '92', + 'Seine-Saint-Denis' => '93', + 'Val-de-Marne' => '94', + 'Val-d\'Oise' => '95', + 'Guadeloupe' => '971', + 'Martinique' => '972', + 'Guyane' => '973', + 'La Réunion' => '974', + 'Saint-Pierre-et-Miquelon' => '975', + 'Mayotte' => '976', + 'Saint-Barthélemy' => '977', + 'Saint-Martin' => '978', + 'Terres australes et antarctiques françaises' => '984', + 'Wallis-et-Futuna' => '986', + 'Polynésie française' => '987', + 'Nouvelle-Calédonie' => '988', + 'Île de Clipperton' => '989' + ] + ], + 'famille' => [ + 'name' => 'Famille', + 'type' => 'list', + 'values' => [ + 'Toutes' => null, + 'Annonces diverses' => 'divers', + 'Créations' => 'creation', + 'Dépôts des comptes' => 'dpc', + 'Immatriculations' => 'immatriculation', + 'Modifications diverses' => 'modification', + 'Procédures collectives' => 'collective', + 'Procédures de conciliation' => 'conciliation', + 'Procédures de rétablissement professionnel' => 'retablissement_professionnel', + 'Radiations' => 'radiation', + 'Ventes et cessions' => 'vente' + ] + ], + 'type' => [ + 'name' => 'Type', + 'type' => 'list', + 'values' => [ + 'Tous' => null, + 'Avis initial' => 'annonce', + 'Avis d\'annulation' => 'annulation', + 'Avis rectificatif' => 'rectificatif' + ] + ] + ] + ]; + + public function collectData() + { + $parameters = [ + 'select' => 'id,dateparution,typeavis_lib,familleavis_lib,commercant,ville,cp', + 'order_by' => 'id desc', + 'limit' => 50, + ]; + + $where = []; + if (!empty($this->getInput('departement'))) { + $where[] = 'numerodepartement="' . 
$this->getInput('departement') . '"'; + } + + if (!empty($this->getInput('famille'))) { + $where[] = 'familleavis="' . $this->getInput('famille') . '"'; + } + + if (!empty($this->getInput('type'))) { + $where[] = 'typeavis="' . $this->getInput('type') . '"'; + } + + if ($where !== []) { + $parameters['where'] = implode(' and ', $where); + } + + $url = urljoin(self::URI, '/api/explore/v2.1/catalog/datasets/annonces-commerciales/records?' . http_build_query($parameters)); + + $data = Json::decode(getContents($url), false); + + foreach ($data->results as $result) { + if ( + !isset( + $result->id, + $result->dateparution, + $result->typeavis_lib, + $result->familleavis_lib, + $result->commercant, + $result->ville, + $result->cp + ) + ) { + continue; + } + + $title = sprintf( + '[%s] %s - %s à %s (%s)', + $result->typeavis_lib, + $result->familleavis_lib, + $result->commercant, + $result->ville, + $result->cp + ); + + $this->items[] = [ + 'uid' => $result->id, + 'timestamp' => strtotime($result->dateparution), + 'title' => $title, + ]; + } + } +} diff --git a/bridges/BookMyShowBridge.php b/bridges/BookMyShowBridge.php index 7064df91..6ad02fe2 100644 --- a/bridges/BookMyShowBridge.php +++ b/bridges/BookMyShowBridge.php @@ -1218,14 +1218,15 @@ EOT; $table = $this->generateEventDetailsTable($event); $imgsrc = $event['BannerURL']; + $FShareURL = $event['FShareURL']; return << -
    - $table -
    - More Details are available on the BookMyShow website. -EOT; + +
    + $table +
    + More Details are available on the BookMyShow website. + EOT; } /** @@ -1292,14 +1293,15 @@ EOT; $synopsis = preg_replace(self::SYNOPSIS_REGEX, '', $data['EventSynopsis']); + $eventTrailerURL = $data['EventTrailerURL']; return << -
    $table
    -

    $innerHtml

    -

    ${synopsis}

    - More Details are available on the BookMyShow website and a trailer is available - here -EOT; + +
    $table
    +

    $innerHtml

    +

    $synopsis

    + More Details are available on the BookMyShow website and a trailer is available + here + EOT; } /** diff --git a/bridges/BruegelBridge.php b/bridges/BruegelBridge.php new file mode 100644 index 00000000..b7813dbc --- /dev/null +++ b/bridges/BruegelBridge.php @@ -0,0 +1,63 @@ + [ + 'name' => 'Category', + 'type' => 'list', + 'defaultValue' => '/publications', + 'values' => [ + 'Publications' => '/publications', + 'Commentary' => '/commentary' + ] + ] + ] + ]; + + public function getIcon() + { + return self::URI . '/themes/custom/bruegel/assets/favicon/android-icon-72x72.png'; + } + + public function collectData() + { + $url = self::URI . $this->getInput('category'); + $html = getSimpleHTMLDOM($url); + + $articles = $html->find('.c-listing__content article'); + + foreach ($articles as $article) { + $title = $article->find('.c-list-item__title a span', 0)->plaintext; + $content = trim($article->find('.c-list-item__description', 0)->plaintext); + $publishDate = $article->find('.c-list-item__date', 0)->plaintext; + $href = $article->find('.c-list-item__title a', 0)->getAttribute('href'); + + $item = [ + 'title' => $title, + 'content' => $content, + 'timestamp' => strtotime($publishDate), + 'uri' => self::URI . 
$href, + 'author' => $this->getAuthor($article), + ]; + + $this->items[] = $item; + } + } + + private function getAuthor($article) + { + $authorsElements = $article->find('.c-list-item__authors a'); + + $authors = array_map(function ($author) { + return $author->plaintext; + }, $authorsElements); + + return join(', ', $authors); + } +} \ No newline at end of file diff --git a/bridges/BrutBridge.php b/bridges/BrutBridge.php index aa2a1b4d..0db0851a 100644 --- a/bridges/BrutBridge.php +++ b/bridges/BrutBridge.php @@ -38,50 +38,20 @@ class BrutBridge extends BridgeAbstract ] ]; - const CACHE_TIMEOUT = 1800; // 30 mins - - private $jsonRegex = '/window\.__PRELOADED_STATE__ = ((?:.*)});/'; - public function collectData() { - $html = getSimpleHTMLDOM($this->getURI()); - - $results = $html->find('div.results', 0); - - foreach ($results->find('li.col-6.col-sm-4.col-md-3.col-lg-2.px-2.pb-4') as $li) { - $item = []; - - $videoPath = self::URI . $li->children(0)->href; - $videoPageHtml = getSimpleHTMLDOMCached($videoPath, 3600); - - $json = $this->extractJson($videoPageHtml); - $id = array_keys((array) $json->media->index)[0]; - - $item['uri'] = $videoPath; - $item['title'] = $json->media->index->$id->title; - $item['timestamp'] = $json->media->index->$id->published_at; - $item['enclosures'][] = $json->media->index->$id->media->thumbnail; - - $description = $json->media->index->$id->description; - $article = ''; - - if (is_null($json->media->index->$id->media->seo_article) === false) { - $article = markdownToHtml($json->media->index->$id->media->seo_article); - } - - $item['content'] = << - - -

    {$description}

    - {$article} -EOD; - - $this->items[] = $item; - - if (count($this->items) >= 10) { - break; - } + $url = $this->getURI(); + $html = getSimpleHTMLDOM($url); + $regex = '/window.__PRELOADED_STATE__ = (.*);/'; + preg_match($regex, $html, $parts); + $data = Json::decode($parts[1], false); + foreach ($data->medias->index as $uid => $media) { + $this->items[] = [ + 'uid' => $uid, + 'title' => $media->metadata->slug, + 'uri' => $media->share_url, + 'timestamp' => $media->published_at, + ]; } } @@ -90,35 +60,14 @@ EOD; if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) { return self::URI . '/' . $this->getInput('edition') . '/' . $this->getInput('category'); } - return parent::getURI(); } public function getName() { if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) { - return $this->getKey('category') . ' - ' . - $this->getKey('edition') . ' - Brut.'; + return $this->getKey('category') . ' - ' . $this->getKey('edition') . ' - Brut.'; } - return parent::getName(); } - - /** - * Extract JSON from page - */ - private function extractJson($html) - { - if (!preg_match($this->jsonRegex, $html, $parts)) { - returnServerError('Failed to extract data from page'); - } - - $data = json_decode($parts[1]); - - if ($data === false) { - returnServerError('Failed to decode extracted data'); - } - - return $data; - } } diff --git a/bridges/BugzillaBridge.php b/bridges/BugzillaBridge.php index 9b4d1adc..29112918 100644 --- a/bridges/BugzillaBridge.php +++ b/bridges/BugzillaBridge.php @@ -98,7 +98,7 @@ class BugzillaBridge extends BridgeAbstract // Array of comments is here if (!isset($json['bugs'][$this->bugid]['comments'])) { - returnClientError('Cannot find REST endpoint'); + throwClientException('Cannot find REST endpoint'); } foreach ($json['bugs'][$this->bugid]['comments'] as $comment) { @@ -131,7 +131,7 @@ class BugzillaBridge extends BridgeAbstract // Array of changesets which contain an array of changes if 
(!isset($json['bugs']['0']['history'])) { - returnClientError('Cannot find REST endpoint'); + throwClientException('Cannot find REST endpoint'); } foreach ($json['bugs']['0']['history'] as $changeset) { @@ -159,12 +159,12 @@ class BugzillaBridge extends BridgeAbstract protected function getUser($user) { // Check if the user endpoint is available - if ($this->loadCacheValue($this->instance . 'userEndpointClosed', 86400)) { + if ($this->loadCacheValue($this->instance . 'userEndpointClosed')) { return $user; } $cache = $this->loadCacheValue($this->instance . $user); - if (!is_null($cache)) { + if ($cache) { return $cache; } diff --git a/bridges/BukowskisBridge.php b/bridges/BukowskisBridge.php index 14889889..3573c206 100644 --- a/bridges/BukowskisBridge.php +++ b/bridges/BukowskisBridge.php @@ -206,7 +206,7 @@ class BukowskisBridge extends BridgeAbstract $this->items[] = [ 'title' => $title, 'uri' => $baseUrl . $relative_url, - 'uid' => $lot->getAttribute('data-lot-id'), + 'uid' => $relative_url, 'content' => count($images) > 0 ? "
    $title" : $title, 'enclosures' => array_slice($images, 1), ]; diff --git a/bridges/BundesbankBridge.php b/bridges/BundesbankBridge.php index 4335cb69..0c6b943f 100644 --- a/bridges/BundesbankBridge.php +++ b/bridges/BundesbankBridge.php @@ -71,7 +71,9 @@ class BundesbankBridge extends BridgeAbstract $item['content'] .= '' . $study->find('.teasable__subtitle', 0)->plaintext . ''; } - $item['content'] .= '

    ' . $study->find('.teasable__text', 0)->plaintext . '

    '; + $teasable = $study->find('.teasable__text', 0); + $teasableText = $teasable->plaintext ?? ''; + $item['content'] .= '

    ' . $teasableText . '

    '; $item['timestamp'] = strtotime($study->find('.teasable__date', 0)->plaintext); diff --git a/bridges/BundestagParteispendenBridge.php b/bridges/BundestagParteispendenBridge.php index cdf398e8..198cf534 100644 --- a/bridges/BundestagParteispendenBridge.php +++ b/bridges/BundestagParteispendenBridge.php @@ -26,21 +26,19 @@ TMPL; https://www.bundestag.de/ajax/filterlist/de/parlament/praesidium/parteienfinanzierung/fundstellen50000/462002-462002 URI; // Get the main page - $html = getSimpleHTMLDOMCached($ajaxUri, self::CACHE_TIMEOUT) - or returnServerError('Could not request AJAX list.'); + $html = getSimpleHTMLDOMCached($ajaxUri, self::CACHE_TIMEOUT); // Build the URL from the first anchor element. The list is sorted by year, descending, so the first element is the current year. $firstAnchor = $html->find('a', 0) - or returnServerError('Could not find the proper HTML element.'); + or throwServerException('Could not find the proper HTML element.'); - $url = 'https://www.bundestag.de' . $firstAnchor->href; + $url = $firstAnchor->href; // Get the actual page with the soft money donations - $html = getSimpleHTMLDOMCached($url, self::CACHE_TIMEOUT) - or returnServerError('Could not request ' . 
$url); + $html = getSimpleHTMLDOMCached($url, self::CACHE_TIMEOUT); $rows = $html->find('table.table > tbody > tr') - or returnServerError('Could not find the proper HTML elements.'); + or throwServerException('Could not find the proper HTML elements.'); foreach ($rows as $row) { $item = $this->generateItemFromRow($row); diff --git a/bridges/BundesverbandFuerFreieKammernBridge.php b/bridges/BundesverbandFuerFreieKammernBridge.php new file mode 100644 index 00000000..147f2d47 --- /dev/null +++ b/bridges/BundesverbandFuerFreieKammernBridge.php @@ -0,0 +1,28 @@ +setTime(0, 0, 0); + return $dti->getTimestamp(); + } +} diff --git a/bridges/CNETBridge.php b/bridges/CNETBridge.php index 34442abd..77339c7c 100644 --- a/bridges/CNETBridge.php +++ b/bridges/CNETBridge.php @@ -1,6 +1,6 @@ 'list', 'values' => [ 'All articles' => '', - 'Apple' => 'apple', - 'Google' => 'google', - 'Microsoft' => 'tags-microsoft', - 'Computers' => 'topics-computers', - 'Mobile' => 'topics-mobile', - 'Sci-Tech' => 'topics-sci-tech', - 'Security' => 'topics-security', - 'Internet' => 'topics-internet', - 'Tech Industry' => 'topics-tech-industry' + 'Tech' => 'tech', + 'Money' => 'personal-finance', + 'Home' => 'home', + 'Wellness' => 'health', + 'Energy' => 'home/energy-and-utilities', + 'Deals' => 'deals', + 'Computing' => 'tech/computing', + 'Mobile' => 'tech/mobile', + 'Science' => 'science', + 'Services' => 'tech/services-and-software' ] - ] + ], + 'limit' => self::LIMIT ] ]; - private function cleanArticle($article_html) - { - $offset_p = strpos($article_html, '

    '); - $offset_figure = strpos($article_html, '', '', $article_html); - $article_html = str_replace('', '', $article_html); - $article_html = StripWithDelimiters($article_html, ''); - $article_html = stripWithDelimiters($article_html, 'find('div.originalImage', 0); - } - if (empty($article_thumbnail)) { - $article_thumbnail = $article_html->find('span.imageContainer', 0); - } - if (is_object($article_thumbnail)) { - $article_thumbnail = $article_thumbnail->find('img', 0)->src; - } - - $article_content .= trim( - $this->cleanArticle( - extractFromDelimiters( - $article_html, - 'find('script[type=application/ld+json]') as $ldjson) { + $datePublished = extractFromDelimiters($ldjson->innertext, '"datePublished":"', '"'); + if ($datePublished !== false) { + $date = strtotime($datePublished); + } + $imageObject = extractFromDelimiters($ldjson->innertext, 'ImageObject","url":"', '"'); + if ($imageObject !== false) { + $enclosure = $imageObject; } - - $item = []; - $item['uri'] = $article_uri; - $item['title'] = $article_title; - $item['author'] = $article_author; - $item['timestamp'] = $article_timestamp; - $item['enclosures'] = [$article_thumbnail]; - $item['content'] = $article_content; - $this->items[] = $item; } + + foreach ($content->find('div.c-shortcodeGallery') as $cleanup) { + $cleanup->outertext = ''; + } + + foreach ($content->find('figure') as $figure) { + $img = $figure->find('img', 0); + if ($img) { + $figure->outertext = $img->outertext; + } + } + + $content = $content->innertext; + + if ($enclosure) { + $content = "

    " . $content; + } + + if ($headline) { + $content = '

    ' . $headline->plaintext . '


    ' . $content; + } + + $item = []; + $item['uri'] = $article_uri; + $item['title'] = $title; + + if ($author) { + $item['author'] = $author->plaintext; + } + + $item['content'] = $content; + + if (!is_null($date)) { + $item['timestamp'] = $date; + } + + if (!is_null($enclosure)) { + $item['enclosures'] = [$enclosure]; + } + + $this->items[] = $item; } } } diff --git a/bridges/CNETFranceBridge.php b/bridges/CNETFranceBridge.php index 724564fa..35e92daf 100644 --- a/bridges/CNETFranceBridge.php +++ b/bridges/CNETFranceBridge.php @@ -43,10 +43,8 @@ class CNETFranceBridge extends FeedExpander $this->collectExpandableDatas('https://www.cnetfrance.fr/feeds/rss/news/'); } - protected function parseItem($feedItem) + protected function parseItem(array $item) { - $item = parent::parseItem($feedItem); - foreach ($this->bannedTitle as $term) { if (preg_match('/' . $term . '/mi', $item['title']) === 1) { return null; @@ -54,7 +52,7 @@ class CNETFranceBridge extends FeedExpander } foreach ($this->bannedURL as $term) { - if (preg_match('/' . $term . '/mi', $item['uri']) === 1) { + if (preg_match('#' . $term . '#mi', $item['uri'])) { return null; } } diff --git a/bridges/CVEDetailsBridge.php b/bridges/CVEDetailsBridge.php index 38b37bb7..776f675a 100644 --- a/bridges/CVEDetailsBridge.php +++ b/bridges/CVEDetailsBridge.php @@ -36,12 +36,43 @@ class CVEDetailsBridge extends BridgeAbstract private $vendor = ''; private $product = ''; - // Return the URL to query. - // Because of the optional product ID, we need to attach it if it is - // set. The search result page has the exact same structure (with and - // without the product ID). - private function buildUrl() + public function collectData() { + if ($this->html == null) { + $this->fetchContent(); + } + + $var = $this->html->find('#searchresults > div > div.row'); + foreach ($var as $i => $tr) { + $uri = $tr->find('h3 > a', 0)->href ?? 
null; + $title = $tr->find('h3 > a', 0)->innertext; + $content = $tr->find('.cvesummarylong', 0)->innertext ?? ''; + $timestamp = $tr->find('[data-tsvfield="publishDate"]', 0)->innertext ?? 0; + + $this->items[] = [ + 'uri' => $uri, + 'title' => $title, + 'timestamp' => $timestamp, + 'content' => $content, + 'categories' => [$this->vendor], + 'enclosures' => [], + 'uid' => $title, + ]; + if (count($this->items) >= 30) { + break; + } + } + } + + // Make the actual request to cvedetails.com and stores the response + // (HTML) for later use and extract vendor and product from it. + private function fetchContent() + { + // build url + // Return the URL to query. + // Because of the optional product ID, we need to attach it if it is + // set. The search result page has the exact same structure (with and + // without the product ID). $url = self::URI . '/vulnerability-list/vendor_id-' . $this->getInput('vendor_id'); if ($this->getInput('product_id') !== '') { $url .= '/product_id-' . $this->getInput('product_id'); @@ -51,32 +82,21 @@ class CVEDetailsBridge extends BridgeAbstract // number, which should be mostly accurate. $url .= '?order=1'; // Order by CVE number DESC - return $url; - } - - // Make the actual request to cvedetails.com and stores the response - // (HTML) for later use and extract vendor and product from it. - private function fetchContent() - { - $html = getSimpleHTMLDOM($this->buildUrl()); + $html = getSimpleHTMLDOM($url); $this->html = defaultLinkTo($html, self::URI); - $vendor = $html->find('#contentdiv > h1 > a', 0); + $vendor = $html->find('#contentdiv h1 > a', 0); if ($vendor == null) { - returnServerError('Invalid Vendor ID ' . - $this->getInput('vendor_id') . - ' or Product ID ' . - $this->getInput('product_id')); + throwServerException('Invalid Vendor ID ' . $this->getInput('vendor_id') . ' or Product ID ' . 
$this->getInput('product_id')); } $this->vendor = $vendor->innertext; - $product = $html->find('#contentdiv > h1 > a', 1); + $product = $html->find('#contentdiv h1 > a', 1); if ($product != null) { $this->product = $product->innertext; } } - // Build the name of the feed. public function getName() { if ($this->getInput('vendor_id') == '') { @@ -94,52 +114,4 @@ class CVEDetailsBridge extends BridgeAbstract return $name; } - - // Pull the data from the HTML response and fill the items.. - public function collectData() - { - if ($this->html == null) { - $this->fetchContent(); - } - - foreach ($this->html->find('#vulnslisttable .srrowns') as $i => $tr) { - // There are some optional vulnerability types, which will be - // added to the categories as well as the CWE number -- which is - // always given. - $categories = [$this->vendor]; - $enclosures = []; - - $cwe = $tr->find('td', 2)->find('a', 0); - if ($cwe != null) { - $cwe = $cwe->innertext; - $categories[] = 'CWE-' . $cwe; - $enclosures[] = 'https://cwe.mitre.org/data/definitions/' . $cwe . 
'.html'; - } - $c = $tr->find('td', 4)->innertext; - if (trim($c) != '') { - $categories[] = $c; - } - if ($this->product != '') { - $categories[] = $this->product; - } - - // The CVE number itself - $title = $tr->find('td', 1)->find('a', 0)->innertext; - - $this->items[] = [ - 'uri' => $tr->find('td', 1)->find('a', 0)->href, - 'title' => $title, - 'timestamp' => $tr->find('td', 5)->innertext, - 'content' => $tr->next_sibling()->innertext, - 'categories' => $categories, - 'enclosures' => $enclosures, - 'uid' => $tr->find('td', 1)->find('a', 0)->innertext, - ]; - - // We only want to fetch the latest 10 CVEs - if (count($this->items) >= 10) { - break; - } - } - } } diff --git a/bridges/CachetBridge.php b/bridges/CachetBridge.php index 355e7926..7ee48a5e 100644 --- a/bridges/CachetBridge.php +++ b/bridges/CachetBridge.php @@ -72,14 +72,14 @@ class CachetBridge extends BridgeAbstract { $ping = getContents(urljoin($this->getURI(), '/api/v1/ping')); if (!$this->validatePing($ping)) { - returnClientError('Provided URI is invalid!'); + throwClientException('Provided URI is invalid!'); } $url = urljoin($this->getURI(), '/api/v1/incidents?sort=id&order=desc'); $incidents = getContents($url); $incidents = json_decode($incidents); if ($incidents === null) { - returnClientError('/api/v1/incidents returned no valid json'); + throwClientException('/api/v1/incidents returned no valid json'); } usort($incidents->data, function ($a, $b) { diff --git a/bridges/CarThrottleBridge.php b/bridges/CarThrottleBridge.php index 95641573..8475a414 100644 --- a/bridges/CarThrottleBridge.php +++ b/bridges/CarThrottleBridge.php @@ -1,44 +1,118 @@ [ + 'news' => [ + 'name' => 'news', + 'type' => 'checkbox' + ], + 'reviews' => [ + 'name' => 'reviews', + 'type' => 'checkbox' + ], + 'features' => [ + 'name' => 'features', + 'type' => 'checkbox' + ], + 'videos' => [ + 'name' => 'videos', + 'type' => 'checkbox' + ], + 'gaming' => [ + 'name' => 'gaming', + 'type' => 'checkbox' + ] + ] + ]; public 
function collectData() { - $this->collectExpandableDatas('https://www.carthrottle.com/rss', 10); + $this->items = []; + + $this->handleCategory('news'); + $this->handleCategory('reviews'); + $this->handleCategory('features'); + $this->handleCategory2('videos', 'video'); + $this->handleCategory('gaming'); } - protected function parseItem($feedItem) + private function handleCategory($category) { - $item = parent::parseItem($feedItem); + if ($this->getInput($category)) { + $this->getArticles($category); + } + } - //fetch page - $articlePage = getSimpleHTMLDOMCached($feedItem->link) - or returnServerError('Could not retrieve ' . $feedItem->link); + private function handleCategory2($categoryParameter, $categoryURLname) + { + if ($this->getInput($categoryParameter)) { + $this->getArticles($categoryURLname); + } + } - $subtitle = $articlePage->find('p.standfirst', 0); - $article = $articlePage->find('div.content_field', 0); + private function getArticles($category) + { + $categoryPage = getSimpleHTMLDOMCached(self::URI . $category); - $item['content'] = str_get_html($subtitle . 
$article); + //for each post + foreach ($categoryPage->find('div.cmg-card') as $post) { + $item = []; - //convert '; + EOT; + } + } + + //built golem videos + foreach ($article->find('.gvideofig') as &$embedcontent) { + if (preg_match('/gvideo_(.*)/', $embedcontent->id, $videoid)) { + $embedcontent->innertext .= << + EOT; + } + } + // delete known bad elements foreach ( $article->find('div[id*="adtile"], #job-market, #seminars, iframe, - div.gbox_affiliate, div.toc, .embedcontent, script') as $bad + div.gbox_affiliate, div.toc') as $bad ) { $bad->remove(); } // reload html, as remove() is buggy $article = str_get_html($article->outertext); - if ($pageHeader = $article->find('header.paged-cluster-header h1', 0)) { - $item .= $pageHeader; - } $header = $article->find('header', 0); foreach ($header->find('p, figure') as $element) { @@ -138,7 +152,7 @@ class GolemBridge extends FeedExpander $img->src = $img->getAttribute('data-src-full'); } - foreach ($content->find('p, h1, h3, img[src*="."]') as $element) { + foreach ($content->find('p, h1, h2, h3, pre, img[src*="."], div[class*="golem_tablediv"], iframe, video') as $element) { $item .= $element; } diff --git a/bridges/GooglePlayStoreBridge.php b/bridges/GooglePlayStoreBridge.php index d61be2c8..86343520 100644 --- a/bridges/GooglePlayStoreBridge.php +++ b/bridges/GooglePlayStoreBridge.php @@ -21,30 +21,22 @@ class GooglePlayStoreBridge extends BridgeAbstract ] ]]; - const INFORMATION_MAP = [ - 'Updated' => 'timestamp', - 'Current Version' => 'title', - 'Offered By' => 'author' - ]; - public function collectData() { - $appuri = static::URI . '/details?id=' . $this->getInput('id'); - $html = getSimpleHTMLDOM($appuri); + $id = $this->getInput('id'); + $url = 'https://play.google.com/store/apps/details?id=' . 
$id; + $html = getSimpleHTMLDOM($url); + + $updatedAtElement = $html->find('div.TKjAsc div', 2); + // Updated onSep 27, 2023 + $updatedAt = $updatedAtElement->plaintext; + $description = $html->find('div.bARER', 0); $item = []; - $item['uri'] = $appuri; - $item['content'] = $html->find('div[itemprop=description]', 1)->innertext; - - // Find other fields from Additional Information section - foreach ($html->find('.hAyfc') as $info) { - $index = self::INFORMATION_MAP[$info->first_child()->plaintext] ?? null; - if (is_null($index)) { - continue; - } - $item[$index] = $info->children(1)->plaintext; - } - + $item['uri'] = $url; + $item['title'] = $id . ' ' . $updatedAt; + $item['content'] = $description->innertext ?? ''; + $item['uid'] = 'GooglePlayStoreBridge/' . $updatedAt; $this->items[] = $item; } diff --git a/bridges/GoogleScholarBridge.php b/bridges/GoogleScholarBridge.php index 981355dd..3004180f 100644 --- a/bridges/GoogleScholarBridge.php +++ b/bridges/GoogleScholarBridge.php @@ -2,7 +2,7 @@ class GoogleScholarBridge extends BridgeAbstract { - const NAME = 'Google Scholar v2'; + const NAME = 'Google Scholar'; const URI = 'https://scholar.google.com/'; const DESCRIPTION = 'Search for publications or follow authors on Google Scholar.'; const MAINTAINER = 'nicholasmccarthy'; @@ -109,7 +109,7 @@ class GoogleScholarBridge extends BridgeAbstract case 'user': $userId = $this->getInput('userId'); $uri = self::URI . '/citations?hl=en&view_op=list_works&sortby=pubdate&user=' . $userId; - $html = getSimpleHTMLDOM($uri) or returnServerError('Could not fetch Google Scholar data.'); + $html = getSimpleHTMLDOM($uri); $publications = $html->find('tr[class="gsc_a_tr"]'); @@ -184,7 +184,7 @@ class GoogleScholarBridge extends BridgeAbstract $uri .= $sortBy ? '&scisbd=1' : ''; $uri .= $numResults ? '&num=' . 
$numResults : ''; - $html = getSimpleHTMLDOM($uri) or returnServerError('Could not fetch Google Scholar data.'); + $html = getSimpleHTMLDOM($uri); $publications = $html->find('div[class="gs_r gs_or gs_scl"]'); @@ -193,6 +193,11 @@ class GoogleScholarBridge extends BridgeAbstract $articleUrl = $articleTitleElement->find('a', 0)->href; $articleTitle = $articleTitleElement->plaintext; + // Break the loop if 'Check for Updates' is found in the article title + if (strpos($articleTitle, 'Check for updates') !== false) { + break; + } + $articleDateElement = $publication->find('div[class="gs_a"]', 0); $articleDate = $articleDateElement ? $articleDateElement->plaintext : ''; diff --git a/bridges/GoogleSearchBridge.php b/bridges/GoogleSearchBridge.php index 59465e89..08d05964 100644 --- a/bridges/GoogleSearchBridge.php +++ b/bridges/GoogleSearchBridge.php @@ -5,7 +5,7 @@ class GoogleSearchBridge extends BridgeAbstract const MAINTAINER = 'sebsauvage'; const NAME = 'Google search'; const URI = 'https://www.google.com/'; - const CACHE_TIMEOUT = 1800; // 30min + const CACHE_TIMEOUT = 60 * 30; // 30m const DESCRIPTION = 'Returns max 100 results from the past year.'; const PARAMETERS = [[ @@ -26,7 +26,7 @@ class GoogleSearchBridge extends BridgeAbstract // todo: wrap this in try..catch because 429 too many requests happens a lot $dom = getSimpleHTMLDOM($this->getURI(), ['Accept-language: en-US']); if (!$dom) { - returnServerError('No results for this query.'); + throwServerException('No results for this query.'); } $result = $dom->find('div[id=res]', 0); diff --git a/bridges/GovTrackBridge.php b/bridges/GovTrackBridge.php new file mode 100644 index 00000000..a2c18d9f --- /dev/null +++ b/bridges/GovTrackBridge.php @@ -0,0 +1,111 @@ + [ + 'name' => 'Feed to track', + 'type' => 'list', + 'defaultValue' => 'posts', + 'values' => [ + 'All Legislative Activity' => 'bill-activity', + 'Bill Summaries' => 'bill-summaries', + 'Legislation Coming Up' => 'coming-up', + 'Major Legislative 
Activity' => 'major-bill-activity', + 'New Bills and Resolutions' => 'introduced-bills', + 'New Laws' => 'enacted-bills', + 'News from Us' => 'posts' + ] + ], + 'limit' => self::LIMIT + ]]; + + public function collectData() + { + $limit = $this->getInput('limit') ?? 15; + if ($this->getInput('feed') == 'posts') { + $this->collectExpandableDatas($this->getURI() . '.rss', $limit); + } else { + $this->collectEvent($this->getURI(), $limit); + } + } + + protected function parseItem(array $item) + { + $html = getSimpleHTMLDOMCached($item['uri']); + $html = defaultLinkTo($html, parent::getURI()); + + $item['categories'] = [$html->find('.breadcrumb-item', 1)->plaintext]; + $content = $html->find('#content .col-md', 1); + $item['author'] = explode(' by ', $content->firstChild()->plaintext)[1]; + $content->removeChild($content->firstChild()); + $item['content'] = $content->innertext; + + return $item; + } + + private function collectEvent($uri, $limit) + { + $html = getSimpleHTMLDOMCached($uri); + preg_match('/"csrfmiddlewaretoken" value="(.*)"/', $html, $preg); + $header = [ + "cookie: csrftoken=$preg[1]", + "x-csrftoken: $preg[1]", + 'referer: ' . parent::getURI(), + ]; + preg_match('/var selected_feed = "(.*)";/', $html, $preg); + $opt = [ CURLOPT_POSTFIELDS => [ + 'count' => $limit, + 'feed' => $preg[1] + ]]; + + $html = getContents(parent::getURI() . 'events/_load_events', $header, $opt); + $html = defaultLinkTo(str_get_html($html), parent::getURI()); + + foreach ($html->find('.tracked_event') as $event) { + $bill = $event->find('.event_title a, .event_body a', 0); + $date = explode(' ', $event->find('.event_date', 0)->plaintext); + preg_match('/Sponsor:(.*)\n/', $event->plaintext, $preg); + + $item = [ + 'author' => $preg[1] ?? 
'', + 'content' => $event->find('td', 1)->innertext, + 'enclosures' => [$event->find('img', 0)->src], + 'timestamp' => strtotime(implode(' ', array_slice($date, 2))), + 'title' => explode(': ', $bill->innertext)[0], + 'uri' => $bill->href, + ]; + + foreach ($event->find('.event_title, .event_type span') as $tag) { + if (!$tag->find('a', 0)) { + $item['categories'][] = $tag->plaintext; + } + } + + $this->items[] = $item; + } + } + + public function getName() + { + $name = parent::getName(); + if ($this->getInput('feed') != null) { + $name .= ' - ' . $this->getKey('feed'); + } + return $name; + } + + public function getURI() + { + if ($this->getInput('feed') == 'posts') { + $url = parent::getURI() . $this->getInput('feed'); + } else { + $url = parent::getURI() . 'events/' . $this->getInput('feed'); + } + return $url; + } +} diff --git a/bridges/HackerNewsUserThreadsBridge.php b/bridges/HackerNewsUserThreadsBridge.php index fee96b61..0ab7445d 100644 --- a/bridges/HackerNewsUserThreadsBridge.php +++ b/bridges/HackerNewsUserThreadsBridge.php @@ -21,8 +21,6 @@ class HackerNewsUserThreadsBridge extends BridgeAbstract { $url = 'https://news.ycombinator.com/threads?id=' . $this->getInput('user'); $html = getSimpleHTMLDOM($url); - Debug::log('queried ' . $url); - Debug::log('found ' . 
$html); $item = []; $articles = $html->find('tr[class*="comtr"]'); diff --git a/bridges/HardwareInfoBridge.php b/bridges/HardwareInfoBridge.php deleted file mode 100644 index e295984c..00000000 --- a/bridges/HardwareInfoBridge.php +++ /dev/null @@ -1,67 +0,0 @@ -collectExpandableDatas('https://nl.hardware.info/updates/all.rss', 20); - } - - protected function parseItem($feedItem) - { - $item = parent::parseItem($feedItem); - - //get full article - $articlePage = getSimpleHTMLDOMCached($feedItem->link); - - $article = $articlePage->find('div.article__content', 0); - - //everything under the social bar is not part of the article, remove it - $reachedEndOfArticle = false; - - foreach ($article->find('*') as $child) { - if ( - !$reachedEndOfArticle && isset($child->attr['class']) - && $child->attr['class'] == 'article__content__social-bar' - ) { - $reachedEndOfArticle = true; - } - - if ($reachedEndOfArticle) { - $child->outertext = ''; - } - } - - //get rid of some more elements we don't need - $to_remove_selectors = [ - 'script', - 'div.incontent', - 'div.article__content__social-bar', - 'div#revealNewsTip', - 'div.article__previous_next' - ]; - - foreach ($to_remove_selectors as $selector) { - foreach ($article->find($selector) as $found) { - $found->outertext = ''; - } - } - - // convert iframes to links. meant for embedded YouTube videos. - foreach ($article->find('iframe') as $found) { - $iframeUrl = $found->getAttribute('src'); - - if ($iframeUrl) { - $found->outertext = '
    ' . $iframeUrl . ''; - } - } - - $item['content'] = $article; - return $item; - } -} diff --git a/bridges/HarvardBusinessReviewBridge.php b/bridges/HarvardBusinessReviewBridge.php new file mode 100644 index 00000000..cd99a1ba --- /dev/null +++ b/bridges/HarvardBusinessReviewBridge.php @@ -0,0 +1,88 @@ + [ + 'name' => 'Limit', + 'type' => 'number', + 'required' => true, + 'title' => 'Maximum number of items to return', + 'defaultValue' => 6, //More requires clicking button "Load more" + ], + ]]; + + public function collectData() + { + $url = self::URI . '/the-latest'; + $html = getSimpleHTMLDOM($url); + + foreach ($html->find('li.stream-entry') as $data) { + // Skip if $data is null + if ($data === null) { + continue; + } + + try { + // Skip entries containing the text 'stream-ad-container' + if ($data->innertext !== null && strpos($data->innertext, 'stream-ad-container') !== false) { + continue; + } + + // Skip entries with class 'sponsored' + if ($data->hasClass('sponsored')) { + continue; + } + + $item = []; + $linkElement = $data->find('a', 0); + $titleElement = $data->find('h3.hed a', 0); + $authorElement = $data->find('ul.byline-list li', 0); + $timestampElement = $data->find('li.pubdate time', 0); + $contentElement = $data->find('div.dek', 0); + + if ($linkElement) { + $item['uri'] = self::URI . 
$linkElement->getAttribute('href'); + } else { + continue; // Skip this entry if no link is found + } + if ($titleElement) { + $item['title'] = trim($titleElement->plaintext); + } else { + continue; // Skip this entry if no title is found + } + if ($authorElement) { + $item['author'] = trim($authorElement->plaintext); + } else { + $item['author'] = 'Unknown'; // Default value if author is missing + } + if ($timestampElement) { + $item['timestamp'] = strtotime($timestampElement->plaintext); + } else { + $item['timestamp'] = time(); // Default to current time if timestamp is missing + } + if ($contentElement) { + $item['content'] = trim($contentElement->plaintext); + } else { + $item['content'] = ''; // Default to empty string if content is missing + } + $item['uid'] = hash('sha256', $item['title']); + + $this->items[] = $item; + + if (count($this->items) >= $this->getInput('postcount')) { + break; + } + } catch (Exception $e) { + // Log the error if necessary + continue; // Skip to the next iteration on error + } + } + } +} \ No newline at end of file diff --git a/bridges/HarvardHealthBlogBridge.php b/bridges/HarvardHealthBlogBridge.php new file mode 100644 index 00000000..bb6a5ede --- /dev/null +++ b/bridges/HarvardHealthBlogBridge.php @@ -0,0 +1,71 @@ + [ + 'name' => 'Article Image', + 'type' => 'checkbox', + 'defaultValue' => 'checked', + ], + ], + ]; + + public function collectData() + { + $dom = getSimpleHTMLDOM(self::URI); + $count = 0; + + foreach ($dom->find('div[class="mb-16 md:flex"]') as $element) { + if ($count >= self::MAX_ARTICLES) { + break; + } + + $data = $element->find('a[class="hover:text-red transition-colors duration-200"]', 0); + if (!$data) { + continue; + } + + $url = $data->href; + + $this->items[] = [ + 'content' => $this->constructContent($url), + 'timestamp' => $element->find('time', 0)->datetime, + 'title' => $data->plaintext, + 'uid' => $url, + 'uri' => $url, + ]; + + $count++; + } + } + + private function constructContent($url) + { + 
$dom = getSimpleHTMLDOMCached($url); + + $article = $dom->find('div[class*="content-repository-content"]', 0); + if (!$article) { + return 'Content Not Found'; + } + + // remove article image + if (!$this->getInput('image')) { + $image = $article->find('p', 0); + $image->remove(); + } + + // remove ads + foreach ($article->find('.inline-ad') as $ad) { + $ad->outertext = ''; + } + + return $article->innertext; + } +} diff --git a/bridges/HaveIBeenPwnedBridge.php b/bridges/HaveIBeenPwnedBridge.php index e8a5e4f9..daf040dd 100644 --- a/bridges/HaveIBeenPwnedBridge.php +++ b/bridges/HaveIBeenPwnedBridge.php @@ -48,7 +48,7 @@ class HaveIBeenPwnedBridge extends BridgeAbstract . $pwnCount . ' breached accounts'; $item['dateAdded'] = $breach['AddedDate']; $item['breachDate'] = $breach['BreachDate']; - $item['uri'] = self::URI . '/PwnedWebsites#' . $breach['Name']; + $item['uri'] = self::URI . '/breach/' . $breach['Name']; $item['content'] = '

    ' . $breach['Description'] . '

    '; $item['content'] .= '

    ' . $this->breachType($breach) . '

    '; diff --git a/bridges/HeiseBridge.php b/bridges/HeiseBridge.php index ec8bb96f..b3bbf76b 100644 --- a/bridges/HeiseBridge.php +++ b/bridges/HeiseBridge.php @@ -103,6 +103,16 @@ class HeiseBridge extends FeedExpander 'required' => false, 'title' => 'Specify number of full articles to return', 'defaultValue' => 5 + ], + 'sessioncookie' => [ + 'name' => 'Session Cookie', + 'required' => false, + 'title' => <<<'TITLE' + If you have a heise+ subscription, + you can enter your cookie (ssohls) here to + have heise+ articles displayed in full. + By default the cookie is 1 year valid. + TITLE, ] ]]; const LIMIT = 5; @@ -115,9 +125,9 @@ class HeiseBridge extends FeedExpander ); } - protected function parseItem($feedItem) + protected function parseItem(array $item) { - $item = parent::parseItem($feedItem); + $sessioncookie = $this->getInput('sessioncookie'); // strip rss parameter $item['uri'] = explode('?', $item['uri'])[0]; @@ -126,15 +136,16 @@ class HeiseBridge extends FeedExpander if (strpos($item['uri'], 'https://www.heise.de') !== 0) { return $item; } - - // abort on heise+ articles and link to archive.ph for full-text content - if (str_starts_with($item['title'], 'heise+ |')) { - $item['uri'] = 'https://archive.ph/?run=1&url=' . urlencode($item['uri']); + // abort on heise+ articles + if ($sessioncookie == '' && str_starts_with($item['title'], 'heise+ |')) { + $item['uri'] = 'https://archive.is/' . $item['uri']; return $item; } $item['uri'] .= '?seite=all'; - $article = getSimpleHTMLDOMCached($item['uri']); + $article = getSimpleHTMLDOM($item['uri'], [ + 'cookie: ssohls=' . 
$sessioncookie + ]); if ($article) { $article = defaultLinkTo($article, $item['uri']); @@ -150,9 +161,17 @@ class HeiseBridge extends FeedExpander $article = defaultLinkTo($article, $item['uri']); // remove unwanted stuff - foreach ($article->find('figure.branding, a-ad, div.ho-text, a-img, .opt-in__content-container, .a-toc__list, a-collapse') as $element) { + foreach ( + $article->find('figure.branding, figure.a-inline-image, a-ad, div.ho-text, a-img, + .a-toc__list, a-collapse, .opt-in__description, .opt-in__footnote, .notice-banner__text, .notice-banner__link') as $element + ) { $element->remove(); } + foreach ($article->find('img') as $element) { + if (str_contains($element->alt, 'l+f')) { + $element->remove(); + } + } // reload html, as remove() is buggy $article = str_get_html($article->outertext); @@ -169,7 +188,31 @@ class HeiseBridge extends FeedExpander } } - $categories = $article->find('.article-footer__topics ul.topics li.topics__item'); + //fix for embbedded youtube-videos + $oldlink = ''; + foreach ($article->find('div.video__yt-container') as &$ytvideo) { + if (preg_match('/www.youtube.*?\"/', $ytvideo->innertext, $link) && $link[0] != $oldlink) { + //save link to prevent duplicates + $oldlink = $link[0]; + $ytiframe = << + EOT; + //check if video is in header or article for correct possitioning + if (strpos($header->innertext, $link[0])) { + $item['content'] .= $ytiframe; + } else { + $ytvideo->innertext .= $ytiframe; + $reloadneeded = 1; + } + } + } + if (isset($reloadneeded)) { + $article = str_get_html($article->outertext); + } + + $categories = $article->find('.article-footer__topics ul.topics li.topics__item a-topic a'); foreach ($categories as $category) { $item['categories'][] = trim($category->plaintext); } @@ -177,7 +220,7 @@ class HeiseBridge extends FeedExpander $content = $article->find('.article-content', 0); if ($content) { $contentElements = $content->find( - 'p, h3, ul, table, pre, noscript img, a-bilderstrecke h2, a-bilderstrecke 
figure, a-bilderstrecke figcaption' + 'p, h3, ul, ol, table, pre, noscript img, a-bilderstrecke h2, a-bilderstrecke figure, a-bilderstrecke figcaption, noscript iframe' ); $item['content'] .= implode('', $contentElements); } diff --git a/bridges/HinduTamilBridge.php b/bridges/HinduTamilBridge.php new file mode 100644 index 00000000..50b9b8e6 --- /dev/null +++ b/bridges/HinduTamilBridge.php @@ -0,0 +1,97 @@ + [ + 'name' => 'topic', + 'type' => 'list', + 'defaultValue' => 'crime', + 'values' => [ + 'Astrology' => 'astrology', + 'Blogs' => 'blogs', + 'Business' => 'business', + 'Cartoon' => 'cartoon', + 'Cinema' => 'cinema', + 'Crime' => 'crime', + 'Discussion' => 'discussion', + 'Education' => 'education', + 'Environment' => 'environment', + 'India' => 'india', + 'Lifestyle' => 'life-style', + 'Literature' => 'literature', + 'Opinion' => 'opinion', + 'Reporters' => 'reporters-page', + 'Socialmedia' => 'social-media', + 'Spirituals' => 'spirituals', + 'Sports' => 'sports', + 'Supplements' => 'supplements', + 'Tamilnadu' => 'tamilnadu', + 'Technology' => 'technology', + 'Tourism' => 'tourism', + 'World' => 'world', + ], + ], + 'limit' => [ + 'name' => 'limit (max 100)', + 'type' => 'number', + 'defaultValue' => 10, + ], + ], + ]; + + public function getName() + { + $topic = $this->getKey('topic'); + return self::NAME . ($topic ? ' - ' . $topic : ''); + } + + public function collectData() + { + $limit = min(100, $this->getInput('limit')); + $url = self::FEED_BASE_URL . $this->getInput('topic'); + $this->collectExpandableDatas($url, $limit); + } + + protected function parseItem($item) + { + $dom = getSimpleHTMLDOMCached($item['uri']); + $content = $dom->find('#pgContentPrint', 0); + + if ($content === null) { + return $item; + } + + $item['timestamp'] = $this->getTimestamp($dom) ?? $item['timestamp']; + $item['content'] = $this->getImage($dom) . 
$this->cleanContent($content); + + return $item; + } + + private function cleanContent($content): string + { + foreach ($content->find('div[align="center"], script, .adsplacement') as $remove) { + $remove->outertext = ''; + } + + return $content->innertext; + } + + private function getTimestamp($dom): ?string + { + $date = $dom->find('meta[property="article:published_time"]', 0); + return $date ? $date->getAttribute('content') : null; + } + + private function getImage($dom): string + { + $image = $dom->find('meta[property="og:image"]', 0); + return $image ? sprintf('

    ', $image->getAttribute('content')) : ''; + } +} diff --git a/bridges/HotUKDealsBridge.php b/bridges/HotUKDealsBridge.php index 7328ca04..7450c6f0 100644 --- a/bridges/HotUKDealsBridge.php +++ b/bridges/HotUKDealsBridge.php @@ -40,3202 +40,23 @@ class HotUKDealsBridge extends PepperBridgeAbstract 'Deals per group' => [ 'group' => [ 'name' => 'Group', - 'type' => 'list', - 'title' => 'Group whose deals must be displayed', - 'values' => [ - '3D Blu-ray' => '3d-bluray', - '3D Printer' => '3d-printer', - '3D TV' => '3d-tv', - '4K Blu-ray' => '4k-bluray', - '4K Monitor' => '4k-monitor', - '4K TV' => '4k-tv', - '5G Phones' => '5g-phones', - '7 Up' => '7up', - '8K TV' => '8k-tv', - '32 inch TV' => '32-inch-tv', - '40 inch TV' => '40-inch-tv', - '55 inch TV' => '55-inch-tv', - '65 inch TV' => '65-inch-tv', - '75 inch TV' => '75-inch-tv', - '144Hz Monitor' => '144hz', - 'A4 Paper' => 'a4-paper', - 'AAA Battery' => 'aaa', - 'AA Battery' => 'aa', - 'Abercrombie' => 'abercrombie', - 'Aberlour' => 'aberlour', - 'Accommodation' => 'accomodation', - 'Accurist' => 'accurist', - 'Ace Combat 7: Skies Unknown' => 'ace-combat-7', - 'Acer' => 'acer', - 'Acer Aspire' => 'acer-aspire', - 'Acer Laptop' => 'acer-laptop', - 'Acer PC Monitor' => 'acer-pc-monitor', - 'Acer Predator' => 'acer-predator', - 'Action Camera' => 'action-camera', - 'Action Figure & Playsets' => 'playsets', - 'Activewear' => 'sports-clothes', - 'Activia' => 'activia', - 'adidas' => 'adidas', - 'adidas Continental' => 'continental', - 'Adidas Gazelle' => 'gazelle', - 'Adidas Originals' => 'adidas-originals', - 'Adidas Samba' => 'samba', - 'Adidas Stan Smith' => 'stan-smith', - 'Adidas Superstar' => 'adidas-superstar', - 'Adidas Trainers' => 'adidas-shoes', - 'Adidas Ultraboost' => 'adidas-ultraboost', - 'Adidas ZX Flux' => 'adidas-zx-flux', - 'Adobe' => 'adobe', - 'Adobe Lightroom' => 'lightroom', - 'Adobe Photoshop' => 'photoshop', - 'Adult Products' => 'adult', - 'Advent Calendar' => 'advent-calendar', - 
'Adventure Time' => 'adventure-time', - 'AEG' => 'aeg', - 'Aftershave' => 'aftershave', - 'Age Of Empires' => 'age-of-empires', - 'Air Bed' => 'air-bed', - 'Air Conditioner' => 'air-con', - 'Airer' => 'airer', - 'Airfix' => 'airfix', - 'Air Fryer' => 'air-fryer', - 'Airline' => 'airline', - 'Airport' => 'airport', - 'Airport Parking' => 'airport-parking', - 'Air Purifier' => 'air-purifier', - 'AirTag' => 'airtag', - 'Air Treatment' => 'air-treatment', - 'AKG' => 'akg', - 'Alarm Clock' => 'alarm-clock', - 'Alarm System' => 'alarm-system', - 'Alcatel' => 'alcatel', - 'Alcohol' => 'alcohol', - 'Alesis' => 'alesis', - 'Alien: Isolation' => 'alien-isolation', - 'Alienware' => 'alienware', - 'All-in-One PC' => 'all-in-one-pc', - 'All-in-One Printer' => 'all-in-one-printer', - 'Alloy Wheel' => 'alloy-wheels', - 'All Saints' => 'all-saints', - 'Almonds' => 'almonds', - 'Alpro' => 'alpro', - 'Alton Towers' => 'alton-towers', - 'Amazfit' => 'xiaomi-amazfit', - 'Amazfit Bip' => 'xiaomi-amazfit-bip', - 'Amazfit GTS' => 'amazfit-gts', - 'Amazfit Verge' => 'amazfit-verge', - 'Amazfit Verge Lite' => 'amazfit-verge-lite', - 'Amazfit Watch' => 'amazfit-watch', - 'Amazon Add On Item' => 'add-on-item', - 'Amazon Business' => 'amazon-business', - 'Amazon Echo' => 'amazon-echo', - 'Amazon Echo Dot' => 'amazon-echo-dot', - 'Amazon Echo Plus' => 'amazon-echo-plus', - 'Amazon Echo Show' => 'amazon-echo-show', - 'Amazon Echo Show 5' => 'echo-show-5', - 'Amazon Echo Show 8' => 'amazon-echo-show-8', - 'Amazon Echo Spot' => 'amazon-echo-spot', - 'Amazon Fire 7' => 'amazon-fire-7', - 'Amazon Fire HD 8' => 'amazon-fire-hd-7', - 'Amazon Fire HD 10 Tablet' => 'amazon-fire-hd-10', - 'Amazon Fire Tablet' => 'amazon-tablet', - 'Amazon Fire TV Cube' => 'fire-tv-cube', - 'Amazon Fire TV Stick' => 'amazon-fire-stick', - 'Amazon Pantry' => 'amazon-pantry', - 'Amazon Prime' => 'amazon-prime', - 'Amazon Prime Video' => 'amazon-video', - 'Amazon Warehouse' => 'amazon-warehouse', - 'AMD' => 'amd', - 'AMD 
Radeon' => 'radeon', - 'AMD Ryzen' => 'amd-ryzen', - 'AMD Ryzen 5 5600X' => 'amd-ryzen-5-5600x', - 'AMD Ryzen 7 5800X' => 'amd-ryzen-7-5800x', - 'AMD Ryzen 9 5900X' => 'amd-ryzen-9-5900x', - 'AMD Ryzen 9 5950X' => 'amd-ryzen-9-5950x', - 'Amex' => 'amex', - 'Amiibo' => 'amiibo', - 'Amplifier' => 'amplifier', - 'Anchor Butter' => 'anchor-butter', - 'Andrex' => 'andrex', - 'Android Apps' => 'android-app', - 'Android Smartphone' => 'android-smartphone', - 'Android Tablet' => 'android-tablet', - 'Angelcare' => 'angelcare', - 'Angle Grinder' => 'grinder', - 'Anglepoise' => 'anglepoise', - 'Angry Birds' => 'angry-birds', - 'Animal Crossing' => 'animal-crossing', - 'Anime' => 'anime', - 'Anker' => 'anker', - 'Ankle Boots' => 'ankle-boots', - 'Anno 1800' => 'anno-1800', - 'Anthem' => 'anthem', - 'Antibacterial Hand Gel' => 'hand-gel', - 'Antibacterial Wipes' => 'cleaning-wipes', - 'Antivirus' => 'antivirus', - 'Antler' => 'antler', - 'AOC' => 'aoc', - 'Apex Legends' => 'apex-legends', - 'A Plague Tale: Innocence' => 'a-plague-tale-innocence', - 'App' => 'app', - 'Apple' => 'apple', - 'Apple AirPods' => 'apple-airpods', - 'Apple Airpods 2' => 'airpods-2', - 'Apple Airpods Max' => 'airpods-max', - 'Apple Airpods Pro' => 'airpods-pro', - 'Apple EarPods' => 'earpods', - 'Apple Headphones' => 'apple-headphones', - 'Apple HomePod' => 'apple-homepod', - 'Apple HomePod mini' => 'apple-homepod-mini', - 'Apple Keyboard' => 'apple-keyboard', - 'Apple Pencil' => 'apple-pencil', - 'Apple TV' => 'apple-tv', - 'Apple TV 4K' => 'apple-tv-4k', - 'Apple Watch' => 'apple-watch', - 'Apple Watch 3' => 'apple-watch-3', - 'Apple Watch 4' => 'apple-watch-4', - 'Apple Watch 5' => 'apple-watch-5', - 'Apple Watch 6' => 'apple-watch-6', - 'Apple Watch SE' => 'apple-watch-se', - 'Apron' => 'apron', - 'Aquadoodle' => 'aquadoodle', - 'Aqua Optima' => 'aqua-optima', - 'Aquarium' => 'aquarium', - 'Aramis' => 'aramis', - 'Argan Oil' => 'argan-oil', - 'Ariel' => 'ariel', - 'Ark' => 'ark', - 'Armani' => 
'armani', - 'Armchair' => 'armchair', - 'Armed Forces Discount' => 'armed-forces', - 'Arsenal F. C.' => 'arsenal', - 'Arts and Crafts' => 'craft', - 'Asics' => 'asics', - 'Ask' => 'ask', - 'ASRock' => 'asrock', - 'Assassin's Creed' => 'assassins-creed', - 'Assassin's Creed: Odyssey' => 'assassins-creed-odyssey', - 'Assassin's Creed: Origins' => 'assassins-creed-origins', - 'Assassin's Creed: Unity' => 'assassins-creed-unity', - 'Assassin's Creed: Valhalla' => 'assasins-creed-valhalla', - 'Astral Chain' => 'astral-chain', - 'ASTRO Gaming' => 'astro-gaming', - 'Astro Gaming A40' => 'astro-gaming-a40', - 'Astro Gaming A50' => 'astro-gaming-a50', - 'Asus' => 'asus', - 'ASUS Laptop' => 'asus-laptop', - 'ASUS Monitor' => 'asus-monitor', - 'ASUS ROG' => 'asus-rog', - 'Asus ROG Phone' => 'asus-rog-phone', - 'Asus ROG Phone 2' => 'asus-rog-phone-2', - 'ASUS Router' => 'asus-router', - 'Asus Smartphone' => 'asus-smartphone', - 'ASUS Vivobook' => 'asus-vivobook', - 'ASUS Zenbook' => 'zenbook', - 'Asus ZenFone 6' => 'asus-zenfone-6', - 'Atari' => 'atari', - 'Audi' => 'audi', - 'Audio & Hi-Fi' => 'audio', - 'Audio Accessories' => 'audio-accessories', - 'Audiobook' => 'audiobook', - 'Audio Technica' => 'audio-technica', - 'Aukey' => 'aukey', - 'Aussie' => 'aussie', - 'Autoglym' => 'autoglym', - 'Aveeno' => 'aveeno', - 'Avengers' => 'avengers', - 'AVG' => 'avg', - 'Aviva' => 'aviva', - 'Avon' => 'avon', - 'AV Receiver' => 'av-receiver', - 'Axe' => 'axe', - 'Baby Annabell' => 'baby-annabell', - 'Baby Bath' => 'baby-bath', - 'Baby Born' => 'baby-born', - 'Baby Bottle' => 'baby-bottles', - 'Baby Bouncer' => 'bouncer', - 'Baby Carrier' => 'baby-carrier', - 'Baby Clothes' => 'baby-clothes', - 'Baby Food' => 'baby-food', - 'Baby Gym' => 'baby-gym', - 'Baby Jogger' => 'baby-jogger', - 'Babyliss' => 'babyliss', - 'Baby Monitor' => 'baby-monitor', - 'Baby Shoes' => 'baby-shoes', - 'Baby Swing' => 'baby-swing', - 'Baby Walker' => 'baby-walker', - 'Baby Wipes' => 'wipes', - 'Bacardi' => 
'bacardi', - 'Backpack' => 'backpack', - 'Back to the Future' => 'back-to-the-future', - 'Bacon' => 'bacon', - 'Badminton' => 'badminton', - 'Bag' => 'bag', - 'Bagless Vacuum Cleaner' => 'bagless-vacuum-cleaner', - 'Bahco' => 'bahco', - 'Baileys' => 'baileys', - 'Baked Beans' => 'baked-beans', - 'Bakery Products' => 'bakery-products', - 'Baking' => 'baking', - 'Ball Pit' => 'ball-pit', - 'Ballpoint Pen' => 'pen', - 'Band of Brothers' => 'band-of-brothers', - 'Bang & Olufsen' => 'bang-olufsen', - 'Bank' => 'bank', - 'Bank Account' => 'bank-account', - 'Banks & Credit Cards' => 'bank-credit-card', - 'Barbell' => 'barbell', - 'Barbie' => 'barbie', - 'Barbour' => 'barbour', - 'Barclaycard' => 'barclaycard', - 'Barclays' => 'barclays', - 'Barebones PC' => 'barebones', - 'bareMinerals' => 'bareminerals', - 'Barry M' => 'barry-m', - 'Bar Stools' => 'bar-stools', - 'Base Layer' => 'base-layer', - 'Basket' => 'basket', - 'Basketball' => 'basketball', - 'Basmati Rice' => 'basmati-rice', - 'Bath Mat' => 'bath-mat', - 'Bathroom Accessories' => 'bathroom', - 'Bathroom Cabinet' => 'bathroom-cabinet', - 'Bathroom Scale' => 'bathroom-scales', - 'Bathroom Tap' => 'tap', - 'Batman' => 'batman', - 'Battery' => 'battery', - 'Battleborn' => 'battleborn', - 'Battlefield' => 'battlefield', - 'Battlefield 1' => 'battlefield-1', - 'Battlefield 4' => 'battlefield-4', - 'Battlefield 5' => 'battlefield-5', - 'Battlestar Galactica' => 'battlestar-galactica', - 'Baylis & Harding' => 'baylis-and-harding', - 'Bayonetta' => 'bayonetta', - 'Bayonetta 2' => 'bayonetta-2', - 'Baywatch' => 'baywatch', - 'BB-8' => 'bb-8', - 'BBC' => 'bbc', - 'BBQ Food' => 'bbq', - 'BBQs and Grills' => 'grill', - 'Bean Bag' => 'bean-bag', - 'Beanie Hat' => 'beanie-hat', - 'Bean to Cup Machine' => 'bean-to-cup', - 'Beard Trimmer' => 'beard-trimmer', - 'Beats by Dre' => 'beats-by-dre', - 'Beats Solo 3' => 'beats-solo-3', - 'Beats Studio 3' => 'beats-studio-3', - 'Beauty' => 'beauty-care', - 'Beauty and the Beast' => 
'beauty-and-the-beast', - 'Becks' => 'becks', - 'Bed' => 'bed', - 'Bedding' => 'bedding', - 'Bedding & Linens' => 'bedding-linens', - 'Bed Frame' => 'bed-frame', - 'Bedroom' => 'bedroom-furniture', - 'Beef' => 'beef', - 'Beer' => 'beer', - 'Beer Advent Calendar' => 'beer-advent-calendar', - 'Beko' => 'beko', - 'Belkin' => 'belkin', - 'Belstaff' => 'belstaff', - 'Belt' => 'belt', - 'BelVita' => 'belvita', - 'Ben & Jerry's' => 'ben-jerrys', - 'Benefit Cosmetics' => 'benefit-cosmetics', - 'BenQ' => 'benq', - 'BenQ Monitor' => 'benq-monitor', - 'Ben Sherman' => 'ben-sherman', - 'BeoPlay Headphones' => 'beoplay-headphones', - 'Beoplay Speakers' => 'beoplay', - 'Berghaus' => 'berghaus', - 'Bestway' => 'bestway', - 'Betting' => 'betting', - 'Beyerdynamic' => 'beyerdynamic', - 'Bic' => 'bic', - 'Bike' => 'bike', - 'Bike Accessories' => 'bike-accessories', - 'Bike Brake' => 'brakes', - 'Bike Computer' => 'bike-computer', - 'Bike Helmet' => 'bicycle-helmet', - 'Bike Inner Tube' => 'inner-tube', - 'Bike Lights' => 'bike-lights', - 'Bike Lock' => 'bike-lock', - 'Bike Parts' => 'bike-parts', - 'Bike Pump' => 'bike-pump', - 'Biker Equipment' => 'biker-equipment', - 'Bike Saddle' => 'saddle', - 'Biking & Urban Sports' => 'biking-urban-sports', - 'Bikini' => 'bikini', - 'Billabong' => 'billabong', - 'Bin' => 'bin', - 'Binatone' => 'binatone', - 'Bingo' => 'bingo', - 'Binoculars' => 'binoculars', - 'Bio Oil' => 'bio-oil', - 'Bioshock' => 'bioshock', - 'Birds Eye' => 'birds-eye', - 'Birkenstock' => 'birkenstock', - 'Biscuits' => 'biscuits', - 'Bissell' => 'bissell', - 'Bistro Set' => 'bistro-set', - 'Bitdefender' => 'bitdefender', - 'Black & Decker' => 'black-decker', - 'Blackberry Smartphone' => 'blackberry', - 'Blanket' => 'blanket', - 'Blaupunkt' => 'blaupunkt', - 'Blazer' => 'blazer', - 'Bleach' => 'bleach', - 'Blended Malt' => 'malt', - 'Blender' => 'blender', - 'Blinds' => 'blinds', - 'Blink XT2 Smart Security Camera' => 'blink-xt2', - 'Blizzard' => 'blizzard', - 'Blood & 
Truth' => 'blood-and-truth', - 'Bloodborne' => 'bloodborne', - 'Blood Pressure Monitor' => 'blood-pressure', - 'Blu-ray' => 'blu-ray', - 'Blu-ray Player' => 'blu-ray-player', - 'Bluetooth Headphones' => 'bluetooth-headphones', - 'Bluetooth Speaker' => 'bluetooth-speaker', - 'BMW' => 'bmw', - 'BMW Mini Cooper' => 'mini-cooper', - 'BMX' => 'bmx', - 'Board Game' => 'board-game', - 'Boardman' => 'boardman', - 'Boat Shoes' => 'boat-shoes', - 'Bodum' => 'bodum', - 'Bogof' => 'bogof', - 'Boiler' => 'boiler', - 'Bold' => 'bold', - 'Bombay Sapphire' => 'bombay-sapphire', - 'Bomber Jacket' => 'bomber-jacket', - 'Bonne Maman' => 'bonne-maman', - 'Bonsai' => 'bonsai', - 'Book' => 'book', - 'Bookcase' => 'bookcase', - 'Books & Magazines' => 'books-magazines', - 'Booster Seat' => 'booster-seat', - 'Boots' => 'boots', - 'Borderlands' => 'borderlands', - 'Borderlands 3' => 'borderlands-3', - 'Bosch' => 'bosch', - 'Bosch Dishwasher' => 'bosch-dishwasher', - 'Bosch Drill' => 'bosch-drill', - 'Bosch Fridge' => 'bosch-fridge', - 'Bosch Rotak' => 'rotak', - 'Bosch Washing Machine' => 'bosch-washing-machine', - 'Bose' => 'bose', - 'Bose Headphones' => 'bose-headphones', - 'Bose Noise Cancelling Headphones 700' => 'bose-headphones-700', - 'Bose QuietComfort' => 'bose-quietcomfort', - 'Bose QuietComfort 35 II' => 'bose-quietcomfort-35-ii', - 'Bose SoundLink' => 'bose-soundlink', - 'Bose SoundLink Around-Ear II' => 'bose-soundlink-2', - 'Bose SoundTouch' => 'bose-soundtouch', - 'BOSS' => 'hugo-boss', - 'Boss Bottled' => 'boss-bottled', - 'Bouncy Castle' => 'bouncy-castle', - 'Bourbon' => 'bourbon', - 'Bourjois' => 'bourjois', - 'Bowers & Wilkins' => 'bowers-wilkins', - 'Bowling' => 'bowling', - 'Bowmore' => 'bowmore', - 'Boxers' => 'boxers', - 'Boxing' => 'boxing', - 'Boxing Gloves' => 'boxing-gloves', - 'Boy's Clothes' => 'clothes-for-boys', - 'Bra' => 'bra', - 'Brabantia' => 'brabantia', - 'Bracelet' => 'bracelet', - 'Brands' => 'brand', - 'Brandy' => 'brandy', - 'Branston' => 
'branston', - 'Branston Beans' => 'branston-beans', - 'Braun' => 'braun', - 'Braun Series 3' => 'braun-series-3', - 'Braun Series 5' => 'braun-series-5', - 'Braun Series 7' => 'braun-series-7', - 'Braun Series 9' => 'braun-series-9', - 'Braun Shaver' => 'braun-shaver', - 'Bread' => 'bread', - 'Breadmaker' => 'breadmaker', - 'Breakdown Cover' => 'breakdown', - 'Breaking Bad' => 'breaking-bad', - 'Breast Pump' => 'breast-pump', - 'Breville' => 'breville', - 'Breville Blend Active' => 'blendactive', - 'Brewdog' => 'brewdog', - 'Bridge Camera' => 'bridge-camera', - 'Briefcase' => 'briefcase', - 'Brita' => 'brita', - 'Britax' => 'britax', - 'British Airways' => 'british-airways', - 'Broadband' => 'broadband', - 'Broadband & Phone Contracts' => 'broadband-phone-service', - 'Brogues' => 'brogues', - 'Brother' => 'brother', - 'Brother Printer' => 'brother-printer', - 'Brownie' => 'brownie', - 'BT' => 'bt', - 'BT Sport' => 'bt-sport', - 'Budweiser' => 'budweiser', - 'Buffalo' => 'buffalo', - 'Bugaboo' => 'bugaboo', - 'Buggy' => 'buggy', - 'Build-A-Bear' => 'build-a-bear', - 'Bulb' => 'bulbs', - 'Bulletstorm' => 'bulletstorm', - 'Bulmers' => 'bulmers', - 'Bulova' => 'bulova', - 'Burberry' => 'burberry', - 'Burger' => 'burger', - 'Burnout Paradise' => 'burnout-paradise', - 'Burt's Bees' => 'burts-bees', - 'Bus and Coach Ticket' => 'bus', - 'Bush' => 'bush', - 'Bushmills' => 'bushmills', - 'Butter' => 'butter', - 'Buying From Abroad' => 'buying-from-abroad', - 'Bvlgari' => 'bvlgari', - 'Cabin Case' => 'cabin-case', - 'Cabinet' => 'cabinet', - 'Cable Reel' => 'cable-reel', - 'Cables' => 'cables', - 'Cadbury's' => 'cadbury', - 'Café Rouge' => 'cafe-rouge', - 'Cafetière' => 'cafetiere', - 'Caffè Nero' => 'cafe-nero', - 'Cake' => 'cake', - 'Calculator' => 'calculator', - 'Calendar' => 'calendar', - 'Call of Duty' => 'call-of-duty', - 'Call of Duty: Black Ops' => 'black-ops', - 'Call of Duty: Black Ops 3' => 'black-ops-3', - 'Call of Duty: Black Ops 4' => 'black-ops-4', - 'Call of 
Duty: Black Ops Cold War' => 'call-of-duty-black-ops-cold-war', - 'Call of Duty: Infinite Warfare' => 'call-of-duty-infinite-warfare', - 'Call of Duty: Modern Warfare' => 'modern-warfare', - 'Call of Duty: WW2' => 'call-of-duty-ww2', - 'Calpol' => 'calpol', - 'Calvin Klein' => 'calvin-klein', - 'Camcorder' => 'camcorder', - 'Camelbak' => 'camelbak', - 'Camera' => 'camera', - 'Camera Accessories' => 'camera-accessories', - 'Camera Bag' => 'camera-bag', - 'Camera Lens' => 'lens', - 'Camping' => 'camping', - 'Campingaz' => 'campingaz', - 'Candle' => 'candle', - 'Cannondale' => 'cannondale', - 'Canon' => 'canon', - 'Canon Camera' => 'canon-camera', - 'Canon EOS' => 'canon-eos', - 'Canon Lens' => 'canon-lens', - 'Canon Pixma' => 'canon-pixma', - 'Canon PowerShot' => 'canon-powershot', - 'Canon PowerShot SX430 IS' => 'canon-powershot-sx430-is', - 'Canon Printer' => 'canon-printer', - 'Canterbury' => 'canterbury', - 'Canton' => 'canton', - 'Canvas Print' => 'canvas-print', - 'Cap' => 'cap', - 'Capsule Machine' => 'capsule-machine', - 'Captain America' => 'captain-america', - 'Captain Morgan' => 'captain-morgan', - 'Captain Toad: Treasure Tracker' => 'captain-toad-treasure-tracker', - 'Car' => 'car', - 'Car & Motorcycle' => 'car-motorcycle', - 'Car Accessories' => 'car-accessories', - 'Caravan' => 'caravan', - 'Car Battery' => 'car-battery', - 'Carbon Monoxide Detector' => 'carbon-monoxide', - 'Car Care' => 'car-care', - 'Car Charger' => 'car-charger', - 'Cardhu' => 'cardhu', - 'Cardigan' => 'cardigan', - 'Card Reader' => 'card-reader', - 'Carex' => 'carex', - 'Carhartt' => 'carhartt', - 'Car Hire' => 'car-hire', - 'Car Insurance' => 'car-insurance', - 'Car Leasing' => 'car-lease', - 'Carling' => 'carling', - 'Car Lock' => 'lock', - 'Carlsberg' => 'carlsberg', - 'Car Mats' => 'car-mats', - 'Carolina Herrera' => 'carolina-herrera', - 'Car Parts' => 'car-parts', - 'Carpet' => 'carpet', - 'Carpet Cleaner' => 'carpet-cleaner', - 'CarPlan' => 'carplan', - 'Car Polish' => 
'car-polish', - 'Carrera Bikes' => 'carrera', - 'Car Seat' => 'car-seat', - 'Car Service' => 'car-service', - 'Car Stereo' => 'car-stereo', - 'Car Wash' => 'car-wash', - 'Car Wax' => 'car-wax', - 'Casio' => 'casio', - 'Casio Eco-Drive' => 'eco-drive', - 'Casio Edifice' => 'edifice', - 'Casio G-Shock' => 'g-shock', - 'Casserole' => 'casserole', - 'Cast Iron Pots and Pans' => 'cast-iron', - 'Castrol' => 'castrol', - 'Caterpillar' => 'caterpillar', - 'Cat Flap' => 'cat-flap', - 'Cat Food' => 'cat-food', - 'Cath Kidston' => 'cath-kidston', - 'Cat Supplies' => 'cat-supplies', - 'CCTV' => 'cctv', - 'CD' => 'cd', - 'CD Player' => 'cd-player', - 'Ceiling Light' => 'ceiling-light', - 'Celebrations' => 'celebrations', - 'Cereal' => 'cereal', - 'Cetirizine' => 'cetirizine', - 'Chad Valley' => 'chad-valley', - 'Chainsaw' => 'chainsaw', - 'Champagne' => 'champagne', - 'Champneys' => 'champneys', - 'Chanel' => 'chanel', - 'Chanel Coco Mademoiselle' => 'coco-mademoiselle', - 'Changing Bag' => 'changing-bag', - 'Channel 4' => 'channel-4', - 'Charger' => 'charger', - 'Cheese' => 'cheese', - 'Chelsea Boots' => 'chelsea-boots', - 'Chelsea F. C.' 
=> 'chelsea', - 'Chess' => 'chess', - 'Chessington' => 'chessington', - 'Chest Freezer' => 'chest-freezer', - 'Chest of Drawers' => 'chest-of-drawers', - 'Chicco' => 'chicco', - 'Chicken' => 'chicken', - 'Childcare' => 'baby', - 'Children's Books' => 'childrens-books', - 'Chino' => 'chino', - 'Chisel' => 'chisel', - 'Chloe' => 'chloe', - 'Chocolate' => 'chocolate', - 'Chocolate Advent Calendar' => 'chocolate-advent-calendar', - 'Chopper' => 'chopper', - 'Chopping Board' => 'chopping-board', - 'Christmas Card' => 'christmas-card', - 'Christmas Decoration' => 'christmas-decorations', - 'Christmas Gift' => 'christmas-gifts', - 'Christmas Jumper' => 'christmas-jumper', - 'Christmas Lights' => 'christmas-lights', - 'Christmas Stocking Fillers' => 'christmas-stocking-fillers', - 'Christmas Toys' => 'christmas-toys', - 'Christmas Tree' => 'christmas-tree', - 'Chromebook' => 'chromebook', - 'Chromecast' => 'chromecast', - 'Chromecast Ultra' => 'chromecast-ultra', - 'Chromecast with Google TV' => 'chromecast-google-tv', - 'Chronograph' => 'chronograph', - 'Chupa Chups' => 'chupa-chups', - 'Chuwi' => 'chuwi', - 'Cider' => 'cider', - 'Cinema' => 'cinema', - 'Cineworld' => 'cineworld', - 'Circular Saw' => 'circular-saw', - 'Circulon' => 'circulon', - 'Ciroc' => 'ciroc', - 'Cities Skylines' => 'cities-skylines', - 'Citizen' => 'citizen', - 'Citroen' => 'citroen', - 'City Break' => 'city-breaks', - 'Civilization' => 'civilization', - 'Clarins' => 'clarins', - 'Clarks' => 'clarks', - 'Clearance' => 'clearance', - 'Climbing' => 'climbing', - 'Climbing Frame' => 'climbing-frame', - 'Clinique' => 'clinique', - 'Clothes' => 'clothes', - 'Cloud Service' => 'cloud', - 'Clutch Bag' => 'clutch', - 'Coat' => 'coat', - 'Coca Cola' => 'coke', - 'Cocktail' => 'cocktail', - 'Coconut Oil' => 'coconut', - 'Coffee' => 'coffee', - 'Coffee Beans' => 'coffee-beans', - 'Coffee Machine' => 'coffee-machine', - 'Coffee Pods' => 'coffee-pods', - 'Coffee Table' => 'coffee-table', - 'Cognac' => 'cognac', 
- 'Cola' => 'cola', - 'Coleman' => 'coleman', - 'Colgate' => 'colgate', - 'Combi Drill' => 'combi', - 'Comfort' => 'comfort', - 'Comic' => 'comic', - 'Command & Conquer' => 'command-and-conquer', - 'Compact Camera' => 'compact-camera', - 'Compact Flash' => 'compact-flash', - 'Competitions' => 'competitions', - 'Compost' => 'compost', - 'Compressor' => 'compressor', - 'Computer Accessories' => 'computer-accessories', - 'Computers & Tablets' => 'computers', - 'Concert' => 'concert', - 'Condé Nast' => 'conde-nast', - 'Conditioner' => 'conditioner', - 'Condom' => 'condom', - 'Connectors' => 'connectors', - 'Contact Lenses' => 'contact-lenses', - 'Contents Insurance' => 'contents-insurance', - 'Controller' => 'controller', - 'Converse' => 'converse', - 'Converse Chuck Taylor' => 'chuck-taylor', - 'Cooker' => 'cooker', - 'Cooking Oil' => 'cooking-oil', - 'Cookware' => 'cooking', - 'Cookware Set' => 'cookware-set', - 'Cookworks' => 'cookworks', - 'Cool Box' => 'cool-box', - 'Coors Light' => 'coors-light', - 'Cordless Drill' => 'cordless-drill', - 'Cordless Phone' => 'cordless-phone', - 'Cornetto' => 'cornetto', - 'Corona Beer' => 'corona', - 'Corsair' => 'corsair', - 'Cosatto' => 'cosatto', - 'Costa Coffee' => 'costa-coffee', - 'Costume' => 'costume', - 'Cot' => 'cot', - 'Counter Strike' => 'counter-strike', - 'Courses and Training' => 'education', - 'Cow & Gate' => 'cow-and-gate', - 'Cozy Coupe' => 'cozy-coupe', - 'CPU' => 'cpu', - 'CPU Cooler' => 'cpu-cooler', - 'Craghoppers' => 'craghoppers', - 'Crash Bandicoot' => 'crash-bandicoot', - 'Crash Team Racing Nitro-Fueled' => 'crash-team-racing-nitro-fueled', - 'Crayola' => 'crayola', - 'Creatine' => 'creatine', - 'Credit Card' => 'credit-card', - 'Creme Egg' => 'creme-egg', - 'Cricket' => 'cricket', - 'Crisps' => 'crisps', - 'Crocs' => 'crocs', - 'Cross Trainer' => 'cross-trainer', - 'Crown Paint' => 'crown', - 'Crucial' => 'crucial', - 'Cruelty Free Makeup' => 'cruelty-free-makeup', - 'Cruises' => 'cruise', - 'Cube Bikes' 
=> 'cube', - 'Cubot' => 'cubot', - 'Cufflinks' => 'cufflinks', - 'Culture & Leisure' => 'entertainment', - 'Cuphead' => 'cuphead', - 'Cuprinol' => 'cuprinol', - 'Curling Wand' => 'curling-wand', - 'Curtain' => 'curtain', - 'Cushelle' => 'cushelle', - 'Cushion' => 'cushion', - 'Cutlery' => 'cutlery', - 'CyberLink' => 'cyberlink', - 'Cyberpunk 2077' => 'cyberpunk-2077', - 'Cybex' => 'cybex', - 'Cycling' => 'cycling', - 'Cycling Jacket' => 'cycling-jacket', - 'D-Link' => 'd-link', - 'DAB Radio' => 'dab-radio', - 'Dacia' => 'dacia', - 'Daily Mail' => 'daily-mail', - 'Dairy Milk' => 'dairy-milk', - 'Darksiders' => 'darksiders', - 'Dark Souls' => 'dark-souls', - 'Dark Souls 3' => 'dark-souls-3', - 'Dartboard' => 'dartboard', - 'Darts' => 'darts', - 'Dash Cam' => 'dash-cam', - 'Data Storage' => 'storage', - 'Davidoff' => 'davidoff', - 'Days Gone' => 'days-gone', - 'Days Out' => 'days-out', - 'Daz' => 'daz', - 'DC Comic' => 'dc', - 'DDR3' => 'ddr3', - 'DDR4' => 'ddr4', - 'Dead Island' => 'dead-island', - 'Dead or Alive 6' => 'dead-or-alive-6', - 'Deadpool' => 'deadpool', - 'Dead Rising' => 'dead-rising', - 'Death Stranding' => 'death-stranding', - 'Deezer' => 'deezer', - 'Dehumidifier' => 'dehumidifier', - 'Dell' => 'dell', - 'Dell Laptop' => 'dell-laptop', - 'Dell Monitor' => 'dell-monitor', - 'Dell XPS' => 'xps', - 'Delonghi' => 'delonghi', - 'Demon's Souls' => 'demon-souls', - 'Denby' => 'denby', - 'Denon' => 'denon', - 'Deodorant' => 'deodorant', - 'Desk' => 'desk', - 'Desperados Beer' => 'desperados', - 'Despicable Me' => 'despicable-me', - 'Destiny' => 'destiny', - 'Destiny 2' => 'destiny-2', - 'Detergent' => 'detergent', - 'Detroit: Become Human' => 'detroit-become-human', - 'Dettol' => 'dettol', - 'Deus Ex' => 'deus-ex', - 'Deus Ex: Mankind Divided' => 'deus-ex-mankind-divided', - 'Development Boards' => 'development-boards', - 'Devil May Cry 5' => 'devil-may-cry-5', - 'DeWalt' => 'dewalt', - 'DFDS' => 'dfds', - 'Diablo 3' => 'diablo-3', - 'Diary' => 'diary', - 
'Dickies' => 'dickies', - 'Diesel' => 'diesel', - 'Diet' => 'diet', - 'Diggerland' => 'diggerland', - 'Digihome' => 'digihome', - 'Digimon' => 'digimon', - 'Digital Camera' => 'digital-camera', - 'Digital Watch' => 'digital-watch', - 'Dildo' => 'dildo', - 'Dimplex' => 'dimplex', - 'Dining Room' => 'dining-room', - 'Dining Room Chair' => 'chair', - 'Dining Set' => 'dining-set', - 'Dining Table' => 'dining-table', - 'Dinner Plate' => 'plates', - 'Dinner Set' => 'dinner-set', - 'Dinosaur' => 'dinosaur', - 'Dior' => 'dior', - 'Dior Sauvage' => 'dior-sauvage', - 'Dirt' => 'dirt', - 'Dirt 4' => 'dirt-4', - 'DIRT 5' => 'dirt-5', - 'Dirt Rally 2.0' => 'dirt-rally-2', - 'Disaronno' => 'disaronno', - 'Discord Nitro' => 'discord-nitro', - 'Disgaea' => 'disgaea', - 'Dishonored' => 'dishonored', - 'Dishonored 2' => 'dishonored-2', - 'Dishwasher' => 'dishwasher', - 'Dishwasher Tablets' => 'dishwasher-tablets', - 'Disinfectants' => 'disinfectants', - 'Disney' => 'disney', - 'Disney's Cars' => 'disney-cars', - 'Disney's Frozen' => 'disney-frozen', - 'Disney+' => 'disney-plus', - 'Disney Infinity' => 'disney-infinity', - 'Disneyland' => 'disneyland', - 'Disney Princess' => 'disney-princess', - 'Disney Tsum Tsum' => 'tsum-tsum', - 'Disney World' => 'disney-world', - 'Divan' => 'divan', - 'DIY' => 'diy', - 'DJ Equipment' => 'dj', - 'DJI Phantom' => 'dji-phantom', - 'DKNY' => 'dkny', - 'Doctor Who' => 'doctor-who', - 'Dog Bed' => 'dog-bed', - 'Dog Food' => 'dog-food', - 'Dog Supplies' => 'dog', - 'Dolce & Gabbana' => 'dolce', - 'Dolce Gusto' => 'dolce-gusto', - 'Dolce Gusto Coffee Machine' => 'dolce-gusto-coffee-machine', - 'Doll' => 'doll', - 'Dolls House' => 'dolls-house', - 'Domain Service' => 'domain', - 'Doogee' => 'doogee', - 'Doom' => 'doom', - 'Door' => 'door', - 'Doorbell' => 'doorbell', - 'Door Handles' => 'door-handles', - 'Doormat' => 'doormat', - 'Doritos' => 'doritos', - 'Dove' => 'dove', - 'Down Jacket' => 'down-jacket', - 'Downton Abbey' => 'downton-abbey', - 'Dr. 
Martens' => 'dr-martens', - 'Dragon Age' => 'dragon-age', - 'Dragon Ball' => 'dragon-ball', - 'Dragon Ball: FighterZ' => 'dragon-ball-fighterz', - 'Dragon Quest' => 'dragon-quest', - 'Dragon Quest Builders' => 'dragon-quest-builders', - 'Dragon Quest Builders 2' => 'dragon-quest-builders-2', - 'Dragon Quest XI: Echoes of an Elusive Age' => 'dragon-quest-xi', - 'Draper' => 'draper', - 'Drayton Manor' => 'drayton-manor', - 'Dreame T20' => 'dreame-t20', - 'Dreame V9' => 'dreame-v9', - 'Dreame V9P' => 'dreame-v9p', - 'Dreame V10' => 'dreame-v10', - 'Dreame V11' => 'dreame-v11', - 'Dreame Vacuum Cleaner' => 'xiaomi-vacuum-cleaner', - 'Dremel' => 'dremel', - 'Dress' => 'dress', - 'Dressing Gown' => 'dressing-gown', - 'Drill' => 'drill', - 'Drill Driver' => 'driver', - 'Drinks' => 'drinks', - 'Driveclub' => 'driveclub', - 'Driving Lessons' => 'driving-lessons', - 'Drone' => 'drone', - 'Dryer' => 'dryer', - 'DSLR Camera' => 'dslr', - 'Dual Fuel Cooker' => 'dual-fuel', - 'Dualit' => 'dualit', - 'Dual Sim' => 'sim', - 'Dulux' => 'dulux', - 'Duracell' => 'duracell', - 'Durex' => 'durex', - 'Duvet' => 'duvet', - 'DVD' => 'dvd', - 'DVD Player' => 'dvd-player', - 'Dying Light' => 'dying-light', - 'Dymo' => 'dymo', - 'Dyson' => 'dyson', - 'Dyson Supersonic' => 'dyson-supersonic', - 'Dyson V6' => 'dyson-v6', - 'Dyson V7' => 'dyson-v7', - 'Dyson V8' => 'dyson-v8', - 'Dyson V10' => 'dyson-v10', - 'Dyson V11' => 'dyson-v11', - 'Dyson Vacuum Cleaner' => 'dyson-vacuum-cleaner', - 'e-Reader' => 'ereader', - 'EA' => 'ea', - 'EA Access' => 'ea-access', - 'Earphones' => 'earphones', - 'Earrings' => 'earrings', - 'EA Sports' => 'ea-sports', - 'EA Sports UFC' => 'ufc', - 'Easter Eggs' => 'egg', - 'Eastpak' => 'eastpak', - 'eBook' => 'ebook', - 'Ecovacs' => 'ecovacs', - 'Ecover' => 'ecover', - 'Educational Toys' => 'educational-toys', - 'EE' => 'ee', - 'eFootball PES 2021' => 'pes-2021', - 'ELC Happyland' => 'happyland', - 'Electrical Accessories' => 'electrical-accessories', - 'Electric 
Bike' => 'electric-bike', - 'Electric Blanket' => 'electric-blanket', - 'Electric Cooker' => 'electric-cooker', - 'Electric Fires' => 'electric-fire', - 'Electric Scooter' => 'electric-scooter', - 'Electric Shower' => 'electric-shower', - 'Electric Toothbrush' => 'electric-toothbrush', - 'Electronic Accessories' => 'electronics-accessories', - 'Electronics' => 'electronics', - 'Elemis' => 'elemis', - 'Elephone' => 'elephone', - 'Elgato' => 'elgato', - 'Elite Dangerous' => 'elite-dangerous', - 'Elizabeth Arden' => 'elizabeth-arden', - 'Emirates' => 'emirates', - 'Endura' => 'endura', - 'Eneloop' => 'eneloop', - 'Energizer' => 'energizer', - 'Energy' => 'energy', - 'Energy, Heating & Gas' => 'energy-heating-gas', - 'Energy Drinks' => 'energy-drinks', - 'Engine Oil' => 'engine-oil', - 'Epilator' => 'epilator', - 'Epson' => 'epson', - 'Epson Printer' => 'epson-printer', - 'Espresso' => 'espresso', - 'Espresso Machine' => 'espresso-machine', - 'Esprit' => 'esprit', - 'Estée Lauder' => 'estee-lauder', - 'Ethernet' => 'ethernet', - 'Etnies' => 'etnies', - 'Eurostar Ticket' => 'eurostar', - 'Eurotunnel' => 'eurotunnel', - 'Everton F. C.' 
=> 'everton', - 'EVGA' => 'evga', - 'Evian' => 'evian', - 'Exercise Equipment' => 'exercise-equipment', - 'Exercise Weights' => 'weight', - 'Extension Lead' => 'extension-lead', - 'External Hard Drive' => 'external-hard-drive', - 'F1' => 'formula-one', - 'F1 2017' => 'f1-2017', - 'F1 2018' => 'f1-2018', - 'F1 2019' => 'f1-2019', - 'F1 2020' => 'f1-2020', - 'Fabric Conditioner' => 'fabric-conditioner', - 'Face Cream' => 'face-cream', - 'Face Mask' => 'face-mask', - 'Fairy' => 'fairy', - 'Fairy Light' => 'fairy-light', - 'Fallout' => 'fallout', - 'Fallout 4' => 'fallout-4', - 'Fallout 76' => 'fallout-76', - 'Family & Kids' => 'kids', - 'Family Break' => 'family-break', - 'Family Guy' => 'family-guy', - 'Famous Grouse' => 'famous-grouse', - 'Fancy Dress' => 'fancy-dress', - 'Fans' => 'fan', - 'Fanta' => 'fanta', - 'Far Cry' => 'far-cry', - 'Far Cry 4' => 'far-cry-4', - 'Far Cry 5' => 'far-cry-5', - 'Far Cry New Dawn' => 'far-cry-new-dawn', - 'Far Cry Primal' => 'far-cry-primal', - 'Farming Simulator' => 'farming-simulator', - 'Fashion & Accessories' => 'fashion', - 'Fashion Accessories' => 'fashion-accessories', - 'Fashion for Men' => 'mens-clothing', - 'Fashion for Women' => 'womens-clothes', - 'Fast and Furious' => 'fast-and-furious', - 'Father's Day' => 'fathers-day', - 'FatMax' => 'fatmax', - 'FC Barcelona' => 'fc-barcelona', - 'Felix' => 'felix', - 'Fence' => 'fence', - 'Fender Guitar' => 'fender', - 'Ferrero Rocher' => 'ferrero-rocher', - 'Ferry' => 'ferry', - 'Festival' => 'festival', - 'Fever Thermometer' => 'thermometer', - 'Fiat' => 'fiat', - 'Fidget Spinner' => 'spinner', - 'FIFA' => 'fifa', - 'FIFA 17' => 'fifa-17', - 'FIFA 18' => 'fifa-18', - 'FIFA 19' => 'fifa-19', - 'FIFA 20' => 'fifa-20', - 'FIFA 21' => 'fifa-21', - 'FightStick' => 'fightstick', - 'Figures' => 'figures', - 'Fila Trainers' => 'fila-trainers', - 'Filing Cabinet' => 'filing-cabinet', - 'Final Fantasy' => 'final-fantasy', - 'Final Fantasy 15' => 'final-fantasy-15', - 'Finance & Insurance' 
=> 'personal-finance', - 'Finish' => 'finish', - 'Finlux' => 'finlux', - 'Fiorelli' => 'fiorelli', - 'Fire Emblem' => 'fire-emblem', - 'Fire Pit' => 'fire-pit', - 'Fireplace' => 'fireplace', - 'Firewall: Zero Hour' => 'firewall-zero-hour', - 'First Aid' => 'first-aid', - 'Fish & Seafood' => 'fish-and-seafood', - 'Fish and Aquatic Pet Supplies' => 'fish', - 'Fisher Price' => 'fisher-price', - 'Fisher Price Imaginext' => 'imaginext', - 'Fisher Price Jumperoo' => 'jumperoo', - 'Fisher Price Little People' => 'little-people', - 'Fishing' => 'fishing', - 'Fiskars' => 'fiskars', - 'Fitbit' => 'fitbit', - 'Fitbit Alta' => 'fitbit-alta', - 'Fitbit Blaze' => 'fitbit-blaze', - 'Fitbit Charge 2' => 'fitbit-charge-2', - 'Fitbit Inspire' => 'fitbit-inspire', - 'Fitbit Versa' => 'fitbit-versa', - 'Fitness & Running' => 'fitness', - 'Fitness App' => 'fitness-app', - 'Fitness Tracker' => 'fitness-tracker', - 'Flamingo Land' => 'flamingo-land', - 'Flea Treatment' => 'flea', - 'Fleece Clothing' => 'fleece', - 'Flights' => 'flight', - 'Flip Flops' => 'flip-flops', - 'Floodlight' => 'floodlight', - 'Flooring' => 'flooring', - 'Flowers' => 'flowers', - 'Flymo' => 'flymo', - 'FM Transmitter' => 'fm-transmitter', - 'Food' => 'food', - 'Food Containers' => 'food-containers', - 'Food Processor' => 'food-processor', - 'Food Server' => 'food-server', - 'Football' => 'football', - 'Football Boots' => 'football-boots', - 'Football Manager' => 'football-manager', - 'Football Matches' => 'football-matches', - 'Football Shirt' => 'football-shirt', - 'Foot Pump' => 'foot-pump', - 'Ford' => 'ford', - 'For Honor' => 'for-honor', - 'Fortnite' => 'fortnite', - 'Fortnite: Darkfire' => 'fortnite-darkfire', - 'Forza' => 'forza', - 'Forza 7' => 'forza-7', - 'Forza Horizon' => 'forza-horizon', - 'Forza Horizon 3' => 'forza-horizon-3', - 'Forza Horizon 4' => 'forza-horizon-4', - 'Forza Motorsport' => 'forza-motorsport', - 'Foscam' => 'foscam', - 'Fossil' => 'fossil', - 'Foster's' => 'fosters', - 
'Foundation' => 'foundation', - 'Fountain Pen' => 'fountain-pen', - 'Fred Perry' => 'fred-perry', - 'Freesat' => 'freesat', - 'Freeview' => 'freeview', - 'Freezer' => 'freezer', - 'Fridge' => 'fridge', - 'Fridge Freezer' => 'fridge-freezer', - 'Frontline' => 'frontline', - 'Frozen Food' => 'frozen', - 'Fruit' => 'fruit', - 'Fruit and Vegetables' => 'fruit-and-vegetable', - 'Fruit of the Loom' => 'fruit-of-the-loom', - 'Fryer' => 'fryer', - 'Frying Pan' => 'frying-pan', - 'Fujifilm' => 'fuji', - 'Fujitsu' => 'fujitsu', - 'Funko Pop' => 'funko-pop', - 'Furby' => 'furby', - 'Furniture' => 'furniture', - 'G-Star' => 'g-star', - 'G-Sync Monitor' => 'g-sync', - 'Gaggia' => 'gaggia', - 'Gambling' => 'gambling', - 'Game App' => 'game-app', - 'Game of Thrones' => 'game-of-thrones', - 'Games & Board Games' => 'board-games', - 'Games Consoles' => 'console', - 'Gaming' => 'gaming', - 'Gaming Accessories' => 'gaming-accessories', - 'Gaming Chair' => 'gaming-chair', - 'Gaming Headset' => 'gaming-headset', - 'Gaming Keyboard' => 'gaming-keyboard', - 'Gaming Laptop' => 'gaming-laptop', - 'Gaming Monitor' => 'gaming-monitor', - 'Gaming Mouse' => 'gaming-mouse', - 'Gaming PC' => 'gaming-pc', - 'Gant' => 'gant', - 'Garage' => 'garage', - 'Garage & Service' => 'garage-service', - 'Garden' => 'garden', - 'Garden & Do It Yourself' => 'garden-diy', - 'Garden Furniture' => 'garden-furniture', - 'Gardening' => 'gardening', - 'Garden Storage' => 'garden-storage', - 'Garden Table' => 'table', - 'Garmin' => 'garmin', - 'Garmin Fenix' => 'garmin-fenix', - 'Garmin Fenix 6' => 'garmin-fenix-6', - 'Garmin Fenix 6 Pro' => 'garmin-fenix-6-pro', - 'Garmin Forerunner' => 'garmin-forerunner', - 'Garmin Vivoactive' => 'garmin-vivoactive', - 'Garmin Watch' => 'garmin-watch', - 'Garnier' => 'garnier', - 'Gas' => 'gas', - 'Gas Canister' => 'butane', - 'Gas Cooker' => 'gas-cooker', - 'Gatwick' => 'gatwick', - 'Gazebo' => 'gazebo', - 'GBK' => 'gbk', - 'Gears 5' => 'gears-5', - 'Gears of War' => 
'gears-of-war', - 'Gears of War 4' => 'gears-of-war-4', - 'George Foreman' => 'george-foreman', - 'Geox' => 'geox', - 'GHD' => 'ghd', - 'Ghostbusters' => 'ghostbusters', - 'Ghostbusters: The Video Game Remastered' => 'ghostbusters-the-video-game', - 'Ghost of Tsushima' => 'ghost-of-tsushima', - 'Gibson Guitar' => 'gibson', - 'giffgaff' => 'giffgaff', - 'Gift Card' => 'gift-card', - 'Gift Hamper' => 'hamper', - 'Gifts' => 'gifts', - 'Gift Set' => 'gift-set', - 'GIGABYTE' => 'gigabyte', - 'Gigaset' => 'gigaset', - 'Gilet' => 'gilet', - 'Gillette Fusion' => 'fusion', - 'Gillette Mach3' => 'mach-3', - 'Gillette Razor' => 'gillette', - 'Gimbal' => 'gimbal', - 'Gin' => 'gin', - 'Girl's Clothes' => 'girls-clothes', - 'Glasses' => 'glasses', - 'Glassware' => 'glassware', - 'Glenfiddich' => 'glenfiddich', - 'Glenlivet' => 'glenlivet', - 'Glenmorangie' => 'glenmorangie', - 'Gloves' => 'gloves', - 'Glue' => 'glue', - 'Glue Gun' => 'glue-gun', - 'Gluten-Free' => 'gluten-free', - 'God of War' => 'god-of-war', - 'Go Kart' => 'go-kart', - 'Golf' => 'golf', - 'Golf Balls' => 'golf-balls', - 'Golf Clubs' => 'golf-clubs', - 'Goodfellas' => 'goodfellas', - 'Goodmans' => 'goodmans', - 'Goodyear' => 'goodyear', - 'Google' => 'google', - 'Google Home' => 'google-home', - 'Google Home Max' => 'google-home-max', - 'Google Home Mini' => 'google-home-mini', - 'Google Nest' => 'nest', - 'Google Nest Audio' => 'google-nest-audio', - 'Google Nest Hub' => 'google-home-hub', - 'Google Nest Mini' => 'nest-mini', - 'Google Nest Protect' => 'google-nest-protect', - 'Google Nexus' => 'nexus', - 'Google Pixel' => 'google-pixel', - 'Google Pixel 2' => 'google-pixel-2', - 'Google Pixel 2 XL' => 'google-pixel-2-xl', - 'Google Pixel 3' => 'google-pixel-3', - 'Google Pixel 3 XL' => 'google-pixel-3-xl', - 'Google Pixel 3a' => 'google-pixel-3a', - 'Google Pixel 3a XL' => 'google-pixel-3a-xl', - 'Google Pixel 4' => 'google-pixel-4', - 'Google Pixel 4 XL' => 'google-pixel-4-xl', - 'Google Pixel 4a' => 
'google-pixel-4a', - 'Google Pixel 4a 5G' => 'google-pixel-4a-5g', - 'Google Pixel 5' => 'google-pixel-5', - 'Google Pixelbook' => 'google-pixelbook', - 'Google Pixel XL' => 'google-pixel-xl', - 'Google Smartphone' => 'google-smartphone', - 'Google Stadia' => 'google-stadia', - 'GoPro' => 'gopro', - 'GoPro HERO 6' => 'gopro-hero-6', - 'GoPro HERO 7' => 'gopro-hero-7', - 'GoPro HERO 8' => 'gopro-hero-8', - 'GoPro HERO 9' => 'gopro-hero-9', - 'Gore-Tex Clothing and Shoes' => 'gore-tex', - 'Graco' => 'graco', - 'Grand National' => 'grand-national', - 'Gran Turismo' => 'gran-turismo', - 'Gran Turismo Sport' => 'gran-turismo-sport', - 'Graphics Card' => 'graphics-card', - 'Gravity Rush' => 'gravity-rush', - 'Graze' => 'graze', - 'GreedFall' => 'greedfall', - 'Greenhouse' => 'greenhouse', - 'Greeting Cards and Wrapping Paper' => 'wrapping-paper-and-cards', - 'Greggs' => 'greggs', - 'Grey Goose' => 'grey-goose', - 'Griffin Technology' => 'griffin', - 'GroBag' => 'grobag', - 'Groceries' => 'groceries', - 'Gruffalo' => 'gruffalo', - 'Grundig' => 'grundig', - 'GTA' => 'gta', - 'GTA V' => 'gta-v', - 'GTX 970' => 'gtx-970', - 'GTX 980' => 'gtx-980', - 'GTX 1060' => 'gtx-1060', - 'GTX 1070' => 'gtx-1070', - 'GTX 1080' => 'gtx-1080', - 'GTX 1080 Ti' => 'gtx-1080-ti', - 'GTX 1660' => 'gtx-1660', - 'GTX 1660 Ti' => 'gtx-1660-ti', - 'Guardians of the Galaxy' => 'guardians-of-the-galaxy', - 'Gucci' => 'gucci', - 'Guinness' => 'guinness', - 'Guitar' => 'guitar', - 'Guitar Amp' => 'guitar-amp', - 'Guitar Hero' => 'guitar-hero', - 'Gulliver's' => 'gullivers', - 'Gym' => 'gym', - 'Gym Membership' => 'gym-membership', - 'H1Z1' => 'h1z1', - 'Häagen Dazs' => 'haagen-dazs', - 'Habitat' => 'habitat', - 'Hacksaw' => 'hacksaw', - 'Hair Brush' => 'hair-brush', - 'Hair Care' => 'hair', - 'Hair Clipper' => 'hair-clipper', - 'Hair Colour' => 'hair-colour', - 'Haircut' => 'haircut', - 'Hair Dryer' => 'hair-dryer', - 'Hair Dye' => 'hair-dye', - 'Hair Removal Devices' => 'hair-removal-devices', - 
'Halifax' => 'halifax', - 'Hall' => 'hall', - 'Halloween' => 'halloween', - 'Halo' => 'halo', - 'Halo 5' => 'halo-5', - 'Ham' => 'ham', - 'Hammer' => 'hammer', - 'Hammer Drill' => 'hammer-drill', - 'Hammock' => 'hammock', - 'Handbag' => 'handbag', - 'Hand Blender' => 'hand-blender', - 'Hand Cream' => 'hand-cream', - 'Hand Mixer' => 'hand-mixer', - 'Hand Tools' => 'hand-tools', - 'Handwash' => 'handwash', - 'Hard Drive' => 'hard-drive', - 'Haribo' => 'haribo', - 'Harman Kardon' => 'harman-kardon', - 'Harry Potter' => 'harry-potter', - 'Hasbro' => 'hasbro', - 'Hat' => 'hat', - 'Hatchimals' => 'hatchimals', - 'Hats & Caps' => 'hats-caps', - 'Hauck' => 'hauck', - 'Hayfever Remedies' => 'hayfever', - 'Headboard' => 'headboard', - 'Headphones' => 'headphones', - 'Headset' => 'headset', - 'Health & Beauty' => 'beauty', - 'Healthcare' => 'health-care', - 'Heart Rate Monitor' => 'heart-rate-monitor', - 'Heater' => 'heater', - 'Heating' => 'heating', - 'Heating Appliances' => 'heating-appliances', - 'Hedge Trimmer' => 'hedge-trimmer', - 'Heineken' => 'heineken', - 'Heinz' => 'heinz', - 'Heinz Beanz' => 'heinz-baked-beans', - 'Hello Kitty' => 'hello-kitty', - 'Hello Neighbour' => 'hello-neighbour', - 'Helly Hansen' => 'helly-hansen', - 'Henry Hoover' => 'henry-hoover', - 'Hermes' => 'hermes', - 'High5' => 'high-5', - 'Highchair' => 'highchair', - 'Hiking' => 'hiking', - 'Hilton' => 'hilton', - 'Hisense' => 'hisense', - 'Hisense TVs' => 'hisense-tv', - 'Hitachi' => 'hitachi', - 'Hitman' => 'hitman', - 'Hive' => 'hive', - 'Hive Active Heating' => 'hive-active-heating', - 'Hob' => 'hob', - 'Hobbit' => 'hobbit', - 'Hockey' => 'hockey', - 'Holiday Inn' => 'holiday-inn', - 'Holiday Park' => 'holiday-parks', - 'Holidays and Trips' => 'holidays-and-trips', - 'Hollow Knight' => 'hollow-knight', - 'Home & Living' => 'home', - 'Home Accessories' => 'home-accessories', - 'Home Appliances' => 'home-appliances', - 'Home Care' => 'home-care', - 'Home Cinema' => 'home-cinema', - 'HoMedics' 
=> 'homedics', - 'Homefront' => 'homefront', - 'Home Networking' => 'network', - 'Homeplug' => 'homeplug', - 'Home Security' => 'home-security', - 'Homeware' => 'homeware', - 'Honda' => 'honda', - 'Honey' => 'honey', - 'Honeywell' => 'honeywell', - 'Honor 6X' => 'honor-6x', - 'Honor 7' => 'honor-7', - 'Honor 8S' => 'honor-8s', - 'Honor 8X' => 'honor-8x', - 'Honor 8X Max' => 'honor-8x-max', - 'Honor 9' => 'honor-9', - 'Honor 9X' => 'honor-9x', - 'Honor 10' => 'honor-10', - 'Honor Band 5' => 'honor-band-5', - 'Honor Play' => 'honor-play', - 'Honor Smartphone' => 'honor', - 'Honor View 20' => 'honor-view-20', - 'Hoodie' => 'hoodie', - 'Hoover' => 'hoover', - 'Hori' => 'hori', - 'Horizon: Zero Dawn' => 'horizon-zero-dawn', - 'Hornby' => 'hornby', - 'Horse Races' => 'horse-races', - 'Hose' => 'hose', - 'HOTAS' => 'hotas', - 'Hotel' => 'hotel', - 'Hotpoint' => 'hotpoint', - 'Hotspot' => 'hotspot', - 'Hot Tub' => 'hot-tub', - 'Hot Water Bottle' => 'hot-water-bottle', - 'Hot Wheels' => 'hot-wheels', - 'Hozelock' => 'hozelock', - 'HP' => 'hp', - 'HP Envy' => 'hp-envy', - 'HP Laptop' => 'hp-laptop', - 'HP Omen' => 'hp-omen', - 'HP Printer' => 'hp-printer', - 'HTC' => 'htc', - 'HTC 10' => 'htc-10', - 'HTC Desire' => 'htc-desire', - 'HTC One' => 'htc-one', - 'HTC Smartphone' => 'htc-smartphone', - 'HTC U11' => 'htc-u11', - 'HTC Vive' => 'htc-vive', - 'Huawei' => 'huawei', - 'Huawei Freebuds 3' => 'huawei-freebuds-3', - 'Huawei Headphones' => 'huawei-headphones', - 'Huawei Mate 20' => 'huawei-mate-20', - 'Huawei Mate 20 Pro' => 'huawei-mate-20-pro', - 'Huawei Mate 30' => 'huawei-mate-30', - 'Huawei Mate 30 Lite' => 'huawei-mate-30-lite', - 'Huawei Mate 30 Pro' => 'huawei-mate-30-pro', - 'Huawei Matebook' => 'huawei-matebook', - 'Huawei MediaPad M3' => 'huawei-mediapad-m3', - 'Huawei MediaPad M5' => 'huawei-mediapad-m5', - 'Huawei MediaPad T3' => 'huawei-mediapad-t3', - 'Huawei MediaPad T5' => 'huawei-mediapad-t5', - 'Huawei P9' => 'huawei-p9', - 'Huawei P10' => 'huawei-p10', - 
'Huawei P20' => 'huawei-p20', - 'Huawei P20 Lite' => 'huawei-p20-lite', - 'Huawei P20 Pro' => 'huawei-p20-pro', - 'Huawei P30' => 'huawei-p30', - 'Huawei P30 Lite' => 'huawei-p30-lite', - 'Huawei P30 Pro' => 'huawei-p30-pro', - 'Huawei P40' => 'huawei-p40', - 'Huawei P40 Lite' => 'huawei-p40-lite', - 'Huawei P40 Pro' => 'huawei-p40-pro', - 'Huawei P Smart' => 'huawei-p-smart', - 'Huawei Smartphone' => 'huawei-smartphone', - 'Huawei Smartwatch' => 'huawei-smartwatch', - 'Huawei Tablet' => 'huawei-tablet', - 'Huawei Watch 2' => 'huawei-watch-2', - 'Huawei Watch GT' => 'huawei-watch-gt', - 'Huawei Watch GT2' => 'huawei-watch-gt2', - 'Huawei Watch GT 2 Pro' => 'huawei-watch-gt-2-pro', - 'Huawei Y7' => 'huawei-y7', - 'Huggies' => 'huggies', - 'Hulk' => 'hulk', - 'Humax' => 'humax', - 'Humidifier' => 'humidifier', - 'Hunter' => 'hunter', - 'HyperX' => 'hyperx', - 'Hyrule Warriors' => 'hyrule-warriors', - 'Hyundai' => 'hyundai', - 'IAMS' => 'iams', - 'iCandy' => 'icandy', - 'Ice-Watch' => 'ice-watch', - 'Ice Cream' => 'ice-cream', - 'Ice Cream Maker' => 'ice-cream-maker', - 'iMac' => 'apple-imac', - 'iMac 2021' => 'imac-2021', - 'Impact Driver' => 'impact-driver', - 'Indesit' => 'indesit', - 'Inflatable Boats' => 'boat', - 'Inflatable Toys' => 'inflatable', - 'Injustice' => 'injustice', - 'Injustice 2' => 'injustice-2', - 'Ink Cartridge' => 'ink', - 'Inkjet Printer' => 'inkjet-printer', - 'Innocent' => 'innocent', - 'Instant Cameras' => 'instant-cameras', - 'Instant Ink' => 'instant-ink', - 'Instax Mini 9' => 'instax-mini-9', - 'Insulation' => 'insulation', - 'Insurance' => 'insurance', - 'Intel' => 'intel', - 'Intel Atom' => 'atom', - 'Intel i3' => 'i3', - 'Intel i5' => 'i5', - 'Intel i7' => 'i7', - 'Intel i9' => 'intel-i9', - 'Internet' => 'internet', - 'Internet Security' => 'internet-security', - 'In the Night Garden' => 'in-the-night-garden', - 'Intimate Care' => 'intimate-care', - 'Introduce Yourself' => 'introduce-yourself', - 'iOS Apps' => 'ios-apps', - 'iPad' => 
'ipad', - 'iPad 2019' => 'ipad-2019', - 'iPad 2020' => 'ipad-2020', - 'iPad Air' => 'ipad-air', - 'iPad Air 2019' => 'ipad-air-2019', - 'iPad Air 2020' => 'ipad-air-2020', - 'iPad Case' => 'ipad-case', - 'iPad mini' => 'ipad-mini', - 'iPad Pro' => 'ipad-pro', - 'iPad Pro 11' => 'ipad-pro-11', - 'iPad Pro 12.9' => 'ipad-pro-12-9', - 'iPad Pro 2020' => 'ipad-pro-2020', - 'iPad Pro 2021' => 'ipad-pro-2021', - 'IP Camera' => 'ip-camera', - 'iPhone' => 'iphone', - 'iPhone 5s' => 'iphone-5s', - 'iPhone 6' => 'iphone-6', - 'iPhone 6 Plus' => 'iphone-6-plus', - 'iPhone 6s' => 'iphone-6s', - 'iPhone 6s Plus' => 'iphone-6s-plus', - 'iPhone 7' => 'iphone-7', - 'iPhone 7 Plus' => 'iphone-7-plus', - 'iPhone 8' => 'iphone-8', - 'iPhone 8 Plus' => 'iphone-8-plus', - 'iPhone 11' => 'iphone-11', - 'iPhone 11 Pro' => 'iphone-11-pro', - 'iPhone 11 Pro Max' => 'iphone-11-pro-max', - 'iPhone 12' => 'iphone-12', - 'iPhone 12 mini' => 'iphone-12-mini', - 'iPhone 12 Pro' => 'iphone-12-pro', - 'iPhone 12 Pro Max' => 'iphone-12-pro-max', - 'iPhone Accessories' => 'iphone-accessories', - 'iPhone Case' => 'iphone-case', - 'iPhone SE' => 'iphone-se', - 'iPhone X' => 'iphone-x', - 'iPhone Xr' => 'iphone-xr', - 'iPhone Xs' => 'iphone-xs', - 'iPhone Xs Max' => 'iphone-xs-max', - 'iPod' => 'ipod', - 'iPod Nano' => 'ipod-nano', - 'iPod Shuffle' => 'ipod-shuffle', - 'iPod Touch' => 'ipod-touch', - 'Irish Whiskey' => 'irish-whisky', - 'Irn Bru' => 'irn-bru', - 'iRobot' => 'irobot', - 'Iron' => 'iron', - 'Ironing' => 'ironing', - 'Ironing Board' => 'ironing-board', - 'Iron Man' => 'iron-man', - 'Issey Miyake' => 'issey-miyake', - 'ITV' => 'itv', - 'Jabra' => 'jabra', - 'Jabra Elite 85h' => 'jabra-elite-85h', - 'Jabra Elite Active 65t' => 'jabra-elite-active-65t', - 'Jabra Elite Active 75t' => 'jabra-elite-active-75t', - 'Jabra Headphones' => 'jabra-headphones', - 'Jack & Jones' => 'jack-and-jones', - 'Jack Daniel's' => 'jack-daniels', - 'Jacket' => 'jacket', - 'Jack Wills' => 'jack-wills', - 'Jack 
Wolfskin' => 'jack-wolfskin', - 'Jaffa Cakes' => 'jaffa-cakes', - 'Jägermeister' => 'jagermeister', - 'Jameson' => 'jameson', - 'Jamie Oliver' => 'jamie-oliver', - 'Jaybird' => 'jaybird', - 'JBL' => 'jbl', - 'JBL Flip' => 'jbl-flip', - 'JBL GO' => 'jbl-go', - 'JBL Headphones' => 'jbl-headphones', - 'JBL Link' => 'jbl-link', - 'JBL Live' => 'jbl-live', - 'JBL Tune' => 'jbl-tune', - 'JCB' => 'jcb', - 'Jean Paul Gaultier' => 'jean-paul-gautier', - 'Jean Paul Gaultier Le Male' => 'le-male', - 'Jeans' => 'jeans', - 'Jelly Belly' => 'jelly-belly', - 'Jewellery' => 'jewellery', - 'Jigsaw' => 'jigsaw', - 'Jim Beam' => 'jim-beam', - 'Jimmy Choo' => 'jimmy-choo', - 'JML' => 'jml', - 'Jogging Bottoms' => 'jogging-bottoms', - 'Johnnie Walker' => 'johnnie-walker', - 'Johnson's' => 'johnsons', - 'John West' => 'john-west', - 'John Wick' => 'john-wick', - 'JoJo Siwa' => 'jojo', - 'Joop' => 'joop', - 'Joseph Joseph' => 'joseph-joseph', - 'Joules' => 'joules', - 'Juice' => 'juice', - 'Juicer' => 'juicer', - 'Jumper' => 'jumper', - 'Jurassic World' => 'jurassic-world', - 'Jura Whisky' => 'jura', - 'Just Cause' => 'just-cause', - 'Just Cause 3' => 'just-cause-3', - 'Just Cause 4' => 'just-cause-4', - 'Just Dance' => 'just-dance', - 'JVC' => 'jvc', - 'K-Swiss' => 'k-swiss', - 'Karcher' => 'karcher', - 'Karcher Window Vacuum' => 'karcher-window-cleaner', - 'Karen Millen' => 'karen-millen', - 'Karrimor' => 'karrimor', - 'Kaspersky' => 'kaspersky', - 'Kayak' => 'kayak', - 'Keg' => 'keg', - 'Kellogg's' => 'kelloggs', - 'Kellogg's Cornflakes' => 'cornflakes', - 'Kellogg's Crunchy Nut' => 'crunchy-nut', - 'Kenco' => 'kenco', - 'Kenwood' => 'kenwood', - 'Kenwood kMix' => 'kmix', - 'Kenzo' => 'kenzo', - 'Ketchup' => 'ketchup', - 'Keter' => 'keter', - 'Kettle' => 'kettle', - 'Kettlebell' => 'kettlebell', - 'Keyboard' => 'keyboard', - 'KIA' => 'kia', - 'Kickers' => 'kickers', - 'Kid's Bike' => 'kids-bike', - 'Kid's Clothes' => 'kids-clothes', - 'Kid's Room' => 'kids-rooms', - 'Kid's Shoes' => 
'kids-shoes', - 'Kidizoom' => 'kidizoom', - 'Killzone' => 'killzone', - 'Kilner' => 'kilner', - 'Kinder' => 'kinder', - 'Kindle' => 'kindle', - 'Kindle Book' => 'kindle-book', - 'Kindle Fire' => 'kindle-fire', - 'Kindle Oasis' => 'kindle-oasis', - 'Kindle Paperwhite' => 'kindle-paperwhite', - 'Kingdom Come: Deliverance' => 'kingdom-come-deliverance', - 'Kingdom Hearts' => 'kingdom-hearts', - 'Kingdom Hearts 3' => 'kingdom-hearts-3', - 'Kingdom Hearts: The Story So Far' => 'kingdom-hearts-the-story-so-far', - 'King Kong' => 'king-kong', - 'King Size Bed' => 'king-size', - 'Kingsmill' => 'kingsmill', - 'Kingston' => 'kingston', - 'Kitchen' => 'kitchen', - 'KitchenAid' => 'kitchenaid', - 'Kitchen Appliances' => 'kitchen-appliances', - 'Kitchen Knife' => 'knife', - 'Kitchen Roll' => 'kitchen-roll', - 'Kitchen Scale' => 'kitchen-scales', - 'Kitchen Tap' => 'kitchen-tap', - 'Kitchen Utensils' => 'kitchen-utensils', - 'Kite' => 'kite', - 'KitSound' => 'kitsound', - 'Knickers' => 'knickers', - 'Kobo' => 'kobo', - 'Kodak' => 'kodak', - 'Kodi' => 'kodi', - 'Kohinoor' => 'kohinoor', - 'Kopparberg' => 'kopparberg', - 'Kraken' => 'kraken', - 'Krispy Kreme' => 'krispy-kreme', - 'Krups' => 'krups', - 'KTC' => 'ktc', - 'Kurt Geiger' => 'kurt-geiger', - 'L'Occitane' => 'loccitane', - 'L.O.L. Surprise!' 
=> 'lol-surprise', - 'Lacoste' => 'lacoste', - 'Ladder' => 'ladder', - 'Lamaze' => 'lamaze', - 'Lamb' => 'lamb', - 'Laminate' => 'laminate', - 'Laminator' => 'laminator', - 'Lamp' => 'lamp', - 'Lancôme' => 'lancome', - 'Landmann' => 'landmann', - 'Lantern' => 'lantern', - 'Laphroaig' => 'laphroaig', - 'Laptop' => 'laptop', - 'Laptop Accessories' => 'laptop-accessories', - 'Laptop Case' => 'laptop-case', - 'Laptop Sleeve' => 'laptop-sleeve', - 'Laser Printer' => 'laser-printer', - 'Last Minute' => 'last-minute', - 'Laundry Basket' => 'laundry-basket', - 'Laura Ashley' => 'laura-ashley', - 'Lavazza' => 'lavazza', - 'Lavender' => 'lavender', - 'Lawnmower' => 'lawnmower', - 'Lay-Z-Spa' => 'lay-z-spa', - 'LeapFrog' => 'leapfrog', - 'Le Creuset' => 'le-creuset', - 'LED Bulb' => 'led-bulbs', - 'LED Light' => 'led-light', - 'LED Strip Lights' => 'led-strip-lights', - 'LED TV' => 'led-tv', - 'Lee Stafford' => 'lee-stafford', - 'Leffe' => 'leffe', - 'Leggings' => 'leggings', - 'Lego' => 'lego', - 'Lego Advent Calendar' => 'lego-advent-calendar', - 'Lego Architecture' => 'lego-architecture', - 'Lego Art' => 'lego-art', - 'Lego Batman' => 'lego-batman', - 'Lego BrickHeadz' => 'lego-brickheadz', - 'Lego City' => 'lego-city', - 'Lego Classic' => 'lego-classic', - 'Lego Creator' => 'lego-creator', - 'Lego Dimensions' => 'lego-dimensions', - 'Lego Disney' => 'lego-disney', - 'Lego Dots' => 'lego-dots', - 'Lego Duplo' => 'lego-duplo', - 'Lego Friends' => 'lego-friends', - 'LEGO Harry Potter' => 'lego-harry-potter', - 'Lego Hidden Side' => 'lego-hidden-side', - 'Legoland' => 'legoland', - 'Lego Marvel' => 'lego-marvel', - 'Lego Mindstorms' => 'lego-mindstorms', - 'Lego Nexo Knights' => 'lego-nexo-knights', - 'Lego Ninjago' => 'lego-ninjago', - 'Lego Porsche' => 'lego-porsche', - 'Lego Simpsons' => 'lego-simpsons', - 'Lego Speed Champions' => 'lego-speed-champions', - 'Lego Star Wars' => 'lego-star-wars', - 'Lego Star Wars Millennium Falcon' => 'lego-star-wars-millennium-falcon', - 
'Lego Super Mario' => 'lego-mario', - 'Lego Technic' => 'lego-technic', - 'Lego VIDIYO' => 'lego-vidiyo', - 'Lemonade' => 'lemonade', - 'Lenor' => 'lenor', - 'Lenovo' => 'lenovo', - 'Lenovo IdeaPad' => 'lenovo-ideapad', - 'Lenovo Laptop' => 'lenovo-laptop', - 'Lenovo Tablet' => 'lenovo-tablet', - 'Lenovo Thinkpad' => 'thinkpad', - 'Lenovo Yoga Laptop' => 'lenovo-yoga-laptop', - 'Lenovo Yoga Tablet' => 'lenovo-yoga', - 'Les Paul' => 'les-paul', - 'Levi's' => 'levi', - 'Lexar' => 'lexar', - 'LG' => 'lg', - 'LG G3' => 'lg-g3', - 'LG G5' => 'lg-g5', - 'LG G6' => 'lg-g6', - 'LG G7' => 'lg-g7', - 'LG G8S ThinQ' => 'lg-g8s-thinq', - 'LG OLED TV' => 'lg-oled-tv', - 'LG Smartphone' => 'lg-smartphone', - 'LG TV' => 'lg-tv', - 'LG V30' => 'lg-v30', - 'LG V40 ThinQ' => 'lg-v40-thinq', - 'Life Insurance' => 'life-insurance', - 'Life is Strange' => 'life-is-strange', - 'Light Box' => 'light-box', - 'Lighting' => 'lighting', - 'Lightning Cable' => 'lightning-cable', - 'Lightsaber' => 'lightsaber', - 'Lindor' => 'lindor', - 'Lindt' => 'lindt', - 'Lingerie' => 'lingerie', - 'Linksys' => 'linksys', - 'Linx' => 'linx', - 'Lion King' => 'lion-king', - 'Lipstick' => 'lipstick', - 'Lipsy' => 'lipsy', - 'Little Tikes' => 'little-tikes', - 'Liverpool F. C.' 
=> 'liverpool-fc', - 'Living Room' => 'living-room', - 'Local Traffic' => 'local-traffic', - 'Lodge' => 'lodge', - 'Loft' => 'loft', - 'Logitech' => 'logitech', - 'Logitech G430' => 'logitech-g430', - 'Logitech G703' => 'logitech-g703', - 'Logitech G903' => 'logitech-g903', - 'Logitech Harmony' => 'harmony', - 'Logitech Keyboard' => 'logitech-keyboard', - 'Logitech Mouse' => 'logitech-mouse', - 'Logitech MX Master' => 'logitech-mx-master', - 'Logitech MX Master 2S' => 'logitech-mx-master-2s', - 'London Eye' => 'london-eye', - 'London Zoo' => 'london-zoo', - 'Longleat' => 'longleat', - 'Long Sleeve' => 'long-sleeve', - 'Lord of the Rings' => 'lord-of-the-rings', - 'Lottery' => 'lottery', - 'Lounger' => 'lounger', - 'Lowepro' => 'lowepro', - 'Lucozade' => 'lucozade', - 'Luigi' => 'luigi', - 'Luigi's Mansion' => 'luigis-manison', - 'Luigi's Mansion 3' => 'luigis-mansion-3', - 'Lunch Bag' => 'lunch-bag', - 'Lunch Box' => 'lunch-box', - 'Lurpak' => 'lurpak', - 'Luton' => 'luton', - 'Lyle & Scott' => 'lyle-and-scott', - 'Lynx' => 'lynx', - 'M.2 SSD' => 'm2-ssd', - 'MacBook' => 'macbook', - 'MacBook Air' => 'macbook-air', - 'MacBook Pro' => 'macbook-pro', - 'MacBook Pro 13' => 'macbook-pro-13', - 'MacBook Pro 15' => 'macbook-pro-15', - 'MacBook Pro 16' => 'macbook-pro-16', - 'Maclaren' => 'maclaren', - 'Mac mini' => 'mac-mini', - 'Madame Tussauds' => 'madame-tussauds', - 'Mad Catz' => 'madcatz', - 'Madden NFL' => 'madden', - 'Madden NFL 20' => 'madden-nfl-20', - 'Mad Max' => 'mad-max', - 'Mafia 3' => 'mafia-3', - 'Magazine' => 'magazine', - 'Magimix' => 'magimix', - 'Magners' => 'magners', - 'Magnum' => 'magnum', - 'Make Up' => 'make-up', - 'Makeup Advent Calendar' => 'makeup-advent-calendar', - 'Make Up Brush' => 'make-up-brush', - 'Makita' => 'makita', - 'Makita Drill' => 'makita-drill', - 'Malibu' => 'malibu', - 'Maltesers' => 'maltesers', - 'MAM' => 'mam', - 'Mamas & Papas' => 'mamas-and-papas', - 'Manchester United' => 'manchester-united', - 'Manfrotto' => 
'manfrotto', - 'Manga' => 'manga', - 'Manuka Honey' => 'manuka-honey', - 'Marantz' => 'marantz', - 'Marc Jacobs' => 'marc-jacobs', - 'Marc Jacobs Daisy' => 'daisy', - 'Mario & Sonic at the Olympic Games: Tokyo 2020' => 'mario-and-sonic-tokyo-2020', - 'Mario + Rabbids Kingdom Battle' => 'mario-rabbids-kingdom-battle', - 'Mario Kart' => 'mario-kart', - 'Mario Kart 8' => 'mario-kart-8', - 'Mario Kart 8 Deluxe' => 'mario-kart-8-deluxe', - 'Marmite' => 'marmite', - 'Mars' => 'mars', - 'Marshall' => 'marshall', - 'Marshall Headphones' => 'marshall-headphones', - 'Marvel' => 'marvel', - 'Marvel's Spider-Man (PS4)' => 'spider-man-2018', - 'Marvel's Spider-Man: Miles Morales' => 'spiderman-miles-morales', - 'Mascara' => 'mascara', - 'Massage' => 'massage', - 'Mass Effect' => 'mass-effect', - 'Mass Effect: Andromeda' => 'mass-effect-andromeda', - 'Mastercard' => 'mastercard', - 'Masterplug' => 'masterplug', - 'Maternity & Pregnancy' => 'maternity', - 'Mattress' => 'mattress', - 'Mattress Protector' => 'mattress-protector', - 'Mattress Topper' => 'mattress-topper', - 'Mavic' => 'mavic', - 'Max Factor' => 'max-factor', - 'Maxi Cosi' => 'maxi-cosi', - 'Maximuscle' => 'maximuscle', - 'Maxtor' => 'maxtor', - 'Maybelline' => 'maybelline', - 'Mayo' => 'mayo', - 'Mazda' => 'mazda', - 'McAfee' => 'mcafee', - 'Meat & Sausages' => 'meat', - 'Meccano' => 'meccano', - 'Mechanical Keyboard' => 'mechanical-keyboard', - 'Medal of Honor' => 'medal-of-honor', - 'Medela' => 'medela', - 'Media Player' => 'media-player', - 'Medievil' => 'medievil', - 'Medion' => 'medion', - 'Mega Bloks' => 'mega-bloks', - 'Megathread' => 'megathread', - 'Melissa & Doug' => 'melissa', - 'Memory Cards' => 'memory-cards', - 'Memory Foam Mattress' => 'memory-foam', - 'Men's Boots' => 'mens-boots', - 'Men's Fragrance' => 'mens-fragrance', - 'Men's Shoes' => 'mens-shoes', - 'Men's Suit' => 'suit', - 'Mercedes' => 'mercedes', - 'Meridian' => 'meridian', - 'Merlin' => 'merlin', - 'Merrell' => 'merrell', - 'Messenger 
Bag' => 'messenger-bag', - 'Metal Gear Solid' => 'metal-gear-solid', - 'Metro Exodus' => 'metro-exodus', - 'Metroid' => 'metroid', - 'Metro Series' => 'metro-series', - 'Michael Kors' => 'michael-kors', - 'Michelin' => 'michelin', - 'Microphone' => 'microphone', - 'Micro SD Card' => 'micro-sd', - 'Micro SDHC' => 'micro-sdhc', - 'Micro SDXC' => 'micro-sdxc', - 'Microserver' => 'microserver', - 'Microsoft' => 'microsoft', - 'Microsoft Flight Simulator' => 'microsoft-flight-simulator', - 'Microsoft Office' => 'microsoft-office', - 'Microsoft Points' => 'microsoft-points', - 'Microsoft Software' => 'microsoft-software', - 'Microsoft Surface Book' => 'surface-book', - 'Microsoft Surface Laptop' => 'surface', - 'Microsoft Surface Pro 6' => 'surface-pro-6', - 'Microsoft Surface Pro 7' => 'surface-pro-7', - 'Microsoft Surface Tablet' => 'microsoft-surface-tablet', - 'Microwave' => 'microwave', - 'Middle Earth' => 'middle-earth', - 'Middle Earth: Shadow of Mordor' => 'shadow-of-mordor', - 'Middle Earth: Shadow of War' => 'middle-earth-shadow-of-war', - 'Miele' => 'miele', - 'Miele Vacuum Cleaner' => 'miele-vacuum-cleaner', - 'Milk' => 'milk', - 'Milk Frother' => 'milk-frother', - 'Milk Tray' => 'milk-tray', - 'Milwaukee' => 'milwaukee', - 'Mince' => 'mince', - 'Minecraft Game' => 'minecraft', - 'Mineral Water' => 'mineral-water', - 'Mini Fridge' => 'mini-fridge', - 'Minions' => 'minions', - 'Mini PC' => 'mini-pc', - 'Minky' => 'minky', - 'Mira' => 'mira', - 'Mirror' => 'mirror', - 'Mirror's Edge' => 'mirrors-edge', - 'Misc' => 'misc', - 'Misfit' => 'misfit', - 'Mitre Saw' => 'mitre-saw', - 'Mitsubishi' => 'mitsubishi', - 'Mixer & Blender' => 'mixer-and-blender', - 'Mobile Contracts' => 'mobile-contract', - 'Mobile Phone' => 'mobile-phone', - 'Model Building' => 'model-building', - 'Moët' => 'moet', - 'Molton Brown' => 'molton-brown', - 'Money Saving Tips and Tricks' => 'money-saving-tips', - 'Monitor' => 'monitor', - 'Monopoly' => 'monopoly', - 'Monsoon' => 'monsoon', - 
'Monster Energy' => 'monster-energy', - 'Monster High' => 'monster-high', - 'Monster Hunter' => 'monster-hunter', - 'Monster Hunter World' => 'monster-hunter-world', - 'Mont Blanc' => 'mont-blanc', - 'Mop' => 'mop', - 'Morphy Richards' => 'morphy-richards', - 'Mortal Kombat' => 'mortal-kombat', - 'Mortal Kombat 11' => 'mortal-kombat-11', - 'Mortgage' => 'mortgage', - 'Moschino' => 'moschino', - 'Moses Basket' => 'moses-basket', - 'MOT' => 'mot', - 'Motherboard' => 'motherboard', - 'Moto 360' => 'moto-360', - 'Moto E' => 'moto-e', - 'Moto G' => 'moto-g', - 'Moto G4' => 'moto-g4', - 'Moto G5' => 'moto-g5', - 'Moto G6' => 'moto-g6', - 'Moto G7' => 'moto-g7', - 'Motorcycle' => 'motorcycle', - 'Motorcycle Accessories' => 'motorcycle-accessories', - 'Motorcycle Helmet' => 'motorcycle-helmet', - 'Motorola' => 'motorola', - 'Motorola Smartphone' => 'motorola-smartphone', - 'Moto X' => 'moto-x', - 'Moto Z' => 'moto-z', - 'Mountain Bike' => 'mountain-bike', - 'Mouse & Keyboard Bundles' => 'mouse-and-keyboard-bundle', - 'Mouse Mat' => 'mouse-mat', - 'Mouthwash' => 'mouthwash', - 'Movie and TV Box Set' => 'box-set', - 'Movies & Series' => 'movie', - 'MP3 Player' => 'mp3-player', - 'Mr Kipling' => 'mr-kipling', - 'Mr Men' => 'mr-men', - 'MSI' => 'msi', - 'MSI Laptop' => 'msi-laptop', - 'Muc-Off' => 'muc-off', - 'Mug' => 'mug', - 'Muller' => 'muller', - 'Multi-Room Audio System' => 'multi-room-audio-system', - 'Multitool' => 'multitool', - 'Museums' => 'museums', - 'Music' => 'music', - 'Musical Instruments' => 'musical-instrument', - 'Music App' => 'music-app', - 'Music Streaming' => 'music-streaming', - 'My Little Pony' => 'my-little-pony', - 'Nail Gun' => 'nail-gun', - 'Nail Polish' => 'nail-polish', - 'Nails' => 'nails', - 'Nails Inc.' 
=> 'nails-inc', - 'Nakd' => 'nakd', - 'Nando's' => 'nandos', - 'Nappy' => 'nappy', - 'NAS' => 'nas', - 'National Express Ticket' => 'national-express', - 'National Trust' => 'national-trust', - 'Nature Observation' => 'nature-observation', - 'NatWest' => 'natwest', - 'NBA 2K' => 'nba-2k', - 'NBA Live' => 'nba', - 'Necklace' => 'necklace', - 'Need for Speed' => 'need-for-speed', - 'Need for Speed: Payback' => 'need-for-speed-payback', - 'Need for Speed Heat' => 'need-for-speed-heat', - 'Neff' => 'neff', - 'Nerf Guns' => 'nerf', - 'Nescafé Azera' => 'azera', - 'Nescafé Coffee' => 'nescafe', - 'Nespresso' => 'nespresso', - 'Nespresso Coffee Machine' => 'nespresso-coffee-machine', - 'Nest Hello' => 'nest-hello', - 'Nestlé' => 'nestle', - 'Nest Learning Thermostat' => 'nest-learning-thermostat', - 'Nestlé Cheerios' => 'cheerios', - 'Nestlé Shreddies' => 'shreddies', - 'Netatmo' => 'netatmo', - 'Netflix' => 'netflix', - 'Netgear' => 'netgear', - 'Netgear Arlo' => 'arlo', - 'New Balance' => 'new-balance', - 'New Balance Trainers' => 'new-balance-trainers', - 'New Look' => 'new-look', - 'Newspapers' => 'newspapers', - 'Nextbase' => 'nextbase', - 'NFL' => 'nfl', - 'NHL' => 'nhl', - 'NHL 20' => 'nhl-20', - 'NHS' => 'nhs', - 'NieR: Automata' => 'nier', - 'Night Light' => 'night-light', - 'Nike' => 'nike', - 'Nike Air Max' => 'nike-air-max', - 'Nike Air Max 200' => 'nike-air-max-200', - 'Nike Air Max 270' => 'nike-air-max-270', - 'Nike Air Max 720' => 'nike-air-max-720', - 'Nike Free' => 'nike-free', - 'Nike Huarache' => 'nike-huarache', - 'Nike Jordan' => 'jordan', - 'Nike Presto' => 'nike-presto', - 'Nike Roshe' => 'nike-roshe', - 'Nike Trainers' => 'nike-shoes', - 'Nikon' => 'nikon', - 'Nikon Camera' => 'nikon-camera', - 'Nikon Coolpix' => 'nikon-coolpix', - 'Nikon D3400' => 'nikon-d3400', - 'Nikon Lens' => 'nikon-lens', - 'Nilfisk' => 'nilfisk', - 'Ni No Kuni' => 'ni-no-kuni', - 'Ni No Kuni: Wrath of the White Witch' => 'ni-no-kuni-white-witch', - 'Ni No Kuni II: Revenant 
Kingdom' => 'ni-no-kuni-2', - 'Nintendo' => 'nintendo', - 'Nintendo 2DS' => '2ds', - 'Nintendo 3DS' => '3ds', - 'Nintendo 3DS Game' => '3ds-games', - 'Nintendo 3DS XL' => 'nintendo-3ds-xl', - 'Nintendo Accessories' => 'nintendo-accessories', - 'Nintendo Classic Mini' => 'nintendo-classic-mini', - 'Nintendo DS Game' => 'ds-games', - 'Nintendo Labo' => 'switch-labo', - 'Nintendo Switch' => 'nintendo-switch', - 'Nintendo Switch Accessories' => 'switch-accessories', - 'Nintendo Switch Case' => 'switch-case', - 'Nintendo Switch Controller' => 'switch-controller', - 'Nintendo Switch Game' => 'switch-game', - 'Nintendo Switch Joy-Con' => 'switch-joy-con', - 'Nintendo Switch Lite' => 'nintendo-switch-lite', - 'Nintendo Switch Pro Controller' => 'switch-pro-controller', - 'Nioh' => 'nioh', - 'Nissan' => 'nissan', - 'Nivea' => 'nivea', - 'No7' => 'no7', - 'Noise Cancelling Headphones' => 'noise-cancelling-headphones', - 'Nokia' => 'nokia', - 'Nokia Smartphones' => 'nokia-mobile', - 'No Man's Sky' => 'no-man-s-sky', - 'Noodles' => 'noodles', - 'Norton' => 'norton', - 'Now' => 'now-tv', - 'Numatic' => 'numatic', - 'Nursery' => 'nursery', - 'Nutella' => 'nutella', - 'NutriBullet' => 'nutribullet', - 'Nutri Ninja' => 'nutri-ninja', - 'Nuts' => 'nuts', - 'Nvidia' => 'nvidia', - 'Nvidia GeForce' => 'geforce', - 'Nvidia Shield' => 'nvidia-shield', - 'NYX' => 'nyx', - 'NZXT' => 'nzxt', - 'O2' => 'o2', - 'O2 Refresh' => 'o2-refresh', - 'Oakley' => 'oakley', - 'Octonauts' => 'octonauts', - 'Oculus Game' => 'oculus-game', - 'Oculus Go' => 'oculus-go', - 'Oculus Quest' => 'oculus-quest', - 'Oculus Rift' => 'oculus', - 'Oculus Rift S' => 'oculus-rift-s', - 'Odeon' => 'odeon', - 'Office' => 'office', - 'Office Chair' => 'office-chair', - 'Official Announcements' => 'official-announcements', - 'Olay' => 'olay', - 'OLED TV' => 'oled', - 'Olive Oil' => 'olive-oil', - 'Olympus' => 'olympus', - 'Omega Seamaster' => 'omega-seamaster', - 'Omega Speedmaster' => 'omega-speedmaster', - 'Omega 
Watches' => 'omega-watch', - 'OnePlus 3' => 'oneplus-3', - 'OnePlus 5' => 'oneplus-5', - 'OnePlus 6' => 'oneplus-6', - 'OnePlus 6T' => 'oneplus-6t', - 'OnePlus 7' => 'oneplus-7', - 'OnePlus 7 Pro' => 'oneplus-7-pro', - 'OnePlus 7T' => 'oneplus-7t', - 'OnePlus 7T Pro' => 'one-plus-7t-pro', - 'OnePlus 8' => 'oneplus-8', - 'OnePlus 8 Pro' => 'oneplus-8-pro', - 'OnePlus 8T' => 'oneplus-8t', - 'OnePlus 9' => 'oneplus-9', - 'OnePlus 9 Pro' => 'oneplus-9-pro', - 'OnePlus Nord' => 'oneplus-nord', - 'OnePlus Nord N10 5G' => 'oneplus-n10', - 'OnePlus Nord N100' => 'oneplus-n100', - 'OnePlus Smartphone' => 'oneplus', - 'Onesie' => 'onesie', - 'Onkyo' => 'onkyo', - 'Online Courses' => 'online-courses', - 'Operating System' => 'operating-system', - 'Oppo Find X2 Lite' => 'oppo-find-x2-lite', - 'Oppo Find X2 Neo' => 'oppo-find-x2-neo', - 'Oppo Find X2 Pro' => 'oppo-find-x2-pro', - 'Oppo Reno' => 'oppo-reno', - 'Oppo Reno4 5G' => 'oppo-reno4', - 'Oppo Reno4 Z 5G' => 'oppo-reno4-z', - 'Oppo Smartphone' => 'oppo-smartphone', - 'Opticians' => 'opticians', - 'Optoma' => 'optoma', - 'Oral-B' => 'oral-b', - 'Oral-B Toothbrush' => 'oral-b-toothbrush', - 'Oreo' => 'oreo', - 'Origin' => 'origin', - 'Original Penguin' => 'penguin', - 'Orla Kiely' => 'orla-kiely', - 'Osprey' => 'osprey', - 'Osram' => 'osram', - 'Other' => 'other-deals', - 'Ottoman' => 'ottoman', - 'Oukitel' => 'oukitel', - 'Outdoor Clothing' => 'outdoor-clothing', - 'Outdoor Lighting' => 'outdoor-lighting', - 'Outdoor Sports & Camping' => 'outdoor', - 'Outdoor Toys' => 'outdoor-toys', - 'Outlast' => 'outlast', - 'Outlet' => 'outlet', - 'Outwell' => 'outwell', - 'Oven' => 'oven', - 'Overcooked' => 'overcooked', - 'Overcooked 2' => 'overcooked-2', - 'Overwatch' => 'overwatch', - 'Oyster Card' => 'oyster', - 'Package Holidays' => 'holiday', - 'Paco Rabanne' => 'paco-rabanne', - 'Paco Rabanne 1 Million' => 'paco-rabanne-1-million', - 'Paco Rabanne Lady Million' => 'lady-million', - 'Paddling Pool' => 'paddling-pool', - 
'Padlock' => 'padlock', - 'Paint' => 'paint', - 'Paint Brush' => 'paint-brush', - 'Pampers' => 'pampers', - 'Panasonic' => 'panasonic', - 'Panasonic Camera' => 'panasonic-camera', - 'Panasonic Lumix' => 'lumix', - 'Panasonic TV' => 'panasonic-tv', - 'Pandora' => 'pandora', - 'Panini' => 'panini', - 'Panini Stickers' => 'panini-stickers', - 'Papa Johns' => 'papa-johns', - 'Paper Mario' => 'paper-mario', - 'Parasol' => 'parasol', - 'Parcel and Delivery Services' => 'parcel', - 'Parka' => 'parka', - 'Parking' => 'parking', - 'Parrot' => 'parrot', - 'Paul Smith' => 'paul-smith', - 'PAW Patrol' => 'paw-patrol', - 'Payday' => 'payday', - 'Payday 2' => 'payday-2', - 'PAYG' => 'payg', - 'Pay Monthly' => 'pay-monthly', - 'PC' => 'pc', - 'PC Case' => 'pc-case', - 'PC Game' => 'pc-game', - 'PC Gaming Accessories' => 'pc-gaming-accessories', - 'PC Gaming Systems' => 'pc-gaming-systems', - 'PC Mouse' => 'mouse', - 'PC Parts' => 'pc-parts', - 'Peanut Butter' => 'peanut-butter', - 'Peanuts' => 'peanuts', - 'Pedometer' => 'pedometer', - 'Pentax' => 'pentax', - 'Peppa Pig' => 'peppa-pig', - 'PepperBonus' => 'pepperbonus', - 'Pepsi' => 'pepsi', - 'Perfume' => 'perfume', - 'Persil' => 'persil', - 'Persona' => 'persona', - 'Persona 5' => 'persona-5', - 'Personal Care & Hygiene' => 'personal-care-hygiene', - 'Petrol and Diesel' => 'petrol', - 'Pet Supplies' => 'pets', - 'Peugeot' => 'peugeot', - 'PG Tips' => 'pg-tips', - 'Philips' => 'philips', - 'Philips Alarm Clock' => 'philips-alarm-clock', - 'Philips Avent' => 'avent', - 'Philips Hue' => 'philips-hue', - 'Philips Lumea' => 'lumea', - 'Philips OneBlade' => 'philips-one-blade', - 'Philips Senseo' => 'philips-senseo', - 'Philips Senseo Coffee Machine' => 'philips-senseo-coffee-machine', - 'Philips Shaver' => 'philips-shaver', - 'Philips Sonicare' => 'sonicare', - 'Philips TV' => 'philips-tv', - 'Phone Holder' => 'phone-holder', - 'Phones & Accessories' => 'phone', - 'Photo & Cameras' => 'photo-video', - 'Photo & Video App' => 
'photo-video-app', - 'Photo Editing' => 'photo-editing', - 'Photo Frame' => 'photo-frame', - 'Photo Paper' => 'photo-paper', - 'Piano' => 'piano', - 'Picnic & Outdoor Cooking' => 'picnic', - 'Pikmin 3 Deluxe' => 'pikmin-3-deluxe', - 'Pillow' => 'pillow', - 'Pimm's' => 'pimms', - 'Pioneer' => 'pioneer', - 'Pirate Toys' => 'pirates', - 'PIR Lights' => 'pir', - 'Pixel C' => 'pixel-c', - 'Piz Buin' => 'piz-buin', - 'Pizza' => 'pizza', - 'Pizza Stone' => 'pizza-stone', - 'Planer' => 'planer', - 'Planet Earth' => 'planet-earth', - 'Plant' => 'plant', - 'Plant Pot' => 'plant-pots', - 'Plants vs. Zombies: Battle for Neighborville' => 'battle-for-neighborville', - 'Plants vs Zombies' => 'plants-vs-zombies', - 'Play-Doh' => 'play-doh', - 'PlayerUnknown's Battlegrounds' => 'playerunknown-s-battlegrounds', - 'Playhouse' => 'playhouse', - 'Playing Cards' => 'playing-cards', - 'Playmat' => 'playmat', - 'Playmobil' => 'playmobil', - 'Playmobil Advent Calendar' => 'playmobil-advent-calendar', - 'PlayStation' => 'playstation', - 'PlayStation 5 DualSense Controller' => 'ps5-controller', - 'PlayStation Accessories' => 'playstation-accessories', - 'PlayStation Classic' => 'playstation-classic', - 'PlayStation Move' => 'playstation-move', - 'PlayStation Now' => 'playstation-now', - 'PlayStation Plus' => 'playstation-plus', - 'PlayStation VR' => 'playstation-vr', - 'PlayStation VR Aim Controller' => 'aim-controller-ps4', - 'Pliers' => 'pliers', - 'Plumbing & Fittings' => 'plumbing-and-fitting', - 'Plus Size' => 'plus-size', - 'PNY' => 'pny', - 'POCO F2 Pro' => 'poco-f2-pro', - 'POCO F3' => 'poco-f3', - 'Poco M3' => 'poco-m3', - 'POCO X3' => 'poco-x3', - 'POCO X3 Pro' => 'poco-x3-pro', - 'Pokémon' => 'pokemon', - 'Pokémon: Let's Go' => 'pokemon-lets-go', - 'Pokémon Go' => 'pokemon-go', - 'Pokemon Sword and Shield' => 'pokemon-sword-and-shield', - 'Pokémon Ultra Sun and Ultra Moon' => 'pokemon-ultra-sun-ultra-moon', - 'Poker' => 'poker', - 'Pokken Tournament' => 'pokken-tournament', - 
'Polaroid' => 'polaroid', - 'Police Toys' => 'police', - 'Polo Shirt' => 'polo-shirt', - 'Pool' => 'pool', - 'Pool & Snooker' => 'pool-table', - 'Popcorn' => 'popcorn', - 'Pork' => 'pork', - 'Porridge & Oats' => 'porridge-and-oats', - 'Portable Wireless Speaker' => 'wireless-speaker', - 'Poster' => 'poster', - 'Pots and Pans' => 'pan', - 'Potty' => 'potty', - 'Power Bank' => 'power-bank', - 'Powerbeats Pro' => 'powerbeats-pro', - 'Power Dental Flosser' => 'floss', - 'Powerline' => 'powerline', - 'Power Rangers' => 'power-rangers', - 'Power Tool' => 'power-tool', - 'Prada' => 'prada', - 'Pram' => 'pram', - 'Pregnancy' => 'pregnancy', - 'Prescription Glasses' => 'prescription-glasses', - 'Pressure Cooker' => 'pressure-cooker', - 'Pressure Washer' => 'pressure-washer', - 'Price Glitch' => 'price-glitch', - 'Prime Gaming' => 'twitch', - 'Pringles' => 'pringles', - 'Printer & Printer Supplies' => 'printer', - 'Printer Supplies' => 'printer-supplies', - 'Productivity App' => 'productivity-app', - 'Pro Evolution Soccer' => 'pro-evolution-soccer', - 'Pro Evolution Soccer 2018' => 'pro-evolution-soccer-2018', - 'Pro Evolution Soccer 2019' => 'pro-evolution-soccer-2019', - 'Pro Evolution Soccer 2020' => 'pes-2020', - 'Project Cars' => 'project-cars', - 'Project Cars 2' => 'project-cars-2', - 'Projector' => 'projector', - 'Protein' => 'protein', - 'Protein Bars' => 'protein-bars', - 'Protein Shaker' => 'shaker', - 'PS4' => 'ps4-slim', - 'PS4 Camera' => 'ps4-camera', - 'PS4 Controller' => 'ps4-controller', - 'PS4 Games' => 'ps4-games', - 'PS4 Headset' => 'ps4-headset', - 'PS4 Pro' => 'ps4-pro', - 'PS5' => 'ps5', - 'PS5 Games' => 'ps5-game', - 'PSU' => 'psu', - 'Public Transport' => 'public-transport', - 'Pukka' => 'pukka', - 'Pulse Light Epilator' => 'pulse-light-epilator', - 'Puma' => 'puma', - 'Puma Trainers' => 'puma-trainers', - 'Puppy Supplies' => 'puppy', - 'Purse' => 'purse', - 'Pushchair' => 'pushchair', - 'Pushchairs and Strollers' => 'baby-transport', - 'Puzzle' => 
'puzzle', - 'PVR' => 'pvr', - 'Pyjamas' => 'pyjamas', - 'Pyrex' => 'pyrex', - 'Q Acoustics' => 'q-acoustics', - 'QNAP' => 'qnap', - 'Qualcast' => 'qualcast', - 'Quality Street' => 'quality-street', - 'Quantum Break' => 'quantum-break', - 'Quechua' => 'quechua', - 'Quick Charge' => 'quick-charge', - 'Quiksilver' => 'quiksilver', - 'Quinny' => 'quinny', - 'Quorn' => 'quorn', - 'Rab' => 'rab', - 'Radeon RX 480' => 'rx-480', - 'Radeon RX 5700' => 'radeon-rx-5700', - 'Radeon RX 5700 XT' => 'radeon-rx-5700-xt', - 'Radeon RX 6800' => 'radeon-rx-6800', - 'Radeon RX 6800 XT' => 'radeon-rx-6800-xt', - 'Radeon RX 6900 XT' => 'radeon-rx-6900-xt', - 'Radiator' => 'radiator', - 'Radio' => 'radio', - 'Radley' => 'radley', - 'Rage 2' => 'rage-2', - 'Railcard' => 'railcard', - 'Rainbow Six' => 'rainbow-six', - 'Rake' => 'rake', - 'Ralph Lauren' => 'ralph-lauren', - 'RAM' => 'ram', - 'Raspberry Pi' => 'raspberry-pi', - 'Ratchet' => 'ratchet', - 'Ratchet and Clank' => 'ratchet-and-clank', - 'Rattan Garden Furniture' => 'rattan', - 'RAVPower' => 'ravpower', - 'Ray Ban' => 'ray-ban', - 'Razer' => 'razer', - 'Razor' => 'razor', - 'Razor Blade' => 'razor-blade', - 'Real Madrid' => 'real-madrid', - 'Realme Smartphones' => 'realme-smartphone', - 'Real Techniques' => 'real-techniques', - 'Recliner' => 'recliner', - 'ReCore' => 'recore', - 'Recreational Sports' => 'recreational-sports', - 'Red Bull' => 'red-bull', - 'Red Dead Redemption' => 'red-dead-redemption', - 'Red Dead Redemption 2' => 'red-dead-redemption-2', - 'Redex' => 'redex', - 'Red Kite' => 'red-kite', - 'Reebok' => 'reebok', - 'Reese's' => 'reeses', - 'Regatta' => 'regatta', - 'Regina' => 'regina', - 'Remington' => 'remington', - 'Remote Control Car' => 'remote-control-car', - 'Renault' => 'renault', - 'Resident Evil' => 'resident-evil', - 'Resident Evil 2' => 'resident-evil-2', - 'Resident Evil 7' => 'resident-evil-7', - 'Restaurant, Café & Pub' => 'restaurant', - 'Retailer Offers and Issues' => 'retailer-offers-and-issues', - 
'Ribena' => 'ribena', - 'Rice' => 'rice', - 'Rice Cooker' => 'rice-cooker', - 'Rick and Morty' => 'rick-and-morty', - 'Ricoh' => 'ricoh', - 'Ride On' => 'ride-on', - 'Ring' => 'ring', - 'Ring Door View Cam' => 'ring-door-view-cam', - 'Ring Fit Adventures' => 'ring-fit-adventures', - 'Ring Stick Up Cam' => 'ring-stick-up-cam', - 'Ring Video Doorbell' => 'ring-video-doorbell', - 'Ring Video Doorbell 2' => 'ring-video-doorbell-2', - 'Ring Video Doorbell 3' => 'ring-video-doorbell-3', - 'Ring Video Doorbell Pro' => 'ring-video-doorbell-pro', - 'Road Bike' => 'road-bike', - 'Roaming' => 'roaming', - 'Robinsons' => 'robinsons', - 'Robotic Lawnmower' => 'robotic-lawnmower', - 'Robot Vacuum Cleaner' => 'robot-vacuum-cleaner', - 'Rock Band' => 'rock-band', - 'Rocket League' => 'rocket-league', - 'Rocking Horse' => 'rocking-horse', - 'Rogue One: A Star Wars Story' => 'rogue-one', - 'Roku' => 'roku', - 'Rolex' => 'rolex', - 'Rollerskates' => 'skate', - 'Ronseal' => 'ronseal', - 'Roof Box' => 'roof-box', - 'Roses' => 'roses', - 'Rotary' => 'rotary', - 'Router' => 'router', - 'Rowenta' => 'rowenta', - 'RTX 2060' => 'rtx-2060', - 'RTX 2070' => 'rtx-2070', - 'RTX 2080' => 'rtx-2080', - 'RTX 2080 Ti' => 'rtx-2080-ti', - 'RTX 3070' => 'rtx-3070', - 'RTX 3080' => 'rtx-3080', - 'RTX 3090' => 'rtx-3090', - 'Rug' => 'rug', - 'Rugby' => 'rugby', - 'Rum' => 'rum', - 'Running' => 'running', - 'Running Shoes' => 'running-shoes', - 'Russell Hobbs' => 'russell-hobbs', - 'RX 570' => 'rx-570', - 'RX 580' => 'rx-580', - 'RX 590' => 'rx-590', - 'RX Vega 56' => 'rx-vega-56', - 'RX Vega 64' => 'rx-vega-64', - 'Ryanair' => 'ryanair', - 'Ryobi' => 'ryobi', - 'Safari' => 'safari', - 'Safety Boots' => 'safety-boots', - 'Sage by Heston Blumenthal' => 'sage', - 'Saints Row' => 'saints-row', - 'Saitek' => 'saitek', - 'Sale' => 'sale', - 'Salmon' => 'salmon', - 'Salomon' => 'salomon', - 'Salter' => 'salter', - 'Samsonite' => 'samsonite', - 'Samsung' => 'samsung', - 'Samsung Ecobubble' => 'ecobubble', - 
'Samsung Fridge' => 'samsung-fridge', - 'Samsung Galaxy' => 'samsung-galaxy', - 'Samsung Galaxy A10' => 'samsung-galaxy-a10', - 'Samsung Galaxy A20e' => 'samsung-galaxy-a20e', - 'Samsung Galaxy A40' => 'samsung-galaxy-a40', - 'Samsung Galaxy A42 5G' => 'samsung-galaxy-a42-5g', - 'Samsung Galaxy A50' => 'samsung-galaxy-a50', - 'Samsung Galaxy A51' => 'samsung-galaxy-a51', - 'Samsung Galaxy A52 5G' => 'samsung-galaxy-a52', - 'Samsung Galaxy A60' => 'samsung-galaxy-a60', - 'Samsung Galaxy A70' => 'samsung-galaxy-a70', - 'Samsung Galaxy A71' => 'samsung-galaxy-a71', - 'Samsung Galaxy A72' => 'samsung-galaxy-a72', - 'Samsung Galaxy A80' => 'samsung-galaxy-a80', - 'Samsung Galaxy A90' => 'samsung-galaxy-a90', - 'Samsung Galaxy Buds' => 'samsung-galaxy-buds', - 'Samsung Galaxy Buds+' => 'samsung-galaxy-buds-plus', - 'Samsung Galaxy Buds Live' => 'samsung-galaxy-buds-live', - 'Samsung Galaxy Buds Pro' => 'samsung-galaxy-buds-pro', - 'Samsung Galaxy Fold' => 'samsung-galaxy-fold', - 'Samsung Galaxy J5' => 'galaxy-j5', - 'Samsung Galaxy Note' => 'samsung-galaxy-note', - 'Samsung Galaxy Note 8' => 'samsung-galaxy-note-8', - 'Samsung Galaxy Note 9' => 'samsung-galaxy-note-9', - 'Samsung Galaxy Note 10' => 'samsung-galaxy-note-10', - 'Samsung Galaxy Note 10+' => 'samsung-galaxy-note-10-plus', - 'Samsung Galaxy Note20' => 'samsung-galaxy-note20', - 'Samsung Galaxy Note20 Ultra' => 'samsung-galaxy-note20-ultra', - 'Samsung Galaxy S6' => 'samsung-galaxy-s6', - 'Samsung Galaxy S7' => 'samsung-galaxy-s7', - 'Samsung Galaxy S7 Edge' => 'samsung-galaxy-s7-edge', - 'Samsung Galaxy S8' => 'samsung-galaxy-s8', - 'Samsung Galaxy S8+' => 'samsung-s8-plus', - 'Samsung Galaxy S9' => 'samsung-galaxy-s9', - 'Samsung Galaxy S9 Plus' => 'samsung-s9-plus', - 'Samsung Galaxy S10' => 'samsung-galaxy-s10', - 'Samsung Galaxy S10 Lite' => 'samsung-galaxy-s10-lite', - 'Samsung Galaxy S10 Plus' => 'samsung-galaxy-s10-plus', - 'Samsung Galaxy S10e' => 'samsung-galaxy-s10e', - 'Samsung Galaxy S20' => 
'samsung-galaxy-s20', - 'Samsung Galaxy S20 FE' => 'samsung-galaxy-s20-fe', - 'Samsung Galaxy S20 Ultra' => 'samsung-galaxy-s20-ultra', - 'Samsung Galaxy S20+' => 'samsung-galaxy-s20-plus', - 'Samsung Galaxy S21 5G' => 'samsung-galaxy-s21-5g', - 'Samsung Galaxy S21 Ultra 5G' => 'samsung-galaxy-s21-ultra-5g', - 'Samsung Galaxy S21+ 5G' => 'samsung-galaxy-s21-plus-5g', - 'Samsung Galaxy Tab' => 'samsung-galaxy-tab', - 'Samsung Galaxy Tab A' => 'samsung-galaxy-tab-a', - 'Samsung Galaxy Tab A7' => 'samsung-galaxy-tab-a7', - 'Samsung Galaxy Tab S' => 'samsung-galaxy-tab-s', - 'Samsung Galaxy Tab S4' => 'samsung-galaxy-tab-s4', - 'Samsung Galaxy Tab S5e' => 'samsung-galaxy-tab-s5e', - 'Samsung Galaxy Tab S6' => 'samsung-galaxy-tab-s6', - 'Samsung Galaxy Watch' => 'samsung-galaxy-watch', - 'Samsung Galaxy Watch3' => 'samsung-galaxy-watch3', - 'Samsung Galaxy Watch Active2' => 'samsung-galaxy-watch-active-2', - 'Samsung Gear' => 'samsung-gear', - 'Samsung Gear S3' => 'gear-s3', - 'Samsung Gear VR' => 'samsung-gear-vr', - 'Samsung Headphones' => 'samsung-headphones', - 'Samsung Monitor' => 'samsung-monitor', - 'Samsung QLED TVs' => 'samsung-qled-tv', - 'Samsung Smartphone' => 'samsung-smartphone', - 'Samsung SSD' => 'samsung-ssd', - 'Samsung The Frame TV' => 'samsung-the-frame', - 'Samsung TV' => 'samsung-tv', - 'Samsung Washing Machine' => 'samsung-washing-machine', - 'Samsung Watch' => 'samsung-watch', - 'Sandals' => 'sandals', - 'Sander' => 'sander', - 'SanDisk' => 'sandisk', - 'SanDisk SSD' => 'sandisk-ssd', - 'Sand Pit' => 'sand-pit', - 'Sandwich Maker' => 'sandwich', - 'San Miguel' => 'san-miguel', - 'Santander' => 'santander', - 'Satchel' => 'satchel', - 'Sat Nav' => 'sat-nav', - 'Sauce' => 'sauce', - 'Saw' => 'saw', - 'Scalextric' => 'scalextric', - 'Scanner' => 'scanner', - 'School Bag' => 'school-bag', - 'School Supplies' => 'school', - 'School Uniform' => 'school-uniform', - 'Schwalbe' => 'schwalbe', - 'Scooby Doo' => 'scooby-doo', - 'Scooter' => 'scooter', - 
'Scotch Whisky' => 'scotch', - 'Scrabble' => 'scrabble', - 'Screen Protector' => 'screen-protector', - 'Screenwash' => 'screenwash', - 'Screwdriver' => 'screwdriver', - 'Screws' => 'screws', - 'SD Cards' => 'sd-card', - 'SDHC' => 'sdhc', - 'SDXC' => 'sdxc', - 'Seagate' => 'seagate', - 'Sea Life' => 'sea-life', - 'Sea of Thieves' => 'sea-of-thieves', - 'Season Pass' => 'season-pass', - 'Seaworld' => 'seaworld', - 'Security Camera' => 'security-camera', - 'Seeds & Bulbs' => 'seeds-and-bulbs', - 'Sega' => 'sega', - 'SEGA Mega Drive Mini' => 'sega-mega-drive-mini', - 'Segway' => 'segway', - 'Seiko' => 'seiko', - 'Sekiro: Shadows Die Twice' => 'sekiro', - 'Sekonda' => 'sekonda', - 'Selfie Stick' => 'selfie-stick', - 'Sennheiser' => 'sennheiser', - 'Sennheiser Headphones' => 'sennheiser-headphones', - 'Sensodyne' => 'sensodyne', - 'Server' => 'server', - 'Services & Contracts' => 'services-contracts', - 'Services and Subscriptions' => 'service-contract', - 'Sewing' => 'sewing', - 'Sewing Machine' => 'sewing-machine', - 'Sex Toys' => 'sex-toys', - 'Shadow of the Tomb Raider' => 'shadow-of-the-tomb-raider', - 'Shampoo' => 'shampoo', - 'Shark' => 'shark', - 'Shark DuoClean' => 'shark-duoclean', - 'Shark Vacuum Cleaner' => 'shark-vacuum-cleaner', - 'Sharp' => 'sharp', - 'Sharpener' => 'sharpener', - 'Sharpie' => 'sharpie', - 'Shaver' => 'shaver', - 'Shaving & Beard Care' => 'shaving', - 'Shaving, Trimming, & Hair Removal' => 'hair-removal', - 'Shaving Foam' => 'shaving-foam', - 'Shears' => 'shears', - 'Sheba' => 'sheba', - 'Shed' => 'shed', - 'Shelter' => 'shelter', - 'Shelves' => 'shelves', - 'Shenmue I & II' => 'shenmue-one-and-two', - 'Shenmue III' => 'shenmue-3', - 'Shenmue Series' => 'shenmue-series', - 'Shimano' => 'shimano', - 'Shirt' => 'shirt', - 'Shoe Rack' => 'shoe-rack', - 'Shoes' => 'shoe', - 'Shopkins' => 'shopkins', - 'Shortbread' => 'shortbread', - 'Shorts' => 'shorts', - 'Short Trip' => 'break', - 'Shoulder Bag' => 'shoulder-bag', - 'Shovel' => 'shovel', - 
'Shower Curtain' => 'shower-curtain', - 'Shower Enclosure' => 'shower-enclosure', - 'Shower Fittings' => 'shower', - 'Shower Gel' => 'shower-gel', - 'Shower Head' => 'shower-head', - 'Shredder' => 'shredder', - 'Side-by-Side-Fridge' => 'side-by-side-fridge', - 'Sideboard' => 'sideboard', - 'Sid Meier's Civilization VI' => 'civilization-vi', - 'Siemens' => 'siemens', - 'Siemens Washing Machine' => 'siemens-washing-machine', - 'Sigma' => 'sigma', - 'Silentnight' => 'silentnight', - 'Silvercrest' => 'silvercrest', - 'Silver Cross' => 'silver-cross', - 'Sim Free' => 'sim-free', - 'Sim Only' => 'sim-only', - 'Simplehuman' => 'simplehuman', - 'Simpsons' => 'simpsons', - 'Single Malt' => 'single-malt', - 'Sink' => 'sink', - 'Sistema' => 'sistema', - 'Skateboard' => 'skateboard', - 'Skating' => 'skating', - 'Skechers' => 'skechers', - 'Skiing' => 'ski', - 'Skin Care' => 'skincare', - 'Skittles' => 'skittles', - 'Skoda' => 'skoda', - 'Skullcandy' => 'skullcandy', - 'Sky' => 'sky', - 'Sky Cinema' => 'sky-cinema', - 'Skylanders' => 'skylanders', - 'Skylanders Battlecast' => 'skylanders-battlecast', - 'Skylanders Imaginators' => 'skylanders-imaginators', - 'Sleeping Bag' => 'sleeping-bag', - 'Sleeping Dogs' => 'sleeping-dogs', - 'Sleepwear' => 'sleepwear', - 'Slide' => 'slide', - 'Slimming World' => 'slimming-world', - 'Slippers' => 'slippers', - 'Slow Cooker' => 'slow-cooker', - 'Smart Clock' => 'clock', - 'Smart Doorbells' => 'smart-doorbell', - 'Smart Home' => 'smart-home', - 'Smart Light' => 'smart-light', - 'Smart Lock' => 'smart-lock', - 'Smartphone Accessories' => 'smartphone-accessories', - 'Smartphone Case' => 'smartphone-case', - 'Smartphone under £200' => 'smartphone-under-200-pounds', - 'Smartphone under £400' => 'smartphone-under-400-pounds', - 'Smart Plugs' => 'smart-plugs', - 'Smart Speaker' => 'smart-speaker', - 'Smart Tech & Gadgets' => 'smart-tech', - 'Smart Thermostat' => 'thermostat', - 'SmartThings' => 'smartthings', - 'Smart TV' => 'smart-tv', - 'Smart 
Watch' => 'smartwatch', - 'Smeg' => 'smeg', - 'Smirnoff' => 'smirnoff', - 'Smoke Alarm' => 'smoke-alarm', - 'Smoothie' => 'smoothie', - 'Smoothie Maker' => 'smoothie-maker', - 'Snacks' => 'snacks', - 'Sneakers' => 'sneakers', - 'SNES Nintendo Classic Mini' => 'snes-nintendo-classic', - 'Snickers' => 'snickers', - 'Sniper Elite' => 'sniper-elite', - 'Snowboard' => 'snowboard', - 'Snow Boots' => 'snow-boots', - 'Soap' => 'soap', - 'Soap and Glory' => 'soap-and-glory', - 'Socket Set' => 'socket-set', - 'Socks' => 'socks', - 'SodaStream' => 'soda-stream', - 'Sofa' => 'sofa', - 'Soft Drinks' => 'soft-drinks', - 'Soft Toy' => 'soft-toy', - 'Software' => 'software', - 'Software & Apps' => 'software-apps', - 'Solar Lights' => 'solar-lights', - 'Soldering Iron' => 'soldering', - 'Sonic' => 'sonic', - 'Sonos' => 'sonos', - 'Sonos Beam' => 'sonos-beam', - 'Sonos Move' => 'sonos-move', - 'Sonos One' => 'sonos-one', - 'Sonos PLAY:1' => 'sonos-play-1', - 'Sonos PLAY:3' => 'sonos-play-3', - 'Sonos PLAY:5' => 'sonos-play-5', - 'Sonos PLAYBAR' => 'sonos-playbar', - 'Sonos PLAYBASE' => 'sonos-playbase', - 'Sony' => 'sony', - 'Sony Camera' => 'sony-camera', - 'Sony Headphones' => 'sony-headphones', - 'Sony Pulse 3D Wireless Headset' => 'pulse-3d-wireless-headsets', - 'Sony TV' => 'sony-tv', - 'Sony WF-1000XM3' => 'sony-wf1000xm3', - 'Sony WH-1000XM3' => 'sony-wh-1000xm3', - 'Sony WH-1000XM4' => 'sony-wh1000xm4', - 'Sony Xperia' => 'xperia', - 'Sony Xperia 5' => 'sony-xperia-5', - 'Sony Xperia 10' => 'sony-xperia-10', - 'Sony Xperia Xa' => 'sony-xperia-xa', - 'Sony Xperia Z3' => 'xperia-z3', - 'Sony Xperia Z5' => 'xperia-z5', - 'Soulcalibur' => 'soulcalibur', - 'Soundbar' => 'soundbar', - 'Soundbase' => 'soundbase', - 'Sound Card' => 'sound-card', - 'Soundmagic' => 'soundmagic', - 'Soup' => 'soup', - 'Soup Maker' => 'soup-maker', - 'Sous-Vide' => 'sousvide', - 'Southern Comfort' => 'southern-comfort', - 'South Park' => 'south-park', - 'Spa' => 'spa', - 'Spade' => 'spade', - 'Spanner' 
=> 'spanner', - 'Speaker' => 'speakers', - 'Specialized' => 'specialized', - 'Speedo' => 'speedo', - 'Sphero' => 'sphero', - 'Spice Rack' => 'spice-rack', - 'Spiderman' => 'spiderman', - 'Spiralizer' => 'spiralizer', - 'Spirit & Liqueur' => 'spirits', - 'Spirit Level' => 'spirit-level', - 'Splatoon' => 'splatoon', - 'Sports & Outdoors' => 'sports-fitness', - 'Sports Events' => 'sports-events', - 'Sports Nutrition' => 'nutrition', - 'Spreads' => 'spreads', - 'Spyro Reignited Trilogy' => 'spyro-reignited-trilogy', - 'SSD' => 'ssd', - 'SSHD' => 'sshd', - 'Staedtler' => 'staedtler', - 'Stair Gate' => 'stair-gate', - 'Stanley' => 'stanley', - 'Stapler' => 'stapler', - 'Starbucks' => 'starbucks', - 'Starlink: Battle for Atlas' => 'starlink-battle-for-atlas', - 'Star Ocean' => 'star-ocean', - 'Star Trek' => 'star-trek', - 'Star Wars' => 'star-wars', - 'Star Wars: Battlefront' => 'star-wars-battlefront', - 'Star Wars: Battlefront II' => 'star-wars-battlefront-2', - 'Star Wars: Squadrons' => 'star-wars-squadrons', - 'Star Wars Jedi: Fallen Order' => 'star-wars-jedi-fallen-order', - 'Stationery' => 'stationery', - 'Stationery & Office Supplies' => 'stationery-office-supplies', - 'Staycation' => 'staycation', - 'Steak' => 'steak', - 'Steam Cleaner' => 'steam-cleaner', - 'Steam Controller' => 'steam-controller', - 'Steamer' => 'steamer', - 'Steam Gaming' => 'steam', - 'Steam Iron' => 'steam-iron', - 'Steam Link' => 'steam-link', - 'Steam Mop' => 'steam-mop', - 'SteelSeries' => 'steelseries', - 'Steering Wheel' => 'steering-wheel', - 'Stella' => 'stella', - 'Stool' => 'stool', - 'Storage Box' => 'storage-box', - 'Stormtrooper' => 'stormtrooper', - 'Straightener' => 'straightener', - 'Streaming' => 'streaming', - 'Street Fighter' => 'street-fighter', - 'Street Fighter V' => 'street-fighter-v', - 'Streetwear' => 'streetwear', - 'Strimmer' => 'strimmer', - 'Strongbow' => 'strongbow', - 'Student Discount' => 'student-discount', - 'Subwoofer' => 'subwoofer', - 'Suitcase' => 
'suitcase', - 'Suncare' => 'suncare', - 'Sun Cream' => 'sun-cream', - 'Sunglasses' => 'sunglasses', - 'Superdry' => 'superdry', - 'Superfast Broadband' => 'superfast-broadband', - 'Superking' => 'superking', - 'Super Mario' => 'mario', - 'Super Mario 3D All-Stars' => 'super-mario-3d-all-stars', - 'Super Mario 3D World' => 'super-mario-3d-world', - 'Super Mario Maker 2' => 'super-mario-maker-2', - 'Super Mario Odyssey' => 'super-mario-odyssey', - 'Super Mario Party' => 'mario-party', - 'Supermarket' => 'supermarket', - 'Super Smash Bros.' => 'super-smash-bros', - 'Surf' => 'surf', - 'Swarovski' => 'swarovski', - 'Sweets' => 'sweets', - 'Swimming' => 'swimming', - 'Swimming Goggles' => 'goggles', - 'Swimwear' => 'swimwear', - 'Swing' => 'swing', - 'Swingball' => 'swingball', - 'Syberia' => 'syberia', - 'Sylvanian' => 'sylvanian', - 'Synology' => 'synology', - 'T-Mobile' => 't-mobile', - 'T-Shirt' => 't-shirt', - 'Table Lamp' => 'table-lamp', - 'Tablet' => 'tablet', - 'Tablet Accessories' => 'tablet-accessories', - 'Table Tennis' => 'table-tennis', - 'Tableware' => 'tableware', - 'Tacx' => 'tacx', - 'Tado' => 'tado', - 'Tag Heuer' => 'tag-heuer', - 'Takeaway and Food Delivery' => 'takeaway', - 'Tales of Vesperia: Definitive Edition' => 'tales-of-vesperia-definitive-edition', - 'Talisker' => 'talisker', - 'Talkmobile' => 'talkmobile', - 'Tamron' => 'tamron', - 'Tangle Teezer' => 'tangle-teezer', - 'Tank Top' => 'tank-top', - 'Tannoy' => 'tannoy', - 'Tanqueray' => 'tanqueray', - 'Tape' => 'tape', - 'Tassimo' => 'tassimo', - 'Tassimo Coffee Machine' => 'tassimo-coffee-machine', - 'tastecard' => 'tastecard', - 'Taxi' => 'taxi', - 'Tea' => 'tea', - 'Team Sonic Racing' => 'team-sonic-racing', - 'Team Sports' => 'team-sports', - 'Teapot' => 'teapot', - 'Technika' => 'technika', - 'Techwood' => 'techwood', - 'Ted Baker' => 'ted-baker', - 'Teddy Bear' => 'teddy-bear', - 'Teenage Mutant Ninja Turtles' => 'turtle', - 'Teeth Care' => 'teeth-care', - 'Teeth Whitening' => 
'teeth-whitening', - 'Tefal' => 'tefal', - 'Tefal Actifry' => 'actifry', - 'Tefal Pan' => 'tefal-pan', - 'Tekken' => 'tekken', - 'Tekken 7' => 'tekken-7', - 'Telegraph' => 'telegraph', - 'Telescope' => 'telescope', - 'Telltale' => 'telltale', - 'Tennis' => 'tennis', - 'Tent' => 'tent', - 'Tequila' => 'tequila', - 'Tesco Clothing' => 'tesco-clothing', - 'Tesla' => 'tesla', - 'Tetris' => 'tetris', - 'Tetris 99' => 'tetris-99', - 'Theatre & Musical' => 'theatre', - 'The Beatles' => 'beatles', - 'The Big Bang Theory' => 'big-bang-theory', - 'The Crew' => 'the-crew', - 'The Dark Pictures: Anthology Man of Medan' => 'the-dark-pictures-anthology-man-of-medan', - 'The Elder Scrolls' => 'elder-scrolls', - 'The Elder Scrolls V: Skyrim' => 'skyrim', - 'The Evil Within' => 'the-evil-within', - 'The Evil Within 2' => 'the-evil-within-2', - 'The Last Guardian' => 'the-last-guardian', - 'The Last of Us' => 'the-last-of-us', - 'The Last of Us Part II' => 'the-last-of-us-part-2', - 'The Legend of Zelda' => 'zelda', - 'The Legend of Zelda: Breath of the Wild' => 'zelda-breath-of-the-wild', - 'The Legend of Zelda: Link's Awakening' => 'the-legend-of-zelda-links-awakening', - 'The Legend of Zelda: Skyward Sword HD' => 'the-legend-of-zelda-skyward-sword-hd', - 'Theme Park' => 'theme-park', - 'The North Face' => 'north-face', - 'The Outer Worlds' => 'the-outer-worlds', - 'Thermos Storage' => 'thermos', - 'The Sims' => 'sims', - 'The Sims 4' => 'the-sims-4', - 'The Sinking City' => 'the-sinking-city', - 'The Sun' => 'the-sun', - 'The Sunday Times' => 'sunday-times', - 'The Walking Dead' => 'walking-dead', - 'The Witcher' => 'witcher', - 'The Witcher 3' => 'the-witcher-3', - 'Thierry Mugler' => 'thierry-mugler', - 'Thomas Sabo' => 'thomas-sabo', - 'Thomas The Tank Engine' => 'thomas-the-tank', - 'Thornton's' => 'thorntons', - 'Thorpe Park' => 'thorpe-park', - 'Throw' => 'throw', - 'Thrustmaster' => 'thrustmaster', - 'Thule' => 'thule', - 'Tickets & Shows' => 'tickets-shows', - 'Tie' => 
'tie', - 'Tights' => 'tights', - 'TIGI' => 'tigi', - 'Tilda' => 'tilda', - 'Tile' => 'tile', - 'Timberland' => 'timberland', - 'Timex' => 'timex', - 'Tissot' => 'tissot', - 'Tissues' => 'tissues', - 'Titanfall' => 'titanfall', - 'Titanfall 2' => 'titanfall-2', - 'Toaster' => 'toaster', - 'Toblerone' => 'toblerone', - 'Toddler Bed' => 'toddler-bed', - 'Toilet Brush' => 'brush', - 'Toilet Cleaner' => 'toilet', - 'Toilet Roll' => 'toilet-roll', - 'Toilet Seat' => 'toilet-seat', - 'Tokyo Laundry' => 'tokyo-laundry', - 'Tomb Raider' => 'tomb-raider', - 'Tom Clancy's' => 'tom-clancy', - 'Tom Clancy's: Ghost Recon' => 'ghost-recon', - 'Tom Clancy's Ghost Recon: Wildlands' => 'ghost-recon-wildlands', - 'Tom Clancy's Ghost Recon Breakpoint' => 'tom-clancys-ghost-recon-breakpoint', - 'Tom Clancy's The Division' => 'tom-clancy-the-division', - 'Tom Clancy's The Division 2' => 'tom-clancy-the-division-2', - 'Tom Ford' => 'tom-ford', - 'Tommee Tippee' => 'tommee-tippee', - 'Tommy Hilfiger' => 'tommy-hilfiger', - 'Toms' => 'toms', - 'TomTom' => 'tomtom', - 'Tonic Water' => 'tonic-water', - 'Tony Hawk's Pro Skater 1 + 2' => 'tony-hawks-pro-skater-1-2', - 'Tools' => 'tool', - 'Toothbrush' => 'toothbrush', - 'Toothpaste' => 'toothpaste', - 'Torch' => 'torch', - 'Torque Wrench' => 'torque-wrench', - 'Toshiba' => 'toshiba', - 'Toshiba Laptop' => 'toshiba-laptop', - 'Toshiba TV' => 'toshiba-tv', - 'Total War' => 'total-war', - 'Tottenham Hotspur F. C.' 
=> 'tottenham', - 'Towel' => 'towel', - 'Toy Box' => 'toy-box', - 'Toy Cars' => 'toy-cars', - 'Toy Castle' => 'castle', - 'Toy Digger' => 'digger', - 'Toy Helicopter' => 'helicopter', - 'Toy Kitchen' => 'toy-kitchen', - 'Toy Mask' => 'mask', - 'Toyota' => 'toyota', - 'Toys' => 'toy', - 'Toy Story' => 'toy-story', - 'Toy Tractor' => 'tractor', - 'Toy Train' => 'train', - 'TP-Link' => 'tp-link', - 'TP-Link Archer' => 'archer', - 'TP-Link Router' => 'tp-link-router', - 'Tracksuit' => 'tracksuit', - 'Trainers' => 'trainers', - 'Trains & Buses' => 'train-and-bus-ticket', - 'Train Ticket' => 'train-ticket', - 'Trampoline' => 'trampoline', - 'Transcend' => 'transcend', - 'Transformers' => 'transformers', - 'Travel' => 'travel', - 'Travel App' => 'travel-app', - 'Travel Insurance' => 'travel-insurance', - 'Travelodge' => 'travelodge', - 'Travel System' => 'travel-system', - 'Treadmill' => 'treadmill', - 'TRESemmé' => 'tresemme', - 'Trespass' => 'trespass', - 'Triathlon' => 'triathlon', - 'Trike' => 'trike', - 'Trine 4' => 'trine-4', - 'Tripod' => 'tripod', - 'Tripp' => 'tripp', - 'Triton Shower' => 'triton', - 'Trolley Bag' => 'trolley', - 'Tropico 5' => 'tropico-5', - 'Tropico 6' => 'tropico-6', - 'Tropico Series' => 'tropico-deals', - 'Trousers' => 'trousers', - 'True Wireless Earbuds' => 'wireless-earphones', - 'Trunki' => 'trunki', - 'Tumble Dryer' => 'tumble-dryer', - 'Tuna' => 'tuna', - 'Turbo Trainer' => 'turbo-trainer', - 'Turntable' => 'turntable', - 'Turtle Beach' => 'turtle-beach', - 'TV' => 'tv', - 'TV & Video' => 'tv-video', - 'TV Accessories' => 'tv-accessories', - 'TV Mount' => 'tv-mount', - 'TV Series' => 'tv-series', - 'TV Stand' => 'tv-stand', - 'Twinings' => 'twinings', - 'Twin Peaks' => 'twin-peaks', - 'Twix' => 'twix', - 'Typhoo' => 'typhoo', - 'Tyres' => 'tyres', - 'Ubisoft' => 'ubisoft', - 'UE BOOM' => 'ue-boom', - 'UE Boom 2' => 'ue-boom-2', - 'UEFA' => 'uefa', - 'UE Megablast' => 'ue-megablast', - 'UE Megaboom' => 'ue-megaboom', - 'UGG' => 'ugg', - 
'Ulefone' => 'ulefone', - 'Ultrabook' => 'ultrabook', - 'Ultrawide Monitor' => 'ultrawide', - 'Umbrella' => 'umbrella', - 'UMI' => 'umidigi', - 'Uncharted' => 'uncharted', - 'Uncharted 4: A Thief's End' => 'uncharted-4', - 'Uncharted: The Lost Legacy' => 'uncharted-the-lost-legacy', - 'Under Armour' => 'under-armour', - 'Underwear' => 'underwear', - 'Unicorn' => 'unicorn', - 'UNiDAYS' => 'unidays', - 'Universal Remote' => 'universal-remote', - 'Uno' => 'uno', - 'Uplay' => 'uplay', - 'Urban Decay' => 'urban-decay', - 'Urban Sports' => 'urban-sports', - 'USB Cable' => 'usb-cable', - 'USB Hub' => 'usb-hub', - 'USB Memory Stick' => 'flash-drive', - 'USB Type C' => 'usb-type-c', - 'USN' => 'usn', - 'Vacuum Cleaner' => 'vacuum-cleaners', - 'Vacuum Flask' => 'flask', - 'Valkyria Chronicles' => 'valkyria-chronicles', - 'Valkyria Chronicles 4' => 'valkyria-chronicles-4', - 'Vango' => 'vango', - 'Vanish' => 'vanish', - 'Vans' => 'vans', - 'Vans Old Skool' => 'vans-old-skool', - 'Vans Shoes' => 'vans-shoes', - 'Vase' => 'vase', - 'Vaseline' => 'vaseline', - 'Vauxhall' => 'vauxhall', - 'VAX' => 'vax', - 'Vax Blade' => 'vax-blade', - 'Vax Vacuum Cleaner' => 'vax-vacuum', - 'Veet' => 'veet', - 'Vega 7' => 'vega-7', - 'Vegetables' => 'vegetables', - 'Vegetarian' => 'vegetarian', - 'Vehicles' => 'vehicles', - 'Velvet Comfort' => 'velvet', - 'Vera Wang' => 'vera-wang', - 'Verbatim' => 'verbatim', - 'Versace' => 'versace', - 'Vibrator' => 'vibrator', - 'Victorinox' => 'victorinox', - 'Video Games' => 'videogame', - 'Video Streaming' => 'video-streaming', - 'Viktor & Rolf Spicebomb' => 'spicebomb', - 'Vileda' => 'vileda', - 'Villeroy & Boch' => 'villeroy-boch', - 'Viners' => 'viners', - 'Vinyl' => 'vinyl', - 'Virgin' => 'virgin', - 'Vitamins & Supplements' => 'vitamins', - 'Vitamix' => 'vitamix', - 'Vodafone' => 'vodafone', - 'Vodka' => 'vodka', - 'Volvo' => 'volvo', - 'VPN' => 'vpn', - 'VR Headset' => 'vr-headset', - 'VTech' => 'vtech', - 'VTech Toot Toot' => 'toot-toot', - 'Vue' => 
'vue', - 'VW' => 'vw', - 'Wacom' => 'wacom', - 'Waffle Maker' => 'waffle-maker', - 'Wahl' => 'wahl', - 'Walkers' => 'walkers', - 'Walking Boots' => 'walking-boots', - 'Wall Art' => 'wall-art', - 'Wallet' => 'wallet', - 'Wallpaper' => 'wallpaper', - 'Wardrobe' => 'wardrobe', - 'Warhammer' => 'warhammer', - 'Washbag' => 'washbag', - 'Washer Dryer' => 'washer-dryer', - 'Washing Machine' => 'washing-machine', - 'Washing Powder' => 'washing-powder', - 'Watch' => 'watch', - 'Watch Dogs' => 'watch-dogs', - 'Watch Dogs 2' => 'watch-dogs-2', - 'Watch Dogs: Legion' => 'watch-dogs-legion', - 'Water Bottle' => 'water-bottle', - 'Water Butt' => 'water-butt', - 'Water Dispenser' => 'water-dispenser', - 'Water Filter' => 'water-filter', - 'Water Gun' => 'water-gun', - 'Waterproof Camera' => 'waterproof-camera', - 'Waterproof Jacket' => 'waterproof-jacket', - 'Watersports' => 'watersport', - 'Water Toys' => 'water-toys', - 'Wayfarer' => 'wayfarer', - 'WD40' => 'wd40', - 'Wearable' => 'wearable', - 'Weather Station' => 'weather-station', - 'Webcam' => 'webcam', - 'Weber' => 'weber', - 'Web Hosting' => 'web-hosting', - 'Wedding' => 'wedding', - 'Weed Killer' => 'weed', - 'Weekend Break' => 'weekend-break', - 'Weetabix' => 'weetabix', - 'Weightlifting' => 'weightlifting', - 'Weight Watchers' => 'weight-watchers', - 'Wellies' => 'wellies', - 'Wellness and Health' => 'wellness-and-health', - 'Wenger' => 'wenger', - 'Western Digital' => 'western-digital', - 'Wetsuit' => 'wetsuit', - 'Wheelbarrow' => 'wheelbarrow', - 'Wheelchair' => 'wheelchair', - 'Whey' => 'whey', - 'Whiskas' => 'whiskas', - 'Whisky' => 'whisky', - 'Whole Home Mesh Wi-Fi System' => 'whole-home-mesh-wifi-system', - 'Wi-Fi Camera' => 'wifi-camera', - 'Wi-Fi Dongle' => 'dongle', - 'Wi-Fi Extender' => 'wifi-extender', - 'Wii' => 'wii', - 'Wii Game' => 'wii-games', - 'Wii U Game' => 'wii-u-game', - 'Wii U Pro Controller' => 'wii-u-pro-controller', - 'Wild Turkey' => 'wild-turkey', - 'Wileyfox' => 'wileyfox', - 'Wilkinson 
Sword Hydro 5' => 'hydro-5', - 'Wilkinson Sword Razor' => 'wilkinson-sword', - 'Wimbledon Tennis' => 'wimbledon', - 'Window Cleaner' => 'window-cleaner', - 'Windows' => 'windows', - 'Windows 8' => 'windows-8', - 'Windows 10' => 'windows-10', - 'Wine' => 'wine', - 'Wine Advent Calendar' => 'wine-advent-calendar', - 'Wine Glasses' => 'wine-glasses', - 'Winter Jacket' => 'winter-jacket', - 'Wiper Blades' => 'wiper-blades', - 'Wireless Adapter' => 'wireless-adapter', - 'Wireless Charger' => 'wireless-charger', - 'Wireless Controller' => 'wireless-controller', - 'Wireless Headphones' => 'wireless-headphones', - 'Wireless Headset' => 'wireless-headset', - 'Wireless Keyboard' => 'wireless-keyboard', - 'Wireless Mouse' => 'wireless-mouse', - 'Wok' => 'wok', - 'Wolfenstein' => 'wolfenstein', - 'Wolfenstein 2: The New Colossus' => 'wolfenstein-2', - 'Women's Boots' => 'womens-boots', - 'Women's Fragrance' => 'womens-fragrance', - 'Women's Shoes' => 'womens-shoes', - 'Workbench' => 'workbench', - 'World of Warcraft' => 'world-of-warcraft', - 'World War Z' => 'world-war-z', - 'WORX' => 'worx', - 'Wreckfest' => 'wreckfest', - 'Wuaki' => 'wuaki', - 'WWE 2K' => 'wwe', - 'Xbox' => 'xbox', - 'Xbox 360 Game' => 'xbox-360-game', - 'Xbox Accessories' => 'xbox-accessories', - 'Xbox Controller' => 'xbox-controller', - 'Xbox Game Pass' => 'xbox-game-pass', - 'Xbox Gift Card' => 'xbox-gift-card', - 'Xbox Headset' => 'xbox-headset', - 'Xbox Kinect' => 'kinect', - 'Xbox Live' => 'xbox-live', - 'Xbox One Controller' => 'xbox-one-controller', - 'Xbox One Elite Controller' => 'xbox-one-elite-controller', - 'Xbox One Games' => 'xbox-one-games', - 'Xbox One S' => 'xbox-one-s', - 'Xbox One X' => 'xbox-one-x', - 'Xbox Series S' => 'xbox-series-s', - 'Xbox Series X' => 'xbox-series-x', - 'Xbox Series X Controller' => 'xbox-series-x-controller', - 'Xbox Series X Games' => 'xbox-series-x-game', - 'Xbox Wireless Adapter' => 'xbox-wireless-adapter', - 'Xbox Wireless Headset' => 'xbox-wireless-headset', 
- 'XCOM' => 'xcom', - 'XCOM 2' => 'xcom-2', - 'Xenoblade Chronicles' => 'xenoblade-chronicles', - 'XFX' => 'xfx', - 'Xiaomi' => 'xiaomi', - 'Xiaomi AirDots' => 'xiaomi-airdots', - 'Xiaomi Black Shark' => 'xiaomi-black-shark', - 'Xiaomi Black Shark 2' => 'xiaomi-black-shark-2', - 'Xiaomi Headphones' => 'xiaomi-headphones', - 'Xiaomi Laptop' => 'xiaomi-laptop', - 'Xiaomi Mi 5' => 'xiaomi-mi-5', - 'Xiaomi Mi 6' => 'xiaomi-mi-6', - 'Xiaomi Mi 8' => 'xiaomi-mi-8', - 'Xiaomi Mi 8 Lite' => 'xiaomi-mi-8-lite', - 'Xiaomi Mi 8 Pro' => 'xiaomi-mi-8-pro', - 'Xiaomi Mi 9' => 'xiaomi-mi-9', - 'Xiaomi Mi 9 Lite' => 'xiaomi-mi-9-lite', - 'Xiaomi Mi 9 SE' => 'xiaomi-mi-9-se', - 'Xiaomi Mi 9T' => 'xiaomi-mi-9t', - 'Xiaomi Mi 9T Pro' => 'xiaomi-mi-9t-pro', - 'Xiaomi Mi 10' => 'xiaomi-mi-10', - 'Xiaomi Mi 10 Lite' => 'xiaomi-mi-10-lite', - 'Xiaomi Mi 10T' => 'xiaomi-mi-10t', - 'Xiaomi Mi 10T Lite' => 'xiaomi-mi-10t-lite', - 'Xiaomi Mi 10T Pro' => 'xiaomi-mi-10t-pro', - 'Xiaomi Mi 11' => 'xiaomi-mi-11', - 'Xiaomi Mi 11 Lite 4G' => 'xiaomi-mi-11-lite-4g', - 'Xiaomi Mi 11 Lite 5G' => 'xiaomi-mi-11-lite-5g', - 'Xiaomi Mi 11 Pro' => 'xiaomi-mi-11-pro', - 'Xiaomi Mi 11 Ultra' => 'xiaomi-mi-11-ultra', - 'Xiaomi Mi 11i' => 'xiaomi-mi-11i', - 'Xiaomi Mi A1' => 'xiaomi-mi-a1', - 'Xiaomi Mi A2' => 'mi-a2', - 'Xiaomi Mi A3' => 'xiaomi-mi-a3', - 'Xiaomi Mi Band' => 'xiaomi-mi-band', - 'Xiaomi Mi Band 3' => 'xiaomi-mi-band-3', - 'Xiaomi Mi Band 4' => 'xiaomi-mi-band-4', - 'Xiaomi Mi Band 5' => 'xiaomi-mi-band-5', - 'Xiaomi Mi Box' => 'xiaomi-mi-box', - 'Xiaomi Mi Max 3' => 'xiaomi-mi-max3', - 'Xiaomi Mi Mix' => 'xiaomi-mi-mix', - 'Xiaomi Mi Mix 2' => 'xiaomi-mi-mix-2', - 'Xiaomi Mi Mix 2S' => 'xiaomi-mi-mix-2s', - 'Xiaomi Mi Mix 3' => 'xiaomi-mi-mix-3', - 'Xiaomi Mi Note' => 'xiaomi-mi-note', - 'Xiaomi Mi Note 10' => 'mi-note-10', - 'Xiaomi Mi Pad 4' => 'xiaomi-mi-pad-4', - 'Xiaomi Pocophone F1' => 'pocophone-f1', - 'Xiaomi Redmi' => 'redmi', - 'Xiaomi Redmi 4' => 'xiaomi-redmi-4', - 'Xiaomi Redmi 
5' => 'redmi-5', - 'Xiaomi Redmi 6' => 'redmi-6', - 'Xiaomi Redmi 8' => 'redmi-8', - 'Xiaomi Redmi Note 4' => 'note-4', - 'Xiaomi Redmi Note 5' => 'redmi-note-5', - 'Xiaomi Redmi Note 6' => 'redmi-note-6', - 'Xiaomi Redmi Note 6 Pro' => 'xiaomi-redmi-note-6-pro', - 'Xiaomi Redmi Note 7' => 'redmi-note-7', - 'Xiaomi Redmi Note 8' => 'xiaomi-redmi-note-8', - 'Xiaomi Redmi Note 8 Pro' => 'xiaomi-redmi-note-8-pro', - 'Xiaomi Redmi Note 8T' => 'redmi-note-8t', - 'Xiaomi Redmi Note 9' => 'xiaomi-redmi-note-9', - 'Xiaomi Redmi Note 9 Pro' => 'xiaomi-redmi-note-9-pro', - 'Xiaomi Redmi Note 9S' => 'xiaomi-redmi-note-9s', - 'Xiaomi Roborock' => 'xiaomi-roborock', - 'Xiaomi Roborock S5' => 'xiaomi-roborock-s5', - 'Xiaomi Scooter' => 'xiaomi-scooter', - 'Xiaomi Smartphones' => 'xiaomi-smartphone', - 'Xiaomi Tablets' => 'xiaomi-tablet', - 'Yakuza' => 'yakuza', - 'Yale' => 'yale', - 'Yale Smart Lock' => 'yale-smart-lock', - 'Yamaha' => 'yamaha', - 'Yankee Candle' => 'yankee-candle', - 'Yeelight' => 'xiaomi-yeelight', - 'Yoga' => 'yoga', - 'Yoghurt' => 'yoghurt', - 'Yoshi' => 'yoshi', - 'Yoshi's Crafted World' => 'yoshis-crafted-world', - 'YouView' => 'youview', - 'Yves Saint Laurent' => 'yves-saint-laurent', - 'Zanussi' => 'zanussi', - 'Zippo' => 'zippo', - 'Zizzi' => 'zizzi', - 'Zoo' => 'zoo', - 'Zoostorm' => 'zoostorm', - 'ZOTAC' => 'zotac', - 'ZTE' => 'zte', - 'ZTE Smartphone' => 'zte-smartphone', - 'ZyXEL' => 'zyxel', - ] + 'type' => 'text', + 'exampleValue' => 'broadband', + 'title' => 'Group name in the URL : The group name that must be entered is present after "https://www.hotukdeals.com/tag/" and before any "?". 
+Example: If the URL of the group displayed in the browser is : +https://www.hotukdeals.com/tag/broadband?sortBy=temp +Then enter : +broadband', + ], + 'subgroups' => [ + 'name' => 'category', + 'type' => 'text', + 'exampleValue' => '343563', + 'title' => 'Category number in the URL : The category number that must be entered is present after "groups=" and before any "&". +Example: If the URL of the group displayed in the browser is : +https://www.hotukdeals.com/tag/broadband?groups=343563&sortBy=new +Then enter : +343563', ], 'order' => [ 'name' => 'Order by', @@ -3273,12 +94,12 @@ class HotUKDealsBridge extends PepperBridgeAbstract 'context-group' => 'Deals per group', 'context-talk' => 'Discussion Monitoring', 'uri-group' => 'tag/', + 'uri-deal' => 'deals/', + 'uri-merchant' => 'search/deals?merchant-id=', + 'image-host' => 'https://images.hotukdeals.com/', 'request-error' => 'Could not request HotUKDeals', 'thread-error' => 'Unable to determine the thread ID. Check the URL you entered', - 'no-results' => 'Ooops, looks like we could', - 'relative-date-indicator' => [ - 'ago', - ], + 'currency' => '£', 'price' => 'Price', 'shipping' => 'Shipping', 'origin' => 'Origin', @@ -3286,50 +107,9 @@ class HotUKDealsBridge extends PepperBridgeAbstract 'title-keyword' => 'Search', 'title-group' => 'Group', 'title-talk' => 'Discussion Monitoring', - 'local-months' => [ - 'Jan', - 'Feb', - 'Mar', - 'Apr', - 'May', - 'Jun', - 'Jul', - 'Aug', - 'Sep', - 'Occ', - 'Nov', - 'Dec', - 'st', - 'nd', - 'rd', - 'th' - ], - 'local-time-relative' => [ - 'Posted ', - 'm', - 'h,', - 'day', - 'days', - 'month', - 'year', - 'and ' - ], - 'date-prefixes' => [ - 'Found ', - 'Refreshed ', - 'Made hot ' - ], - 'relative-date-alt-prefixes' => [ - 'Made hot ', - 'Refreshed ', - 'Last updated ' - ], - 'relative-date-ignore-suffix' => [ - '/by.*$/' - ], - 'localdeal' => [ - 'Local', - 'Expires' - ] + 'deal-type' => 'Deal Type', + 'localdeal' => 'Local deal', + 'context-hot' => '-hot', + 'context-new' 
=> '-new', ]; } diff --git a/bridges/HumbleBundleBridge.php b/bridges/HumbleBundleBridge.php new file mode 100644 index 00000000..28be3ad3 --- /dev/null +++ b/bridges/HumbleBundleBridge.php @@ -0,0 +1,133 @@ + [ + 'name' => 'Bundle type', + 'type' => 'list', + 'defaultValue' => 'bundles', + 'values' => [ + 'All' => 'bundles', + 'Books' => 'books', + 'Games' => 'games', + 'Software' => 'software', + ] + ] + ]]; + + public function collectData() + { + $page = getSimpleHTMLDOMCached($this->getURI()); + $json_text = $page->find('#landingPage-json-data', 0)->innertext; + $json = json_decode(html_entity_decode($json_text), true)['data']; + + $products = []; + $types = ['books', 'games', 'software']; + $types = $this->getInput('type') === 'bundles' ? $types : [$this->getInput('type')]; + foreach ($types as $type) { + $products = array_merge($products, $json[$type]['mosaic'][0]['products']); + } + + foreach ($products as $element) { + $dom = new simple_html_dom(); + $body = $dom->createElement('div'); + $item = [ + 'author' => $element['author'], + 'categories' => $element['hover_highlights'], + 'content' => $body, + 'timestamp' => $element['start_date|datetime'], + 'title' => $element['tile_short_name'], + 'uid' => $element['machine_name'], + 'uri' => parent::getURI() . 
$element['product_url'], + ]; + + array_unshift($item['categories'], explode(':', $element['tile_name'])[0]); + array_unshift($item['categories'], $element['tile_stamp']); + + $this->createChild($dom, $body, 'img', null, ['src' => $element['tile_logo']]); + $this->createChild($dom, $body, 'img', null, ['src' => $element['high_res_tile_image']]); + $this->createChild($dom, $body, 'h2', $element['short_marketing_blurb']); + $this->createChild($dom, $body, 'p', $element['detailed_marketing_blurb']); + + $this->items[] = $this->processBundle($item, $dom, $body); + } + } + + private function createChild($dom, $body, $name = null, $val = null, $args = []) + { + if ($name == null) { + $elem = $dom->createTextNode($val); + } else { + $elem = $dom->createElement($name, $val); + } + foreach ($args as $arg => $val) { + $elem->setAttribute($arg, $val); + } + $body->appendChild($elem); + return $elem; + } + + private function processBundle($item, $dom, $body) + { + $page = getSimpleHTMLDOMCached($item['uri']); + $json_text = $page->find('#webpack-bundle-page-data', 0)->innertext; + $json = json_decode(html_entity_decode($json_text), true)['bundleData']; + $tiers = $json['tier_display_data']; + ksort($tiers, SORT_NATURAL); + # `initial` element gets sorted to the end as bt# (bundle tiers) precede it alphabetically + array_unshift($tiers, array_pop($tiers)); + + $seen = []; + $toc = $this->createChild($dom, $body, 'ul'); + foreach ($tiers as $tiername => $tier) { + $this->createChild($dom, $body, 'h2', $tier['header'], ['id' => $tiername]); + $li = $this->createChild($dom, $toc, 'li'); + $this->createChild($dom, $li, 'a', $tier['header'], ['href' => "#$tiername"]); + $toc_tier = $this->createChild($dom, $toc, 'ul'); + foreach ($tier['tier_item_machine_names'] as $name) { + if (in_array($name, $seen)) { + continue; + } + array_push($seen, $name); + + $element = $json['tier_item_data'][$name]; + $head = $this->createChild($dom, $body, 'h3', null, ['id' => $name]); + $head_link = 
$this->createChild($dom, $head, 'a', $element['human_name'], ['id' => $name]); + $li = $this->createChild($dom, $toc_tier, 'li'); + $this->createChild($dom, $li, 'a', $element['human_name'], ['href' => "#$name"]); + $this->createChild($dom, $body, 'img', null, ['src' => $element['resolved_paths']['featured_image']]); + $this->createChild($dom, $body, 'img', null, ['src' => $element['resolved_paths']['preview_image']]); + $this->createChild($dom, $body, 'br'); + if ($element['description_text']) { + $body->appendChild(str_get_html($element['description_text'])->root); + } + if ($element['youtube_link']) { + $head_link->href = 'https://youtu.be/' . $element['youtube_link']; + } + if ($element['book_preview']) { + $head_link->href = $element['book_preview']['preview_file_link']; + } + } + } + + return $item; + } + + public function getName() + { + $name = parent::getName(); + $name .= $this->getInput('type') ? ' - ' . $this->getInput('type') : ''; + return $name; + } + + public function getURI() + { + $uri = parent::getURI() . $this->getInput('type'); + return $uri; + } +} diff --git a/bridges/HuntShowdownNewsBridge.php b/bridges/HuntShowdownNewsBridge.php new file mode 100644 index 00000000..6aca88f7 --- /dev/null +++ b/bridges/HuntShowdownNewsBridge.php @@ -0,0 +1,40 @@ +find('.col'); + + // Removing first element because it's a "load more" button + array_shift($articles); + foreach ($articles as $article) { + $item = []; + + $article_title = $article->find('h3', 0)->plaintext; + $article_content = $article->find('p', 0)->plaintext; + $article_cover = $article->find('img', 0)->src; + + // If there is a cover, add it to the content + if (!empty($article_cover)) { + $article_cover = '' . $article_title . '

    '; + $article_content = $article_cover . $article_content; + } + + $item['uri'] = $article->find('a', 0)->href; + $item['title'] = $article_title; + $item['content'] = $article_content; + $item['enclosures'] = [$article_cover]; + $item['timestamp'] = $article->find('span', 0)->plaintext; + + $this->items[] = $item; + } + } +} \ No newline at end of file diff --git a/bridges/HytaleBridge.php b/bridges/HytaleBridge.php index 7ca11af6..01fc0f38 100644 --- a/bridges/HytaleBridge.php +++ b/bridges/HytaleBridge.php @@ -18,26 +18,27 @@ class HytaleBridge extends BridgeAbstract $blogPosts = json_decode(getContents(self::_API_URL_PUBLISHED)); $length = count($blogPosts); - for ($i = 1; $i < $length; $i += 3) { + for ($i = 0; $i < $length; $i += 3) { $slug = $blogPosts[$i]->slug; $blogPost = json_decode(getContents(self::_API_URL_BLOG_POST . $slug)); - if (property_exists($blogPost, 'previous')) { - $this->addBlogPost($blogPost->previous); + if (property_exists($blogPost, 'next')) { + $this->addBlogPost($blogPost->next); } $this->addBlogPost($blogPost); - if (property_exists($blogPost, 'next')) { - $this->addBlogPost($blogPost->next); + if (property_exists($blogPost, 'previous')) { + $this->addBlogPost($blogPost->previous); } } - if ($length % 3 == 1) { - $slug = $blogPosts[count($blogPosts) - 1]->slug; + if (($length >= 3) && ($length % 3 == 0)) { + $slug = $blogPosts[$length - 1]->slug; $blogPost = json_decode(getContents(self::_API_URL_BLOG_POST . $slug)); + $this->addBlogPost($blogPost); } } diff --git a/bridges/I4wifiBridge.php b/bridges/I4wifiBridge.php new file mode 100644 index 00000000..643057a4 --- /dev/null +++ b/bridges/I4wifiBridge.php @@ -0,0 +1,293 @@ + [ + ], + ]; + + /** + * Fetches and processes data based on the selected context. 
+ * + * This function retrieves the HTML content for the specified context's URI, + * resolves relative links within the content, and then delegates the data + * extraction to the appropriate method (currently only `collectNews`). + */ + + public function collectData() + { + $html = getSimpleHTMLDOMCached($this->getURI(), 86400); + + defaultLinkTo($html, static::URI); + + // Router + switch ($this->queriedContext) { + case 'Product news': + $this->collectNews($html); + break; + } + } + + /** + * Returns the icon for the bridge. + * + * @return string The icon URL. + */ + public function getURI() + { + $uri = static::URI; + + // URI Router + switch ($this->queriedContext) { + case 'Product news': + $uri .= '/'; + break; + } + + return $uri; + } + + /** + * Returns the name for the bridge. + * + * @return string The Name. + */ + public function getName() + { + $name = static::NAME; + + $name .= ($this->queriedContext) ? ' - ' . $this->queriedContext : ''; + + switch ($this->queriedContext) { + case 'Product news': + break; + } + + return $name; + } + + /** + * Parse most used date formats + * + * Basically strtotime doesn't convert dates correctly due to formats + * being hard to interpret. So we use the DateTime object, manually + * fixing dates and times (set to 00:00:00.000). + * + * We don't know the timezone, so just assume +00:00 (or whatever + * DateTime chooses) + */ + private function fixDate($date) + { + $df = $this->parseDateTimeFromString($date); + + return date_format($df, 'U'); + } + + /** + * Extracts the images from the article. + * + * @param object $article The article object. + * @return array An array of image URLs. 
+ */ + private function extractImages($article) + { + // Notice: We can have zero or more images (though it should mostly be 1) + $elements = $article->find('img'); + + $images = []; + + foreach ($elements as $img) { + $images[] = $img->src; + } + + return $images; + } + + #region Articles + + /** + * Collects uri, timestamp, title, content and images in the news articles from the HTML and transforms to rss. + * + * @param object $html The HTML object. + * @return void + */ + private function collectNews($html) + { + $articles = $html->find('.timeline-item.timeline-item-right') + or throwServerException('No articles found! Layout might have changed!'); + + foreach ($articles as $article) { + $item = []; + + // get uri of product + $item['uri'] = $this->extractNewsUri($article); + // Add content + $item['content'] = $this->extractNewsDescription($article); + // Add images + $item['title'] = $this->extractNewsTitle($article); + // Add images + $item['enclosures'] = $this->extractImages($article); + // Add timestamp + $item['timestamp'] = $this->extractNewsDate($article); + + // collect sources into rss article + $this->items[] = $item; + } + } + + /** + * Extracts the URI of the news article. + * + * @param object $article The article object. + * @return string The URI of the news article. + */ + private function extractNewsUri($article) + { + // Return URI of the article + $element = $article->find('a', 0) + or throwServerException('Anchor not found!'); + + return $element->href; + } + + /** + * Extracts the date of the news article. + * + * @param object $article The article object. + * @return string The date of the news article. + */ + private function extractNewsDate($article) + { + // Check if date is set + $element = $article->find('.timeline-item-info', 0) + or throwServerException('Date not found!'); + + // Format date + return $this->fixDate($element->plaintext); + } + + /** + * Extracts the description of the news article. 
+ * + * @param object $article The article object. + * @return string The description of the news article. + */ + private function extractNewsDescription($article) + { + // Extract description + $element = $article->find('p', 0) + or throwServerException('Description not found!'); + + return $element->innertext; + } + + /** + * Extracts the title of the news article. + * + * @param object $article The article object. + * @return string The title of the news article. + */ + private function extractNewsTitle($article) + { + // Extract title + $element = $article->find('img', 0) + or throwServerException('Title not found!'); + + return $element->alt; + } + + /** + * It attempts to recognize the date/time format in a string and create a DateTime object. + * + * It goes through the list of defined formats and tries to apply them to the input string. + * Returns the first successfully parsed DateTime object that matches the entire string. + * + * @param string $dateString A string potentially containing a date and/or time. + * @return DateTime|null A DateTime object if successfully recognized and parsed, otherwise null. + */ + private function parseDateTimeFromString(string $dateString): ?DateTime + { + // List of common formats - YOU CAN AND SHOULD EXPAND IT according to expected inputs! + // Order may matter if the formats are ambiguous. + // It is recommended to give more specific formats (with time, full year) before more general ones. + $possibleFormats = [ + // Czech formats (day.month.year) + 'd.m.Y H:i:s', // 10.04.2025 10:57:47 + 'j.n.Y H:i:s', // 10.4.2025 10:57:47 + 'd. m. Y H:i:s', // 10. 04. 2025 10:57:47 + 'j. n. Y H:i:s', // 10. 4. 2025 10:57:47 + 'd.m.Y H:i', // 10.04.2025 10:57 + 'j.n.Y H:i', // 10.4.2025 10:57 + 'd. m. Y H:i', // 10. 04. 2025 10:57 + 'j. n. Y H:i', // 10. 4. 2025 10:57 + 'd.m.Y', // 10.04.2025 + 'j.n.Y', // 10.4.2025 + 'd. m. Y', // 10. 04. 2025 + 'j. n. Y', // 10. 4. 
2025 + + // ISO 8601 and international formats (year-month-day) + 'Y-m-d H:i:s', // 2025-04-10 10:57:47 + 'Y-m-d H:i', // 2025-04-10 10:57 + 'Y-m-d', // 2025-04-10 + 'YmdHis', // 20250410105747 + 'Ymd', // 20250410 + + // American formats (month/day/year) - beware of ambiguity! + 'm/d/Y H:i:s', // 04/10/2025 10:57:47 + 'n/j/Y H:i:s', // 4/10/2025 10:57:47 + 'm/d/Y H:i', // 04/10/2025 10:57 + 'n/j/Y H:i', // 4/10/2025 10:57 + 'm/d/Y', // 04/10/2025 + 'n/j/Y', // 4/10/2025 + + // Standard formats (including time zone) + DateTime::ATOM, // example. 2025-04-10T10:57:47+02:00 + DateTime::RFC3339, // example. 2025-04-10T10:57:47+02:00 + DateTime::RFC3339_EXTENDED, // example. 2025-04-10T10:57:47.123+02:00 + DateTime::RFC2822, // example. Thu, 10 Apr 2025 10:57:47 +0200 + DateTime::ISO8601, // example. 2025-04-10T105747+0200 + 'Y-m-d\TH:i:sP', // ISO 8601 s 'T' oddělovačem + 'Y-m-d\TH:i:s.uP', // ISO 8601 s mikrosekundami + + // You can add more formats as needed... + // e.g. 'd-M-Y' (10-Apr-2025) - requires English locale + // e.g. 'j. F Y' (10. abren 2025) - requires Czech locale + ]; + + // Set locale for parsing month/day names (if using F, M, l, D) + // E.g. setlocale(LC_TIME, 'cs_CZ.UTF-8'); or 'en_US.UTF-8'); + + foreach ($possibleFormats as $format) { + // We will try to create a DateTime object from the given format + $dateTime = DateTime::createFromFormat($format, $dateString); + + // We check that the parsing was successful AND ALSO + // that there were no errors or warnings during the parsing. + // This is important to ensure that the format matches the ENTIRE string. + if ($dateTime !== false) { + $errors = DateTime::getLastErrors(); + if (!($errors)) { + // Success! We found a valid format for the entire string. 
+ return $dateTime; + } + } + } + + // If no format matches or parsing failed + return null; + } + + #endregion +} diff --git a/bridges/IGNBridge.php b/bridges/IGNBridge.php index d00b6a18..e063dbbb 100644 --- a/bridges/IGNBridge.php +++ b/bridges/IGNBridge.php @@ -10,17 +10,14 @@ class IGNBridge extends FeedExpander public function collectData() { - $this->collectExpandableDatas('http://feeds.ign.com/ign/all', 15); + $this->collectExpandableDatas('http://feeds.ign.com/ign/all', 2); } // IGNs feed is both hidden and incomplete. This bridge tries to fix this. - protected function parseItem($newsItem) + protected function parseItem(array $item) { - $item = parent::parseItem($newsItem); - - // $articlePage gets the entire page's contents - $articlePage = getSimpleHTMLDOM($newsItem->link); + $articlePage = getSimpleHTMLDOM($item['uri']); // List of BS elements $uselessElements = [ @@ -33,7 +30,7 @@ class IGNBridge extends FeedExpander '.jsx-4213937408', '.commerce-container', '.widget-container', - '.newsletter-signup-button' + '.newsletter-signup-button', ]; // Remove useless elements diff --git a/bridges/IPBBridge.php b/bridges/IPBBridge.php index d5db0111..32d148fa 100644 --- a/bridges/IPBBridge.php +++ b/bridges/IPBBridge.php @@ -44,7 +44,7 @@ class IPBBridge extends FeedExpander switch (parse_url($this->getInput('uri'), PHP_URL_PATH)) { case null: case '/index.php': - returnClientError('Provided URI is invalid!'); + throwClientException('Provided URI is invalid!'); break; default: break; @@ -75,7 +75,7 @@ class IPBBridge extends FeedExpander $this->collectForum($html); break; default: - returnClientError('Unknown type!'); + throwClientException('Unknown type!'); break; } } @@ -106,7 +106,7 @@ class IPBBridge extends FeedExpander $this->collectForumTable($html); break; default: - returnClientError('Unknown forum format!'); + throwClientException('Unknown forum format!'); break; } } @@ -159,7 +159,7 @@ class IPBBridge extends FeedExpander 
$this->collectTopicHistory($html, $limit, 'collectTopicDiv'); break; default: - returnClientError('Unknown topic format!'); + throwClientException('Unknown topic format!'); break; } } @@ -168,7 +168,7 @@ class IPBBridge extends FeedExpander { // Make sure the callback is valid! if (!method_exists($this, $callback)) { - returnServerError('Unknown function (\'' . $callback . '\')!'); + throwServerException('Unknown function (\'' . $callback . '\')!'); } $next = null; // Holds the URI of the next page diff --git a/bridges/IdealoBridge.php b/bridges/IdealoBridge.php new file mode 100644 index 00000000..05a2ebb8 --- /dev/null +++ b/bridges/IdealoBridge.php @@ -0,0 +1,301 @@ + [ + 'name' => 'idealo.de / idealo.fr / idealo.es Link to productpage', + 'required' => true, + 'exampleValue' => 'https://www.idealo.de/preisvergleich/OffersOfProduct/202007367_-s7-pro-ultra-roborock.html' + ], + 'ExcludeNew' => [ + 'name' => 'Priceupdate: Do not track new items', + 'type' => 'checkbox', + 'value' => 'c' + ], + 'ExcludeUsed' => [ + 'name' => 'Priceupdate: Do not track used items', + 'type' => 'checkbox', + 'value' => 'uc' + ], + 'MaxPriceNew' => [ + 'name' => 'Pricealarm: Maximum price for new Product', + 'type' => 'number' + ], + 'MaxPriceUsed' => [ + 'name' => 'Pricealarm: Maximum price for used Product', + 'type' => 'number' + ], + ] + ]; + + private $headers = [ + 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:139.0) Gecko/20100101 Firefox/139.0', + 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', + 'Accept-Language: fr-FR,fr;q=0.8,en-US;q=0.5,en;q=0.3' + ]; + private $options = [ + CURLOPT_TRANSFER_ENCODING => 1, + CURLOPT_ACCEPT_ENCODING => 'gzip, deflate, br' + ]; + + public function getIcon() + { + return 'https://cdn.idealo.com/storage/ids-assets/ico/favicon.ico'; + } + + /** + * Returns the RSS Feed title when a RSS feed is rendered + * @return string the RSS feed Title + */ + private function getFeedTitle() + { + 
$cacheDuration = 604800; + $link = $this->getInput('Link'); + $keyTITLE = $link . 'TITLE'; + $product = $this->loadCacheValue($keyTITLE); + + // The cache does not contain the title of the bridge, we must get it and save it in the cache + if ($product === null) { + $html = getSimpleHTMLDOM($link, $this->headers, $this->options); + $product = $html->find('.oopStage-title', 0)->find('span', 0)->plaintext; + $this->saveCacheValue($keyTITLE, $product); + } + + $MaxPriceUsed = $this->getInput('MaxPriceUsed'); + $MaxPriceNew = $this->getInput('MaxPriceNew'); + $titleParts = []; + + $titleParts[] = $product; + + // Add Max Prices to the title + if ($MaxPriceUsed !== null) { + $titleParts[] = 'Max Price Used : ' . $MaxPriceUsed . '€'; + } + if ($MaxPriceNew !== null) { + $titleParts[] = 'Max Price New : ' . $MaxPriceNew . '€'; + } + + $title = implode(' ', $titleParts); + + + return $title . ' - ' . $this::NAME; + } + + /** + * Returns the Price as float + * @return float rhe price converted in float + */ + private function convertPriceToFloat($price) + { + // Every price is stored / displayed as "xxx,xx €", but PHP can't convert it as float + + if ($price !== null) { + // Convert comma as dot + $price = str_replace(',', '.', $price); + // Remove the '€' char + $price = str_replace('€', '', $price); + // Convert to float + return floatval($price); + } else { + return $price; + } + } + + /** + * Returns the Price Trend emoji + * @return string the Price Trend Emoji + */ + private function getPriceTrend($NewPrice, $OldPrice) + { + $NewPrice = $this->convertPriceToFloat($NewPrice); + $OldPrice = $this->convertPriceToFloat($OldPrice); + // In case there is no old Price, then show no trend + if ($OldPrice === null || $OldPrice == 0) { + $trend = ''; + } else if ($NewPrice > $OldPrice) { + $trend = '↗'; + } else if ($NewPrice == $OldPrice) { + $trend = '➡'; + } else if ($NewPrice < $OldPrice) { + $trend = '↘'; + } + return $trend; + } + public function collectData() + { + $link 
= $this->getInput('Link'); + $html = getSimpleHTMLDOM($link, $this->headers, $this->options); + + // Get Productname + $titleobj = $html->find('.oopStage-title', 0); + $Productname = $titleobj->find('span', 0)->plaintext; + + // Create product specific Cache Keys with the link + $KeyNEW = $link; + $KeyNEW .= 'NEW'; + + $KeyUSED = $link; + $KeyUSED .= 'USED'; + + // Load previous Price + $OldPriceNew = $this->loadCacheValue($KeyNEW); + $OldPriceUsed = $this->loadCacheValue($KeyUSED); + + // First button contains the new price. Found at oopStage-conditionButton-wrapper-text class (.) + $ActualNewPrice = $html->find('div[id=oopStage-conditionButton-new]', 0); + // Second Button contains the used product price + $ActualUsedPrice = $html->find('div[id=oopStage-conditionButton-used]', 0); + // Get the first item of the offers list to have an option if there is no New/Used Button available + $altPrice = $html->find('.productOffers-listItemOfferPrice', 0); + + if ($ActualNewPrice) { + $PriceNew = $ActualNewPrice->find('strong', 0)->plaintext; + // Save current price + $this->saveCacheValue($KeyNEW, $PriceNew); + } else if ($altPrice) { + // Get price from first List item if no New/used Buttons available + $PriceNew = trim($altPrice->plaintext); + $this->saveCacheValue($KeyNEW, $PriceNew); + } else if (($ActualNewPrice === null || $altPrice === null) && $ActualUsedPrice !== null) { + // In case there is no actual New Price and a Used Price exists, then delete the previous value in the cache + $this->cache->delete($this->getShortName() . '_' . 
$KeyNEW); + } + + // Second Button contains the used product price + if ($ActualUsedPrice) { + $PriceUsed = $ActualUsedPrice->find('strong', 0)->plaintext; + // Save current price + $this->saveCacheValue($KeyUSED, $PriceUsed); + } else if ($ActualUsedPrice === null && ($ActualNewPrice !== null || $altPrice !== null)) { + // In case there is no actual Used Price and a New Price exists, then delete the previous value in the cache + $this->cache->delete($this->getShortName() . '_' . $KeyUSED); + } + + // Only continue if a price has changed and there exists a New, Used or Alternative price (sometimes no new Price _and_ Used Price are shown) + if (!($ActualNewPrice === null && $ActualUsedPrice === null && $altPrice === null) && ($PriceNew != $OldPriceNew || $PriceUsed != $OldPriceUsed)) { + // Get Product Image + $image = $html->find('.datasheet-cover-image', 0)->src; + + $content = ''; + + // Generate Content + if (isset($PriceNew) && $this->convertPriceToFloat($PriceNew) > 0) { + $content .= sprintf('

    Price New:
    %s %s

    ', $PriceNew, $this->getPriceTrend($PriceNew, $OldPriceNew)); + $content .= "

    Price New before:
    $OldPriceNew

    "; + } + + if ($this->getInput('MaxPriceNew') != '') { + $content .= sprintf('

    Max Price New:
    %s,00 €

    ', $this->getInput('MaxPriceNew')); + } + + if (isset($PriceUsed) && $this->convertPriceToFloat($PriceUsed) > 0) { + $content .= sprintf('

    Price Used:
    %s %s

    ', $PriceUsed, $this->getPriceTrend($PriceUsed, $OldPriceUsed)); + $content .= "

    Price Used before:
    $OldPriceUsed

    "; + } + + if ($this->getInput('MaxPriceUsed') != '') { + $content .= sprintf('

    Max Price Used:
    %s,00 €

    ', $this->getInput('MaxPriceUsed')); + } + + $content .= ""; + + + $now = date('d/m/Y H:i'); + + $Pricealarm = 'Pricealarm %s: %s %s - %s'; + + // Currently under Max new price + if ($this->getInput('MaxPriceNew') != '') { + if (isset($PriceNew) && $this->convertPriceToFloat($PriceNew) < $this->getInput('MaxPriceNew')) { + $title = sprintf($Pricealarm, 'New', $PriceNew, $Productname, $now); + $item = [ + 'title' => $title, + 'uri' => $link, + 'content' => $content, + 'uid' => md5($title) + ]; + $this->items[] = $item; + } + } + + // Currently under Max used price + if ($this->getInput('MaxPriceUsed') != '') { + if (isset($PriceUsed) && $this->convertPriceToFloat($PriceUsed) < $this->getInput('MaxPriceUsed')) { + $title = sprintf($Pricealarm, 'Used', $PriceUsed, $Productname, $now); + $item = [ + 'title' => $title, + 'uri' => $link, + 'content' => $content, + 'uid' => md5($title) + ]; + $this->items[] = $item; + } + } + + // General Priceupdate Without any Max Price for new and Used product + if ($this->getInput('MaxPriceUsed') == '' && $this->getInput('MaxPriceNew') == '') { + // check if a relevant pricechange happened + if ( + (!$this->getInput('ExcludeNew') && $PriceNew != $OldPriceNew ) || + (!$this->getInput('ExcludeUsed') && $PriceUsed != $OldPriceUsed ) + ) { + $title = 'Priceupdate! '; + + if (!$this->getInput('ExcludeNew') && isset($PriceNew)) { + $title .= 'NEW' . $this->getPriceTrend($PriceNew, $OldPriceNew) . ' '; + } + + if (!$this->getInput('ExcludeUsed') && isset($PriceUsed)) { + $title .= 'USED' . $this->getPriceTrend($PriceUsed, $OldPriceUsed) . 
' '; + } + $title .= $Productname; + $title .= ' - '; + $title .= $now; + + $item = [ + 'title' => $title, + 'uri' => $link, + 'content' => $content, + 'uid' => md5($title) + ]; + $this->items[] = $item; + } + } + } + } + + /** + * Returns the RSS Feed title according to the parameters + * @return string the RSS feed Tile + */ + public function getName() + { + switch ($this->queriedContext) { + case '0': + return $this->getFeedTitle(); + default: + return parent::getName(); + } + } + + /** + * Returns the RSS Feed URL according to the parameters + * @return string the RSS feed URL + */ + public function getURI() + { + switch ($this->queriedContext) { + case '0': + return $this->getInput('Link'); + default: + return parent::getURI(); + } + } +} diff --git a/bridges/ImgsedBridge.php b/bridges/ImgsedBridge.php new file mode 100644 index 00000000..ed541adb --- /dev/null +++ b/bridges/ImgsedBridge.php @@ -0,0 +1,302 @@ + [ + 'u' => [ + 'name' => 'username', + 'type' => 'text', + 'title' => 'Instagram username you want to follow', + 'exampleValue' => 'aesoprockwins', + 'required' => true, + ], + 'post' => [ + 'name' => 'posts', + 'type' => 'checkbox', + 'title' => 'Show posts for this Instagram user', + 'defaultValue' => 'checked', + ], + 'story' => [ + 'name' => 'stories', + 'type' => 'checkbox', + 'title' => 'Show stories for this Instagram user', + ], + 'tagged' => [ + 'name' => 'tagged', + 'type' => 'checkbox', + 'title' => 'Show tagged post for this Instagram user', + ], + ] + ]; + const TEST_DETECT_PARAMETERS = [ + 'https://www.instagram.com/instagram/' => ['context' => 'Username', 'u' => 'instagram', 'post' => 'on', 'story' => 'on', 'tagged' => 'on'], + 'https://instagram.com/instagram/' => ['context' => 'Username', 'u' => 'instagram', 'post' => 'on', 'story' => 'on', 'tagged' => 'on'], + 'https://imgsed.com/instagram/' => ['context' => 'Username', 'u' => 'instagram', 'post' => 'on', 'story' => 'on', 'tagged' => 'on'], + 'https://www.imgsed.com/instagram/' => 
['context' => 'Username', 'u' => 'instagram', 'post' => 'on', 'story' => 'on', 'tagged' => 'on'], + ]; + + public function collectData() + { + $username = $this->getInput('u'); + try { + // Check if the user exist + $html = getSimpleHTMLDOMCached(self::URI . $username . '/'); + if ($this->getInput('post')) { + $this->collectPosts(); + } + if ($this->getInput('story')) { + $this->collectStories(); + } + if ($this->getInput('tagged')) { + $this->collectTaggeds(); + } + } catch (HttpException $e) { + throwClientException(sprintf('Unable to find user `%s`', $username)); + } + } + + private function collectPosts() + { + $username = $this->getInput('u'); + $html = getSimpleHTMLDOMCached(self::URI . $username . '/'); + $html = defaultLinkTo($html, self::URI); + + foreach ($html->find('div[class=item]') as $post) { + $url = $post->find('a', 0)->href; + $instagramURL = $this->convertURLToInstagram($url); + $date = $this->parseDate($post->find('div[class=time]', 0)->plaintext); + $description = $post->find('img', 0)->alt; + $imageUrl = $post->find('img', 0)->src; + // Sometimes, there is some lazy image instead of the real URL + if ($imageUrl == 'https://imgsed.com/img/lazy.jpg') { + $imageUrl = $post->find('img', 0)->getAttribute('data-src'); + } + $download = $post->find('a[class=download]', 0)->href; + $author = $username; + $uid = $post->find('a', 0)->href; + $title = 'Post - ' . $username . ' - ' . $this->descriptionToTitle($description); + + // Checking post type + $isVideo = (bool) $post->find('i[class=video]', 0); + $videoNote = $isVideo ? '

    (video)

    ' : ''; + + $isMoreContent = (bool) $post->find('svg', 0); + $moreContentNote = $isMoreContent ? '

    (multiple images and/or videos)

    ' : ''; + + $this->items[] = [ + 'uri' => $url, + 'author' => $author, + 'timestamp' => $date, + 'title' => $title, + 'thumbnail' => $imageUrl, + 'enclosures' => [$imageUrl, $download], + 'content' => << + {$description} + +{$videoNote} +{$moreContentNote} +

    {$description}

    +

    Download

    +

    Display on Instagram

    +HTML, + 'uid' => $uid + ]; + } + } + + private function collectStories() + { + try { + $username = $this->getInput('u'); + $html = getSimpleHTMLDOMCached(self::URI . 'api/media/?name=' . $username); + $json = Json::decode($html); + + foreach ($json as $post) { + $url = $post['src']; + $imageUrl = $post['thumb']; + $download = $url; + $author = $username; + $uid = $url; + $title = 'Story - ' . $username; + + $this->items[] = [ + 'uri' => $url, + 'author' => $author, + 'title' => $title, + 'thumbnail' => $imageUrl, + 'enclosures' => [$imageUrl, $download], + 'content' => << + story + +

    Download

    + HTML, + 'uid' => $uid + ]; + } + } catch (Exception $e) { + // If it fails, it's because there are no stories, so don't do anything + } + } + + private function collectTaggeds() + { + $username = $this->getInput('u'); + try { + $html = getSimpleHTMLDOMCached(self::URI . 'tagged/' . $username . '/'); + $html = defaultLinkTo($html, self::URI); + + foreach ($html->find('div[class=item]') as $post) { + $url = $post->find('a', 1)->href; + $instagramURL = $this->convertURLToInstagram($url); + $fromURL = $post->find('div[class=username]', 0)->find('a', 0)->href; + $fromUsername = $post->find('div[class=username]', 0)->plaintext; + $date = $this->parseDate($post->find('div[class=time]', 0)->plaintext); + $description = $post->find('img', 0)->alt; + $imageUrl = $post->find('img', 0)->src; + $download = $post->find('a[class=download]', 0)->href; + $author = $fromUsername; + $uid = $post->find('a', 0)->href; + $title = 'Tagged - ' . $fromUsername . ' - ' . $this->descriptionToTitle($description); + + // Checking post type + $isVideo = (bool) $post->find('i[class=video]', 0); + $videoNote = $isVideo ? '

    (video)

    ' : ''; + + $isMoreContent = (bool) $post->find('svg', 0); + $moreContentNote = $isMoreContent ? '

    (multiple images and/or videos)

    ' : ''; + + + $this->items[] = [ + 'uri' => $url, + 'author' => $author, + 'timestamp' => $date, + 'title' => $title, + 'thumbnail' => $imageUrl, + 'enclosures' => [$imageUrl, $download], + 'content' => << + {$description} + +{$videoNote} +{$moreContentNote} +

    From {$fromUsername}

    +

    {$description}

    +

    Download

    +

    Display on Instagram

    +HTML, + 'uid' => $uid + ]; + } + } catch (Exception $e) { + // If it fails, it's because the account was not tagged + } + } + + private function parseDate($content) + { + // Parse date, and transform the date into a timetamp, even in a case of a relative date + $date = date_create(); + + // Content trimmed to be sure that the "article" is at the beginning of the string and remove "ago" to make it a valid PHP date interval + $dateString = trim(str_replace(' ago', '', $content)); + + // Replace the article "an" or "a" by the number "1" to be a valid PHP date interval + $dateString = preg_replace('/^((an|a) )/m', '1 ', $dateString); + + $relativeDate = date_interval_create_from_date_string($dateString); + if ($relativeDate) { + date_sub($date, $relativeDate); + // As the relative interval has the precision of a day for date older than 24 hours, we can remove the hour of the date, as it is not relevant + date_time_set($date, 0, 0, 0, 0); + } else { + $this->logger->info(sprintf('Unable to parse date string: %s', $dateString)); + } + return date_format($date, 'r'); + } + + public function getURI() + { + if (!is_null($this->getInput('u'))) { + return urljoin(self::URI, '/' . $this->getInput('u') . '/'); + } + + return parent::getURI(); + } + + private function convertURLToInstagram($url) + { + return str_replace(self::URI, self::INSTAGRAMURI, $url); + } + private function descriptionToTitle($description) + { + return strlen($description) > 60 ? mb_substr($description, 0, 57) . '...' 
: $description; + } + + public function getName() + { + if (!is_null($this->getInput('u'))) { + $types = []; + if ($this->getInput('post')) { + $types[] = 'Posts'; + } + if ($this->getInput('story')) { + $types[] = 'Stories'; + } + if ($this->getInput('tagged')) { + $types[] = 'Tags'; + } + + // If no content type is selected, this bridge does nothing, so we return an error + if (count($types) == 0) { + throwClientException('You must select at least one of the content type : Post, Stories or Tags !'); + } + $typesText = $types[0] ?? ''; + + if (count($types) > 1) { + for ($i = 1; $i < count($types) - 1; $i++) { + $typesText .= ', ' . $types[$i]; + } + $typesText .= ' & ' . $types[$i]; + } + + return 'Username ' . $this->getInput('u') . ' - ' . $typesText . ' - Imgsed Bridge'; + } + return parent::getName(); + } + + public function detectParameters($url) + { + $params = [ + 'post' => 'on', + 'story' => 'on', + 'tagged' => 'on', + ]; + $regex = '/^http(s|):\/\/((www\.|)(instagram.com)\/([a-zA-Z0-9_\.]{1,30})(\/reels\/|\/tagged\/|\/|)|(www\.|)(imgsed.com)\/(stories\/|tagged\/|)([a-zA-Z0-9_\.]{1,30})\/)/'; + if (preg_match($regex, $url, $matches) > 0) { + $params['context'] = 'Username'; + // Extract detected domain using the regex + $domain = $matches[8] ?? 
$matches[4]; + if ($domain == 'imgsed.com') { + $params['u'] = $matches[10]; + return $params; + } elseif ($domain == 'instagram.com') { + $params['u'] = $matches[5]; + return $params; + } else { + return null; + } + } else { + return null; + } + } +} diff --git a/bridges/InstagramBridge.php b/bridges/InstagramBridge.php index 71431906..58e56c59 100644 --- a/bridges/InstagramBridge.php +++ b/bridges/InstagramBridge.php @@ -63,9 +63,9 @@ class InstagramBridge extends BridgeAbstract ]; const TEST_DETECT_PARAMETERS = [ - 'https://www.instagram.com/metaverse' => ['u' => 'metaverse'], - 'https://instagram.com/metaverse' => ['u' => 'metaverse'], - 'http://www.instagram.com/metaverse' => ['u' => 'metaverse'], + 'https://www.instagram.com/metaverse' => ['context' => 'Username', 'u' => 'metaverse'], + 'https://instagram.com/metaverse' => ['context' => 'Username', 'u' => 'metaverse'], + 'http://www.instagram.com/metaverse' => ['context' => 'Username', 'u' => 'metaverse'], ]; const USER_QUERY_HASH = '58b6785bea111c67129decbe6a448951'; @@ -86,6 +86,11 @@ class InstagramBridge extends BridgeAbstract $headers = []; $sessionId = $this->getOption('session_id'); $dsUserId = $this->getOption('ds_user_id'); + $headers[] = 'x-ig-app-id: 936619743392459'; + $headers[] = 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'; + $headers[] = 'Accept-Language: en-US,en;q=0.9,ru;q=0.8'; + $headers[] = 'Accept-Encoding: gzip, deflate, br'; + $headers[] = 'Accept: */*'; if ($sessionId and $dsUserId) { $headers[] = 'cookie: sessionid=' . $sessionId . '; ds_user_id=' . $dsUserId; } @@ -98,24 +103,20 @@ class InstagramBridge extends BridgeAbstract return $username; } - $cache = RssBridge::getCache(); - $cache->setScope('InstagramBridge'); - $cache->setKey([$username]); - $key = $cache->loadData(); + $cacheKey = 'InstagramBridge_' . 
$username; + $pk = $this->cache->get($cacheKey); - if ($key == null) { + if (!$pk) { $data = $this->getContents(self::URI . 'web/search/topsearch/?query=' . $username); - foreach (json_decode($data)->users as $user) { - if (strtolower($user->user->username) === strtolower($username)) { - $key = $user->user->pk; + if (!$data) { + foreach (json_decode($data)->users as $user) { + if (strtolower($user->user->username) === strtolower($username)) { + $pk = $user->user->pk; + } } } - if ($key == null) { - returnServerError('Unable to find username in search result.'); - } - $cache->saveData($key); } - return $key; + return $pk; } public function collectData() @@ -123,9 +124,14 @@ class InstagramBridge extends BridgeAbstract $directLink = !is_null($this->getInput('direct_links')) && $this->getInput('direct_links'); $data = $this->getInstagramJSON($this->getURI()); + if (!$data) { + return; + } - if (!is_null($this->getInput('u'))) { + if (!is_null($this->getInput('u')) && !$this->fallbackMode) { $userMedia = $data->data->user->edge_owner_to_timeline_media->edges; + } elseif (!is_null($this->getInput('u')) && $this->fallbackMode) { + $userMedia = $data->context->graphql_media; } elseif (!is_null($this->getInput('h'))) { $userMedia = $data->data->hashtag->edge_hashtag_to_media->edges; } elseif (!is_null($this->getInput('l'))) { @@ -133,7 +139,12 @@ class InstagramBridge extends BridgeAbstract } foreach ($userMedia as $media) { - $media = $media->node; + // The media is not in the same element if in fallback mode than not + if (!$this->fallbackMode) { + $media = $media->node; + } else { + $media = $media->shortcode_media; + } switch ($this->getInput('media_type')) { case 'all': @@ -266,14 +277,28 @@ class InstagramBridge extends BridgeAbstract protected function getInstagramJSON($uri) { + // Sets fallbackMode to false + $this->fallbackMode = false; if (!is_null($this->getInput('u'))) { - $userId = $this->getInstagramUserId($this->getInput('u')); - $data = 
$this->getContents(self::URI . + try { + $userId = $this->getInstagramUserId($this->getInput('u')); + + // If the Userid is not null, try to load the data from the graphql + if (!$userId) { + $data = $this->getContents(self::URI . 'graphql/query/?query_hash=' . self::USER_QUERY_HASH . '&variables={"id"%3A"' . $userId . '"%2C"first"%3A10}'); + } else { + // In case we did not get the UserId then we must go back to the fallback mode + $data = $this->getInstagramJSONFallback(); + } + } catch (HttpException $e) { + // Even if the UserId is not nul, the graphql request could go wrong, and then we should try to use the fallback mode + $data = $this->getInstagramJSONFallback(); + } return json_decode($data); } elseif (!is_null($this->getInput('h'))) { $data = $this->getContents(self::URI . @@ -288,12 +313,39 @@ class InstagramBridge extends BridgeAbstract $html = getContents($uri); $scriptRegex = '/window\._sharedData = (.*);<\/script>/'; - preg_match($scriptRegex, $html, $matches, PREG_OFFSET_CAPTURE, 0); - - return json_decode($matches[1][0]); + $ret = preg_match($scriptRegex, $html, $matches, PREG_OFFSET_CAPTURE); + if ($ret) { + return json_decode($matches[1][0]); + } + return null; } } + protected function getInstagramJSONFallback() + { + // If loading the data directly failed, we fall back to the "/embed" data loading + // We are in the fallback mode : set a booolean to handle this specific case while collecting the content + $this->fallbackMode = true; + // Get the HTML code of the profile embed page, and extract the JSON of it + $username = $this->getInput('u'); + // Load the content using the integrated function to use helping headers + $htmlString = $this->getContents(self::URI . $username . '/embed/'); + // Load the String as an SimpleHTMLDom Object + $html = new simple_html_dom(); + $html->load($htmlString); + // Find the '); - $article_content = stripRecursiveHTMLSection($article_content, 'div', '
    id}"; + $item['title'] = $deal->title; + $item['timestamp'] = $deal->date; + $item['content'] = $this->generateContent($deal); + $item['categories'] = $deal->categories; + $item['uid'] = strval($deal->id); + + $this->items[] = $item; + } + } + + public function getIcon() + { + return <<<'EOD' +  + VR4AWIgFoReXcVmdnKRuymg9WoAkiUJon2+8BnhiLP17bNtX+Bs27aN0bdtY8y1bZu9m1evIrZH + VTW9HVsVuTNTndH5qirz5VuP7ddL3LZdk9y2AmZdk9z2TvaZP8lj3znFY/95ssdxxWPB4GHaRI1 + Lgq6T2ct/RyAWmExa+ySP7acZMecJlgPPK7cdOdlj/5i9qFcdTG44Ifb5Lk7Pwq5tXnUA8zbZbd + s7O/zf8aaCT/U6zmc7r7ESaFZwAd1fvpmuzVkhel4xzeM8K+POZcHnhBbSQ5Vb6fmmffQAC3J5d + IkQxDSfk5YNF9O2kQqazQClPC+fF1x4nPTOZcf+epuPemmA9JQZoXr6oCtMN+StSvL/tT+XP9+k + lwnywr5HWCVIOFHwZxv3km5iVlE7rWMBd7CdD7OJtUEaoul+lyAn7G8Kkk6c7cXUQrrFCSAz0gH + AOqb4HMcaAFDnwuz12I2XbdXL6f2uEP09kE+e0RoaYjMTgC16uaJE7d8ZDKcimTJqITcLmJrtj1 + Rto6fq99CSoSLqor604CGqkyYqjG26+U5aeogGelWV1oMVW2j1cAn/fnX2Mp7hP7EkY+XK12YG5 + vMkvK1oHT3dsIfWDpeiSnB6mfnBb5+jgdszOV4WWUwvt7ipm/pwFXztqqxl9HVfNl97s91n+L7a + 6jHPHZ7/vtHQWDI5ftITNbL62cZ99MdAXmIOWAbAEn+Dhq6mcsJdJ9b93aUbUkvTAIBjXzRUhBw + x2ysiANAldBCU4UfdYazhrtMAICg4AL9zqdFIQLXZGzVVBdxetC4xEGhYCuDdzkDiGq7JzAk04A + TyZQ5IvISX4n6lALAOWsbvPwfyTVQB7L+wBiUjcQDxCHYlBQAbuybDrsleriKj9RpklMzhxeYDS + YFiVC8HIKiCK2NL6bPemIqMvtSg4WQOd5asT2U4gDIA9NMgjhucIAQAUsJvBYCZGlojmoMkSaiB + OpMAgHa/68/G9UAjGL6JAKAJ7IMF3P/G/FXSCviAPjhYw4CAlKF8oWm/sNE4Bwvp3rJNdGlkkQH + 2utwVnKrrqIP7+Edr1cc/NqBeZXyAF2/Uy5RdD2IFrTe1Fd/HAEp23zZr34KjtcQB9cpMKrPQWv + VxzHc6AgqRan9F+I8H1KuCNOi1Vi+1ULcycBN10ZP1u1Wlt423YdGAdIZ6haPqNB6r3cWFye6RS + l6ae0eqyDFYCH1gSDCJFfGjVw1IZ6hXZjTBVnSx23WqZmZAOkO9TlRwHLt855IBfoB6NTjCktnb + kHD8zq0OqFcISGi4cQRuRJ1Ldm1tYBfQcJBRUDIQEwiEloquhsaCoKBXznAmx/9MoY9pELa5wwA + AAABJRU5ErkJggg== + EOD; + } + + private function jsonToHtml($node) + { + if ($node['type'] == 'text') { + return htmlspecialchars($node['data'], ENT_QUOTES, 'UTF-8'); + } + + if ($node['type'] == 'tag') { + $html = "<{$node['name']}"; + + foreach ($node['attribs'] as $key => $value) { + 
$html .= sprintf( + ' %s="%s"', + htmlspecialchars($key, ENT_QUOTES, 'UTF-8'), + htmlspecialchars($value, ENT_QUOTES, 'UTF-8') + ); + } + + $html .= '>'; + + foreach ($node['children'] as $child) { + $html .= $this->jsonToHtml($child); + } + + $html .= ""; + + return $html; + } + + return ''; + } + + private function generateContent($deal) + { + $img_link = $deal->image->source; + $content = "

    "; + + $content .= "

    \${$deal->price} \${$deal->streetPrice}

    "; + + foreach ($deal->buyButtons as $buy) { + $content .= "

    Buy from url}\">$buy->merchant"; + if ($buy->promo->effect) { + $content .= " {$buy->promo->effect}"; + } + if ($buy->promo->code) { + $content .= " (Use promo code {$buy->promo->code})"; + } + $content .= '

    '; + } + + $content .= '

     

    '; + $structuredContent = json_decode($deal->structuredContent, true); + foreach ($structuredContent as $node) { + $content .= $this->jsonToHtml($node); + } + + if ($deal->relatedArticle) { + $review = $deal->relatedArticle; + $content .= '

     

    '; + $content .= "

    Read the review: link}\">{$review->title}

    "; + } + + return $content; + } +} diff --git a/bridges/WiredBridge.php b/bridges/WiredBridge.php index d4c7cbbb..b77e3973 100644 --- a/bridges/WiredBridge.php +++ b/bridges/WiredBridge.php @@ -32,7 +32,7 @@ class WiredBridge extends FeedExpander { $feed = $this->getInput('feed'); if (empty($feed) || !ctype_alpha(str_replace('-', '', $feed))) { - returnClientError('Invalid feed, please check the "feed" parameter.'); + throwClientException('Invalid feed, please check the "feed" parameter.'); } $feed_url = $this->getURI() . 'feed/'; @@ -50,13 +50,14 @@ class WiredBridge extends FeedExpander $this->collectExpandableDatas($feed_url, $limit); } - protected function parseItem($newsItem) + protected function parseItem(array $item) { - $item = parent::parseItem($newsItem); + $originalContent = $item['content']; + $article = getSimpleHTMLDOMCached($item['uri']); $item['content'] = $this->extractArticleContent($article); - $headline = strval($newsItem->description); + $headline = $originalContent; if (!empty($headline)) { $item['content'] = '

    ' . $headline . '

    ' . $item['content']; } diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index 1d46958d..723a16c4 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -20,50 +20,54 @@ class WordPressBridge extends FeedExpander ], ]]; - private function cleanContent($content) + public function collectData() { - $content = stripWithDelimiters($content, ''); - $content = preg_replace('/
    /', '', $content); - return $content; + $limit = $this->getInput('limit') ?? 10; + if ($this->getInput('url') && substr($this->getInput('url'), 0, strlen('http')) !== 'http') { + // just in case someone find a way to access local files by playing with the url + throwClientException('The url parameter must either refer to http or https protocol.'); + } + try { + $this->collectExpandableDatas($this->getURI() . '/feed/atom/', $limit); + } catch (Exception $e) { + $this->collectExpandableDatas($this->getURI() . '/?feed=atom', $limit); + } } - protected function parseItem($newItem) + protected function parseItem(array $item) { - $item = parent::parseItem($newItem); - - $article_html = getSimpleHTMLDOMCached($item['uri']); + $dom = getSimpleHTMLDOMCached($item['uri']); // Find article body $article = null; switch (true) { case !empty($this->getInput('content-selector')): // custom contect selector (manually specified by user) - $article = $article_html->find($this->getInput('content-selector'), 0); + $article = $dom->find($this->getInput('content-selector'), 0); break; - case !is_null($article_html->find('[itemprop=articleBody]', 0)): + case !is_null($dom->find('[itemprop=articleBody]', 0)): // highest priority content div (used for SEO) - $article = $article_html->find('[itemprop=articleBody]', 0); + $article = $dom->find('[itemprop=articleBody]', 0); break; - case !is_null($article_html->find('.article-content', 0)): + case !is_null($dom->find('.article-content', 0)): // more precise than article when present - $article = $article_html->find('.article-content', 0); + $article = $dom->find('.article-content', 0); break; - case !is_null($article_html->find('article', 0)): + case !is_null($dom->find('article', 0)): // most common content div - $article = $article_html->find('article', 0); + $article = $dom->find('article', 0); break; - case !is_null($article_html->find('.single-content', 0)): + case !is_null($dom->find('.single-content', 0)): // another common content 
div - $article = $article_html->find('.single-content', 0); + $article = $dom->find('.single-content', 0); break; - case !is_null($article_html->find('.post-content', 0)): + case !is_null($dom->find('.post-content', 0)): // another common content div - $article = $article_html->find('.post-content', 0); + $article = $dom->find('.post-content', 0); break; - case !is_null($article_html->find('.post', 0)): + case !is_null($dom->find('.post', 0)): // for old WordPress themes without HTML5 - $article = $article_html->find('.post', 0); + $article = $dom->find('.post', 0); break; } @@ -76,7 +80,7 @@ class WordPressBridge extends FeedExpander // Find article main image $article = convertLazyLoading($article); - $article_image = $article_html->find('img.wp-post-image', 0); + $article_image = $dom->find('img.wp-post-image', 0); if (!empty($item['content']) && (!is_object($article_image) || empty($article_image->src))) { $article_image = str_get_html($item['content'])->find('img.wp-post-image', 0); } @@ -106,6 +110,14 @@ class WordPressBridge extends FeedExpander return $item; } + private function cleanContent($content) + { + $content = stripWithDelimiters($content, ''); + $content = preg_replace('/
    /', '', $content); + return $content; + } + public function getURI() { $url = $this->getInput('url'); @@ -114,18 +126,4 @@ class WordPressBridge extends FeedExpander } return $url; } - - public function collectData() - { - $limit = $this->getInput('limit') ?? 10; - if ($this->getInput('url') && substr($this->getInput('url'), 0, strlen('http')) !== 'http') { - // just in case someone find a way to access local files by playing with the url - returnClientError('The url parameter must either refer to http or https protocol.'); - } - try { - $this->collectExpandableDatas($this->getURI() . '/feed/atom/', $limit); - } catch (Exception $e) { - $this->collectExpandableDatas($this->getURI() . '/?feed=atom', $limit); - } - } } diff --git a/bridges/WordPressMadaraBridge.php b/bridges/WordPressMadaraBridge.php index c5ff54b5..9af44ef4 100644 --- a/bridges/WordPressMadaraBridge.php +++ b/bridges/WordPressMadaraBridge.php @@ -117,8 +117,8 @@ The default URI shows the Madara demo page.'; protected function getMangaInfo($url) { $url_cache = 'TitleInfo_' . 
preg_replace('/[^\w]/', '.', rtrim($url, '/')); - $cache = $this->loadCacheValue($url_cache, 86400); - if (isset($cache)) { + $cache = $this->loadCacheValue($url_cache); + if ($cache) { return $cache; } diff --git a/bridges/WorldCosplayBridge.php b/bridges/WorldCosplayBridge.php deleted file mode 100644 index 721c545f..00000000 --- a/bridges/WorldCosplayBridge.php +++ /dev/null @@ -1,149 +0,0 @@ -%s'; - - const ERR_CONTEXT = 'No context provided'; - const ERR_QUERY = 'Unable to query: %s'; - - const LIMIT_MIN = 1; - const LIMIT_MAX = 24; - - const PARAMETERS = [ - 'Character' => [ - 'cid' => [ - 'name' => 'Character ID', - 'type' => 'number', - 'required' => true, - 'title' => 'WorldCosplay character ID', - 'exampleValue' => 18204 - ] - ], - 'Cosplayer' => [ - 'uid' => [ - 'name' => 'Cosplayer ID', - 'type' => 'number', - 'required' => true, - 'title' => 'Cosplayer\'s WorldCosplay profile ID', - 'exampleValue' => 406782 - ] - ], - 'Series' => [ - 'sid' => [ - 'name' => 'Series ID', - 'type' => 'number', - 'required' => true, - 'title' => 'WorldCosplay series ID', - 'exampleValue' => 3139 - ] - ], - 'Tag' => [ - 'tid' => [ - 'name' => 'Tag ID', - 'type' => 'number', - 'required' => true, - 'title' => 'WorldCosplay tag ID', - 'exampleValue' => 33643 - ] - ], - 'global' => [ - 'limit' => [ - 'name' => 'Limit', - 'type' => 'number', - 'required' => false, - 'title' => 'Maximum number of photos to return', - 'exampleValue' => 5, - 'defaultValue' => 5 - ] - ] - ]; - - public function collectData() - { - $limit = $this->getInput('limit'); - $limit = min(self::LIMIT_MAX, max(self::LIMIT_MIN, $limit)); - switch ($this->queriedContext) { - case 'Character': - $id = $this->getInput('cid'); - $url = self::API_CHARACTER; - break; - case 'Cosplayer': - $id = $this->getInput('uid'); - $url = self::API_COSPLAYER; - break; - case 'Series': - $id = $this->getInput('sid'); - $url = self::API_SERIES; - break; - case 'Tag': - $id = $this->getInput('tid'); - $url = self::API_TAG; - 
break; - default: - returnClientError(self::ERR_CONTEXT); - } - $url = self::URI . sprintf($url, $id, $limit); - - $json = json_decode(getContents($url)); - if ($json->has_error) { - returnServerError($json->message); - } - $list = $json->list; - - foreach ($list as $img) { - $image = $img->photo ?? $img; - $item = [ - 'uri' => self::URI . substr($image->url, 1), - 'title' => $image->subject, - 'author' => $img->member->global_name, - 'enclosures' => [$image->large_url], - 'uid' => $image->id, - ]; - // Context cosplayer don't have created_at - if (isset($image->created_at)) { - $item['timestamp'] = $image->created_at; - } - $item['content'] = sprintf( - self::CONTENT_HTML, - $item['uri'], - $item['enclosures'][0], - $item['title'], - $item['title'] - ); - $this->items[] = $item; - } - } - - public function getName() - { - switch ($this->queriedContext) { - case 'Character': - $id = $this->getInput('cid'); - break; - case 'Cosplayer': - $id = $this->getInput('uid'); - break; - case 'Series': - $id = $this->getInput('sid'); - break; - case 'Tag': - $id = $this->getInput('tid'); - break; - default: - return parent::getName(); - } - return sprintf('%s %u - ', $this->queriedContext, $id) . 
self::NAME; - } -} diff --git a/bridges/WorldOfTanksBridge.php b/bridges/WorldOfTanksBridge.php index 6e7a594b..7bf20015 100644 --- a/bridges/WorldOfTanksBridge.php +++ b/bridges/WorldOfTanksBridge.php @@ -30,9 +30,8 @@ class WorldOfTanksBridge extends FeedExpander $this->collectExpandableDatas(sprintf('https://worldoftanks.eu/%s/rss/news/', $this->getInput('lang'))); } - protected function parseItem($newsItem) + protected function parseItem(array $item) { - $item = parent::parseItem($newsItem); $item['content'] = $this->loadFullArticle($item['uri']); return $item; } diff --git a/bridges/WorldbankBridge.php b/bridges/WorldbankBridge.php new file mode 100644 index 00000000..9b40e86e --- /dev/null +++ b/bridges/WorldbankBridge.php @@ -0,0 +1,52 @@ + [ + 'name' => 'Language', + 'type' => 'list', + 'defaultValue' => 'English', + 'values' => [ + 'English' => 'English', + 'French' => 'French', + ] + ], + 'limit' => [ + 'name' => 'limit (max 100)', + 'type' => 'number', + 'defaultValue' => 5, + 'required' => true, + ] + ] + ]; + + public function collectData() + { + $apiUrl = 'https://search.worldbank.org/api/v2/news?format=json&rows=' + . min(100, $this->getInput('limit')) + . '&lang_exact=' . 
$this->getInput('lang'); + + $jsonData = json_decode(getContents($apiUrl)); + + // Remove unnecessary data from the original object + if (isset($jsonData->documents->facets)) { + unset($jsonData->documents->facets); + } + + foreach ($jsonData->documents as $element) { + $this->items[] = [ + 'uid' => $element->id, + 'timestamp' => $element->lnchdt, + 'title' => $element->title->{'cdata!'}, + 'uri' => $element->url, + 'content' => $element->descr->{'cdata!'}, + ]; + } + } +} diff --git a/bridges/XPathBridge.php b/bridges/XPathBridge.php index 52346aac..35ec6ad1 100644 --- a/bridges/XPathBridge.php +++ b/bridges/XPathBridge.php @@ -59,6 +59,17 @@ EOL, 'type' => 'text', 'required' => false ], + 'raw_content' => [ + 'name' => 'Use raw item description', + 'title' => <<<"EOL" + Whether to use the raw item description or to replace certain characters with + special significance in HTML by HTML entities (using the PHP function htmlspecialchars). + EOL, + 'type' => 'checkbox', + 'defaultValue' => false, + 'required' => false + ], + 'uri' => [ 'name' => 'Item URL selector', 'title' => <<<"EOL" @@ -178,6 +189,15 @@ EOL, 'type' => 'checkbox', return urldecode($this->getInput('content')); } + /** + * Use raw item content + * @return bool + */ + protected function getSettingUseRawItemContent(): bool + { + return $this->getInput('raw_content'); + } + /** * XPath expression for extracting an item link from the item context * @return string @@ -226,9 +246,9 @@ EOL, 'type' => 'checkbox', /** * Fix encoding - * @return string + * @return bool */ - protected function getSettingFixEncoding() + protected function getSettingFixEncoding(): bool { return $this->getInput('fix_encoding'); } diff --git a/bridges/XenForoBridge.php b/bridges/XenForoBridge.php index 1ecb1d74..bbef2816 100644 --- a/bridges/XenForoBridge.php +++ b/bridges/XenForoBridge.php @@ -81,24 +81,24 @@ class XenForoBridge extends BridgeAbstract ); if ($this->threadurl === false) { - returnClientError('The URL you provided 
is invalid!'); + throwClientException('The URL you provided is invalid!'); } $urlparts = parse_url($this->threadurl, PHP_URL_SCHEME); // Scheme must be "http" or "https" if (preg_match('/http[s]{0,1}/', parse_url($this->threadurl, PHP_URL_SCHEME)) == false) { - returnClientError('The URL you provided doesn\'t specify a valid scheme (http or https)!'); + throwClientException('The URL you provided doesn\'t specify a valid scheme (http or https)!'); } // Path cannot be root (../) if (parse_url($this->threadurl, PHP_URL_PATH) === '/') { - returnClientError('The URL you provided doesn\'t link to a valid thread (root path)!'); + throwClientException('The URL you provided doesn\'t link to a valid thread (root path)!'); } // XenForo adds a thread ID to the URL, like "...-thread.454934283". It must be present if (preg_match('/.+\.\d+[\/]{0,1}/', parse_URL($this->threadurl, PHP_URL_PATH)) == false) { - returnClientError('The URL you provided doesn\'t link to a valid thread (ID missing)!'); + throwClientException('The URL you provided doesn\'t link to a valid thread (ID missing)!'); } // We want to start at the first page in the thread. 
XenForo uses "../page-n" syntax @@ -121,13 +121,13 @@ class XenForoBridge extends BridgeAbstract } elseif ($mainContent = $html->find('div[class~="p-body"]', 0)) { $this->version = self::XENFORO_VERSION_2; } else { - returnServerError('This forum is currently not supported!'); + throwServerException('This forum is currently not supported!'); } switch ($this->version) { case self::XENFORO_VERSION_1: $titleBar = $mainContent->find('div.titleBar > h1', 0) - or returnServerError('Error finding title bar!'); + or throwServerException('Error finding title bar!'); $this->title = $titleBar->plaintext; @@ -139,7 +139,7 @@ class XenForoBridge extends BridgeAbstract case self::XENFORO_VERSION_2: $titleBar = $mainContent->find('div[class~="p-title"] h1', 0) - or returnServerError('Error finding title bar!'); + or throwServerException('Error finding title bar!'); $this->title = $titleBar->plaintext; $this->extractThreadPostsV2($html, $this->threadurl); @@ -166,7 +166,7 @@ class XenForoBridge extends BridgeAbstract // Posts are contained in an "ol" $messageList = $html->find('#messageList > li') - or returnServerError('Error finding message list!'); + or throwServerException('Error finding message list!'); foreach ($messageList as $post) { if (!isset($post->attr['id'])) { // Skip ads @@ -252,7 +252,7 @@ class XenForoBridge extends BridgeAbstract $lang = $html->find('html', 0)->lang; $messageList = $html->find('div[class~="block-body"] article') - or returnServerError('Error finding message list!'); + or throwServerException('Error finding message list!'); foreach ($messageList as $post) { if (!isset($post->attr['id'])) { // Skip ads @@ -304,11 +304,9 @@ class XenForoBridge extends BridgeAbstract // We can optimize performance by caching all but the last page if ($page != $lastpage) { - $html = getSimpleHTMLDOMCached($pageurl) - or returnServerError('Error loading contents from ' . $pageurl . 
'!'); + $html = getSimpleHTMLDOMCached($pageurl); } else { - $html = getSimpleHTMLDOM($pageurl) - or returnServerError('Error loading contents from ' . $pageurl . '!'); + $html = getSimpleHTMLDOM($pageurl); } $html = defaultLinkTo($html, $hosturl); @@ -347,11 +345,9 @@ class XenForoBridge extends BridgeAbstract // We can optimize performance by caching all but the last page if ($page != $lastpage) { - $html = getSimpleHTMLDOMCached($pageurl) - or returnServerError('Error loading contents from ' . $pageurl . '!'); + $html = getSimpleHTMLDOMCached($pageurl); } else { - $html = getSimpleHTMLDOM($pageurl) - or returnServerError('Error loading contents from ' . $pageurl . '!'); + $html = getSimpleHTMLDOM($pageurl); } $html = defaultLinkTo($html, $hosturl); @@ -436,8 +432,6 @@ class XenForoBridge extends BridgeAbstract break; } - // Debug::log(date_format($df, 'U')); - return date_format($df, 'U'); } } diff --git a/bridges/YGGTorrentBridge.php b/bridges/YGGTorrentBridge.php index f0c31f11..018bcfc4 100644 --- a/bridges/YGGTorrentBridge.php +++ b/bridges/YGGTorrentBridge.php @@ -7,7 +7,7 @@ class YGGTorrentBridge extends BridgeAbstract { const MAINTAINER = 'teromene'; const NAME = 'Yggtorrent Bridge'; - const URI = 'https://www5.yggtorrent.fi'; + const URI = 'https://www3.yggtorrent.qa'; const DESCRIPTION = 'Returns torrent search from Yggtorrent'; const PARAMETERS = [ diff --git a/bridges/YandexZenBridge.php b/bridges/YandexZenBridge.php index ee1ff506..e3ace55e 100644 --- a/bridges/YandexZenBridge.php +++ b/bridges/YandexZenBridge.php @@ -3,17 +3,17 @@ class YandexZenBridge extends BridgeAbstract { const NAME = 'YandexZen Bridge'; - const URI = 'https://zen.yandex.com'; - const DESCRIPTION = 'Latest posts from the specified profile.'; + const URI = 'https://dzen.ru'; + const DESCRIPTION = 'Latest posts from the specified channel.'; const MAINTAINER = 'llamasblade'; const PARAMETERS = [ [ - 'username' => [ - 'name' => 'Username', + 'channelURL' => [ + 'name' => 'Channel 
URL', 'type' => 'text', 'required' => true, - 'title' => 'The account\'s username, found in its URL', - 'exampleValue' => 'dream_faity_diy', + 'title' => 'The channel\'s URL', + 'exampleValue' => 'https://dzen.ru/dream_faity_diy', ], 'limit' => [ 'name' => 'Limit', @@ -27,14 +27,41 @@ class YandexZenBridge extends BridgeAbstract ]; # credit: https://github.com/teromene see #1032 - const _API_URL = 'https://zen.yandex.ru/api/v3/launcher/more?channel_name='; + const _BASE_API_URL_WITH_CHANNEL_NAME = 'https://dzen.ru/api/v3/launcher/more?channel_name='; + const _BASE_API_URL_WITH_CHANNEL_ID = 'https://dzen.ru/api/v3/launcher/more?channel_id='; + + const _ACCOUNT_URL_WITH_CHANNEL_ID_REGEX = '#^https?://dzen\.ru/id/(?[a-z0-9]{24})#'; + const _ACCOUNT_URL_WITH_CHANNEL_NAME_REGEX = '#^https?://dzen\.ru/(?[\w\.]+)#'; + + private $channelRealName = null; # as shown in the webpage, not in the URL + public function collectData() { - $profile_json = json_decode(getContents($this->getAPIUrl())); + $channelURL = $this->getInput('channelURL'); + + if (preg_match(self::_ACCOUNT_URL_WITH_CHANNEL_ID_REGEX, $channelURL, $matches)) { + $channelID = $matches['channelID']; + $channelAPIURL = self::_BASE_API_URL_WITH_CHANNEL_ID . $channelID; + } elseif (preg_match(self::_ACCOUNT_URL_WITH_CHANNEL_NAME_REGEX, $channelURL, $matches)) { + $channelName = $matches['channelName']; + $channelAPIURL = self::_BASE_API_URL_WITH_CHANNEL_NAME . $channelName; + } else { + throwClientException(<<channelRealName = $APIResponse->header->title; + $limit = $this->getInput('limit'); - foreach (array_slice($profile_json->items, 0, $limit) as $post) { + foreach (array_slice($APIResponse->items, 0, $limit) as $post) { $item = []; $item['uri'] = $post->share_link; @@ -45,30 +72,30 @@ class YandexZenBridge extends BridgeAbstract $item['timestamp'] = date(DateTimeInterface::ATOM, $publicationDateUnixTimestamp); } - $item['content'] = $post->text . "
    "; - $item['enclosures'] = [ - $post->image, - ]; + $postImage = $post->image ?? null; + $item['content'] = $post->text; + if ($postImage) { + $item['content'] .= "
    "; + $item['enclosures'] = [$postImage]; + } $this->items[] = $item; } } - private function getAPIUrl() - { - return self::_API_URL . $this->getInput('username'); - } - public function getURI() { - return self::URI . '/' . $this->getInput('username'); + if (is_null($this->getInput('channelURL'))) { + return parent::getURI(); + } + return $this->getInput('channelURL'); } public function getName() { - if (is_null($this->getInput('username'))) { + if (is_null($this->channelRealName)) { return parent::getName(); } - return $this->getInput('username') . '\'s latest zen.yandex posts'; + return $this->channelRealName . '\'s latest zen.yandex posts'; } } diff --git a/bridges/YorushikaBridge.php b/bridges/YorushikaBridge.php index 12d02f1f..d75b97d7 100644 --- a/bridges/YorushikaBridge.php +++ b/bridges/YorushikaBridge.php @@ -7,6 +7,20 @@ class YorushikaBridge extends BridgeAbstract const DESCRIPTION = 'Return news from Yorushika\'s offical website'; const MAINTAINER = 'Miicat_47'; const PARAMETERS = [ + 'global' => [ + 'lang' => [ + 'name' => 'Language', + 'defaultValue' => 'jp', + 'type' => 'list', + 'values' => [ + '日本語' => 'jp', + 'English' => 'en', + '한국어' => 'ko', + '中文(繁體字)' => 'zh-tw', + '中文(簡体字)' => 'zh-cn', + ] + ], + ], 'All categories' => [ ], 'Only selected categories' => [ @@ -27,6 +41,27 @@ class YorushikaBridge extends BridgeAbstract public function collectData() { + switch ($this->getInput('lang')) { + case 'jp': + $url = 'https://yorushika.com/news/5/'; + break; + case 'en': + $url = 'https://yorushika.com/news/5/?lang=en'; + break; + case 'ko': + $url = 'https://yorushika.com/news/5/?lang=ko'; + break; + case 'zh-tw': + $url = 'https://yorushika.com/news/5/?lang=zh-tw'; + break; + case 'zh-cn': + $url = 'https://yorushika.com/news/5/?lang=zh-cn'; + break; + default: + $url = 'https://yorushika.com/news/5/'; + break; + } + $categories = []; if ($this->queriedContext == 'All categories') { array_push($categories, 'all'); @@ -42,7 +77,7 @@ class 
YorushikaBridge extends BridgeAbstract } } - $html = getSimpleHTMLDOM('https://yorushika.com/news/5/')->find('.list--news', 0); + $html = getSimpleHTMLDOM($url)->find('.list--news', 0); $html = defaultLinkTo($html, $this->getURI()); foreach ($html->find('.inview') as $art) { @@ -62,10 +97,10 @@ class YorushikaBridge extends BridgeAbstract $url = $art->find('a.clearfix', 0)->href; // Get article date - $exp_date = '/\d+\.\d+\.\d+/'; $date = $art->find('.date', 0)->plaintext; - preg_match($exp_date, $date, $matches); - $date = date_create_from_format('Y.m.d', $matches[0]); + preg_match('/(\d+)[\.年](\d+)[\.月](\d+)/u', $date, $matches); + $formattedDate = sprintf('%d.%02d.%02d', $matches[1], $matches[2], $matches[3]); + $date = date_create_from_format('Y.m.d', $formattedDate); $date = date_format($date, 'd.m.Y'); // Get article info diff --git a/bridges/YouTubeCommunityTabBridge.php b/bridges/YouTubeCommunityTabBridge.php index c9ea5863..b14ec246 100644 --- a/bridges/YouTubeCommunityTabBridge.php +++ b/bridges/YouTubeCommunityTabBridge.php @@ -2,9 +2,9 @@ class YouTubeCommunityTabBridge extends BridgeAbstract { - const NAME = 'YouTube Community Tab Bridge'; + const NAME = 'YouTube Posts Tab Bridge'; const URI = 'https://www.youtube.com'; - const DESCRIPTION = 'Returns posts from a channel\'s community tab'; + const DESCRIPTION = 'Returns posts from a channel\'s posts tab'; const MAINTAINER = 'VerifiedJoseph'; const PARAMETERS = [ 'By channel ID' => [ @@ -31,8 +31,8 @@ class YouTubeCommunityTabBridge extends BridgeAbstract private $feedName = ''; private $itemTitle = ''; - private $urlRegex = '/youtube\.com\/(channel|user|c)\/([\w]+)\/community/'; - private $jsonRegex = '/var ytInitialData = (.*);<\/script>/'; + private $urlRegex = '/youtube\.com\/(channel|user|c)\/([\w]+)\/posts/'; + private $jsonRegex = '/var ytInitialData = ([^<]*);<\/script>/'; public function detectParameters($url) { @@ -59,40 +59,52 @@ class YouTubeCommunityTabBridge extends BridgeAbstract { if 
(is_null($this->getInput('username')) === false) { try { - $this->feedUrl = $this->buildCommunityUri($this->getInput('username'), 'c'); + $this->feedUrl = $this->buildPostsUri($this->getInput('username'), 'c'); $html = getSimpleHTMLDOM($this->feedUrl); } catch (Exception $e) { - $this->feedUrl = $this->buildCommunityUri($this->getInput('username'), 'user'); + $this->feedUrl = $this->buildPostsUri($this->getInput('username'), 'user'); $html = getSimpleHTMLDOM($this->feedUrl); } } else { - $this->feedUrl = $this->buildCommunityUri($this->getInput('channel'), 'channel'); + $this->feedUrl = $this->buildPostsUri($this->getInput('channel'), 'channel'); $html = getSimpleHTMLDOM($this->feedUrl); } - $json = $this->extractJson($html->find('body', 0)->innertext); + $json = $this->extractJson($html->find('html', 0)->innertext); - $this->feedName = $json->header->c4TabbedHeaderRenderer->title; + $this->feedName = $json->header->c4TabbedHeaderRenderer->title ?? null; + $this->feedName ??= $json->header->pageHeaderRenderer->pageTitle ?? null; + $this->feedName ??= $json->metadata->channelMetadataRenderer->title ?? null; + $this->feedName ??= $json->microformat->microformatDataRenderer->title ?? 
null; + $this->feedName ??= ''; - if ($this->hasCommunityTab($json) === false) { - returnServerError('Channel does not have a community tab'); + if ($this->hasPostsTab($json) === false) { + throwServerException('Channel does not have a posts tab'); } - foreach ($this->getCommunityPosts($json) as $key => $post) { + $posts = $this->getPosts($json); + foreach ($posts as $key => $post) { $this->itemTitle = ''; if (!isset($post->backstagePostThreadRenderer)) { continue; } - $details = $post->backstagePostThreadRenderer->post->backstagePostRenderer; + if (isset($post->backstagePostThreadRenderer->post->backstagePostRenderer)) { + $details = $post->backstagePostThreadRenderer->post->backstagePostRenderer; + } elseif (isset($post->backstagePostThreadRenderer->post->sharedPostRenderer)) { + // todo: properly extract data from this shared post + $details = $post->backstagePostThreadRenderer->post->sharedPostRenderer; + } else { + continue; + } $item = []; $item['uri'] = self::URI . '/post/' . $details->postId; - $item['author'] = $details->authorText->runs[0]->text; - $item['content'] = ''; + $item['author'] = $details->authorText->runs[0]->text ?? null; + $item['content'] = $item['uri']; - if (isset($details->contentText)) { + if (isset($details->contentText->runs)) { $text = $this->getText($details->contentText->runs); $this->itemTitle = $this->ellipsisTitle($text); @@ -124,18 +136,18 @@ class YouTubeCommunityTabBridge extends BridgeAbstract public function getName() { if (!empty($this->feedName)) { - return $this->feedName . ' - YouTube Community Tab'; + return $this->feedName . ' - YouTube Posts Tab'; } return parent::getName(); } /** - * Build Community URI + * Build Posts URI */ - private function buildCommunityUri($value, $type) + private function buildPostsUri($value, $type) { - return self::URI . '/' . $type . '/' . $value . '/community'; + return self::URI . '/' . $type . '/' . $value . 
'/posts'; } /** @@ -144,27 +156,27 @@ class YouTubeCommunityTabBridge extends BridgeAbstract private function extractJson($html) { if (!preg_match($this->jsonRegex, $html, $parts)) { - returnServerError('Failed to extract data from page'); + throwServerException('Failed to extract data from page'); } $data = json_decode($parts[1]); if ($data === false) { - returnServerError('Failed to decode extracted data'); + throwServerException('Failed to decode extracted data'); } return $data; } /** - * Check if channel has a community tab + * Check if channel has a posts tab */ - private function hasCommunityTab($json) + private function hasPostsTab($json) { foreach ($json->contents->twoColumnBrowseResultsRenderer->tabs as $tab) { if ( isset($tab->tabRenderer) - && str_ends_with($tab->tabRenderer->endpoint->commandMetadata->webCommandMetadata->url, 'community') + && str_ends_with($tab->tabRenderer->endpoint->commandMetadata->webCommandMetadata->url, 'posts') ) { return true; } @@ -174,14 +186,14 @@ class YouTubeCommunityTabBridge extends BridgeAbstract } /** - * Get community tab posts + * Get posts from posts tab */ - private function getCommunityPosts($json) + private function getPosts($json) { foreach ($json->contents->twoColumnBrowseResultsRenderer->tabs as $tab) { if ( isset($tab->tabRenderer) - && str_ends_with($tab->tabRenderer->endpoint->commandMetadata->webCommandMetadata->url, 'community') + && str_ends_with($tab->tabRenderer->endpoint->commandMetadata->webCommandMetadata->url, 'posts') ) { return $tab->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer->contents; } @@ -196,7 +208,15 @@ class YouTubeCommunityTabBridge extends BridgeAbstract $text = ''; foreach ($runs as $part) { - $text .= $this->formatUrls($part->text); + if (isset($part->navigationEndpoint->browseEndpoint->canonicalBaseUrl)) { + $text .= $this->formatUrls($part->text, $part->navigationEndpoint->browseEndpoint->canonicalBaseUrl); + } elseif 
(isset($part->navigationEndpoint->urlEndpoint->url)) { + $text .= $this->formatUrls($part->text, $part->navigationEndpoint->urlEndpoint->url); + } elseif (isset($part->navigationEndpoint->commandMetadata->webCommandMetadata->url)) { + $text .= $this->formatUrls($part->text, $part->navigationEndpoint->commandMetadata->webCommandMetadata->url); + } else { + $text .= $this->formatUrls($part->text, null); + } } return nl2br($text); @@ -212,8 +232,8 @@ class YouTubeCommunityTabBridge extends BridgeAbstract if (isset($details->backstageAttachment)) { $attachments = $details->backstageAttachment; - // Video if (isset($attachments->videoRenderer) && isset($attachments->videoRenderer->videoId)) { + // Video if (empty($this->itemTitle)) { $this->itemTitle = $this->feedName . ' posted a video'; } @@ -222,10 +242,8 @@ class YouTubeCommunityTabBridge extends BridgeAbstract EOD; - } - - // Image - if (isset($attachments->backstageImageRenderer)) { + } elseif (isset($attachments->backstageImageRenderer)) { + // Image if (empty($this->itemTitle)) { $this->itemTitle = $this->feedName . ' posted an image'; } @@ -235,10 +253,8 @@ EOD; $content = <<

    EOD; - } - - // Poll - if (isset($attachments->pollRenderer)) { + } elseif (isset($attachments->pollRenderer)) { + // Poll if (empty($this->itemTitle)) { $this->itemTitle = $this->feedName . ' posted a poll'; } @@ -254,6 +270,23 @@ EOD; $content = <<

    Poll ({$attachments->pollRenderer->totalVotes->simpleText})

      {$pollChoices}

    EOD; + } elseif (isset($attachments->postMultiImageRenderer->images)) { + // Multiple images + $images = $attachments->postMultiImageRenderer->images; + + if (is_array($images)) { + if (empty($this->itemTitle)) { + $this->itemTitle = $this->feedName . ' posted ' . count($images) . ' images'; + } + + foreach ($images as $image) { + $lastThumb = end($image->backstageImageRenderer->image->thumbnails); + + $content .= <<

    +EOD; + } + } } } @@ -267,6 +300,7 @@ EOD; { $length = 100; + $text = strip_tags($text); if (strlen($text) > $length) { $text = explode('
    ', wordwrap($text, $length, '
    ')); return $text[0] . '...'; @@ -275,12 +309,26 @@ EOD; return $text; } - private function formatUrls($content) + private function formatUrls($content, $url) { - return preg_replace( - '/(http[s]{0,1}\:\/\/[a-zA-Z0-9.\/\?\&=\-_]{4,})/ims', - '$1 ', - $content - ); + if (substr(strval($url), 0, 1) == '/') { + // fix relative URL + $url = 'https://www.youtube.com' . $url; + } elseif (substr(strval($url), 0, 33) == 'https://www.youtube.com/redirect?') { + // extract actual URL from YouTube redirect + parse_str(substr($url, 33), $params); + if (strpos(($params['q'] ?? ''), rtrim($content, '.')) === 0) { + $url = $params['q']; + } + } + + // ensure all URLs are made clickable + $url = $url ?? $content; + + if (filter_var($url, FILTER_VALIDATE_URL)) { + return '' . $content . ''; + } + + return $content; } } diff --git a/bridges/YouTubeFeedExpanderBridge.php b/bridges/YouTubeFeedExpanderBridge.php new file mode 100644 index 00000000..3811d960 --- /dev/null +++ b/bridges/YouTubeFeedExpanderBridge.php @@ -0,0 +1,81 @@ + [ + 'name' => 'Channel ID', + 'required' => true, + // Example: vinesauce + 'exampleValue' => 'UCzORJV8l3FWY4cFO8ot-F2w', + ], + 'embed' => [ + 'name' => 'Add embed to entry', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Add embed to entry', + 'defaultValue' => 'checked', + ], + 'embedurl' => [ + 'name' => 'Use embed page as entry url', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Use embed page as entry url', + ], + 'nocookie' => [ + 'name' => 'Use nocookie embed page', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Use nocookie embed page' + ], + ]]; + + public function getIcon() + { + if ($this->getInput('channel') != null) { + $html = getSimpleHTMLDOMCached($this->getURI()); + return $html->find('[itemprop="thumbnailUrl"]', 0)->href; + } + return parent::getIcon(); + } + + public function collectData() + { + $url = 'https://www.youtube.com/feeds/videos.xml?channel_id=' . 
$this->getInput('channel'); + $this->collectExpandableDatas($url); + } + + protected function parseItem(array $item) + { + $id = $item['yt']['videoId']; + $item['comments'] = $item['uri'] . '#comments'; + $item['uid'] = $item['id']; + + $thumbnail = sprintf('https://img.youtube.com/vi/%s/maxresdefault.jpg', $id); + $item['enclosures'] = [$thumbnail]; + + $item['content'] = $item['media']['group']['description']; + $item['content'] = str_replace("\n", '
    ', $item['content']); + unset($item['media']); + + $embedURI = self::URI; + if ($this->getInput('nocookie')) { + $embedURI = 'https://www.youtube-nocookie.com/'; + } + $embed = $embedURI . 'embed/' . $id; + if ($this->getInput('embed')) { + $iframe_fmt = ''; //phpcs:ignore + $iframe = sprintf($iframe_fmt, $embed, $item['title']) . '
    '; + $item['content'] = $iframe . $item['content']; + } + if ($this->getInput('embedurl')) { + $item['uri'] = $embed; + } + + return $item; + } +} diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 54a38d98..0c37be0c 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -1,17 +1,10 @@ ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); + $cacheKey = 'youtube_rate_limit'; + if ($this->cache->get($cacheKey)) { + throwRateLimitException(); + } + try { + $this->collectDataInternal(); + } catch (HttpException $e) { + if ($e->getCode() === 429) { + $this->cache->set($cacheKey, true, 60 * 16); + throwRateLimitException(); + } + throw $e; + } + } + + private function collectDataInternal() + { + $html = ''; + $url_feed = ''; + $url_listing = ''; + + $username = $this->getInput('u'); + $channel = $this->getInput('c'); + $custom = $this->getInput('custom'); + $playlist = $this->getInput('p'); + $search = $this->getInput('s'); + + $durationMin = $this->getInput('duration_min'); + $durationMax = $this->getInput('duration_max'); + + // Whether to discriminate videos by duration + $filterByDuration = $durationMin || $durationMax; + + if ($username) { + // user and channel + $url_feed = self::URI . '/feeds/videos.xml?user=' . urlencode($username); + $url_listing = self::URI . '/user/' . urlencode($username) . '/videos'; + } elseif ($channel) { + $url_feed = self::URI . '/feeds/videos.xml?channel_id=' . urlencode($channel); + $url_listing = self::URI . '/channel/' . urlencode($channel) . '/videos'; + } elseif ($custom) { + $url_listing = self::URI . '/' . urlencode($custom) . 
'/videos'; + } + + if ($url_feed || $url_listing) { + // user, channel or custom + $this->feeduri = $url_listing; + if ($custom) { + // Extract the feed url for the custom name + $html = $this->fetch($url_listing); + $jsonData = $this->extractJsonFromHtml($html); + // Pluck out the rss feed url + $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl; + $this->feedIconUrl = $jsonData->metadata->channelMetadataRenderer->avatar->thumbnails[0]->url; + } + if ($filterByDuration) { + if (!$custom) { + // Fetch the html page + $html = $this->fetch($url_listing); + $jsonData = $this->extractJsonFromHtml($html); + } + $channel_id = ''; + if (isset($jsonData->contents)) { + $channel_id = $jsonData->metadata->channelMetadataRenderer->externalId; + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; + $jsonData = $jsonData->tabRenderer->content->richGridRenderer->contents; + // $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; + $this->fetchItemsFromFromJsonData($jsonData); + } else { + throwServerException('Unable to get data from YouTube'); + } + } else { + // Fetch the xml feed + $html = $this->fetch($url_feed); + $this->extractItemsFromXmlFeed($html); + } + $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); + } elseif ($playlist) { + // playlist + $url_feed = self::URI . '/feeds/videos.xml?playlist_id=' . urlencode($playlist); + $url_listing = self::URI . '/playlist?list=' . urlencode($playlist); + $html = $this->fetch($url_listing); + $jsonData = $this->extractJsonFromHtml($html); + // TODO: this method returns only first 100 video items + // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0] ?? null; + if (!$jsonData) { + // playlist probably doesnt exists + throw new \Exception('Unable to find playlist: ' . 
$url_listing); + } + $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; + $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents; + $item_count = count($jsonData); + + if ($item_count > 15 || $filterByDuration) { + $this->fetchItemsFromFromJsonData($jsonData); + } else { + $xml = $this->fetch($url_feed); + $this->extractItemsFromXmlFeed($xml); + } + $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); + usort($this->items, function ($item1, $item2) { + if (!is_int($item1['timestamp']) && !is_int($item2['timestamp'])) { + $item1['timestamp'] = strtotime($item1['timestamp']); + $item2['timestamp'] = strtotime($item2['timestamp']); + } + return $item2['timestamp'] - $item1['timestamp']; + }); + } elseif ($search) { + // search + $url_listing = self::URI . '/results?search_query=' . urlencode($search) . '&sp=CAI%253D'; + $html = $this->fetch($url_listing); + $jsonData = $this->extractJsonFromHtml($html); + $jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents; + $jsonData = $jsonData->sectionListRenderer->contents[0]->itemSectionRenderer->contents; + $this->fetchItemsFromFromJsonData($jsonData); + $this->feeduri = $url_listing; + $this->feedName = 'Search: ' . $search; + } else { + throwClientException("You must either specify either:\n - YouTube username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)"); + } + } + + private function fetchVideoDetails($videoId, &$author, &$description, &$timestamp) + { + $url = self::URI . 
"/watch?v=$videoId"; + $html = $this->fetch($url, true); // Skip unavailable videos if (strpos($html->innertext, 'IS_UNAVAILABLE_PAGE') !== false) { @@ -94,15 +213,18 @@ class YoutubeBridge extends BridgeAbstract $elDatePublished = $html->find('meta[itemprop=datePublished]', 0); if (!is_null($elDatePublished)) { - $time = strtotime($elDatePublished->getAttribute('content')); + $timestamp = strtotime($elDatePublished->getAttribute('content')); } - $jsonData = $this->getJSONData($html); - if (! isset($jsonData->contents)) { + $jsonData = $this->extractJsonFromHtml($html); + if (!isset($jsonData->contents)) { return; } - $jsonData = $jsonData->contents->twoColumnWatchNextResults->results->results->contents; + $jsonData = $jsonData->contents->twoColumnWatchNextResults->results->results->contents ?? null; + if (!$jsonData) { + throw new \Exception('Unable to find json data'); + } $videoSecondaryInfo = null; foreach ($jsonData as $item) { if (isset($item->videoSecondaryInfoRenderer)) { @@ -111,149 +233,199 @@ class YoutubeBridge extends BridgeAbstract } } if (!$videoSecondaryInfo) { - returnServerError('Could not find videoSecondaryInfoRenderer. Error at: ' . $vid); + throwServerException('Could not find videoSecondaryInfoRenderer. Error at: ' . $videoId); } - if (isset($videoSecondaryInfo->description)) { - foreach ($videoSecondaryInfo->description->runs as $description) { - if (isset($description->navigationEndpoint)) { - $metadata = $description->navigationEndpoint->commandMetadata->webCommandMetadata; - $web_type = $metadata->webPageType; - $url = $metadata->url; - $text = ''; - switch ($web_type) { - case 'WEB_PAGE_TYPE_UNKNOWN': - $url_components = parse_url($url); - if (isset($url_components['query']) && strpos($url_components['query'], '&q=') !== false) { - parse_str($url_components['query'], $params); - $url = urldecode($params['q']); - } - $text = $url; - break; - case 'WEB_PAGE_TYPE_WATCH': - case 'WEB_PAGE_TYPE_BROWSE': - $url = 'https://www.youtube.com' . 
$url; - $text = $description->text; - break; - } - $desc .= "$text"; - } else { - $desc .= nl2br($description->text); - } + $description = $videoSecondaryInfo->attributedDescription->content ?? ''; + + // Default whitespace chars used by trim + non-breaking spaces (https://en.wikipedia.org/wiki/Non-breaking_space) + $whitespaceChars = " \t\n\r\0\x0B\u{A0}\u{2060}\u{202F}\u{2007}"; + $descEnhancements = $this->ytBridgeGetVideoDescriptionEnhancements($videoSecondaryInfo, $description, self::URI, $whitespaceChars); + foreach ($descEnhancements as $descEnhancement) { + if (isset($descEnhancement['url'])) { + $descBefore = mb_substr($description, 0, $descEnhancement['pos']); + $descValue = mb_substr($description, $descEnhancement['pos'], $descEnhancement['len']); + $descAfter = mb_substr($description, $descEnhancement['pos'] + $descEnhancement['len'], null); + + // Extended trim for the display value of internal links, e.g.: + // FAVICON • Video Name + // FAVICON / @ChannelName + $descValue = trim($descValue, $whitespaceChars . '•/'); + + $description = sprintf('%s%s%s', $descBefore, $descEnhancement['url'], $descValue, $descAfter); } } } - private function ytBridgeAddItem($vid, $title, $author, $desc, $time, $thumbnail = '') - { - $item = []; - $item['id'] = $vid; - $item['title'] = $title; - $item['author'] = $author; - $item['timestamp'] = $time; - $item['uri'] = self::URI . 'watch?v=' . $vid; - if (!$thumbnail) { - $thumbnail = '0'; // Fallback to default thumbnail if there aren't any provided. + private function ytBridgeGetVideoDescriptionEnhancements( + object $videoSecondaryInfo, + string $descriptionContent, + string $baseUrl, + string $whitespaceChars + ): array { + $commandRuns = $videoSecondaryInfo->attributedDescription->commandRuns ?? []; + if (count($commandRuns) <= 0) { + return []; } - $thumbnailUri = str_replace('/www.', '/img.', self::URI) . 'vi/' . $vid . '/' . $thumbnail . '.jpg'; - $item['content'] = '
    ' . $desc; - $this->items[] = $item; + + $enhancements = []; + + $boundaryWhitespaceChars = mb_str_split($whitespaceChars); + $boundaryStartChars = array_merge($boundaryWhitespaceChars, [':', '-', '(']); + $boundaryEndChars = array_merge($boundaryWhitespaceChars, [',', '.', "'", ')']); + $hashtagBoundaryEndChars = array_merge($boundaryEndChars, ['#', '-']); + + $descriptionContentLength = mb_strlen($descriptionContent); + + $minPositionOffset = 0; + + $prevStartPosition = 0; + $totalLength = 0; + $maxPositionByStartIndex = []; + foreach (array_reverse($commandRuns) as $commandRun) { + $endPosition = $commandRun->startIndex + $commandRun->length; + if ($endPosition < $prevStartPosition) { + $totalLength += 1; + } + $totalLength += $commandRun->length; + $maxPositionByStartIndex[$commandRun->startIndex] = $totalLength; + $prevStartPosition = $commandRun->startIndex; + } + + foreach ($commandRuns as $commandRun) { + $commandMetadata = $commandRun->onTap->innertubeCommand->commandMetadata->webCommandMetadata ?? null; + if (!isset($commandMetadata)) { + continue; + } + + $enhancement = null; + + /* + $commandRun->startIndex can be offset by few positions in the positive direction + when some multibyte characters (e.g. emojis, but maybe also others) are used in the plain text video description. + (probably some difference between php and javascript in handling multibyte characters) + This loop should correct the position in most cases. It searches for the next word (determined by a set of boundary chars) with the expected length. + Several safeguards ensure that the correct word is chosen. When a link can not be matched, + everything will be discarded to prevent corrupting the description. + Hashtags require a different set of boundary chars. + */ + $isHashtag = $commandMetadata->webPageType === 'WEB_PAGE_TYPE_BROWSE'; + $prevEnhancement = end($enhancements); + $minPosition = $prevEnhancement === false ? 
0 : $prevEnhancement['pos'] + $prevEnhancement['len']; + $maxPosition = $descriptionContentLength - $maxPositionByStartIndex[$commandRun->startIndex]; + $position = min($commandRun->startIndex - $minPositionOffset, $maxPosition); + while ($position >= $minPosition) { + // The link display value can only ever include a new line at the end (which will be removed further below), never in between. + $newLinePosition = mb_strpos($descriptionContent, "\n", $position); + if ($newLinePosition !== false && $newLinePosition < $position + ($commandRun->length - 1)) { + $position = $newLinePosition - ($commandRun->length - 1); + continue; + } + + $firstChar = mb_substr($descriptionContent, $position, 1); + $boundaryStart = mb_substr($descriptionContent, $position - 1, 1); + $boundaryEndIndex = $position + $commandRun->length; + $boundaryEnd = mb_substr($descriptionContent, $boundaryEndIndex, 1); + + $boundaryStartIsValid = $position === 0 || + in_array($boundaryStart, $boundaryStartChars) || + ($isHashtag && $firstChar === '#'); + $boundaryEndIsValid = $boundaryEndIndex === $descriptionContentLength || + in_array($boundaryEnd, $isHashtag ? $hashtagBoundaryEndChars : $boundaryEndChars); + + if ($boundaryStartIsValid && $boundaryEndIsValid) { + $minPositionOffset = $commandRun->startIndex - $position; + $enhancement = [ + 'pos' => $position, + 'len' => $commandRun->length, + ]; + break; + } + + $position--; + } + + if (!isset($enhancement)) { + $this->logger->debug(sprintf('Position %d cannot be corrected in "%s"', $commandRun->startIndex, substr($descriptionContent, 0, 50) . 
'...')); + // Skip to prevent the description from becoming corrupted + continue; + } + + // $commandRun->length sometimes incorrectly includes the newline as last char + $lastChar = mb_substr($descriptionContent, $enhancement['pos'] + $enhancement['len'] - 1, 1); + if ($lastChar === "\n") { + $enhancement['len'] -= 1; + } + + $commandUrl = parse_url($commandMetadata->url); + if ($commandUrl['path'] === '/redirect') { + parse_str($commandUrl['query'], $commandUrlQuery); + $enhancement['url'] = urldecode($commandUrlQuery['q']); + } elseif (isset($commandUrl['host'])) { + $enhancement['url'] = $commandMetadata->url; + } else { + $enhancement['url'] = $baseUrl . $commandMetadata->url; + } + + $enhancements[] = $enhancement; + } + + if (count($enhancements) !== count($commandRuns)) { + // At least one link can not be matched. Discard everything to prevent corrupting the description. + return []; + } + + // Sort by position in descending order to be able to safely replace values + return array_reverse($enhancements); } - private function ytBridgeParseXmlFeed($xml) + private function extractItemsFromXmlFeed($xml) { + $this->feedName = $this->decodeTitle($xml->find('feed > title', 0)->plaintext); + foreach ($xml->find('entry') as $element) { - $title = $this->ytBridgeFixTitle($element->find('title', 0)->plaintext); + $videoId = str_replace('yt:video:', '', $element->find('id', 0)->plaintext); + if (strpos($videoId, 'googleads') !== false) { + continue; + } + $title = $this->decodeTitle($element->find('title', 0)->plaintext); $author = $element->find('name', 0)->plaintext; $desc = $element->find('media:description', 0)->innertext; - - // Make sure the description is easy on the eye :) $desc = htmlspecialchars($desc); $desc = nl2br($desc); - $desc = preg_replace( - self::URI_REGEX, - '$1 ', - $desc - ); - - $vid = str_replace('yt:video:', '', $element->find('id', 0)->plaintext); + $desc = preg_replace(self::URI_REGEX, '$1 ', $desc); $time = 
strtotime($element->find('published', 0)->plaintext); - if (strpos($vid, 'googleads') === false) { - $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); - } + $this->addItem($videoId, $title, $author, $desc, $time); } - $this->feedName = $this->ytBridgeFixTitle($xml->find('feed > title', 0)->plaintext); // feedName will be used by getName() } - private function ytBridgeFixTitle($title) + private function fetch($url, bool $cache = false) { - // convert both Ӓ and " to UTF-8 - return html_entity_decode($title, ENT_QUOTES, 'UTF-8'); - } - - private function ytGetSimpleHTMLDOM($url, $cached = false) - { - $header = [ - 'Accept-Language: en-US' - ]; - $opts = []; - $lowercase = true; - $forceTagsClosed = true; - $target_charset = DEFAULT_TARGET_CHARSET; - $stripRN = false; - $defaultBRText = DEFAULT_BR_TEXT; - $defaultSpanText = DEFAULT_SPAN_TEXT; - if ($cached) { - return getSimpleHTMLDOMCached( - $url, - 86400, - $header, - $opts, - $lowercase, - $forceTagsClosed, - $target_charset, - $stripRN, - $defaultBRText, - $defaultSpanText - ); + $header = ['Accept-Language: en-US']; + $ttl = 86400 * 3; // 3d + $stripNewlines = false; + if ($cache) { + return getSimpleHTMLDOMCached($url, $ttl, $header, [], true, true, DEFAULT_TARGET_CHARSET, $stripNewlines); } - return getSimpleHTMLDOM( - $url, - $header, - $opts, - $lowercase, - $forceTagsClosed, - $target_charset, - $stripRN, - $defaultBRText, - $defaultSpanText - ); + return getSimpleHTMLDOM($url, $header, [], true, true, DEFAULT_TARGET_CHARSET, $stripNewlines); } - private function getJSONData($html) + private function extractJsonFromHtml($html) { $scriptRegex = '/var ytInitialData = (.*?);<\/script>/'; $result = preg_match($scriptRegex, $html, $matches); if (! 
$result) { - Logger::debug('Could not find ytInitialData'); + $this->logger->debug('Could not find ytInitialData'); return null; } - return json_decode($matches[1]); + $data = json_decode($matches[1]); + return $data; } - private function parseJSONListing($jsonData) + private function fetchItemsFromFromJsonData($jsonData) { - $duration_min = $this->getInput('duration_min') ?: -1; - $duration_min = $duration_min * 60; - - $duration_max = $this->getInput('duration_max') ?: INF; - $duration_max = $duration_max * 60; - - if ($duration_max < $duration_min) { - returnClientError('Max duration must be greater than min duration!'); - } - - // $vid_list = ''; + $minimumDurationSeconds = ($this->getInput('duration_min') ?: -1) * 60; + $maximumDurationSeconds = ($this->getInput('duration_max') ?: INF) * 60; foreach ($jsonData as $item) { $wrapper = null; @@ -269,24 +441,32 @@ class YoutubeBridge extends BridgeAbstract continue; } - $vid = $wrapper->videoId; - $title = $wrapper->title->runs[0]->text; - if (isset($wrapper->ownerText)) { - $this->channel_name = $wrapper->ownerText->runs[0]->text; - } elseif (isset($wrapper->shortBylineText)) { - $this->channel_name = $wrapper->shortBylineText->runs[0]->text; + // 01:03:30 | 15:06 | 1:24 + $lengthText = $wrapper->lengthText->simpleText ?? null; + // 6,875 views + $viewCount = $wrapper->viewCountText->simpleText ?? null; + // Dc645M8Het8 + $videoId = $wrapper->videoId; + // Jumbo frames - transfer more data faster! + $title = $wrapper->title->runs[0]->text ?? $wrapper->title->accessibility->accessibilityData->label ?? null; + $author = null; + $description = $wrapper->descriptionSnippet->runs[0]->text ?? null; + // 5 days ago | 1 month ago + $publishedTimeText = $wrapper->publishedTimeText->simpleText ?? $wrapper->videoInfo->runs[2]->text ?? 
null; + $timestamp = null; + if ($publishedTimeText) { + try { + $publicationDate = new \DateTimeImmutable($publishedTimeText); + // Hard-code hour, minute and second + $publicationDate = $publicationDate->setTime(0, 0, 0); + $timestamp = $publicationDate->getTimestamp(); + } catch (\Exception $e) { + } } - $author = ''; - $desc = ''; - $time = ''; - - // The duration comes in one of the formats: - // hh:mm:ss / mm:ss / m:ss - // 01:03:30 / 15:06 / 1:24 $durationText = 0; - if (isset($wrapper->lengthText)) { - $durationText = $wrapper->lengthText->simpleText; + if ($lengthText) { + $durationText = $lengthText; } else { foreach ($wrapper->thumbnailOverlays as $overlay) { if (isset($overlay->thumbnailOverlayTimeStatusRenderer)) { @@ -295,7 +475,6 @@ class YoutubeBridge extends BridgeAbstract } } } - if (is_string($durationText)) { if (preg_match('/([\d]{1,2})\:([\d]{1,2})\:([\d]{2})/', $durationText)) { $durationText = preg_replace('/([\d]{1,2})\:([\d]{1,2})\:([\d]{2})/', '$1:$2:$3', $durationText); @@ -304,131 +483,50 @@ class YoutubeBridge extends BridgeAbstract } sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds); $duration = $hours * 3600 + $minutes * 60 + $seconds; - if ($duration < $duration_min || $duration > $duration_max) { + if ($duration < $minimumDurationSeconds || $duration > $maximumDurationSeconds) { continue; } } - - // $vid_list .= $vid . 
','; - $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); - $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); + if (!$description || !$timestamp) { + $this->fetchVideoDetails($videoId, $author, $description, $timestamp); + } + $this->addItem($videoId, $title, $author, $description, $timestamp); + if (count($this->items) >= 99) { + break; + } } } - public function collectData() + private function addItem($videoId, $title, $author, $description, $timestamp, $thumbnail = '') { - $xml = ''; - $html = ''; - $url_feed = ''; - $url_listing = ''; + $description = nl2br($description); - if ($this->getInput('u')) { /* User and Channel modes */ - $this->request = $this->getInput('u'); - $url_feed = self::URI . 'feeds/videos.xml?user=' . urlencode($this->request); - $url_listing = self::URI . 'user/' . urlencode($this->request) . '/videos'; - } elseif ($this->getInput('c')) { - $this->request = $this->getInput('c'); - $url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request); - $url_listing = self::URI . 'channel/' . urlencode($this->request) . '/videos'; - } elseif ($this->getInput('custom')) { - $this->request = $this->getInput('custom'); - $url_listing = self::URI . urlencode($this->request) . 
'/videos'; - } - - if (!empty($url_feed) || !empty($url_listing)) { - $this->feeduri = $url_listing; - if (!empty($this->getInput('custom'))) { - $html = $this->ytGetSimpleHTMLDOM($url_listing); - $jsonData = $this->getJSONData($html); - $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl; - $this->iconURL = $jsonData->metadata->channelMetadataRenderer->avatar->thumbnails[0]->url; - } - if (!$this->skipFeeds()) { - $html = $this->ytGetSimpleHTMLDOM($url_feed); - $this->ytBridgeParseXmlFeed($html); - } else { - if (empty($this->getInput('custom'))) { - $html = $this->ytGetSimpleHTMLDOM($url_listing); - $jsonData = $this->getJSONData($html); - } - $channel_id = ''; - if (isset($jsonData->contents)) { - $channel_id = $jsonData->metadata->channelMetadataRenderer->externalId; - $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; - $jsonData = $jsonData->tabRenderer->content->richGridRenderer->contents; - // $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; - $this->parseJSONListing($jsonData); - } else { - returnServerError('Unable to get data from YouTube. Username/Channel: ' . $this->request); - } - } - $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); - } elseif ($this->getInput('p')) { /* playlist mode */ - // TODO: this mode makes a lot of excess video query requests. - // To make less requests, we need to cache following dictionary "videoId -> datePublished, duration" - // This cache will be used to find out, which videos to fetch - // to make feed of 15 items or more, if there a lot of videos published on that date. - $this->request = $this->getInput('p'); - $url_feed = self::URI . 'feeds/videos.xml?playlist_id=' . urlencode($this->request); - $url_listing = self::URI . 'playlist?list=' . 
urlencode($this->request); - $html = $this->ytGetSimpleHTMLDOM($url_listing); - $jsonData = $this->getJSONData($html); - // TODO: this method returns only first 100 video items - // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element - $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0]; - $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; - $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents; - $item_count = count($jsonData); - - if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) { - $this->ytBridgeParseXmlFeed($xml); - } else { - $this->parseJSONListing($jsonData); - } - $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() - usort($this->items, function ($item1, $item2) { - if (!is_int($item1['timestamp']) && !is_int($item2['timestamp'])) { - $item1['timestamp'] = strtotime($item1['timestamp']); - $item2['timestamp'] = strtotime($item2['timestamp']); - } - return $item2['timestamp'] - $item1['timestamp']; - }); - } elseif ($this->getInput('s')) { /* search mode */ - $this->request = $this->getInput('s'); - $url_listing = self::URI - . 'results?search_query=' - . urlencode($this->request) - . '&sp=CAI%253D'; - - $html = $this->ytGetSimpleHTMLDOM($url_listing); - - $jsonData = $this->getJSONData($html); - $jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents; - $jsonData = $jsonData->sectionListRenderer->contents; - foreach ($jsonData as $data) { // Search result includes some ads, have to filter them - if (isset($data->itemSectionRenderer->contents[0]->videoRenderer)) { - $jsonData = $data->itemSectionRenderer->contents; - break; - } - } - $this->parseJSONListing($jsonData); - $this->feeduri = $url_listing; - $this->feedName = 'Search: ' . 
$this->request; // feedName will be used by getName() - } else { /* no valid mode */ - returnClientError("You must either specify either:\n - YouTube - username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)"); + $item = []; + // This should probably be uid? + $item['id'] = $videoId; + $item['title'] = $title; + $item['author'] = $author ?? ''; + $item['timestamp'] = $timestamp; + $item['uri'] = self::URI . '/watch?v=' . $videoId; + if (!$thumbnail) { + // Fallback to default thumbnail if there aren't any provided. + $thumbnail = '0'; } + $thumbnailUri = str_replace('/www.', '/img.', self::URI) . '/vi/' . $videoId . '/' . $thumbnail . '.jpg'; + $item['content'] = sprintf('
    %s', $item['uri'], $thumbnailUri, $description); + $this->items[] = $item; } - private function skipFeeds() + private function decodeTitle($title) { - return ($this->getInput('duration_min') || $this->getInput('duration_max')); + // convert both Ӓ and " to UTF-8 + return html_entity_decode($title, ENT_QUOTES, 'UTF-8'); } public function getURI() { if (!is_null($this->getInput('p'))) { - return static::URI . 'playlist?list=' . $this->getInput('p'); + return static::URI . '/playlist?list=' . $this->getInput('p'); } elseif ($this->feeduri) { return $this->feeduri; } @@ -438,14 +536,13 @@ class YoutubeBridge extends BridgeAbstract public function getName() { - // Name depends on queriedContext: switch ($this->queriedContext) { case 'By username': case 'By channel id': case 'By custom name': case 'By playlist Id': case 'Search result': - return htmlspecialchars_decode($this->feedName) . ' - YouTube'; // We already know it's a bridge, right? + return htmlspecialchars_decode($this->feedName) . ' - YouTube'; default: return parent::getName(); } @@ -453,10 +550,10 @@ class YoutubeBridge extends BridgeAbstract public function getIcon() { - if (empty($this->iconURL)) { + if (empty($this->feedIconUrl)) { return parent::getIcon(); } else { - return $this->iconURL; + return $this->feedIconUrl; } } } diff --git a/bridges/ZDNetBridge.php b/bridges/ZDNetBridge.php index 693f542c..e3b659a8 100644 --- a/bridges/ZDNetBridge.php +++ b/bridges/ZDNetBridge.php @@ -174,19 +174,17 @@ class ZDNetBridge extends FeedExpander $this->collectExpandableDatas($url, $limit); } - protected function parseItem($item) + protected function parseItem(array $item) { - $item = parent::parseItem($item); - $article = getSimpleHTMLDOMCached($item['uri']); if (!$article) { - Logger::info('Unable to parse the dom from ' . $item['uri']); + $this->logger->info('Unable to parse the dom from ' . $item['uri']); return $item; } $articleTag = $article->find('article', 0) ?? 
$article->find('.c-articleContent', 0); if (!$articleTag) { - Logger::info('Unable to parse
    tag in ' . $item['uri']); + $this->logger->info('Unable to parse
    tag in ' . $item['uri']); return $item; } $contents = $articleTag->innertext; diff --git a/bridges/ZeitBridge.php b/bridges/ZeitBridge.php index a9e4bcf2..3f96d8d6 100644 --- a/bridges/ZeitBridge.php +++ b/bridges/ZeitBridge.php @@ -50,24 +50,20 @@ class ZeitBridge extends FeedExpander 'defaultValue' => 5 ] ]]; - const LIMIT = 5; public function collectData() { - $this->collectExpandableDatas( - $this->getInput('category'), - $this->getInput('limit') ?: static::LIMIT - ); + $url = $this->getInput('category'); + $limit = $this->getInput('limit') ?: 5; + + $this->collectExpandableDatas($url, $limit); } - protected function parseItem($item) + protected function parseItem(array $item) { - $item = parent::parseItem($item); $item['enclosures'] = []; $headers = [ - 'User-Agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', - 'X-Forwarded-For: 66.249.66.1', 'Cookie: zonconsent=' . date('Y-m-d\TH:i:s.v\Z'), ]; @@ -91,7 +87,10 @@ class ZeitBridge extends FeedExpander // remove known bad elements foreach ( $article->find( - 'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, .article-heading__container--podcast' + 'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, + .article-heading__container--podcast, .podcast-player__image, div[data-paywall], + .js-embed-consent, script, nav, .article-flexible-toc__subheading-link, .faq-link, + .zoner-article-magazinbox' ) as $bad ) { $bad->remove(); @@ -112,16 +111,28 @@ class ZeitBridge extends FeedExpander } // authors - $authors = $article->find('*[itemtype*="schema.org/Person"]'); - if (!$authors) { - $authors = $article->find('.metadata__source'); - } + $authors = $article->find('*[itemtype*="schema.org/Person"]') ?? 
$article->find('.metadata__source'); if ($authors) { - $item['author'] = implode(', ', $authors); + $item['author'] = implode(', ', array_map(function ($e) { + return trim($e->plaintext); + }, $authors)); + } + + $item['content'] = ''; + // advertorial marker + $advert = $article->find('.advertorial-marker', 0); + if ($advert) { + $item['content'] .= $advert; + } + + // summary + $summary = $article->find('.summary'); + if ($summary) { + $item['content'] .= implode('', $summary); } // header image - $headerimg = $article->find('*[data-ct-row="headerimage"]', 0) ?? $article->find('header', 0); + $headerimg = $article->find('*[data-ct-row="headerimage"]', 0) ?? $article->find('.article-header', 0) ?? $article->find('header', 0); if ($headerimg) { $item['content'] .= implode('', $headerimg->find('img[src], figcaption')); } @@ -131,7 +142,7 @@ class ZeitBridge extends FeedExpander if ($pages) { foreach ($pages as $page) { - $elements = $page->find('p, h2, figcaption, img[src]'); + $elements = $page->find('p, ul, ol, h2, figure.article__media img[src], figure.article__media figcaption, figure.quote'); $item['content'] .= implode('', $elements); } } diff --git a/bridges/ZonebourseBridge.php b/bridges/ZonebourseBridge.php new file mode 100644 index 00000000..e351b9dd --- /dev/null +++ b/bridges/ZonebourseBridge.php @@ -0,0 +1,85 @@ + [ + 'name' => 'topic', + 'type' => 'list', + 'values' => [ + 'toute-l-actualite' => [ + 'monde' => '/actualite-bourse/', + 'france' => '/actualite-bourse/regions/locales/', + 'europe' => '/actualite-bourse/regions/europe/', + 'amerique-du-nord' => '/actualite-bourse/regions/amerique-du-nord/', + 'amerique-du-sud' => '/actualite-bourse/regions/amerique-du-sud/', + 'asie' => '/actualite-bourse/regions/asie/', + 'afrique' => '/actualite-bourse/regions/afrique/', + 'moyen-orient' => '/actualite-bourse/regions/moyenorient/', + 'emergents' => '/actualite-bourse/regions/emergents/', + ], + 'societes' => [ + 'toute-l-actualite' => 
'/actualite-bourse/societes/', + 'reco-analystes' => '/actualite-bourse/societes/recommandations/', + 'rumeurs' => '/actualite-bourse/societes/rumeur/', + 'introductions' => '/actualite-bourse/societes/introductions/', + 'operations-capitalistiques' => '/actualite-bourse/societes/operations/', + 'nouveaux-contrats' => '/actualite-bourse/societes/nouveaux-contrats/', + 'profits-warnings' => '/actualite-bourse/societes/profits-warnings/', + 'nominations' => '/actualite-bourse/societes/nominations/', + 'communiques' => '/actualite-bourse/societes/communique/', + 'operations-sur-titre' => '/actualite-bourse/societes/operations_titre/', + 'publications-de-resultats' => '/actualite-bourse/societes/publications/', + 'nouveaux-marches' => '/actualite-bourse/societes/nouveaux-marches/', + 'nouveaux-produits' => '/actualite-bourse/societes/nouveaux-produits/', + 'strategies-societes' => '/actualite-bourse/societes/strategies-societes/', + 'risques-juridiques' => '/actualite-bourse/societes/risques-juridiques/', + 'rachats-d-actions' => '/actualite-bourse/societes/rachats-actions/', + 'fusions-et-acquisitions' => '/actualite-bourse/societes/fusions-acquisitions/', + 'call-transcripts' => '/actualite-bourse/societes/call-transcripts/', + 'guidance' => '/actualite-bourse/societes/guidance/', + ], + ], + ], + ], + ]; + + public function getName() + { + $topic = $this->getKey('topic'); + return self::NAME . ($topic ? ': ' . $topic : ''); + } + + public function collectData() + { + $dom = getSimpleHTMLDOM(self::URI . $this->getInput('topic')); + $articles = $dom->find('table#newsScreener tbody tr'); + + if (!$articles) { + throwServerException('Failed to retrieve news content'); + } + + foreach ($articles as $article) { + $element = $article->find('.grid a', 0); + + if (!$element || empty($element->plaintext) || empty($element->href)) { + continue; + } + + $date = $article->find('span.js-date-relative.txt-muted.h-100', 0); + $timestamp = $date->{'data-utc-date'} ?? 
''; + + $this->items[] = [ + 'timestamp' => $timestamp, + 'title' => trim($element->plaintext), + 'uid' => $element->href, + 'uri' => self::URI . $element->href, + ]; + } + } +} diff --git a/caches/ArrayCache.php b/caches/ArrayCache.php new file mode 100644 index 00000000..ddbd77d5 --- /dev/null +++ b/caches/ArrayCache.php @@ -0,0 +1,55 @@ +data[$key] ?? null; + if (!$item) { + return $default; + } + $expiration = $item['expiration']; + if ($expiration === 0 || $expiration > time()) { + return $item['value']; + } + $this->delete($key); + return $default; + } + + public function set(string $key, $value, ?int $ttl = null): void + { + $this->data[$key] = [ + 'key' => $key, + 'value' => $value, + 'expiration' => $ttl === null ? 0 : time() + $ttl, + ]; + } + + public function delete(string $key): void + { + unset($this->data[$key]); + } + + public function clear(): void + { + $this->data = []; + } + + public function prune(): void + { + foreach ($this->data as $key => $item) { + $expiration = $item['expiration']; + if ($expiration === 0 || $expiration > time()) { + continue; + } + $this->delete($key); + } + } +} diff --git a/caches/FileCache.php b/caches/FileCache.php index a4f48962..5b2f4b37 100644 --- a/caches/FileCache.php +++ b/caches/FileCache.php @@ -1,13 +1,17 @@ logger = $logger; $default = [ 'path' => null, 'enable_purge' => true, @@ -20,120 +24,97 @@ class FileCache implements CacheInterface $this->config['path'] = rtrim($this->config['path'], '/') . '/'; } - public function getConfig() + public function get(string $key, $default = null) { - return $this->config; + $cacheFile = $this->createCacheFile($key); + if (!file_exists($cacheFile)) { + return $default; + } + $data = file_get_contents($cacheFile); + $item = unserialize($data); + if ($item === false) { + $this->logger->warning(sprintf('Failed to unserialize: %s', $cacheFile)); + $this->delete($key); + return $default; + } + $expiration = $item['expiration'] ?? 
time(); + if ($expiration === 0 || $expiration > time()) { + return $item['value']; + } + $this->delete($key); + return $default; } - public function loadData() + public function set($key, $value, ?int $ttl = null): void { - if (!file_exists($this->getCacheFile())) { - return null; - } - $data = unserialize(file_get_contents($this->getCacheFile())); - if ($data === false) { - // Intentionally not throwing an exception - Logger::warning(sprintf('Failed to unserialize: %s', $this->getCacheFile())); - return null; - } - return $data; - } + $item = [ + 'key' => $key, + 'expiration' => $ttl === null ? 0 : time() + $ttl, + 'value' => $value, + ]; + $cacheFile = $this->createCacheFile($key); + $bytes = file_put_contents($cacheFile, serialize($item)); + + // TODO: Consider tightening the permissions of the created file. + // It usually allow others to read, depending on umask - public function saveData($data): void - { - $bytes = file_put_contents($this->getCacheFile(), serialize($data), LOCK_EX); if ($bytes === false) { - throw new \Exception(sprintf('Failed to write to: %s', $this->getCacheFile())); + // Typically means no disk space remaining + $this->logger->warning(sprintf('Failed to write to: %s', $cacheFile)); } } - public function getTime(): ?int + public function delete(string $key): void { - // https://www.php.net/manual/en/function.clearstatcache.php - clearstatcache(); - - $cacheFile = $this->getCacheFile(); - if (file_exists($cacheFile)) { - $time = filemtime($cacheFile); - if ($time !== false) { - return $time; - } - return null; - } - - return null; + unlink($this->createCacheFile($key)); } - public function purgeCache(int $seconds): void + public function clear(): void + { + foreach (scandir($this->config['path']) as $filename) { + $cacheFile = $this->config['path'] . $filename; + $excluded = ['.' => true, '..' 
=> true, '.gitkeep' => true]; + if (isset($excluded[$filename]) || !is_file($cacheFile)) { + continue; + } + unlink($cacheFile); + } + } + + public function prune(): void { if (! $this->config['enable_purge']) { return; } - - $cachePath = $this->getScope(); - if (!file_exists($cachePath)) { - return; - } - $cacheIterator = new \RecursiveIteratorIterator( - new \RecursiveDirectoryIterator($cachePath), - \RecursiveIteratorIterator::CHILD_FIRST - ); - - foreach ($cacheIterator as $cacheFile) { - $basename = $cacheFile->getBasename(); - $excluded = [ - '.' => true, - '..' => true, - '.gitkeep' => true, - ]; - if (isset($excluded[$basename])) { + foreach (scandir($this->config['path']) as $filename) { + $cacheFile = $this->config['path'] . $filename; + $excluded = ['.' => true, '..' => true, '.gitkeep' => true]; + if (isset($excluded[$filename]) || !is_file($cacheFile)) { continue; - } elseif ($cacheFile->isFile()) { - $filepath = $cacheFile->getPathname(); - if (filemtime($filepath) < time() - $seconds) { - // todo: sometimes this file doesn't exists - unlink($filepath); - } } - } - } - - public function setScope(string $scope): void - { - $this->scope = $this->config['path'] . trim($scope, " \t\n\r\0\x0B\\\/") . '/'; - } - - public function setKey(array $key): void - { - $this->key = json_encode($key); - } - - private function getScope() - { - if (is_null($this->scope)) { - throw new \Exception('Call "setScope" first!'); - } - - if (!is_dir($this->scope)) { - if (mkdir($this->scope, 0755, true) !== true) { - throw new \Exception('mkdir: Unable to create file cache folder'); + $data = file_get_contents($cacheFile); + $item = unserialize($data); + if ($item === false) { + unlink($cacheFile); + continue; } + $expiration = $item['expiration'] ?? 
time(); + if ($expiration === 0 || $expiration > time()) { + // Cached forever, or not expired yet + continue; + } + // Expired, so delete file + unlink($cacheFile); } - - return $this->scope; } - private function getCacheFile() + private function createCacheFile(string $key): string { - return $this->getScope() . $this->getCacheName(); + return $this->config['path'] . hash('md5', $key) . '.cache'; } - private function getCacheName() + public function getConfig() { - if (is_null($this->key)) { - throw new \Exception('Call "setKey" first!'); - } - - return hash('md5', $this->key) . '.cache'; + return $this->config; } } diff --git a/caches/MemcachedCache.php b/caches/MemcachedCache.php index 8cd1932b..f994c1ae 100644 --- a/caches/MemcachedCache.php +++ b/caches/MemcachedCache.php @@ -1,110 +1,61 @@ logger = $logger; + $this->conn = new \Memcached(); + // This call does not actually connect to server yet + if (!$this->conn->addServer($host, $port)) { + throw new \Exception('Unable to add memcached server'); } - - $section = 'MemcachedCache'; - $host = Configuration::getConfig($section, 'host'); - $port = Configuration::getConfig($section, 'port'); - - if (empty($host) && empty($port)) { - throw new \Exception('Configuration for ' . $section . ' missing.'); - } - if (empty($host)) { - throw new \Exception('"host" param is not set for ' . $section); - } - if (empty($port)) { - throw new \Exception('"port" param is not set for ' . $section); - } - if (!ctype_digit($port)) { - throw new \Exception('"port" param is invalid for ' . $section); - } - - $port = intval($port); - - if ($port < 1 || $port > 65535) { - throw new \Exception('"port" param is invalid for ' . 
$section); - } - - $conn = new \Memcached(); - $conn->addServer($host, $port) or returnServerError('Could not connect to memcached server'); - $this->conn = $conn; } - public function loadData() + public function get(string $key, $default = null) { - if ($this->data) { - return $this->data; + $value = $this->conn->get($key); + if ($value === false) { + return $default; } - $result = $this->conn->get($this->getCacheKey()); + return $value; + } + + public function set(string $key, $value, $ttl = null): void + { + $expiration = $ttl === null ? 0 : time() + $ttl; + $result = $this->conn->set($key, $value, $expiration); if ($result === false) { - return null; + $this->logger->warning('Failed to store an item in memcached', [ + 'key' => $key, + 'code' => $this->conn->getLastErrorCode(), + 'message' => $this->conn->getLastErrorMessage(), + 'number' => $this->conn->getLastErrorErrno(), + ]); + // Intentionally not throwing an exception } - - $this->time = $result['time']; - $this->data = $result['data']; - return $result['data']; } - public function saveData($data): void + public function delete(string $key): void { - $time = time(); - $object_to_save = [ - 'data' => $data, - 'time' => $time, - ]; - $result = $this->conn->set($this->getCacheKey(), $object_to_save, $this->expiration); - - if ($result === false) { - throw new \Exception('Cannot write the cache to memcached server'); - } - - $this->time = $time; + $this->conn->delete($key); } - public function getTime(): ?int + public function clear(): void { - if ($this->time === null) { - $this->loadData(); - } - return $this->time; + $this->conn->flush(); } - public function purgeCache(int $seconds): void + public function prune(): void { - // Note: does not purges cache right now - // Just sets cache expiration and leave cache purging for memcached itself - $this->expiration = $seconds; - } - - public function setScope(string $scope): void - { - $this->scope = $scope; - } - - public function setKey(array $key): void - { - 
$this->key = json_encode($key); - } - - private function getCacheKey() - { - if (is_null($this->key)) { - throw new \Exception('Call "setKey" first!'); - } - - return 'rss_bridge_cache_' . hash('md5', $this->scope . $this->key . 'A'); + // memcached manages pruning on its own } } diff --git a/caches/NullCache.php b/caches/NullCache.php index 1a01df2f..9756b0aa 100644 --- a/caches/NullCache.php +++ b/caches/NullCache.php @@ -4,28 +4,24 @@ declare(strict_types=1); class NullCache implements CacheInterface { - public function setScope(string $scope): void + public function get(string $key, $default = null) + { + return $default; + } + + public function set(string $key, $value, ?int $ttl = null): void { } - public function setKey(array $key): void + public function delete(string $key): void { } - public function loadData() + public function clear(): void { } - public function saveData($data): void - { - } - - public function getTime(): ?int - { - return null; - } - - public function purgeCache(int $seconds): void + public function prune(): void { } } diff --git a/caches/SQLiteCache.php b/caches/SQLiteCache.php index 309b86d1..a8d51c74 100644 --- a/caches/SQLiteCache.php +++ b/caches/SQLiteCache.php @@ -1,17 +1,22 @@ + * The storage table has a column `updated` which is incorrectly named. 
+ * It should have been named `expiration` and the code treats it as an expiration date (in unix timestamp) */ class SQLiteCache implements CacheInterface { - private \SQLite3 $db; - private string $scope; - private string $key; + private Logger $logger; private array $config; + private \SQLite3 $db; - public function __construct(array $config) - { + public function __construct( + Logger $logger, + array $config + ) { + $this->logger = $logger; $default = [ 'file' => null, 'timeout' => 5000, @@ -32,71 +37,89 @@ class SQLiteCache implements CacheInterface $this->db = new \SQLite3($config['file']); $this->db->enableExceptions(true); $this->db->exec("CREATE TABLE storage ('key' BLOB PRIMARY KEY, 'value' BLOB, 'updated' INTEGER)"); + // Consider uncommenting this to add an index on expiration + //$this->db->exec('CREATE INDEX idx_storage_updated ON storage (updated)'); } $this->db->busyTimeout($config['timeout']); + + // https://www.sqlite.org/pragma.html#pragma_journal_mode + $this->db->exec('PRAGMA journal_mode = wal'); + + // https://www.sqlite.org/pragma.html#pragma_synchronous + $this->db->exec('PRAGMA synchronous = NORMAL'); } - public function loadData() + public function get(string $key, $default = null) { - $stmt = $this->db->prepare('SELECT value FROM storage WHERE key = :key'); - $stmt->bindValue(':key', $this->getCacheKey()); + $cacheKey = $this->createCacheKey($key); + $stmt = $this->db->prepare('SELECT value, updated FROM storage WHERE key = :key'); + $stmt->bindValue(':key', $cacheKey); $result = $stmt->execute(); - if ($result) { - $data = $result->fetchArray(\SQLITE3_ASSOC); - if (isset($data['value'])) { - return unserialize($data['value']); - } + if (!$result) { + return $default; } - - return null; + $row = $result->fetchArray(\SQLITE3_ASSOC); + if ($row === false) { + return $default; + } + $expiration = $row['updated']; + if ($expiration === 0 || $expiration > time()) { + $blob = $row['value']; + $value = unserialize($blob); + if ($value === 
false) { + $this->logger->error(sprintf("Failed to unserialize: '%s'", mb_substr($blob, 0, 100))); + // delete? + return $default; + } + return $value; + } + // delete? + return $default; } - public function saveData($data): void + public function set(string $key, $value, ?int $ttl = null): void { + $cacheKey = $this->createCacheKey($key); + $blob = serialize($value); + $expiration = $ttl === null ? 0 : time() + $ttl; $stmt = $this->db->prepare('INSERT OR REPLACE INTO storage (key, value, updated) VALUES (:key, :value, :updated)'); - $stmt->bindValue(':key', $this->getCacheKey()); - $stmt->bindValue(':value', serialize($data)); - $stmt->bindValue(':updated', time()); - $stmt->execute(); - } - - public function getTime(): ?int - { - $stmt = $this->db->prepare('SELECT updated FROM storage WHERE key = :key'); - $stmt->bindValue(':key', $this->getCacheKey()); - $result = $stmt->execute(); - if ($result) { - $data = $result->fetchArray(\SQLITE3_ASSOC); - if (isset($data['updated'])) { - return $data['updated']; - } + $stmt->bindValue(':key', $cacheKey); + $stmt->bindValue(':value', $blob, \SQLITE3_BLOB); + $stmt->bindValue(':updated', $expiration); + try { + $result = $stmt->execute(); + // Should $result->finalize() be called here? 
+ } catch (\Exception $e) { + $this->logger->warning(create_sane_exception_message($e)); + // Intentionally not rethrowing exception } - - return null; } - public function purgeCache(int $seconds): void + public function delete(string $key): void + { + $key = $this->createCacheKey($key); + $stmt = $this->db->prepare('DELETE FROM storage WHERE key = :key'); + $stmt->bindValue(':key', $key); + $result = $stmt->execute(); + } + + public function prune(): void { if (!$this->config['enable_purge']) { return; } - $stmt = $this->db->prepare('DELETE FROM storage WHERE updated < :expired'); - $stmt->bindValue(':expired', time() - $seconds); - $stmt->execute(); + $stmt = $this->db->prepare('DELETE FROM storage WHERE updated <= :now'); + $stmt->bindValue(':now', time()); + $result = $stmt->execute(); } - public function setScope(string $scope): void + public function clear(): void { - $this->scope = $scope; + $this->db->query('DELETE FROM storage'); } - public function setKey(array $key): void + private function createCacheKey($key) { - $this->key = json_encode($key); - } - - private function getCacheKey() - { - return hash('sha1', $this->scope . 
$this->key, true); + return hash('sha1', $key, true); } } diff --git a/composer.json b/composer.json index a08c9666..8d41c51b 100644 --- a/composer.json +++ b/composer.json @@ -28,18 +28,20 @@ "ext-openssl": "*", "ext-libxml": "*", "ext-simplexml": "*", + "ext-dom": "*", "ext-json": "*", - "ext-intl": "*" + "ext-filter": "*" }, "require-dev": { "phpunit/phpunit": "^9", "squizlabs/php_codesniffer": "^3.6" }, "suggest": { + "php-webdriver/webdriver": "Required for Selenium usage", "ext-memcached": "Allows to use memcached as cache type", "ext-sqlite3": "Allows to use an SQLite database for caching", "ext-zip": "Required for FDroidRepoBridge", - "ext-dom": "Allows to use some bridges based on XPath expressions" + "ext-intl": "Required for OLXBridge" }, "autoload-dev": { "psr-4": { @@ -48,7 +50,7 @@ }, "scripts": { "test": "./vendor/bin/phpunit", - "lint": "./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./", + "lint": "./vendor/bin/phpcs --parallel=2 --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./", "compat": "./vendor/bin/phpcs --standard=phpcompatibility.xml --warning-severity=0 --extensions=php -p ./" } } diff --git a/composer.lock b/composer.lock index 39e9cfc4..94a71227 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "24083060ddb8be9a95e75f6596e3bb83", + "content-hash": "6103a05bc4ac2c33281ef349fd8ff968", "packages": [], "packages-dev": [ { @@ -623,16 +623,16 @@ }, { "name": "phpunit/phpunit", - "version": "9.6.9", + "version": "9.6.11", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/phpunit.git", - "reference": "a9aceaf20a682aeacf28d582654a1670d8826778" + "reference": "810500e92855eba8a7a5319ae913be2da6f957b0" }, "dist": { "type": "zip", - "url": 
"https://api.github.com/repos/sebastianbergmann/phpunit/zipball/a9aceaf20a682aeacf28d582654a1670d8826778", - "reference": "a9aceaf20a682aeacf28d582654a1670d8826778", + "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/810500e92855eba8a7a5319ae913be2da6f957b0", + "reference": "810500e92855eba8a7a5319ae913be2da6f957b0", "shasum": "" }, "require": { @@ -706,7 +706,7 @@ "support": { "issues": "https://github.com/sebastianbergmann/phpunit/issues", "security": "https://github.com/sebastianbergmann/phpunit/security/policy", - "source": "https://github.com/sebastianbergmann/phpunit/tree/9.6.9" + "source": "https://github.com/sebastianbergmann/phpunit/tree/9.6.11" }, "funding": [ { @@ -722,7 +722,7 @@ "type": "tidelift" } ], - "time": "2023-06-11T06:13:56+00:00" + "time": "2023-08-19T07:10:56+00:00" }, { "name": "sebastian/cli-parser", @@ -1690,16 +1690,16 @@ }, { "name": "squizlabs/php_codesniffer", - "version": "3.7.2", + "version": "3.8.1", "source": { "type": "git", - "url": "https://github.com/squizlabs/PHP_CodeSniffer.git", - "reference": "ed8e00df0a83aa96acf703f8c2979ff33341f879" + "url": "https://github.com/PHPCSStandards/PHP_CodeSniffer.git", + "reference": "14f5fff1e64118595db5408e946f3a22c75807f7" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/squizlabs/PHP_CodeSniffer/zipball/ed8e00df0a83aa96acf703f8c2979ff33341f879", - "reference": "ed8e00df0a83aa96acf703f8c2979ff33341f879", + "url": "https://api.github.com/repos/PHPCSStandards/PHP_CodeSniffer/zipball/14f5fff1e64118595db5408e946f3a22c75807f7", + "reference": "14f5fff1e64118595db5408e946f3a22c75807f7", "shasum": "" }, "require": { @@ -1709,11 +1709,11 @@ "php": ">=5.4.0" }, "require-dev": { - "phpunit/phpunit": "^4.0 || ^5.0 || ^6.0 || ^7.0" + "phpunit/phpunit": "^4.0 || ^5.0 || ^6.0 || ^7.0 || ^8.0 || ^9.3.4" }, "bin": [ - "bin/phpcs", - "bin/phpcbf" + "bin/phpcbf", + "bin/phpcs" ], "type": "library", "extra": { @@ -1728,22 +1728,45 @@ "authors": [ { "name": "Greg 
Sherwood", - "role": "lead" + "role": "Former lead" + }, + { + "name": "Juliette Reinders Folmer", + "role": "Current lead" + }, + { + "name": "Contributors", + "homepage": "https://github.com/PHPCSStandards/PHP_CodeSniffer/graphs/contributors" } ], "description": "PHP_CodeSniffer tokenizes PHP, JavaScript and CSS files and detects violations of a defined set of coding standards.", - "homepage": "https://github.com/squizlabs/PHP_CodeSniffer", + "homepage": "https://github.com/PHPCSStandards/PHP_CodeSniffer", "keywords": [ "phpcs", "standards", "static analysis" ], "support": { - "issues": "https://github.com/squizlabs/PHP_CodeSniffer/issues", - "source": "https://github.com/squizlabs/PHP_CodeSniffer", - "wiki": "https://github.com/squizlabs/PHP_CodeSniffer/wiki" + "issues": "https://github.com/PHPCSStandards/PHP_CodeSniffer/issues", + "security": "https://github.com/PHPCSStandards/PHP_CodeSniffer/security/policy", + "source": "https://github.com/PHPCSStandards/PHP_CodeSniffer", + "wiki": "https://github.com/PHPCSStandards/PHP_CodeSniffer/wiki" }, - "time": "2023-02-22T23:07:41+00:00" + "funding": [ + { + "url": "https://github.com/PHPCSStandards", + "type": "github" + }, + { + "url": "https://github.com/jrfnl", + "type": "github" + }, + { + "url": "https://opencollective.com/php_codesniffer", + "type": "open_collective" + } + ], + "time": "2024-01-11T20:47:48+00:00" }, { "name": "theseer/tokenizer", @@ -1808,9 +1831,9 @@ "ext-openssl": "*", "ext-libxml": "*", "ext-simplexml": "*", - "ext-json": "*", - "ext-intl": "*" + "ext-dom": "*", + "ext-json": "*" }, "platform-dev": [], - "plugin-api-version": "2.0.0" + "plugin-api-version": "2.6.0" } diff --git a/config.default.ini.php b/config.default.ini.php index 1c9a20ae..39329a2a 100644 --- a/config.default.ini.php +++ b/config.default.ini.php @@ -6,55 +6,60 @@ [system] +; System environment: "dev" or "prod" +env = "prod" + ; Only these bridges are available for feed production ; How to enable all bridges: 
enabled_bridges[] = * -enabled_bridges[] = FeedMerge -enabled_bridges[] = FeedReducerBridge -enabled_bridges[] = Filter -enabled_bridges[] = GettrBridge -enabled_bridges[] = MastodonBridge -enabled_bridges[] = Reddit -enabled_bridges[] = RumbleBridge -enabled_bridges[] = SoundcloudBridge -enabled_bridges[] = Telegram -enabled_bridges[] = ThePirateBay -enabled_bridges[] = TikTokBridge -enabled_bridges[] = Twitch -enabled_bridges[] = Twitter -enabled_bridges[] = Vk -enabled_bridges[] = XPathBridge -enabled_bridges[] = Youtube -enabled_bridges[] = YouTubeCommunityTabBridge +;enabled_bridges[] = CssSelectorBridge +;enabled_bridges[] = FeedMerge +;enabled_bridges[] = FeedReducerBridge +;enabled_bridges[] = Filter +;enabled_bridges[] = GettrBridge +;enabled_bridges[] = MastodonBridge +;enabled_bridges[] = Reddit +;enabled_bridges[] = RumbleBridge +;enabled_bridges[] = SoundcloudBridge +;enabled_bridges[] = Telegram +;enabled_bridges[] = ThePirateBay +;enabled_bridges[] = TikTokBridge +;enabled_bridges[] = Twitch +;enabled_bridges[] = XPathBridge +;enabled_bridges[] = Youtube +;enabled_bridges[] = YouTubeCommunityTabBridge +enabled_bridges[] = * -; Defines the timezone used by RSS-Bridge -; Find a list of supported timezones at -; https://www.php.net/manual/en/timezones.php -; timezone = "UTC" (default) timezone = "UTC" ; Display a system message to users. -message = "" - -; Whether to enable debug mode. -enable_debug_mode = false - -; Enable debug mode only for these permitted ip addresses -; debug_mode_whitelist[] = 127.0.0.1 -; debug_mode_whitelist[] = 192.168.1.10 +;message = "Hello world" ; Whether to enable maintenance mode. 
If enabled, feed requests receive 503 Service Unavailable enable_maintenance_mode = false +; Max file size for simple_html_dom in bytes (10000000 => 10 MB) +max_file_size = 10000000 + [http] -timeout = 60 -useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0" + +; Operation timeout in seconds +timeout = 5 + +; Operation retry count in case of curl error +retries = 1 + +; Curl user agent +; This is already set by curl-impersonate, which comes included as default +; in RSS-Bridge docker container. Use only if you know what you're doing. +; For reference, see https://github.com/lexiforest/curl-impersonate/tree/main/docs +;useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0" ; Max http response size in MB max_filesize = 20 [cache] -; Cache type: file, sqlite, memcached, null +; Cache type: file, sqlite, memcached, array, null type = "file" ; Allow users to specify custom timeout for specific requests. @@ -63,12 +68,13 @@ type = "file" custom_timeout = false [admin] + ; Advertise an email address where people can reach the administrator. ; This address is displayed on the main page, visible to everyone! ; "" = Disabled (default) email = "" -; Advertise a contact Telegram url e.g. "https://t.me/elegantobjects" +; Advertise a contact URL (can be any URL!) e.g. "https://t.me/elegantobjects" telegram = "" ; Show Donation information for bridges if available. @@ -80,7 +86,8 @@ donations = true [proxy] -; Sets the proxy url (i.e. 
"tcp://192.168.0.0:32") +; The HTTP proxy to tunnel requests through +; https://curl.se/libcurl/c/CURLOPT_PROXY.html ; "" = Proxy disabled (default) url = "" @@ -93,23 +100,25 @@ name = "Hidden proxy name" ; false = disabled (default) by_bridge = false +[webdriver] + +; Sets the url of the webdriver or selenium server +selenium_server_url = "http://localhost:4444" + +; Sets whether the browser should run in headless mode (no visible ui) +; true = enabled +; false = disabled (default) +headless = false + [authentication] -; Enables basic authentication for all requests to this RSS-Bridge instance. -; -; Warning: You'll have to upgrade existing feeds after enabling this option! -; -; true = enabled -; false = disabled (default) +; HTTP basic authentication enable = false - username = "admin" - -; The password cannot be the empty string if authentication is enabled. password = "" -; This will be used only for actions that require privileged access -access_token = "" +; Token authentication (URL) +token = "" [error] @@ -126,6 +135,7 @@ report_limit = 1 ; --- Cache specific configuration --------------------------------------------- [FileCache] + ; The root folder to store files in. ; "" = Use the cache folder in the repository (default) path = "" @@ -133,6 +143,7 @@ path = "" enable_purge = true [SQLiteCache] + ; Filepath of the sqlite db file file = "cache.sqlite" ; Whether to actually delete data when purging @@ -141,11 +152,17 @@ enable_purge = true timeout = 5000 [MemcachedCache] + host = "localhost" port = 11211 ; --- Bridge specific configuration ------ +[TelegramBridge] + +; Max pages to fetch (1 page => 20 messages), min=1 max=100 +max_pages = 1 + [DiscogsBridge] ; Sets the personal access token for interactions with Discogs. 
When diff --git a/config/nginx.conf b/config/nginx.conf index bb7b1dcb..c65f8e00 100644 --- a/config/nginx.conf +++ b/config/nginx.conf @@ -2,8 +2,8 @@ server { listen 80 default_server; listen [::]:80 default_server; root /app; - access_log /dev/stdout; - error_log /dev/stderr; + access_log /var/log/nginx/access.log; + error_log /var/log/nginx/error.log; index index.php; location ~ /(\.|vendor|tests) { @@ -13,6 +13,7 @@ server { location ~ \.php$ { include snippets/fastcgi-php.conf; - fastcgi_pass 127.0.0.1:9000; + fastcgi_read_timeout 45s; + fastcgi_pass unix:/var/run/php/php8.2-fpm.sock; } } diff --git a/config/php-fpm.conf b/config/php-fpm.conf new file mode 100644 index 00000000..a508a0f6 --- /dev/null +++ b/config/php-fpm.conf @@ -0,0 +1,18 @@ +; Inspired by https://github.com/docker-library/php/blob/master/8.2/bookworm/fpm/Dockerfile + +[global] +error_log = /proc/self/fd/2 + +; https://github.com/docker-library/php/pull/725#issuecomment-443540114 +log_limit = 8192 + +[www] +; php-fpm closes STDOUT on startup, so sending logs to /proc/self/fd/1 does not work. +; https://bugs.php.net/bug.php?id=73886 +access.log = /proc/self/fd/2 + +clear_env = no + +; Ensure worker stdout and stderr are sent to the main error log. 
+catch_workers_output = yes +decorate_workers_output = no diff --git a/config/php.ini b/config/php.ini new file mode 100644 index 00000000..383afffb --- /dev/null +++ b/config/php.ini @@ -0,0 +1,4 @@ +; Inspired by https://github.com/docker-library/php/blob/master/8.2/bookworm/fpm/Dockerfile + +; https://github.com/docker-library/php/issues/878#issuecomment-938595965 +fastcgi.logging = Off diff --git a/contrib/prepare_release/fetch_contributors.php b/contrib/prepare_release/fetch_contributors.php deleted file mode 100644 index ad04458a..00000000 --- a/contrib/prepare_release/fetch_contributors.php +++ /dev/null @@ -1,49 +0,0 @@ - 'application/json', - 'Content-Type' => 'application/json', - 'User-Agent' => 'RSS-Bridge', - ]; - $result = _http_request($url, ['headers' => $headers]); - - foreach (json_decode($result['body']) as $contributor) { - $contributors[] = $contributor; - } - - // Extract links to "next", "last", etc... - $links = explode(',', $result['headers']['link'][0]); - $next = false; - - // Check if there is a link with 'rel="next"' - foreach ($links as $link) { - [$url, $type] = explode(';', $link, 2); - - if (trim($type) === 'rel="next"') { - $url = trim(preg_replace('/([<>])/', '', $url)); - $next = true; - break; - } - } -} - -/* Example JSON data: https://api.github.com/repos/rss-bridge/rss-bridge/contributors */ - -// We want contributors sorted by name -usort($contributors, function ($a, $b) { - return strcasecmp($a->login, $b->login); -}); - -// Export as Markdown list -foreach ($contributors as $contributor) { - echo " * [{$contributor->login}]({$contributor->html_url})\n"; -} diff --git a/contrib/prepare_release/rssbridge-log-helper.el b/contrib/prepare_release/rssbridge-log-helper.el deleted file mode 100644 index a4b28226..00000000 --- a/contrib/prepare_release/rssbridge-log-helper.el +++ /dev/null @@ -1,151 +0,0 @@ -;;; rssbridge-log-helper.el --- A helper for preparing RSS-Bridge releases -*- lexical-binding:t; coding:utf-8 -*- - -;;; 
Commentary: - -;; Keyboard abbreviations used below: -;; C-x == Ctrl + x -;; M-x == Alt + x - -;; How to use this helper? -;; 1. Run "git log --reverse 2021-04-25..master > tmp.md" (2021-04-25 is an example tag of a previous version) -;; 2. Copy the contents of template.md to the start of tmp.md -;; 3. Open Emacs. Type M-x load-file -;; 4. Enter in the path to rssbridge-log-helper.el then -;; 5. Type M-x find-file -;; 6. Enter the path to tmp.md then -;; 7. Type M-x rssbridge-log-transient-state -;; 8. You can now use the following shortcuts to organize the commits: -;; x: Delete commit -;; g: Copy as general change -;; n: Copy as new bridge -;; m: Copy as modified bridge -;; r: Copy as removed bridge -;; : Quit -;; 9. Once you are done with all the commits, type C-x then C-s -;; 10. Exit Emacs with C-x then C-c - -;;; Code: - -(defun rssbridge-log--get-commit-block () - "Select a commit block that begins before the cursor." - (re-search-backward "^commit ") ;; (move-beginning-of-line nil) - (set-mark-command nil) - (right-char) - (re-search-forward "^commit ") - (move-end-of-line 1)) - -(defun rssbridge-log--goto-first-commit () - "Find the first commit in the file." - (goto-char (point-min)) - (re-search-forward "^commit ")) - -(defun rssbridge-log--remove-until-prev-commit-block () - "Remove from start of current line to previous commit block." - (move-beginning-of-line nil) - (set-mark-command nil) - (re-search-backward "^commit ") - (delete-region (region-beginning) (region-end))) - -(defun rssbridge-log--remove-until-next-commit-block () - "Remove from start of current line to next commit block." - (move-beginning-of-line nil) - (set-mark-command nil) - (re-search-forward "^commit ") - (move-beginning-of-line nil) - (delete-region (region-beginning) (region-end))) - -(defun rssbridge-log--cut-paste (arg) - "Copy current line to header that matches ARG." 
- (kill-whole-line 0) - (rssbridge-log--remove-until-next-commit-block) - (goto-char (point-min)) - (re-search-forward arg) - (move-end-of-line 1) - (newline) - (yank) - (set-mark-command 1) - (re-search-forward "^commit ") - (recenter)) - -(defun rssbridge-log-remove () - "Remove the current commit block. - -You can bind this function or use `rssbridge-log-transient-state' -to access the function." - (interactive) - (rssbridge-log--get-commit-block) - (rssbridge-log--remove-until-prev-commit-block) - (set-mark-command 1) - (re-search-forward "^commit ")) - -(defun rssbridge-log-copy-as-new () - "Copy the current commit block as a new bridge. - -You can bind this function or use `rssbridge-log-transient-state' -to access the function." - (interactive) - (rssbridge-log--get-commit-block) - (re-search-backward "^.*\\[\\(.*\\)\\].*\\((.*)\\)" (region-beginning)) - (replace-match "* \\1 () \\2") - (rssbridge-log--remove-until-prev-commit-block) - (rssbridge-log--cut-paste "## New bridges")) - -(defun rssbridge-log-copy-as-mod () - "Copy the current commit block as a modified bridge. - -You can bind this function or use `rssbridge-log-transient-state' -to access the function." - (interactive) - (rssbridge-log--get-commit-block) - (re-search-backward "^.*\\[\\(.*\\)\\]" (region-beginning)) - (replace-match "* \\1:") - (rssbridge-log--remove-until-prev-commit-block) - (rssbridge-log--cut-paste "## Modified bridges")) - -(defun rssbridge-log-copy-as-gen () - "Copy the current commit block as a general change. - -You can bind this function or use `rssbridge-log-transient-state' -to access the function." - (interactive) - (rssbridge-log--get-commit-block) - (re-search-backward "^.*\\[\\(.*\\)\\]" (region-beginning)) - (replace-match "* \\1:") - (rssbridge-log--remove-until-prev-commit-block) - (rssbridge-log--cut-paste "## General changes")) - -(defun rssbridge-log-copy-as-rem () - "Copy the current commit block as a removed bridge. 
- -You can bind this function or use `rssbridge-log-transient-state' -to access the function." - (interactive) - (rssbridge-log--get-commit-block) - (re-search-backward "^.*\\[\\(.*\\)\\]" (region-beginning)) - (replace-match "* \\1:") - (rssbridge-log--remove-until-prev-commit-block) - (rssbridge-log--cut-paste "## Removed bridges")) - - -(defun rssbridge-log-transient-state () - "Create a transient map for convienience. -x: Delete commit -g: Copy as general change -n: Copy as new bridge -m: Copy as modified bridge -r: Copy as removed bridge -: Quit" - (interactive) - (rssbridge-log--goto-first-commit) - (set-transient-map - (let ((map (make-sparse-keymap))) - (define-key map "x" 'rssbridge-log-remove) - (define-key map "g" 'rssbridge-log-copy-as-gen) - (define-key map "n" 'rssbridge-log-copy-as-new) - (define-key map "m" 'rssbridge-log-copy-as-mod) - (define-key map "r" 'rssbridge-log-copy-as-rem) - map) - t)) - -(provide 'rssbridge-log-helper) -;;; rssbridge-log-helper.el ends here diff --git a/contrib/prepare_release/template.md b/contrib/prepare_release/template.md deleted file mode 100644 index 7314113d..00000000 --- a/contrib/prepare_release/template.md +++ /dev/null @@ -1,23 +0,0 @@ - - -## General changes - - -## New bridges - - -## Modified bridges - - -## Removed bridges - - - diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..9f178049 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,9 @@ +version: '2' +services: + rss-bridge: + image: rssbridge/rss-bridge:latest + volumes: + - ./config:/config + ports: + - 3000:80 + restart: unless-stopped diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index aa95fa87..f92d8b21 100755 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -41,5 +41,5 @@ fi # nginx will daemonize nginx -# php-fpm will not -php-fpm +# php-fpm should not daemonize +exec php-fpm8.2 --nodaemonize diff --git a/docs/01_General/03_Requirements.md b/docs/01_General/03_Requirements.md index 
1ae5aa26..c9c91a52 100644 --- a/docs/01_General/03_Requirements.md +++ b/docs/01_General/03_Requirements.md @@ -1,24 +1,3 @@ -**RSS-Bridge** requires either of the following: -## A Web server* with: - PHP 7.4 (or higher) - - [`openssl`](https://secure.php.net/manual/en/book.openssl.php) extension - - [`libxml`](https://secure.php.net/manual/en/book.libxml.php) extension (enabled by default, see [PHP Manual](http://php.net/manual/en/libxml.installation.php)) - - [`mbstring`](https://secure.php.net/manual/en/book.mbstring.php) extension - - [`simplexml`](https://secure.php.net/manual/en/book.simplexml.php) extension - - [`curl`](https://secure.php.net/manual/en/book.curl.php) extension - - [`json`](https://secure.php.net/manual/en/book.json.php) extension - - [`filter`](https://secure.php.net/manual/en/book.filter.php) extension - - [`zip`](https://secure.php.net/manual/en/book.zip.php) (for some bridges) - - [`sqlite3`](http://php.net/manual/en/book.sqlite3.php) extension (only when using SQLiteCache) - -Enable extensions by un-commenting the corresponding line in your PHP configuration (`php.ini`). - - -## A Linux server with: - - - Docker server configured (Any recent version should do) - - 100MB of disk space - -To setup RSS Bridge using Docker, see the [Docker Guide](../03_For_Hosts/03_Docker_Installation.md) on installing RSS Bridge. \ No newline at end of file diff --git a/docs/01_General/05_FAQ.md b/docs/01_General/05_FAQ.md index ade746d7..19cfae4b 100644 --- a/docs/01_General/05_FAQ.md +++ b/docs/01_General/05_FAQ.md @@ -1,30 +1,33 @@ -This page provides a collection of frequently asked questions and their answers. 
Please check this page before opening a new Issue :revolving_hearts: - -* [Why doesn't my bridge show new contents?](#why-doesnt-my-bridge-show-new-contents) -* [How can I make a bridge update more frequently?](#how-can-i-make-a-bridge-update-more-frequently) -* [Firefox doesn't show feeds anymore, what can I do?](#firefox-doesnt-show-feeds-anymore-what-can-i-do) - ## Why doesn't my bridge show new contents? -RSS-Bridge creates a cached version of your feed in order to reduce traffic and respond faster. The cached version is created on the first request and served for all subsequent requests. On every request RSS-Bridge checks if the cache timeout has elapsed. If the timeout has elapsed, it loads new contents and updates the cached version. +RSS-Bridge creates a cached version of your feed in order to reduce traffic and respond faster. +The cached version is created on the first request and served for all subsequent requests. +On every request RSS-Bridge checks if the cache timeout has elapsed. +If the timeout has elapsed, it loads new contents and updates the cached version. -_Notice_: RSS-Bridge only updates feeds if you actively request it, for example by pressing F5 in your browser or using a feed reader. +_Notice_: RSS-Bridge only updates feeds if you actively request it, +for example by pressing F5 in your browser or using a feed reader. -The cache duration is bridge specific and can last anywhere between five minutes and 24 hours. You can specify a custom cache timeout for each bridge if [this option](#how-can-i-make-a-bridge-update-more-frequently) has been enabled on the server. +The cache duration is bridge specific (usually `1h`) +You can specify a custom cache timeout for each bridge if +[this option](#how-can-i-make-a-bridge-update-more-frequently) has been enabled on the server. ## How can I make a bridge update more frequently? 
You can only do that if you are hosting the RSS-Bridge instance: +- Lower the bridge ttl: `CACHE_TIMEOUT` constant - Enable [`custom_timeout`](../03_For_Hosts/08_Custom_Configuration.md#customtimeout) -- Alternatively, change the default timeout for your bridge by modifying the `CACHE_TIMEOUT` constant in the relevant bridge file (e.g [here](https://github.com/RSS-Bridge/rss-bridge/blob/master/bridges/FilterBridge.php#L7) for the Filter Bridge). ## Firefox doesn't show feeds anymore, what can I do? -As of version 64, Firefox removed support for viewing Atom and RSS feeds in the browser. This results in the browser downloading the pages instead of showing contents. +As of version 64, Firefox removed support for viewing Atom and RSS feeds in the browser. +This results in the browser downloading the pages instead of showing contents. Further reading: - https://support.mozilla.org/en-US/kb/feed-reader-replacements-firefox - https://bugzilla.mozilla.org/show_bug.cgi?id=1477667 -To restore the original behavior in Firefox 64 or higher you can use following Add-on which attempts to recreate the original behavior (with some sugar on top): -- https://addons.mozilla.org/en-US/firefox/addon/rsspreview/ \ No newline at end of file +To restore the original behavior in Firefox 64 or higher you can use following Add-on +which attempts to recreate the original behavior (with some sugar on top): + +- https://addons.mozilla.org/en-US/firefox/addon/rsspreview/ diff --git a/docs/01_General/06_Public_Hosts.md b/docs/01_General/06_Public_Hosts.md index 531e5383..71f6fdd6 100644 --- a/docs/01_General/06_Public_Hosts.md +++ b/docs/01_General/06_Public_Hosts.md @@ -3,9 +3,11 @@ | Country | Address | Status | Contact | Comment | |:-------:|---------|--------|----------|---------| | ![](https://iplookup.flagfox.net/images/h16/GB.png) | https://rss-bridge.org/bridge01 | ![](https://img.shields.io/website/https/rss-bridge.org/bridge01.svg) | [@dvikan](https://github.com/dvikan) | London, 
Digital Ocean| +| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rssbridge.flossboxin.org.in | ![](https://img.shields.io/badge/website-up-brightgreen) | [@vdbhb59](https://github.com/vdbhb59) | Hosted with Netcup Germany (Maintained in India) | +| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rss-bridge.cheredeprince.net | ![](https://img.shields.io/website/https/rss-bridge.cheredeprince.net) | [@La_Bécasse](https://cheredeprince.net/contact) | Self-Hosted at home in France | +| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rss-bridge.sans-nuage.fr | ![](https://img.shields.io/website/https/rss-bridge.sans-nuage.fr) | [@Alsace Réseau Neutre](https://arn-fai.net/contact) | Hosted in Alsace, France | | ![](https://iplookup.flagfox.net/images/h16/GB.png) | https://rss-bridge.lewd.tech | ![](https://img.shields.io/website/https/rss-bridge.lewd.tech.svg) | [@Erisa](https://github.com/Erisa) | Hosted in London, protected by Cloudflare Rate Limiting | -| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://bridge.easter.fr | ![](https://img.shields.io/website/https/bridge.easter.fr.svg) | [@chatainsim](https://github.com/chatainsim) | Hosted in Isère, France | -| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://wtf.roflcopter.fr/rss-bridge/ | ![](https://img.shields.io/website/https/wtf.roflcopter.fr/rss-bridge.svg) | [roflcopter.fr](https://wtf.roflcopter.fr/) | Hosted in France | +| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://wtf.roflcopter.fr/rss-bridge | ![](https://img.shields.io/website/https/wtf.roflcopter.fr/rss-bridge.svg) | [roflcopter.fr](https://wtf.roflcopter.fr/) | Hosted in France | | ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rss.nixnet.services | ![](https://img.shields.io/website/https/rss.nixnet.services.svg) | [@amolith](https://nixnet.services/contact) | Hosted in Wunstorf, Germany | | ![](https://iplookup.flagfox.net/images/h16/AT.png) | 
https://rss-bridge.ggc-project.de | ![](https://img.shields.io/website/https/rss-bridge.ggc-project.de) | [@ggc-project.de](https://social.dev-wiki.de/@ggc_project) | Hosted in Steyr, Austria | | ![](https://iplookup.flagfox.net/images/h16/CA.png) | https://rssbridge.bus-hit.me | ![](https://img.shields.io/website/https/rssbridge.bus-hit.me.svg)| [@austinhuang0131](https://austinhuang.me/) | Hosted with Oracle in Québec, Canada | @@ -13,13 +15,17 @@ | ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rssbridge.boldair.dev | ![](https://img.shields.io/website?down_color=red&down_message=down&up_color=lime&up_message=up&url=https%3A%2F%2Frssbridge.boldair.dev) | [@Boldairdev](https://github.com/Boldairdev) | Latest Github release, Hosted on PHP 8.0 in Roubaix, France | | ![](https://iplookup.flagfox.net/images/h16/IN.png) | https://rss-bridge.bb8.fun | ![](https://img.shields.io/website/https/rss-bridge.bb8.fun.svg) | [@captn3m0](https://github.com/captn3m0) | Hosted in Bengaluru, India | | ![](https://iplookup.flagfox.net/images/h16/RU.png) | https://ololbu.ru/rss-bridge | ![](https://img.shields.io/website/https/ololbu.ru) | [@Ololbu](https://github.com/Ololbu) | Hosted in Moscow, Russia | -| ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://tools.bheil.net/rss-bridge/ | ![](https://img.shields.io/website/https/tools.bheil.net.svg) | [@bheil](https://www.bheil.net) | Hosted in Germany | +| ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://tools.bheil.net/rss-bridge | ![](https://img.shields.io/website/https/tools.bheil.net.svg) | [@bheil](https://www.bheil.net) | Hosted in Germany | | ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://bridge.suumitsu.eu | ![](https://img.shields.io/website/https/bridge.suumitsu.eu.svg) | [@mitsukarenai](https://github.com/mitsukarenai) | Hosted in Paris, France | | ![](https://iplookup.flagfox.net/images/h16/NL.png) | https://feed.eugenemolotov.ru | 
![](https://img.shields.io/website/https/feed.eugenemolotov.ru.svg) | [@em92](https://github.com/em92) | Hosted in Amsterdam, Netherlands | | ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rss-bridge.mediani.de | ![](https://img.shields.io/website/https/rss-bridge.mediani.de.svg) | [@sokai](https://github.com/sokai) | Hosted with Netcup, Germany | -| ![](https://iplookup.flagfox.net/images/h16/US.png) | http://rb.vern.cc/ | ![](https://img.shields.io/website/https/rb.vern.cc.svg) | [@vern.cc](https://vern.cc/en/admin) | Hosted with Hetzner, US | -| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rssbridge.flossboxin.org.in/ | ![](https://img.shields.io/badge/website-up-brightgreen) | [@vdbhb59](https://github.com/vdbhb59) | Hosted with OVH SAS (Maintained in India) -| ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.foxhaven.cyou| ![](https://img.shields.io/badge/website-up-brightgreen) | [@Aysilu](https://foxhaven.cyou) | Hosted with Timeweb (Maintained in Poland) +| ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rb.ash.fail | ![](https://img.shields.io/website/https/rb.ash.fail.svg) | [@ash](https://ash.fail/contact.html) | Hosted with Hostaris, Germany +| ![](https://iplookup.flagfox.net/images/h16/UA.png) | https://rss.noleron.com | ![](https://img.shields.io/website/https/rss.noleron.com) | [@ihor](https://noleron.com/about) | Hosted with Hosting Ukraine, Ukraine +| ![](https://iplookup.flagfox.net/images/h16/IN.png) | https://rssbridge.projectsegfau.lt | ![](https://img.shields.io/website/https/rssbridge.projectsegfau.lt) | [@gi-yt](https://aryak.me) | Self-Hosted at Mumbai, India with Airtel (ISP) | +| ![](https://iplookup.flagfox.net/images/h16/US.png) | https://rb.vern.cc | ![](https://img.shields.io/website/https/rb.vern.cc.svg) | [@vern.cc](https://vern.cc/en/admin) | Hosted with Hetzner, US | +| ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rss.bloat.cat | 
![](https://img.shields.io/website/https/rss.bloat.cat) | [@vlnst](https://bloat.cat/contact) | Hosted with Datalix, Germany | +| ![](https://iplookup.flagfox.net/images/h16/CZ.png) | https://rssbridge.prenghy.org | ![](https://img.shields.io/website/https/rssbridge.prenghy.org.svg) | [@pprenghy](https://github.com/pprenghy) | Hosted with vpsFree, The Czech Republic | + ## Inactive instances diff --git a/docs/02_CLI/index.md b/docs/02_CLI/index.md index 9292746a..727e59cb 100644 --- a/docs/02_CLI/index.md +++ b/docs/02_CLI/index.md @@ -1,10 +1,12 @@ -RSS-Bridge supports calls via CLI. You can use the same parameters as you would normally use via the URI. Example: +RSS-Bridge supports calls via CLI. +You can use the same parameters as you would normally use via the URI. Example: `php index.php action=display bridge=DansTonChat format=Json` ## Required parameters -RSS-Bridge requires a few parameters that must be specified on every call. Omitting these parameters will result in error messages: +RSS-Bridge requires a few parameters that must be specified on every call. +Omitting these parameters will result in error messages: ### action @@ -17,20 +19,26 @@ Value | Description ### bridge -This parameter specifies the name of the bridge RSS-Bridge should return feeds from. The name of the bridge equals the class name of the bridges in the ./bridges/ folder without the 'Bridge' prefix. For example: DansTonChatBridge => DansTonChat. +This parameter specifies the name of the bridge RSS-Bridge should return feeds from. +The name of the bridge equals the class name of the bridges in the ./bridges/ folder without the 'Bridge' prefix. +For example: DansTonChatBridge => DansTonChat. ### format -This parameter specifies the format in which RSS-Bridge returns the contents. RSS-Bridge currently supports five formats: `Atom`, `Html`, `Json`, `Mrss`and `Plaintext`. +This parameter specifies the format in which RSS-Bridge returns the contents. 
## Optional parameters -RSS-Bridge supports optional parameters. These parameters are only valid if the options have been enabled in the index.php script. +RSS-Bridge supports optional parameters. +These parameters are only valid if the options have been enabled in the index.php script. ### \_noproxy -This parameter is only available if a proxy server has been specified via `proxy.url` and `proxy.by_bridge` has been enabled. This is a Boolean parameter that can be set to `true` or `false`. +This parameter is only available if a proxy server has been specified via `proxy.url` and `proxy.by_bridge` +has been enabled. This is a Boolean parameter that can be set to `true` or `false`. ## Bridge parameters -Each bridge can specify its own set of parameters. As in the example above, some bridges don't specify any parameters or only optional parameters that can be neglected. For more details read the `PARAMETERS` definition for your bridge. \ No newline at end of file +Each bridge can specify its own set of parameters. +As in the example above, some bridges don't specify any parameters or only optional parameters that can be neglected. +For more details read the `PARAMETERS` definition for your bridge. diff --git a/docs/03_For_Hosts/01_Installation.md b/docs/03_For_Hosts/01_Installation.md index 39df7918..3312230d 100644 --- a/docs/03_For_Hosts/01_Installation.md +++ b/docs/03_For_Hosts/01_Installation.md @@ -1,12 +1 @@ -In order to install RSS-Bridge on your own web server* do as follows: - -* Make sure your web server meets all [requirements](../01_General/03_Requirements.md) -* Download the ZIP file of the [last stable release](https://github.com/RSS-Bridge/rss-bridge/releases) -* Place all files on your web server - -For linux hosts: -* Grant read-write-access for `www-data` to the `./cache` directory (`chown -R www-data ./cache`) - -You have successfully installed RSS-Bridge. 
- -Instructions for Docker setups are at [Docker Installation](../03_For_Hosts/03_Docker_Installation.md) \ No newline at end of file +https://github.com/RSS-Bridge/rss-bridge/blob/master/README.md diff --git a/docs/03_For_Hosts/02_Updating.md b/docs/03_For_Hosts/02_Updating.md index 3ec98049..e69de29b 100644 --- a/docs/03_For_Hosts/02_Updating.md +++ b/docs/03_For_Hosts/02_Updating.md @@ -1,42 +0,0 @@ -Updating an existing installation is very simple, depending on your type of installation. - -## Release Build - -* Download latest version -* Extract all files -* Replace existing files - -This will update all core files to the latest version. Your custom configuration and bridges are left untouched. Keep in mind that changes to any core file of RSS-Bridge will be replaced. - -## Docker - -Simply get the latest Docker build via `:latest` or specific builds via `:`. - -## Heroku - -### If you didn't fork the repo before - -Fork the repo by clicking the `Fork` button at the top right of this page (must be on desktop site). Then on your Heroku account, go to the application. Click on the `Deploy` tab and connect the repo named `yourusername/rss-bridge`. Do a manual deploy of the `master` branch. - -### If you forked the repo before - -[Click here to create a new pull request to your fork](https://github.com/RSS-Bridge/rss-bridge/pull/new/master). Select `compare across forks`, make the base repository `yourusername/rss-bridge` and ensure the branch is set to master. Put any title you want and create the pull request. On the page that comes after this, merge the pull request. - -You then want to go to your application in Heroku, connect your fork via the `Deploy` tab and deploy the `master` branch. - -You can turn on auto-deploy for the master branch if you don't want to go through the process of logging into Heroku and deploying the branch every time changes to the repo are made in the future. 
- -## Git - -To get the latest changes from the master branch - -``` -git pull -``` - -To use a specific tag - -``` -git fetch --all -git checkout tags/ -``` \ No newline at end of file diff --git a/docs/03_For_Hosts/03_Docker_Installation.md b/docs/03_For_Hosts/03_Docker_Installation.md deleted file mode 100644 index d895e748..00000000 --- a/docs/03_For_Hosts/03_Docker_Installation.md +++ /dev/null @@ -1,49 +0,0 @@ -This guide is for people who want to run RSS Bridge using Docker. If you want to run it a simple PHP Webhost environment, see [Installation](../03_For_Hosts/01_Installation.md) instead. - -## Setup - -### Create the container - -```bash -docker create \ ---name=rss-bridge \ ---volume :/config \ ---publish 3000:80 \ -rssbridge/rss-bridge:latest -``` -### Run it -```bash -docker start rss-bridge -``` - -Access it using `http://IP_Address:3000`. If you'd like to run a specific version, you can run it by changing the ':latest' on the image to a tag listed [here](https://hub.docker.com/r/rssbridge/rss-bridge/tags/) - -The server runs on port 80 internally, map any port of your choice (in this example 3000). - -You can run it using a `docker-compose.yml` as well: - -```yml -version: '2' -services: - rss-bridge: - image: rssbridge/rss-bridge:latest - volumes: - - :/config - ports: - - 3000:80 - restart: unless-stopped -``` - -# Container access and information - -|Function|Command| -|----|----| -|Shell access (live container)|`docker exec -it rss-bridge /bin/sh`| -|Realtime container logs|`docker logs -f rss-bridge`| - -# Adding custom bridges and configurations -If you want to add a bridge that is not part of [`/bridges`](https://github.com/RSS-Bridge/rss-bridge/tree/master/bridges), you can map a folder to the `/config` folder of the `rss-bridge` container. - -1. Create a folder in the location of your docker-compose.yml or your general docker working area (in this example it will be `/home/docker/rssbridge/config` ). -2. 
Copy your [custom bridges](../05_Bridge_API/01_How_to_create_a_new_bridge.md) to the `/home/docker/rssbridge/config` folder. Applies also to [config.ini.php](../03_For_Hosts/08_Custom_Configuration.md). -3. Map the folder to `/config` inside the container. To do that, replace the `` from the previous examples with `/home/docker/rssbridge/config` \ No newline at end of file diff --git a/docs/03_For_Hosts/05_Whitelisting.md b/docs/03_For_Hosts/05_Whitelisting.md index 113c4e3d..156174f0 100644 --- a/docs/03_For_Hosts/05_Whitelisting.md +++ b/docs/03_For_Hosts/05_Whitelisting.md @@ -1,14 +1,18 @@ -Modify `config.ini.php` to limit available bridges. +Modify `config.ini.php` to limit available bridges. Those changes should be applied in the `[system]` section. ## Enable all bridges ``` +[system] + enabled_bridges[] = * ``` ## Enable some bridges ``` +[system] + enabled_bridges[] = TwitchBridge enabled_bridges[] = GettrBridge ``` diff --git a/docs/03_For_Hosts/06_Authentication.md b/docs/03_For_Hosts/06_Authentication.md index bb9c6656..f505f5a6 100644 --- a/docs/03_For_Hosts/06_Authentication.md +++ b/docs/03_For_Hosts/06_Authentication.md @@ -1,101 +1,6 @@ -Depending on your servers abilities you can choose between two types of authentication: -* [.htaccess](#htaccess) -* [RSS-Bridge Authentication](#rss-bridge-authentication) - -**General advice**: - -- Make sure to use a strong password, no matter which solution you choose! -- Enable HTTPS on your server to ensure your connection is encrypted and secure! - -## .htaccess - -.htaccess files are commonly used to restrict access to files on a web server. One of the features of .htaccess files is the ability to password protect specific (or all) directories. If setup correctly, a password is required to access the files. 
- -The usage of .htaccess files requires three basic steps: - -1) [Enable .htaccess](#enable-htaccess) -2) [Create a .htpasswd file](#create-a-htpasswd-file) -3) [Create a .htaccess file](#create-a-htaccess-file) - -### Enable .htaccess - -This process depends on the server you are using. Some providers may require you to change some settings, or place/change some file. Here are some helpful links for your server (please add your own if missing :sparkling_heart:) - -- Apache: http://ask.xmodulo.com/enable-htaccess-apache.html - -### Create a .htpasswd file - -The `.htpasswd` file contains the user name and password used for login to your web server. Please notice that the password is stored in encrypted form, which requires you to encrypt your password before creating the `.htpasswd` file! - -Here are three ways of creating your own `.htpasswd` file: - -**1) Example file** - -Example `.htpasswd` file (user name: "test", password: "test"): - -```.htpasswd -test:$apr1$a52u9ILP$XTNG8qMJiEXSm1zD0lQcR0 -``` - -Just copy and paste the contents to your `.htpasswd` file. - -**2) Online generator (read warning!)** - -You can create your own `.htpasswd` file online using a `.htpasswd` generator like this: https://www.htaccesstools.com/htpasswd-generator/ - -**WARNING!** -- Never insert real passwords to an online generator! - -**3) Generate your own password** - -Another way to create your own `.htpasswd` file is to run this script on your server (it'll output the data for you, you just have to paste it int a `.htpasswd` file): - -```PHP - -``` - ->source: https://www.htaccesstools.com/articles/create-password-for-htpasswd-file-using-php/ - -### Create a .htaccess file - -The `.htaccess` file is used to specify which directories are password protected. For that purpose you should place the file in whatever directory you want to restrict access. 
If you want to restrict access to RSS-Bridge in general, you should place the file in the root directory (where `index.php` is located). - -Two parameters must be specified in the `.htaccess` file: - -* AuthName -* AuthUserFile - -`AuthName` specifies the name of the authentication (i.e. "RSS-Bridge"). `AuthUserFile` defines the **absolute** path to a `.htpasswd` file. - -Here are two ways of creating your own `.htaccess` file: - -**1) Example file** - -```.htaccess -AuthType Basic -AuthName "My Protected Area" -AuthUserFile /path/to/.htpasswd -Require valid-user -``` - -Notice: You must change the `AuthUserFile` location to fit your own server (i.e. `/var/www/html/rss-bridge/.htpasswd`) - -**2) Online generator** - -You can use an online generator to create the file for you and copy-paste it to your `.htaccess` file: https://www.htaccesstools.com/htaccess-authentication/ - -## RSS-Bridge Authentication - -RSS-Bridge ships with an authentication module designed for single user environments. You can enable authentication and specify the username & password in the [configuration file](../03_For_Hosts/08_Custom_Configuration.md#authentication). - -Please notice that the password is stored in plain text and thus is readable to anyone who can access the file. Make sure to restrict access to the file, so that it cannot be read remotely! \ No newline at end of file +* http basic auth +* token +* Access control via webserver (see nginx/caddy/apache docs) + +https://github.com/RSS-Bridge/rss-bridge/blob/master/README.md diff --git a/docs/03_For_Hosts/07_Customizations.md b/docs/03_For_Hosts/07_Customizations.md index be4c7f85..380f5f3a 100644 --- a/docs/03_For_Hosts/07_Customizations.md +++ b/docs/03_For_Hosts/07_Customizations.md @@ -1,9 +1,14 @@ -RSS-Bridge ships a few options the host may or may not activate. 
All options are listed in the [config.default.ini.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/config.default.ini.php) file, see [Custom Configuration](08_Custom_Configuration.md) section for more information. +RSS-Bridge ships a few options the host may or may not activate. +All options are listed in the [config.default.ini.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/config.default.ini.php) file, +see [Custom Configuration](08_Custom_Configuration.md) section for more information. ## Customizable cache timeout -Sometimes it is necessary to specify custom timeouts to update contents more frequently than the bridge maintainer intended. In these cases the client may specify a custom cache timeout to prevent loading contents from cache earlier (or later). +Sometimes it is necessary to specify custom timeouts to update contents more frequently +than the bridge maintainer intended. +In these cases the client may specify a custom cache timeout to prevent loading contents +from cache earlier (or later). -This option can be activated by setting the [`cache.custom_timeout`](08_Custom_Configuration.md#custom_timeout) option to `true`. When enabled each bridge receives an additional parameter `Cache timeout in seconds` that can be set to any value between 1 and 86400 (24 hours). If the value is not within the limits the default settings apply (as specified by the bridge maintainer). - -The cache timeout is send to RSS-Bridge using the `_cache_timeout` parameter. RSS-Bridge will return an error message if the parameter is received and the option is disabled. +This option can be activated by setting the [`cache.custom_timeout`](08_Custom_Configuration.md#custom_timeout) option to `true`. +When enabled each bridge receives an additional parameter `Cache timeout in seconds` +that can be set to any value. 
diff --git a/docs/03_For_Hosts/08_Custom_Configuration.md b/docs/03_For_Hosts/08_Custom_Configuration.md index 9a1f78f2..ebb53cb3 100644 --- a/docs/03_For_Hosts/08_Custom_Configuration.md +++ b/docs/03_For_Hosts/08_Custom_Configuration.md @@ -1,16 +1,21 @@ RSS-Bridge supports custom configurations for common parameters on the server side! -A default configuration file (`config.default.ini.php`) is shipped with RSS-Bridge. Please do not edit this file, as it gets replaced when upgrading RSS-Bridge! +A default configuration file (`config.default.ini.php`) is shipped with RSS-Bridge. +Please do not edit this file, as it gets replaced when upgrading RSS-Bridge! -You should, however, use this file as template to create your own configuration (or leave it as is, to keep the default settings). In order to create your own configuration perform following actions: +You should, however, use this file as template to create your own configuration +(or leave it as is, to keep the default settings). +In order to create your own configuration perform following actions: * Create the file `config.ini.php` in the RSS-Bridge root folder (next to `config.default.ini.php`) * Copy the contents from `config.default.ini.php` to your configuration file * Change the parameters to satisfy your requirements -RSS-Bridge will automatically detect the `config.ini.php` and use it. If the file doesn't exist it will default to `config.default.ini.php` automatically. +RSS-Bridge will automatically detect the `config.ini.php` and use it. +If the file doesn't exist it will default to `config.default.ini.php` automatically. -__Notice__: If a parameter is not specified in your `config.ini.php` RSS-Bridge will automatically use the default settings from `config.default.ini.php`. +__Notice__: If a parameter is not specified in your `config.ini.php` RSS-Bridge will +automatically use the default settings from `config.default.ini.php`. # Available parameters @@ -114,7 +119,9 @@ Default network timeout. 
### useragent -Default user agent. +Overrides the user agent value. Note that the default value, together with a set of other detection-preventing options, is set +automatically by [libcurl-impersonate](https://github.com/lexiforest/curl-impersonate), which is used by the default Docker container distributed together with RSS-Bridge. Use only if you know what you're doing, otherwise you may stop libcurl-impersonate +from doing its job impersonating a real browser. ## Authentication diff --git a/docs/03_For_Hosts/index.md b/docs/03_For_Hosts/index.md index 1529cb37..5ecbd4b7 100644 --- a/docs/03_For_Hosts/index.md +++ b/docs/03_For_Hosts/index.md @@ -1,11 +1,12 @@ This section is directed at **hosts** and **server administrators**. -To install RSS-Bridge, please follow the [installation instructions](../03_For_Hosts/01_Installation.md). You must have access to a web server with a working PHP environment! +RSS-Bridge comes with a large number of bridges. -RSS-Bridge comes with a large amount of bridges. Only few bridges are enabled by default. Unlock more bridges by adding them to the [whitelist](../03_For_Hosts/05_Whitelisting.md). +Some bridges could be implemented more efficiently by actually using proprietary APIs, +but there are reasons against it: -Some bridges could be implemented more efficiently by actually using proprietary APIs, but there are reasons against it: +- RSS-Bridge exists in the first place to NOT use APIs. + See [the rant](https://github.com/RSS-Bridge/rss-bridge/blob/master/README.md#Rant). -- RSS-Bridge exists in the first place to NOT use APIs. See [the rant](https://github.com/RSS-Bridge/rss-bridge/blob/master/README.md#Rant) - -- APIs require private keys that could be stored on servers running RSS-Bridge, which is a security concern, involves complex authorizations for inexperienced users and could cause harm (when using paid services for example).
In a closed environment (a server only you use for yourself) however you might be interested in using them anyway. So, check [this](https://github.com/RSS-Bridge/rss-bridge/pull/478/files) possible implementation of an anti-captcha solution. \ No newline at end of file +- APIs require private keys that could be stored on servers running RSS-Bridge, + which is a security concern, involves complex authorizations for inexperienced users and could cause harm (when using paid services for example). In a closed environment (a server only you use for yourself) however you might be interested in using them anyway. So, check [this](https://github.com/RSS-Bridge/rss-bridge/pull/478/files) possible implementation of an anti-captcha solution. diff --git a/docs/04_For_Developers/01_Coding_style_policy.md b/docs/04_For_Developers/01_Coding_style_policy.md index 4e796c32..9fb0eb10 100644 --- a/docs/04_For_Developers/01_Coding_style_policy.md +++ b/docs/04_For_Developers/01_Coding_style_policy.md @@ -1,8 +1,13 @@ -This section explains the coding style policy for RSS-Bridge with examples and references to external resources. Please make sure your code is compliant before opening a pull request. +This section explains the coding style policy for RSS-Bridge with examples and references to external resources. +Please make sure your code is compliant before opening a pull request. -You will automatically be notified if issues were found in your pull request. You must fix those issues before the pull request will be merged. Refer to [phpcs.xml](https://github.com/RSS-Bridge/rss-bridge/blob/master/phpcs.xml) for a complete list of policies enforced by Travis-CI. +You will automatically be notified if issues were found in your pull request. +You must fix those issues before the pull request will be merged. +Refer to [phpcs.xml](https://github.com/RSS-Bridge/rss-bridge/blob/master/phpcs.xml) for a complete list of policies enforced by Travis-CI. 
-If you want to run the checks locally, make sure you have [`phpcs`](https://github.com/squizlabs/PHP_CodeSniffer) and [`phpunit`](https://phpunit.de/) installed on your machine and run following commands in the root directory of RSS-Bridge (tested on Debian): +If you want to run the checks locally, make sure you have +[`phpcs`](https://github.com/squizlabs/PHP_CodeSniffer) and +[`phpunit`](https://phpunit.de/) installed on your machine and run the following commands in the root directory of RSS-Bridge (tested on Debian): ```console ./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./ @@ -73,7 +78,7 @@ _Reference_: [`Squiz.WhiteSpace.SuperfluousWhitespace`](https://github.com/squiz # Maximum Line Length -There is no maximum line length. +The maximum line length is 180 characters. # Strings diff --git a/docs/04_For_Developers/04_Actions.md b/docs/04_For_Developers/04_Actions.md index c346731a..74c8cbfb 100644 --- a/docs/04_For_Developers/04_Actions.md +++ b/docs/04_For_Developers/04_Actions.md @@ -1,8 +1,9 @@ -RSS-Bridge currently supports three 'actions' which it can operate: +RSS-Bridge currently supports four 'actions' which it can operate: 1) [Display](#display) (`?action=display`) 2) [Detect](#detect) (`?action=detect`) 3) [List](#list) (`?action=list`) +4) [FindFeed](#findfeed) (`?action=findfeed`) ## Display @@ -19,7 +20,7 @@ The `detect` action attempts to redirect the user to an appropriate `display` ac If an appropriate bridge is found, a `301 Moved Permanently` HTTP status code is returned with a relative location for a `display` action. If no appropriate bridge is found or a required parameter is missing, a `400 Bad Request` status code is returned.
-The parameters for this action are listed bellow: +The parameters for this action are listed below: Parameter | Required | Description ----------|----------|------------ @@ -75,4 +76,91 @@ Parameter | Optional | Description ### `total` -This parameter represents the total number of bridges available to the current instance of RSS-Bridge. \ No newline at end of file +This parameter represents the total number of bridges available to the current instance of RSS-Bridge. + +## FindFeed + +The `findfeed` action attempts to list all available feeds based on a supplied URL for the active bridges of this instance. As bridges have to individually implement `detectParameters`, this may not work for every bridge. + +If one or more bridges return a feed, a JSON data array structure is returned. If no feeds were found, a `404 Not Found` status code is returned. If a required parameter is missing, a `400 Bad Request` status code is returned. + +For each feed, the whole feed URL is sent in the `url` member, the feed specific bridge parameters metadata in the `bridgeData` member and the Bridge metadata in the `bridgeMeta` member.
+ +This example shows JSON data for the NASA Instagram account URL (`https://www.instagram.com/nasa/`) using the `Html` format : + +```JSON +[ + { + "url": "https://rssbridge.host/?action=display&context=Username&u=nasa&bridge=InstagramBridge&format=Html", + "bridgeParams": { + "context": "Username", + "u": "nasa", + "bridge": "InstagramBridge", + "format": "Html" + }, + "bridgeData": { + "context": { + "name": "Context", + "value": "Username" + }, + "u": { + "name": "username", + "value": "nasa" + } + }, + "bridgeMeta": { + "name": "Instagram Bridge", + "description": "Returns the newest images", + "parameters": { + "Username": { + "u": { + "name": "username", + "exampleValue": "aesoprockwins", + "required": true + } + }, + "Hashtag": { + "h": { + "name": "hashtag", + "exampleValue": "beautifulday", + "required": true + } + }, + "Location": { + "l": { + "name": "location", + "exampleValue": "london", + "required": true + } + }, + "global": { + "media_type": { + "name": "Media type", + "type": "list", + "required": false, + "values": { + "All": "all", + "Video": "video", + "Picture": "picture", + "Multiple": "multiple" + }, + "defaultValue": "all" + }, + "direct_links": { + "name": "Use direct media links", + "type": "checkbox" + } + } + }, + "icon": "https://www.instagram.com//favicon.ico" + } + } +] +``` + +The parameters for this action are listed below: + +Parameter | Required | Description +----------|----------|------------ +`url` | yes | Specifies the URL to attempt to find a feed from. The value of this should be URL encoded. +`format` | yes | Specifies the name of the format to use for the URL of the feeds. This is passed to the detected `display` action. Possible values are determined from the formats available to the current instance of RSS-Bridge. 
diff --git a/docs/04_For_Developers/05_Debug_mode.md b/docs/04_For_Developers/05_Debug_mode.md index 6bdb1d48..cccf2768 100644 --- a/docs/04_For_Developers/05_Debug_mode.md +++ b/docs/04_For_Developers/05_Debug_mode.md @@ -1,33 +1,28 @@ -

    Warning!

    +Debug mode has been removed. -Enabling debug mode on a public server may result in malicious clients retrieving sensitive data about your server and possibly gaining access to it. Do not enable debug mode on a public server, unless you understand the implications of your doing! +If you want to disable caching you can set cache type to array (in-memory cache): -*** +```ini +[cache] -Debug mode enables error reporting and prevents loading data from the cache (data is still written to the cache). -To enable debug mode, set in `config.ini.php`: +; Cache type: file, sqlite, memcached, array, null +type = "array" +``` - enable_debug_mode = true +Alternatively, you can comment out the cache middleware in `lib/RssBridge.php`: -Allow only explicit ip addresses: +```diff +diff --git a/lib/RssBridge.php b/lib/RssBridge.php +index d16f1d89..da3df8be 100644 +--- a/lib/RssBridge.php ++++ b/lib/RssBridge.php +@@ -24,7 +24,7 @@ final class RssBridge - debug_mode_whitelist[] = 127.0.0.1 - debug_mode_whitelist[] = 192.168.1.10 - -_Notice_: - -* An empty file enables debug mode for anyone! -* The bridge whitelist still applies! (debug mode does **not** enable all bridges) - -RSS-Bridge will give you a visual feedback when debug mode is enabled. - -While debug mode is active, RSS-Bridge will write additional data to your servers `error.log`. - -Debug mode is controlled by the static class `Debug`. It provides three core functions: - -* `Debug::isEnabled()`: Returns `true` if debug mode is enabled. -* `Debug::log($message)`: Adds a message to `error.log`. It takes one parameter, which can be anything. - -Example: `Debug::log('Hello World!');` - -**Notice**: `Debug::log($message)` calls `Debug::isEnabled()` internally. You don't have to do that manually. 
\ No newline at end of file + $middlewares = [ + new BasicAuthMiddleware(), +- new CacheMiddleware($this->container['cache']), ++ //new CacheMiddleware($this->container['cache']), + new ExceptionMiddleware($this->container['logger']), + new SecurityMiddleware(), + new MaintenanceMiddleware(), +``` \ No newline at end of file diff --git a/docs/04_For_Developers/07_Development_Environment_Setup.md b/docs/04_For_Developers/07_Development_Environment_Setup.md index 23a4b101..d3a5ee8d 100644 --- a/docs/04_For_Developers/07_Development_Environment_Setup.md +++ b/docs/04_For_Developers/07_Development_Environment_Setup.md @@ -1,39 +1,5 @@ -These are examples of how to setup a local development environment to add bridges, improve the docs, etc. -## Docker - -The following can serve as an example for using docker: - -``` -# create a new directory -mkdir rss-bridge-contribution -cd rss-bridge-contribution - -# clone the project into a subfolder -git clone https://github.com/RSS-Bridge/rss-bridge -``` - -Then add a `docker-compose.yml` file: - -```yml -version: '3' - -services: - rss-bridge: - build: - context: ./rss-bridge - ports: - - 3000:80 - volumes: - - ./config:/config - - ./rss-bridge/bridges:/app/bridges -``` - -You can then access RSS-Bridge at `localhost:3000` and [add your bridge](../05_Bridge_API/How_to_create_a_new_bridge) to the `rss-bridge/bridges` folder. - -If you need to edit any other files, like from the `lib` folder add this to the `volumes` section: `./rss-bridge/lib:/app/lib`. 
- -### Docs with Docker +## Docs with Docker If you want to edit the docs add this to your docker-compose.yml: diff --git a/docs/04_For_Developers/index.md b/docs/04_For_Developers/index.md index fcce11e3..31e513a5 100644 --- a/docs/04_For_Developers/index.md +++ b/docs/04_For_Developers/index.md @@ -1,10 +1,11 @@ -This area is intended for developers who decide to contribute to **RSS-Bridge** which is primarily written in [`PHP`](http://www.php.net/) with some aspects of [`HTML`](https://en.wikipedia.org/wiki/HTML) and [`CSS`](https://en.wikipedia.org/wiki/Cascading_Style_Sheets). +This area is intended for developers who decide to contribute to **RSS-Bridge**. + +It is written in PHP. If you are new to **RSS-Bridge** you should make yourself familiar with some general aspects: + - [Coding style policy](./01_Coding_style_policy.md) - [Folder structure](./03_Folder_structure.md) - - [Debug mode](./05_Debug_mode.md) - [Bridge API](../05_Bridge_API/index.md) - [Cache API](../07_Cache_API/index.md) - - [Format API](../08_Format_API/index.md) - - [Technical recommendations](../09_Technical_recommendations/index.md) \ No newline at end of file + - [Technical recommendations](../09_Technical_recommendations/index.md) diff --git a/docs/05_Bridge_API/01_How_to_create_a_new_bridge.md b/docs/05_Bridge_API/01_How_to_create_a_new_bridge.md index 391d179f..e7d401dd 100644 --- a/docs/05_Bridge_API/01_How_to_create_a_new_bridge.md +++ b/docs/05_Bridge_API/01_How_to_create_a_new_bridge.md @@ -1,30 +1,36 @@ -Create a new file in the `bridges/` folder (see [Folder structure](../04_For_Developers/03_Folder_structure.md)). 
+# How to create a completely new bridge -The file name must be named according to following specification: -* It starts with the full name of the site -* All white-space must be removed -* The first letter of a word is written in upper-case, unless the site name is specified otherwise (example: Freenews, not FreeNews, because the site is named 'Freenews') -* The first character must be upper-case -* The file name must end with 'Bridge' -* The file type must be PHP, written in **small** letters (seriously!) ".php" - -**Examples:** - -Site | Filename ------|--------- -Wikipedia | **Wikipedia**Bridge.php -Facebook | **Facebook**Bridge.php -GitHub | **GitHub**Bridge.php -Freenews | **Freenews**Bridge.php - -The file must start with the PHP tags and end with an empty line. The closing tag `?>` is [omitted](http://php.net/basic-syntax.instruction-separation). - -**Example:** +New code files MUST have `declare(strict_types=1);` at the top of file: ```php find('.blog-posts li') as $li) { + $a = $li->find('a', 0); + $this->items[] = [ + 'title' => $a->plaintext, + 'uri' => 'https://herman.bearblog.dev' . $a->href, + ]; + } + } +} +``` + +Learn more in [bridge api](https://rss-bridge.github.io/rss-bridge/Bridge_API/index.html). diff --git a/docs/05_Bridge_API/02_BridgeAbstract.md b/docs/05_Bridge_API/02_BridgeAbstract.md index 120361be..9cb16050 100644 --- a/docs/05_Bridge_API/02_BridgeAbstract.md +++ b/docs/05_Bridge_API/02_BridgeAbstract.md @@ -1,4 +1,5 @@ -`BridgeAbstract` is a base class for standard bridges. It implements the most common functions to simplify the process of adding new bridges. +`BridgeAbstract` is a base class for standard bridges. +It implements the most common functions to simplify the process of adding new bridges. 
*** @@ -11,7 +12,9 @@ You need four basic steps in order to create a new bridge: [**Step 3**](#step-3---add-general-constants-to-the-class) - Add general constants to the class [**Step 4**](#step-4---implement-a-function-to-collect-feed-data) - Implement a function to collect feed data -These steps are described in more detail below. At the end of this document you'll find a complete [template](#template) based on these instructions. The pictures below show an example based on these instructions: +These steps are described in more detail below. +At the end of this document you'll find a complete [template](#template) based on these instructions. +The pictures below show an example based on these instructions:
    Show pictures
    @@ -23,7 +26,12 @@ These steps are described in more detail below. At the end of this document you'

    -Make sure to read these instructions carefully. Please don't hesitate to open an [Issue](https://github.com/RSS-Bridge/rss-bridge/issues) if you have further questions (or suggestions). Once your bridge is finished, please open a [Pull Request](https://github.com/RSS-Bridge/rss-bridge/pulls), in order to get your bridge merge into RSS-Bridge. +Make sure to read these instructions carefully. +Please don't hesitate to open an +[Issue](https://github.com/RSS-Bridge/rss-bridge/issues) +if you have further questions (or suggestions). +Once your bridge is finished, please open a [Pull Request](https://github.com/RSS-Bridge/rss-bridge/pulls), +in order to get your bridge merged into RSS-Bridge. *** @@ -33,7 +41,8 @@ Please read [these instructions](./01_How_to_create_a_new_bridge.md) on how to c ## Step 2 - Add a class, extending `BridgeAbstract` -Your bridge needs to be a class, which extends `BridgeAbstract`. The class name must **exactly** match the name of the file, without the file extension. +Your bridge needs to be a class, which extends `BridgeAbstract`. +The class name must **exactly** match the name of the file, without the file extension. For example: `MyBridge.php` => `MyBridge` @@ -41,10 +50,10 @@ For example: `MyBridge.php` => `MyBridge` ```PHP @@ -65,43 +74,47 @@ const CACHE_TIMEOUT // (optional) Defines the maximum duration for the cache in
    Show example
    ```PHP -

    -**Notice**: `const PARAMETERS` can be used to request information from the user. Refer to [these instructions](#parameters) for more information. +**Notice**: `const PARAMETERS` can be used to request information from the user. +Refer to [these instructions](#parameters) for more information. ## Step 4 - Implement a function to collect feed data -In order for RSS-Bridge to collect data, you must implement the **public** function `collectData`. This function takes no arguments and returns nothing. It generates a list of feed elements, which must be placed into the variable `$this->items`. +In order for RSS-Bridge to collect data, you must implement the **public** function `collectData`. +This function takes no arguments and returns nothing. +It generates a list of feed elements, which must be placed into the variable `$this->items`.
    Show example
    ```PHP -items[] = $item; // Add item to the list - } + public function collectData() + { + $item = []; + $item['title'] = 'Hello World!'; + $this->items[] = $item; + } } -// This line is empty (just imagine it!) ```

    @@ -112,32 +125,36 @@ For more details on the `collectData` function refer to [these instructions](#co # Template -Use this template to create your own bridge. Please remove any unnecessary comments and parameters. +Use this template to create your own bridge. +Please remove any unnecessary comments and parameters. ```php items[] = $item; // Add item to the list - } +class MyBridge extends BridgeAbstract +{ + const NAME = 'Unnamed bridge'; + const URI = ''; + const DESCRIPTION = 'No description provided'; + const MAINTAINER = 'No maintainer'; + const PARAMETERS = []; // Can be omitted! + const CACHE_TIMEOUT = 3600; // Can be omitted! + + public function collectData() + { + $item = []; // Create an empty item + + $item['title'] = 'Hello World!'; + + $this->items[] = $item; // Add item to the list + } } -// This line is empty (just imagine it!) ``` # PARAMETERS -You can specify additional parameters in order to customize the bridge (i.e. to specify how many items to return). This document explains how to specify those parameters and which options are available to you. +You can specify additional parameters in order to customize the bridge (i.e. to specify how many items to return). +This document explains how to specify those parameters and which options are available to you. For information on how to read parameter values during execution, please refer to the [getInput](../06_Helper_functions/index.md#getinput) function. @@ -145,15 +162,17 @@ For information on how to read parameter values during execution, please refer t ## Adding parameters to a bridge -Parameters are specified as part of the bridge class. An empty list of parameters is defined as `const PARAMETERS = array();` +Parameters are specified as part of the bridge class. +An empty list of parameters is defined as `const PARAMETERS = [];`
    Show example
    ```PHP -Show example
    ```PHP -const PARAMETERS = array( - 'My Context 1' => array(), - 'My Context 2' => array() -); +const PARAMETERS = [ + 'My Context 1' => [], + 'My Context 2' => [], +]; ``` **Output** @@ -189,37 +209,41 @@ _Notice_: The name of a context can be left empty if only one context is needed!
    Show example
    ```PHP -const PARAMETERS = array( - array() -); +const PARAMETERS = [ + [] +]; ```

    -You can also define a set of parameters that will be applied to every possible context of your bridge. To do this, specify a context named `global`. +You can also define a set of parameters that will be applied to every possible context of your bridge. +To do this, specify a context named `global`.
    Show example
    ```PHP -const PARAMETERS = array( - 'global' => array() // Applies to all contexts! -); +const PARAMETERS = [ + 'global' => [] // Applies to all contexts! +]; ```
    ## Level 2 - Parameter -Parameters are placed inside a context. They are defined as associative array of parameter specifications. Each parameter is defined by it's internal input name, a definition in the form `'n' => array();`, where `n` is the name with which the bridge can access the parameter during execution. +Parameters are placed inside a context. +They are defined as an associative array of parameter specifications. +Each parameter is defined by its internal input name, a definition in the form `'n' => [];`, +where `n` is the name with which the bridge can access the parameter during execution.
    Show example
    ```PHP -const PARAMETERS = array( - 'My Context' => array( - 'n' => array() - ) -); +const PARAMETERS = [ + 'My Context' => [ + 'n' => [], + ] +]; ```

    @@ -229,17 +253,17 @@ The parameter specification consists of various fields, listed in the table belo
    Show example
    ```PHP -const PARAMETERS = array( - 'My Context' => array( - 'n' => array( +const PARAMETERS = [ + 'My Context' => [ + 'n' => [ 'name' => 'Limit', 'type' => 'number', 'required' => false, 'title' => 'Maximum number of items to return', - 'defaultValue' => 10 - ) - ) -); + 'defaultValue' => 10, + ] + ] +]; ``` **Output** @@ -268,29 +292,30 @@ List values are defined in an associative array where keys are the string displa ```PHP ... 'type' => 'list', - 'values' => array( + 'values' => [ 'Item A' => 'itemA' 'Item B' => 'itemB' - ) + ] ... ``` If a more complex organization is required to display the values, the above key/value can be used to set a title as a key and another array as a value: + ```PHP ... 'type' => 'list', - 'values' => array( + 'values' => [ 'Item A' => 'itemA', - 'List 1' => array( + 'List 1' => [ 'Item C' => 'itemC', 'Item D' => 'itemD' - ), - 'List 2' => array( + ], + 'List 2' => [ 'Item E' => 'itemE', 'Item F' => 'itemF' - ), + ], 'Item B' => 'itemB' - ) + ] ... ``` @@ -313,20 +338,18 @@ It provides a way to identify which context the bridge is called with. Example: ```PHP -... - const PARAMETERS = array( - 'By user name' => array( - 'u' => array('name' => 'Username') - ), - 'By user ID' => array( - 'id' => array('name' => 'User ID') - ) - ); - -... +const PARAMETERS = [ + 'By user name' => [ + 'u' => ['name' => 'Username'] + ], + 'By user ID' => [ + 'id' => ['name' => 'User ID'] + ] +]; ``` -In this example `$this->queriedContext` will either return **By user name** or **By user ID**. The queried context might return no value, so the best way to handle it is by using a case-structure: +In this example `$this->queriedContext` will either return **By user name** or **By user ID**. 
+The queried context might return no value, so the best way to handle it is by using a case-structure: ```PHP switch($this->queriedContext){ @@ -339,32 +362,36 @@ switch($this->queriedContext){ ``` # collectData -The `collectData` function is responsible for collecting data and adding items to generate feeds from. If you are unsure how to solve a specific problem, please don't hesitate to open an [Issue](https://github.com/RSS-Bridge/rss-bridge/issues) on GitHub. Existing bridges are also a good source to learn implementing your own bridge. + +The `collectData` function is responsible for collecting data and adding items to generate feeds from. +If you are unsure how to solve a specific problem, please don't hesitate to open an [Issue](https://github.com/RSS-Bridge/rss-bridge/issues) on GitHub. +Existing bridges are also a good source to learn implementing your own bridge. ## Implementing the `collectData` function -Implementation for the `collectData` function is specific to each bridge. However, there are certain reoccurring elements, described below. RSS-Bridge also provides functions to simplify the process of collecting and parsing HTML data (see "Helper Functions" on the sidebar) +Implementation for the `collectData` function is specific to each bridge. +However, there are certain reoccurring elements, described below. RSS-Bridge also provides functions to simplify the process of collecting and parsing HTML data (see "Helper Functions" on the sidebar) -Elements collected by this function must be stored in `$this->items`. The `items` variable is an array of item elements, each of which is an associative array that may contain arbitrary keys. RSS-Bridge specifies common keys which are used to generate most common feed formats. +Elements collected by this function must be stored in `$this->items`. +The `items` variable is an array of item elements, each of which is an associative array that may contain arbitrary keys. 
+RSS-Bridge specifies common keys which are used to generate most common feed formats.
    Show example
    ```PHP - -$item = array(); // Create a new item - +$item = []; $item['title'] = 'Hello World!'; - -$this->items[] = $item; // Add item to the list - +$this->items[] = $item; ```

    + Additional keys may be added for custom APIs (ignored by RSS-Bridge). ## Item parameters -The item array should provide as much information as possible for RSS-Bridge to generate feature rich feeds. Find below list of keys supported by RSS-Bridge. +The item array should provide as much information as possible for RSS-Bridge to generate feature rich feeds. +Find below list of keys supported by RSS-Bridge. ```PHP $item['uri'] // URI to reach the subject ("https://...") @@ -376,139 +403,165 @@ $item['enclosures'] // Array of URIs to an attachments (pictures, files, etc...) $item['categories'] // Array of categories / tags / topics $item['uid'] // A unique ID to identify the current item ``` + All formats support these parameters. The formats `Plaintext` and `JSON` also support custom parameters. # getDescription The `getDescription` function returns the description for a bridge. -**Notice:** By default **RSS-Bridge** returns the contents of `const DESCRIPTION`, so you only have to implement this function if you require different behavior! +**Notice:** By default **RSS-Bridge** returns the contents of `const DESCRIPTION`, +so you only have to implement this function if you require different behavior! ```PHP - public function getDescription(){ - return self::DESCRIPTION; - } +public function getDescription() +{ + return self::DESCRIPTION; +} ``` # getMaintainer The `getMaintainer` function returns the name of the maintainer for a bridge. -**Notice:** By default **RSS-Bridge** returns `const MAINTAINER`, so you only have to implement this function if you require different behavior! +**Notice:** By default **RSS-Bridge** returns `const MAINTAINER`, +so you only have to implement this function if you require different behavior! ```PHP - public function getMaintainer(){ - return self::MAINTAINER; - } +public function getMaintainer() +{ + return self::MAINTAINER; +} ``` # getName + The `getName` function returns the name of a bridge. 
-**Notice:** By default **RSS-Bridge** returns `const NAME`, so you only have to implement this function if you require different behavior! +**Notice:** By default **RSS-Bridge** returns `const NAME`, +so you only have to implement this function if you require different behavior! ```PHP - public function getName(){ - return self::NAME; - } +public function getName() +{ + return self::NAME; +} ``` # getURI + The `getURI` function returns the base URI for a bridge. -**Notice:** By default **RSS-Bridge** returns `const URI`, so you only have to implement this function if you require different behavior! +**Notice:** By default **RSS-Bridge** returns `const URI`, +so you only have to implement this function if you require different behavior! ```PHP - public function getURI(){ - return self::URI; - } +public function getURI() +{ + return self::URI; +} ``` # getIcon + The `getIcon` function returns the URI for an icon, used as favicon in feeds. -If no icon is specified by the bridge, RSS-Bridge will use a default location: `static::URI . '/favicon.ico'` (i.e. "https://github.com/favicon.ico") which may or may not exist. +If no icon is specified by the bridge, +RSS-Bridge will use a default location: `static::URI . '/favicon.ico'` (i.e. "https://github.com/favicon.ico") which may or may not exist. ```PHP - public function getIcon(){ - return static::URI . '/favicon.ico'; - } -``` -# detectParameters -The `detectParameters` function takes a URL and attempts to extract a valid set of parameters for the current bridge. - -If the passed URL is valid for this bridge the function should return an array of parameter -> value pairs that can be used by this bridge, or an empty array if the bridge requires no parameters. If the URL is not relevant for this bridge the function should return `null`. - -**Notice:** Implementing this function is optional. 
By default **RSS-Bridge** tries to match the supplied URL to the `URI` constant defined in the bridge which may be enough for bridges without any parameters defined. - -```PHP -public function detectParameters($url){ - $regex = '/^(https?:\/\/)?(www\.)?(.+?)(\/)?$/'; - if(empty(static::PARAMETERS) - && preg_match($regex, $url, $urlMatches) > 0 - && preg_match($regex, static::URI, $bridgeUriMatches) > 0 - && $urlMatches[3] === $bridgeUriMatches[3]) { - return array(); - } else { - return null; - } +public function getIcon() +{ + return static::URI . '/favicon.ico'; } ``` +# detectParameters + +The `detectParameters` function takes a URL and attempts to extract a valid set of parameters for the current bridge. + +If the passed URL is valid for this bridge, the function should return an array of parameter -> value pairs that can be used by this bridge, including context if available, or an empty array if the bridge requires no parameters. If the URL is not relevant for this bridge, the function should return `null`. + +**Notice:** Implementing this function is optional. By default, **RSS-Bridge** tries to match the supplied URL to the `URI` constant defined in the bridge, which may be enough for bridges without any parameters defined. + +```PHP +public function detectParameters($url) +{ + $regex = '/^(https?:\/\/)?(www\.)?(.+?)(\/)?$/'; + if (empty(static::PARAMETERS) + && preg_match($regex, $url, $urlMatches) > 0 + && preg_match($regex, static::URI, $bridgeUriMatches) > 0 + && $urlMatches[3] === $bridgeUriMatches[3] + ) { + return []; + } else { + return null; + } +} +``` + +**Notice:** This function is also used by the [findFeed](../04_For_Developers/04_Actions.md#findfeed) action. +This action allows an user to get a list of all feeds corresponding to an URL. + +You can implement automated tests for the `detectParameters` function by adding the `TEST_DETECT_PARAMETERS` constant to your bridge class constant. 
+ +`TEST_DETECT_PARAMETERS` is an array, with as key the URL passed to the `detectParameters`function and as value, the array of parameters returned by `detectParameters` + +```PHP +const TEST_DETECT_PARAMETERS = [ + 'https://www.instagram.com/metaverse' => ['context' => 'Username', 'u' => 'metaverse'], + 'https://instagram.com/metaverse' => ['context' => 'Username', 'u' => 'metaverse'], + 'http://www.instagram.com/metaverse' => ['context' => 'Username', 'u' => 'metaverse'], +]; +``` + +**Notice:** Adding this constant is optional. If the constant is not present, no automated test will be executed. + + *** # Helper Methods -`BridgeAbstract` implements helper methods to make it easier for bridge maintainers to create bridges. Use these methods whenever possible instead of writing your own. -- [saveCacheValue](#savecachevalue) -- [loadCacheValue](#loadcachevalue) +`BridgeAbstract` implements helper methods to make it easier for bridge maintainers to create bridges. +Use these methods whenever possible instead of writing your own. ## saveCacheValue -Within the context of the current bridge, stores a value by key in the cache. The value can later be retrieved with [loadCacheValue](#loadcachevalue). + +Within the context of the current bridge, stores a value by key in the cache. +The value can later be retrieved with [loadCacheValue](#loadcachevalue). ```php -protected function saveCacheValue($key, $value) +protected function saveCacheValue($key, $value, $ttl = null) ``` -- `$key` - the name under which the value is stored in the cache. -- `$value` - the value to store in the cache. - -Usage example: +Example: ```php -const MY_KEY = 'MyKey'; - public function collectData() { - $value = 'my value'; - $this->saveCacheValue(MY_KEY, $value); + $this->saveCacheValue('my_key', 'my_value', 3600); // 1h } ``` ## loadCacheValue -Within the context of the current bridge, loads a value by key from cache. Optionally specifies the cache duration for the key. 
Returns `null` if the key doesn't exist or the value is expired. + +Within the context of the current bridge, loads a value by key from cache. +Optionally specifies the cache duration for the key. +Returns `null` if the key doesn't exist or the value is expired. ```php -protected function loadCacheValue($key, $duration = null) +protected function loadCacheValue($key, $default = null) ``` -- `$key` - the name under which the value is stored in the cache. -- `$duration` - the maximum time in seconds after which the value expires. - -Usage example: +Example: ```php -const MY_KEY = 'MyKey'; - public function collectData() { - $value = $this->loadCacheValue(MY_KEY, 1800 /* 30 minutes */); + $value = $this->loadCacheValue('my_key'); - if (!isset($value)){ - // load value - $this->saveCacheValue(MY_KEY, $value); - } - - // ... + if (! $value) { + $this->saveCacheValue('my_key', 'foobar'); + } } ``` diff --git a/docs/05_Bridge_API/03_FeedExpander.md b/docs/05_Bridge_API/03_FeedExpander.md index 7e72670a..62356fc9 100644 --- a/docs/05_Bridge_API/03_FeedExpander.md +++ b/docs/05_Bridge_API/03_FeedExpander.md @@ -1,85 +1,35 @@ -`FeedExpander` extends [`BridgeAbstract`](./02_BridgeAbstract.md) and adds functions to collect data from existing feeds. - **Usage example**: _You have discovered a site that provides feeds which are hidden and inaccessible by normal means. You want your bridge to directly read the feeds and provide them via **RSS-Bridge**_ -To create a new Bridge extending `FeedExpander` you must implement all required functions of [`BridgeAbstract`](./02_BridgeAbstract.md). `FeedExpander` additionally provides following functions: - -* [`parseItem`](#the-parseitem-function) -* [`getName`](#the-getname-function) -* [`getURI`](#the-geturi-function) -* [`getDescription`](#the-getdescription-function) - Find a [template](#template) at the end of this file. **Notice:** For a standard feed only `collectData` need to be implemented. 
`collectData` should call `$this->collectExpandableDatas('your URI here');` to automatically load feed items and header data (will subsequently call `parseItem` for each item in the feed). You can limit the number of items to fetch by specifying an additional parameter for: `$this->collectExpandableDatas('your URI here', 10)` (limited to 10 items). -## The `parseItem` function +## The `parseItem` method -This function receives one item from the current feed and should return one **RSS-Bridge** item. +This method receives one item from the current feed and should return one **RSS-Bridge** item. The default function does all the work to get the item data from the feed, whether it is RSS 1.0, -RSS 2.0 or Atom 1.0. If you have to redefine this function in your **RSS-Bridge** for whatever reason, -you should first call the parent function to initialize the item, then apply the changes that you require. +RSS 2.0 or Atom 1.0. **Notice:** The following code sample is just an example. Implementation depends on your requirements! ```PHP -protected function parseItem($feedItem){ - $item = parent::parseItem($feedItem); - $item['content'] = str_replace('rssbridge','RSS-Bridge',$feedItem->content); - +protected function parseItem(array $item) +{ + $item['content'] = str_replace('rssbridge','RSS-Bridge',$item['content']); return $item; } ``` -### Helper functions +### Feed parsing -The `FeedExpander` already provides a set of functions to parse RSS or Atom items based on the specifications. 
Where possible make use of these functions: - -Function | Description ----------|------------ -`parseATOMItem` | Parses an Atom 1.0 feed item -`parseRSS_0_9_1_Item` | Parses an RSS 0.91 feed item -`parseRSS_1_0_Item` | Parses an RSS 1.0 feed item -`parseRSS_2_0_Item` | Parses an RSS 2.0 feed item - -In the following list you'll find the feed tags assigned to the the **RSS-Bridge** item keys: +How rss-bridge processes xml feeds: Function | uri | title | timestamp | author | content ---------|-----|-------|-----------|--------|-------- -`parseATOMItem` | id | title | updated | author | content -`parseRSS_0_9_1_Item` | link | title | | | description -`parseRSS_1_0_Item` | link | title | dc:date | dc:creator | description -`parseRSS_2_0_Item` | link, guid | title | pubDate, dc:date | author, dc:creator | description - -## The `getName` function - -Returns the name of the current feed. - -```PHP -return $this->name; -``` - -**Notice:** Only implement this function if you require different behavior! - -## The `getURI` function - -Return the uri for the current feed. - -```PHP -return $this->uri; -``` - -**Notice:** Only implement this function if you require different behavior! - -## The `getDescription` function - -Returns the description for the current bridge. - -```PHP -return $this->description; -``` - -**Notice:** Only implement this function if you require different behavior! +`atom` | id | title | updated | author | content +`rss 0.91` | link | title | | | description +`rss 1.0` | link | title | dc:date | dc:creator | description +`rss 2.0` | link, guid | title | pubDate, dc:date | author, dc:creator | description # Template @@ -87,18 +37,19 @@ This is the template for a new bridge: ```PHP collectExpandableDatas('your feed URI'); } } -// Imaginary empty line! 
``` \ No newline at end of file diff --git a/docs/05_Bridge_API/04_WebDriverAbstract.md b/docs/05_Bridge_API/04_WebDriverAbstract.md new file mode 100644 index 00000000..60b5e99d --- /dev/null +++ b/docs/05_Bridge_API/04_WebDriverAbstract.md @@ -0,0 +1,83 @@ +`WebDriverAbstract` extends [`BridgeAbstract`](./02_BridgeAbstract.md) and adds functionality for generating feeds +from active websites that use XMLHttpRequest (XHR) to load content and / or JavaScript to +modify content. +It highly depends on the php-webdriver library which offers Selenium WebDriver bindings for PHP. + +- https://github.com/php-webdriver/php-webdriver (Project Repository) +- https://php-webdriver.github.io/php-webdriver/latest/ (API) + +Please note that this class is intended as a solution for websites _that cannot be covered +by the other classes_. The WebDriver starts a browser and is therefore very resource-intensive. + +# Configuration + +You need a running WebDriver to use bridges that depend on `WebDriverAbstract`. +The easiest way is to start the Selenium server from the project of the same name: +``` +docker run -d -p 4444:4444 --shm-size="2g" docker.io/selenium/standalone-chrome:latest +``` + +- https://github.com/SeleniumHQ/docker-selenium + +With these parameters only one browser window can be started at a time. +On a multi-user site, Selenium Grid should be used +and the number of sessions should be adjusted to the number of processor cores. + +Finally, the `config.ini.php` file must be adjusted so that the WebDriver +can find the Selenium server: +``` +[webdriver] + +selenium_server_url = "http://localhost:4444" +``` + +# Development + +While you are programming a new bridge, it is easier to start a local WebDriver because then you can see what is happening and where the errors are. I've also had good experience recording the process with a screen video to find any timing problems. 
+ +``` +chromedriver --port=4444 +``` + +- https://chromedriver.chromium.org/ + +If you start rss-bridge from a container, then Chrome driver is only accessible +if you call it with the `--allowed-ips` option so that it binds to all network interfaces. + +``` +chromedriver --port=4444 --allowed-ips=192.168.1.42 +``` + +The **most important rule** is that after an event such as loading the web page +or pressing a button, you often have to explicitly wait for the desired elements to appear. + +A simple example is the bridge `ScalableCapitalBlogBridge.php`. +A more complex and relatively complete example is the bridge `GULPProjekteBridge.php`. + +# Template + +Use this template to create your own bridge. + +```PHP +cleanUp(); + } + } +} + +``` \ No newline at end of file diff --git a/docs/05_Bridge_API/04_XPathAbstract.md b/docs/05_Bridge_API/05_XPathAbstract.md similarity index 94% rename from docs/05_Bridge_API/04_XPathAbstract.md rename to docs/05_Bridge_API/05_XPathAbstract.md index cf091edc..28a20a81 100644 --- a/docs/05_Bridge_API/04_XPathAbstract.md +++ b/docs/05_Bridge_API/05_XPathAbstract.md @@ -68,6 +68,9 @@ Should return the XPath expression for extracting an item title from the item co ### Method `getExpressionItemContent()` Should return the XPath expression for extracting an item's content from the item context. +### Method `getSettingUseRawItemContent()` +Should return the 'Use raw item content' setting value (bool true or false). + ### Method `getExpressionItemUri()` Should return the XPath expression for extracting an item link from the item context. @@ -96,7 +99,7 @@ This method should return the HTML source as a base for the XPath expressions. U This method should provide the feed title. Usually the XPath expression defined in `XPATH_EXPRESSION_FEED_TITLE` is used for extracting the title directly from the page source. ### Method `provideFeedIcon()` -This method should provide the feed title. 
Usually the XPath expression defined in `XPATH_EXPRESSION_FEED_ICON` is used for extracting the title directly from the page source. +This method should provide the URL of the feed's favicon. Usually the XPath expression defined in `XPATH_EXPRESSION_FEED_ICON` is used for extracting the icon URL directly from the page source. ### Method `provideFeedItems()` This method should provide the feed items. Usually the XPath expression defined in `XPATH_EXPRESSION_ITEM` is used for extracting the items from the page source. All other XPath expressions are applied on a per-item basis, item by item, and only on the item's contents. @@ -119,8 +122,8 @@ Accepts the items author as parameter, processes and returns it. Should return a ### Method `formatItemTimestamp()` Accepts the items creation timestamp as parameter, processes and returns it. Should return a unix timestamp as integer. -### Method `cleanImageUrl()` -Method invoked for cleaning feed icon and item image URL's. Extracts the image URL from the passed parameter, stripping any additional content. Furthermore makes sure that relative image URL's get transformed to absolute ones. +### Method `cleanMediaUrl()` +Method invoked for cleaning feed icon, item image and media attachment (like .mp3, .webp) URLs. Extracts the media URL from the passed parameter, stripping any additional content. Furthermore, makes sure that relative media URLs get transformed to absolute ones. ### Method `fixEncoding()` Only invoked when class constant `SETTING_FIX_ENCODING` is set to true. It then passes all extracted string values through PHP's `utf8_decode` function. diff --git a/docs/05_Bridge_API/index.md b/docs/05_Bridge_API/index.md index 6115fa01..ea6fd315 100644 --- a/docs/05_Bridge_API/index.md +++ b/docs/05_Bridge_API/index.md @@ -1,9 +1,14 @@ -A _Bridge_ is an class that allows **RSS-Bridge** to create an RSS-feed from a website.
A _Bridge_ represents one element on the [Welcome screen](../01_General/04_Screenshots.md) and covers one or more sites to return feeds for. It is developed in a PHP file located in the `bridges/` folder (see [Folder structure](../04_For_Developers/03_Folder_structure.md)) and extends one of the base classes of **RSS-Bridge**: +A _Bridge_ is a class that allows **RSS-Bridge** to create an RSS-feed from a website. +A _Bridge_ represents one element on the [Welcome screen](../01_General/04_Screenshots.md) +and covers one or more sites to return feeds for. +It is developed in a PHP file located in the `bridges/` folder (see [Folder structure](../04_For_Developers/03_Folder_structure.md)) +and extends one of the base classes of **RSS-Bridge**: Base class | Description -----------|------------ [`BridgeAbstract`](./02_BridgeAbstract.md) | This class is intended for standard _Bridges_ that need to filter HTML pages for content. -[`FeedExpander`](./03_FeedExpander.md) | This class is an extension of `HttpCachingBridgeAbstract`, designed to load existing feeds into **RSS-Bridge** -[`XPathAbstract`](./04_XPathAbstract.md) | This class is meant as an alternative base class for bridge implementations. It offers preliminary functionality for generating feeds based on _XPath expressions_. +[`FeedExpander`](./03_FeedExpander.md) | Expand/modify existing feed urls +[`WebDriverAbstract`](./04_WebDriverAbstract) | +[`XPathAbstract`](./05_XPathAbstract) | This class is meant as an alternative base class for bridge implementations. It offers preliminary functionality for generating feeds based on _XPath expressions_. 
For more information about how to create a new _Bridge_, read [How to create a new Bridge?](./01_How_to_create_a_new_bridge.md) \ No newline at end of file diff --git a/docs/06_Helper_functions/index.md b/docs/06_Helper_functions/index.md index 2f0c513c..4004138e 100644 --- a/docs/06_Helper_functions/index.md +++ b/docs/06_Helper_functions/index.md @@ -5,10 +5,14 @@ The `getInput` function is used to receive a value for a parameter, specified in $this->getInput('your input name here'); ``` -`getInput` will either return the value for your parameter or `null` if the parameter is unknown or not specified. +`getInput` will either return the value for your parameter +or `null` if the parameter is unknown or not specified. + +[Defined in lib/BridgeAbstract.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/BridgeAbstract.php) # getKey -The `getKey` function is used to receive the key name to a selected list value given the name of the list, specified in `const PARAMETERS` +The `getKey` function is used to receive the key name to a selected list +value given the name of the list, specified in `const PARAMETERS` Is able to work with multidimensional list arrays. ```PHP @@ -34,7 +38,10 @@ $this->getKey('country'); // if the selected value was "ve", this function will return "Venezuela" ``` -`getKey` will either return the key name for your parameter or `null` if the parameter is unknown or not specified. +`getKey` will either return the key name for your parameter or `null` if the parameter +is unknown or not specified. + +[Defined in lib/BridgeAbstract.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/BridgeAbstract.php) # getContents The `getContents` function uses [cURL](https://secure.php.net/manual/en/book.curl.php) to acquire data from the specified URI while respecting the various settings defined at a global level by RSS-Bridge (i.e., proxy host, user agent, etc.). 
This function accepts a few parameters: @@ -52,52 +59,64 @@ $opts = array(CURLOPT_POST => 1); $html = getContents($url, $header, $opts); ``` +[Defined in lib/contents.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/contents.php) + # getSimpleHTMLDOM -The `getSimpleHTMLDOM` function is a wrapper for the [simple_html_dom](https://simplehtmldom.sourceforge.io/) [file_get_html](https://simplehtmldom.sourceforge.io/docs/1.9/api/file_get_html/) function in order to provide context by design. +The `getSimpleHTMLDOM` function is a wrapper for the +[simple_html_dom](https://simplehtmldom.sourceforge.io/) [file_get_html](https://simplehtmldom.sourceforge.io/docs/1.9/api/file_get_html/) function in order to provide context by design. ```PHP $html = getSimpleHTMLDOM('your URI'); ``` + +[Defined in lib/contents.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/contents.php) + # getSimpleHTMLDOMCached -The `getSimpleHTMLDOMCached` function does the same as the [`getSimpleHTMLDOM`](#getsimplehtmldom) function, except that the content received for the given URI is stored in a cache and loaded from cache on the next request if the specified cache duration was not reached. Use this function for data that is very unlikely to change between consecutive requests to **RSS-Bridge**. This function allows to specify the cache duration with the second parameter (default is 24 hours / 86400 seconds). +The `getSimpleHTMLDOMCached` function does the same as the +[`getSimpleHTMLDOM`](#getsimplehtmldom) function, +except that the content received for the given URI is stored in a cache +and loaded from cache on the next request if the specified cache duration +was not reached. + +Use this function for data that is very unlikely to change between consecutive requests to **RSS-Bridge**. +This function allows to specify the cache duration with the second parameter. 
```PHP $html = getSimpleHTMLDOMCached('your URI', 86400); // Duration 24h ``` -**Notice:** Due to the current implementation a value greater than 86400 seconds (24 hours) will not work as the cache is purged every 24 hours automatically. +[Defined in lib/contents.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/contents.php) -# returnError -**Notice:** Whenever possible make use of [`returnClientError`](#returnclienterror) or [`returnServerError`](#returnservererror) - -The `returnError` function aborts execution of the current bridge and returns the given error message with the provided error number: +# throwClientException($message = '') +The `throwClientException` function aborts execution of the current bridge. ```PHP -returnError('Your error message', 404); -``` - -Check the [list of error codes](https://en.wikipedia.org/wiki/List_of_HTTP_status_codes) for applicable error numbers. - -# returnClientError -The `returnClientError` function aborts execution of the current bridge and returns the given error message with error code **400**: - -```PHP -returnClientError('Your error message') +throwClientException('Bad user input') ``` Use this function when the user provided invalid parameter or a required parameter is missing. -# returnServerError -The `returnServerError` function aborts execution of the current bridge and returns the given error message with error code **500**: +[Defined in lib/utils.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/utils.php) + +# throwServerException($message = '') +The `throwServerException` function aborts execution of the current bridge. ```PHP -returnServerError('Your error message') +throwServerException('Received empty reply from thirdparty api') ``` -Use this function when a problem occurs that has nothing to do with the parameters provided by the user. (like: Host service gone missing, empty data received, etc...) 
+Use this function when a problem occurs that has nothing to do with the parameters provided by the user. +(like: Host service gone missing, empty data received, etc...) + +[Defined in lib/utils.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/utils.php) + +# throwRateLimitException($message = '') + +Throws a `RateLimitException` which produces an HTTP 429 response. # defaultLinkTo -Automatically replaces any relative URL in a given string or DOM object (i.e. the one returned by [getSimpleHTMLDOM](#getsimplehtmldom)) with an absolute URL. +Automatically replaces any relative URL in a given string or DOM object +(i.e. the one returned by [getSimpleHTMLDOM](#getsimplehtmldom)) with an absolute URL. ```php defaultLinkTo ( mixed $content, string $server ) : object @@ -121,6 +140,8 @@ $html = defaultLinkTo($html, $this->getURI()); // Using bridge URL // ``` +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # backgroundToImg Replaces tags with styles of `backgroud-image` by `` tags. @@ -130,6 +151,8 @@ backgroundToImg(mixed $htmlContent) : object Returns a DOM object (even if provided a string). +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # extractFromDelimiters Extract the first part of a string matching the specified start and end delimiters. ```php @@ -150,6 +173,8 @@ $extracted = extractFromDelimiters($string, $start, $end); // 'John Doe' ``` +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # stripWithDelimiters Remove one or more part(s) of a string using a start and end delimiter. It is the inverse of `extractFromDelimiters`. 
@@ -172,6 +197,8 @@ $cleaned = stripWithDelimiters($string, $start, $end); // 'foobar' ``` +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # stripRecursiveHTMLSection Remove HTML sections containing one or more sections using the same HTML tag. @@ -191,6 +218,8 @@ $cleaned = stripRecursiveHTMLSection($string, $tag_name, $tag_start); // 'foobar' ``` +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # markdownToHtml Converts markdown input to HTML using [Parsedown](https://parsedown.org/). @@ -232,3 +261,84 @@ $html = markdownToHtml($input); //
  • Translation improvements
  • // ``` + +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + +# e +The `e` function is used to convert special characters to HTML entities + +```PHP +e('0 < 1 and 2 > 1'); +``` + +`e` will return the escaped string, which can be rendered as is in HTML + +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + +# truncate +The `truncate` function is used to shorten a string if it exceeds a certain length, and add a string indicating that the string has been shortened. + +```PHP +truncate('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed a neque nunc. Nam nibh sem.', 20 , '...'); +``` + +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + +# sanitize +The `sanitize` function is used to remove some tags from a given HTML text. + +```PHP +$html = 'Sample Page +

    Lorem ipsum dolor sit amet, consectetur adipiscing elit...

    + + +'; +$tags_to_remove = ['script', 'iframe', 'input', 'form']; +$attributes_to_keep = ['title', 'href', 'src']; +$text_to_keep = []; +sanitize($html, $tags_to_remove, $attributes_to_keep, $text_to_keep); +``` + +This function returns a simplehtmldom object of the remaining contents. + +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + +# convertLazyLoading +The `convertLazyLoading` function is used to convert lazy-loading images and frames (video embeds) into static elements. It accepts the HTML content as HTML objects or string objects. It returns the HTML content with fixed image/frame URLs (same type as input). + +```PHP +$html = ' + +

    Hello world!

    + + +convertLazyLoading($html); +``` + +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + +# Json::encode +The `Json::encode` function is used to encode a value as a JSON string. + +```PHP +$array = [ + "foo" => "bar", + "bar" => "foo", +]; +Json::encode($array, true, true); +``` + +[Defined in lib/utils.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/utils.php) + +# Json::decode +The `Json::decode` function is used to decode a JSON string into a PHP variable. + +```PHP +$json = '{ + "foo": "bar", + "bar": "foo" +}'; +Json::decode($json); +``` + +[Defined in lib/utils.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/utils.php) diff --git a/docs/07_Cache_API/02_CacheInterface.md b/docs/07_Cache_API/02_CacheInterface.md index 61127a0d..3e71237d 100644 --- a/docs/07_Cache_API/02_CacheInterface.md +++ b/docs/07_Cache_API/02_CacheInterface.md @@ -3,16 +3,14 @@ See `CacheInterface`. ```php interface CacheInterface { - public function setScope(string $scope): void; + public function get(string $key, $default = null); - public function setKey(array $key): void; + public function set(string $key, $value, int $ttl = null): void; - public function loadData(); + public function delete(string $key): void; - public function saveData($data): void; + public function clear(): void; - public function getTime(): ?int; - - public function purgeCache(int $seconds): void; + public function prune(): void; } -``` \ No newline at end of file +``` diff --git a/docs/07_Cache_API/index.md b/docs/07_Cache_API/index.md index 57692847..17afbacc 100644 --- a/docs/07_Cache_API/index.md +++ b/docs/07_Cache_API/index.md @@ -1,4 +1,6 @@ -A _Cache_ is a class that allows **RSS-Bridge** to store fetched data in a local storage area on the server. Cache imlementations are placed in the `caches/` folder (see [Folder structure](../04_For_Developers/03_Folder_structure.md)).
A cache must implement the [`CacheInterface`](../07_Cache_API/02_CacheInterface.md) interface. - -For more information about how to create a new `Cache`, read [How to create a new cache?](../07_Cache_API/01_How_to_create_a_new_cache.md) +A _Cache_ is a class that allows **RSS-Bridge** to store fetched data in a local storage area on the server. +Cache implementations are placed in the `caches/` folder (see [Folder structure](../04_For_Developers/03_Folder_structure.md)). +A cache must implement the [`CacheInterface`](../07_Cache_API/02_CacheInterface.md) interface. +For more information about how to create a new `Cache`, read +[How to create a new cache?](../07_Cache_API/01_How_to_create_a_new_cache.md) diff --git a/docs/08_Format_API/01_How_to_create_a_new_format.md b/docs/08_Format_API/01_How_to_create_a_new_format.md deleted file mode 100644 index f031e65b..00000000 --- a/docs/08_Format_API/01_How_to_create_a_new_format.md +++ /dev/null @@ -1,24 +0,0 @@ -Create a new file in the `formats/` folder (see [Folder structure](../04_For_Developers/03_Folder_structure.md)). - -The file must be named according to following specification: - -* It starts with the type -* The file name must end with 'Format' -* The file type must be PHP, written in small letters (seriously!) ".php" - -**Examples:** - -Type | Filename ------|--------- -Atom | AtomFormat.php -Html | HtmlFormat.php - -The file must start with the PHP tags and end with an empty line. The closing tag `?>` is [omitted](http://php.net/basic-syntax.instruction-separation).
- -Example: - -```PHP -items = $items; - return $this; - } - - public function getItems(){ - return $this->items; - } - - public function setCharset($charset){ - $this->charset = $charset; - return $this; - } - - public function getCharset(){ - return $this->charset; - } - - public function setExtraInfos(array $infos){ - $this->extraInfos = $infos; - return $this; - } - - public function getExtraInfos(){ - return $this->extraInfos; - } -} -// Imaginary empty line! -``` diff --git a/docs/08_Format_API/index.md b/docs/08_Format_API/index.md deleted file mode 100644 index c5b9e6af..00000000 --- a/docs/08_Format_API/index.md +++ /dev/null @@ -1,9 +0,0 @@ -A Format is a class that allows RSS-Bridge to turn items from a bridge into an RSS-feed format. -It is developed in a PHP file located in the `formats/` folder -[Folder structure](../04_For_Developers/03_Folder_structure.md) -and either implements the -[FormatInterface](../08_Format_API/02_FormatInterface.md) -interface or extends the FormatAbstract class. - -For more information about how to create a new _Format_, read -[How to create a new Format?](./01_How_to_create_a_new_format.md) \ No newline at end of file diff --git a/docs/09_Technical_recommendations/index.md b/docs/09_Technical_recommendations/index.md index f2b15476..92c672ca 100644 --- a/docs/09_Technical_recommendations/index.md +++ b/docs/09_Technical_recommendations/index.md @@ -1,28 +1,32 @@ ## General recommendations -* Use [HTTPS](https://en.wikipedia.org/wiki/HTTPS) (`https://...`) over [HTTP](https://en.wikipedia.org/wiki/HTTPS) (`http://...`) whenever possible - ## Test a site before building a bridge -Some sites make use of anti-bot mechanisms (e.g.: by using JavaScript) in which case they work fine in regular browsers, but not in the PHP environment. To check if a site works with RSS-Bridge, create a new bridge using the [template](../05_Bridge_API/02_BridgeAbstract.md#template) and load a valid URL (not the base URL!). 
+Some sites make use of anti-bot mechanisms (e.g.: by using JavaScript) in which case they work fine in regular browsers, +but not in the PHP environment. RSS-Bridge Docker container by default resorts to using libcurl-impersonate, which helps mitigating anti-bot mechanisms. + +To check if a site works with RSS-Bridge, create a new bridge using the +[template](../05_Bridge_API/02_BridgeAbstract.md#template) +and load a valid URL (not the base URL!). **Example (using github.com)** ```PHP " + +[EconomistBridge] +cookie = "" +``` diff --git a/docs/10_Bridge_Specific/FacebookBridge.md b/docs/10_Bridge_Specific/FacebookBridge.md index c2a1fd0e..665b802f 100644 --- a/docs/10_Bridge_Specific/FacebookBridge.md +++ b/docs/10_Bridge_Specific/FacebookBridge.md @@ -1,18 +1,18 @@ FacebookBridge =============== -Resume of the actual state of this bridge: +State of this bridge: +- Facebook Groups (and probably other sections too) do not work at all - No maintainer -- Need Cookies consent -- New design architecture deployed +- Needs cookie consent support for public pages +- Needs login support see [this example](https://github.com/RSS-Bridge/rss-bridge/issues/1891) for Instagram) for private groups -Due [facebook-redesing](https://engineering.fb.com/2020/05/08/web/facebook-redesign/) +Due to the 2020 [Facebook redesign](https://engineering.fb.com/2020/05/08/web/facebook-redesign/) and the requirement to [accept cookies](https://www.facebook.com/business/help/348535683460989) -users start getting [Problems with Facebook on public RSS-Bridge instances](https://github.com/RSS-Bridge/rss-bridge/issues/2047 ) +users are getting [problems with Facebook on public RSS-Bridge instances](https://github.com/RSS-Bridge/rss-bridge/issues/2047). +Relevant Info +-------------- -[Facebook Cookies](https://www.facebook.com/policy/cookies/) - -"Datr" is a unique identifier for your browser and it has a lifespan of two years. 
- -"c_user" and "xs" cookies to verify the account and have a lifespan of 365 days - +- [Facebook Cookies](https://www.facebook.com/policy/cookies/) +- "Datr" is a unique identifier for your browser and it has a lifespan of two years. +- "c_user" and "xs" cookies are used to verify the account and have a lifespan of 365 days diff --git a/docs/10_Bridge_Specific/FurAffinityBridge.md b/docs/10_Bridge_Specific/FurAffinityBridge.md new file mode 100644 index 00000000..369f41cd --- /dev/null +++ b/docs/10_Bridge_Specific/FurAffinityBridge.md @@ -0,0 +1,13 @@ +FurAffinityBridge +=============== +By default this bridge will only return submissions that are rated "General" and are public. + +To unlock the ability to load submissions that require an account to view or are rated "Mature" and higher, you must set the following in `config.ini.php` with cookies from an existing FurAffinity account with the desired maturity ratings enabled in [Account Settings](https://www.furaffinity.net/controls/settings/). + +``` +[FurAffinityBridge] +aCookie = "your-a-cookie-value-here" ; from cookie "a" +bCookie = "your-b-cookie-value-here" ; from cookie "b" +``` + +To confirm the bridge is authenticated, the name of the authenticating account will be shown in the bridge's name once the bridge has been used at least once. (Example: `user's FurAffinity Bridge`) diff --git a/docs/10_Bridge_Specific/Instagram.md b/docs/10_Bridge_Specific/Instagram.md index 7b9bf6b2..85e7b8a8 100644 --- a/docs/10_Bridge_Specific/Instagram.md +++ b/docs/10_Bridge_Specific/Instagram.md @@ -4,6 +4,8 @@ InstagramBridge To somehow bypass the [rate limiting issue](https://github.com/RSS-Bridge/rss-bridge/issues/1891) it is suggested to deploy a private RSS-Bridge instance that uses a working Instagram account. +**NOTE**: There exist alternative bridges (e.g. PicukiBridge and PicnobBridge) for viewing posts without a working account.
+ Configuration ------------- diff --git a/docs/10_Bridge_Specific/PixivBridge.md b/docs/10_Bridge_Specific/PixivBridge.md new file mode 100644 index 00000000..ba8da2d8 --- /dev/null +++ b/docs/10_Bridge_Specific/PixivBridge.md @@ -0,0 +1,30 @@ +PixivBridge +=============== + +# Image proxy + +As Pixiv requires images to be loaded with the `Referer "https://www.pixiv.net/"` header set, +caching or image proxy is required to use this bridge. + +To turn off image caching, set the `proxy_url` value in this bridge's configuration section of `config.ini.php` +to the url of the proxy. + +The bridge will then use the proxy in this format (essentially replacing `https://i.pximg.net` with the proxy): + +Before: `https://i.pximg.net/img-original/img/0000/00/00/00/00/00/12345678_p0.png` + +After: `https://proxy.example.com/img-original/img/0000/00/00/00/00/00/12345678_p0.png` + +``` +proxy_url = "https://proxy.example.com" +``` + +# Authentication + +Authentication is required to view and search R-18+ and non-public images. +To enable this, set the following in this bridge's configuration in `config.ini.php`. + +```ini +; from cookie "PHPSESSID". Recommend to get in incognito browser. +cookie = "00000000_hashedsessionidhere" +``` \ No newline at end of file diff --git a/docs/10_Bridge_Specific/Substack.md b/docs/10_Bridge_Specific/Substack.md new file mode 100644 index 00000000..7595bbef --- /dev/null +++ b/docs/10_Bridge_Specific/Substack.md @@ -0,0 +1,18 @@ +# SubstackBridge + +[Substack](https://substack.com) provides RSS feeds at `/feed` path, e.g., https://newsletter.pragmaticengineer.com/feed/. However, these feeds have two problems, addressed by this bridge: +- They use RSS 2.0 with the draft [content extension](https://web.resource.org/rss/1.0/modules/content/), which isn't supported by some readers; +- They don't have the full content for paywalled posts. + +Retrieving the full content is only possible _with an active subscription to the blog_. 
If you have one, Substack will return the full feed if it's fetched with the right set of cookies. Figuring out whether it's the intended behaviour is left as an exercise for the reader. + +To obtain the session cookie, authorize at https://substack.com/, open DevTools, go to Application -> Cookies -> https://substack.com, copy the value of `substack.sid` and paste it to the RSS bridge config: + +``` +[SubstackBridge] +sid = "" +``` + +Authorization sometimes requires CAPTCHA, hence this operation is manual. The cookie lives for three months. + +After you've done this, the bridge should return full feeds for your subscriptions. diff --git a/docs/10_Bridge_Specific/Telegram.md b/docs/10_Bridge_Specific/Telegram.md new file mode 100644 index 00000000..528de788 --- /dev/null +++ b/docs/10_Bridge_Specific/Telegram.md @@ -0,0 +1,12 @@ +# TelegramBridge + +By default, it fetches a single page with up to 20 messages. + +To increase this limit, tweak the `max_pages` config: + +```ini +[TelegramBridge] + +; Fetch a maximum of 3 pages (requires 3 http requests) +max_pages = 3 +``` diff --git a/docs/10_Bridge_Specific/Vk2.md b/docs/10_Bridge_Specific/Vk2.md new file mode 100644 index 00000000..7c48ad0a --- /dev/null +++ b/docs/10_Bridge_Specific/Vk2.md @@ -0,0 +1,41 @@ +Vk2Bridge +========= + +Работа этого скрипта основана на [VK API](https://dev.vk.com/reference). +По сравнению с VkBridge у этого скрипта есть свои преимущества и недостатки. + +Преимущества +------------ + +- Стабильность. + Скрипт не зависит от HTML-структуры страницы VK групп или пользователей, которые могут поменяться в любой момент. + +Недостатки +---------- + +- Требуется наличие зарегистрированного в ВК пользователя. + Данный пользователь должен получить `access_token`, который используется для этого скрипта.
+ Подробнее в разделе "Настройка" + +- Количество запросов при выключенном кэше ограничено - [5000 запросов в сутки](https://dev.vk.com/ru/reference/roadmap#%D0%9E%D0%B3%D1%80%D0%B0%D0%BD%D0%B8%D1%87%D0%B5%D0%BD%D0%B8%D1%8F%20API%20%D0%B4%D0%BB%D1%8F%20%D0%BF%D0%BE%D0%B8%D1%81%D0%BA%D0%B0) + +Настройка +--------- + +1. Перейдите по [ссылке](https://oauth.vk.com/oauth/authorize?client_id=5149410&scope=offline&redirect_uri=https://oauth.vk.com/blank.html&display=page&response_type=token) + +2. Авторизуйтесь в приложение `my_personal_app` + +3. Получите ссылку вида `https://oauth.vk.com/blank.html#access_token=MNOGO_BUKAV&expires_in=0&user_id=123456`. + Из этой ссылки скопируйте `MNOGO_BUKAV`. + +4. В `config.ini.php` в раздел Vk2Bridge вставьте `access_token` + +``` +[Vk2Bridge] +access_token = "MNOGO_BUKAV" +``` + +Примечание: в данной инструкции используется приложение, администратор которого является [@em92](https://github.com/em92). +Допускается вместо упомянутого приложения использование своего standalone-приложения. +Для этого надо в ссылке из п.1. заменить значение `client_id` на свой. diff --git a/docs/images/screenshot_rss-bridge_welcome.png b/docs/images/screenshot_rss-bridge_welcome.png index 3ceb432e..9a0dcfc5 100644 Binary files a/docs/images/screenshot_rss-bridge_welcome.png and b/docs/images/screenshot_rss-bridge_welcome.png differ diff --git a/docs/index.md b/docs/index.md index 71fa9f37..c370cb1b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,8 @@ -**RSS-Bridge** is free and open source software for generating Atom or RSS feeds from websites which don't have one. It is written in PHP and intended to run on a Web server. See the [Screenshots](01_General/04_Screenshots.md) for a quick introduction to **RSS-Bridge** +RSS-Bridge is a web application. + +It generates web feeds for websites that don't have one. + +Officially hosted instance: https://rss-bridge.org/bridge01/ - You want to know more about **RSS-Bridge**? 
Check out our **[project goals](01_General/01_Project-goals.md)**. diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index 1b9053fa..8df51189 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -14,52 +14,67 @@ class AtomFormat extends FormatAbstract protected const ATOM_NS = 'http://www.w3.org/2005/Atom'; protected const MRSS_NS = 'http://search.yahoo.com/mrss/'; - const LIMIT_TITLE = 140; - - public function stringify() + public function render(): string { + $document = new \DomDocument('1.0', 'UTF-8'); + $document->formatOutput = true; + $feedUrl = get_current_url(); - $extraInfos = $this->getExtraInfos(); - if (empty($extraInfos['uri'])) { - $uri = REPOSITORY; - } else { - $uri = $extraInfos['uri']; - } - - $document = new \DomDocument('1.0', $this->getCharset()); - $document->formatOutput = true; $feed = $document->createElementNS(self::ATOM_NS, 'feed'); $document->appendChild($feed); $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:media', self::MRSS_NS); - $title = $document->createElement('title'); - $feed->appendChild($title); - $title->setAttribute('type', 'text'); - $title->appendChild($document->createTextNode($extraInfos['name'])); + $feedArray = $this->getFeed(); + foreach ($feedArray as $feedKey => $feedValue) { + if (in_array($feedKey, ['donationUri'])) { + continue; + } + if ($feedKey === 'name') { + $title = $document->createElement('title'); + $feed->appendChild($title); + $title->setAttribute('type', 'text'); + $title->appendChild($document->createTextNode($feedValue)); + } elseif ($feedKey === 'icon') { + if ($feedValue) { + $icon = $document->createElement('icon'); + $feed->appendChild($icon); + $icon->appendChild($document->createTextNode($feedValue)); + + $logo = $document->createElement('logo'); + $feed->appendChild($logo); + $logo->appendChild($document->createTextNode($feedValue)); + } + } elseif ($feedKey === 'uri') { + if ($feedValue) { + $linkAlternate = $document->createElement('link'); + 
$feed->appendChild($linkAlternate); + $linkAlternate->setAttribute('rel', 'alternate'); + $linkAlternate->setAttribute('type', 'text/html'); + $linkAlternate->setAttribute('href', $feedValue); + + $linkSelf = $document->createElement('link'); + $feed->appendChild($linkSelf); + $linkSelf->setAttribute('rel', 'self'); + $linkSelf->setAttribute('type', 'application/atom+xml'); + $linkSelf->setAttribute('href', $feedUrl); + } + } elseif ($feedKey === 'itunes') { + // todo: skip? + } else { + $element = $document->createElement($feedKey); + $feed->appendChild($element); + $element->appendChild($document->createTextNode($feedValue)); + } + } $id = $document->createElement('id'); $feed->appendChild($id); $id->appendChild($document->createTextNode($feedUrl)); - $uriparts = parse_url($uri); - if (empty($extraInfos['icon'])) { - $iconUrl = $uriparts['scheme'] . '://' . $uriparts['host'] . '/favicon.ico'; - } else { - $iconUrl = $extraInfos['icon']; - } - $icon = $document->createElement('icon'); - $feed->appendChild($icon); - $icon->appendChild($document->createTextNode($iconUrl)); - - $logo = $document->createElement('logo'); - $feed->appendChild($logo); - $logo->appendChild($document->createTextNode($iconUrl)); - - $feedTimestamp = gmdate(DATE_ATOM, $this->lastModified); $updated = $document->createElement('updated'); $feed->appendChild($updated); - $updated->appendChild($document->createTextNode($feedTimestamp)); + $updated->appendChild($document->createTextNode(gmdate(DATE_ATOM, $this->lastModified))); // since we can't guarantee that all items have an author, // a global feed author is mandatory @@ -70,19 +85,8 @@ class AtomFormat extends FormatAbstract $author->appendChild($authorName); $authorName->appendChild($document->createTextNode($feedAuthor)); - $linkAlternate = $document->createElement('link'); - $feed->appendChild($linkAlternate); - $linkAlternate->setAttribute('rel', 'alternate'); - $linkAlternate->setAttribute('type', 'text/html'); - 
$linkAlternate->setAttribute('href', $uri); - - $linkSelf = $document->createElement('link'); - $feed->appendChild($linkSelf); - $linkSelf->setAttribute('rel', 'self'); - $linkSelf->setAttribute('type', 'application/atom+xml'); - $linkSelf->setAttribute('href', $feedUrl); - foreach ($this->getItems() as $item) { + $itemArray = $item->toArray(); $entryTimestamp = $item->getTimestamp(); $entryTitle = $item->getTitle(); $entryContent = $item->getContent(); @@ -103,14 +107,10 @@ class AtomFormat extends FormatAbstract $entryID = 'urn:sha1:' . hash('sha1', $entryTitle . $entryContent); } - if (empty($entryTimestamp)) { - $entryTimestamp = $this->lastModified; - } - if (empty($entryTitle)) { $entryTitle = str_replace("\n", ' ', strip_tags($entryContent)); - if (strlen($entryTitle) > self::LIMIT_TITLE) { - $wrapPos = strpos(wordwrap($entryTitle, self::LIMIT_TITLE), "\n"); + if (strlen($entryTitle) > 140) { + $wrapPos = strpos(wordwrap($entryTitle, 140), "\n"); $entryTitle = substr($entryTitle, 0, $wrapPos) . 
'...'; } } @@ -127,20 +127,37 @@ class AtomFormat extends FormatAbstract $title->setAttribute('type', 'html'); $title->appendChild($document->createTextNode($entryTitle)); - $entryTimestamp = gmdate(\DATE_ATOM, $entryTimestamp); - $published = $document->createElement('published'); - $entry->appendChild($published); - $published->appendChild($document->createTextNode($entryTimestamp)); + if ($entryTimestamp) { + $timestamp = gmdate(\DATE_ATOM, $entryTimestamp); - $updated = $document->createElement('updated'); - $entry->appendChild($updated); - $updated->appendChild($document->createTextNode($entryTimestamp)); + $published = $document->createElement('published'); + $entry->appendChild($published); + $published->appendChild($document->createTextNode($timestamp)); + + $updated = $document->createElement('updated'); + $entry->appendChild($updated); + $updated->appendChild($document->createTextNode($timestamp)); + } $id = $document->createElement('id'); $entry->appendChild($id); $id->appendChild($document->createTextNode($entryID)); - if (!empty($entryUri)) { + if (isset($itemArray['itunes'])) { + $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:itunes', self::ITUNES_NS); + foreach ($itemArray['itunes'] as $itunesKey => $itunesValue) { + $itunesProperty = $document->createElementNS(self::ITUNES_NS, $itunesKey); + $entry->appendChild($itunesProperty); + $itunesProperty->appendChild($document->createTextNode($itunesValue)); + } + if (isset($itemArray['enclosure'])) { + $itunesEnclosure = $document->createElement('enclosure'); + $entry->appendChild($itunesEnclosure); + $itunesEnclosure->setAttribute('url', $itemArray['enclosure']['url']); + $itunesEnclosure->setAttribute('length', $itemArray['enclosure']['length']); + $itunesEnclosure->setAttribute('type', $itemArray['enclosure']['type']); + } + } elseif (!empty($entryUri)) { $entryLinkAlternate = $document->createElement('link'); $entry->appendChild($entryLinkAlternate); 
$entryLinkAlternate->setAttribute('rel', 'alternate'); @@ -158,7 +175,7 @@ class AtomFormat extends FormatAbstract $content = $document->createElement('content'); $content->setAttribute('type', 'html'); - $content->appendChild($document->createTextNode(break_annoying_html_tags($entryContent))); + $content->appendChild($document->createTextNode($entryContent)); $entry->appendChild($content); foreach ($item->getEnclosures() as $enclosure) { @@ -182,11 +199,7 @@ class AtomFormat extends FormatAbstract } } - $toReturn = $document->saveXML(); - - // Remove invalid characters - ini_set('mbstring.substitute_character', 'none'); - $toReturn = mb_convert_encoding($toReturn, $this->getCharset(), 'UTF-8'); - return $toReturn; + $xml = $document->saveXML(); + return $xml; } } diff --git a/formats/HtmlFormat.php b/formats/HtmlFormat.php index 9ee4aeea..04721ae1 100644 --- a/formats/HtmlFormat.php +++ b/formats/HtmlFormat.php @@ -4,40 +4,38 @@ class HtmlFormat extends FormatAbstract { const MIME_TYPE = 'text/html'; - public function stringify() + public function render(): string { - $extraInfos = $this->getExtraInfos(); + // This query string is url encoded + $queryString = $_SERVER['QUERY_STRING']; + + // TODO: this should be the proper bridge short name and not user provided string + $bridgeName = $_GET['bridge']; + + $feedArray = $this->getFeed(); $formatFactory = new FormatFactory(); - $buttons = []; - $linkTags = []; - foreach ($formatFactory->getFormatNames() as $format) { - // Dynamically build buttons for all formats (except HTML) - if ($format === 'Html') { + $formats = []; + + // Create all formats (except HTML) + $formatNames = $formatFactory->getFormatNames(); + foreach ($formatNames as $formatName) { + if ($formatName === 'Html') { continue; } - $formatUrl = '?' . str_ireplace('format=Html', 'format=' . 
$format, htmlentities($_SERVER['QUERY_STRING'])); - $buttons[] = [ - 'href' => $formatUrl, - 'value' => $format, - ]; - $linkTags[] = [ - 'href' => $formatUrl, - 'title' => $format, - 'type' => $formatFactory->create($format)->getMimeType(), - ]; - } - - if (Configuration::getConfig('admin', 'donations') && $extraInfos['donationUri'] !== '') { - $buttons[] = [ - 'href' => e($extraInfos['donationUri']), - 'value' => 'Donate to maintainer', + // The format url is relative, but should be absolute in order to help feed readers. + $formatUrl = '?' . str_ireplace('format=Html', 'format=' . $formatName, $queryString); + $formatObject = $formatFactory->create($formatName); + $formats[] = [ + 'url' => $formatUrl, + 'name' => $formatName, + 'type' => $formatObject->getMimeType(), ]; } $items = []; foreach ($this->getItems() as $item) { $items[] = [ - 'url' => $item->getURI() ?: $extraInfos['uri'], + 'url' => $item->getURI() ?: $feedArray['uri'], 'title' => $item->getTitle() ?? '(no title)', 'timestamp' => $item->getTimestamp(), 'author' => $item->getAuthor(), @@ -47,16 +45,19 @@ class HtmlFormat extends FormatAbstract ]; } + $donationUri = null; + if (Configuration::getConfig('admin', 'donations') && $feedArray['donationUri']) { + $donationUri = $feedArray['donationUri']; + } + $html = render_template(__DIR__ . 
'/../templates/html-format.html.php', [ - 'charset' => $this->getCharset(), - 'title' => $extraInfos['name'], - 'linkTags' => $linkTags, - 'uri' => $extraInfos['uri'], - 'buttons' => $buttons, - 'items' => $items, + 'bridge_name' => $bridgeName, + 'title' => $feedArray['name'], + 'formats' => $formats, + 'uri' => $feedArray['uri'], + 'items' => $items, + 'donation_uri' => $donationUri, ]); - // Remove invalid characters - ini_set('mbstring.substitute_character', 'none'); - return mb_convert_encoding($html, $this->getCharset(), 'UTF-8'); + return $html; } } diff --git a/formats/JsonFormat.php b/formats/JsonFormat.php index 5bb1a525..3548ef6e 100644 --- a/formats/JsonFormat.php +++ b/formats/JsonFormat.php @@ -23,20 +23,20 @@ class JsonFormat extends FormatAbstract 'uid', ]; - public function stringify() + public function render(): string { - $host = $_SERVER['HTTP_HOST'] ?? ''; - $extraInfos = $this->getExtraInfos(); + $feedArray = $this->getFeed(); + $data = [ - 'version' => 'https://jsonfeed.org/version/1', - 'title' => empty($extraInfos['name']) ? $host : $extraInfos['name'], - 'home_page_url' => empty($extraInfos['uri']) ? REPOSITORY : $extraInfos['uri'], - 'feed_url' => get_current_url(), + 'version' => 'https://jsonfeed.org/version/1', + 'title' => $feedArray['name'], + 'home_page_url' => $feedArray['uri'], + 'feed_url' => get_current_url(), ]; - if (!empty($extraInfos['icon'])) { - $data['icon'] = $extraInfos['icon']; - $data['favicon'] = $extraInfos['icon']; + if ($feedArray['icon']) { + $data['icon'] = $feedArray['icon']; + $data['favicon'] = $feedArray['icon']; } $items = []; @@ -47,7 +47,7 @@ class JsonFormat extends FormatAbstract $entryTitle = $item->getTitle(); $entryUri = $item->getURI(); $entryTimestamp = $item->getTimestamp(); - $entryContent = $item->getContent() ? break_annoying_html_tags($item->getContent()) : ''; + $entryContent = $item->getContent() ?? 
''; $entryEnclosures = $item->getEnclosures(); $entryCategories = $item->getCategories(); @@ -110,13 +110,8 @@ class JsonFormat extends FormatAbstract } $data['items'] = $items; - /** - * The intention here is to discard non-utf8 byte sequences. - * But the JSON_PARTIAL_OUTPUT_ON_ERROR also discards lots of other errors. - * So consider this a hack. - * Switch to JSON_INVALID_UTF8_IGNORE when PHP 7.2 is the latest platform requirement. - */ - $json = json_encode($data, \JSON_PRETTY_PRINT | \JSON_PARTIAL_OUTPUT_ON_ERROR); + // Ignoring invalid json + $json = json_encode($data, \JSON_PRETTY_PRINT | \JSON_INVALID_UTF8_IGNORE); return $json; } diff --git a/formats/MrssFormat.php b/formats/MrssFormat.php index 3c3dce5a..4066ec73 100644 --- a/formats/MrssFormat.php +++ b/formats/MrssFormat.php @@ -32,24 +32,11 @@ class MrssFormat extends FormatAbstract protected const ATOM_NS = 'http://www.w3.org/2005/Atom'; protected const MRSS_NS = 'http://search.yahoo.com/mrss/'; - const ALLOWED_IMAGE_EXT = [ - '.gif', - '.jpg', - '.png', - ]; - - public function stringify() + public function render(): string { - $feedUrl = get_current_url(); - $extraInfos = $this->getExtraInfos(); - if (empty($extraInfos['uri'])) { - $uri = REPOSITORY; - } else { - $uri = $extraInfos['uri']; - } - - $document = new \DomDocument('1.0', $this->getCharset()); + $document = new \DomDocument('1.0', 'UTF-8'); $document->formatOutput = true; + $feed = $document->createElement('rss'); $document->appendChild($feed); $feed->setAttribute('version', '2.0'); @@ -59,63 +46,86 @@ class MrssFormat extends FormatAbstract $channel = $document->createElement('channel'); $feed->appendChild($channel); - $title = $extraInfos['name']; - $channelTitle = $document->createElement('title'); - $channel->appendChild($channelTitle); - $channelTitle->appendChild($document->createTextNode($title)); + $feedArray = $this->getFeed(); + $uri = $feedArray['uri']; + $title = $feedArray['name']; - $link = $document->createElement('link'); 
- $channel->appendChild($link); - $link->appendChild($document->createTextNode($uri)); + foreach ($feedArray as $feedKey => $feedValue) { + if (in_array($feedKey, ['atom', 'donationUri'])) { + continue; + } + if ($feedKey === 'name') { + $channelTitle = $document->createElement('title'); + $channel->appendChild($channelTitle); + $channelTitle->appendChild($document->createTextNode($title)); - $description = $document->createElement('description'); - $channel->appendChild($description); - $description->appendChild($document->createTextNode($extraInfos['name'])); + $description = $document->createElement('description'); + $channel->appendChild($description); + $description->appendChild($document->createTextNode($title)); + } elseif ($feedKey === 'uri') { + $link = $document->createElement('link'); + $channel->appendChild($link); + $link->appendChild($document->createTextNode($uri)); - $icon = $extraInfos['icon']; - if (!empty($icon) && in_array(substr($icon, -4), self::ALLOWED_IMAGE_EXT)) { - $feedImage = $document->createElement('image'); - $channel->appendChild($feedImage); - $iconUrl = $document->createElement('url'); - $iconUrl->appendChild($document->createTextNode($icon)); - $feedImage->appendChild($iconUrl); - $iconTitle = $document->createElement('title'); - $iconTitle->appendChild($document->createTextNode($title)); - $feedImage->appendChild($iconTitle); - $iconLink = $document->createElement('link'); - $iconLink->appendChild($document->createTextNode($uri)); - $feedImage->appendChild($iconLink); + $linkAlternate = $document->createElementNS(self::ATOM_NS, 'link'); + $channel->appendChild($linkAlternate); + $linkAlternate->setAttribute('rel', 'alternate'); + $linkAlternate->setAttribute('type', 'text/html'); + $linkAlternate->setAttribute('href', $uri); + + $linkSelf = $document->createElementNS(self::ATOM_NS, 'link'); + $channel->appendChild($linkSelf); + $linkSelf->setAttribute('rel', 'self'); + $linkSelf->setAttribute('type', 'application/atom+xml'); + 
$feedUrl = get_current_url(); + $linkSelf->setAttribute('href', $feedUrl); + } elseif ($feedKey === 'icon') { + $icon = $feedValue; + if ($icon) { + $feedImage = $document->createElement('image'); + $channel->appendChild($feedImage); + $iconUrl = $document->createElement('url'); + $iconUrl->appendChild($document->createTextNode($icon)); + $feedImage->appendChild($iconUrl); + $iconTitle = $document->createElement('title'); + $iconTitle->appendChild($document->createTextNode($title)); + $feedImage->appendChild($iconTitle); + $iconLink = $document->createElement('link'); + $iconLink->appendChild($document->createTextNode($uri)); + $feedImage->appendChild($iconLink); + } + } elseif ($feedKey === 'itunes') { + $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:itunes', self::ITUNES_NS); + foreach ($feedValue as $itunesKey => $itunesValue) { + $itunesProperty = $document->createElementNS(self::ITUNES_NS, $itunesKey); + $channel->appendChild($itunesProperty); + $itunesProperty->appendChild($document->createTextNode($itunesValue)); + } + } else { + $element = $document->createElement($feedKey); + $channel->appendChild($element); + $element->appendChild($document->createTextNode($feedValue)); + } } - $linkAlternate = $document->createElementNS(self::ATOM_NS, 'link'); - $channel->appendChild($linkAlternate); - $linkAlternate->setAttribute('rel', 'alternate'); - $linkAlternate->setAttribute('type', 'text/html'); - $linkAlternate->setAttribute('href', $uri); - - $linkSelf = $document->createElementNS(self::ATOM_NS, 'link'); - $channel->appendChild($linkSelf); - $linkSelf->setAttribute('rel', 'self'); - $linkSelf->setAttribute('type', 'application/atom+xml'); - $linkSelf->setAttribute('href', $feedUrl); - foreach ($this->getItems() as $item) { + $itemArray = $item->toArray(); $itemTimestamp = $item->getTimestamp(); $itemTitle = $item->getTitle(); $itemUri = $item->getURI(); - $itemContent = $item->getContent() ? 
break_annoying_html_tags($item->getContent()) : ''; - $entryID = $item->getUid(); + $itemContent = $item->getContent() ?? ''; + $itemUid = $item->getUid(); $isPermaLink = 'false'; - if (empty($entryID) && !empty($itemUri)) { + if (empty($itemUid) && !empty($itemUri)) { // Fallback to provided URI - $entryID = $itemUri; + $itemUid = $itemUri; $isPermaLink = 'true'; } - if (empty($entryID)) { + if (empty($itemUid)) { // Fallback to title and content - $entryID = hash('sha1', $itemTitle . $itemContent); + $itemUid = hash('sha1', $itemTitle . $itemContent); } $entry = $document->createElement('item'); @@ -127,6 +137,23 @@ class MrssFormat extends FormatAbstract $entryTitle->appendChild($document->createTextNode($itemTitle)); } + if (isset($itemArray['itunes'])) { + $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:itunes', self::ITUNES_NS); + foreach ($itemArray['itunes'] as $itunesKey => $itunesValue) { + $itunesProperty = $document->createElementNS(self::ITUNES_NS, $itunesKey); + $entry->appendChild($itunesProperty); + $itunesProperty->appendChild($document->createTextNode($itunesValue)); + } + + if (isset($itemArray['enclosure'])) { + $itunesEnclosure = $document->createElement('enclosure'); + $entry->appendChild($itunesEnclosure); + $itunesEnclosure->setAttribute('url', $itemArray['enclosure']['url']); + $itunesEnclosure->setAttribute('length', $itemArray['enclosure']['length']); + $itunesEnclosure->setAttribute('type', $itemArray['enclosure']['type']); + } + } + if (!empty($itemUri)) { $entryLink = $document->createElement('link'); $entry->appendChild($entryLink); @@ -136,7 +163,7 @@ class MrssFormat extends FormatAbstract $entryGuid = $document->createElement('guid'); $entryGuid->setAttribute('isPermaLink', $isPermaLink); $entry->appendChild($entryGuid); - $entryGuid->appendChild($document->createTextNode($entryID)); + $entryGuid->appendChild($document->createTextNode($itemUid)); if (!empty($itemTimestamp)) { $entryPublished = 
$document->createElement('pubDate'); @@ -164,11 +191,7 @@ class MrssFormat extends FormatAbstract } } - $toReturn = $document->saveXML(); - - // Remove invalid non-UTF8 characters - ini_set('mbstring.substitute_character', 'none'); - $toReturn = mb_convert_encoding($toReturn, $this->getCharset(), 'UTF-8'); - return $toReturn; + $xml = $document->saveXML(); + return $xml; } } diff --git a/formats/PlaintextFormat.php b/formats/PlaintextFormat.php index c8c4e9d6..e93c94b5 100644 --- a/formats/PlaintextFormat.php +++ b/formats/PlaintextFormat.php @@ -4,20 +4,13 @@ class PlaintextFormat extends FormatAbstract { const MIME_TYPE = 'text/plain'; - public function stringify() + public function render(): string { - $items = $this->getItems(); - $data = []; - - foreach ($items as $item) { - $data[] = $item->toArray(); + $feed = $this->getFeed(); + foreach ($this->getItems() as $item) { + $feed['items'][] = $item->toArray(); } - - $toReturn = print_r($data, true); - - // Remove invalid non-UTF8 characters - ini_set('mbstring.substitute_character', 'none'); - $toReturn = mb_convert_encoding($toReturn, $this->getCharset(), 'UTF-8'); - return $toReturn; + $text = print_r($feed, true); + return $text; } } diff --git a/formats/SfeedFormat.php b/formats/SfeedFormat.php index 85d5f608..063e4543 100644 --- a/formats/SfeedFormat.php +++ b/formats/SfeedFormat.php @@ -4,6 +4,31 @@ class SfeedFormat extends FormatAbstract { const MIME_TYPE = 'text/plain'; + public function render(): string + { + $text = ''; + foreach ($this->getItems() as $item) { + $text .= sprintf( + "%s\t%s\t%s\t%s\thtml\t\t%s\t%s\t%s\n", + $item->toArray()['timestamp'], + preg_replace('/\s/', ' ', $item->toArray()['title']), + $item->toArray()['uri'], + $this->escape($item->toArray()['content']), + $item->toArray()['author'], + $this->getFirstEnclosure( + $item->toArray()['enclosures'] + ), + $this->escape( + $this->getCategories( + $item->toArray()['categories'] + ) + ) + ); + } + + return $text; + } + private 
function escape(string $str) { $str = str_replace('\\', '\\\\', $str); @@ -31,39 +56,4 @@ class SfeedFormat extends FormatAbstract } return $toReturn; } - - public function stringify() - { - $items = $this->getItems(); - - $toReturn = ''; - foreach ($items as $item) { - $toReturn .= sprintf( - "%s\t%s\t%s\t%s\thtml\t\t%s\t%s\t%s\n", - $item->toArray()['timestamp'], - preg_replace('/\s/', ' ', $item->toArray()['title']), - $item->toArray()['uri'], - $this->escape($item->toArray()['content']), - $item->toArray()['author'], - $this->getFirstEnclosure( - $item->toArray()['enclosures'] - ), - $this->escape( - $this->getCategories( - $item->toArray()['categories'] - ) - ) - ); - } - - // Remove invalid non-UTF8 characters - ini_set('mbstring.substitute_character', 'none'); - $toReturn = mb_convert_encoding( - $toReturn, - $this->getCharset(), - 'UTF-8' - ); - return $toReturn; - } } -// vi: expandtab diff --git a/index.php b/index.php index 538f1c6e..79403e11 100644 --- a/index.php +++ b/index.php @@ -1,7 +1,75 @@ main($argv ?? []); +require __DIR__ . '/lib/bootstrap.php'; +require __DIR__ . '/lib/config.php'; + +$container = require __DIR__ . '/lib/dependencies.php'; + +$logger = $container['logger']; + +set_exception_handler(function (\Throwable $e) use ($logger) { + $response = new Response(render(__DIR__ . 
'/templates/exception.html.php', ['e' => $e]), 500); + $response->send(); + $logger->error('Uncaught Exception', ['e' => $e]); +}); + +set_error_handler(function ($code, $message, $file, $line) use ($logger) { + // Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED); + if ((error_reporting() & $code) === 0) { + // Deprecation messages and other masked errors are typically ignored here + return false; + } + if (Configuration::getConfig('system', 'env') === 'dev') { + // This might be annoying, but it's for the greater good + throw new \ErrorException($message, 0, $code, $file, $line); + } + $text = sprintf( + '%s at %s line %s', + sanitize_root($message), + sanitize_root($file), + $line + ); + $logger->warning($text); + // todo: return false to prevent default error handler from running? +}); + +// There might be some fatal errors which are not caught by set_error_handler() or \Throwable. +register_shutdown_function(function () use ($logger) { + $error = error_get_last(); + if ($error) { + $message = sprintf( + '(shutdown) %s: %s in %s line %s', + $error['type'], + sanitize_root($error['message']), + sanitize_root($error['file']), + $error['line'] + ); + $logger->error($message); + } +}); + +date_default_timezone_set(Configuration::getConfig('system', 'timezone')); + +$argv = $argv ?? 
null; +if ($argv) { + parse_str(implode('&', array_slice($argv, 1)), $cliArgs); + $request = Request::fromCli($cliArgs); +} else { + $request = Request::fromGlobals(); +} + +$rssBridge = new RssBridge($container); + +$response = $rssBridge->main($request); + +$response->send(); \ No newline at end of file diff --git a/lib/ActionInterface.php b/lib/ActionInterface.php index 4eb9cc65..d2e1c709 100644 --- a/lib/ActionInterface.php +++ b/lib/ActionInterface.php @@ -1,28 +1,6 @@ exit('Access token is not set in this instance', 403); - } - - if (isset($request['access_token'])) { - $accessTokenGiven = $request['access_token']; - } else { - $header = trim($_SERVER['HTTP_AUTHORIZATION'] ?? ''); - $position = strrpos($header, 'Bearer '); - - if ($position !== false) { - $accessTokenGiven = substr($header, $position + 7); - } else { - $accessTokenGiven = ''; - } - } - - if (!$accessTokenGiven) { - $this->exit('No access token given', 403); - } - - if ($accessTokenGiven != $accessTokenInConfig) { - $this->exit('Incorrect access token', 403); - } - } - - private function exit($message, $code) - { - http_response_code($code); - header('content-type: text/plain'); - die($message); - } -} diff --git a/lib/AuthenticationMiddleware.php b/lib/AuthenticationMiddleware.php deleted file mode 100644 index c77e1b91..00000000 --- a/lib/AuthenticationMiddleware.php +++ /dev/null @@ -1,49 +0,0 @@ -renderAuthenticationDialog(); - exit; - } - if ( - Configuration::getConfig('authentication', 'username') === $user - && Configuration::getConfig('authentication', 'password') === $password - ) { - return; - } - print $this->renderAuthenticationDialog(); - exit; - } - - private function renderAuthenticationDialog(): string - { - http_response_code(401); - header('WWW-Authenticate: Basic realm="RSS-Bridge"'); - return render('access-denied.html.php'); - } -} diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index f024393d..f08d201f 100644 --- a/lib/BridgeAbstract.php +++ 
b/lib/BridgeAbstract.php @@ -1,78 +1,24 @@ 'Maximum number of items to return', ]; - /** - * Holds the list of items collected by the bridge - * - * Items must be collected by {@see BridgeInterface::collectData()} - * - * Use {@see BridgeAbstract::getItems()} to access items. - * - * @var array - */ protected array $items = []; - - /** - * Holds the list of input parameters used by the bridge - * - * Do not access this parameter directly! - * Use {@see BridgeAbstract::setInputs()} and {@see BridgeAbstract::getInput()} instead! - * - * @var array - */ protected array $inputs = []; + protected ?string $queriedContext = ''; + private array $configuration = []; + + protected CacheInterface $cache; + protected Logger $logger; + + public function __construct( + CacheInterface $cache, + Logger $logger + ) { + $this->cache = $cache; + $this->logger = $logger; + } + + abstract public function collectData(); + + public function getFeed(): array + { + return [ + 'name' => $this->getName(), + 'uri' => $this->getURI(), + 'donationUri' => $this->getDonationURI(), + 'icon' => $this->getIcon(), + ]; + } + + public function getName() + { + return static::NAME ?? $this->getShortName(); + } + + public function getURI() + { + return static::URI ?? 'https://github.com/RSS-Bridge/rss-bridge/'; + } + + public function getDonationURI(): string + { + return static::DONATION_URI; + } + + public function getIcon() + { + if (static::URI) { + // This favicon may or may not exist + return rtrim(static::URI, '/') . '/favicon.ico'; + } + return ''; + } + + public function getOption(string $name) + { + return $this->configuration[$name] ?? 
null; + } /** - * Holds the name of the queried context - * - * @var string + * The description is only used in bridge card rendering on frontpage */ - protected $queriedContext = ''; + public function getDescription() + { + return static::DESCRIPTION; + } + + public function getMaintainer(): string + { + return static::MAINTAINER; + } + + /** + * A more correct method name would have been "getContexts" + */ + public function getParameters(): array + { + return static::PARAMETERS; + } - /** {@inheritdoc} */ public function getItems() { return $this->items; } - /** - * Sets the input values for a given context. - * - * @param array $inputs Associative array of inputs - * @param string $queriedContext The context name - * @return void - */ - protected function setInputs(array $inputs, $queriedContext) + public function getCacheTimeout() + { + return static::CACHE_TIMEOUT; + } + + public function loadConfiguration() + { + foreach (static::CONFIGURATION as $optionName => $optionValue) { + $section = $this->getShortName(); + $configurationOption = Configuration::getConfig($section, $optionName); + + if ($configurationOption !== null) { + $this->configuration[$optionName] = $configurationOption; + continue; + } + + if (isset($optionValue['required']) && $optionValue['required'] === true) { + throw new \Exception(sprintf('Missing configuration option: %s', $optionName)); + } elseif (isset($optionValue['defaultValue'])) { + $this->configuration[$optionName] = $optionValue['defaultValue']; + } + } + } + + public function setInput(array $input) + { + // This is the submitted context + $contextName = $input['context'] ?? 
null; + if ($contextName) { + // Context hinting (optional) + $this->queriedContext = $contextName; + unset($input['context']); + } + + $contexts = $this->getParameters(); + + if (!$contexts) { + if ($input) { + throw new \Exception('Invalid parameters value(s)'); + } + return; + } + + $validator = new ParameterValidator(); + + // $input IS PASSED BY REFERENCE! + $errors = $validator->validateInput($input, $contexts); + if ($errors !== []) { + $invalidParameterKeys = array_column($errors, 'name'); + throw new \Exception(sprintf('Invalid parameters value(s): %s', implode(', ', $invalidParameterKeys))); + } + + // Guess the context from input data + if (empty($this->queriedContext)) { + $queriedContext = $validator->getQueriedContext($input, $contexts); + $this->queriedContext = $queriedContext; + } + + if (is_null($this->queriedContext)) { + throw new \Exception('Required parameter(s) missing'); + } elseif ($this->queriedContext === false) { + throw new \Exception('Mixed context parameters'); + } + + $this->setInputWithContext($input, $this->queriedContext); + } + + private function setInputWithContext(array $input, $queriedContext) { // Import and assign all inputs to their context - foreach ($inputs as $name => $value) { - foreach (static::PARAMETERS as $context => $set) { - if (array_key_exists($name, static::PARAMETERS[$context])) { + foreach ($input as $name => $value) { + foreach ($this->getParameters() as $context => $set) { + if (array_key_exists($name, $this->getParameters()[$context])) { $this->inputs[$context][$name]['value'] = $value; } } } // Apply default values to missing data - $contexts = [$queriedContext]; - if (array_key_exists('global', static::PARAMETERS)) { - $contexts[] = 'global'; + $contextNames = [$queriedContext]; + if (array_key_exists('global', $this->getParameters())) { + $contextNames[] = 'global'; } - foreach ($contexts as $context) { - foreach (static::PARAMETERS[$context] as $name => $properties) { + foreach ($contextNames as 
$context) { + if (!isset($this->getParameters()[$context])) { + // unknown context provided by client, throw exception here? or continue? + } + + foreach ($this->getParameters()[$context] as $name => $properties) { if (isset($this->inputs[$context][$name]['value'])) { continue; } @@ -153,11 +212,7 @@ abstract class BridgeAbstract implements BridgeInterface switch ($type) { case 'checkbox': - if (!isset($properties['defaultValue'])) { - $this->inputs[$context][$name]['value'] = false; - } else { - $this->inputs[$context][$name]['value'] = $properties['defaultValue']; - } + $this->inputs[$context][$name]['value'] = $input[$context][$name]['value'] ?? false; break; case 'list': if (!isset($properties['defaultValue'])) { @@ -180,14 +235,18 @@ abstract class BridgeAbstract implements BridgeInterface } // Copy global parameter values to the guessed context - if (array_key_exists('global', static::PARAMETERS)) { - foreach (static::PARAMETERS['global'] as $name => $properties) { - if (isset($inputs[$name])) { - $value = $inputs[$name]; - } elseif (isset($properties['defaultValue'])) { - $value = $properties['defaultValue']; + if (array_key_exists('global', $this->getParameters())) { + foreach ($this->getParameters()['global'] as $name => $properties) { + if (isset($input[$name])) { + $value = $input[$name]; } else { - continue; + if (($properties['type'] ?? 
null) === 'checkbox') { + $value = false; + } elseif (isset($properties['defaultValue'])) { + $value = $properties['defaultValue']; + } else { + continue; + } } $this->inputs[$queriedContext][$name]['value'] = $value; } @@ -195,103 +254,17 @@ abstract class BridgeAbstract implements BridgeInterface // Only keep guessed context parameters values if (isset($this->inputs[$queriedContext])) { - $this->inputs = [$queriedContext => $this->inputs[$queriedContext]]; + $this->inputs = [ + $queriedContext => $this->inputs[$queriedContext], + ]; } else { $this->inputs = []; } } - /** - * Set inputs for the bridge - * - * Returns errors and aborts execution if the provided input parameters are - * invalid. - * - * @param array List of input parameters. Each element in this list must - * relate to an item in {@see BridgeAbstract::PARAMETERS} - * @return void - */ - public function setDatas(array $inputs) - { - if (isset($inputs['context'])) { // Context hinting (optional) - $this->queriedContext = $inputs['context']; - unset($inputs['context']); - } - - if (empty(static::PARAMETERS)) { - if (!empty($inputs)) { - throw new \Exception('Invalid parameters value(s)'); - } - - return; - } - - $validator = new ParameterValidator(); - - if (!$validator->validateData($inputs, static::PARAMETERS)) { - $parameters = array_map( - function ($i) { - return $i['name']; - }, // Just display parameter names - $validator->getInvalidParameters() - ); - - throw new \Exception(sprintf('Invalid parameters value(s): %s', implode(', ', $parameters))); - } - - // Guess the context from input data - if (empty($this->queriedContext)) { - $this->queriedContext = $validator->getQueriedContext($inputs, static::PARAMETERS); - } - - if (is_null($this->queriedContext)) { - throw new \Exception('Required parameter(s) missing'); - } elseif ($this->queriedContext === false) { - throw new \Exception('Mixed context parameters'); - } - - $this->setInputs($inputs, $this->queriedContext); - } - - /** - * Loads 
configuration for the bridge - * - * Returns errors and aborts execution if the provided configuration is - * invalid. - * - * @return void - */ - public function loadConfiguration() - { - foreach (static::CONFIGURATION as $optionName => $optionValue) { - $section = $this->getShortName(); - $configurationOption = Configuration::getConfig($section, $optionName); - - if ($configurationOption !== null) { - $this->configuration[$optionName] = $configurationOption; - continue; - } - - if (isset($optionValue['required']) && $optionValue['required'] === true) { - throw new \Exception(sprintf('Missing configuration option: %s', $optionName)); - } elseif (isset($optionValue['defaultValue'])) { - $this->configuration[$optionName] = $optionValue['defaultValue']; - } - } - } - - /** - * Returns the value for the provided input - * - * @param string $input The input name - * @return mixed|null The input value or null if the input is not defined - */ protected function getInput($input) { - if (!isset($this->inputs[$this->queriedContext][$input]['value'])) { - return null; - } - return $this->inputs[$this->queriedContext][$input]['value']; + return $this->inputs[$this->queriedContext][$input]['value'] ?? 
null; } /** @@ -306,17 +279,20 @@ abstract class BridgeAbstract implements BridgeInterface if (!isset($this->inputs[$this->queriedContext][$input]['value'])) { return null; } - if (array_key_exists('global', static::PARAMETERS)) { - if (array_key_exists($input, static::PARAMETERS['global'])) { - $context = 'global'; + + $contexts = $this->getParameters(); + + if (array_key_exists('global', $contexts)) { + if (array_key_exists($input, $contexts['global'])) { + $contextName = 'global'; } } - if (!isset($context)) { - $context = $this->queriedContext; + if (!isset($contextName)) { + $contextName = $this->queriedContext; } $needle = $this->inputs[$this->queriedContext][$input]['value']; - foreach (static::PARAMETERS[$context][$input]['values'] as $first_level_key => $first_level_value) { + foreach ($contexts[$contextName][$input]['values'] as $first_level_key => $first_level_value) { if (!is_array($first_level_value) && $needle === (string)$first_level_value) { return $first_level_key; } elseif (is_array($first_level_value)) { @@ -329,74 +305,14 @@ abstract class BridgeAbstract implements BridgeInterface } } - /** - * Get bridge configuration value - */ - public function getOption($name) - { - return $this->configuration[$name] ?? null; - } - - /** {@inheritdoc} */ - public function getDescription() - { - return static::DESCRIPTION; - } - - /** {@inheritdoc} */ - public function getMaintainer() - { - return static::MAINTAINER; - } - - /** {@inheritdoc} */ - public function getName() - { - return static::NAME; - } - - /** {@inheritdoc} */ - public function getIcon() - { - return static::URI . 
'/favicon.ico'; - } - - /** {@inheritdoc} */ - public function getConfiguration() - { - return static::CONFIGURATION; - } - - /** {@inheritdoc} */ - public function getParameters() - { - return static::PARAMETERS; - } - - /** {@inheritdoc} */ - public function getURI() - { - return static::URI; - } - - /** {@inheritdoc} */ - public function getDonationURI() - { - return static::DONATION_URI; - } - - /** {@inheritdoc} */ - public function getCacheTimeout() - { - return static::CACHE_TIMEOUT; - } - - /** {@inheritdoc} */ public function detectParameters($url) { $regex = '/^(https?:\/\/)?(www\.)?(.+?)(\/)?$/'; + + $contexts = $this->getParameters(); + if ( - empty(static::PARAMETERS) + empty($contexts) && preg_match($regex, $url, $urlMatches) > 0 && preg_match($regex, static::URI, $bridgeUriMatches) > 0 && $urlMatches[3] === $bridgeUriMatches[3] @@ -406,43 +322,14 @@ abstract class BridgeAbstract implements BridgeInterface return null; } - /** - * Loads a cached value for the specified key - * - * @param int $duration Cache duration (optional) - * @return mixed Cached value or null if the key doesn't exist or has expired - */ - protected function loadCacheValue(string $key, $duration = null) + protected function loadCacheValue(string $key, $default = null) { - $cache = RssBridge::getCache(); - // Create class name without the namespace part - $scope = $this->getShortName(); - $cache->setScope($scope); - $cache->setKey([$key]); - $timestamp = $cache->getTime(); - - if ( - $duration - && $timestamp - && $timestamp < time() - $duration - ) { - return null; - } - return $cache->loadData(); + return $this->cache->get($this->getShortName() . '_' . 
$key, $default); } - /** - * Stores a value to cache with the specified key - * - * @param mixed $value Value to cache - */ - protected function saveCacheValue(string $key, $value) + protected function saveCacheValue(string $key, $value, int $ttl = 86400) { - $cache = RssBridge::getCache(); - $scope = $this->getShortName(); - $cache->setScope($scope); - $cache->setKey([$key]); - $cache->saveData($value); + $this->cache->set($this->getShortName() . '_' . $key, $value, $ttl); } public function getShortName(): string diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index 6eef3879..2f171002 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -1,57 +1,34 @@ $bridgeClassName The bridge name - * @param array $formats A list of formats - * @param bool $isActive Indicates if the bridge is active or not - * @return string The bridge card - */ - public static function displayBridgeCard($bridgeClassName, $formats, $isActive = true) - { - $bridgeFactory = new BridgeFactory(); - + public static function render( + BridgeFactory $bridgeFactory, + string $bridgeClassName, + ?string $token + ): string { $bridge = $bridgeFactory->create($bridgeClassName); - $isHttps = strpos($bridge->getURI(), 'https') === 0; - $uri = $bridge->getURI(); $name = $bridge->getName(); $icon = $bridge->getIcon(); $description = $bridge->getDescription(); - $parameters = $bridge->getParameters(); - if (Configuration::getConfig('proxy', 'url') && Configuration::getConfig('proxy', 'by_bridge')) { - $parameters['global']['_noproxy'] = [ - 'name' => 'Disable proxy (' . (Configuration::getConfig('proxy', 'name') ?: Configuration::getConfig('proxy', 'url')) . 
')', - 'type' => 'checkbox' + $contexts = $bridge->getParameters(); + + // Checkbox for disabling of proxy (if enabled) + if ( + Configuration::getConfig('proxy', 'url') + && Configuration::getConfig('proxy', 'by_bridge') + ) { + $proxyName = Configuration::getConfig('proxy', 'name') ?: Configuration::getConfig('proxy', 'url'); + $contexts['global']['_noproxy'] = [ + 'name' => sprintf('Disable proxy (%s)', $proxyName), + 'type' => 'checkbox', ]; } if (Configuration::getConfig('cache', 'custom_timeout')) { - $parameters['global']['_cache_timeout'] = [ + $contexts['global']['_cache_timeout'] = [ 'name' => 'Cache timeout in seconds', 'type' => 'number', 'defaultValue' => $bridge->getCacheTimeout() @@ -60,46 +37,56 @@ final class BridgeCard $shortName = $bridge->getShortName(); $card = << +
    -

    {$name}

    -

    {$description}

    - - -CARD; + +

    #

    +
    - // If we don't have any parameter for the bridge, we print a generic form to load it. - if (count($parameters) === 0) { - $card .= self::getForm($bridgeClassName, $formats, $isActive, $isHttps); +

    {$name}

    +

    {$description}

    - // Display form with cache timeout and/or noproxy options (if enabled) when bridge has no parameters - } elseif (count($parameters) === 1 && array_key_exists('global', $parameters)) { - $card .= self::getForm($bridgeClassName, $formats, $isActive, $isHttps, '', $parameters['global']); + + + + + CARD; + + if (count($contexts) === 0) { + // The bridge has zero parameters + $card .= self::renderForm($bridgeClassName, '', [], $token); + } elseif (count($contexts) === 1 && array_key_exists('global', $contexts)) { + // The bridge has a single context with key 'global' + $card .= self::renderForm($bridgeClassName, '', $contexts['global'], $token); } else { - foreach ($parameters as $parameterName => $parameter) { - if (!is_numeric($parameterName) && $parameterName === 'global') { + // The bridge has one or more contexts (named or unnamed) + foreach ($contexts as $contextName => $contextParameters) { + if ($contextName === 'global') { continue; } - if (array_key_exists('global', $parameters)) { - $parameter = array_merge($parameter, $parameters['global']); + if (array_key_exists('global', $contexts)) { + // Merge the global parameters into current context + $contextParameters = array_merge($contextParameters, $contexts['global']); } - if (!is_numeric($parameterName)) { - $card .= '
    ' . $parameterName . '
    ' . PHP_EOL; + if (!is_numeric($contextName)) { + // This is a named context + $card .= '
    ' . $contextName . '
    ' . PHP_EOL; } - $card .= self::getForm($bridgeClassName, $formats, $isActive, $isHttps, $parameterName, $parameter); + $card .= self::renderForm($bridgeClassName, $contextName, $contextParameters, $token); } } $card .= sprintf('', $bridgeClassName); - if ($bridge->getDonationURI() !== '' && Configuration::getConfig('admin', 'donations')) { + + if (Configuration::getConfig('admin', 'donations') && $bridge->getDonationURI()) { $card .= sprintf( '

    %s ~ Donate

    ', $bridge->getMaintainer(), @@ -113,58 +100,30 @@ CARD; return $card; } - /** - * Get the form header for a bridge card - * - * @param class-string $bridgeClassName The bridge name - * @param bool $isHttps If disabled, adds a warning to the form - * @return string The form header - */ - private static function getFormHeader($bridgeClassName, $isHttps = false, $parameterName = '') - { - $form = << - - -EOD; - - if (!empty($parameterName)) { - $form .= sprintf('', $parameterName); - } - - if (!$isHttps) { - $form .= '
    Warning : -This bridge is not fetching its content through a secure connection
    '; - } - - return $form; - } - - /** - * Get the form body for a bridge - * - * @param class-string $bridgeClassName The bridge name - * @param array $formats A list of supported formats - * @param bool $isActive Indicates if a bridge is enabled or not - * @param bool $isHttps Indicates if a bridge uses HTTPS or not - * @param string $parameterName Sets the bridge context for the current form - * @param array $parameters The bridge parameters - * @return string The form body - */ - private static function getForm( - $bridgeClassName, - $formats, - $isActive = false, - $isHttps = false, - $parameterName = '', - $parameters = [] + private static function renderForm( + string $bridgeClassName, + string $contextName, + array $contextParameters, + ?string $token ) { - $form = self::getFormHeader($bridgeClassName, $isHttps, $parameterName); + $form = << + + + EOD; - if (count($parameters) > 0) { + if (Configuration::getConfig('authentication', 'token') && $token) { + $form .= sprintf('', e($token)); + } + + if (!empty($contextName)) { + $form .= sprintf('', $contextName); + } + + if (count($contextParameters) > 0) { $form .= '
    '; - foreach ($parameters as $id => $inputEntry) { + foreach ($contextParameters as $id => $inputEntry) { if (!isset($inputEntry['exampleValue'])) { $inputEntry['exampleValue'] = ''; } @@ -173,33 +132,39 @@ This bridge is not fetching its content through a secure connection
    '; $inputEntry['defaultValue'] = ''; } - $idArg = 'arg-' - . urlencode($bridgeClassName) - . '-' - . urlencode($parameterName) - . '-' - . urlencode($id); + $idArg = 'arg-' . urlencode($bridgeClassName) . '-' . urlencode($contextName) . '-' . urlencode($id); - $form .= '' - . PHP_EOL; + $inputName = filter_var($inputEntry['name'], FILTER_SANITIZE_FULL_SPECIAL_CHARS); + $form .= '' . PHP_EOL; - if (!isset($inputEntry['type']) || $inputEntry['type'] === 'text') { - $form .= self::getTextInput($inputEntry, $idArg, $id); + if ( + !isset($inputEntry['type']) + || $inputEntry['type'] === 'text' + ) { + $form .= self::getTextInput($inputEntry, $idArg, $id) . "\n"; } elseif ($inputEntry['type'] === 'number') { $form .= self::getNumberInput($inputEntry, $idArg, $id); } elseif ($inputEntry['type'] === 'list') { - $form .= self::getListInput($inputEntry, $idArg, $id); + $form .= self::getListInput($inputEntry, $idArg, $id) . "\n"; } elseif ($inputEntry['type'] === 'checkbox') { $form .= self::getCheckboxInput($inputEntry, $idArg, $id); + } else { + $foo = 2; + // oops? } + $infoText = []; + $infoTextScript = ''; if (isset($inputEntry['title'])) { - $title_filtered = filter_var($inputEntry['title'], FILTER_SANITIZE_FULL_SPECIAL_CHARS); - $form .= 'i'; + $infoText[] = filter_var($inputEntry['title'], FILTER_SANITIZE_FULL_SPECIAL_CHARS); + } + if ($inputEntry['exampleValue'] !== '') { + $infoText[] = "Example (right click to use):\n" . filter_var($inputEntry['exampleValue'], FILTER_SANITIZE_FULL_SPECIAL_CHARS); + $infoTextScript = 'rssbridge_use_placeholder_value(this);'; + } + + if (count($infoText) > 0) { + $form .= 'i'; } else { $form .= ''; } @@ -208,106 +173,39 @@ This bridge is not fetching its content through a secure connection
    '; $form .= '
    '; } - if ($isActive) { - $form .= ''; - } else { - $form .= 'Inactive'; - } + $form .= ''; return $form . '' . PHP_EOL; } - /** - * Get input field attributes - * - * @param array $entry The current entry - * @return string The input field attributes - */ - private static function getInputAttributes($entry) + public static function getTextInput(array $entry, string $id, string $name): string { - $retVal = ''; + $defaultValue = filter_var($entry['defaultValue'], FILTER_SANITIZE_FULL_SPECIAL_CHARS); + $exampleValue = filter_var($entry['exampleValue'], FILTER_SANITIZE_FULL_SPECIAL_CHARS); + $attributes = self::getInputAttributes($entry); - if (isset($entry['required']) && $entry['required'] === true) { - $retVal .= ' required'; - } - - if (isset($entry['pattern'])) { - $retVal .= ' pattern="' . $entry['pattern'] . '"'; - } - - return $retVal; + return sprintf('', $attributes, $id, $defaultValue, $exampleValue, $name); } - /** - * Get text input - * - * @param array $entry The current entry - * @param string $id The field ID - * @param string $name The field name - * @return string The text input field - */ - private static function getTextInput($entry, $id, $name) + public static function getNumberInput(array $entry, string $id, string $name): string { - return '' - . PHP_EOL; + $defaultValue = filter_var($entry['defaultValue'], FILTER_SANITIZE_NUMBER_INT); + $exampleValue = filter_var($entry['exampleValue'], FILTER_SANITIZE_NUMBER_INT); + $attributes = self::getInputAttributes($entry); + + return sprintf('' . "\n", $attributes, $id, $defaultValue, $exampleValue, $name); } - /** - * Get number input - * - * @param array $entry The current entry - * @param string $id The field ID - * @param string $name The field name - * @return string The number input field - */ - private static function getNumberInput($entry, $id, $name) + public static function getListInput(array $entry, string $id, string $name): string { - return '' - . 
PHP_EOL; - } - - /** - * Get list input - * - * @param array $entry The current entry - * @param string $id The field ID - * @param string $name The field name - * @return string The list input field - */ - private static function getListInput($entry, $id, $name) - { - if (isset($entry['required']) && $entry['required'] === true) { - Debug::log('The "required" attribute is not supported for lists.'); + $required = $entry['required'] ?? null; + if ($required) { + trigger_error('The required attribute is not supported for lists'); unset($entry['required']); } - $list = '' . "\n", $attributes, $id, $name); foreach ($entry['values'] as $name => $value) { if (is_array($value)) { @@ -317,17 +215,9 @@ This bridge is not fetching its content through a secure connection
    '; $entry['defaultValue'] === $subname || $entry['defaultValue'] === $subvalue ) { - $list .= ''; + $list .= ''; } else { - $list .= ''; + $list .= ''; } } $list .= ''; @@ -336,17 +226,9 @@ This bridge is not fetching its content through a secure connection
    '; $entry['defaultValue'] === $name || $entry['defaultValue'] === $value ) { - $list .= ''; + $list .= '' . "\n"; } else { - $list .= ''; + $list .= '' . "\n"; } } } @@ -356,30 +238,35 @@ This bridge is not fetching its content through a secure connection
    '; return $list; } - /** - * Get checkbox input - * - * @param array $entry The current entry - * @param string $id The field ID - * @param string $name The field name - * @return string The checkbox input field - */ - private static function getCheckboxInput($entry, $id, $name) + + public static function getCheckboxInput(array $entry, string $id, string $name): string { - if (isset($entry['required']) && $entry['required'] === true) { - Debug::log('The "required" attribute is not supported for checkboxes.'); + $required = $entry['required'] ?? null; + if ($required) { + trigger_error('The required attribute is not supported for checkboxes'); unset($entry['required']); } - return '' - . PHP_EOL; + $checked = $entry['defaultValue'] === 'checked' ? 'checked' : ''; + $attributes = self::getInputAttributes($entry); + + return sprintf('' . "\n", $attributes, $id, $name, $checked); + } + + public static function getInputAttributes(array $entry): string + { + $result = ''; + + $required = $entry['required'] ?? null; + if ($required) { + $result .= ' required'; + } + + $pattern = $entry['pattern'] ?? null; + if ($pattern) { + $result .= ' pattern="' . $pattern . '"'; + } + + return $result; } } diff --git a/lib/BridgeFactory.php b/lib/BridgeFactory.php index db2c394a..c214e44b 100644 --- a/lib/BridgeFactory.php +++ b/lib/BridgeFactory.php @@ -2,11 +2,19 @@ final class BridgeFactory { - private $bridgeClassNames = []; - private $enabledBridges = []; + private CacheInterface $cache; + private Logger $logger; + private array $bridgeClassNames = []; + private array $enabledBridges = []; + private array $missingEnabledBridges = []; + + public function __construct( + CacheInterface $cache, + Logger $logger + ) { + $this->cache = $cache; + $this->logger = $logger; - public function __construct() - { // Create all possible bridge class names from fs foreach (scandir(__DIR__ . 
'/../bridges/') as $file) { if (preg_match('/^([^.]+Bridge)\.php$/U', $file, $m)) { @@ -16,20 +24,26 @@ final class BridgeFactory $enabledBridges = Configuration::getConfig('system', 'enabled_bridges'); if ($enabledBridges === null) { - throw new \Exception('No bridges are enabled... wtf?'); + throw new \Exception('No bridges are enabled...'); } foreach ($enabledBridges as $enabledBridge) { if ($enabledBridge === '*') { $this->enabledBridges = $this->bridgeClassNames; break; } - $this->enabledBridges[] = $this->createBridgeClassName($enabledBridge); + $bridgeClassName = $this->createBridgeClassName($enabledBridge); + if ($bridgeClassName) { + $this->enabledBridges[] = $bridgeClassName; + } else { + $this->missingEnabledBridges[] = $enabledBridge; + $this->logger->info(sprintf('Bridge not found: %s', $enabledBridge)); + } } } - public function create(string $name): BridgeInterface + public function create(string $name): BridgeAbstract { - return new $name(); + return new $name($this->cache, $this->logger); } public function isEnabled(string $bridgeName): bool @@ -42,13 +56,10 @@ final class BridgeFactory $name = self::normalizeBridgeName($bridgeName); $namesLoweredCase = array_map('strtolower', $this->bridgeClassNames); $nameLoweredCase = strtolower($name); - if (! 
in_array($nameLoweredCase, $namesLoweredCase)) { - throw new \Exception(sprintf('Bridge name invalid: %s', $bridgeName)); + return null; } - $index = array_search($nameLoweredCase, $namesLoweredCase); - return $this->bridgeClassNames[$index]; } @@ -67,4 +78,9 @@ final class BridgeFactory { return $this->bridgeClassNames; } + + public function getMissingEnabledBridges(): array + { + return $this->missingEnabledBridges; + } } diff --git a/lib/BridgeInterface.php b/lib/BridgeInterface.php deleted file mode 100644 index b461ed12..00000000 --- a/lib/BridgeInterface.php +++ /dev/null @@ -1,148 +0,0 @@ -logger = $logger; + } + + public function create(?string $name = null): CacheInterface { - $name ??= Configuration::getConfig('cache', 'type'); - if (!$name) { - throw new \Exception('No cache type configured'); - } $cacheNames = []; foreach (scandir(PATH_LIB_CACHES) as $file) { if (preg_match('/^([^.]+)Cache\.php$/U', $file, $m)) { @@ -29,6 +33,7 @@ class CacheFactory if ($index === false) { throw new \InvalidArgumentException(sprintf('Invalid cache name: "%s"', $name)); } + $className = $cacheNames[$index] . 'Cache'; if (!preg_match('/^[A-Z][a-zA-Z0-9-]*$/', $className)) { throw new \InvalidArgumentException(sprintf('Invalid cache classname: "%s"', $className)); @@ -49,7 +54,7 @@ class CacheFactory if (!is_writable($fileCacheConfig['path'])) { throw new \Exception(sprintf('The FileCache path is not writable: %s', $fileCacheConfig['path'])); } - return new FileCache($fileCacheConfig); + return new FileCache($this->logger, $fileCacheConfig); case SQLiteCache::class: if (!extension_loaded('sqlite3')) { throw new \Exception('"sqlite3" extension not loaded. 
Please check "php.ini"'); @@ -66,13 +71,36 @@ class CacheFactory } elseif (!is_dir(dirname($file))) { throw new \Exception(sprintf('Invalid configuration for %s', 'SQLiteCache')); } - return new SQLiteCache([ + return new SQLiteCache($this->logger, [ 'file' => $file, 'timeout' => Configuration::getConfig('SQLiteCache', 'timeout'), 'enable_purge' => Configuration::getConfig('SQLiteCache', 'enable_purge'), ]); case MemcachedCache::class: - return new MemcachedCache(); + if (!extension_loaded('memcached')) { + throw new \Exception('"memcached" extension not loaded. Please check "php.ini"'); + } + $section = 'MemcachedCache'; + $host = Configuration::getConfig($section, 'host'); + $port = Configuration::getConfig($section, 'port'); + if (empty($host) && empty($port)) { + throw new \Exception('Configuration for ' . $section . ' missing.'); + } + if (empty($host)) { + throw new \Exception('"host" param is not set for ' . $section); + } + if (empty($port)) { + throw new \Exception('"port" param is not set for ' . $section); + } + $port = (string) $port; + if (!ctype_digit($port)) { + throw new \Exception('"port" param is invalid for ' . $section); + } + $port = intval($port); + if ($port < 1 || $port > 65535) { + throw new \Exception('"port" param is invalid for ' . $section); + } + return new MemcachedCache($this->logger, $host, $port); default: if (!file_exists(PATH_LIB_CACHES . $className . 
'.php')) { throw new \Exception('Unable to find the cache file'); diff --git a/lib/CacheInterface.php b/lib/CacheInterface.php index 414a9c84..02f522e6 100644 --- a/lib/CacheInterface.php +++ b/lib/CacheInterface.php @@ -2,15 +2,13 @@ interface CacheInterface { - public function setScope(string $scope): void; + public function get(string $key, $default = null); - public function setKey(array $key): void; + public function set(string $key, $value, ?int $ttl = null): void; - public function loadData(); + public function delete(string $key): void; - public function saveData($data): void; + public function clear(): void; - public function getTime(): ?int; - - public function purgeCache(int $seconds): void; + public function prune(): void; } diff --git a/lib/Configuration.php b/lib/Configuration.php index b6b2e4d6..d8f64d21 100644 --- a/lib/Configuration.php +++ b/lib/Configuration.php @@ -1,17 +1,5 @@ $section) { foreach ($section as $key => $value) { @@ -100,11 +36,10 @@ final class Configuration } if (file_exists(__DIR__ . '/../DEBUG')) { - // The debug mode has been moved to config. Preserve existing installs which has this DEBUG file. - self::setConfig('system', 'enable_debug_mode', true); $debug = trim(file_get_contents(__DIR__ . 
'/../DEBUG')); - if ($debug) { - self::setConfig('system', 'debug_mode_whitelist', explode("\n", str_replace("\r", '', $debug))); + if ($debug === '') { + self::setConfig('system', 'env', 'dev'); + self::setConfig('cache', 'type', 'array'); } } @@ -113,7 +48,7 @@ final class Configuration if ($enabledBridges === '*') { self::setConfig('system', 'enabled_bridges', ['*']); } else { - self::setConfig('system', 'enabled_bridges', array_filter(explode("\n", $enabledBridges))); + self::setConfig('system', 'enabled_bridges', array_filter(array_map('trim', explode("\n", $enabledBridges)))); } } @@ -146,6 +81,10 @@ final class Configuration } } + if (!in_array(self::getConfig('system', 'env'), ['dev', 'prod'])) { + self::throwConfigError('system', 'env', 'Must be dev or prod'); + } + if (!is_array(self::getConfig('system', 'enabled_bridges'))) { self::throwConfigError('system', 'enabled_bridges', 'Is not an array'); } @@ -157,13 +96,6 @@ final class Configuration self::throwConfigError('system', 'timezone'); } - if (!is_bool(self::getConfig('system', 'enable_debug_mode'))) { - self::throwConfigError('system', 'enable_debug_mode', 'Is not a valid Boolean'); - } - if (!is_array(self::getConfig('system', 'debug_mode_whitelist') ?: [])) { - self::throwConfigError('system', 'debug_mode_whitelist', 'Is not a valid array'); - } - if (!is_string(self::getConfig('proxy', 'url'))) { self::throwConfigError('proxy', 'url', 'Is not a valid string'); } @@ -211,6 +143,9 @@ final class Configuration if (!is_string(self::getConfig('error', 'output'))) { self::throwConfigError('error', 'output', 'Is not a valid String'); } + if (!in_array(self::getConfig('error', 'output'), ['feed', 'http', 'none'])) { + self::throwConfigError('error', 'output', 'Invalid output'); + } if ( !is_numeric(self::getConfig('error', 'report_limit')) @@ -222,10 +157,16 @@ final class Configuration public static function getConfig(string $section, string $key, $default = null) { + if (self::$config === []) { + throw 
new \Exception('Config has not been loaded'); + } return self::$config[strtolower($section)][strtolower($key)] ?? $default; } - private static function setConfig(string $section, string $key, $value): void + /** + * @internal Please avoid usage + */ + public static function setConfig(string $section, string $key, $value): void { self::$config[strtolower($section)][strtolower($key)] = $value; } @@ -250,6 +191,8 @@ final class Configuration private static function throwConfigError($section, $key, $message = '') { - throw new \Exception("Config [$section] => [$key] is invalid. $message"); + http_response_code(500); + print ("Config [$section] => [$key] is invalid. $message"); + exit(1); } } diff --git a/lib/Container.php b/lib/Container.php new file mode 100644 index 00000000..086bd1f6 --- /dev/null +++ b/lib/Container.php @@ -0,0 +1,35 @@ +values[$offset] = $value; + } + + #[\ReturnTypeWillChange] + public function offsetGet($offset) + { + if (!isset($this->values[$offset])) { + throw new \Exception(sprintf('Unknown container key: "%s"', $offset)); + } + if (!isset($this->resolved[$offset])) { + $this->resolved[$offset] = $this->values[$offset]($this); + } + return $this->resolved[$offset]; + } + + #[\ReturnTypeWillChange] + public function offsetExists($offset) + { + } + + public function offsetUnset($offset): void + { + } +} diff --git a/lib/Debug.php b/lib/Debug.php deleted file mode 100644 index f6a8d105..00000000 --- a/lib/Debug.php +++ /dev/null @@ -1,29 +0,0 @@ -message)); - } - if ($xmlErrors) { - // Render only the first error into exception message - $firstXmlErrorMessage = $xmlErrors[0]->message; - } - throw new \Exception(sprintf('Unable to parse xml from `%s` %s', $url, $firstXmlErrorMessage ?? 
''), 11); - } - // Restore previous behaviour in case other code relies on it being off - libxml_use_internal_errors(false); - - // Commented out because it's spammy - // Debug::log(sprintf("RSS content is ===========\n%s===========", var_export($rssContent, true))); - - switch (true) { - case isset($rssContent->item[0]): - Debug::log('Detected RSS 1.0 format'); - $this->feedType = self::FEED_TYPE_RSS_1_0; - $this->collectRss1($rssContent, $maxItems); - break; - case isset($rssContent->channel[0]): - Debug::log('Detected RSS 0.9x or 2.0 format'); - $this->feedType = self::FEED_TYPE_RSS_2_0; - $this->collectRss2($rssContent, $maxItems); - break; - case isset($rssContent->entry[0]): - Debug::log('Detected ATOM format'); - $this->feedType = self::FEED_TYPE_ATOM_1_0; - $this->collectAtom1($rssContent, $maxItems); - break; - default: - Debug::log(sprintf('Unable to detect feed format from `%s`', $url)); - throw new \Exception(sprintf('Unable to detect feed format from `%s`', $url)); + $xmlString = $this->prepareXml($xmlString); + $feedParser = new FeedParser(); + try { + $this->feed = $feedParser->parseFeed($xmlString); + } catch (\Exception $e) { + // FeedMergeBridge relies on this string + throw new \Exception(sprintf('Failed to parse xml from %s: %s', $url, create_sane_exception_message($e))); } - return $this; - } - - /** - * Collect data from an RSS 1.0 compatible feed - * - * @link http://web.resource.org/rss/1.0/spec RDF Site Summary (RSS) 1.0 - * - * @param string $rssContent The RSS content - * @param int $maxItems Maximum number of items to collect from the feed - * (`-1`: no limit). - * @return void - * - * @todo Instead of passing $maxItems to all functions, just add all items - * and remove excessive items later. 
- */ - protected function collectRss1($rssContent, $maxItems) - { - $this->loadRss2Data($rssContent->channel[0]); - foreach ($rssContent->item as $item) { - $tmp_item = $this->parseItem($item); - if (!empty($tmp_item)) { - $this->items[] = $tmp_item; - } - if ($maxItems !== -1 && count($this->items) >= $maxItems) { - break; + $items = array_slice($this->feed['items'], 0, $maxItems); + // todo: extract parse logic out from FeedParser + foreach ($items as $item) { + // Give bridges a chance to modify the item + $item = $this->parseItem($item); + if ($item) { + $this->items[] = $item; } } } /** - * Collect data from a RSS 2.0 compatible feed + * This method is overridden by bridges * - * @link http://www.rssboard.org/rss-specification RSS 2.0 Specification - * - * @param int $maxItems Maximum number of items to collect from the feed (`-1`: no limit). - * @return void - * - * @todo Instead of passing $maxItems to all functions, just add all items and remove excessive items later. + * @return array */ - protected function collectRss2(\SimpleXMLElement $rssContent, $maxItems) + protected function parseItem(array $item) { - $rssContent = $rssContent->channel[0]; - $this->loadRss2Data($rssContent); - foreach ($rssContent->item as $item) { - $tmp_item = $this->parseItem($item); - if (!empty($tmp_item)) { - $this->items[] = $tmp_item; - } - if ($maxItems !== -1 && count($this->items) >= $maxItems) { - break; - } - } - } - - /** - * Collect data from a Atom 1.0 compatible feed - * - * @link https://tools.ietf.org/html/rfc4287 The Atom Syndication Format - * - * @param object $content The Atom content - * @param int $maxItems Maximum number of items to collect from the feed - * (`-1`: no limit). - * @return void - * - * @todo Instead of passing $maxItems to all functions, just add all items - * and remove excessive items later. 
- */ - protected function collectAtom1($content, $maxItems) - { - $this->loadAtomData($content); - foreach ($content->entry as $item) { - $tmp_item = $this->parseItem($item); - if (!empty($tmp_item)) { - $this->items[] = $tmp_item; - } - if ($maxItems !== -1 && count($this->items) >= $maxItems) { - break; - } - } - } - - /** - * Load RSS 2.0 feed data into RSS-Bridge - * - * @param object $rssContent The RSS content - * @return void - * - * @todo set title, link, description, language, and so on - */ - protected function loadRss2Data($rssContent) - { - $this->title = trim((string)$rssContent->title); - $this->uri = trim((string)$rssContent->link); - - if (!empty($rssContent->image)) { - $this->icon = trim((string)$rssContent->image->url); - } - } - - /** - * Load Atom feed data into RSS-Bridge - * - * @param object $content The Atom content - * @return void - */ - protected function loadAtomData($content) - { - $this->title = (string)$content->title; - - // Find best link (only one, or first of 'alternate') - if (!isset($content->link)) { - $this->uri = ''; - } elseif (count($content->link) === 1) { - $this->uri = (string)$content->link[0]['href']; - } else { - $this->uri = ''; - foreach ($content->link as $link) { - if (strtolower($link['rel']) === 'alternate') { - $this->uri = (string)$link['href']; - break; - } - } - } - - if (!empty($content->icon)) { - $this->icon = (string)$content->icon; - } elseif (!empty($content->logo)) { - $this->icon = (string)$content->logo; - } - } - - /** - * Parse the contents of a single Atom feed item into a RSS-Bridge item for - * further transformation. - * - * @param object $feedItem A single feed item - * @return object The RSS-Bridge item - * - * @todo To reduce confusion, the RSS-Bridge item should maybe have a class - * of its own? 
- */ - protected function parseATOMItem($feedItem) - { - // Some ATOM entries also contain RSS 2.0 fields - $item = $this->parseRss2Item($feedItem); - - if (isset($feedItem->id)) { - $item['uri'] = (string)$feedItem->id; - } - if (isset($feedItem->title)) { - $item['title'] = html_entity_decode((string)$feedItem->title); - } - if (isset($feedItem->updated)) { - $item['timestamp'] = strtotime((string)$feedItem->updated); - } - if (isset($feedItem->author)) { - $item['author'] = (string)$feedItem->author->name; - } - if (isset($feedItem->content)) { - $item['content'] = (string)$feedItem->content; - } - - //When "link" field is present, URL is more reliable than "id" field - if (count($feedItem->link) === 1) { - $item['uri'] = (string)$feedItem->link[0]['href']; - } else { - foreach ($feedItem->link as $link) { - if (strtolower($link['rel']) === 'alternate') { - $item['uri'] = (string)$link['href']; - } - if (strtolower($link['rel']) === 'enclosure') { - $item['enclosures'][] = (string)$link['href']; - } - } - } - return $item; } /** - * Parse the contents of a single RSS 0.91 feed item into a RSS-Bridge item - * for further transformation. - * - * @param object $feedItem A single feed item - * @return object The RSS-Bridge item - * - * @todo To reduce confusion, the RSS-Bridge item should maybe have a class - * of its own? 
- */ - protected function parseRss091Item($feedItem) + * Prepare XML document to make it more acceptable by the parser + * This method can be overriden by bridges to change this behavior + * + * @return string + */ + protected function prepareXml(string $xmlString): string { - $item = []; - if (isset($feedItem->link)) { - $item['uri'] = (string)$feedItem->link; - } - if (isset($feedItem->title)) { - $item['title'] = html_entity_decode((string)$feedItem->title); - } - // rss 0.91 doesn't support timestamps - // rss 0.91 doesn't support authors - // rss 0.91 doesn't support enclosures - if (isset($feedItem->description)) { - $item['content'] = (string)$feedItem->description; - } - return $item; + // Remove problematic escape sequences + $problematicStrings = [ + ' ', + '»', + '’', + ]; + return str_replace($problematicStrings, '', $xmlString); } - /** - * Parse the contents of a single RSS 1.0 feed item into a RSS-Bridge item - * for further transformation. - * - * @param object $feedItem A single feed item - * @return object The RSS-Bridge item - * - * @todo To reduce confusion, the RSS-Bridge item should maybe have a class - * of its own? - */ - protected function parseRss1Item($feedItem) - { - // 1.0 adds optional elements around the 0.91 standard - $item = $this->parseRss091Item($feedItem); - - $namespaces = $feedItem->getNamespaces(true); - if (isset($namespaces['dc'])) { - $dc = $feedItem->children($namespaces['dc']); - if (isset($dc->date)) { - $item['timestamp'] = strtotime((string)$dc->date); - } - if (isset($dc->creator)) { - $item['author'] = (string)$dc->creator; - } - } - - return $item; - } - - /** - * Parse the contents of a single RSS 2.0 feed item into a RSS-Bridge item - * for further transformation. - * - * @param object $feedItem A single feed item - * @return object The RSS-Bridge item - * - * @todo To reduce confusion, the RSS-Bridge item should maybe have a class - * of its own? 
- */ - protected function parseRss2Item($feedItem) - { - // Primary data is compatible to 0.91 with some additional data - $item = $this->parseRss091Item($feedItem); - - $namespaces = $feedItem->getNamespaces(true); - if (isset($namespaces['dc'])) { - $dc = $feedItem->children($namespaces['dc']); - } - if (isset($namespaces['media'])) { - $media = $feedItem->children($namespaces['media']); - } - - if (isset($feedItem->guid)) { - foreach ($feedItem->guid->attributes() as $attribute => $value) { - if ( - $attribute === 'isPermaLink' - && ( - $value === 'true' || ( - filter_var($feedItem->guid, FILTER_VALIDATE_URL) - && (empty($item['uri']) || !filter_var($item['uri'], FILTER_VALIDATE_URL)) - ) - ) - ) { - $item['uri'] = (string)$feedItem->guid; - break; - } - } - } - - if (isset($feedItem->pubDate)) { - $item['timestamp'] = strtotime((string)$feedItem->pubDate); - } elseif (isset($dc->date)) { - $item['timestamp'] = strtotime((string)$dc->date); - } - - if (isset($feedItem->author)) { - $item['author'] = (string)$feedItem->author; - } elseif (isset($feedItem->creator)) { - $item['author'] = (string)$feedItem->creator; - } elseif (isset($dc->creator)) { - $item['author'] = (string)$dc->creator; - } elseif (isset($media->credit)) { - $item['author'] = (string)$media->credit; - } - - if (isset($feedItem->enclosure) && !empty($feedItem->enclosure['url'])) { - $item['enclosures'] = [(string)$feedItem->enclosure['url']]; - } - - return $item; - } - - /** - * Parse the contents of a single feed item, depending on the current feed - * type, into a RSS-Bridge item. 
- * - * @param object $item The current feed item - * @return object A RSS-Bridge item, with (hopefully) the whole content - */ - protected function parseItem($item) - { - switch ($this->feedType) { - case self::FEED_TYPE_RSS_1_0: - return $this->parseRss1Item($item); - case self::FEED_TYPE_RSS_2_0: - return $this->parseRss2Item($item); - case self::FEED_TYPE_ATOM_1_0: - return $this->parseATOMItem($item); - default: - throw new \Exception(sprintf('Unknown version %s!', $this->getInput('version'))); - } - } - - /** {@inheritdoc} */ public function getURI() { - if (!empty($this->uri)) { - return $this->uri; - } - return parent::getURI(); + return $this->feed['uri'] ?? parent::getURI(); } - /** {@inheritdoc} */ public function getName() { - if (!empty($this->title)) { - return $this->title; - } - return parent::getName(); + return $this->feed['title'] ?? parent::getName(); } - /** {@inheritdoc} */ public function getIcon() { - if (!empty($this->icon)) { - return $this->icon; - } - return parent::getIcon(); + return $this->feed['icon'] ?? parent::getIcon(); } } diff --git a/lib/FeedItem.php b/lib/FeedItem.php index 3d1b4509..e94348f7 100644 --- a/lib/FeedItem.php +++ b/lib/FeedItem.php @@ -1,511 +1,36 @@ uri = 'https://www.github.com/rss-bridge/rss-bridge/'; - * $feedItem->title = 'Title'; - * $feedItem->timestamp = strtotime('now'); - * $feedItem->autor = 'Unknown author'; - * $feedItem->content = 'Hello World!'; - * $feedItem->enclosures = array('https://github.com/favicon.ico'); - * $feedItem->categories = array('php', 'rss-bridge', 'awesome'); - * ``` - * - * @param array $item (optional) A legacy item (empty: no legacy support). 
- * @return object A new object of this class - */ - public function __construct($item = []) + public static function fromArray(array $itemArray): self { - if (!is_array($item)) { - Debug::log('Item must be an array!'); - } - - foreach ($item as $key => $value) { - $this->__set($key, $value); + $item = new self(); + foreach ($itemArray as $key => $value) { + $item->__set($key, $value); } + return $item; } - /** - * Get current URI. - * - * Use {@see FeedItem::setURI()} to set the URI. - * - * @return string|null The URI or null if it hasn't been set. - */ - public function getURI() + private function __construct() { - return $this->uri; + global $container; + + // The default NullLogger is for when running the unit tests + $this->logger = $container['logger'] ?? new NullLogger(); } - /** - * Set URI to the full article. - * - * Use {@see FeedItem::getURI()} to get the URI. - * - * _Note_: Removes whitespace from the beginning and end of the URI. - * - * _Remarks_: Uses the attribute "href" or "src" if the provided URI is an - * object of simple_html_dom_node. - * - * @param object|string $uri URI to the full article. - * @return self - */ - public function setURI($uri) - { - $this->uri = null; // Clear previous data - - if ($uri instanceof simple_html_dom_node) { - if ($uri->hasAttribute('href')) { // Anchor - $uri = $uri->href; - } elseif ($uri->hasAttribute('src')) { // Image - $uri = $uri->src; - } else { - Debug::log('The item provided as URI is unknown!'); - } - } - if (!is_string($uri)) { - Debug::log(sprintf('Expected $uri to be string but got %s', gettype($uri))); - return $this; - } - $uri = trim($uri); - // Intentionally doing a weak url validation here because FILTER_VALIDATE_URL is too strict - if (!preg_match('#^https?://#i', $uri)) { - Debug::log(sprintf('Not a valid url: "%s"', $uri)); - return $this; - } - $this->uri = $uri; - return $this; - } - - /** - * Get current title. - * - * Use {@see FeedItem::setTitle()} to set the title. 
- * - * @return string|null The current title or null if it hasn't been set. - */ - public function getTitle() - { - return $this->title; - } - - /** - * Set title. - * - * Use {@see FeedItem::getTitle()} to get the title. - * - * _Note_: Removes whitespace from beginning and end of the title. - * - * @param string $title The title - * @return self - */ - public function setTitle($title) - { - $this->title = null; // Clear previous data - - if (!is_string($title)) { - Debug::log('Title must be a string!'); - } else { - $this->title = truncate(trim($title)); - } - - return $this; - } - - /** - * Get current timestamp. - * - * Use {@see FeedItem::setTimestamp()} to set the timestamp. - * - * @return int|null The current timestamp or null if it hasn't been set. - */ - public function getTimestamp() - { - return $this->timestamp; - } - - /** - * Set timestamp of first release. - * - * _Note_: The timestamp should represent the number of seconds since - * January 1 1970 00:00:00 GMT (Unix time). - * - * _Remarks_: If the provided timestamp is a string (not numeric), this - * function automatically attempts to parse the string using - * [strtotime](http://php.net/manual/en/function.strtotime.php) - * - * @link http://php.net/manual/en/function.strtotime.php strtotime (PHP) - * @link https://en.wikipedia.org/wiki/Unix_time Unix time (Wikipedia) - * - * @param string|int $timestamp A timestamp of when the item was first released - * @return self - */ - public function setTimestamp($timestamp) - { - $this->timestamp = null; // Clear previous data - - if ( - !is_numeric($timestamp) - && !$timestamp = strtotime($timestamp) - ) { - Debug::log('Unable to parse timestamp!'); - } - - if ($timestamp <= 0) { - Debug::log('Timestamp must be greater than zero!'); - } else { - $this->timestamp = $timestamp; - } - - return $this; - } - - /** - * Get the current author name. - * - * Use {@see FeedItem::setAuthor()} to set the author. 
- * - * @return string|null The author or null if it hasn't been set. - */ - public function getAuthor() - { - return $this->author; - } - - /** - * Set the author name. - * - * Use {@see FeedItem::getAuthor()} to get the author. - * - * @param string $author The author name. - * @return self - */ - public function setAuthor($author) - { - $this->author = null; // Clear previous data - - if (!is_string($author)) { - Debug::log('Author must be a string!'); - } else { - $this->author = $author; - } - - return $this; - } - - /** - * Get item content. - * - * Use {@see FeedItem::setContent()} to set the item content. - * - * @return string|null The item content or null if it hasn't been set. - */ - public function getContent() - { - return $this->content; - } - - /** - * Set item content. - * - * Note: This function casts objects of type simple_html_dom and - * simple_html_dom_node to string. - * - * Use {@see FeedItem::getContent()} to get the current item content. - * - * @param string|object $content The item content as text or simple_html_dom object. - * @return self - */ - public function setContent($content) - { - $this->content = null; // Clear previous data - - if ( - $content instanceof simple_html_dom - || $content instanceof simple_html_dom_node - ) { - $content = (string)$content; - } - - if (is_string($content)) { - $this->content = $content; - } else { - Debug::log(sprintf('Feed content must be a string but got %s', gettype($content))); - } - - return $this; - } - - /** - * Get item enclosures. - * - * Use {@see FeedItem::setEnclosures()} to set feed enclosures. - * - * @return array Enclosures as array of enclosure URIs. - */ - public function getEnclosures() - { - return $this->enclosures; - } - - /** - * Set item enclosures. - * - * Use {@see FeedItem::getEnclosures()} to get the current item enclosures. - * - * @param array $enclosures Array of enclosures, where each element links to - * one enclosure. 
- * @return self - */ - public function setEnclosures($enclosures) - { - $this->enclosures = []; - - if (is_array($enclosures)) { - foreach ($enclosures as $enclosure) { - if ( - !filter_var( - $enclosure, - FILTER_VALIDATE_URL, - FILTER_FLAG_PATH_REQUIRED - ) - ) { - Debug::log('Each enclosure must contain a scheme, host and path!'); - } elseif (!in_array($enclosure, $this->enclosures)) { - $this->enclosures[] = $enclosure; - } - } - } else { - Debug::log('Enclosures must be an array!'); - } - - return $this; - } - - /** - * Get item categories. - * - * Use {@see FeedItem::setCategories()} to set item categories. - * - * @param array The item categories. - */ - public function getCategories() - { - return $this->categories; - } - - /** - * Set item categories. - * - * Use {@see FeedItem::getCategories()} to get the current item categories. - * - * @param array $categories Array of categories, where each element defines - * a single category name. - * @return self - */ - public function setCategories($categories) - { - $this->categories = []; - - if (is_array($categories)) { - foreach ($categories as $category) { - if (!is_string($category)) { - Debug::log('Category must be a string!'); - } else { - $this->categories[] = $category; - } - } - } else { - Debug::log('Categories must be an array!'); - } - - return $this; - } - - /** - * Get unique id - * - * Use {@see FeedItem::setUid()} to set the unique id. - * - * @param string The unique id. - */ - public function getUid() - { - return $this->uid; - } - - /** - * Set unique id. - * - * Use {@see FeedItem::getUid()} to get the unique id. 
- * - * @param string $uid A string that uniquely identifies the current item - * @return self - */ - public function setUid($uid) - { - $this->uid = null; // Clear previous data - - if (!is_string($uid)) { - Debug::log('Unique id must be a string!'); - } elseif (preg_match('/^[a-f0-9]{40}$/', $uid)) { - // keep id if it already is a SHA-1 hash - $this->uid = $uid; - } else { - $this->uid = sha1($uid); - } - - return $this; - } - - /** - * Add miscellaneous elements to the item. - * - * @param string $key Name of the element. - * @param mixed $value Value of the element. - * @return self - */ - public function addMisc($key, $value) - { - if (!is_string($key)) { - Debug::log('Key must be a string!'); - } elseif (in_array($key, get_object_vars($this))) { - Debug::log('Key must be unique!'); - } else { - $this->misc[$key] = $value; - } - - return $this; - } - - /** - * Transform current object to array - * - * @return array - */ - public function toArray() - { - return array_merge( - [ - 'uri' => $this->uri, - 'title' => $this->title, - 'timestamp' => $this->timestamp, - 'author' => $this->author, - 'content' => $this->content, - 'enclosures' => $this->enclosures, - 'categories' => $this->categories, - 'uid' => $this->uid, - ], - $this->misc - ); - } - - /** - * Set item property - * - * Allows simple assignment to parameters. This method is slower, but easier - * to implement in some cases: - * - * ```PHP - * $item = new \FeedItem(); - * $item->content = 'Hello World!'; - * $item->my_id = 42; - * ``` - * - * @param string $name Property name - * @param mixed $value Property value - */ public function __set($name, $value) { switch ($name) { @@ -538,15 +63,6 @@ class FeedItem } } - /** - * Get item property - * - * Allows simple assignment to parameters. This method is slower, but easier - * to implement in some cases. 
- * - * @param string $name Property name - * @return mixed Property value - */ public function __get($name) { switch ($name) { @@ -573,4 +89,212 @@ class FeedItem return null; } } + + public function getURI(): ?string + { + return $this->uri; + } + + public function setURI($uri) + { + $this->uri = null; // Clear previous data + + if ($uri instanceof simple_html_dom_node) { + if ($uri->hasAttribute('href')) { // Anchor + $uri = $uri->href; + } elseif ($uri->hasAttribute('src')) { // Image + $uri = $uri->src; + } else { + $this->logger->debug('The item provided as URI is unknown!'); + } + } + if (!is_string($uri)) { + $this->logger->debug(sprintf('Expected $uri to be string but got %s', gettype($uri))); + return; + } + $uri = trim($uri); + // Intentionally doing a weak url validation here because FILTER_VALIDATE_URL is too strict + if (!preg_match('#^https?://#i', $uri)) { + $this->logger->debug(sprintf('Not a valid url: "%s"', $uri)); + return; + } + $this->uri = $uri; + } + + public function getTitle(): ?string + { + return $this->title; + } + + public function setTitle($title) + { + $this->title = null; + if (!is_string($title)) { + $this->logger->debug('Title must be a string: ' . 
print_r($title, true)); + } else { + $this->title = truncate(trim($title)); + } + } + + public function getTimestamp(): ?int + { + return $this->timestamp; + } + + public function setTimestamp($datetime) + { + $this->timestamp = null; + if (is_numeric($datetime)) { + $timestamp = $datetime; + } else { + $timestamp = strtotime($datetime); + if ($timestamp === false) { + $this->logger->debug('Unable to parse timestamp!'); + } + } + if ($timestamp <= 0) { + $this->logger->debug('Timestamp must be greater than zero!'); + } else { + $this->timestamp = $timestamp; + } + } + + public function getAuthor(): ?string + { + return $this->author; + } + + public function setAuthor($author) + { + $this->author = null; + if (!is_string($author)) { + $this->logger->debug('Author must be a string!'); + } else { + $this->author = $author; + } + } + + public function getContent(): ?string + { + return $this->content; + } + + /** + * @param string|array|\simple_html_dom|\simple_html_dom_node $content The item content + */ + public function setContent($content) + { + $this->content = null; + + if ( + $content instanceof simple_html_dom + || $content instanceof simple_html_dom_node + ) { + $content = (string) $content; + } + + if (is_string($content)) { + $this->content = $content; + } else { + $this->logger->debug(sprintf('Unable to convert feed content to string: %s', gettype($content))); + } + } + + public function getEnclosures(): array + { + return $this->enclosures; + } + + public function setEnclosures($enclosures) + { + $this->enclosures = []; + + if (!is_array($enclosures)) { + $this->logger->debug('Enclosures must be an array!'); + return; + } + foreach ($enclosures as $enclosure) { + if ( + !filter_var( + $enclosure, + FILTER_VALIDATE_URL, + FILTER_FLAG_PATH_REQUIRED + ) + ) { + $this->logger->debug('Each enclosure must contain a scheme, host and path!'); + } elseif (!in_array($enclosure, $this->enclosures)) { + $this->enclosures[] = $enclosure; + } + } + } + + public function 
getCategories(): array + { + return $this->categories; + } + + public function setCategories($categories) + { + $this->categories = []; + + if (!is_array($categories)) { + $this->logger->debug('Categories must be an array!'); + return; + } + foreach ($categories as $category) { + if (is_string($category)) { + $this->categories[] = $category; + } else { + $this->logger->debug('Category must be a string!'); + } + } + } + + public function getUid(): ?string + { + return $this->uid; + } + + public function setUid($uid): void + { + $this->uid = null; + if (!is_string($uid)) { + $this->logger->debug(sprintf('uid must be string: %s (%s)', (string) $uid, var_export($uid, true))); + return; + } + if (preg_match('/^[a-f0-9]{40}$/', $uid)) { + // Preserve sha1 hash + $this->uid = $uid; + } else { + $this->uid = sha1($uid); + } + } + + public function addMisc($name, $value) + { + if (!is_string($name)) { + $this->logger->debug('Key must be a string!'); + } elseif (in_array($name, get_object_vars($this))) { + $this->logger->debug('Key must be unique!'); + } else { + $this->misc[$name] = $value; + } + } + + public function toArray(): array + { + return array_merge( + [ + 'uri' => $this->uri, + 'title' => $this->title, + 'timestamp' => $this->timestamp, + 'author' => $this->author, + 'content' => $this->content, + 'enclosures' => $this->enclosures, + 'categories' => $this->categories, + 'uid' => $this->uid, + ], + $this->misc + ); + } } diff --git a/lib/FeedParser.php b/lib/FeedParser.php new file mode 100644 index 00000000..3b133b85 --- /dev/null +++ b/lib/FeedParser.php @@ -0,0 +1,267 @@ +message; + } + throw new \Exception(sprintf('Unable to parse xml: %s', $firstXmlErrorMessage ?? 
'')); + } + $feed = [ + 'title' => null, + 'uri' => null, + 'icon' => null, + 'items' => [], + ]; + if (isset($xml->item[0])) { + // rss 1.0 + $channel = $xml->channel[0]; + $feed['title'] = trim((string)$channel->title); + $feed['uri'] = trim((string)$channel->link); + if (isset($channel->image->url)) { + $feed['icon'] = trim((string)$channel->image->url); + } + foreach ($xml->item as $item) { + $feed['items'][] = $this->parseRss1Item($item); + } + } elseif (isset($xml->channel[0])) { + // rss 2.0 + $channel = $xml->channel[0]; + $feed['title'] = trim((string)$channel->title); + $feed['uri'] = trim((string)$channel->link); + if (isset($channel->image->url)) { + $feed['icon'] = trim((string)$channel->image->url); + } + foreach ($channel->item as $item) { + $feed['items'][] = $this->parseRss2Item($item); + } + } elseif (isset($xml->entry[0])) { + // atom 1.0 + $feed['title'] = (string)$xml->title; + // Find best link (only one, or first of 'alternate') + if (!isset($xml->link)) { + $feed['uri'] = ''; + } elseif (count($xml->link) === 1) { + $feed['uri'] = (string)$xml->link[0]['href']; + } else { + $feed['uri'] = ''; + foreach ($xml->link as $link) { + if (strtolower((string) $link['rel']) === 'alternate') { + $feed['uri'] = (string)$link['href']; + break; + } + } + } + if (isset($xml->icon)) { + $feed['icon'] = (string) $xml->icon; + } elseif (isset($xml->logo)) { + $feed['icon'] = (string) $xml->logo; + } + foreach ($xml->entry as $item) { + $feed['items'][] = $this->parseAtomItem($item); + } + } else { + throw new \Exception('Unable to detect feed format'); + } + + return $feed; + } + + public function parseAtomItem(\SimpleXMLElement $feedItem): array + { + $item = $this->parseRss2Item($feedItem); + if (isset($feedItem->id)) { + $item['uri'] = (string)$feedItem->id; + } + if (isset($feedItem->title)) { + $item['title'] = trim(html_entity_decode((string)$feedItem->title)); + } + if (isset($feedItem->updated)) { + $item['timestamp'] = 
strtotime((string)$feedItem->updated); + } + if (isset($feedItem->author)) { + $item['author'] = (string)$feedItem->author->name; + } + if (isset($feedItem->content)) { + $contentChildren = $feedItem->content->children(); + if (count($contentChildren) > 0) { + $content = ''; + foreach ($contentChildren as $contentChild) { + $content .= $contentChild->asXML(); + } + $item['content'] = $content; + } else { + $item['content'] = (string)$feedItem->content; + } + } + + // When "link" field is present, URL is more reliable than "id" field + if (count($feedItem->link) === 1) { + $item['uri'] = (string)$feedItem->link[0]['href']; + } else { + foreach ($feedItem->link as $link) { + if (strtolower((string) $link['rel']) === 'alternate') { + $item['uri'] = (string)$link['href']; + } + if (strtolower((string) $link['rel']) === 'enclosure') { + $item['enclosures'][] = (string)$link['href']; + } + } + } + return $item; + } + + public function parseRss2Item(\SimpleXMLElement $feedItem): array + { + $item = [ + 'uri' => '', + 'title' => '', + 'content' => '', + 'timestamp' => '', + 'author' => '', + //'uid' => null, + //'categories' => [], + //'enclosures' => [], + ]; + + foreach ($feedItem as $k => $v) { + $hasChildren = count($v) !== 0; + if (!$hasChildren) { + $item[$k] = (string) $v; + } + } + + if (isset($feedItem->link)) { + // todo: trim uri + $item['uri'] = (string)$feedItem->link; + } + if (isset($feedItem->title)) { + $item['title'] = trim(html_entity_decode((string)$feedItem->title)); + } + if (isset($feedItem->description)) { + $item['content'] = (string)$feedItem->description; + } + + $namespaces = $feedItem->getNamespaces(true); + if (isset($namespaces['dc'])) { + $dc = $feedItem->children($namespaces['dc']); + } + if (isset($namespaces['media'])) { + $media = $feedItem->children($namespaces['media']); + } + + if (isset($namespaces['content'])) { + $content = $feedItem->children($namespaces['content']); + $item['content'] = (string) $content; + } + + foreach 
($namespaces as $namespaceName => $namespaceUrl) { + if (in_array($namespaceName, ['', 'content'])) { + continue; + } + $item[$namespaceName] = $this->parseModule($feedItem, $namespaceName, $namespaceUrl); + } + if (isset($namespaces['itunes'])) { + $enclosure = $feedItem->enclosure; + $item['enclosure'] = [ + 'url' => (string) $enclosure['url'], + 'length' => (string) $enclosure['length'], + 'type' => (string) $enclosure['type'], + ]; + } + if (!$item['uri']) { + // Let's use guid as uri if it's a permalink + if (isset($feedItem->guid)) { + foreach ($feedItem->guid->attributes() as $attribute => $value) { + if ($attribute === 'isPermaLink' && ($value === 'true' || (filter_var($feedItem->guid, FILTER_VALIDATE_URL)))) { + $item['uri'] = (string) $feedItem->guid; + break; + } + } + } + } + + $item['timestamp'] = $feedItem->pubDate ?? $dc->date ?? ''; + $item['timestamp'] = strtotime((string) $item['timestamp']); + + $item['author'] = $feedItem->author ?? $feedItem->creator ?? $dc->creator ?? $media->credit ?? 
''; + $item['author'] = (string) $item['author']; + + if (isset($feedItem->enclosure) && !empty($feedItem->enclosure['url'])) { + $item['enclosures'] = [ + (string) $feedItem->enclosure['url'], + ]; + } + return $item; + } + + public function parseRss1Item(\SimpleXMLElement $feedItem): array + { + $item = [ + 'uri' => '', + 'title' => '', + 'content' => '', + 'timestamp' => '', + 'author' => '', + //'uid' => null, + //'categories' => [], + //'enclosures' => [], + ]; + if (isset($feedItem->link)) { + // todo: trim uri + $item['uri'] = (string)$feedItem->link; + } + if (isset($feedItem->title)) { + $item['title'] = html_entity_decode((string)$feedItem->title); + } + if (isset($feedItem->description)) { + $item['content'] = (string)$feedItem->description; + } + $namespaces = $feedItem->getNamespaces(true); + if (isset($namespaces['dc'])) { + $dc = $feedItem->children($namespaces['dc']); + if (isset($dc->date)) { + $item['timestamp'] = strtotime((string)$dc->date); + } + if (isset($dc->creator)) { + $item['author'] = (string)$dc->creator; + } + } + return $item; + } + + private function parseModule(\SimpleXMLElement $element, string $namespaceName, string $namespaceUrl): array + { + // Unfortunately this parses out only node values as string + // TODO: parse attributes too + + $result = []; + $module = $element->children($namespaceUrl); + foreach ($module as $name => $value) { + if (get_class($value) === 'SimpleXMLElement' && $value->count() !== 0) { + $result[$name] = $this->parseModule($value, $namespaceName, $namespaceUrl); + } else { + $result[$name] = (string) $value; + } + } + return $result; + } +} diff --git a/lib/FormatAbstract.php b/lib/FormatAbstract.php index 3289d651..17e733a7 100644 --- a/lib/FormatAbstract.php +++ b/lib/FormatAbstract.php @@ -1,132 +1,56 @@ '', + 'uri' => '', + 'icon' => '', + 'donationUri' => '', + ]; + $this->feed = array_merge($default, $feed); + } + + public function getFeed(): array + { + return $this->feed; + } + + public function 
setItems(array $items): void + { + foreach ($items as $item) { + $this->items[] = FeedItem::fromArray($item); + } + } /** - * @var int $lastModified A timestamp to indicate the last modified time of - * the output data. + * @return FeedItem[] The items */ - protected $lastModified; + public function getItems(): array + { + return $this->items; + } - /** @var array $extraInfos The extra infos */ - protected $extraInfos; - - /** {@inheritdoc} */ - public function getMimeType() + public function getMimeType(): string { return static::MIME_TYPE; } - /** - * {@inheritdoc} - * - * @param string $charset {@inheritdoc} - */ - public function setCharset($charset) - { - $this->charset = $charset; - - return $this; - } - - /** {@inheritdoc} */ - public function getCharset() - { - $charset = $this->charset; - - if (is_null($charset)) { - return static::DEFAULT_CHARSET; - } - return $charset; - } - - /** - * Set the last modified time - * - * @param int $lastModified The last modified time - * @return void - */ - public function setLastModified($lastModified) + public function setLastModified(int $lastModified) { $this->lastModified = $lastModified; } - - /** - * {@inheritdoc} - * - * @param array $items {@inheritdoc} - */ - public function setItems(array $items) - { - $this->items = $items; - - return $this; - } - - /** {@inheritdoc} */ - public function getItems() - { - if (!is_array($this->items)) { - throw new \LogicException(sprintf('Feed the %s with "setItems" method before !', get_class($this))); - } - - return $this->items; - } - - /** - * {@inheritdoc} - * - * @param array $extraInfos {@inheritdoc} - */ - public function setExtraInfos(array $extraInfos = []) - { - foreach (['name', 'uri', 'icon', 'donationUri'] as $infoName) { - if (!isset($extraInfos[$infoName])) { - $extraInfos[$infoName] = ''; - } - } - - $this->extraInfos = $extraInfos; - - return $this; - } - - /** {@inheritdoc} */ - public function getExtraInfos() - { - if (is_null($this->extraInfos)) { // No 
extra info ? - $this->setExtraInfos(); // Define with default value - } - - return $this->extraInfos; - } } diff --git a/lib/FormatFactory.php b/lib/FormatFactory.php index d27d7d6a..e9cbe597 100644 --- a/lib/FormatFactory.php +++ b/lib/FormatFactory.php @@ -1,45 +1,27 @@ folder = $folder; - - // create format names - foreach (scandir($this->folder) as $file) { - if (preg_match('/^([^.]+)Format\.php$/U', $file, $m)) { + $iterator = new \FilesystemIterator(__DIR__ . '/../formats'); + foreach ($iterator as $file) { + if (preg_match('/^([^.]+)Format\.php$/U', $file->getFilename(), $m)) { $this->formatNames[] = $m[1]; } } + sort($this->formatNames); } - /** - * @throws \InvalidArgumentException - * @param string $name The name of the format e.g. "Atom", "Mrss" or "Json" - */ - public function create(string $name): FormatInterface + public function create(string $name): FormatAbstract { if (! preg_match('/^[a-zA-Z0-9-]*$/', $name)) { throw new \InvalidArgumentException('Format name invalid!'); } - $sanitizedName = $this->sanitizeFormatName($name); - if ($sanitizedName === null) { + $sanitizedName = $this->sanitizeName($name); + if (!$sanitizedName) { throw new \InvalidArgumentException(sprintf('Unknown format given `%s`', $name)); } $className = '\\' . $sanitizedName . 
'Format'; @@ -51,15 +33,13 @@ class FormatFactory return $this->formatNames; } - protected function sanitizeFormatName(string $name) + protected function sanitizeName(string $name): ?string { $name = ucfirst(strtolower($name)); - // Trim trailing '.php' if exists if (preg_match('/(.+)(?:\.php)/', $name, $matches)) { $name = $matches[1]; } - // Trim trailing 'Format' if exists if (preg_match('/(.+)(?:Format)/i', $name, $matches)) { $name = $matches[1]; diff --git a/lib/FormatInterface.php b/lib/FormatInterface.php deleted file mode 100644 index c0355804..00000000 --- a/lib/FormatInterface.php +++ /dev/null @@ -1,85 +0,0 @@ -getCode(); - $context['message'] = sanitize_root($e->getMessage()); - $context['file'] = sanitize_root($e->getFile()); - $context['line'] = $e->getLine(); - $context['url'] = get_current_url(); - $context['trace'] = trace_to_call_points(trace_from_exception($e)); - // Don't log these exceptions - // todo: this logic belongs in log handler - $ignoredExceptions = [ - 'You must specify a format', - 'Format name invalid', - 'Unknown format given', - 'Bridge name invalid', - 'Invalid action', - 'twitter: No results for this query', - // telegram - 'Unable to find channel. The channel is non-existing or non-public', - // fb - 'This group is not public! RSS-Bridge only supports public groups!', - // tiktok 404 - 'https://www.tiktok.com/@', - ]; - foreach ($ignoredExceptions as $ignoredException) { - if (str_starts_with($e->getMessage(), $ignoredException)) { - return; - } - } - } - // Intentionally not sanitizing $message - $text = sprintf( - "[%s] rssbridge.%s %s %s\n", - now()->format('Y-m-d H:i:s'), - $level, - $message, - $context ? 
Json::encode($context) : '' - ); - - // Log to stderr/stdout whatever that is - // todo: extract to log handler - error_log($text); - - // Log to file - // todo: extract to log handler - // file_put_contents('/tmp/rss-bridge.log', $text, FILE_APPEND | LOCK_EX); - } -} diff --git a/lib/ParameterValidator.php b/lib/ParameterValidator.php index 31934432..e2783586 100644 --- a/lib/ParameterValidator.php +++ b/lib/ParameterValidator.php @@ -1,236 +1,98 @@ invalid[] = [ - 'name' => $name, - 'reason' => $reason, - ]; - } - - /** - * Return list of invalid parameters. - * - * Each element is an array of 'name' and 'reason'. - * - * @return array List of invalid parameters - */ - public function getInvalidParameters() - { - return $this->invalid; - } - - /** - * Validate value for a text input - * - * @param string $value The value of a text input - * @param string|null $pattern (optional) A regex pattern - * @return string|null The filtered value or null if the value is invalid - */ - private function validateTextValue($value, $pattern = null) - { - if (!is_null($pattern)) { - $filteredValue = filter_var( - $value, - FILTER_VALIDATE_REGEXP, - ['options' => [ - 'regexp' => '/^' . $pattern . 
'$/' - ] - ] - ); - } else { - $filteredValue = filter_var($value); - } - - if ($filteredValue === false) { - return null; - } - - return $filteredValue; - } - - /** - * Validate value for a number input - * - * @param int $value The value of a number input - * @return int|null The filtered value or null if the value is invalid - */ - private function validateNumberValue($value) - { - $filteredValue = filter_var($value, FILTER_VALIDATE_INT); - - if ($filteredValue === false) { - return null; - } - - return $filteredValue; - } - - /** - * Validate value for a checkbox - * - * @param bool $value The value of a checkbox - * @return bool The filtered value - */ - private function validateCheckboxValue($value) - { - return filter_var($value, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE); - } - - /** - * Validate value for a list - * - * @param string $value The value of a list - * @param array $expectedValues A list of expected values - * @return string|null The filtered value or null if the value is invalid - */ - private function validateListValue($value, $expectedValues) - { - $filteredValue = filter_var($value); - - if ($filteredValue === false) { - return null; - } - - if (!in_array($filteredValue, $expectedValues)) { // Check sub-values? - foreach ($expectedValues as $subName => $subValue) { - if (is_array($subValue) && in_array($filteredValue, $subValue)) { - return $filteredValue; - } - } - return null; - } - - return $filteredValue; - } - - /** - * Check if all required parameters are satisfied - * - * @param array $data (ref) A list of input values - * @param array $parameters The bridge parameters - * @return bool True if all parameters are satisfied - */ - public function validateData(&$data, $parameters) - { - if (!is_array($data)) { - return false; - } - - foreach ($data as $name => $value) { - // Some RSS readers add a cache-busting parameter (_=) to feed URLs, detect and ignore them. 
- if ($name === '_') { - continue; - } + $errors = []; + foreach ($input as $name => $value) { $registered = false; - foreach ($parameters as $context => $set) { - if (array_key_exists($name, $set)) { - $registered = true; - if (!isset($set[$name]['type'])) { - $set[$name]['type'] = 'text'; - } + foreach ($contexts as $contextName => $contextParameters) { + if (!array_key_exists($name, $contextParameters)) { + continue; + } + $registered = true; + if (!isset($contextParameters[$name]['type'])) { + // Default type is text + $contextParameters[$name]['type'] = 'text'; + } - switch ($set[$name]['type']) { - case 'number': - $data[$name] = $this->validateNumberValue($value); - break; - case 'checkbox': - $data[$name] = $this->validateCheckboxValue($value); - break; - case 'list': - $data[$name] = $this->validateListValue($value, $set[$name]['values']); - break; - default: - case 'text': - if (isset($set[$name]['pattern'])) { - $data[$name] = $this->validateTextValue($value, $set[$name]['pattern']); - } else { - $data[$name] = $this->validateTextValue($value); - } - break; - } + switch ($contextParameters[$name]['type']) { + case 'number': + $input[$name] = $this->validateNumberValue($value); + break; + case 'checkbox': + $input[$name] = $this->validateCheckboxValue($value); + break; + case 'list': + $input[$name] = $this->validateListValue($value, $contextParameters[$name]['values']); + break; + default: + case 'text': + if (isset($contextParameters[$name]['pattern'])) { + $input[$name] = $this->validateTextValue($value, $contextParameters[$name]['pattern']); + } else { + $input[$name] = $this->validateTextValue($value); + } + break; + } - if (is_null($data[$name]) && isset($set[$name]['required']) && $set[$name]['required']) { - $this->addInvalidParameter($name, 'Parameter is invalid!'); - } + if ( + is_null($input[$name]) + && isset($contextParameters[$name]['required']) + && $contextParameters[$name]['required'] + ) { + $errors[] = ['name' => $name, 'reason' => 
'Parameter is invalid!']; } } if (!$registered) { - $this->addInvalidParameter($name, 'Parameter is not registered!'); + $errors[] = ['name' => $name, 'reason' => 'Parameter is not registered!']; } } - return empty($this->invalid); + return $errors; } /** * Get the name of the context matching the provided inputs * - * @param array $data Associative array of user data - * @param array $parameters Array of bridge parameters + * @param array $input Associative array of user data + * @param array $contexts Array of bridge parameters * @return string|null Returns the context name or null if no match was found */ - public function getQueriedContext($data, $parameters) + public function getQueriedContext(array $input, array $contexts) { $queriedContexts = []; // Detect matching context - foreach ($parameters as $context => $set) { - $queriedContexts[$context] = null; + foreach ($contexts as $contextName => $contextParameters) { + $queriedContexts[$contextName] = null; // Ensure all user data exist in the current context - $notInContext = array_diff_key($data, $set); - if (array_key_exists('global', $parameters)) { - $notInContext = array_diff_key($notInContext, $parameters['global']); + $notInContext = array_diff_key($input, $contextParameters); + if (array_key_exists('global', $contexts)) { + $notInContext = array_diff_key($notInContext, $contexts['global']); } if (count($notInContext) > 0) { continue; } // Check if all parameters of the context are satisfied - foreach ($set as $id => $properties) { - if (isset($data[$id]) && !empty($data[$id])) { - $queriedContexts[$context] = true; + foreach ($contextParameters as $id => $properties) { + if (!empty($input[$id])) { + $queriedContexts[$contextName] = true; } elseif ( isset($properties['type']) && ($properties['type'] === 'checkbox' || $properties['type'] === 'list') ) { continue; } elseif (isset($properties['required']) && $properties['required'] === true) { - $queriedContexts[$context] = false; + 
$queriedContexts[$contextName] = false; break; } } @@ -238,7 +100,7 @@ class ParameterValidator // Abort if one of the globally required parameters is not satisfied if ( - array_key_exists('global', $parameters) + array_key_exists('global', $contexts) && $queriedContexts['global'] === false ) { return null; @@ -248,12 +110,12 @@ class ParameterValidator switch (array_sum($queriedContexts)) { case 0: // Found no match, is there a context without parameters? - if (isset($data['context'])) { - return $data['context']; + if (isset($input['context'])) { + return $input['context']; } - foreach ($queriedContexts as $context => $queried) { + foreach ($queriedContexts as $context2 => $queried) { if (is_null($queried)) { - return $context; + return $context2; } } return null; @@ -264,4 +126,50 @@ class ParameterValidator return false; } } + + private function validateTextValue($value, $pattern = null) + { + if (is_null($pattern)) { + // No filtering taking place + $filteredValue = filter_var($value); + } else { + $filteredValue = filter_var($value, FILTER_VALIDATE_REGEXP, ['options' => ['regexp' => '/^' . $pattern . '$/']]); + } + if ($filteredValue === false) { + return null; + } + return $filteredValue; + } + + private function validateNumberValue($value) + { + $filteredValue = filter_var($value, FILTER_VALIDATE_INT); + if ($filteredValue === false) { + return null; + } + return $filteredValue; + } + + private function validateCheckboxValue($value) + { + return filter_var($value, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE); + } + + private function validateListValue($value, $expectedValues) + { + $filteredValue = filter_var($value); + if ($filteredValue === false) { + return null; + } + if (!in_array($filteredValue, $expectedValues)) { + // Check sub-values? 
+ foreach ($expectedValues as $subName => $subValue) { + if (is_array($subValue) && in_array($filteredValue, $subValue)) { + return $filteredValue; + } + } + return null; + } + return $filteredValue; + } } diff --git a/lib/RssBridge.php b/lib/RssBridge.php index 79d1d710..d16f1d89 100644 --- a/lib/RssBridge.php +++ b/lib/RssBridge.php @@ -2,116 +2,40 @@ final class RssBridge { - private static CacheInterface $cache; + private Container $container; - public function main(array $argv = []) - { - if ($argv) { - parse_str(implode('&', array_slice($argv, 1)), $cliArgs); - $request = $cliArgs; - } else { - $request = array_merge($_GET, $_POST); - } - - try { - $this->run($request); - } catch (\Throwable $e) { - Logger::error('Exception in main', ['e' => $e]); - http_response_code(500); - print render(__DIR__ . '/../templates/error.html.php', ['e' => $e]); - } + public function __construct( + Container $container + ) { + $this->container = $container; } - private function run($request): void + public function main(Request $request): Response { - Configuration::verifyInstallation(); - - $customConfig = []; - if (file_exists(__DIR__ . '/../config.ini.php')) { - $customConfig = parse_ini_file(__DIR__ . '/../config.ini.php', true, INI_SCANNER_TYPED); - } - Configuration::loadConfiguration($customConfig, getenv()); - - set_error_handler(function ($code, $message, $file, $line) { - if ((error_reporting() & $code) === 0) { - return false; - } - $text = sprintf( - '%s at %s line %s', - sanitize_root($message), - sanitize_root($file), - $line - ); - Logger::warning($text); - if (Debug::isEnabled()) { - // todo: extract to log handler - print sprintf("
    %s
    \n", e($text)); - } - }); - - // There might be some fatal errors which are not caught by set_error_handler() or \Throwable. - register_shutdown_function(function () { - $error = error_get_last(); - if ($error) { - $message = sprintf( - '(shutdown) %s: %s in %s line %s', - $error['type'], - sanitize_root($error['message']), - sanitize_root($error['file']), - $error['line'] - ); - Logger::error($message); - if (Debug::isEnabled()) { - // todo: extract to log handler - print sprintf("
    %s
    \n", e($message)); - } - } - }); - - // Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED); - date_default_timezone_set(Configuration::getConfig('system', 'timezone')); - - // Create cache - $cacheFactory = new CacheFactory(); - self::setCache($cacheFactory->create()); - - if (Configuration::getConfig('authentication', 'enable')) { - $authenticationMiddleware = new AuthenticationMiddleware(); - $authenticationMiddleware(); - } - - foreach ($request as $key => $value) { - if (!is_string($value)) { - throw new \Exception("Query parameter \"$key\" is not a string."); - } - } - - $actionName = $request['action'] ?? 'Frontpage'; - $actionName = strtolower($actionName) . 'Action'; + $action = $request->get('action', 'Frontpage'); + $actionName = strtolower($action) . 'Action'; $actionName = implode(array_map('ucfirst', explode('-', $actionName))); - $filePath = __DIR__ . '/../actions/' . $actionName . '.php'; if (!file_exists($filePath)) { - throw new \Exception(sprintf('Invalid action: %s', $actionName)); + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Invalid action']), 400); } - $className = '\\' . 
$actionName; - $action = new $className(); - $response = $action->execute($request); - if (is_string($response)) { - print $response; - } elseif ($response instanceof Response) { - $response->send(); + $handler = $this->container[$actionName]; + + $middlewares = [ + new BasicAuthMiddleware(), + new CacheMiddleware($this->container['cache']), + new ExceptionMiddleware($this->container['logger']), + new SecurityMiddleware(), + new MaintenanceMiddleware(), + new TokenAuthenticationMiddleware(), + ]; + $action = function ($req) use ($handler) { + return $handler($req); + }; + foreach (array_reverse($middlewares) as $middleware) { + $action = fn ($req) => $middleware($req, $action); } - } - - public static function getCache(): CacheInterface - { - return self::$cache; - } - - public static function setCache(CacheInterface $cache): void - { - self::$cache = $cache; + return $action($request->withAttribute('action', $actionName)); } } diff --git a/lib/TwitterClient.php b/lib/TwitterClient.php index 605c27ff..c50f8835 100644 --- a/lib/TwitterClient.php +++ b/lib/TwitterClient.php @@ -11,56 +11,115 @@ class TwitterClient public function __construct(CacheInterface $cache) { $this->cache = $cache; - $this->authorization = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'; - $this->data = $cache->loadData() ?? []; + + $data = $this->cache->get('twitter') ?? 
[]; + $this->data = $data; + + $this->authorization = 'AAAAAAAAAAAAAAAAAAAAAGHtAgAAAAAA%2Bx7ILXNILCqkSGIzy6faIHZ9s3Q%3DQy97w6SIrzE7lQwPJEYQBsArEE2fC25caFwRBvAGi456G09vGR'; + $this->tw_consumer_key = '3nVuSoBZnx6U4vzUxf5w'; + $this->tw_consumer_secret = 'Bcs59EFbbsdF6Sl9Ng71smgStWEGwXXKSjYvPVt7qys'; + $this->oauth_token = ''; //Fill here + $this->oauth_token_secret = ''; //Fill here } - public function fetchUserTweets(string $screenName): \stdClass + private function getOauthAuthorization( + $oauth_token, + $oauth_token_secret, + $method = 'GET', + $url = '', + $body = '', + $timestamp = null, + $oauth_nonce = null + ) { + if (!$url) { + return ''; + } + $method = strtoupper($method); + $parseUrl = parse_url($url); + $link = $parseUrl['scheme'] . '://' . $parseUrl['host'] . $parseUrl['path']; + parse_str($parseUrl['query'], $query_params); + if ($body) { + parse_str($body, $body_params); + $query_params = array_merge($query_params, $body_params); + } + $payload = [ + 'oauth_version' => '1.0', + 'oauth_signature_method' => 'HMAC-SHA1', + 'oauth_consumer_key' => $this->tw_consumer_key, + 'oauth_token' => $oauth_token, + 'oauth_nonce' => $oauth_nonce ? $oauth_nonce : implode('', array_fill(0, 3, strval(time()))), + 'oauth_timestamp' => $timestamp ? $timestamp : time(), + ]; + $payload = array_merge($payload, $query_params); + ksort($payload); + + $url_parts = parse_url($url); + $url_parts['query'] = http_build_query($payload, '', '&', PHP_QUERY_RFC3986); + $base_url = $url_parts['scheme'] . '://' . $url_parts['host'] . $url_parts['path']; + $signature_base_string = strtoupper($method) . '&' . rawurlencode($base_url) . '&' . rawurlencode($url_parts['query']); + $hmac_key = $this->tw_consumer_secret . '&' . 
$oauth_token_secret; + $hex_signature = hash_hmac('sha1', $signature_base_string, $hmac_key, true); + $signature = base64_encode($hex_signature); + + $header_params = [ + 'oauth_version' => '1.0', + 'oauth_token' => $oauth_token, + 'oauth_nonce' => $payload['oauth_nonce'], + 'oauth_timestamp' => $payload['oauth_timestamp'], + 'oauth_signature' => $signature, + 'oauth_consumer_key' => $this->tw_consumer_key, + 'oauth_signature_method' => 'HMAC-SHA1', + ]; + // ksort($header_params); + $header_values = []; + foreach ($header_params as $key => $value) { + $header_values[] = rawurlencode($key) . '="' . (is_int($value) ? $value : rawurlencode($value)) . '"'; + } + return 'OAuth realm="http://api.twitter.com/", ' . implode(', ', $header_values); + } + + private function extractTweetAndUsersFromGraphQL($timeline) { - $this->fetchGuestToken(); - try { - $userInfo = $this->fetchUserInfoByScreenName($screenName); - } catch (HttpException $e) { - if ($e->getCode() === 403) { - Logger::info('The guest token has expired'); - $this->data['guest_token'] = null; - $this->fetchGuestToken(); - $userInfo = $this->fetchUserInfoByScreenName($screenName); - } else { - throw $e; - } + if (isset($timeline->data->user)) { + $result = $timeline->data->user->result; + $instructions = $result->timeline_v2->timeline->instructions; + } elseif (isset($timeline->data->user_result)) { + $result = $timeline->data->user_result->result->timeline_response; + $instructions = $result->timeline->instructions; } - try { - $timeline = $this->fetchTimeline($userInfo->rest_id); - } catch (HttpException $e) { - if ($e->getCode() === 403) { - Logger::info('The guest token has expired'); - $this->data['guest_token'] = null; - $this->fetchGuestToken(); - $timeline = $this->fetchTimeline($userInfo->rest_id); - } else { - throw $e; - } - } - - $result = $timeline->data->user->result; - if ($result->__typename === 'UserUnavailable') { + if (isset($result->__typename) && $result->__typename === 'UserUnavailable') { 
throw new \Exception('UserUnavailable'); } + + if (isset($timeline->data->list)) { + $result = $timeline->data->list->timeline_response; + $instructions = $result->timeline->instructions; + } + + if (!isset($result) && !isset($instructions)) { + throw new \Exception('Unable to fetch user/list timeline'); + } + $instructionTypes = [ 'TimelineAddEntries', 'TimelineClearCache', 'TimelinePinEntry', // unclear purpose, maybe pinned tweet? ]; - $instructions = $result->timeline_v2->timeline->instructions; - if (!isset($instructions[1])) { + if (!isset($instructions[1]) && isset($timeline->data->user)) { throw new \Exception('The account exists but has not tweeted yet?'); } $entries = null; foreach ($instructions as $instruction) { - if ($instruction->type === 'TimelineAddEntries') { + $instructionType = ''; + if (isset($instruction->type)) { + $instructionType = $instruction->type; + } else { + $instructionType = $instruction->__typename; + } + + if ($instructionType === 'TimelineAddEntries') { $entries = $instruction->entries; break; } @@ -70,33 +129,149 @@ class TwitterClient } $tweets = []; + $userIds = []; foreach ($entries as $entry) { - if ($entry->content->entryType !== 'TimelineTimelineItem') { + $entryType = ''; + + if (isset($entry->content->entryType)) { + $entryType = $entry->content->entryType; + } else { + $entryType = $entry->content->__typename; + } + + if ($entryType !== 'TimelineTimelineItem') { continue; } - if (!isset($entry->content->itemContent->tweet_results->result->legacy)) { - continue; + + if (isset($timeline->data->user)) { + if (!isset($entry->content->itemContent->tweet_results->result)) { + continue; + } + + if (isset($entry->content->itemContent->promotedMetadata)) { + continue; + } + + $tweets[] = $entry->content->itemContent->tweet_results->result; + + $userIds[] = $entry->content->itemContent->tweet_results->result->core->user_results->result; + } else { + if (!isset($entry->content->content->tweetResult->result->legacy)) { + continue; 
+ } + + // Filter out any advertise tweet + if (isset($entry->content->content->tweetPromotedMetadata)) { + continue; + } + + $tweets[] = $entry->content->content->tweetResult->result; + + $userIds[] = $entry->content->content->tweetResult->result->core->user_result->result; } - $tweets[] = $entry->content->itemContent->tweet_results->result->legacy; } + + return (object) [ + 'userIds' => $userIds, + 'tweets' => $tweets, + ]; + } + + private function extractTweetFromSearch($searchResult) + { + return $searchResult->statuses; + } + + public function fetchUserTweets(string $screenName): \stdClass + { + $this->fetchGuestToken(); + try { + $userInfo = $this->fetchUserInfoByScreenName($screenName); + } catch (HttpException $e) { + if ($e->getCode() === 403) { + $this->data['guest_token'] = null; + $this->fetchGuestToken(); + $userInfo = $this->fetchUserInfoByScreenName($screenName); + } else { + throw $e; + } + } + + $timeline = $this->fetchTimeline($userInfo->rest_id); + // try { + // // $timeline = $this->fetchTimelineUsingSearch($screenName); + // } catch (HttpException $e) { + // if ($e->getCode() === 403) { + // $this->data['guest_token'] = null; + // $this->fetchGuestToken(); + // // $timeline = $this->fetchTimelineUsingSearch($screenName); + // $timeline = $this->fetchTimeline($userInfo->rest_id); + // } else { + // throw $e; + // } + // } + + // $tweets = $this->extractTweetFromSearch($timeline); + $tweets = $this->extractTweetAndUsersFromGraphQL($timeline)->tweets; + return (object) [ 'user_info' => $userInfo, 'tweets' => $tweets, ]; } + public function fetchListTweets($query, $operation = '') + { + $id = ''; + $this->fetchGuestToken(); + if ($operation == 'By list') { + try { + $listInfo = $this->fetchListInfoBySlug($query['screenName'], $query['listSlug']); + $id = $listInfo->id_str; + } catch (HttpException $e) { + if ($e->getCode() === 403) { + $this->data['guest_token'] = null; + $this->fetchGuestToken(); + $listInfo = 
$this->fetchListInfoBySlug($query['screenName'], $query['listSlug']); + $id = $listInfo->id_str; + } else { + throw $e; + } + } + } else if ($operation == 'By list ID') { + $id = $query['listId']; + } else { + throw new \Exception('Unknown operation to make list tweets'); + } + + try { + $timeline = $this->fetchListTimeline($id); + } catch (HttpException $e) { + if ($e->getCode() === 403) { + $this->data['guest_token'] = null; + $this->fetchGuestToken(); + $timeline = $this->fetchListTimeline($id); + } else { + throw $e; + } + } + + $data = $this->extractTweetAndUsersFromGraphQL($timeline); + + return $data; + } + private function fetchGuestToken(): void { if (isset($this->data['guest_token'])) { - Logger::info('Reusing cached guest token: ' . $this->data['guest_token']); return; } $url = 'https://api.twitter.com/1.1/guest/activate.json'; $response = getContents($url, $this->createHttpHeaders(), [CURLOPT_POST => true]); $guest_token = json_decode($response)->guest_token; $this->data['guest_token'] = $guest_token; - $this->cache->saveData($this->data); - Logger::info("Fetch new guest token: $guest_token"); + + $this->cache->set('twitter', $this->data); } private function fetchUserInfoByScreenName(string $screenName) @@ -112,69 +287,223 @@ class TwitterClient 'https://twitter.com/i/api/graphql/hc-pka9A7gyS3xODIafnrQ/UserByScreenName?variables=%s', urlencode(json_encode($variables)) ); - $response = json_decode(getContents($url, $this->createHttpHeaders())); + $response = Json::decode(getContents($url, $this->createHttpHeaders()), false); if (isset($response->errors)) { // Grab the first error message - throw new \Exception(sprintf('From twitter api: "%s"', $response->errors[0]->message)); + throwClientException(sprintf('From twitter api: "%s"', $response->errors[0]->message)); } $userInfo = $response->data->user; $this->data[$screenName] = $userInfo; - $this->cache->saveData($this->data); + + $this->cache->set('twitter', $this->data); return $userInfo; } private 
function fetchTimeline($userId) { $variables = [ - 'userId' => $userId, + 'autoplay_enabled' => true, 'count' => 40, - 'includePromotedContent' => true, - 'withQuickPromoteEligibilityTweetFields' => true, - 'withSuperFollowsUserFields' => true, - 'withDownvotePerspective' => false, - 'withReactionsMetadata' => false, - 'withReactionsPerspective' => false, - 'withSuperFollowsTweetFields' => true, - 'withVoice' => true, - 'withV2Timeline' => true, + 'includeEditControl' => true, + 'includeEditPerspective' => false, + 'includeHasBirdwatchNotes' => false, + 'includeTweetImpression' => true, + 'includeTweetVisibilityNudge' => true, + 'rest_id' => $userId ]; $features = [ - 'responsive_web_twitter_blue_verified_badge_is_enabled' => true, - 'responsive_web_graphql_exclude_directive_enabled' => false, - 'verified_phone_label_enabled' => false, - 'responsive_web_graphql_timeline_navigation_enabled' => true, - 'responsive_web_graphql_skip_user_profile_image_extensions_enabled' => false, + 'android_graphql_skip_api_media_color_palette' => true, + 'blue_business_profile_image_shape_enabled' => true, + 'creator_subscriptions_subscription_count_enabled' => true, + 'creator_subscriptions_tweet_preview_api_enabled' => true, + 'freedom_of_speech_not_reach_fetch_enabled' => true, 'longform_notetweets_consumption_enabled' => true, + 'longform_notetweets_inline_media_enabled' => true, + 'longform_notetweets_rich_text_read_enabled' => true, + 'subscriptions_verification_info_enabled' => true, + 'super_follow_badge_privacy_enabled' => true, + 'super_follow_exclusive_tweet_notifications_enabled' => true, + 'super_follow_tweet_api_enabled' => true, + 'super_follow_user_api_enabled' => true, + 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled' => true, 'tweetypie_unmention_optimization_enabled' => true, - 'vibe_api_enabled' => true, - 'responsive_web_edit_tweet_api_enabled' => true, - 'graphql_is_translatable_rweb_tweet_is_translatable_enabled' => true, - 
'view_counts_everywhere_api_enabled' => true, - 'freedom_of_speech_not_reach_appeal_label_enabled' => false, - 'standardized_nudges_misinfo' => true, - 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled' => false, - 'interactive_text_enabled' => true, - 'responsive_web_text_conversations_enabled' => false, - 'responsive_web_enhance_cards_enabled' => false, + 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled' => true, ]; $url = sprintf( - 'https://twitter.com/i/api/graphql/WZT7sCTrLvSOaWOXLDsWbQ/UserTweets?variables=%s&features=%s', + 'https://api.twitter.com/graphql/3JNH4e9dq1BifLxAa3UMWg/UserWithProfileTweetsQueryV2?variables=%s&features=%s', urlencode(json_encode($variables)), urlencode(json_encode($features)) ); - $response = json_decode(getContents($url, $this->createHttpHeaders())); + $oauth = $this->getOauthAuthorization($this->oauth_token, $this->oauth_token_secret, 'GET', $url); + $response = Json::decode(getContents($url, $this->createHttpHeaders($oauth)), false); return $response; } - private function createHttpHeaders(): array + private function fetchTimelineUsingSearch($screenName) + { + $params = [ + 'q' => 'from:' . $screenName, + 'modules' => 'status', + 'result_type' => 'recent' + ]; + $response = $this->search($params); + return $response; + } + + public function search($queryParam) + { + $url = sprintf( + 'https://api.twitter.com/1.1/search/tweets.json?%s', + http_build_query($queryParam) + ); + $oauth = $this->getOauthAuthorization($this->oauth_token, $this->oauth_token_secret, 'GET', $url); + $response = Json::decode(getContents($url, $this->createHttpHeaders($oauth)), false); + return $response; + } + + private function fetchListInfoBySlug($screenName, $listSlug) + { + if (isset($this->data[$screenName . '-' . $listSlug])) { + return $this->data[$screenName . '-' . 
$listSlug]; + } + + $features = [ + 'android_graphql_skip_api_media_color_palette' => false, + 'blue_business_profile_image_shape_enabled' => false, + 'creator_subscriptions_subscription_count_enabled' => false, + 'creator_subscriptions_tweet_preview_api_enabled' => true, + 'freedom_of_speech_not_reach_fetch_enabled' => false, + 'graphql_is_translatable_rweb_tweet_is_translatable_enabled' => false, + 'hidden_profile_likes_enabled' => false, + 'highlights_tweets_tab_ui_enabled' => false, + 'interactive_text_enabled' => false, + 'longform_notetweets_consumption_enabled' => true, + 'longform_notetweets_inline_media_enabled' => false, + 'longform_notetweets_richtext_consumption_enabled' => true, + 'longform_notetweets_rich_text_read_enabled' => false, + 'responsive_web_edit_tweet_api_enabled' => false, + 'responsive_web_enhance_cards_enabled' => false, + 'responsive_web_graphql_exclude_directive_enabled' => true, + 'responsive_web_graphql_skip_user_profile_image_extensions_enabled' => false, + 'responsive_web_graphql_timeline_navigation_enabled' => false, + 'responsive_web_media_download_video_enabled' => false, + 'responsive_web_text_conversations_enabled' => false, + 'responsive_web_twitter_article_tweet_consumption_enabled' => false, + 'responsive_web_twitter_blue_verified_badge_is_enabled' => true, + 'rweb_lists_timeline_redesign_enabled' => true, + 'spaces_2022_h2_clipping' => true, + 'spaces_2022_h2_spaces_communities' => true, + 'standardized_nudges_misinfo' => false, + 'subscriptions_verification_info_enabled' => true, + 'subscriptions_verification_info_reason_enabled' => true, + 'subscriptions_verification_info_verified_since_enabled' => true, + 'super_follow_badge_privacy_enabled' => false, + 'super_follow_exclusive_tweet_notifications_enabled' => false, + 'super_follow_tweet_api_enabled' => false, + 'super_follow_user_api_enabled' => false, + 'tweet_awards_web_tipping_enabled' => false, + 
'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled' => false, + 'tweetypie_unmention_optimization_enabled' => false, + 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled' => false, + 'verified_phone_label_enabled' => false, + 'vibe_api_enabled' => false, + 'view_counts_everywhere_api_enabled' => false + ]; + $variables = [ + 'screenName' => $screenName, + 'listSlug' => $listSlug + ]; + + $url = sprintf( + 'https://twitter.com/i/api/graphql/-kmqNvm5Y-cVrfvBy6docg/ListBySlug?variables=%s&features=%s', + urlencode(json_encode($variables)), + urlencode(json_encode($features)) + ); + + $response = Json::decode(getContents($url, $this->createHttpHeaders()), false); + if (isset($response->errors)) { + // Grab the first error message + throwClientException(sprintf('From twitter api: "%s"', $response->errors[0]->message)); + } + $listInfo = $response->data->user_by_screen_name->list; + $this->data[$screenName . '-' . $listSlug] = $listInfo; + + $this->cache->set('twitter', $this->data); + return $listInfo; + } + + private function fetchListTimeline($listId) + { + $features = [ + 'android_graphql_skip_api_media_color_palette' => false, + 'blue_business_profile_image_shape_enabled' => false, + 'creator_subscriptions_subscription_count_enabled' => false, + 'creator_subscriptions_tweet_preview_api_enabled' => true, + 'freedom_of_speech_not_reach_fetch_enabled' => false, + 'graphql_is_translatable_rweb_tweet_is_translatable_enabled' => false, + 'hidden_profile_likes_enabled' => false, + 'highlights_tweets_tab_ui_enabled' => false, + 'interactive_text_enabled' => false, + 'longform_notetweets_consumption_enabled' => true, + 'longform_notetweets_inline_media_enabled' => false, + 'longform_notetweets_richtext_consumption_enabled' => true, + 'longform_notetweets_rich_text_read_enabled' => false, + 'responsive_web_edit_tweet_api_enabled' => false, + 'responsive_web_enhance_cards_enabled' => false, + 
'responsive_web_graphql_exclude_directive_enabled' => true, + 'responsive_web_graphql_skip_user_profile_image_extensions_enabled' => false, + 'responsive_web_graphql_timeline_navigation_enabled' => false, + 'responsive_web_media_download_video_enabled' => false, + 'responsive_web_text_conversations_enabled' => false, + 'responsive_web_twitter_article_tweet_consumption_enabled' => false, + 'responsive_web_twitter_blue_verified_badge_is_enabled' => true, + 'rweb_lists_timeline_redesign_enabled' => true, + 'spaces_2022_h2_clipping' => true, + 'spaces_2022_h2_spaces_communities' => true, + 'standardized_nudges_misinfo' => false, + 'subscriptions_verification_info_enabled' => true, + 'subscriptions_verification_info_reason_enabled' => true, + 'subscriptions_verification_info_verified_since_enabled' => true, + 'super_follow_badge_privacy_enabled' => false, + 'super_follow_exclusive_tweet_notifications_enabled' => false, + 'super_follow_tweet_api_enabled' => false, + 'super_follow_user_api_enabled' => false, + 'tweet_awards_web_tipping_enabled' => false, + 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled' => false, + 'tweetypie_unmention_optimization_enabled' => false, + 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled' => false, + 'verified_phone_label_enabled' => false, + 'vibe_api_enabled' => false, + 'view_counts_everywhere_api_enabled' => false + ]; + $variables = [ + 'rest_id' => $listId, + 'count' => 20 + ]; + + $url = sprintf( + 'https://api.twitter.com/graphql/BbGLL1ZfMibdFNWlk7a0Pw/ListTimeline?variables=%s&features=%s', + urlencode(json_encode($variables)), + urlencode(json_encode($features)) + ); + $oauth = $this->getOauthAuthorization($this->oauth_token, $this->oauth_token_secret, 'GET', $url); + $response = Json::decode(getContents($url, $this->createHttpHeaders($oauth)), false); + return $response; + } + + private function createHttpHeaders($oauth = null): array { $headers = [ 'authorization' => sprintf('Bearer 
%s', $this->authorization), 'x-guest-token' => $this->data['guest_token'] ?? null, ]; - foreach ($headers as $key => $value) { - $headers[] = sprintf('%s: %s', $key, $value); + if (isset($oauth)) { + $headers['authorization'] = $oauth; + unset($headers['x-guest-token']); } - return $headers; + foreach ($headers as $key => $value) { + $headers2[] = sprintf('%s: %s', $key, $value); + } + return $headers2; } } diff --git a/lib/WebDriverAbstract.php b/lib/WebDriverAbstract.php new file mode 100644 index 00000000..db2fb7b1 --- /dev/null +++ b/lib/WebDriverAbstract.php @@ -0,0 +1,141 @@ +driver; + } + + /** + * Returns the uri of the feed's icon. + * + * @return string + */ + public function getIcon() + { + return $this->feedIcon ?: parent::getIcon(); + } + + /** + * Sets the uri of the feed's icon. + * + * @param $iconurl string + */ + protected function setIcon($iconurl) + { + $this->feedIcon = $iconurl; + } + + /** + * Returns the ChromeOptions object. + * + * If the configuration parameter 'headless' is set to true, the + * argument '--headless' is added. Override this to change or add + * more options. + * + * @return ChromeOptions + */ + protected function getBrowserOptions() + { + $chromeOptions = new ChromeOptions(); + if (Configuration::getConfig('webdriver', 'headless')) { + $chromeOptions->addArguments(['--headless']); // --window-size=1024,1024 + } + return $chromeOptions; + } + + /** + * Returns the DesiredCapabilities object for the Chrome browser. + * + * The Chrome options are added. Override this to change or add + * more capabilities. 
+ * + * @return WebDriverCapabilities + */ + protected function getDesiredCapabilities(): WebDriverCapabilities + { + $desiredCapabilities = DesiredCapabilities::chrome(); + $desiredCapabilities->setCapability(ChromeOptions::CAPABILITY, $this->getBrowserOptions()); + return $desiredCapabilities; + } + + /** + * Constructs the remote webdriver with the url of the remote (Selenium) + * webdriver server and the desired capabilities. + * + * This should be called in collectData() first. + */ + protected function prepareWebDriver() + { + $server = Configuration::getConfig('webdriver', 'selenium_server_url'); + $this->driver = RemoteWebDriver::create($server, $this->getDesiredCapabilities()); + } + + /** + * Maximizes the remote browser window (often important for reactive sites + * which change their appearance depending on the window size) and opens + * the uri set in the constant URI. + */ + protected function prepareWindow() + { + $this->getDriver()->manage()->window()->maximize(); + $this->getDriver()->get($this->getURI()); + } + + /** + * Closes the remote browser window and shuts down the remote webdriver + * connection. + * + * This must be called at the end of scraping, for example within a + * 'finally' block. + */ + protected function cleanUp() + { + $this->getDriver()->quit(); + } + + /** + * Do your web scraping here and fill the $items array. + * + * Override this but call parent() first. + * Don't forget to call cleanUp() at the end. 
+ */ + public function collectData() + { + $this->prepareWebDriver(); + $this->prepareWindow(); + } +} \ No newline at end of file diff --git a/lib/XPathAbstract.php b/lib/XPathAbstract.php index 05929322..44cbab67 100644 --- a/lib/XPathAbstract.php +++ b/lib/XPathAbstract.php @@ -149,6 +149,15 @@ abstract class XPathAbstract extends BridgeAbstract */ const SETTING_FIX_ENCODING = false; + /** + * Use raw item content + * Whether to use the raw item content or to replace certain characters with + * special significance in HTML by HTML entities (using the PHP function htmlspecialchars). + * + * Use {@see XPathAbstract::getSettingUseRawItemContent()} to read this parameter + */ + const SETTING_USE_RAW_ITEM_CONTENT = true; + /** * Internal storage for resulting feed name, automatically detected * @var string @@ -284,13 +293,22 @@ abstract class XPathAbstract extends BridgeAbstract /** * Fix encoding - * @return string + * @return bool */ - protected function getSettingFixEncoding() + protected function getSettingFixEncoding(): bool { return static::SETTING_FIX_ENCODING; } + /** + * Use raw item content + * @return bool + */ + protected function getSettingUseRawItemContent(): bool + { + return static::SETTING_USE_RAW_ITEM_CONTENT; + } + /** * Internal helper method for quickly accessing all the user defined constants * in derived classes @@ -325,6 +343,8 @@ abstract class XPathAbstract extends BridgeAbstract return $this->getExpressionItemCategories(); case 'fix_encoding': return $this->getSettingFixEncoding(); + case 'raw_content': + return $this->getSettingUseRawItemContent(); } } @@ -370,7 +390,7 @@ abstract class XPathAbstract extends BridgeAbstract * Should provide the feed's items. 
* * @param \DOMXPath $xpath - * @return \DOMNodeList + * @return \DOMNodeList|false */ protected function provideFeedItems(\DOMXPath $xpath) { @@ -397,13 +417,23 @@ abstract class XPathAbstract extends BridgeAbstract $entries = $this->provideFeedItems($xpath); if ($entries === false) { + // malformed return; } foreach ($entries as $entry) { - $item = new FeedItem(); - foreach (['title', 'content', 'uri', 'author', 'timestamp', 'enclosures', 'categories'] as $param) { - $expression = $this->getParam($param); + $item = []; + $parameters = [ + 'title', + 'content', + 'uri', + 'author', + 'timestamp', + 'enclosures', + 'categories', + ]; + foreach ($parameters as $parameter) { + $expression = $this->getParam($parameter); if ('' === $expression) { continue; } @@ -417,13 +447,21 @@ abstract class XPathAbstract extends BridgeAbstract continue; } - $value = $this->getItemValueOrNodeValue($typedResult, $param === 'content'); - $item->__set($param, $this->formatParamValue($param, $value)); + if ('categories' === $parameter && $typedResult instanceof \DOMNodeList) { + $value = []; + foreach ($typedResult as $domNode) { + $value[] = $this->getItemValueOrNodeValue($domNode, false); + } + } else { + $value = $this->getItemValueOrNodeValue($typedResult, 'content' === $parameter); + } + + $item[$parameter] = $this->formatParamValue($parameter, $value); } $itemId = $this->generateItemId($item); if (null !== $itemId) { - $item->setUid($itemId); + $item['uid'] = $itemId; } $this->items[] = $item; @@ -437,7 +475,8 @@ abstract class XPathAbstract extends BridgeAbstract */ protected function formatParamValue($param, $value) { - $value = $this->fixEncoding($value); + $value = is_array($value) ? array_map('trim', $value) : trim($value); + $value = is_array($value) ? 
array_map([$this, 'fixEncoding'], $value) : $this->fixEncoding($value); switch ($param) { case 'title': return $this->formatItemTitle($value); @@ -480,7 +519,7 @@ abstract class XPathAbstract extends BridgeAbstract */ protected function formatItemContent($value) { - return $value; + return $this->getParam('raw_content') ? $value : htmlspecialchars($value); } /** @@ -496,7 +535,10 @@ abstract class XPathAbstract extends BridgeAbstract if (strlen($value) === 0) { return ''; } - if (strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) { + if ( + strpos($value, 'http://') === 0 + || strpos($value, 'https://') === 0 + ) { return $value; } @@ -547,12 +589,12 @@ abstract class XPathAbstract extends BridgeAbstract * formatted as array. * Can be easily overwritten for in case the values need to be transformed into something * else. - * @param string $value + * @param string|array $value * @return array */ protected function formatItemCategories($value) { - return [$value]; + return is_array($value) ? $value : [$value]; } /** @@ -571,35 +613,30 @@ abstract class XPathAbstract extends BridgeAbstract /** * @param $typedResult + * @param bool $returnXML + * @param bool $escapeHtml * @return string + * @throws Exception */ protected function getItemValueOrNodeValue($typedResult, $returnXML = false) { if ($typedResult instanceof \DOMNodeList) { - $item = $typedResult->item(0); - if ($item instanceof \DOMElement) { - // Don't escape XML - if ($returnXML) { - return ($item->ownerDocument ?? 
$item)->saveXML($item); - } - $text = $item->nodeValue; - } elseif ($item instanceof \DOMAttr) { - $text = $item->value; - } elseif ($item instanceof \DOMText) { - $text = $item->wholeText; - } - } elseif (is_string($typedResult) && strlen($typedResult) > 0) { - $text = $typedResult; - } else { - throw new \Exception('Unknown type of XPath expression result.'); + $typedResult = $typedResult->item(0); } - $text = trim($text); - - if ($returnXML) { - return htmlspecialchars($text); + if ($typedResult instanceof \DOMElement) { + return $returnXML ? ($typedResult->ownerDocument ?? $typedResult)->saveXML($typedResult) : $typedResult->nodeValue; + } elseif ($typedResult instanceof \DOMAttr) { + return $typedResult->value; + } elseif ($typedResult instanceof \DOMText) { + return $typedResult->wholeText; + } elseif (is_string($typedResult)) { + return $typedResult; + } elseif (null === $typedResult) { + return ''; } - return $text; + + throw new \Exception('Unknown type of XPath expression result: ' . 
gettype($typedResult)); } /** @@ -618,10 +655,9 @@ abstract class XPathAbstract extends BridgeAbstract /** * Allows overriding default mechanism determining items Uid's * - * @param FeedItem $item * @return string|null */ - protected function generateItemId(FeedItem $item) + protected function generateItemId(array $item) { return null; } diff --git a/lib/bootstrap.php b/lib/bootstrap.php index e05dd94a..8a7c62a1 100644 --- a/lib/bootstrap.php +++ b/lib/bootstrap.php @@ -1,52 +1,26 @@ 'Continue', - '101' => 'Switching Protocols', - '200' => 'OK', - '201' => 'Created', - '202' => 'Accepted', - '203' => 'Non-Authoritative Information', - '204' => 'No Content', - '205' => 'Reset Content', - '206' => 'Partial Content', - '300' => 'Multiple Choices', - '301' => 'Moved Permanently', - '302' => 'Found', - '303' => 'See Other', - '304' => 'Not Modified', - '305' => 'Use Proxy', - '400' => 'Bad Request', - '401' => 'Unauthorized', - '402' => 'Payment Required', - '403' => 'Forbidden', - '404' => 'Not Found', - '405' => 'Method Not Allowed', - '406' => 'Not Acceptable', - '407' => 'Proxy Authentication Required', - '408' => 'Request Timeout', - '409' => 'Conflict', - '410' => 'Gone', - '411' => 'Length Required', - '412' => 'Precondition Failed', - '413' => 'Request Entity Too Large', - '414' => 'Request-URI Too Long', - '415' => 'Unsupported Media Type', - '416' => 'Requested Range Not Satisfiable', - '417' => 'Expectation Failed', - '429' => 'Too Many Requests', - '500' => 'Internal Server Error', - '501' => 'Not Implemented', - '502' => 'Bad Gateway', - '503' => 'Service Unavailable', - '504' => 'Gateway Timeout', - '505' => 'HTTP Version Not Supported' - ]; - private string $body; - private int $code; - private array $headers; - - public function __construct( - string $body = '', - int $code = 200, - array $headers = [] - ) { - $this->body = $body; - $this->code = $code; - $this->headers = $headers; - } - - public function getBody() - { - return $this->body; - } - - 
public function getHeaders() - { - return $this->headers; - } - - public function send(): void - { - http_response_code($this->code); - foreach ($this->headers as $name => $value) { - header(sprintf('%s: %s', $name, $value)); - } - print $this->body; - } -} - /** * Fetch data from an http url * * @param array $httpHeaders E.g. ['Content-type: text/plain'] * @param array $curlOptions Associative array e.g. [CURLOPT_MAXREDIRS => 3] - * @param bool $returnFull Whether to return an array: - * [ - * 'code' => int, - * 'header' => array, - * 'content' => string, - * 'status_lines' => array, - * ] - - * @return string|array + * @param bool $returnFull Whether to return Response object + * @return string|Response */ function getContents( string $url, @@ -99,21 +14,23 @@ function getContents( array $curlOptions = [], bool $returnFull = false ) { - $cache = RssBridge::getCache(); - $cache->setScope('server'); - $cache->setKey([$url]); + global $container; - // Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102 - $defaultHttpHeaders = [ - 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', - 'Accept-Language' => 'en-US,en;q=0.5', - 'Upgrade-Insecure-Requests' => '1', - 'Sec-Fetch-Dest' => 'document', - 'Sec-Fetch-Mode' => 'navigate', - 'Sec-Fetch-Site' => 'none', - 'Sec-Fetch-User' => '?1', - 'TE' => 'trailers', + /** @var HttpClient $httpClient */ + $httpClient = $container['http_client']; + + /** @var CacheInterface $cache */ + $cache = $container['cache']; + + // TODO: consider url validation at this point + + $config = [ + 'useragent' => Configuration::getConfig('http', 'useragent'), + 'timeout' => Configuration::getConfig('http', 'timeout'), + 'retries' => Configuration::getConfig('http', 'retries'), + 'curl_options' => $curlOptions, ]; + $httpHeadersNormalized = []; foreach ($httpHeaders as $httpHeader) { $parts = explode(':', $httpHeader); @@ -121,49 +38,60 @@ function getContents( 
$headerValue = trim(implode(':', array_slice($parts, 1))); $httpHeadersNormalized[$headerName] = $headerValue; } - $config = [ - 'useragent' => Configuration::getConfig('http', 'useragent'), - 'timeout' => Configuration::getConfig('http', 'timeout'), - 'headers' => array_merge($defaultHttpHeaders, $httpHeadersNormalized), - 'curl_options' => $curlOptions, - ]; + + $requestBodyHash = null; + if (isset($curlOptions[CURLOPT_POSTFIELDS])) { + $requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false)); + } + $cacheKey = implode('_', ['server', $url, $requestBodyHash]); + + /** @var Response $cachedResponse */ + $cachedResponse = $cache->get($cacheKey); + if ($cachedResponse) { + $lastModified = $cachedResponse->getHeader('last-modified'); + if ($lastModified) { + try { + // Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime + $lastModified = new \DateTimeImmutable((is_numeric($lastModified) ? '@' : '') . $lastModified); + $config['if_not_modified_since'] = $lastModified->getTimestamp(); + } catch (Exception $e) { + // Failed to parse last-modified + } + } + $etag = $cachedResponse->getHeader('etag'); + if ($etag) { + $httpHeadersNormalized['if-none-match'] = $etag; + } + } + + $config['headers'] = $httpHeadersNormalized; $maxFileSize = Configuration::getConfig('http', 'max_filesize'); if ($maxFileSize) { - // Multiply with 2^20 (1M) to the value in bytes + // Convert from MB to B by multiplying with 2^20 (1M) $config['max_filesize'] = $maxFileSize * 2 ** 20; } if (Configuration::getConfig('proxy', 'url') && !defined('NOPROXY')) { $config['proxy'] = Configuration::getConfig('proxy', 'url'); } - if (!Debug::isEnabled() && $cache->getTime()) { - $config['if_not_modified_since'] = $cache->getTime(); - } - $result = _http_request($url, $config); - $response = [ - 'code' => $result['code'], - 'status_lines' => $result['status_lines'], - 'header' => $result['headers'], - 'content' => $result['body'], - ]; 
+ $response = $httpClient->request($url, $config); - switch ($result['code']) { + switch ($response->getCode()) { case 200: case 201: case 202: - if (isset($result['headers']['cache-control'])) { - $cachecontrol = $result['headers']['cache-control']; - $lastValue = array_pop($cachecontrol); - $directives = explode(',', $lastValue); + $cacheControl = $response->getHeader('cache-control'); + if ($cacheControl) { + $directives = explode(',', $cacheControl); $directives = array_map('trim', $directives); if (in_array('no-cache', $directives) || in_array('no-store', $directives)) { // Don't cache as instructed by the server break; } } - $cache->saveData($result['body']); + $cache->set($cacheKey, $response, 86400 * 10); break; case 301: case 302: @@ -172,157 +100,16 @@ function getContents( break; case 304: // Not Modified - $response['content'] = $cache->loadData(); + $response = $response->withBody($cachedResponse->getBody()); break; default: - $exceptionMessage = sprintf( - '%s resulted in %s %s %s', - $url, - $result['code'], - Response::STATUS_CODES[$result['code']] ?? '', - // If debug, include a part of the response body in the exception message - Debug::isEnabled() ? 
mb_substr($result['body'], 0, 500) : '', - ); - - // The following code must be extracted if it grows too much - $cloudflareTitles = [ - 'Just a moment...', - '<title>Please Wait...', - '<title>Attention Required!', - '<title>Security | Glassdoor', - ]; - foreach ($cloudflareTitles as $cloudflareTitle) { - if (str_contains($result['body'], $cloudflareTitle)) { - throw new CloudFlareException($exceptionMessage, $result['code']); - } - } - - throw new HttpException($exceptionMessage, $result['code']); + $e = HttpException::fromResponse($response, $url); + throw $e; } if ($returnFull === true) { return $response; } - return $response['content']; -} - -/** - * Fetch content from url - * - * @internal Private function used internally - * @throws HttpException - */ -function _http_request(string $url, array $config = []): array -{ - $defaults = [ - 'useragent' => null, - 'timeout' => 5, - 'headers' => [], - 'proxy' => null, - 'curl_options' => [], - 'if_not_modified_since' => null, - 'retries' => 3, - 'max_filesize' => null, - 'max_redirections' => 5, - ]; - $config = array_merge($defaults, $config); - - $ch = curl_init($url); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']); - curl_setopt($ch, CURLOPT_HEADER, false); - $httpHeaders = []; - foreach ($config['headers'] as $name => $value) { - $httpHeaders[] = sprintf('%s: %s', $name, $value); - } - curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders); - if ($config['useragent']) { - curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']); - } - curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']); - curl_setopt($ch, CURLOPT_ENCODING, ''); - curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS); - - if ($config['max_filesize']) { - // This option inspects the Content-Length header - curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']); - curl_setopt($ch, CURLOPT_NOPROGRESS, 
false); - // This progress function will monitor responses who omit the Content-Length header - curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) { - if ($downloaded > $config['max_filesize']) { - // Return a non-zero value to abort the transfer - return -1; - } - return 0; - }); - } - - if ($config['proxy']) { - curl_setopt($ch, CURLOPT_PROXY, $config['proxy']); - } - if (curl_setopt_array($ch, $config['curl_options']) === false) { - throw new \Exception('Tried to set an illegal curl option'); - } - - if ($config['if_not_modified_since']) { - curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']); - curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE); - } - - $responseStatusLines = []; - $responseHeaders = []; - curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) { - $len = strlen($rawHeader); - if ($rawHeader === "\r\n") { - return $len; - } - if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) { - $responseStatusLines[] = $rawHeader; - return $len; - } - $header = explode(':', $rawHeader); - if (count($header) === 1) { - return $len; - } - $name = mb_strtolower(trim($header[0])); - $value = trim(implode(':', array_slice($header, 1))); - if (!isset($responseHeaders[$name])) { - $responseHeaders[$name] = []; - } - $responseHeaders[$name][] = $value; - return $len; - }); - - $attempts = 0; - while (true) { - $attempts++; - $data = curl_exec($ch); - if ($data !== false) { - // The network call was successful, so break out of the loop - break; - } - if ($attempts > $config['retries']) { - // Finally give up - $curl_error = curl_error($ch); - $curl_errno = curl_errno($ch); - throw new HttpException(sprintf( - 'cURL error %s: %s (%s) for %s', - $curl_error, - $curl_errno, - 'https://curl.haxx.se/libcurl/c/libcurl-errors.html', - $url - )); - } - } - - $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); - 
curl_close($ch); - return [ - 'code' => $statusCode, - 'status_lines' => $responseStatusLines, - 'headers' => $responseHeaders, - 'body' => $data, - ]; + return $response->getBody(); } /** @@ -349,7 +136,6 @@ function _http_request(string $url, array $config = []): array * when returning plaintext. * @param string $defaultSpanText Specifies the replacement text for `<span />` * tags when returning plaintext. - * @return false|simple_html_dom Contents as simplehtmldom object. */ function getSimpleHTMLDOM( $url, @@ -361,14 +147,14 @@ function getSimpleHTMLDOM( $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT -) { - $content = getContents( - $url, - $header ?? [], - $opts ?? [] - ); +): \simple_html_dom { + $html = getContents($url, $header ?? [], $opts ?? []); + if ($html === '') { + throw new \Exception('Unable to parse dom because the http response was the empty string'); + } + return str_get_html( - $content, + $html, $lowercase, $forceTagsClosed, $target_charset, @@ -379,13 +165,11 @@ function getSimpleHTMLDOM( } /** - * Gets contents from the Internet as simplhtmldom object. Contents are cached + * Fetch contents from the Internet as simplhtmldom object. Contents are cached * and re-used for subsequent calls until the cache duration elapsed. * - * _Notice_: Cached contents are forcefully removed after 24 hours (86400 seconds). - * * @param string $url The URL. - * @param int $duration Cache duration in seconds. + * @param int $ttl Cache duration in seconds. * @param array $header (optional) A list of cURL header. * For more information follow the links below. 
* * https://php.net/manual/en/function.curl-setopt.php @@ -410,7 +194,7 @@ function getSimpleHTMLDOM( */ function getSimpleHTMLDOMCached( $url, - $duration = 86400, + $ttl = 86400, $header = [], $opts = [], $lowercase = true, @@ -420,32 +204,17 @@ function getSimpleHTMLDOMCached( $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT ) { - $cache = RssBridge::getCache(); - $cache->setScope('pages'); - $cache->setKey([$url]); + global $container; - // Determine if cached file is within duration - $time = $cache->getTime(); - if ( - $time - && time() - $duration < $time - && !Debug::isEnabled() - ) { - // Cache hit - $content = $cache->loadData(); - } else { - $content = getContents( - $url, - $header ?? [], - $opts ?? [] - ); - if ($content) { - $cache->setScope('pages'); - $cache->setKey([$url]); - $cache->saveData($content); - } + /** @var CacheInterface $cache */ + $cache = $container['cache']; + + $cacheKey = 'pages_' . $url; + $content = $cache->get($cacheKey); + if (!$content) { + $content = getContents($url, $header ?? [], $opts ?? 
[]); + $cache->set($cacheKey, $content, $ttl); } - return str_get_html( $content, $lowercase, diff --git a/lib/dependencies.php b/lib/dependencies.php new file mode 100644 index 00000000..dfaa828b --- /dev/null +++ b/lib/dependencies.php @@ -0,0 +1,70 @@ +<?php + +declare(strict_types=1); + +$container = new Container(); + +$container[ConnectivityAction::class] = function ($c) { + return new ConnectivityAction($c['bridge_factory']); +}; + +$container[DetectAction::class] = function ($c) { + return new DetectAction($c['bridge_factory']); +}; + +$container[DisplayAction::class] = function ($c) { + return new DisplayAction($c['cache'], $c['logger'], $c['bridge_factory']); +}; + +$container[FindfeedAction::class] = function ($c) { + return new FindfeedAction($c['bridge_factory']); +}; + +$container[FrontpageAction::class] = function ($c) { + return new FrontpageAction($c['bridge_factory']); +}; + +$container[HealthAction::class] = function () { + return new HealthAction(); +}; + +$container[ListAction::class] = function ($c) { + return new ListAction($c['bridge_factory']); +}; + +$container['bridge_factory'] = function ($c) { + return new BridgeFactory($c['cache'], $c['logger']); +}; + + +$container['http_client'] = function () { + return new CurlHttpClient(); +}; + +$container['cache_factory'] = function ($c) { + return new CacheFactory($c['logger']); +}; + +$container['logger'] = function () { + $logger = new SimpleLogger('rssbridge'); + if (Configuration::getConfig('system', 'env') === 'dev') { + $logger->addHandler(new ErrorLogHandler(Logger::DEBUG)); + } else { + $logger->addHandler(new ErrorLogHandler(Logger::INFO)); + } + // Uncomment this for info logging to fs + // $logger->addHandler(new StreamHandler('/tmp/rss-bridge.log', Logger::INFO)); + + // Uncomment this for debug logging to fs + // $logger->addHandler(new StreamHandler('/tmp/rss-bridge-debug.log', Logger::DEBUG)); + return $logger; +}; + +$container['cache'] = function ($c) { + /** @var CacheFactory 
$cacheFactory */ + $cacheFactory = $c['cache_factory']; + $cache = $cacheFactory->create(Configuration::getConfig('cache', 'type')); + return $cache; +}; + +return $container; diff --git a/lib/error.php b/lib/error.php deleted file mode 100644 index 4439fb38..00000000 --- a/lib/error.php +++ /dev/null @@ -1,47 +0,0 @@ -<?php - -/** - * This file is part of RSS-Bridge, a PHP project capable of generating RSS and - * Atom feeds for websites that don't have one. - * - * For the full license information, please view the UNLICENSE file distributed - * with this source code. - * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge - */ - -/** - * Throws an exception when called. - * - * @throws \Exception when called - * @param string $message The error message - * @param int $code The HTTP error code - * @link https://en.wikipedia.org/wiki/List_of_HTTP_status_codes List of HTTP - * status codes - */ -function returnError($message, $code) -{ - throw new \Exception($message, $code); -} - -/** - * Returns HTTP Error 400 (Bad Request) when called. - * - * @param string $message The error message - */ -function returnClientError($message) -{ - returnError($message, 400); -} - -/** - * Returns HTTP Error 500 (Internal Server Error) when called. 
- * - * @param string $message The error message - */ -function returnServerError($message) -{ - returnError($message, 500); -} diff --git a/lib/html.php b/lib/html.php index 489fb5b4..dd756979 100644 --- a/lib/html.php +++ b/lib/html.php @@ -15,27 +15,24 @@ function render(string $template, array $context = []): string 'level' => 'info', ]; } - if (Debug::isEnabled()) { - $debugModeWhitelist = Configuration::getConfig('system', 'debug_mode_whitelist') ?: []; - if ($debugModeWhitelist === []) { - $context['messages'][] = [ - 'body' => 'Warning : Debug mode is active from any location, make sure only you can access RSS-Bridge.', - 'level' => 'error' - ]; - } else { - $context['messages'][] = [ - 'body' => 'Warning : Debug mode is active from your IP address, your requests will bypass the cache.', - 'level' => 'warning' - ]; - } + if (Configuration::getConfig('system', 'env') === 'dev') { + $context['messages'][] = [ + 'body' => 'System environment: dev', + 'level' => 'error' + ]; + $context['messages'][] = [ + 'body' => sprintf('Cache type: %s', Configuration::getConfig('cache', 'type')), + 'level' => 'info' + ]; } $context['page'] = render_template($template, $context); return render_template('base.html.php', $context); } /** - * Render template as absolute path or relative to templates folder. - * Do not pass user input in $template + * Render php template with context + * + * DO NOT PASS USER INPUT IN $template OR $context (keys!) */ function render_template(string $template, array $context = []): string { @@ -225,6 +222,63 @@ function defaultLinkTo($dom, $url) return $dom; } +/** + * Parse a srcset HTML attribute value and return size => URL mappings + * Srcset contains a list of image URLs with associated size specified as size (e.g. 1024w) or scale (e.g. 
2x) + * The web browser should pick the most appropriate image depending on screen size and/or pixel density + * + * This function takes a srcset string such as the following: + * header640.png 640w, header960.png 960w, header1024.png 1024w + * + * Returns an array such as the following: + * [ + * '640w' => 'header640.png', + * '960w' => 'header960.png', + * '1024w' => 'header1024.png' + * ] + * + * @param string $srcset Content of srcset html attribute + * @param bool $return_largest_url Instead of returning an array, return URL for the largest entry + * @return array|string|null Content of srcset attribute as { size => url } array, or largest entry URL (null if there are no entries) if requested + */ +function parseSrcset(string $srcset, bool $return_largest_url = false) +{ + // The srcset format is trickier to parse than it seems: + // URLs may contain commas, and space after comma is not mandatory, so the following is valid: + // image.png?resize=640,640 640w,image.png?resize=960,960 960w,image.png?resize=1024,1024 1024w + // Since splitting by space or comma will not work, there is a precise algorithm to parse srcset attribute: + // https://html.spec.whatwg.org/multipage/images.html#parse-a-srcset-attribute + // To summarize, each srcset entry has the following format: + // 1. Leading spaces and comma. Zero or more spaces, zero or at most one comma + // 2. Any amount of characters up to the next whitespace (space, tab, newline...): This is the URL + // 3. A nonnegative number followed by lowercase w, x or h: This is the image size + // We parse the srcset entries using a regex to mimic the above parser/tokenizer behavior.
+ $preg_status = preg_match_all('/[\s]*,?[\s]*([^\s]+)\s+([0-9]+[wxh])/', $srcset, $matches); + $entries = []; + if ($preg_status !== false && $preg_status > 0) { + foreach ($matches[1] as $index => $url) { + if (array_key_exists($index, $matches[2])) { + $size = $matches[2][$index]; + $entries[$size] = html_entity_decode($url); + } + } + } + if ($return_largest_url) { + $largest_image_url = null; + $largest_image_size = -1; + foreach ($entries as $size => $url) { + $size_int = intval(substr($size, 0, strlen($size) - 1)); + if ($size_int > $largest_image_size) { + $largest_image_size = $size_int; + $largest_image_url = $url; + } + } + return $largest_image_url; + } else { + return $entries; + } +} + /** * Convert lazy-loading images and frames (video embeds) into static elements * @@ -248,15 +302,26 @@ function convertLazyLoading($dom) if (!empty($img->getAttribute('data-src'))) { $img->src = $img->getAttribute('data-src'); } elseif (!empty($img->getAttribute('data-srcset'))) { - $img->src = explode(' ', $img->getAttribute('data-srcset'))[0]; + $img->src = parseSrcset($img->getAttribute('data-srcset'), true); } elseif (!empty($img->getAttribute('data-lazy-src'))) { $img->src = $img->getAttribute('data-lazy-src'); + } elseif (!empty($img->getAttribute('data-orig-file'))) { + $img->src = $img->getAttribute('data-orig-file'); } elseif (!empty($img->getAttribute('srcset'))) { - $img->src = explode(' ', $img->getAttribute('srcset'))[0]; + $img->src = parseSrcset($img->getAttribute('srcset'), true); } else { continue; // Proceed to next element without removing attributes } - foreach (['loading', 'decoding', 'srcset', 'data-src', 'data-srcset'] as $attr) { + + // Remove data attributes, no longer necessary + foreach ($img->getAllAttributes() as $attr => $val) { + if (str_starts_with($attr, 'data-')) { + $img->removeAttribute($attr); + } + } + + // Remove other attributes that may be processed by the client + foreach (['loading', 'decoding', 'srcset'] as $attr) { if
($img->hasAttribute($attr)) { $img->removeAttribute($attr); } @@ -273,7 +338,7 @@ function convertLazyLoading($dom) $img->tag = 'img'; } // Adding/removing node would change its position inside the parent element, - // So instead we rewrite the node in-place though the outertext attribute + // So instead we rewrite the node in-place through the outertext attribute $picture->outertext = $img->outertext; } } diff --git a/lib/http.php b/lib/http.php new file mode 100644 index 00000000..8f0a1638 --- /dev/null +++ b/lib/http.php @@ -0,0 +1,386 @@ +<?php + +/** + * Thrown by bridges + */ +final class RateLimitException extends \Exception +{ +} + +/** + * @internal Do not use this class in bridges + */ +class HttpException extends \Exception +{ + public ?Response $response; + + public function __construct(string $message = '', int $statusCode = 0, ?Response $response = null) + { + parent::__construct($message, $statusCode); + $this->response = $response ?? new Response('', 0); + } + + public static function fromResponse(Response $response, string $url): HttpException + { + $message = sprintf( + '%s resulted in %s %s', + $url, + $response->getCode(), + $response->getStatusLine() + ); + if (CloudFlareException::isCloudFlareResponse($response)) { + return new CloudFlareException($message, $response->getCode(), $response); + } + return new HttpException(trim($message), $response->getCode(), $response); + } +} + +final class CloudFlareException extends HttpException +{ + public static function isCloudFlareResponse(Response $response): bool + { + $cloudflareTitles = [ + '<title>Just a moment...', + '<title>Please Wait...', + '<title>Attention Required!', + '<title>Security | Glassdoor', + '<title>Access denied', // cf as seen on patreon.com + ]; + foreach ($cloudflareTitles as $cloudflareTitle) { + if (str_contains($response->getBody(), $cloudflareTitle)) { + return true; + } + } + return false; + } +} + +interface HttpClient +{ + public function request(string $url, array 
$config = []): Response; +} + +final class CurlHttpClient implements HttpClient +{ + public function request(string $url, array $config = []): Response + { + $ch = curl_init($url); + + $defaults = [ + 'useragent' => null, + 'timeout' => 5, + 'headers' => [], + 'proxy' => null, + 'curl_options' => [], + 'if_not_modified_since' => null, + 'retries' => 2, + 'max_filesize' => null, + 'max_redirections' => 5, + ]; + + // if curl-impersonate is not detected, use some basic defaults + if (curl_version()['ssl_version'] != 'BoringSSL') { + // Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102 + $defaults['headers'] = [ + 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', + 'Accept-Language' => 'en-US,en;q=0.5', + 'Upgrade-Insecure-Requests' => '1', + 'Sec-Fetch-Dest' => 'document', + 'Sec-Fetch-Mode' => 'navigate', + 'Sec-Fetch-Site' => 'none', + 'Sec-Fetch-User' => '?1', + 'TE' => 'trailers', + ]; + $defaults['useragent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0'; + + curl_setopt($ch, CURLOPT_HEADER, false); + } + + $config = array_merge($defaults, $config); + + $httpHeaders = []; + foreach ($config['headers'] as $name => $value) { + $httpHeaders[] = sprintf('%s: %s', $name, $value); + } + curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders); + if ($config['useragent']) { + curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']); + } + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']); + curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']); + curl_setopt($ch, CURLOPT_ENCODING, ''); + curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS); + + if ($config['max_filesize']) { + // This option inspects the Content-Length header + curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']); + curl_setopt($ch, 
CURLOPT_NOPROGRESS, false); + // This progress function will monitor responses who omit the Content-Length header + curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) { + if ($downloaded > $config['max_filesize']) { + // Return a non-zero value to abort the transfer + return -1; + } + return 0; + }); + } + + if ($config['proxy']) { + curl_setopt($ch, CURLOPT_PROXY, $config['proxy']); + } + + if (curl_setopt_array($ch, $config['curl_options']) === false) { + throw new \Exception('Tried to set an illegal curl option'); + } + + if ($config['if_not_modified_since']) { + curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']); + curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE); + } + + $responseStatusLines = []; + $responseHeaders = []; + curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) { + $len = strlen($rawHeader); + if ($rawHeader === "\r\n") { + return $len; + } + if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) { + $responseStatusLines[] = trim($rawHeader); + return $len; + } + $header = explode(':', $rawHeader); + if (count($header) === 1) { + return $len; + } + $name = mb_strtolower(trim($header[0])); + $value = trim(implode(':', array_slice($header, 1))); + if (!isset($responseHeaders[$name])) { + $responseHeaders[$name] = []; + } + $responseHeaders[$name][] = $value; + return $len; + }); + + // This retry logic is a bit hard to understand, but it works + $tries = 0; + while (true) { + $tries++; + $body = curl_exec($ch); + if ($body !== false) { + // The network call was successful, so break out of the loop + break; + } + if ($tries <= $config['retries']) { + continue; + } + // Max retries reached, give up + $curl_error = curl_error($ch); + $curl_errno = curl_errno($ch); + throw new HttpException(sprintf( + 'cURL error %s: %s (%s) for %s', + $curl_error, + $curl_errno, + 
'https://curl.haxx.se/libcurl/c/libcurl-errors.html', + $url + )); + } + + $statusCode = curl_getinfo($ch, CURLINFO_RESPONSE_CODE); + curl_close($ch); + return new Response($body, $statusCode, $responseHeaders); + } +} + +final class Request +{ + private array $get; + private array $server; + private array $attributes; + + private function __construct() + { + } + + public static function fromGlobals(): self + { + $self = new self(); + $self->get = $_GET; + $self->server = $_SERVER; + $self->attributes = []; + return $self; + } + + public static function fromCli(array $cliArgs): self + { + $self = new self(); + $self->get = $cliArgs; + return $self; + } + + public function get(string $key, $default = null): ?string + { + return $this->get[$key] ?? $default; + } + + public function server(string $key, ?string $default = null): ?string + { + return $this->server[$key] ?? $default; + } + + public function withAttribute(string $name, $value = true): self + { + $clone = clone $this; + $clone->attributes[$name] = $value; + return $clone; + } + + public function getAttribute(string $key, $default = null) + { + return $this->attributes[$key] ?? 
$default; + } + + public function toArray(): array + { + return $this->get; + } +} + +final class Response +{ + public const STATUS_CODES = [ + '100' => 'Continue', + '101' => 'Switching Protocols', + '200' => 'OK', + '201' => 'Created', + '202' => 'Accepted', + '203' => 'Non-Authoritative Information', + '204' => 'No Content', + '205' => 'Reset Content', + '206' => 'Partial Content', + '300' => 'Multiple Choices', + '301' => 'Moved Permanently', + '302' => 'Found', + '303' => 'See Other', + '304' => 'Not Modified', + '305' => 'Use Proxy', + '400' => 'Bad Request', + '401' => 'Unauthorized', + '402' => 'Payment Required', + '403' => 'Forbidden', + '404' => 'Not Found', + '405' => 'Method Not Allowed', + '406' => 'Not Acceptable', + '407' => 'Proxy Authentication Required', + '408' => 'Request Timeout', + '409' => 'Conflict', + '410' => 'Gone', + '411' => 'Length Required', + '412' => 'Precondition Failed', + '413' => 'Request Entity Too Large', + '414' => 'Request-URI Too Long', + '415' => 'Unsupported Media Type', + '416' => 'Requested Range Not Satisfiable', + '417' => 'Expectation Failed', + '429' => 'Too Many Requests', + '500' => 'Internal Server Error', + '501' => 'Not Implemented', + '502' => 'Bad Gateway', + '503' => 'Service Unavailable', + '504' => 'Gateway Timeout', + '505' => 'HTTP Version Not Supported' + ]; + private string $body; + private int $code; + private array $headers; + + public function __construct( + string $body = '', + int $code = 200, + array $headers = [] + ) { + $this->body = $body; + $this->code = $code; + $this->headers = []; + + foreach ($headers as $name => $value) { + $name = mb_strtolower($name); + if (!isset($this->headers[$name])) { + $this->headers[$name] = []; + } + if (is_string($value)) { + $this->headers[$name][] = $value; + } + if (is_array($value)) { + $this->headers[$name] = $value; + } + } + } + + public function getBody(): string + { + return $this->body; + } + + public function getCode(): int + { + return 
$this->code; + } + + public function getStatusLine(): string + { + return self::STATUS_CODES[$this->code] ?? ''; + } + + public function getHeaders(): array + { + return $this->headers; + } + + /** + * HTTP response may have multiple headers with the same name. + * + * This method by default, returns only the last header. + * + * @return string[]|string|null + */ + public function getHeader(string $name, bool $all = false) + { + $name = mb_strtolower($name); + $header = $this->headers[$name] ?? null; + if (!$header) { + return null; + } + if ($all) { + return $header; + } + return array_pop($header); + } + + public function withHeader(string $name, string $value): self + { + $clone = clone $this; + $clone->headers[$name] = [$value]; + return $clone; + } + + public function withBody(string $body): self + { + $clone = clone $this; + $clone->body = $body; + return $clone; + } + + public function send(): void + { + http_response_code($this->code); + foreach ($this->headers as $name => $values) { + foreach ($values as $value) { + header(sprintf('%s: %s', $name, $value)); + } + } + print $this->body; + } +} diff --git a/lib/logger.php b/lib/logger.php new file mode 100644 index 00000000..04b56b33 --- /dev/null +++ b/lib/logger.php @@ -0,0 +1,218 @@ + 'DEBUG', + self::INFO => 'INFO', + self::WARNING => 'WARNING', + self::ERROR => 'ERROR', + ]; + + public function debug(string $message, array $context = []); + + public function info(string $message, array $context = []): void; + + public function warning(string $message, array $context = []): void; + + public function error(string $message, array $context = []): void; +} + +final class SimpleLogger implements Logger +{ + private string $name; + private array $handlers; + + /** + * @param callable[] $handlers + */ + public function __construct( + string $name, + array $handlers = [] + ) { + $this->name = $name; + $this->handlers = $handlers; + } + + public function addHandler(callable $fn) + { + $this->handlers[] = $fn; + } 
+ + public function debug(string $message, array $context = []) + { + $this->log(self::DEBUG, $message, $context); + } + + public function info(string $message, array $context = []): void + { + $this->log(self::INFO, $message, $context); + } + + public function warning(string $message, array $context = []): void + { + $this->log(self::WARNING, $message, $context); + } + + public function error(string $message, array $context = []): void + { + $this->log(self::ERROR, $message, $context); + } + + private function log(int $level, string $message, array $context = []): void + { + $ignoredMessages = [ + 'Format name invalid', + 'Unknown format given', + 'Unable to find', + ]; + foreach ($ignoredMessages as $ignoredMessage) { + if (str_starts_with($message, $ignoredMessage)) { + return; + } + } + + if (isset($context['e'])) { + /** @var \Throwable $e */ + $e = $context['e']; + + if ($e instanceof RateLimitException) { + return; + } + } + + foreach ($this->handlers as $handler) { + $handler([ + 'name' => $this->name, + 'created_at' => now(), + 'level' => $level, + 'level_name' => self::LEVEL_NAMES[$level], + 'message' => $message, + 'context' => $context, + ]); + } + } +} + +final class StreamHandler +{ + private string $stream; + private int $level; + + public function __construct(string $stream, int $level = Logger::DEBUG) + { + $this->stream = $stream; + $this->level = $level; + } + + public function __invoke(array $record) + { + if ($record['level'] < $this->level) { + return; + } + if (isset($record['context']['e'])) { + /** @var \Throwable $e */ + $e = $record['context']['e']; + unset($record['context']['e']); + $record['context']['type'] = get_class($e); + $record['context']['code'] = $e->getCode(); + $record['context']['message'] = sanitize_root($e->getMessage()); + $record['context']['file'] = sanitize_root($e->getFile()); + $record['context']['line'] = $e->getLine(); + $record['context']['url'] = get_current_url(); + $record['context']['trace'] = 
trace_to_call_points(trace_from_exception($e)); + } + $context = ''; + if ($record['context']) { + try { + $context = Json::encode($record['context']); + } catch (\JsonException $e) { + $record['context']['message'] = null; + $context = Json::encode($record['context']); + } + } + $text = sprintf( + "[%s] %s.%s %s %s\n", + $record['created_at']->format('Y-m-d H:i:s'), + $record['name'], + $record['level_name'], + $record['message'], + $context + ); + $bytes = file_put_contents($this->stream, $text, FILE_APPEND); + } +} + +final class ErrorLogHandler +{ + private int $level; + + public function __construct(int $level = Logger::DEBUG) + { + $this->level = $level; + } + + public function __invoke(array $record) + { + if ($record['level'] < $this->level) { + return; + } + if (isset($record['context']['e'])) { + /** @var \Throwable $e */ + $e = $record['context']['e']; + unset($record['context']['e']); + $record['context']['type'] = get_class($e); + $record['context']['code'] = $e->getCode(); + $record['context']['message'] = sanitize_root($e->getMessage()); + $record['context']['file'] = sanitize_root($e->getFile()); + $record['context']['line'] = $e->getLine(); + $record['context']['url'] = get_current_url(); + $record['context']['trace'] = trace_to_call_points(trace_from_exception($e)); + } + $context = ''; + if ($record['context']) { + try { + $context = Json::encode($record['context']); + } catch (\JsonException $e) { + $record['context']['message'] = null; + $context = Json::encode($record['context']); + } + } + // Intentionally omitting newline + $text = sprintf( + '[%s] %s.%s %s %s', + $record['created_at']->format('Y-m-d H:i:s'), + $record['name'], + $record['level_name'], + $record['message'], + $context + ); + error_log($text); + } +} + +final class NullLogger implements Logger +{ + public function debug(string $message, array $context = []) + { + } + + public function info(string $message, array $context = []): void + { + } + + public function warning(string 
$message, array $context = []): void + { + } + + public function error(string $message, array $context = []): void + { + } +} diff --git a/vendor/parsedown/LICENSE.txt b/lib/parsedown/LICENSE.txt similarity index 100% rename from vendor/parsedown/LICENSE.txt rename to lib/parsedown/LICENSE.txt diff --git a/vendor/parsedown/Parsedown.php b/lib/parsedown/Parsedown.php similarity index 99% rename from vendor/parsedown/Parsedown.php rename to lib/parsedown/Parsedown.php index 1b9d6d5b..d9b5476a 100644 --- a/vendor/parsedown/Parsedown.php +++ b/lib/parsedown/Parsedown.php @@ -712,7 +712,7 @@ class Parsedown # # Setext - protected function blockSetextHeader($Line, array $Block = null) + protected function blockSetextHeader($Line, ?array $Block = null) { if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) { @@ -850,7 +850,7 @@ class Parsedown # # Table - protected function blockTable($Line, array $Block = null) + protected function blockTable($Line, ?array $Block = null) { if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) { diff --git a/vendor/php-urljoin/LICENSE b/lib/php-urljoin/LICENSE similarity index 100% rename from vendor/php-urljoin/LICENSE rename to lib/php-urljoin/LICENSE diff --git a/vendor/php-urljoin/src/urljoin.php b/lib/php-urljoin/src/urljoin.php similarity index 96% rename from vendor/php-urljoin/src/urljoin.php rename to lib/php-urljoin/src/urljoin.php index ef84fbcd..026b767e 100644 --- a/vendor/php-urljoin/src/urljoin.php +++ b/lib/php-urljoin/src/urljoin.php @@ -40,7 +40,10 @@ function urljoin($base, $rel) { } if (isset($prel['scheme'])) { - if ($prel['scheme'] != $pbase['scheme'] || in_array($prel['scheme'], $uses_relative) == false) { + if ( + $prel['scheme'] != ($pbase['scheme'] ?? 
null) + || in_array($prel['scheme'], $uses_relative) == false + ) { return $rel; } } diff --git a/lib/php8backports.php b/lib/php8backports.php index 30dfdbd9..ccef6016 100644 --- a/lib/php8backports.php +++ b/lib/php8backports.php @@ -1,39 +1,5 @@ tags names that contains the expected value + // There are various source candidates per type of data, listed from most reliable to least reliable + static $meta_mappings = [ + // + // + // + // + // + // + // + 'uri' => [ + 'og:url', + 'twitter:url', + 'canonical', + ], + 'title' => [ + 'og:title', + 'twitter:title', + ], + 'content' => [ + 'og:description', + 'twitter:description', + 'description', + ], + 'timestamp' => [ + 'article:published_time', + 'og:article:published_time', + 'releaseDate', + 'releasedate', + 'article:modified_time', + 'og:article:modified_time', + 'lastModified', + 'lastmodified', + 'time', + ], + 'enclosures' => [ + 'og:image:secure_url', + 'og:image:url', + 'og:image', + 'twitter:image', + 'thumbnailImg', + 'thumbnailimg', + ], + 'author' => [ + 'article:author', + 'og:article:author', + 'author', + 'article:author:username', + 'profile:first_name', + 'profile:last_name', + 'article:author:first_name', + 'article:author:last_name', + 'twitter:creator', + ], + ]; + + $author_first_name = null; + $author_last_name = null; + + // For each Entry property, look for corresponding HTML tags using a list of candidates + foreach ($meta_mappings as $property => $field_list) { + foreach ($field_list as $field) { + // Look for HTML meta tag + $element = null; + if ($field === 'canonical') { + $element = $html->find('link[rel=canonical]'); + } else if ($field === 'time') { + $element = $html->find('time[datetime]'); + } else { + $element = $html->find("meta[property=$field], meta[name=$field]"); + } + // Found something? 
Extract the value and populate Entry field + if (!empty($element)) { + $element = $element[0]; + $field_value = ''; + if ($field === 'canonical') { + $field_value = $element->href; + } else if ($field === 'time') { + $field_value = $element->datetime; + } else { + $field_value = $element->content; + } + if (!empty($field_value)) { + if ($field === 'article:author:first_name' || $field === 'profile:first_name') { + $author_first_name = $field_value; + } else if ($field === 'article:author:last_name' || $field === 'profile:last_name') { + $author_last_name = $field_value; + } else { + $item[$property] = $field_value; + break; // Stop on first match, e.g. og:url has priority over canonical url. + } + } + } + } + } + + // Populate author from first name and last name if all we have is nothing or Twitter @username + if ((!isset($item['author']) || $item['author'][0] === '@') && (is_string($author_first_name) || is_string($author_last_name))) { + $author = ''; + if (is_string($author_first_name)) { + $author = $author_first_name; + } + if (is_string($author_last_name)) { + $author = $author . ' ' . $author_last_name; + } + $item['author'] = trim($author); + } + + // == Second source of metadata: Embedded JSON == + // JSON linked data - https://www.w3.org/TR/2014/REC-json-ld-20140116/ + // JSON linked data is COMPLEX and MAY BE LESS RELIABLE than tags. Used for fields not found as tags. + // The implementation below will load all ld+json we can understand and attempt to extract relevant information. 
+ + // ld+json object types that hold article metadata + // Each mapping define item fields and a list of possible JSON field for this field + // Each candiate JSON field is either a string (field name) or a list (path to nested field) + static $ldjson_article_types = ['webpage', 'article', 'newsarticle', 'blogposting']; + static $ldjson_article_mappings = [ + 'uri' => ['url', 'mainEntityOfPage'], + 'title' => ['headline'], + 'content' => ['description'], + 'timestamp' => ['dateModified', 'datePublished'], + 'enclosures' => ['image'], + 'author' => [['author', 'name'], ['author', '@id'], 'author'], + ]; + + // ld+json object types that hold author metadata + $ldjson_author_types = ['person', 'organization']; + $ldjson_author_mappings = []; // ID => Name + $ldjson_author_id = null; + + // Utility function for checking if JSON array matches one of the desired ld+json object types + // A JSON object may have a single ld+json @type as a string OR several types at once as a list + $ldjson_is_of_type = function ($json, $allowed_types) { + if (isset($json['@type'])) { + $json_types = $json['@type']; + if (!is_array($json_types)) { + $json_types = [ $json_types ]; + } + foreach ($json_types as $item_type) { + if (in_array(strtolower($item_type), $allowed_types)) { + return true; + } + } + } + return false; + }; + + // Process ld+json objects embedded in the HTML DOM + foreach ($html->find('script[type=application/ld+json]') as $html_ldjson_node) { + $json_raw = json_decode($html_ldjson_node->innertext, true); + if (is_array($json_raw)) { + // The JSON we just loaded may contain directly a single ld+json object AND/OR several ones under the '@graph' key + $json_items = [ $json_raw ]; + if (isset($json_raw['@graph'])) { + foreach ($json_raw['@graph'] as $json_raw_sub_item) { + $json_items[] = $json_raw_sub_item; + } + } + // Now that we have a list of distinct JSON items, we can process them individually + foreach ($json_items as $json) { + // JSON item that holds an ld+json 
Article object (or a variant) + if ($ldjson_is_of_type($json, $ldjson_article_types)) { + // For each item property, look for corresponding JSON fields and populate the item + foreach ($ldjson_article_mappings as $property => $field_list) { + // Skip fields already found as tags, except Twitter @username (because we might find a better name) + if (!isset($item[$property]) || ($property === 'author' && $item['author'][0] === '@')) { + foreach ($field_list as $field) { + $json_root = $json; + // If necessary, navigate inside the JSON object to access a nested field + if (is_array($field)) { + // At this point, $field = ['author', 'name'] and $json_root = {"author": {"name": "John Doe"}} + $json_navigate_ok = true; + while (count($field) > 1) { + $sub_field = array_shift($field); + if (array_key_exists($sub_field, $json_root)) { + $json_root = $json_root[$sub_field]; + if (array_is_list($json_root) && count($json_root) === 1) { + $json_root = $json_root[0]; // Unwrap list of single item e.g. {"author":[{"name":"John Doe"}]} + } + } else { + // Desired path not found in JSON, stop navigating + $json_navigate_ok = false; + break; + } + } + if (!$json_navigate_ok) { + continue; //Desired path not found in JSON, skip this field + } + $field = $field[0]; + // At this point, $field = "name" and $json_root = {"name": "John Doe"} + } + // Now we can check for desired field in JSON and populate $item accordingly + if (isset($json_root[$field])) { + $field_value = $json_root[$field]; + if (is_array($field_value) && isset($field_value[0])) { + $field_value = $field_value[0]; // Different versions of the same enclosure? Take the first one + } + if (is_string($field_value) && !empty($field_value)) { + if ($property === 'author' && $field === '@id') { + $ldjson_author_id = $field_value; // Author is referred to by its ID: We'll see later if we can resolve it + } else { + $item[$property] = $field_value; + break; // Stop on first match, e.g. 
{"author":{"name":"John Doe"}} has priority over {"author":"John Doe"} + } + } + } + } + } + } + // JSON item that holds an ld+json Author object (or a variant) + } else if ($ldjson_is_of_type($json, $ldjson_author_types)) { + if (isset($json['@id']) && isset($json['name'])) { + $ldjson_author_mappings[$json['@id']] = $json['name']; + } + } + } + } + } + + // Attempt to resolve ld+json author if all we have is nothing or Twitter @username + if ((!isset($item['author']) || $item['author'][0] === '@') && !is_null($ldjson_author_id) && isset($ldjson_author_mappings[$ldjson_author_id])) { + $item['author'] = $ldjson_author_mappings[$ldjson_author_id]; + } + + // Adjust item field types + if (isset($item['enclosures'])) { + $item['enclosures'] = [ $item['enclosures'] ]; + } + if (isset($item['timestamp'])) { + $item['timestamp'] = strtotime($item['timestamp']); + } + + return $item; +} diff --git a/vendor/simplehtmldom/LICENSE b/lib/simplehtmldom/LICENSE similarity index 100% rename from vendor/simplehtmldom/LICENSE rename to lib/simplehtmldom/LICENSE diff --git a/vendor/simplehtmldom/simple_html_dom.php b/lib/simplehtmldom/simple_html_dom.php similarity index 99% rename from vendor/simplehtmldom/simple_html_dom.php rename to lib/simplehtmldom/simple_html_dom.php index 3fc95760..e8b3a727 100644 --- a/vendor/simplehtmldom/simple_html_dom.php +++ b/lib/simplehtmldom/simple_html_dom.php @@ -114,14 +114,10 @@ function str_get_html( if (empty($str)) { throw new \Exception('Refusing to parse empty string input'); } - if (strlen($str) > MAX_FILE_SIZE) { - throw new \Exception('Refusing to parse too big input'); - } - if (empty($str) || strlen($str) > MAX_FILE_SIZE) { - $dom->clear(); - return false; - } + if (strlen($str) > Configuration::getConfig('system', 'max_file_size')) { + throw new \Exception('simple_html_dom: Refusing to parse too big input: ' . 
strlen($str)); + } return $dom->load($str, $lowercase, $stripRN); } diff --git a/lib/url.php b/lib/url.php new file mode 100644 index 00000000..9a1b59ad --- /dev/null +++ b/lib/url.php @@ -0,0 +1,151 @@ +withScheme($parts['scheme'] ?? '') + ->withHost($parts['host']) + ->withPort($parts['port'] ?? 80) + ->withPath($parts['path'] ?? '/') + ->withQueryString($parts['query'] ?? null); + // todo: add fragment + } + + public static function validate(string $url): bool + { + if (strlen($url) > 1500) { + return false; + } + $pattern = '#^https?://' // scheme + . '([a-z0-9-]+\.?)+' // one or more domain names + . '(\.[a-z]{1,24})?' // optional global tld + . '(:\d+)?' // optional port + . '($|/|\?)#i'; // end of string or slash or question mark + + return preg_match($pattern, $url) === 1; + } + + public function getScheme(): string + { + return $this->scheme; + } + + public function getHost(): string + { + return $this->host; + } + + public function getPort(): int + { + return $this->port; + } + + public function getPath(): string + { + return $this->path; + } + + public function getQueryString(): string + { + return $this->queryString; + } + + public function withScheme(string $scheme): self + { + if (!in_array($scheme, ['http', 'https'])) { + throw new UrlException(sprintf('Invalid scheme %s', $scheme)); + } + $clone = clone $this; + $clone->scheme = $scheme; + return $clone; + } + + public function withHost(string $host): self + { + $clone = clone $this; + $clone->host = $host; + return $clone; + } + + public function withPort(int $port) + { + $clone = clone $this; + $clone->port = $port; + return $clone; + } + + public function withPath(string $path): self + { + if (!str_starts_with($path, '/')) { + throw new UrlException(sprintf('Path must start with forward slash: %s', $path)); + } + if (str_starts_with($path, '//')) { + throw new UrlException(sprintf('Illegal path (too many forward slashes): %s', $path)); + } + $clone = clone $this; + $clone->path = $path; + return 
$clone; + } + + public function withQueryString(?string $queryString): self + { + $clone = clone $this; + $clone->queryString = $queryString; + return $clone; + } + + public function __toString() + { + if ($this->port === 80) { + $port = ''; + } else { + $port = ':' . $this->port; + } + if ($this->queryString) { + $queryString = '?' . $this->queryString; + } else { + $queryString = ''; + } + + return sprintf( + '%s://%s%s%s%s', + $this->scheme, + $this->host, + $port, + $this->path, + $queryString + ); + } +} diff --git a/lib/utils.php b/lib/utils.php index ea329f8d..dfee437b 100644 --- a/lib/utils.php +++ b/lib/utils.php @@ -1,18 +1,17 @@ getMessage()); + $sanitizedFilepath = sanitize_root($e->getFile()); return sprintf( '%s: %s in %s line %s', get_class($e), - sanitize_root($e->getMessage()), - sanitize_root($e->getFile()), + $sanitizedMessage, + $sanitizedFilepath, $e->getLine() ); } @@ -139,7 +140,7 @@ function _sanitize_path_name(string $s, string $pathName): string } /** - * This is buggy because strip tags removes a lot that isn't html + * This is buggy because strip_tags() removes a lot that isn't html */ function is_html(string $text): bool { @@ -170,27 +171,32 @@ function parse_mime_type($url) 'jpg' => 'image/jpeg', 'gif' => 'image/gif', 'png' => 'image/png', + 'webp' => 'image/webp', 'image' => 'image/*', 'mp3' => 'audio/mpeg', ]; - // '@' is used to mute open_basedir warning, see issue #818 - if (@is_readable('/etc/mime.types')) { - $file = fopen('/etc/mime.types', 'r'); - while (($line = fgets($file)) !== false) { - $line = trim(preg_replace('/#.*/', '', $line)); - if (!$line) { - continue; - } - $parts = preg_split('/\s+/', $line); - if (count($parts) == 1) { - continue; - } - $type = array_shift($parts); - foreach ($parts as $part) { - $mime[$part] = $type; + // if-check to avoid excessive php errors about open_basedir restriction (#4502) + $open_basedir = ini_get('open_basedir'); + if (! 
$open_basedir) { + // '@' is used to mute open_basedir warning, see issue #818 + if (@is_readable('/etc/mime.types')) { + $file = fopen('/etc/mime.types', 'r'); + while (($line = fgets($file)) !== false) { + $line = trim(preg_replace('/#.*/', '', $line)); + if (!$line) { + continue; + } + $parts = preg_split('/\s+/', $line); + if (count($parts) == 1) { + continue; + } + $type = array_shift($parts); + foreach ($parts as $part) { + $mime[$part] = $type; + } } + fclose($file); } - fclose($file); } } @@ -235,3 +241,41 @@ function create_random_string(int $bytes = 16): string { return bin2hex(openssl_random_pseudo_bytes($bytes)); } + +/** + * Thrown by bridges to indicate user failure. Will not be logged. + */ +final class ClientException extends \Exception +{ +} + +function throwClientException(string $message = '') +{ + throw new ClientException($message, 400); +} + +function throwServerException(string $message = '') +{ + throw new \Exception($message, 500); +} + +function throwRateLimitException(string $message = '') +{ + throw new RateLimitException($message); +} + +/** + * @deprecated Use throwClientException() instead + */ +function returnClientError(string $message = '') +{ + throw new \Exception($message); +} + +/** + * @deprecated Use throwServerException() instead + */ +function returnServerError(string $message = '') +{ + throw new \Exception($message); +} diff --git a/middlewares/BasicAuthMiddleware.php b/middlewares/BasicAuthMiddleware.php new file mode 100644 index 00000000..6b0803e2 --- /dev/null +++ b/middlewares/BasicAuthMiddleware.php @@ -0,0 +1,38 @@ +server('PHP_AUTH_USER'); + $password = $request->server('PHP_AUTH_PW'); + if ($user === null || $password === null) { + $html = render(__DIR__ . 
'/../templates/error.html.php', [ + 'message' => 'Please authenticate in order to access this instance!', + ]); + return new Response($html, 401, ['WWW-Authenticate' => 'Basic realm="RSS-Bridge"']); + } + if ( + (Configuration::getConfig('authentication', 'username') !== $user) + || (!hash_equals(Configuration::getConfig('authentication', 'password'), $password)) + ) { + $html = render(__DIR__ . '/../templates/error.html.php', [ + 'message' => 'Please authenticate in order to access this instance!', + ]); + return new Response($html, 401, ['WWW-Authenticate' => 'Basic realm="RSS-Bridge"']); + } + return $next($request); + } +} diff --git a/middlewares/CacheMiddleware.php b/middlewares/CacheMiddleware.php new file mode 100644 index 00000000..b8a34754 --- /dev/null +++ b/middlewares/CacheMiddleware.php @@ -0,0 +1,64 @@ +cache = $cache; + } + + public function __invoke(Request $request, $next): Response + { + $action = $request->getAttribute('action'); + + if ($action !== 'DisplayAction') { + // We only cache DisplayAction (for now) + return $next($request); + } + + // TODO: might want to remove som params from query + $cacheKey = 'http_' . json_encode($request->toArray()); + $cachedResponse = $this->cache->get($cacheKey); + + if ($cachedResponse) { + $ifModifiedSince = $request->server('HTTP_IF_MODIFIED_SINCE'); + $lastModified = $cachedResponse->getHeader('last-modified'); + if ($ifModifiedSince && $lastModified) { + $lastModified = new \DateTimeImmutable($lastModified); + $lastModifiedTimestamp = $lastModified->getTimestamp(); + $modifiedSince = strtotime($ifModifiedSince); + // TODO: \DateTimeImmutable can be compared directly + if ($lastModifiedTimestamp <= $modifiedSince) { + $modificationTimeGMT = gmdate('D, d M Y H:i:s ', $lastModifiedTimestamp); + return new Response('', 304, ['last-modified' => $modificationTimeGMT . 
'GMT']); + } + } + return $cachedResponse; + } + + /** @var Response $response */ + $response = $next($request); + + if ($response->getCode() === 200) { + // Do nothing because DisplayAction has already cached this on $cacheKey + } elseif (in_array($response->getCode(), [400, 403, 404, 429, 500, 503])) { + // Cache these responses for about ~10 mins on average + $this->cache->set($cacheKey, $response, 60 * 5 + rand(1, 60 * 10)); + } else { + // Should never happen + $this->cache->set($cacheKey, $response, 60 * 5); + } + + // For 1% of requests, prune cache + if (rand(1, 100) === 1) { + // This might be resource intensive! + $this->cache->prune(); + } + + return $response; + } +} \ No newline at end of file diff --git a/middlewares/ExceptionMiddleware.php b/middlewares/ExceptionMiddleware.php new file mode 100644 index 00000000..8bb74713 --- /dev/null +++ b/middlewares/ExceptionMiddleware.php @@ -0,0 +1,24 @@ +logger = $logger; + } + + public function __invoke(Request $request, $next): Response + { + try { + return $next($request); + } catch (\Throwable $e) { + $this->logger->error('Exception in ExceptionMiddleware', ['e' => $e]); + + return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 500); + } + } +} \ No newline at end of file diff --git a/middlewares/MaintenanceMiddleware.php b/middlewares/MaintenanceMiddleware.php new file mode 100644 index 00000000..de8a1baf --- /dev/null +++ b/middlewares/MaintenanceMiddleware.php @@ -0,0 +1,17 @@ + '503 Service Unavailable', + 'message' => 'RSS-Bridge is down for maintenance.', + ]), 503); + } +} diff --git a/middlewares/Middleware.php b/middlewares/Middleware.php new file mode 100644 index 00000000..83d93a3b --- /dev/null +++ b/middlewares/Middleware.php @@ -0,0 +1,8 @@ +toArray() as $key => $value) { + if (!is_string($value)) { + return new Response(render(__DIR__ . 
'/../templates/error.html.php', [ + 'message' => "Query parameter \"$key\" is not a string.", + ]), 400); + } + } + return $next($request); + } +} diff --git a/middlewares/TokenAuthenticationMiddleware.php b/middlewares/TokenAuthenticationMiddleware.php new file mode 100644 index 00000000..31544ab7 --- /dev/null +++ b/middlewares/TokenAuthenticationMiddleware.php @@ -0,0 +1,33 @@ +get('token'); + + if (! $token) { + return new Response(render(__DIR__ . '/../templates/token.html.php', [ + 'message' => 'Missing token', + 'token' => '', + ]), 401); + } + + if (! hash_equals(Configuration::getConfig('authentication', 'token'), $token)) { + return new Response(render(__DIR__ . '/../templates/token.html.php', [ + 'message' => 'Invalid token', + 'token' => $token, + ]), 401); + } + + $request = $request->withAttribute('token', $token); + + return $next($request); + } +} diff --git a/phpcs.xml b/phpcs.xml index 2db0553a..e4a1cd6e 100644 --- a/phpcs.xml +++ b/phpcs.xml @@ -1,8 +1,16 @@ - Created with the PHP Coding Standard Generator. http://edorian.github.com/php-coding-standard-generator/ + + Originally created with the PHP Coding Standard Generator. + But later manually tweaked. 
+ http://edorian.github.com/php-coding-standard-generator/ + + ./static ./vendor + ./lib/parsedown + ./lib/php-urljoin + ./lib/simplehtmldom ./templates ./config.default.ini.php ./config.ini.php @@ -11,6 +19,7 @@ + @@ -20,12 +29,22 @@ - - + + + + + + + + + + + + diff --git a/static/connectivity.js b/static/connectivity.js index 89f01f01..2f39ce6b 100644 --- a/static/connectivity.js +++ b/static/connectivity.js @@ -4,7 +4,7 @@ var abort = false; window.onload = function() { - fetch(remote + '/index.php?action=list').then(function(response) { + fetch(remote + '/?action=list').then(function(response) { return response.text() }).then(function(data){ processBridgeList(data); @@ -46,9 +46,9 @@ function buildTable(bridgeList) { var td_bridge = document.createElement('td'); td_bridge.innerText = bridgeList.bridges[bridge].name; - // Link to the actual bridge on index.php + // Link to the actual bridge on frontpage var a = document.createElement('a'); - a.href = remote + "/index.php?show_inactive=1#bridge-" + bridge; + a.href = remote + "/?#bridge-" + bridge; a.target = '_blank'; a.innerText = '[Show]'; a.style.marginLeft = '5px'; @@ -104,7 +104,7 @@ function checkNextBridgeAsync() { msg.getElementsByTagName('span')[0].textContent = 'Processing ' + bridge + '...'; - fetch(remote + '/index.php?action=Connectivity&bridge=' + bridge) + fetch(remote + '/?action=Connectivity&bridge=' + bridge) .then(function(response) { return response.text() }) .then(JSON.parse) .then(processBridgeResultAsync) diff --git a/static/rss-bridge.js b/static/rss-bridge.js index 498acd37..9cd004cb 100644 --- a/static/rss-bridge.js +++ b/static/rss-bridge.js @@ -1,21 +1,14 @@ function rssbridge_list_search() { - function remove_www_from_url(url) { - if (url.hostname.indexOf('www.') === 0) { - url.hostname = url.hostname.substr(4); - } - } - var search = document.getElementById('searchfield').value; - var searchAsUrl = document.createElement('a'); - searchAsUrl.href = search; - 
remove_www_from_url(searchAsUrl); + var bridgeCards = document.querySelectorAll('section.bridge-card'); for (var i = 0; i < bridgeCards.length; i++) { var bridgeName = bridgeCards[i].getAttribute('data-ref'); var bridgeShortName = bridgeCards[i].getAttribute('data-short-name'); var bridgeDescription = bridgeCards[i].querySelector('.description'); - var bridgeUrl = bridgeCards[i].getElementsByTagName('a')[0]; - remove_www_from_url(bridgeUrl); + var bridgeUrlElement = bridgeCards[i].getElementsByTagName('a')[0]; + var bridgeUrl = bridgeUrlElement.toString(); + bridgeCards[i].style.display = 'none'; if (!bridgeName || !bridgeUrl) { continue; @@ -30,10 +23,7 @@ function rssbridge_list_search() { if (bridgeDescription.textContent.match(searchRegex)) { bridgeCards[i].style.display = 'block'; } - if (bridgeUrl.toString().match(searchRegex)) { - bridgeCards[i].style.display = 'block'; - } - if (bridgeUrl.hostname === searchAsUrl.hostname) { + if (bridgeUrl.match(searchRegex)) { bridgeCards[i].style.display = 'block'; } } @@ -47,3 +37,88 @@ function rssbridge_toggle_bridge(){ bridge.getElementsByClassName('showmore-box')[0].checked = true; } } + +function rssbridge_use_placeholder_value(sender) { + let inputId = sender.getAttribute('data-for'); + let inputElement = document.getElementById(inputId); + inputElement.value = inputElement.getAttribute("placeholder"); +} + +var rssbridge_feed_finder = (function() { + /* + * Code for "Find feed by URL" feature + */ + + // Start the Feed search + async function rssbridge_feed_search(event) { + const input = document.getElementById('searchfield'); + let content = encodeURIComponent(input.value); + if (content) { + const findfeedresults = document.getElementById('findfeedresults'); + findfeedresults.innerHTML = 'Searching for matching feeds ...'; + let baseurl = window.location.protocol + window.location.pathname; + let url = baseurl + '?action=findfeed&format=Html&url=' + content; + const response = await fetch(url); + if 
(response.ok) { + const data = await response.json(); + rss_bridge_feed_display_found_feed(data); + } else { + rss_bridge_feed_display_feed_search_fail(); + } + } else { + rss_bridge_feed_display_find_feed_empty(); + } + } + + // Display the found feeds + function rss_bridge_feed_display_found_feed(obj) { + const findfeedresults = document.getElementById('findfeedresults'); + + let content = 'Found Feed(s) :'; + + // Let's go throug every Feed found + for (const element of obj) { + content += `
    +
    + +
    +
    +

    ${element.bridgeMeta.name}

    +

    + ${element.bridgeMeta.description} +

    +
    +
      `; + + // Now display every Feed parameter + for (const param in element.bridgeData) { + content += `
    • ${element.bridgeData[param].name} : ${element.bridgeData[param].value}
    • `; + } + content += `
    +
    +
    `; + } + content += '

    '; + findfeedresults.innerHTML = content; + } + + // Display an error if no feed were found + function rss_bridge_feed_display_feed_search_fail() { + const findfeedresults = document.getElementById('findfeedresults'); + findfeedresults.innerHTML = 'No Feed found !'; + } + + // Empty the Found Feed section + function rss_bridge_feed_display_find_feed_empty() { + const findfeedresults = document.getElementById('findfeedresults'); + findfeedresults.innerHTML = ''; + } + + // Add Event to 'Detect Feed" button + var rssbridge_feed_finder = function() { + const button = document.getElementById('findfeed'); + button.addEventListener("click", rssbridge_feed_search); + button.addEventListener("keyup", rssbridge_feed_search); + }; + return rssbridge_feed_finder; +}()); diff --git a/static/style.css b/static/style.css index a83e25e4..4c831534 100644 --- a/static/style.css +++ b/static/style.css @@ -186,6 +186,7 @@ section li { margin-left: 1em; } .bridge-card { + position: relative; text-align: center; } @@ -281,23 +282,13 @@ p.maintainer { text-align: right; } -.secure-warning { - background-color: #ffc600; - color: #5f5f5f; - box-shadow: 0px 1px 2px rgba(0, 0, 0, 0.3); - border-radius: 2px; - border: 1px solid transparent; - width: 80%; - margin: auto auto 6px; -} - .error strong { display: inline-block; width: 100px; } /* Hide all forms on the frontpage by default */ -form { +form.bridge-form { display: none; } @@ -395,11 +386,6 @@ button { } } /* @supports (display: grid) */ - - .secure-warning { - width: 100%; - } - } /* Dark theme */ @@ -453,3 +439,39 @@ button { color: #d8d3cb; } } + +/* find-feed */ +.search-result { + background-color: #f0f0f0; + border-radius: 5px; + padding: 15px; + display: flex; + position: relative; + text-align: left; + margin-bottom: 15px; +} +@media (prefers-color-scheme: dark) { + .search-result { + background-color: #202325; + } +} +.search-result h2 { + color: #288cfc; +} + +.search-result a { + text-decoration: none; + color: #248afa; 
+} +.search-result .icon { + margin: 0 15px 0 0; +} +.search-result span { + margin-right: 10px; +} +.search-result .description { + font-size: 110%; + margin-right: 0 !important; + margin-top: 5px !important; +} +/* end find-feed */ diff --git a/static/twitter-form.png b/static/twitter-form.png deleted file mode 100644 index 2f1a988c..00000000 Binary files a/static/twitter-form.png and /dev/null differ diff --git a/static/twitter-rasmus.png b/static/twitter-rasmus.png deleted file mode 100644 index 91692786..00000000 Binary files a/static/twitter-rasmus.png and /dev/null differ diff --git a/templates/access-denied.html.php b/templates/access-denied.html.php deleted file mode 100644 index 64968680..00000000 --- a/templates/access-denied.html.php +++ /dev/null @@ -1,4 +0,0 @@ - -

    - Please authenticate in order to access this instance -

    diff --git a/templates/base.html.php b/templates/base.html.php index ca31823d..a8ff7660 100644 --- a/templates/base.html.php +++ b/templates/base.html.php @@ -4,9 +4,11 @@ - <?= e($_title ?? 'RSS-Bridge') ?> + RSS-Bridge + + diff --git a/templates/error.html.php b/templates/error.html.php index 5220ba7c..16ab20b0 100644 --- a/templates/error.html.php +++ b/templates/error.html.php @@ -1,104 +1,15 @@ -
    + - - -

    The website is protected by CloudFlare

    -

    - RSS-Bridge tried to fetch a website. - The fetching was blocked by CloudFlare. - CloudFlare is anti-bot software. - Its purpose is to block non-humans. -

    - - - getCode() === 404): ?> -

    The website was not found

    -

    - RSS-Bridge tried to fetch a page on a website. - But it doesn't exists. -

    - - - getCode() === 429): ?> -

    Try again later

    -

    - RSS-Bridge tried to fetch a website. - They told us to try again later. -

    - - - - getCode() === 10): ?> -

    The rss feed is completely empty

    -

    - RSS-Bridge tried parse the empty string as xml. - The fetched url is not pointing to real xml. -

    - - - getCode() === 11): ?> -

    There is something wrong with the rss feed

    -

    - RSS-Bridge tried parse xml. It failed. The xml is probably broken. -

    - - - -

    Details

    - -
    -
    - Type: -
    - -
    - Code: getCode()) ?> -
    - -
    - Message: getMessage())) ?> -
    - -
    - File: getFile())) ?> -
    - -
    - Line: getLine()) ?> -
    -
    - -

    Trace

    - - $frame) : ?> - - # - -
    - - -
    - -

    Context

    - -
    - Query: -
    - -
    - Version: -
    - -
    - OS: -
    - -
    - PHP: -
    - -
    - - Go back -
    + +

    + +

    + +

    + +

    diff --git a/templates/exception.html.php b/templates/exception.html.php new file mode 100644 index 00000000..62ac90b4 --- /dev/null +++ b/templates/exception.html.php @@ -0,0 +1,148 @@ + +
    + + + +

    The website is protected by CloudFlare

    +

    + RSS-Bridge tried to fetch a website. + The fetching was blocked by CloudFlare. + CloudFlare is anti-bot software. + Its purpose is to block non-humans. +

    + + + getCode() === 400): ?> +

    400 Bad Request

    +

    + This is usually caused by an incorrectly constructed http request. +

    + + + getCode() === 403): ?> +

    403 Forbidden

    +

    + The HTTP 403 Forbidden response status code indicates that the + server understands the request but refuses to authorize it. +

    + + + getCode() === 404): ?> +

    404 Page Not Found

    +

    + RSS-Bridge tried to fetch a page on a website. + But it doesn't exists. +

    + + + getCode() === 429): ?> +

    429 Too Many Requests

    +

    + RSS-Bridge tried to fetch a website. + They told us to try again later. +

    + + + getCode() === 503): ?> +

    503 Service Unavailable

    +

    + Common causes are a server that is down for maintenance + or that is overloaded. +

    + + + getCode() === 0): ?> +

    + See + + https://curl.haxx.se/libcurl/c/libcurl-errors.html + + for description of the curl error code. +

    + +

    + + https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/getCode()) ?> + +

    + + + + getCode() === 10): ?> +

    The rss feed is completely empty

    +

    + RSS-Bridge tried parse the empty string as xml. + The fetched url is not pointing to real xml. +

    + + + getCode() === 11): ?> +

    There is something wrong with the rss feed

    +

    + RSS-Bridge tried parse xml. It failed. The xml is probably broken. +

    + + + +

    Details

    + +
    +
    + Type: +
    + +
    + Code: getCode()) ?> +
    + +
    + Message: getMessage())) ?> +
    + +
    + File: getFile())) ?> +
    + +
    + Line: getLine()) ?> +
    +
    + +

    Trace

    + + $frame) : ?> + + # + +
    + + +
    + +

    Context

    + +
    + Query: +
    + +
    + Version: +
    + +
    + OS: +
    + +
    + PHP: +
    + +
    + + Go back +
    + diff --git a/templates/frontpage.html.php b/templates/frontpage.html.php index 63f4a2ab..f285e8d6 100644 --- a/templates/frontpage.html.php +++ b/templates/frontpage.html.php @@ -1,7 +1,8 @@ - +