Use panther to retrieve more data from Aliexpress

This commit is contained in:
Jan Böhmer 2025-03-25 23:14:58 +01:00
parent 2fdd837354
commit 3626570a0b
7 changed files with 331 additions and 76 deletions

View file

@ -4,6 +4,8 @@ APP_SECRET='$ecretf0rt3st'
SYMFONY_DEPRECATIONS_HELPER=999999
PANTHER_APP_ENV=panther
PANTHER_ERROR_SCREENSHOT_DIR=./var/error-screenshots
PANTHER_APP_ENV=panther
PANTHER_ERROR_SCREENSHOT_DIR=./var/error-screenshots
DATABASE_URL="sqlite:///%kernel.project_dir%/var/app_test.db"
# Doctrine automatically adds an _test suffix to database name in test env

2
.gitignore vendored
View file

@ -8,6 +8,8 @@
/vendor/
###< symfony/framework-bundle ###
drivers/
###> symfony/phpunit-bridge ###
.phpunit
.phpunit.result.cache

View file

@ -17,6 +17,7 @@
"brick/math": "0.12.1 as 0.11.0",
"composer/ca-bundle": "^1.3",
"composer/package-versions-deprecated": "^1.11.99.5",
"dbrekelmans/bdi": "^1.4",
"doctrine/data-fixtures": "^2.0.0",
"doctrine/dbal": "^4.0.0",
"doctrine/doctrine-bundle": "^2.0",
@ -65,6 +66,7 @@
"symfony/http-kernel": "6.4.*",
"symfony/mailer": "6.4.*",
"symfony/monolog-bundle": "^3.1",
"symfony/panther": "^2.2",
"symfony/polyfill-php82": "^1.28",
"symfony/process": "6.4.*",
"symfony/property-access": "6.4.*",

342
composer.lock generated
View file

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "75643d42e05fce4684644d375bff2d0a",
"content-hash": "d894170eb8b24ff5376bf32a2fa71204",
"packages": [
{
"name": "amphp/amp",
@ -1569,6 +1569,55 @@
},
"time": "2024-04-12T12:12:48+00:00"
},
{
"name": "dbrekelmans/bdi",
"version": "1.4.0",
"source": {
"type": "git",
"url": "https://github.com/dbrekelmans/bdi.git",
"reference": "fa2ff9b5ed0508ddf5cd574f9dfa6fea954a9acd"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/dbrekelmans/bdi/zipball/fa2ff9b5ed0508ddf5cd574f9dfa6fea954a9acd",
"reference": "fa2ff9b5ed0508ddf5cd574f9dfa6fea954a9acd",
"shasum": ""
},
"require": {
"ext-json": "*",
"ext-zip": "*",
"ext-zlib": "*",
"php": "^8.1"
},
"bin": [
"bdi",
"bdi.phar"
],
"type": "library",
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Daniël Brekelmans",
"homepage": "https://github.com/dbrekelmans"
},
{
"name": "Contributors",
"homepage": "https://github.com/dbrekelmans/bdi/graphs/contributors"
}
],
"description": "PHAR distribution of dbrekelmans/browser-driver-installer.",
"homepage": "https://github.com/dbrekelmans/bdi",
"keywords": [
"browser-driver-installer"
],
"support": {
"source": "https://github.com/dbrekelmans/bdi/tree/1.4.0"
},
"time": "2024-12-12T18:36:47+00:00"
},
{
"name": "doctrine/collections",
"version": "2.3.0",
@ -6269,6 +6318,72 @@
},
"time": "2024-03-15T13:55:21+00:00"
},
{
"name": "php-webdriver/webdriver",
"version": "1.15.2",
"source": {
"type": "git",
"url": "https://github.com/php-webdriver/php-webdriver.git",
"reference": "998e499b786805568deaf8cbf06f4044f05d91bf"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-webdriver/php-webdriver/zipball/998e499b786805568deaf8cbf06f4044f05d91bf",
"reference": "998e499b786805568deaf8cbf06f4044f05d91bf",
"shasum": ""
},
"require": {
"ext-curl": "*",
"ext-json": "*",
"ext-zip": "*",
"php": "^7.3 || ^8.0",
"symfony/polyfill-mbstring": "^1.12",
"symfony/process": "^5.0 || ^6.0 || ^7.0"
},
"replace": {
"facebook/webdriver": "*"
},
"require-dev": {
"ergebnis/composer-normalize": "^2.20.0",
"ondram/ci-detector": "^4.0",
"php-coveralls/php-coveralls": "^2.4",
"php-mock/php-mock-phpunit": "^2.0",
"php-parallel-lint/php-parallel-lint": "^1.2",
"phpunit/phpunit": "^9.3",
"squizlabs/php_codesniffer": "^3.5",
"symfony/var-dumper": "^5.0 || ^6.0 || ^7.0"
},
"suggest": {
"ext-SimpleXML": "For Firefox profile creation"
},
"type": "library",
"autoload": {
"files": [
"lib/Exception/TimeoutException.php"
],
"psr-4": {
"Facebook\\WebDriver\\": "lib/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"description": "A PHP client for Selenium WebDriver. Previously facebook/webdriver.",
"homepage": "https://github.com/php-webdriver/php-webdriver",
"keywords": [
"Chromedriver",
"geckodriver",
"php",
"selenium",
"webdriver"
],
"support": {
"issues": "https://github.com/php-webdriver/php-webdriver/issues",
"source": "https://github.com/php-webdriver/php-webdriver/tree/1.15.2"
},
"time": "2024-11-21T15:12:59+00:00"
},
{
"name": "phpdocumentor/reflection-common",
"version": "2.2.0",
@ -8157,6 +8272,74 @@
],
"time": "2024-10-25T15:07:50+00:00"
},
{
"name": "symfony/browser-kit",
"version": "v6.4.19",
"source": {
"type": "git",
"url": "https://github.com/symfony/browser-kit.git",
"reference": "ce95f3e3239159e7fa3be7690c6ce95a4714637f"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/browser-kit/zipball/ce95f3e3239159e7fa3be7690c6ce95a4714637f",
"reference": "ce95f3e3239159e7fa3be7690c6ce95a4714637f",
"shasum": ""
},
"require": {
"php": ">=8.1",
"symfony/dom-crawler": "^5.4|^6.0|^7.0"
},
"require-dev": {
"symfony/css-selector": "^5.4|^6.0|^7.0",
"symfony/http-client": "^5.4|^6.0|^7.0",
"symfony/mime": "^5.4|^6.0|^7.0",
"symfony/process": "^5.4|^6.0|^7.0"
},
"type": "library",
"autoload": {
"psr-4": {
"Symfony\\Component\\BrowserKit\\": ""
},
"exclude-from-classmap": [
"/Tests/"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Fabien Potencier",
"email": "fabien@symfony.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Simulates the behavior of a web browser, allowing you to make requests, click on links and submit forms programmatically",
"homepage": "https://symfony.com",
"support": {
"source": "https://github.com/symfony/browser-kit/tree/v6.4.19"
},
"funding": [
{
"url": "https://symfony.com/sponsor",
"type": "custom"
},
{
"url": "https://github.com/fabpot",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/symfony",
"type": "tidelift"
}
],
"time": "2025-02-14T11:23:16+00:00"
},
{
"name": "symfony/cache",
"version": "v6.4.19",
@ -10610,6 +10793,95 @@
],
"time": "2024-11-20T10:57:02+00:00"
},
{
"name": "symfony/panther",
"version": "v2.2.0",
"source": {
"type": "git",
"url": "https://github.com/symfony/panther.git",
"reference": "b7e0f834c9046918972edb3dde2ecc4a20f6155e"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/panther/zipball/b7e0f834c9046918972edb3dde2ecc4a20f6155e",
"reference": "b7e0f834c9046918972edb3dde2ecc4a20f6155e",
"shasum": ""
},
"require": {
"ext-dom": "*",
"ext-libxml": "*",
"php": ">=8.0",
"php-webdriver/webdriver": "^1.8.2",
"symfony/browser-kit": "^5.4 || ^6.4 || ^7.0",
"symfony/dependency-injection": "^5.4 || ^6.4 || ^7.0",
"symfony/deprecation-contracts": "^2.4 || ^3",
"symfony/dom-crawler": "^5.4 || ^6.4 || ^7.0",
"symfony/http-client": "^6.4 || ^7.0",
"symfony/http-kernel": "^5.4 || ^6.4 || ^7.0",
"symfony/process": "^5.4 || ^6.4 || ^7.0"
},
"require-dev": {
"symfony/css-selector": "^5.4 || ^6.4 || ^7.0",
"symfony/framework-bundle": "^5.4 || ^6.4 || ^7.0",
"symfony/mime": "^5.4 || ^6.4 || ^7.0",
"symfony/phpunit-bridge": "^7.2.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-main": "2.0.x-dev"
}
},
"autoload": {
"psr-4": {
"Symfony\\Component\\Panther\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Kévin Dunglas",
"email": "dunglas@gmail.com",
"homepage": "https://dunglas.fr"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "A browser testing and web scraping library for PHP and Symfony.",
"homepage": "https://dunglas.fr",
"keywords": [
"e2e",
"scraping",
"selenium",
"symfony",
"testing",
"webdriver"
],
"support": {
"issues": "https://github.com/symfony/panther/issues",
"source": "https://github.com/symfony/panther/tree/v2.2.0"
},
"funding": [
{
"url": "https://www.panthera.org/donate",
"type": "custom"
},
{
"url": "https://github.com/dunglas",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/panther",
"type": "tidelift"
}
],
"time": "2025-01-30T13:11:55+00:00"
},
{
"name": "symfony/password-hasher",
"version": "v6.4.13",
@ -18332,74 +18604,6 @@
],
"time": "2020-09-28T06:39:44+00:00"
},
{
"name": "symfony/browser-kit",
"version": "v6.4.19",
"source": {
"type": "git",
"url": "https://github.com/symfony/browser-kit.git",
"reference": "ce95f3e3239159e7fa3be7690c6ce95a4714637f"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/browser-kit/zipball/ce95f3e3239159e7fa3be7690c6ce95a4714637f",
"reference": "ce95f3e3239159e7fa3be7690c6ce95a4714637f",
"shasum": ""
},
"require": {
"php": ">=8.1",
"symfony/dom-crawler": "^5.4|^6.0|^7.0"
},
"require-dev": {
"symfony/css-selector": "^5.4|^6.0|^7.0",
"symfony/http-client": "^5.4|^6.0|^7.0",
"symfony/mime": "^5.4|^6.0|^7.0",
"symfony/process": "^5.4|^6.0|^7.0"
},
"type": "library",
"autoload": {
"psr-4": {
"Symfony\\Component\\BrowserKit\\": ""
},
"exclude-from-classmap": [
"/Tests/"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Fabien Potencier",
"email": "fabien@symfony.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Simulates the behavior of a web browser, allowing you to make requests, click on links and submit forms programmatically",
"homepage": "https://symfony.com",
"support": {
"source": "https://github.com/symfony/browser-kit/tree/v6.4.19"
},
"funding": [
{
"url": "https://symfony.com/sponsor",
"type": "custom"
},
{
"url": "https://github.com/fabpot",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/symfony",
"type": "tidelift"
}
],
"time": "2025-02-14T11:23:16+00:00"
},
{
"name": "symfony/debug-bundle",
"version": "v6.4.13",

View file

@ -28,6 +28,7 @@
</testsuite>
</testsuites>
<extensions>
<extension class="Symfony\Component\Panther\ServerExtension" />
<extension class="DAMA\DoctrineTestBundle\PHPUnit\PHPUnitExtension"/>
</extensions>
<listeners>

View file

@ -25,15 +25,24 @@ namespace App\Services\InfoProviderSystem\Providers;
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
use App\Services\InfoProviderSystem\DTOs\SearchResultDTO;
use Facebook\WebDriver\Chrome\ChromeOptions;
use Facebook\WebDriver\WebDriverDimension;
use Symfony\Component\DependencyInjection\Attribute\Autowire;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Panther\Client;
use Symfony\Component\Panther\DomCrawler\Link;
use Symfony\Contracts\HttpClient\HttpClientInterface;
class AliexpressProvider implements InfoProviderInterface
{
public function __construct(private readonly HttpClientInterface $client)
{
private readonly string $chromiumDriverPath;
public function __construct(private readonly HttpClientInterface $client,
#[Autowire('%kernel.project_dir%')]
private readonly string $projectDir)
{
$this->chromiumDriverPath = $this->projectDir . '/drivers/chromedriver.exe';
}
public function getProviderInfo(): array
@ -125,18 +134,44 @@ class AliexpressProvider implements InfoProviderInterface
}
$product_page = $this->getBaseURL() . "/item/{$id}.html";
$response = $this->client->request('GET', $product_page );
//Create panther client
$chromeOptions = new ChromeOptions();
//Disable W3C mode to avoid issues when retrieving html() from elements. See https://github.com/symfony/panther/issues/478
$chromeOptions->setExperimentalOption('w3c', false);
$client = Client::createChromeClient( $this->chromiumDriverPath, options: ['capabilities' => [ChromeOptions::CAPABILITY => $chromeOptions]]);
$client->manage()->deleteAllCookies();
$client->manage()->window()->setSize(new WebDriverDimension(1920, 1080));
$client->request('GET', $product_page );
//Dismiss cookie consent
$dom = $client->waitFor('div.global-gdpr-wrap button.btn-accept');
$dom->filter('div.global-gdpr-wrap button.btn-accept')->first()->click();
$dom = $client->waitFor('h1[data-pl="product-title"]');
$name = $dom->filter('h1[data-pl="product-title"]')->text();
//Click on the description button
$dom->filter('a[href="#nav-description"]')->first()->click();
//$client->clickLink('Übersicht');
$dom = $client->waitFor('#product-description');
$description = $dom->filter('#product-description')->html();
//Remove any script tags. This only prevents weird output in the notes field; it is not a real security measure
$description = preg_replace('/<script\b[^>]*>(.*?)<\/script>/is', "", $description);
$content = $response->getContent();
$dom = new Crawler($content);
return new PartDetailDTO(
provider_key: $this->getProviderKey(),
provider_id: $id,
name: $dom->filter('h1[data-pl="product-title"]')->text(),
name: $name,
description: "",
provider_url: $product_page,
notes: $dom->filter('div[data-pl="product-description"]')->html(),
notes: $description,
);
}

View file

@ -559,6 +559,15 @@
"symfony/options-resolver": {
"version": "v4.2.3"
},
"symfony/panther": {
"version": "2.2",
"recipe": {
"repo": "github.com/symfony/recipes",
"branch": "main",
"version": "1.0",
"ref": "bc2de681f79db177eac72d5b04c23bd59bea2b46"
}
},
"symfony/password-hasher": {
"version": "v5.3.8"
},