From 80527e35c39c49cba0f8bef88ca9b613af3ab61b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Wed, 19 Feb 2025 00:44:49 +0100 Subject: [PATCH 01/16] Added basic reichelt search capabilities --- composer.json | 3 +- composer.lock | 136 +++++++++--------- .../Providers/LCSCProvider.php | 2 +- .../Providers/ReicheltProvider.php | 109 ++++++++++++++ 4 files changed, 180 insertions(+), 70 deletions(-) create mode 100644 src/Services/InfoProviderSystem/Providers/ReicheltProvider.php diff --git a/composer.json b/composer.json index 11026fa2..3b2904d6 100644 --- a/composer.json +++ b/composer.json @@ -54,6 +54,8 @@ "symfony/apache-pack": "^1.0", "symfony/asset": "6.4.*", "symfony/console": "6.4.*", + "symfony/css-selector": "6.4.*", + "symfony/dom-crawler": "6.4.*", "symfony/dotenv": "6.4.*", "symfony/expression-language": "6.4.*", "symfony/flex": "^v2.3.1", @@ -104,7 +106,6 @@ "rector/rector": "^2.0.4", "roave/security-advisories": "dev-latest", "symfony/browser-kit": "6.4.*", - "symfony/css-selector": "6.4.*", "symfony/debug-bundle": "6.4.*", "symfony/maker-bundle": "^1.13", "symfony/phpunit-bridge": "6.4.*", diff --git a/composer.lock b/composer.lock index 892ae0c7..498132b2 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "ca8701d95e24bae5d28ccdcfe242e8e4", + "content-hash": "75643d42e05fce4684644d375bff2d0a", "packages": [ { "name": "amphp/amp", @@ -8984,6 +8984,73 @@ ], "time": "2025-01-25T08:04:58+00:00" }, + { + "name": "symfony/dom-crawler", + "version": "v6.4.18", + "source": { + "type": "git", + "url": "https://github.com/symfony/dom-crawler.git", + "reference": "fd07959d3e8992795029bdab3605c2e8e895034e" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/dom-crawler/zipball/fd07959d3e8992795029bdab3605c2e8e895034e", + "reference": 
"fd07959d3e8992795029bdab3605c2e8e895034e", + "shasum": "" + }, + "require": { + "masterminds/html5": "^2.6", + "php": ">=8.1", + "symfony/polyfill-ctype": "~1.8", + "symfony/polyfill-mbstring": "~1.0" + }, + "require-dev": { + "symfony/css-selector": "^5.4|^6.0|^7.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "Symfony\\Component\\DomCrawler\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Eases DOM navigation for HTML and XML documents", + "homepage": "https://symfony.com", + "support": { + "source": "https://github.com/symfony/dom-crawler/tree/v6.4.18" + }, + "funding": [ + { + "url": "https://symfony.com/sponsor", + "type": "custom" + }, + { + "url": "https://github.com/fabpot", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", + "type": "tidelift" + } + ], + "time": "2025-01-09T15:35:00+00:00" + }, { "name": "symfony/dotenv", "version": "v6.4.16", @@ -18497,73 +18564,6 @@ ], "time": "2024-09-25T14:18:03+00:00" }, - { - "name": "symfony/dom-crawler", - "version": "v6.4.18", - "source": { - "type": "git", - "url": "https://github.com/symfony/dom-crawler.git", - "reference": "fd07959d3e8992795029bdab3605c2e8e895034e" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/symfony/dom-crawler/zipball/fd07959d3e8992795029bdab3605c2e8e895034e", - "reference": "fd07959d3e8992795029bdab3605c2e8e895034e", - "shasum": "" - }, - "require": { - "masterminds/html5": "^2.6", - "php": ">=8.1", - "symfony/polyfill-ctype": "~1.8", - "symfony/polyfill-mbstring": "~1.0" - }, - "require-dev": { - "symfony/css-selector": "^5.4|^6.0|^7.0" - }, - "type": "library", - "autoload": { - "psr-4": { - 
"Symfony\\Component\\DomCrawler\\": "" - }, - "exclude-from-classmap": [ - "/Tests/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Fabien Potencier", - "email": "fabien@symfony.com" - }, - { - "name": "Symfony Community", - "homepage": "https://symfony.com/contributors" - } - ], - "description": "Eases DOM navigation for HTML and XML documents", - "homepage": "https://symfony.com", - "support": { - "source": "https://github.com/symfony/dom-crawler/tree/v6.4.18" - }, - "funding": [ - { - "url": "https://symfony.com/sponsor", - "type": "custom" - }, - { - "url": "https://github.com/fabpot", - "type": "github" - }, - { - "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", - "type": "tidelift" - } - ], - "time": "2025-01-09T15:35:00+00:00" - }, { "name": "symfony/maker-bundle", "version": "v1.62.1", diff --git a/src/Services/InfoProviderSystem/Providers/LCSCProvider.php b/src/Services/InfoProviderSystem/Providers/LCSCProvider.php index d903a8dd..375c6f4d 100755 --- a/src/Services/InfoProviderSystem/Providers/LCSCProvider.php +++ b/src/Services/InfoProviderSystem/Providers/LCSCProvider.php @@ -76,7 +76,7 @@ class LCSCProvider implements InfoProviderInterface 'Cookie' => new Cookie('currencyCode', $this->currency) ], 'query' => [ - 'productCode' => $id, + 'productCode' => $id, ], ]); diff --git a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php new file mode 100644 index 00000000..19c52b2a --- /dev/null +++ b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php @@ -0,0 +1,109 @@ +.
+ */ + +declare(strict_types=1); + + +namespace App\Services\InfoProviderSystem\Providers; + +use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; +use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; +use Symfony\Component\DomCrawler\Crawler; +use Symfony\Contracts\HttpClient\HttpClientInterface; + +class ReicheltProvider implements InfoProviderInterface +{ + + private const SEARCH_ENDPOINT = "https://www.reichelt.com/index.html?ACTION=446&LA=0&nbc=1&q=%s"; + + public function __construct(private readonly HttpClientInterface $client) + { + } + + public function getProviderInfo(): array + { + return [ + 'name' => 'Reichelt', + 'description' => 'TODO', + 'url' => 'https://www.reichelt.de/', + 'disabled_help' => 'TODO' + ]; + } + + public function getProviderKey(): string + { + return 'reichelt'; + } + + public function isActive(): bool + { + return true; + } + + public function searchByKeyword(string $keyword): array + { + //Lowercase the keyword and urlencode it + $keyword = urlencode($keyword); + $response = $this->client->request('GET', sprintf(self::SEARCH_ENDPOINT, $keyword)); + $html = $response->getContent(); + + //Parse the HTML and return the results + $dom = new Crawler($html); + //Iterate over all div.al_gallery_article elements + $results = []; + $dom->filter('div.al_gallery_article')->each(function (Crawler $element) use (&$results) { + + $productID = $element->filter('meta[itemprop="productID"]')->attr('content'); + $name = $element->filter('meta[itemprop="name"]')->attr('content'); + $sku = $element->filter('meta[itemprop="sku"]')->attr('content'); + + //Try to extract a picture URL: + $pictureURL = $element->filter("div.al_artlogo img")->attr('src'); + + $results[] = new SearchResultDTO( + provider_key: $this->getProviderKey(), + provider_id: $productID, + name: $productID, + description: $name, + category: null, + manufacturer: $sku, + preview_image_url: $pictureURL, + provider_url: $element->filter('a.al_artinfo_link')->attr('href') + ); + 
}); + + return $results; + } + + public function getDetails(string $id): PartDetailDTO + { + // TODO: Implement getDetails() method. + } + + public function getCapabilities(): array + { + return [ + ProviderCapabilities::BASIC, + ProviderCapabilities::PICTURE, + ProviderCapabilities::DATASHEET, + ProviderCapabilities::PRICE, + ]; + } +} \ No newline at end of file From 5612a790fb9b17459636875f2891b0abdb29f6f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Wed, 19 Feb 2025 23:55:58 +0100 Subject: [PATCH 02/16] Added basic way to retrieve simple part infos and datasheet --- .../Providers/ReicheltProvider.php | 52 ++++++++++++++++++- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php index 19c52b2a..67faac3a 100644 --- a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php @@ -23,6 +23,7 @@ declare(strict_types=1); namespace App\Services\InfoProviderSystem\Providers; +use App\Services\InfoProviderSystem\DTOs\FileDTO; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; use Symfony\Component\DomCrawler\Crawler; @@ -70,6 +71,9 @@ class ReicheltProvider implements InfoProviderInterface $results = []; $dom->filter('div.al_gallery_article')->each(function (Crawler $element) use (&$results) { + //Extract product id from data-product attribute + $artId = json_decode($element->attr('data-product'), true, 2, JSON_THROW_ON_ERROR)['artid']; + $productID = $element->filter('meta[itemprop="productID"]')->attr('content'); $name = $element->filter('meta[itemprop="name"]')->attr('content'); $sku = $element->filter('meta[itemprop="sku"]')->attr('content'); @@ -79,7 +83,7 @@ class ReicheltProvider implements InfoProviderInterface $results[] = new SearchResultDTO( provider_key: 
$this->getProviderKey(), - provider_id: $productID, + provider_id: $artId, name: $productID, description: $name, category: null, @@ -94,7 +98,51 @@ class ReicheltProvider implements InfoProviderInterface public function getDetails(string $id): PartDetailDTO { - // TODO: Implement getDetails() method. + //Check that the ID is a number + if (!is_numeric($id)) { + throw new \InvalidArgumentException("Invalid ID"); + } + + //Use this endpoint to resolve the artID to a product page + $response = $this->client->request('GET', sprintf('https://www.reichelt.com/?ACTION=514&id=74&article=%s&LANGUAGE=EN&CCOUNTRY=DE', $id)); + $json = $response->toArray(); + + //Retrieve the product page from the response + $productPage = $this->getBaseURL() . '/shop/product' . $json[0]['article_path']; + + + $response = $this->client->request('GET', $productPage); + $html = $response->getContent(); + $dom = new Crawler($html); + + //Extract the product notes + $notes = $dom->filter('p[itemprop="description"]')->html(); + + //Extract datasheets + $datasheets = []; + $dom->filter('div.articleDatasheet a')->each(function (Crawler $element) use (&$datasheets) { + $datasheets[] = new FileDTO($element->attr('href'), $element->filter('span')->text()); + }); + + //Create part object + return new PartDetailDTO( + provider_key: $this->getProviderKey(), + provider_id: $id, + name: $json[0]['article_artnr'], + description: $json[0]['article_besch'], + manufacturer: $json[0]['manufacturer_name'], + preview_image_url: $json[0]['article_picture'], + provider_url: $productPage, + notes: $notes, + datasheets: $datasheets + ); + + } + + private function getBaseURL(): string + { + //Without the trailing slash + return 'https://www.reichelt.com/de/en'; } public function getCapabilities(): array From 376c7e7a6f5be168dc06fd75dfad97897b36f3c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 00:17:53 +0100 Subject: [PATCH 03/16] Allow to parse ranges for ParameterDTO which just 
contain two dots --- src/Services/InfoProviderSystem/DTOs/ParameterDTO.php | 6 +++--- .../InfoProviderSystem/DTOs/ParameterDTOTest.php | 10 ++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/Services/InfoProviderSystem/DTOs/ParameterDTO.php b/src/Services/InfoProviderSystem/DTOs/ParameterDTO.php index 3332700b..0b54d1a9 100644 --- a/src/Services/InfoProviderSystem/DTOs/ParameterDTO.php +++ b/src/Services/InfoProviderSystem/DTOs/ParameterDTO.php @@ -72,9 +72,9 @@ class ParameterDTO group: $group); } - //If the attribute contains "..." or a tilde we assume it is a range - if (preg_match('/(\.{3}|~)/', $value) === 1) { - $parts = preg_split('/\s*(\.{3}|~)\s*/', $value); + //If the attribute contains ".." or "..." or a tilde we assume it is a range + if (preg_match('/(\.{2,3}|~)/', $value) === 1) { + $parts = preg_split('/\s*(\.{2,3}|~)\s*/', $value); if (count($parts) === 2) { //Try to extract number and unit from value (allow leading +) if ($unit === null || trim($unit) === '') { diff --git a/tests/Services/InfoProviderSystem/DTOs/ParameterDTOTest.php b/tests/Services/InfoProviderSystem/DTOs/ParameterDTOTest.php index 7bbebf0b..ee4ca39a 100644 --- a/tests/Services/InfoProviderSystem/DTOs/ParameterDTOTest.php +++ b/tests/Services/InfoProviderSystem/DTOs/ParameterDTOTest.php @@ -70,6 +70,16 @@ class ParameterDTOTest extends TestCase 'test' ]; + //Test ranges + yield [ + new ParameterDTO('test', value_min: 1.0, value_max: 2.0, unit: 'kg', symbol: 'm', group: 'test'), + 'test', + '1.0..2.0', + 'kg', + 'm', + 'test' + ]; + //Test ranges with tilde yield [ new ParameterDTO('test', value_min: -1.0, value_max: 2.0, unit: 'kg', symbol: 'm', group: 'test'), From e0bf8e5fbc0e1836ef1c806a4d7e9c4a3c1d70f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 00:18:05 +0100 Subject: [PATCH 04/16] Allow reichelt provider to parse parameters --- .../Providers/ReicheltProvider.php | 24 ++++++++++++++++++- 1 file changed, 23 
insertions(+), 1 deletion(-) diff --git a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php index 67faac3a..2afb978f 100644 --- a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php @@ -24,6 +24,7 @@ declare(strict_types=1); namespace App\Services\InfoProviderSystem\Providers; use App\Services\InfoProviderSystem\DTOs\FileDTO; +use App\Services\InfoProviderSystem\DTOs\ParameterDTO; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; use Symfony\Component\DomCrawler\Crawler; @@ -134,11 +135,32 @@ class ReicheltProvider implements InfoProviderInterface preview_image_url: $json[0]['article_picture'], provider_url: $productPage, notes: $notes, - datasheets: $datasheets + datasheets: $datasheets, + parameters: $this->parseParameters($dom) ); } + private function parseParameters(Crawler $dom): array + { + $parameters = []; + //Iterate over each ul.articleTechnicalData which contains the specifications of each group + $dom->filter('ul.articleTechnicalData')->each(function (Crawler $groupElement) use (&$parameters) { + $groupName = $groupElement->filter('li.articleTechnicalHeadline')->text(); + + //Iterate over each second li in ul.articleAttribute, which contains the specifications + $groupElement->filter('ul.articleAttribute li:nth-child(2n)')->each(function (Crawler $specElement) use (&$parameters, $groupName) { + $parameters[] = ParameterDTO::parseValueField( + name: $specElement->previousAll()->text(), + value: $specElement->text(), + group: $groupName + ); + }); + }); + + return $parameters; + } + private function getBaseURL(): string { //Without the trailing slash From e287918121b83ed5487e979f6618e493345ff153 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 00:24:46 +0100 Subject: [PATCH 05/16] Extract category from 
reichelt provider --- .../Providers/ReicheltProvider.php | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php index 2afb978f..6aa4c001 100644 --- a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php @@ -131,6 +131,7 @@ class ReicheltProvider implements InfoProviderInterface provider_id: $id, name: $json[0]['article_artnr'], description: $json[0]['article_besch'], + category: $this->parseCategory($dom), manufacturer: $json[0]['manufacturer_name'], preview_image_url: $json[0]['article_picture'], provider_url: $productPage, @@ -141,6 +142,29 @@ class ReicheltProvider implements InfoProviderInterface } + + private function parseCategory(Crawler $dom): string + { + // Look for ol.breadcrumb and iterate over the li elements + $category = ''; + $dom->filter('ol.breadcrumb li.triangle-left')->each(function (Crawler $element) use (&$category) { + //Do not include the .breadcrumb-showmore element + if ($element->attr('id') === 'breadcrumb-showmore') { + return; + } + + $category .= $element->text() . 
' -> '; + }); + //Remove the trailing ' -> ' + $category = substr($category, 0, -4); + + return $category; + } + + /** + * @param Crawler $dom + * @return ParameterDTO[] + */ private function parseParameters(Crawler $dom): array { $parameters = []; From 39bc40037695931fff5ce765827d3fcad0cc17f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 00:32:03 +0100 Subject: [PATCH 06/16] Added basic price info retrieval from reichelt --- .../InfoProviderSystem/PartInfoRetriever.php | 4 ++-- .../Providers/ReicheltProvider.php | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/Services/InfoProviderSystem/PartInfoRetriever.php b/src/Services/InfoProviderSystem/PartInfoRetriever.php index 1a31b197..1db36eee 100644 --- a/src/Services/InfoProviderSystem/PartInfoRetriever.php +++ b/src/Services/InfoProviderSystem/PartInfoRetriever.php @@ -33,8 +33,8 @@ use Symfony\Contracts\Cache\ItemInterface; final class PartInfoRetriever { - private const CACHE_DETAIL_EXPIRATION = 60 * 60 * 24 * 4; // 4 days - private const CACHE_RESULT_EXPIRATION = 60 * 60 * 24 * 7; // 7 days + private const CACHE_DETAIL_EXPIRATION = 5; // 4 days + private const CACHE_RESULT_EXPIRATION = 5; // 7 days public function __construct(private readonly ProviderRegistry $provider_registry, private readonly DTOtoEntityConverter $dto_to_entity_converter, private readonly CacheInterface $partInfoCache) diff --git a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php index 6aa4c001..a91d256f 100644 --- a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php @@ -26,6 +26,8 @@ namespace App\Services\InfoProviderSystem\Providers; use App\Services\InfoProviderSystem\DTOs\FileDTO; use App\Services\InfoProviderSystem\DTOs\ParameterDTO; use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; +use 
App\Services\InfoProviderSystem\DTOs\PriceDTO; +use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; use Symfony\Component\DomCrawler\Crawler; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -33,6 +35,8 @@ use Symfony\Contracts\HttpClient\HttpClientInterface; class ReicheltProvider implements InfoProviderInterface { + public const DISTRIBUTOR_NAME = "Reichelt"; + private const SEARCH_ENDPOINT = "https://www.reichelt.com/index.html?ACTION=446&LA=0&nbc=1&q=%s"; public function __construct(private readonly HttpClientInterface $client) @@ -125,6 +129,16 @@ class ReicheltProvider implements InfoProviderInterface $datasheets[] = new FileDTO($element->attr('href'), $element->filter('span')->text()); }); + //Create purchase info + $purchaseInfo = new PurchaseInfoDTO( + distributor_name: self::DISTRIBUTOR_NAME, + order_number: $json[0]['article_artnr'], + prices: [ + new PriceDTO(1.0, (string) $json[0]['article_price'], 'EUR') + ], + product_url: $productPage + ); + //Create part object return new PartDetailDTO( provider_key: $this->getProviderKey(), @@ -137,7 +151,8 @@ class ReicheltProvider implements InfoProviderInterface provider_url: $productPage, notes: $notes, datasheets: $datasheets, - parameters: $this->parseParameters($dom) + parameters: $this->parseParameters($dom), + vendor_infos: [$purchaseInfo] ); } From f1c28b9f46b98342a9be5f6744bf2ce455a04f7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 15:14:59 +0100 Subject: [PATCH 07/16] Allow to parse batch prices --- .../Providers/ReicheltProvider.php | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php index a91d256f..bffb9279 100644 --- a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php +++ 
b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php @@ -135,7 +135,7 @@ class ReicheltProvider implements InfoProviderInterface order_number: $json[0]['article_artnr'], prices: [ new PriceDTO(1.0, (string) $json[0]['article_price'], 'EUR') - ], + ] + $this->parseBatchPrices($dom), product_url: $productPage ); @@ -157,6 +157,35 @@ class ReicheltProvider implements InfoProviderInterface } + private function parseBatchPrices(Crawler $dom): array + { + //Iterate over each a.inline-block element in div.discountValue + $prices = []; + $dom->filter('div.discountValue a.inline-block')->each(function (Crawler $element) use (&$prices) { + //The minimum amount is the number in the span.block element + $minAmountText = $element->filter('span.block')->text(); + + //Extract a integer from the text + $matches = []; + if (!preg_match('/\d+/', $minAmountText, $matches)) { + return; + } + + $minAmount = (int) $matches[0]; + + //The price is the text of the p.productPrice element + $priceString = $element->filter('p.productPrice')->text(); + //Replace comma with dot + $priceString = str_replace(',', '.', $priceString); + //Strip any non-numeric characters + $priceString = preg_replace('/[^0-9.]/', '', $priceString); + + $prices[] = new PriceDTO($minAmount, $priceString, 'EUR'); + }); + + return $prices; + } + private function parseCategory(Crawler $dom): string { From a798aa9c249bfcf119fa48ae2279f8050921d837 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 15:39:35 +0100 Subject: [PATCH 08/16] Allow to extract MPN from reichelt provider --- .../Providers/ReicheltProvider.php | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php index bffb9279..8905ea76 100644 --- a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php @@ 
-147,6 +147,7 @@ class ReicheltProvider implements InfoProviderInterface description: $json[0]['article_besch'], category: $this->parseCategory($dom), manufacturer: $json[0]['manufacturer_name'], + mpn: $this->parseMPN($dom), preview_image_url: $json[0]['article_picture'], provider_url: $productPage, notes: $notes, @@ -157,6 +158,19 @@ class ReicheltProvider implements InfoProviderInterface } + private function parseMPN(Crawler $dom): ?string + { + //Find the small element directly after meta[itemprop="url"] element + $element = $dom->filter('meta[itemprop="url"] + small'); + //If the text contains GTIN text, take the small element afterwards + if (str_contains($element->text(), 'GTIN')) { + $element = $dom->filter('meta[itemprop="url"] + small + small'); + } + + //The MPN is contained in the span inside the element + return $element->filter('span')->text(); + } + private function parseBatchPrices(Crawler $dom): array { //Iterate over each a.inline-block element in div.discountValue From 32b4e6812d4c89d45a4102a06d5531f604b104ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 15:40:26 +0100 Subject: [PATCH 09/16] Undo change to cache expiration --- src/Services/InfoProviderSystem/PartInfoRetriever.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Services/InfoProviderSystem/PartInfoRetriever.php b/src/Services/InfoProviderSystem/PartInfoRetriever.php index 1db36eee..8a8eeae9 100644 --- a/src/Services/InfoProviderSystem/PartInfoRetriever.php +++ b/src/Services/InfoProviderSystem/PartInfoRetriever.php @@ -33,8 +33,8 @@ use Symfony\Contracts\Cache\ItemInterface; final class PartInfoRetriever { - private const CACHE_DETAIL_EXPIRATION = 5; // 4 days - private const CACHE_RESULT_EXPIRATION = 5; // 7 days + private const CACHE_DETAIL_EXPIRATION = 60 * 60 * 24 * 4; // 4 days + private const CACHE_RESULT_EXPIRATION = 60 * 60 * 24 * 7; // 7 days public function __construct(private readonly ProviderRegistry 
$provider_registry, private readonly DTOtoEntityConverter $dto_to_entity_converter, private readonly CacheInterface $partInfoCache) From 90e1b809fe39a95e762572fda2909660a4ca2e13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 15:59:35 +0100 Subject: [PATCH 10/16] Allow to change language and country --- .../Providers/ReicheltProvider.php | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php index 8905ea76..a32962d8 100644 --- a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php @@ -39,7 +39,7 @@ class ReicheltProvider implements InfoProviderInterface private const SEARCH_ENDPOINT = "https://www.reichelt.com/index.html?ACTION=446&LA=0&nbc=1&q=%s"; - public function __construct(private readonly HttpClientInterface $client) + public function __construct(private readonly HttpClientInterface $client, private readonly bool $enabled = true, private readonly string $language = "en", private readonly string $country = "DE") { } @@ -47,8 +47,8 @@ class ReicheltProvider implements InfoProviderInterface { return [ 'name' => 'Reichelt', - 'description' => 'TODO', - 'url' => 'https://www.reichelt.de/', + 'description' => 'Webscrapping from reichelt.com to get part information', + 'url' => 'https://www.reichelt.com/', 'disabled_help' => 'TODO' ]; } @@ -60,14 +60,12 @@ class ReicheltProvider implements InfoProviderInterface public function isActive(): bool { - return true; + return $this->enabled; } public function searchByKeyword(string $keyword): array { - //Lowercase the keyword and urlencode it - $keyword = urlencode($keyword); - $response = $this->client->request('GET', sprintf(self::SEARCH_ENDPOINT, $keyword)); + $response = $this->client->request('GET', sprintf($this->getBaseURL() . 
'/shop/search/%s', $keyword)); $html = $response->getContent(); //Parse the HTML and return the results @@ -109,7 +107,14 @@ class ReicheltProvider implements InfoProviderInterface } //Use this endpoint to resolve the artID to a product page - $response = $this->client->request('GET', sprintf('https://www.reichelt.com/?ACTION=514&id=74&article=%s&LANGUAGE=EN&CCOUNTRY=DE', $id)); + $response = $this->client->request('GET', + sprintf( + 'https://www.reichelt.com/?ACTION=514&id=74&article=%s&LANGUAGE=%s&CCOUNTRY=%s', + $id, + strtoupper($this->language), + strtoupper($this->country) + ) + ); $json = $response->toArray(); //Retrieve the product page from the response @@ -246,7 +251,7 @@ class ReicheltProvider implements InfoProviderInterface private function getBaseURL(): string { //Without the trailing slash - return 'https://www.reichelt.com/de/en'; + return 'https://www.reichelt.com/' . strtolower($this->country) . '/' . strtolower($this->language); } public function getCapabilities(): array From d13752114cbfa455cb42bc6c0d2a5ce8444737b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 16:06:10 +0100 Subject: [PATCH 11/16] Allow to configure the reichelt info provider via env vars --- .env | 12 ++++++++++++ .../Providers/ReicheltProvider.php | 9 ++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/.env b/.env index f0fbd306..a985eac5 100644 --- a/.env +++ b/.env @@ -216,6 +216,18 @@ PROVIDER_OEMSECRETS_SET_PARAM=1 #If unset or set to any other value, no sorting is performed. 
PROVIDER_OEMSECRETS_SORT_CRITERIA=C + +# Reichelt provider: +# Reichelt.com offers no official API, so this info provider webscrapes the website to extract info +# It could break at any time, use it at your own risk + +# We dont require an API key for Reichelt, just set this to 1 to enable Reichelt support +PROVIDER_REICHELT_ENABLED=0 +# The country to get prices for (only EUR supported for now) +PROVIDER_REICHELT_COUNTRY=DE +# The language to get results in (en, de, fr, nl, pl, it, es) +PROVIDER_REICHELT_LANGUAGE=en + ################################################################################## # EDA integration related settings ################################################################################## diff --git a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php index a32962d8..1678a4c6 100644 --- a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php @@ -29,6 +29,7 @@ use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; use App\Services\InfoProviderSystem\DTOs\PriceDTO; use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; +use Symfony\Component\DependencyInjection\Attribute\Autowire; use Symfony\Component\DomCrawler\Crawler; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -39,7 +40,13 @@ class ReicheltProvider implements InfoProviderInterface private const SEARCH_ENDPOINT = "https://www.reichelt.com/index.html?ACTION=446&LA=0&nbc=1&q=%s"; - public function __construct(private readonly HttpClientInterface $client, private readonly bool $enabled = true, private readonly string $language = "en", private readonly string $country = "DE") + public function __construct(private readonly HttpClientInterface $client, + #[Autowire(env: "bool:PROVIDER_REICHELT_ENABLED")] + private readonly bool $enabled = true, + #[Autowire(env: 
"PROVIDER_REICHELT_LANGUAGE")] + private readonly string $language = "en", + #[Autowire(env: "PROVIDER_REICHELT_COUNTRY")] + private readonly string $country = "DE") { } From 5e40519bc5af0be5d571339a78b3af881dfba11f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 16:29:37 +0100 Subject: [PATCH 12/16] Allow to select if VAT should be included or not --- .env | 2 ++ .../Providers/ReicheltProvider.php | 25 +++++++++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/.env b/.env index a985eac5..678430d2 100644 --- a/.env +++ b/.env @@ -227,6 +227,8 @@ PROVIDER_REICHELT_ENABLED=0 PROVIDER_REICHELT_COUNTRY=DE # The language to get results in (en, de, fr, nl, pl, it, es) PROVIDER_REICHELT_LANGUAGE=en +# Include VAT in prices (set to 1 to include VAT, 0 to exclude VAT) +PROVIDER_REICHELT_INCLUDE_VAT=1 ################################################################################## # EDA integration related settings diff --git a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php index 1678a4c6..89ba48d8 100644 --- a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php @@ -46,7 +46,10 @@ class ReicheltProvider implements InfoProviderInterface #[Autowire(env: "PROVIDER_REICHELT_LANGUAGE")] private readonly string $language = "en", #[Autowire(env: "PROVIDER_REICHELT_COUNTRY")] - private readonly string $country = "DE") + private readonly string $country = "DE", + #[Autowire(env: "PROVIDER_REICHELT_INCLUDE_VAT")] + private bool $includeVAT = false + ) { } @@ -128,7 +131,11 @@ class ReicheltProvider implements InfoProviderInterface $productPage = $this->getBaseURL() . '/shop/product' . 
$json[0]['article_path']; - $response = $this->client->request('GET', $productPage); + $response = $this->client->request('GET', $productPage, [ + 'query' => [ + 'CCTYPE' => $this->includeVAT ? 'private' : 'business', + ], + ]); $html = $response->getContent(); $dom = new Crawler($html); @@ -141,13 +148,17 @@ class ReicheltProvider implements InfoProviderInterface $datasheets[] = new FileDTO($element->attr('href'), $element->filter('span')->text()); }); + //Determine price for one unit + $priceString = $dom->filter('meta[itemprop="price"]')->attr('content'); + $currency = $dom->filter('meta[itemprop="priceCurrency"]')->attr('content', 'EUR'); + //Create purchase info $purchaseInfo = new PurchaseInfoDTO( distributor_name: self::DISTRIBUTOR_NAME, order_number: $json[0]['article_artnr'], prices: [ - new PriceDTO(1.0, (string) $json[0]['article_price'], 'EUR') - ] + $this->parseBatchPrices($dom), + new PriceDTO(1.0, $priceString, $currency, $this->includeVAT) + ] + $this->parseBatchPrices($dom, $currency), product_url: $productPage ); @@ -183,11 +194,11 @@ class ReicheltProvider implements InfoProviderInterface return $element->filter('span')->text(); } - private function parseBatchPrices(Crawler $dom): array + private function parseBatchPrices(Crawler $dom, string $currency): array { //Iterate over each a.inline-block element in div.discountValue $prices = []; - $dom->filter('div.discountValue a.inline-block')->each(function (Crawler $element) use (&$prices) { + $dom->filter('div.discountValue a.inline-block')->each(function (Crawler $element) use (&$prices, $currency) { //The minimum amount is the number in the span.block element $minAmountText = $element->filter('span.block')->text(); @@ -206,7 +217,7 @@ class ReicheltProvider implements InfoProviderInterface //Strip any non-numeric characters $priceString = preg_replace('/[^0-9.]/', '', $priceString); - $prices[] = new PriceDTO($minAmount, $priceString, 'EUR'); + $prices[] = new PriceDTO($minAmount, $priceString, 
$currency, $this->includeVAT); }); return $prices; From d05c0579a274120c1233af6971646ea848560490 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 16:33:37 +0100 Subject: [PATCH 13/16] Fixed problem that first batch price was not included --- .../InfoProviderSystem/Providers/ReicheltProvider.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php index 89ba48d8..62f075ea 100644 --- a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php @@ -156,9 +156,9 @@ class ReicheltProvider implements InfoProviderInterface $purchaseInfo = new PurchaseInfoDTO( distributor_name: self::DISTRIBUTOR_NAME, order_number: $json[0]['article_artnr'], - prices: [ - new PriceDTO(1.0, $priceString, $currency, $this->includeVAT) - ] + $this->parseBatchPrices($dom, $currency), + prices: array_merge( + [new PriceDTO(1.0, $priceString, $currency, $this->includeVAT)] + , $this->parseBatchPrices($dom, $currency)), product_url: $productPage ); From c51e0eb68f4c2daaccb0a11623263b713e196abb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 16:37:13 +0100 Subject: [PATCH 14/16] Allow to get prices in non-EUR currencies from reichelt --- .env | 4 +++- .../InfoProviderSystem/Providers/ReicheltProvider.php | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.env b/.env index 678430d2..c67d6fdc 100644 --- a/.env +++ b/.env @@ -223,12 +223,14 @@ PROVIDER_OEMSECRETS_SORT_CRITERIA=C # We dont require an API key for Reichelt, just set this to 1 to enable Reichelt support PROVIDER_REICHELT_ENABLED=0 -# The country to get prices for (only EUR supported for now) +# The country to get prices for PROVIDER_REICHELT_COUNTRY=DE # The language to get results in (en, de, fr, nl, pl, it, es) 
PROVIDER_REICHELT_LANGUAGE=en # Include VAT in prices (set to 1 to include VAT, 0 to exclude VAT) PROVIDER_REICHELT_INCLUDE_VAT=1 +# The currency to get prices in (only for countries with currencies other than EUR) +PROVIDER_REICHELT_CURRENCY=EUR ################################################################################## # EDA integration related settings diff --git a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php index 62f075ea..3655cfcf 100644 --- a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php @@ -48,7 +48,9 @@ class ReicheltProvider implements InfoProviderInterface #[Autowire(env: "PROVIDER_REICHELT_COUNTRY")] private readonly string $country = "DE", #[Autowire(env: "PROVIDER_REICHELT_INCLUDE_VAT")] - private bool $includeVAT = false + private readonly bool $includeVAT = false, + #[Autowire(env: "PROVIDER_REICHELT_CURRENCY")] + private readonly string $currency = "EUR", ) { } @@ -134,6 +136,7 @@ class ReicheltProvider implements InfoProviderInterface $response = $this->client->request('GET', $productPage, [ 'query' => [ 'CCTYPE' => $this->includeVAT ? 
'private' : 'business', + 'currency' => $this->currency, ], ]); $html = $response->getContent(); From 3585b08d4b3bd488eca6bbe4445634c1bf7f4ede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 16:38:07 +0100 Subject: [PATCH 15/16] Pass reichelt env through docker container --- .docker/symfony.conf | 1 + 1 file changed, 1 insertion(+) diff --git a/.docker/symfony.conf b/.docker/symfony.conf index 5788a630..e87e37b0 100644 --- a/.docker/symfony.conf +++ b/.docker/symfony.conf @@ -44,6 +44,7 @@ PassEnv PROVIDER_MOUSER_KEY PROVIDER_MOUSER_SEARCH_OPTION PROVIDER_MOUSER_SEARCH_LIMIT PROVIDER_MOUSER_SEARCH_WITH_SIGNUP_LANGUAGE PassEnv PROVIDER_LCSC_ENABLED PROVIDER_LCSC_CURRENCY PassEnv PROVIDER_OEMSECRETS_KEY PROVIDER_OEMSECRETS_COUNTRY_CODE PROVIDER_OEMSECRETS_CURRENCY PROVIDER_OEMSECRETS_ZERO_PRICE PROVIDER_OEMSECRETS_SET_PARAM PROVIDER_OEMSECRETS_SORT_CRITERIA + PassEnv PROVIDER_REICHELT_ENABLED PROVIDER_REICHELT_CURRENCY PROVIDER_REICHELT_COUNTRY PROVIDER_REICHELT_LANGUAGE PROVIDER_REICHELT_INCLUDE_VAT PassEnv EDA_KICAD_CATEGORY_DEPTH # For most configuration files from conf-available/, which are From 1e80be1376ee7962aa5f93ca2dbda0646b92fa8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 16:46:47 +0100 Subject: [PATCH 16/16] Added documentation for reichelt info provider --- src/Services/InfoProviderSystem/PartInfoRetriever.php | 4 ++-- .../InfoProviderSystem/Providers/ReicheltProvider.php | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Services/InfoProviderSystem/PartInfoRetriever.php b/src/Services/InfoProviderSystem/PartInfoRetriever.php index 8a8eeae9..1db36eee 100644 --- a/src/Services/InfoProviderSystem/PartInfoRetriever.php +++ b/src/Services/InfoProviderSystem/PartInfoRetriever.php @@ -33,8 +33,8 @@ use Symfony\Contracts\Cache\ItemInterface; final class PartInfoRetriever { - private const CACHE_DETAIL_EXPIRATION = 60 * 60 * 24 * 4; // 4 days - private const 
CACHE_RESULT_EXPIRATION = 60 * 60 * 24 * 4; // 7 days + private const CACHE_DETAIL_EXPIRATION = 5; // 4 days + private const CACHE_RESULT_EXPIRATION = 5; // 7 days public function __construct(private readonly ProviderRegistry $provider_registry, private readonly DTOtoEntityConverter $dto_to_entity_converter, private readonly CacheInterface $partInfoCache) diff --git a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php index 3655cfcf..b0d704d3 100644 --- a/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php +++ b/src/Services/InfoProviderSystem/Providers/ReicheltProvider.php @@ -38,8 +38,6 @@ class ReicheltProvider implements InfoProviderInterface public const DISTRIBUTOR_NAME = "Reichelt"; - private const SEARCH_ENDPOINT = "https://www.reichelt.com/index.html?ACTION=446&LA=0&nbc=1&q=%s"; - public function __construct(private readonly HttpClientInterface $client, #[Autowire(env: "bool:PROVIDER_REICHELT_ENABLED")] private readonly bool $enabled = true,