From 742e57cc5c6f7394d0b1628d576bf64324554c1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20B=C3=B6hmer?= Date: Thu, 20 Feb 2025 19:20:14 +0100 Subject: [PATCH] Added basic provider for pollin --- .env | 7 +- .../Providers/PollinProvider.php | 233 ++++++++++++++++++ 2 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 src/Services/InfoProviderSystem/Providers/PollinProvider.php diff --git a/.env b/.env index c67d6fdc..1806e9c6 100644 --- a/.env +++ b/.env @@ -220,7 +220,6 @@ PROVIDER_OEMSECRETS_SORT_CRITERIA=C # Reichelt provider: # Reichelt.com offers no official API, so this info provider webscrapes the website to extract info # It could break at any time, use it at your own risk - # We dont require an API key for Reichelt, just set this to 1 to enable Reichelt support PROVIDER_REICHELT_ENABLED=0 # The country to get prices for @@ -232,6 +231,12 @@ PROVIDER_REICHELT_INCLUDE_VAT=1 # The currency to get prices in (only for countries with countries other than EUR) PROVIDER_REICHELT_CURRENCY=EUR +# Pollin provider: +# Pollin.de offers no official API, so this info provider webscrapes the website to extract info +# It could break at any time, use it at your own risk +# We dont require an API key for Pollin, just set this to 1 to enable Pollin support +PROVIDER_POLLIN_ENABLED=0 + ################################################################################## # EDA integration related settings ################################################################################## diff --git a/src/Services/InfoProviderSystem/Providers/PollinProvider.php b/src/Services/InfoProviderSystem/Providers/PollinProvider.php new file mode 100644 index 00000000..0380a0fb --- /dev/null +++ b/src/Services/InfoProviderSystem/Providers/PollinProvider.php @@ -0,0 +1,233 @@ +. + */ + +declare(strict_types=1); + + +namespace App\Services\InfoProviderSystem\Providers; + +use App\Entity\Parts\ManufacturingStatus; +use App\Entity\Parts\Part; +use App\Services\InfoProviderSystem\DTOs\FileDTO; +use App\Services\InfoProviderSystem\DTOs\ParameterDTO; +use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; +use App\Services\InfoProviderSystem\DTOs\PriceDTO; +use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; +use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; +use Symfony\Component\DependencyInjection\Attribute\Autowire; +use Symfony\Component\DomCrawler\Crawler; +use Symfony\Contracts\HttpClient\HttpClientInterface; + +class PollinProvider implements InfoProviderInterface +{ + + public function __construct(private readonly HttpClientInterface $client, + #[Autowire(env: 'bool:PROVIDER_POLLIN_ENABLED')] + private readonly bool $enabled = true, + ) + { + } + + public function getProviderInfo(): array + { + return [ + 'name' => 'Pollin', + 'description' => 'Webscrapping from pollin.de to get part information', + 'url' => 'https://www.reichelt.de/', + 'disabled_help' => 'Set PROVIDER_POLLIN_ENABLED env to 1' + ]; + } + + public function getProviderKey(): string + { + return 'pollin'; + } + + public function isActive(): bool + { + return $this->enabled; + } + + public function searchByKeyword(string $keyword): array + { + $response = $this->client->request('GET', 'https://www.pollin.de/search', [ + 'query' => [ + 'search' => $keyword + ] + ]); + + $content = $response->getContent(); + $dom = new Crawler($content); + + $results = []; + + //Iterate over each div.product-box + $dom->filter('div.product-box')->each(function (Crawler $node) use (&$results) { + $results[] = new SearchResultDTO( + provider_key: $this->getProviderKey(), + provider_id: $node->filter('meta[itemprop="productID"]')->attr('content'), + name: $node->filter('a.product-name')->text(), + description: '', + preview_image_url: $node->filter('img.product-image')->attr('src'), + manufacturing_status: $this->mapAvailability($node->filter('link[itemprop="availability"]')->attr('href')), + provider_url: $node->filter('a.product-name')->attr('href') + ); + }); + + return $results; + } + + private function mapAvailability(string $availabilityURI): ManufacturingStatus + { + return match( $availabilityURI) { + 'http://schema.org/InStock' => ManufacturingStatus::ACTIVE, + 'http://schema.org/OutOfStock' => ManufacturingStatus::DISCONTINUED, + default => ManufacturingStatus::NOT_SET + }; + } + + public function getDetails(string $id): PartDetailDTO + { + //Ensure that $id is numeric + if (!is_numeric($id)) { + throw new \InvalidArgumentException("The id must be numeric!"); + } + + $response = $this->client->request('GET', 'https://www.pollin.de/search', [ + 'query' => [ + 'search' => $id + ] + ]); + + //The response must have us redirected to the product page + if ($response->getInfo('redirect_count') > 0) { + throw new \RuntimeException("Could not resolve the product page for the given id!"); + } + + $content = $response->getContent(); + + return $this->parseProductPage($content); + } + + private function parseProductPage(string $content): PartDetailDTO + { + $dom = new Crawler($content); + + $productPageUrl = $dom->filter('meta[property="product:product_link"]')->attr('content'); + $orderId = trim($dom->filter('span[itemprop="sku"]')->text()); //Text is important here + + //Calculate the mass + $massStr = $dom->filter('meta[itemprop="weight"]')->attr('content'); + //Remove the unit + $massStr = str_replace('kg', '', $massStr); + //Convert to float and convert to grams + $mass = (float) $massStr * 1000; + + //Parse purchase info + $purchaseInfo = new PurchaseInfoDTO( + 'Pollin', + $orderId, + [ + new PriceDTO(1, $dom->filter('meta[property="product:price:amount"]')->attr('content'), $dom->filter('meta[property="product:price:currency"]')->attr('content')) + ], + $productPageUrl + ); + + + + return new PartDetailDTO( + provider_key: $this->getProviderKey(), + provider_id: $orderId, + name: trim($dom->filter('meta[property="og:title"]')->attr('content')), + description: $dom->filter('meta[property="og:description"]')->attr('content'), + category: $this->parseCategory($dom), + manufacturer: $dom->filter('meta[property="product:brand"]')->count() > 0 ? $dom->filter('meta[property="product:brand"]')->attr('content') : null, + preview_image_url: $dom->filter('meta[property="og:image"]')->attr('content'), + manufacturing_status: $this->mapAvailability($dom->filter('link[itemprop="availability"]')->attr('href')), + provider_url: $productPageUrl, + notes: $this->parseNotes($dom), + datasheets: $this->parseDatasheets($dom), + parameters: $this->parseParameters($dom), + vendor_infos: [$purchaseInfo], + mass: $mass, + ); + } + + private function parseDatasheets(Crawler $dom): array + { + //Iterate over each a element withing div.pol-product-detail-download-files + $datasheets = []; + $dom->filter('div.pol-product-detail-download-files a')->each(function (Crawler $node) use (&$datasheets) { + $datasheets[] = new FileDTO($node->attr('href'), $node->text()); + }); + + return $datasheets; + } + + private function parseParameters(Crawler $dom): array + { + $parameters = []; + + //Iterate over each tr.properties-row inside table.product-detail-properties-table + $dom->filter('table.product-detail-properties-table tr.properties-row')->each(function (Crawler $node) use (&$parameters) { + $parameters[] = ParameterDTO::parseValueField( + name: rtrim($node->filter('th.properties-label')->text(), ':'), + value: $node->filter('td.properties-value')->text() + ); + }); + + return $parameters; + } + + private function parseCategory(Crawler $dom): string + { + $category = ''; + + //Iterate over each li.breadcrumb-item inside ol.breadcrumb + $dom->filter('ol.breadcrumb li.breadcrumb-item')->each(function (Crawler $node) use (&$category) { + //Skip if it has breadcrumb-item-home class + if (str_contains($node->attr('class'), 'breadcrumb-item-home')) { + return; + } + + + $category .= $node->text() . ' -> '; + }); + + //Remove the last ' -> ' + return substr($category, 0, -4); + } + + private function parseNotes(Crawler $dom): string + { + //Concat product highlights and product description + return $dom->filter('div.product-detail-top-features')->html() . '

' . $dom->filter('div.product-detail-description-text')->html(); + } + + public function getCapabilities(): array + { + return [ + ProviderCapabilities::BASIC, + ProviderCapabilities::PICTURE, + ProviderCapabilities::PRICE, + ProviderCapabilities::DATASHEET + ]; + } +} \ No newline at end of file