diff --git a/.docker/symfony.conf b/.docker/symfony.conf index e87e37b0..b5229bf6 100644 --- a/.docker/symfony.conf +++ b/.docker/symfony.conf @@ -45,6 +45,7 @@ PassEnv PROVIDER_LCSC_ENABLED PROVIDER_LCSC_CURRENCY PassEnv PROVIDER_OEMSECRETS_KEY PROVIDER_OEMSECRETS_COUNTRY_CODE PROVIDER_OEMSECRETS_CURRENCY PROVIDER_OEMSECRETS_ZERO_PRICE PROVIDER_OEMSECRETS_SET_PARAM PROVIDER_OEMSECRETS_SORT_CRITERIA PassEnv PROVIDER_REICHELT_ENABLED PROVIDER_REICHELT_CURRENCY PROVIDER_REICHELT_COUNTRY PROVIDER_REICHELT_LANGUAGE PROVIDER_REICHELT_INCLUDE_VAT + PassEnv PROVIDER_POLLIN_ENABLED PassEnv EDA_KICAD_CATEGORY_DEPTH # For most configuration files from conf-available/, which are diff --git a/.env b/.env index c67d6fdc..1806e9c6 100644 --- a/.env +++ b/.env @@ -220,7 +220,6 @@ PROVIDER_OEMSECRETS_SORT_CRITERIA=C # Reichelt provider: # Reichelt.com offers no official API, so this info provider webscrapes the website to extract info # It could break at any time, use it at your own risk - # We dont require an API key for Reichelt, just set this to 1 to enable Reichelt support PROVIDER_REICHELT_ENABLED=0 # The country to get prices for @@ -232,6 +231,12 @@ PROVIDER_REICHELT_INCLUDE_VAT=1 # The currency to get prices in (only for countries with countries other than EUR) PROVIDER_REICHELT_CURRENCY=EUR +# Pollin provider: +# Pollin.de offers no official API, so this info provider webscrapes the website to extract info +# It could break at any time, use it at your own risk +# We dont require an API key for Pollin, just set this to 1 to enable Pollin support +PROVIDER_POLLIN_ENABLED=0 + ################################################################################## # EDA integration related settings ################################################################################## diff --git a/docs/usage/information_provider_system.md b/docs/usage/information_provider_system.md index ab288626..015a9eb3 100644 --- a/docs/usage/information_provider_system.md +++ b/docs/usage/information_provider_system.md @@ -247,6 +247,14 @@ The following env configuration options are available: * `PROVIDER_REICHELT_LANGUAGE`: The language you want to get the descriptions in (optional, default: `en`) * `PROVIDER_REICHELT_INCLUDE_VAT`: If set to `1`, the prices will be gross prices (including tax), otherwise net prices (optional, default: `1`) +### Pollin + +The pollin provider uses webscraping from [pollin.de](https://www.pollin.de/) to get part information. +This is not an official API and could break at any time. So use it at your own risk. + +The following env configuration options are available: +* `PROVIDER_POLLIN_ENABLED`: Set this to `1` to enable the Pollin provider + ### Custom provider To create a custom provider, you have to create a new class implementing the `InfoProviderInterface` interface. As long diff --git a/src/Services/InfoProviderSystem/PartInfoRetriever.php b/src/Services/InfoProviderSystem/PartInfoRetriever.php index 8f18ba04..0eb74642 100644 --- a/src/Services/InfoProviderSystem/PartInfoRetriever.php +++ b/src/Services/InfoProviderSystem/PartInfoRetriever.php @@ -59,6 +59,11 @@ final class PartInfoRetriever $provider = $this->provider_registry->getProviderByKey($provider); } + //Ensure that the provider is active + if (!$provider->isActive()) { + throw new \RuntimeException("The provider with key {$provider->getProviderKey()} is not active!"); + } + if (!$provider instanceof InfoProviderInterface) { throw new \InvalidArgumentException("The provider must be either a provider key or a provider instance!"); } @@ -97,6 +102,11 @@ final class PartInfoRetriever { $provider = $this->provider_registry->getProviderByKey($provider_key); + //Ensure that the provider is active + if (!$provider->isActive()) { + throw new \RuntimeException("The provider with key $provider_key is not active!"); + } + //Generate key and escape reserved characters from the provider id $escaped_part_id = urlencode($part_id); return $this->partInfoCache->get("details_{$provider_key}_{$escaped_part_id}", function (ItemInterface $item) use ($provider, $part_id) { diff --git a/src/Services/InfoProviderSystem/Providers/PollinProvider.php b/src/Services/InfoProviderSystem/Providers/PollinProvider.php new file mode 100644 index 00000000..9745643d --- /dev/null +++ b/src/Services/InfoProviderSystem/Providers/PollinProvider.php @@ -0,0 +1,243 @@ +. + */ + +declare(strict_types=1); + + +namespace App\Services\InfoProviderSystem\Providers; + +use App\Entity\Parts\ManufacturingStatus; +use App\Entity\Parts\Part; +use App\Services\InfoProviderSystem\DTOs\FileDTO; +use App\Services\InfoProviderSystem\DTOs\ParameterDTO; +use App\Services\InfoProviderSystem\DTOs\PartDetailDTO; +use App\Services\InfoProviderSystem\DTOs\PriceDTO; +use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO; +use App\Services\InfoProviderSystem\DTOs\SearchResultDTO; +use Symfony\Component\DependencyInjection\Attribute\Autowire; +use Symfony\Component\DomCrawler\Crawler; +use Symfony\Contracts\HttpClient\HttpClientInterface; + +class PollinProvider implements InfoProviderInterface +{ + + public function __construct(private readonly HttpClientInterface $client, + #[Autowire(env: 'bool:PROVIDER_POLLIN_ENABLED')] + private readonly bool $enabled = true, + ) + { + } + + public function getProviderInfo(): array + { + return [ + 'name' => 'Pollin', + 'description' => 'Webscrapping from pollin.de to get part information', + 'url' => 'https://www.reichelt.de/', + 'disabled_help' => 'Set PROVIDER_POLLIN_ENABLED env to 1' + ]; + } + + public function getProviderKey(): string + { + return 'pollin'; + } + + public function isActive(): bool + { + return $this->enabled; + } + + public function searchByKeyword(string $keyword): array + { + $response = $this->client->request('GET', 'https://www.pollin.de/search', [ + 'query' => [ + 'search' => $keyword + ] + ]); + + $content = $response->getContent(); + $dom = new Crawler($content); + + $results = []; + + //Iterate over each div.product-box + $dom->filter('div.product-box')->each(function (Crawler $node) use (&$results) { + $results[] = new SearchResultDTO( + provider_key: $this->getProviderKey(), + provider_id: $node->filter('meta[itemprop="productID"]')->attr('content'), + name: $node->filter('a.product-name')->text(), + description: '', + preview_image_url: $node->filter('img.product-image')->attr('src'), + manufacturing_status: $this->mapAvailability($node->filter('link[itemprop="availability"]')->attr('href')), + provider_url: $node->filter('a.product-name')->attr('href') + ); + }); + + return $results; + } + + private function mapAvailability(string $availabilityURI): ManufacturingStatus + { + return match( $availabilityURI) { + 'http://schema.org/InStock' => ManufacturingStatus::ACTIVE, + 'http://schema.org/OutOfStock' => ManufacturingStatus::DISCONTINUED, + default => ManufacturingStatus::NOT_SET + }; + } + + public function getDetails(string $id): PartDetailDTO + { + //Ensure that $id is numeric + if (!is_numeric($id)) { + throw new \InvalidArgumentException("The id must be numeric!"); + } + + $response = $this->client->request('GET', 'https://www.pollin.de/search', [ + 'query' => [ + 'search' => $id + ] + ]); + + //The response must have us redirected to the product page + if ($response->getInfo('redirect_count') > 0) { + throw new \RuntimeException("Could not resolve the product page for the given id!"); + } + + $content = $response->getContent(); + + return $this->parseProductPage($content); + } + + private function parseProductPage(string $content): PartDetailDTO + { + $dom = new Crawler($content); + + $productPageUrl = $dom->filter('meta[property="product:product_link"]')->attr('content'); + $orderId = trim($dom->filter('span[itemprop="sku"]')->text()); //Text is important here + + //Calculate the mass + $massStr = $dom->filter('meta[itemprop="weight"]')->attr('content'); + //Remove the unit + $massStr = str_replace('kg', '', $massStr); + //Convert to float and convert to grams + $mass = (float) $massStr * 1000; + + //Parse purchase info + $purchaseInfo = new PurchaseInfoDTO('Pollin', $orderId, $this->parsePrices($dom), $productPageUrl); + + return new PartDetailDTO( + provider_key: $this->getProviderKey(), + provider_id: $orderId, + name: trim($dom->filter('meta[property="og:title"]')->attr('content')), + description: $dom->filter('meta[property="og:description"]')->attr('content'), + category: $this->parseCategory($dom), + manufacturer: $dom->filter('meta[property="product:brand"]')->count() > 0 ? $dom->filter('meta[property="product:brand"]')->attr('content') : null, + preview_image_url: $dom->filter('meta[property="og:image"]')->attr('content'), + manufacturing_status: $this->mapAvailability($dom->filter('link[itemprop="availability"]')->attr('href')), + provider_url: $productPageUrl, + notes: $this->parseNotes($dom), + datasheets: $this->parseDatasheets($dom), + parameters: $this->parseParameters($dom), + vendor_infos: [$purchaseInfo], + mass: $mass, + ); + } + + private function parseDatasheets(Crawler $dom): array + { + //Iterate over each a element withing div.pol-product-detail-download-files + $datasheets = []; + $dom->filter('div.pol-product-detail-download-files a')->each(function (Crawler $node) use (&$datasheets) { + $datasheets[] = new FileDTO($node->attr('href'), $node->text()); + }); + + return $datasheets; + } + + private function parseParameters(Crawler $dom): array + { + $parameters = []; + + //Iterate over each tr.properties-row inside table.product-detail-properties-table + $dom->filter('table.product-detail-properties-table tr.properties-row')->each(function (Crawler $node) use (&$parameters) { + $parameters[] = ParameterDTO::parseValueField( + name: rtrim($node->filter('th.properties-label')->text(), ':'), + value: $node->filter('td.properties-value')->text() + ); + }); + + return $parameters; + } + + private function parseCategory(Crawler $dom): string + { + $category = ''; + + //Iterate over each li.breadcrumb-item inside ol.breadcrumb + $dom->filter('ol.breadcrumb li.breadcrumb-item')->each(function (Crawler $node) use (&$category) { + //Skip if it has breadcrumb-item-home class + if (str_contains($node->attr('class'), 'breadcrumb-item-home')) { + return; + } + + + $category .= $node->text() . ' -> '; + }); + + //Remove the last ' -> ' + return substr($category, 0, -4); + } + + private function parseNotes(Crawler $dom): string + { + //Concat product highlights and product description + return $dom->filter('div.product-detail-top-features')->html() . '

' . $dom->filter('div.product-detail-description-text')->html(); + } + + private function parsePrices(Crawler $dom): array + { + //TODO: Properly handle multiple prices, for now we just look at the price for one piece + + //We assume the currency is always the same + $currency = $dom->filter('meta[property="product:price:currency"]')->attr('content'); + + //If there is meta[property=highPrice] then use this as the price + if ($dom->filter('meta[itemprop="highPrice"]')->count() > 0) { + $price = $dom->filter('meta[itemprop="highPrice"]')->attr('content'); + } else { + $price = $dom->filter('meta[property="product:price:amount"]')->attr('content'); + } + + return [ + new PriceDTO(1.0, $price, $currency) + ]; + } + + public function getCapabilities(): array + { + return [ + ProviderCapabilities::BASIC, + ProviderCapabilities::PICTURE, + ProviderCapabilities::PRICE, + ProviderCapabilities::DATASHEET + ]; + } +} \ No newline at end of file