mirror of
https://github.com/Part-DB/Part-DB-server.git
synced 2025-07-14 20:35:08 +02:00
Started working on an aliexpress provider
This commit is contained in:
parent
e94d4a7752
commit
2fdd837354
1 changed files with 151 additions and 0 deletions
151
src/Services/InfoProviderSystem/Providers/AliexpressProvider.php
Normal file
151
src/Services/InfoProviderSystem/Providers/AliexpressProvider.php
Normal file
|
@ -0,0 +1,151 @@
|
|||
<?php
|
||||
/*
|
||||
* This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
|
||||
*
|
||||
* Copyright (C) 2019 - 2025 Jan Böhmer (https://github.com/jbtronics)
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
|
||||
namespace App\Services\InfoProviderSystem\Providers;
|
||||
|
||||
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
|
||||
use App\Services\InfoProviderSystem\DTOs\SearchResultDTO;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||
|
||||
class AliexpressProvider implements InfoProviderInterface
|
||||
{
|
||||
|
||||
public function __construct(private readonly HttpClientInterface $client)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
public function getProviderInfo(): array
|
||||
{
|
||||
return [
|
||||
'name' => 'Aliexpress',
|
||||
'description' => 'Webscrapping from reichelt.com to get part information',
|
||||
'url' => 'https://aliexpress.com/',
|
||||
'disabled_help' => 'Set PROVIDER_REICHELT_ENABLED env to 1'
|
||||
];
|
||||
}
|
||||
|
||||
public function getProviderKey(): string
|
||||
{
|
||||
return "aliexpress";
|
||||
}
|
||||
|
||||
public function isActive(): bool
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
public function getBaseURL(): string
|
||||
{
|
||||
//Without the trailing slash
|
||||
return 'https://de.aliexpress.com';
|
||||
}
|
||||
|
||||
public function searchByKeyword(string $keyword): array
|
||||
{
|
||||
$response = $this->client->request('GET', $this->getBaseURL() . '/wholesale', [
|
||||
'query' => [
|
||||
'SearchText' => $keyword,
|
||||
'CatId' => 0,
|
||||
'd' => 'y',
|
||||
]
|
||||
]
|
||||
);
|
||||
|
||||
$content = $response->getContent();
|
||||
$dom = new Crawler($content);
|
||||
|
||||
$results = [];
|
||||
|
||||
//Iterate over each div.search-item-card-wrapper-gallery
|
||||
$dom->filter('div.search-item-card-wrapper-gallery')->each(function (Crawler $node) use (&$results) {
|
||||
|
||||
$productURL = $this->cleanProductURL($node->filter("a")->first()->attr('href'));
|
||||
$productID = $this->extractProductID($productURL);
|
||||
|
||||
//Skip results where we cannot extract a product ID
|
||||
if ($productID === null) {
|
||||
return;
|
||||
}
|
||||
|
||||
$results[] = new SearchResultDTO(
|
||||
provider_key: $this->getProviderKey(),
|
||||
provider_id: $productID,
|
||||
name: $node->filter("div[title]")->attr('title'),
|
||||
description: "",
|
||||
preview_image_url: $node->filter("img")->first()->attr('src'),
|
||||
provider_url: $productURL
|
||||
);
|
||||
});
|
||||
|
||||
return $results;
|
||||
}
|
||||
|
||||
private function cleanProductURL(string $url): string
|
||||
{
|
||||
//Strip the query string
|
||||
return explode('?', $url)[0];
|
||||
}
|
||||
|
||||
private function extractProductID(string $url): ?string
|
||||
{
|
||||
//We want the numeric id from the url before the .html
|
||||
$matches = [];
|
||||
preg_match('/\/(\d+)\.html/', $url, $matches);
|
||||
|
||||
return $matches[1] ?? null;
|
||||
}
|
||||
|
||||
public function getDetails(string $id): PartDetailDTO
|
||||
{
|
||||
//Ensure that $id is numeric
|
||||
if (!is_numeric($id)) {
|
||||
throw new \InvalidArgumentException("The id must be numeric");
|
||||
}
|
||||
|
||||
$product_page = $this->getBaseURL() . "/item/{$id}.html";
|
||||
$response = $this->client->request('GET', $product_page );
|
||||
|
||||
$content = $response->getContent();
|
||||
$dom = new Crawler($content);
|
||||
|
||||
return new PartDetailDTO(
|
||||
provider_key: $this->getProviderKey(),
|
||||
provider_id: $id,
|
||||
name: $dom->filter('h1[data-pl="product-title"]')->text(),
|
||||
description: "",
|
||||
provider_url: $product_page,
|
||||
notes: $dom->filter('div[data-pl="product-description"]')->html(),
|
||||
);
|
||||
}
|
||||
|
||||
public function getCapabilities(): array
|
||||
{
|
||||
return [
|
||||
ProviderCapabilities::BASIC,
|
||||
ProviderCapabilities::PICTURE,
|
||||
ProviderCapabilities::PRICE,
|
||||
];
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue