mirror of
https://github.com/Part-DB/Part-DB-server.git
synced 2025-07-10 10:24:31 +02:00
[OEMSecrets provider] Extract real URLs and remove tracking parts
This commit is contained in:
parent
1cc1530b20
commit
6997861811
1 changed files with 43 additions and 20 deletions
|
@ -390,7 +390,7 @@ class OEMSecretsProvider implements InfoProviderInterface
|
||||||
|
|
||||||
//Force garbage collection to free up memory
|
//Force garbage collection to free up memory
|
||||||
gc_collect_cycles();
|
gc_collect_cycles();
|
||||||
|
|
||||||
return $results;
|
return $results;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -843,7 +843,7 @@ class OEMSecretsProvider implements InfoProviderInterface
|
||||||
distributor_name: $distributor_name,
|
distributor_name: $distributor_name,
|
||||||
order_number: $order_number,
|
order_number: $order_number,
|
||||||
prices: $priceDTOs,
|
prices: $priceDTOs,
|
||||||
product_url: $product['buy_now_url'] ?? ''
|
product_url: $this->unwrapURL($product['buy_now_url'] ?? null)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return null; // Return null if no valid distributor exists
|
return null; // Return null if no valid distributor exists
|
||||||
|
@ -974,37 +974,32 @@ class OEMSecretsProvider implements InfoProviderInterface
|
||||||
* @param string|null $sheetUrl The URL of the datasheet.
|
* @param string|null $sheetUrl The URL of the datasheet.
|
||||||
* @param string|null $sheetName The optional name of the datasheet. If null, the name is extracted from the URL.
|
* @param string|null $sheetName The optional name of the datasheet. If null, the name is extracted from the URL.
|
||||||
* @param array $existingDatasheets The array of existing datasheets to check for duplicates.
|
* @param array $existingDatasheets The array of existing datasheets to check for duplicates.
|
||||||
* @param string $eventLinkParam The query parameter used to extract the event link. Default is 'event_link'.
|
|
||||||
*
|
*
|
||||||
* @return FileDTO[]|null Returns an array containing the new datasheet if unique, or null if the datasheet is a duplicate or invalid.
|
* @return FileDTO[]|null Returns an array containing the new datasheet if unique, or null if the datasheet is a duplicate or invalid.
|
||||||
*
|
*
|
||||||
* @see FileDTO Used to create datasheet objects with a URL and name.
|
* @see FileDTO Used to create datasheet objects with a URL and name.
|
||||||
*/
|
*/
|
||||||
private function parseDataSheets(?string $sheetUrl, ?string $sheetName, array $existingDatasheets = [], string $eventLinkParam = 'event_link'): ?array
|
private function parseDataSheets(?string $sheetUrl, ?string $sheetName, array $existingDatasheets = []): ?array
|
||||||
{
|
{
|
||||||
if ($sheetUrl === null || $sheetUrl === '' || $sheetUrl === '0') {
|
if ($sheetUrl === null || $sheetUrl === '' || $sheetUrl === '0') {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Unwrap the URL (remove analytics part)
|
||||||
|
$sheetUrl = $this->unwrapURL($sheetUrl);
|
||||||
|
|
||||||
// If the datasheet name is not provided, extract it from the URL
|
// If the datasheet name is not provided, extract it from the URL
|
||||||
if ($sheetName === null) {
|
if ($sheetName === null) {
|
||||||
// Extract parameters from the query string of the URL
|
$urlPath = parse_url($sheetUrl, PHP_URL_PATH);
|
||||||
$queryParams = [];
|
if ($urlPath === false) {
|
||||||
$urlComponents = parse_url($sheetUrl);
|
throw new \RuntimeException("Invalid URL path: $sheetUrl");
|
||||||
if (isset($urlComponents['query'])) {
|
|
||||||
parse_str($urlComponents['query'], $queryParams);
|
|
||||||
}
|
}
|
||||||
// If the "event_link" parameter exists, use it to extract the PDF file name
|
|
||||||
if (isset($queryParams[$eventLinkParam])) {
|
// Extract the file name from the path of the (already unwrapped) URL
|
||||||
$eventLink = $queryParams[$eventLinkParam];
|
$sheetName = basename($urlPath);
|
||||||
$sheetName = basename(parse_url($eventLink, PHP_URL_PATH));
|
if (!str_contains($sheetName, '.') || !preg_match('/\.(pdf|doc|docx|xls|xlsx|ppt|pptx)$/i', $sheetName)) {
|
||||||
} else {
|
// If the name does not have a valid extension, assign a default name
|
||||||
// If "event_link" does not exist, try to extract the name from the main URL path
|
$sheetName = 'datasheet_' . uniqid('', true) . '.pdf';
|
||||||
$sheetName = basename($urlComponents['path']);
|
|
||||||
if (!str_contains($sheetName, '.') || !preg_match('/\.(pdf|doc|docx|xls|xlsx|ppt|pptx)$/i', $sheetName)) {
|
|
||||||
// If the name does not have a valid extension, assign a default name
|
|
||||||
$sheetName = 'datasheet_' . uniqid('', true) . '.pdf';
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1447,4 +1442,32 @@ class OEMSecretsProvider implements InfoProviderInterface
|
||||||
return $this->countryNameToCodeMap[$countryName] ?? null;
|
return $this->countryNameToCodeMap[$countryName] ?? null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Removes the analytics tracking wrapper from the URLs returned by the API.
 *
 * A URL containing "analytics.oemsecrets.com/main.php" is treated as a redirect link whose
 * real target is carried in the "event_link" query parameter. Any other URL, and any
 * analytics URL without a usable "event_link" value, is returned unchanged.
 *
 * @param string|null $url The URL as returned by the API
 * @return string|null The extracted target URL, the unchanged input URL if nothing could be
 *                     extracted, or null if null was passed in
 */
private function unwrapURL(?string $url): ?string
{
    if ($url === null) {
        return null;
    }

    //Only URLs that are redirected via the analytics endpoint need to be unwrapped
    if (!str_contains($url, 'analytics.oemsecrets.com/main.php')) {
        return $url;
    }

    //parse_url() returns null if there is no query string and false for seriously malformed URLs.
    //Neither may be passed on to parse_str(), which only accepts strings.
    $query = parse_url($url, PHP_URL_QUERY);
    if (!is_string($query) || $query === '') {
        return $url;
    }

    $queryParams = [];
    parse_str($query, $queryParams);

    //The real URL is stored in the 'event_link' query parameter.
    //parse_str() produces an array for inputs like "event_link[]=...", which is not a usable URL.
    $eventLink = $queryParams['event_link'] ?? null;
    if (is_string($eventLink) && trim($eventLink) !== '') {
        return $eventLink;
    }

    //Otherwise return the URL as it is
    return $url;
}
|
||||||
|
|
||||||
}
|
}
|
Loading…
Add table
Add a link
Reference in a new issue