[OEMSecrets provider] Extract real URLs and remove tracking parts

This commit is contained in:
Jan Böhmer 2024-09-09 14:52:09 +02:00
parent 1cc1530b20
commit 6997861811

View file

@ -843,7 +843,7 @@ class OEMSecretsProvider implements InfoProviderInterface
distributor_name: $distributor_name, distributor_name: $distributor_name,
order_number: $order_number, order_number: $order_number,
prices: $priceDTOs, prices: $priceDTOs,
product_url: $product['buy_now_url'] ?? '' product_url: $this->unwrapURL($product['buy_now_url'] ?? null)
); );
} }
return null; // Return null if no valid distributor exists return null; // Return null if no valid distributor exists
@ -974,37 +974,32 @@ class OEMSecretsProvider implements InfoProviderInterface
* @param string|null $sheetUrl The URL of the datasheet. * @param string|null $sheetUrl The URL of the datasheet.
* @param string|null $sheetName The optional name of the datasheet. If null, the name is extracted from the URL. * @param string|null $sheetName The optional name of the datasheet. If null, the name is extracted from the URL.
* @param array $existingDatasheets The array of existing datasheets to check for duplicates. * @param array $existingDatasheets The array of existing datasheets to check for duplicates.
* @param string $eventLinkParam The query parameter used to extract the event link. Default is 'event_link'.
* *
* @return FileDTO[]|null Returns an array containing the new datasheet if unique, or null if the datasheet is a duplicate or invalid. * @return FileDTO[]|null Returns an array containing the new datasheet if unique, or null if the datasheet is a duplicate or invalid.
* *
* @see FileDTO Used to create datasheet objects with a URL and name. * @see FileDTO Used to create datasheet objects with a URL and name.
*/ */
private function parseDataSheets(?string $sheetUrl, ?string $sheetName, array $existingDatasheets = [], string $eventLinkParam = 'event_link'): ?array private function parseDataSheets(?string $sheetUrl, ?string $sheetName, array $existingDatasheets = []): ?array
{ {
if ($sheetUrl === null || $sheetUrl === '' || $sheetUrl === '0') { if ($sheetUrl === null || $sheetUrl === '' || $sheetUrl === '0') {
return null; return null;
} }
//Unwrap the URL (remove analytics part)
$sheetUrl = $this->unwrapURL($sheetUrl);
// If the datasheet name is not provided, extract it from the URL // If the datasheet name is not provided, extract it from the URL
if ($sheetName === null) { if ($sheetName === null) {
// Extract parameters from the query string of the URL $urlPath = parse_url($sheetUrl, PHP_URL_PATH);
$queryParams = []; if ($urlPath === false) {
$urlComponents = parse_url($sheetUrl); throw new \RuntimeException("Invalid URL path: $sheetUrl");
if (isset($urlComponents['query'])) {
parse_str($urlComponents['query'], $queryParams);
} }
// If the "event_link" parameter exists, use it to extract the PDF file name
if (isset($queryParams[$eventLinkParam])) { // If "event_link" does not exist, try to extract the name from the main URL path
$eventLink = $queryParams[$eventLinkParam]; $sheetName = basename($urlPath);
$sheetName = basename(parse_url($eventLink, PHP_URL_PATH)); if (!str_contains($sheetName, '.') || !preg_match('/\.(pdf|doc|docx|xls|xlsx|ppt|pptx)$/i', $sheetName)) {
} else { // If the name does not have a valid extension, assign a default name
// If "event_link" does not exist, try to extract the name from the main URL path $sheetName = 'datasheet_' . uniqid('', true) . '.pdf';
$sheetName = basename($urlComponents['path']);
if (!str_contains($sheetName, '.') || !preg_match('/\.(pdf|doc|docx|xls|xlsx|ppt|pptx)$/i', $sheetName)) {
// If the name does not have a valid extension, assign a default name
$sheetName = 'datasheet_' . uniqid('', true) . '.pdf';
}
} }
} }
@ -1447,4 +1442,32 @@ class OEMSecretsProvider implements InfoProviderInterface
return $this->countryNameToCodeMap[$countryName] ?? null; return $this->countryNameToCodeMap[$countryName] ?? null;
} }
/**
 * Removes the analytics tracking parts from the URLs returned by the API.
 *
 * URLs containing "analytics.oemsecrets.com/main.php" are treated as redirect wrappers whose real target
 * is read from the "event_link" query parameter. Every other URL, and any wrapper URL without a usable
 * "event_link" value, is returned unchanged.
 *
 * @param string|null $url The URL to unwrap.
 * @return string|null The unwrapped URL, the unchanged input if nothing could be unwrapped, or null if null was passed.
 */
private function unwrapURL(?string $url): ?string
{
    if ($url === null) {
        return null;
    }

    //Check if the URL is one that is redirected via analytics
    if (str_contains($url, 'analytics.oemsecrets.com/main.php')) {
        //parse_url() returns null when the URL has no query string and false when the URL is malformed.
        //parse_str() only accepts strings, so the query is only parsed if there actually is one.
        $query = parse_url($url, PHP_URL_QUERY);
        if (is_string($query)) {
            $queryParams = [];
            parse_str($query, $queryParams);

            //The real URL is stored in the 'event_link' query parameter. parse_str() produces an array for
            //parameters written like "event_link[]=...", which can not be used as a URL, so require a string.
            $eventLink = $queryParams['event_link'] ?? null;
            if (is_string($eventLink) && trim($eventLink) !== '') {
                return $eventLink;
            }
        }
    }

    //Otherwise return the URL as it is
    return $url;
}
} }