Fix:Add timeout to provider matching default to 30s #3000

This commit is contained in:
advplyr 2024-05-25 16:32:02 -05:00
parent 30d3e41542
commit 6fa49e0aab
9 changed files with 633 additions and 444 deletions

View file

@ -10,6 +10,8 @@ const Logger = require('../Logger')
const { levenshteinDistance, escapeRegExp } = require('../utils/index')
class BookFinder {
#providerResponseTimeout = 30000
constructor() {
this.openLibrary = new OpenLibrary()
this.googleBooks = new GoogleBooks()
@ -36,63 +38,75 @@ class BookFinder {
filterSearchResults(books, title, author, maxTitleDistance, maxAuthorDistance) {
var searchTitle = cleanTitleForCompares(title)
var searchAuthor = cleanAuthorForCompares(author)
return books.map(b => {
b.cleanedTitle = cleanTitleForCompares(b.title)
b.titleDistance = levenshteinDistance(b.cleanedTitle, title)
return books
.map((b) => {
b.cleanedTitle = cleanTitleForCompares(b.title)
b.titleDistance = levenshteinDistance(b.cleanedTitle, title)
// Total length of search (title or both title & author)
b.totalPossibleDistance = b.title.length
// Total length of search (title or both title & author)
b.totalPossibleDistance = b.title.length
if (author) {
if (!b.author) {
b.authorDistance = author.length
} else {
b.totalPossibleDistance += b.author.length
b.cleanedAuthor = cleanAuthorForCompares(b.author)
if (author) {
if (!b.author) {
b.authorDistance = author.length
} else {
b.totalPossibleDistance += b.author.length
b.cleanedAuthor = cleanAuthorForCompares(b.author)
var cleanedAuthorDistance = levenshteinDistance(b.cleanedAuthor, searchAuthor)
var authorDistance = levenshteinDistance(b.author || '', author)
var cleanedAuthorDistance = levenshteinDistance(b.cleanedAuthor, searchAuthor)
var authorDistance = levenshteinDistance(b.author || '', author)
// Use best distance
b.authorDistance = Math.min(cleanedAuthorDistance, authorDistance)
// Use best distance
b.authorDistance = Math.min(cleanedAuthorDistance, authorDistance)
// Check book author contains searchAuthor
if (searchAuthor.length > 4 && b.cleanedAuthor.includes(searchAuthor)) b.includesAuthor = searchAuthor
else if (author.length > 4 && b.author.includes(author)) b.includesAuthor = author
// Check book author contains searchAuthor
if (searchAuthor.length > 4 && b.cleanedAuthor.includes(searchAuthor)) b.includesAuthor = searchAuthor
else if (author.length > 4 && b.author.includes(author)) b.includesAuthor = author
}
}
}
b.totalDistance = b.titleDistance + (b.authorDistance || 0)
b.totalDistance = b.titleDistance + (b.authorDistance || 0)
// Check book title contains the searchTitle
if (searchTitle.length > 4 && b.cleanedTitle.includes(searchTitle)) b.includesTitle = searchTitle
else if (title.length > 4 && b.title.includes(title)) b.includesTitle = title
// Check book title contains the searchTitle
if (searchTitle.length > 4 && b.cleanedTitle.includes(searchTitle)) b.includesTitle = searchTitle
else if (title.length > 4 && b.title.includes(title)) b.includesTitle = title
return b
}).filter(b => {
if (b.includesTitle) { // If search title was found in result title then skip over leven distance check
if (this.verbose) Logger.debug(`Exact title was included in "${b.title}", Search: "${b.includesTitle}"`)
} else if (b.titleDistance > maxTitleDistance) {
if (this.verbose) Logger.debug(`Filtering out search result title distance = ${b.titleDistance}: "${b.cleanedTitle}"/"${searchTitle}"`)
return false
}
if (author) {
if (b.includesAuthor) { // If search author was found in result author then skip over leven distance check
if (this.verbose) Logger.debug(`Exact author was included in "${b.author}", Search: "${b.includesAuthor}"`)
} else if (b.authorDistance > maxAuthorDistance) {
if (this.verbose) Logger.debug(`Filtering out search result "${b.author}", author distance = ${b.authorDistance}: "${b.author}"/"${author}"`)
return b
})
.filter((b) => {
if (b.includesTitle) {
// If search title was found in result title then skip over leven distance check
if (this.verbose) Logger.debug(`Exact title was included in "${b.title}", Search: "${b.includesTitle}"`)
} else if (b.titleDistance > maxTitleDistance) {
if (this.verbose) Logger.debug(`Filtering out search result title distance = ${b.titleDistance}: "${b.cleanedTitle}"/"${searchTitle}"`)
return false
}
}
// If book total search length < 5 and was not exact match, then filter out
if (b.totalPossibleDistance < 5 && b.totalDistance > 0) return false
return true
})
if (author) {
if (b.includesAuthor) {
// If search author was found in result author then skip over leven distance check
if (this.verbose) Logger.debug(`Exact author was included in "${b.author}", Search: "${b.includesAuthor}"`)
} else if (b.authorDistance > maxAuthorDistance) {
if (this.verbose) Logger.debug(`Filtering out search result "${b.author}", author distance = ${b.authorDistance}: "${b.author}"/"${author}"`)
return false
}
}
// If book total search length < 5 and was not exact match, then filter out
if (b.totalPossibleDistance < 5 && b.totalDistance > 0) return false
return true
})
}
/**
*
* @param {string} title
* @param {string} author
* @param {number} maxTitleDistance
* @param {number} maxAuthorDistance
* @returns {Promise<Object[]>}
*/
async getOpenLibResults(title, author, maxTitleDistance, maxAuthorDistance) {
var books = await this.openLibrary.searchTitle(title)
var books = await this.openLibrary.searchTitle(title, this.#providerResponseTimeout)
if (this.verbose) Logger.debug(`OpenLib Book Search Results: ${books.length || 0}`)
if (books.errorCode) {
Logger.error(`OpenLib Search Error ${books.errorCode}`)
@ -109,8 +123,14 @@ class BookFinder {
return booksFiltered
}
/**
*
* @param {string} title
* @param {string} author
* @returns {Promise<Object[]>}
*/
async getGoogleBooksResults(title, author) {
var books = await this.googleBooks.search(title, author)
var books = await this.googleBooks.search(title, author, this.#providerResponseTimeout)
if (this.verbose) Logger.debug(`GoogleBooks Book Search Results: ${books.length || 0}`)
if (books.errorCode) {
Logger.error(`GoogleBooks Search Error ${books.errorCode}`)
@ -120,8 +140,14 @@ class BookFinder {
return books
}
/**
*
* @param {string} title
* @param {string} author
* @returns {Promise<Object[]>}
*/
async getFantLabResults(title, author) {
var books = await this.fantLab.search(title, author)
var books = await this.fantLab.search(title, author, this.#providerResponseTimeout)
if (this.verbose) Logger.debug(`FantLab Book Search Results: ${books.length || 0}`)
if (books.errorCode) {
Logger.error(`FantLab Search Error ${books.errorCode}`)
@ -131,41 +157,58 @@ class BookFinder {
return books
}
/**
*
* @param {string} search
* @returns {Promise<Object[]>}
*/
async getAudiobookCoversResults(search) {
const covers = await this.audiobookCovers.search(search)
const covers = await this.audiobookCovers.search(search, this.#providerResponseTimeout)
if (this.verbose) Logger.debug(`AudiobookCovers Search Results: ${covers.length || 0}`)
return covers || []
}
async getiTunesAudiobooksResults(title, author) {
return this.iTunesApi.searchAudiobooks(title)
/**
*
* @param {string} title
* @returns {Promise<Object[]>}
*/
async getiTunesAudiobooksResults(title) {
return this.iTunesApi.searchAudiobooks(title, this.#providerResponseTimeout)
}
/**
*
* @param {string} title
* @param {string} author
* @param {string} asin
* @param {string} provider
* @returns {Promise<Object[]>}
*/
async getAudibleResults(title, author, asin, provider) {
const region = provider.includes('.') ? provider.split('.').pop() : ''
const books = await this.audible.search(title, author, asin, region)
const books = await this.audible.search(title, author, asin, region, this.#providerResponseTimeout)
if (this.verbose) Logger.debug(`Audible Book Search Results: ${books.length || 0}`)
if (!books) return []
return books
}
/**
*
* @param {string} title
*
* @param {string} title
* @param {string} author
* @param {string} isbn
* @param {string} providerSlug
* @param {string} isbn
* @param {string} providerSlug
* @returns {Promise<Object[]>}
*/
async getCustomProviderResults(title, author, isbn, providerSlug) {
const books = await this.customProviderAdapter.search(title, author, isbn, providerSlug, 'book')
const books = await this.customProviderAdapter.search(title, author, isbn, providerSlug, 'book', this.#providerResponseTimeout)
if (this.verbose) Logger.debug(`Custom provider '${providerSlug}' Search Results: ${books.length || 0}`)
return books
}
static TitleCandidates = class {
constructor(cleanAuthor) {
this.candidates = new Set()
this.cleanAuthor = cleanAuthor
@ -179,13 +222,13 @@ class BookFinder {
title = this.#removeAuthorFromTitle(title)
const titleTransformers = [
[/([,:;_]| by ).*/g, ''], // Remove subtitle
[/(^| )\d+k(bps)?( |$)/, ' '], // Remove bitrate
[/([,:;_]| by ).*/g, ''], // Remove subtitle
[/(^| )\d+k(bps)?( |$)/, ' '], // Remove bitrate
[/ (2nd|3rd|\d+th)\s+ed(\.|ition)?/g, ''], // Remove edition
[/(^| |\.)(m4b|m4a|mp3)( |$)/g, ''], // Remove file-type
[/ a novel.*$/g, ''], // Remove "a novel"
[/(^| )(un)?abridged( |$)/g, ' '], // Remove "unabridged/abridged"
[/^\d+ | \d+$/g, ''], // Remove preceding/trailing numbers
[/(^| |\.)(m4b|m4a|mp3)( |$)/g, ''], // Remove file-type
[/ a novel.*$/g, ''], // Remove "a novel"
[/(^| )(un)?abridged( |$)/g, ' '], // Remove "unabridged/abridged"
[/^\d+ | \d+$/g, ''] // Remove preceding/trailing numbers
]
// Main variant
@ -197,8 +240,7 @@ class BookFinder {
let candidate = cleanTitle
for (const transformer of titleTransformers)
candidate = candidate.replace(transformer[0], transformer[1]).trim()
for (const transformer of titleTransformers) candidate = candidate.replace(transformer[0], transformer[1]).trim()
if (candidate != cleanTitle) {
if (candidate) {
@ -240,7 +282,7 @@ class BookFinder {
#removeAuthorFromTitle(title) {
if (!this.cleanAuthor) return title
const authorRe = new RegExp(`(^| | by |)${escapeRegExp(this.cleanAuthor)}(?= |$)`, "g")
const authorRe = new RegExp(`(^| | by |)${escapeRegExp(this.cleanAuthor)}(?= |$)`, 'g')
const authorCleanedTitle = cleanAuthorForCompares(title)
const authorCleanedTitleWithoutAuthor = authorCleanedTitle.replace(authorRe, '')
if (authorCleanedTitleWithoutAuthor !== authorCleanedTitle) {
@ -297,7 +339,7 @@ class BookFinder {
promises.push(this.validateAuthor(candidate))
}
const results = [...new Set(await Promise.all(promises))]
filteredCandidates = results.filter(author => author)
filteredCandidates = results.filter((author) => author)
// If no valid candidates were found, add back an aggresively cleaned author version
if (!filteredCandidates.length && this.cleanAuthor) filteredCandidates.push(this.agressivelyCleanAuthor)
// Always add an empty author candidate
@ -312,17 +354,16 @@ class BookFinder {
}
}
/**
* Search for books including fuzzy searches
*
*
* @param {Object} libraryItem
* @param {string} provider
* @param {string} title
* @param {string} author
* @param {string} isbn
* @param {string} asin
* @param {{titleDistance:number, authorDistance:number, maxFuzzySearches:number}} options
* @param {string} provider
* @param {string} title
* @param {string} author
* @param {string} isbn
* @param {string} asin
* @param {{titleDistance:number, authorDistance:number, maxFuzzySearches:number}} options
* @returns {Promise<Object[]>}
*/
async search(libraryItem, provider, title, author, isbn, asin, options = {}) {
@ -337,8 +378,7 @@ class BookFinder {
return this.getCustomProviderResults(title, author, isbn, provider)
}
if (!title)
return books
if (!title) return books
books = await this.runSearch(title, author, provider, asin, maxTitleDistance, maxAuthorDistance)
@ -353,17 +393,14 @@ class BookFinder {
let authorCandidates = new BookFinder.AuthorCandidates(cleanAuthor, this.audnexus)
// Remove underscores and parentheses with their contents, and replace with a separator
const cleanTitle = title.replace(/\[.*?\]|\(.*?\)|{.*?}|_/g, " - ")
const cleanTitle = title.replace(/\[.*?\]|\(.*?\)|{.*?}|_/g, ' - ')
// Split title into hypen-separated parts
const titleParts = cleanTitle.split(/ - | -|- /)
for (const titlePart of titleParts)
authorCandidates.add(titlePart)
for (const titlePart of titleParts) authorCandidates.add(titlePart)
authorCandidates = await authorCandidates.getCandidates()
loop_author:
for (const authorCandidate of authorCandidates) {
loop_author: for (const authorCandidate of authorCandidates) {
let titleCandidates = new BookFinder.TitleCandidates(authorCandidate)
for (const titlePart of titleParts)
titleCandidates.add(titlePart)
for (const titlePart of titleParts) titleCandidates.add(titlePart)
titleCandidates = titleCandidates.getCandidates()
for (const titleCandidate of titleCandidates) {
if (titleCandidate == title && authorCandidate == author) continue // We already tried this
@ -393,10 +430,10 @@ class BookFinder {
/**
* Search for books
*
* @param {string} title
* @param {string} author
* @param {string} provider
*
* @param {string} title
* @param {string} author
* @param {string} provider
* @param {string} asin only used for audible providers
* @param {number} maxTitleDistance only used for openlibrary provider
* @param {number} maxAuthorDistance only used for openlibrary provider
@ -412,7 +449,7 @@ class BookFinder {
} else if (provider.startsWith('audible')) {
books = await this.getAudibleResults(title, author, asin, provider)
} else if (provider === 'itunes') {
books = await this.getiTunesAudiobooksResults(title, author)
books = await this.getiTunesAudiobooksResults(title)
} else if (provider === 'openlibrary') {
books = await this.getOpenLibResults(title, author, maxTitleDistance, maxAuthorDistance)
} else if (provider === 'fantlab') {
@ -448,7 +485,7 @@ class BookFinder {
covers.push(result.cover)
}
})
return [...(new Set(covers))]
return [...new Set(covers)]
}
findChapters(asin, region) {
@ -468,7 +505,7 @@ function stripSubtitle(title) {
function replaceAccentedChars(str) {
try {
return str.normalize('NFD').replace(/[\u0300-\u036f]/g, "")
return str.normalize('NFD').replace(/[\u0300-\u036f]/g, '')
} catch (error) {
Logger.error('[BookFinder] str normalize error', error)
return str
@ -483,7 +520,7 @@ function cleanTitleForCompares(title) {
let stripped = stripSubtitle(title)
// Remove text in paranthesis (i.e. "Ender's Game (Ender's Saga)" becomes "Ender's Game")
let cleaned = stripped.replace(/ *\([^)]*\) */g, "")
let cleaned = stripped.replace(/ *\([^)]*\) */g, '')
// Remove single quotes (i.e. "Ender's Game" becomes "Enders Game")
cleaned = cleaned.replace(/'/g, '')