mirror of
https://github.com/advplyr/audiobookshelf.git
synced 2025-08-03 01:35:08 +02:00
Add:Epub metadata parser and cover extractor #1479
This commit is contained in:
parent
48a08e9659
commit
69e23ef9f2
8 changed files with 284 additions and 35 deletions
42
server/utils/parsers/parseEbookMetadata.js
Normal file
42
server/utils/parsers/parseEbookMetadata.js
Normal file
|
@ -0,0 +1,42 @@
|
|||
const parseEpubMetadata = require('./parseEpubMetadata')
|
||||
|
||||
/**
|
||||
* @typedef EBookFileScanData
|
||||
* @property {string} path
|
||||
* @property {string} ebookFormat
|
||||
* @property {string} [ebookCoverPath] path of the cover image inside the ebook archive (only set when a cover image was found)
|
||||
* @property {import('../../scanner/BookScanner').BookMetadataObject} metadata
|
||||
*/
|
||||
|
||||
/**
 * Read metadata out of an ebook file, dispatching on the file's format.
 *
 * Only the 'epub' format is handled. A missing file object, or any other
 * format, resolves to null.
 *
 * @param {import('../../models/Book').EBookFileObject} ebookFile
 * @returns {Promise<EBookFileScanData|null>}
 */
async function parse(ebookFile) {
  const isEpub = !!ebookFile && ebookFile.ebookFormat === 'epub'
  if (!isEpub) return null

  // The epub parser works from the path of the file on disk
  return parseEpubMetadata.parse(ebookFile.metadata.path)
}
|
||||
module.exports.parse = parse
|
||||
|
||||
/**
 * Write the cover image referenced by previously scanned ebook data to disk.
 *
 * Resolves to false when the scan data has no cover path or the format is
 * not 'epub'; otherwise resolves to whatever the epub extractor reports.
 *
 * @param {EBookFileScanData} ebookFileScanData
 * @param {string} outputCoverPath
 * @returns {Promise<boolean>}
 */
async function extractCoverImage(ebookFileScanData, outputCoverPath) {
  const coverPathInEbook = ebookFileScanData?.ebookCoverPath
  if (!coverPathInEbook || ebookFileScanData.ebookFormat !== 'epub') return false

  return parseEpubMetadata.extractCoverImage(ebookFileScanData.path, coverPathInEbook, outputCoverPath)
}
|
||||
module.exports.extractCoverImage = extractCoverImage
|
109
server/utils/parsers/parseEpubMetadata.js
Normal file
109
server/utils/parsers/parseEpubMetadata.js
Normal file
|
@ -0,0 +1,109 @@
|
|||
const Path = require('path')
|
||||
const Logger = require('../../Logger')
|
||||
const StreamZip = require('../../libs/nodeStreamZip')
|
||||
const parseOpfMetadata = require('./parseOpfMetadata')
|
||||
const { xmlToJSON } = require('../index')
|
||||
|
||||
|
||||
/**
 * Read a single entry out of an epub (zip) archive and decode it as UTF-8.
 *
 * This is best-effort: failures are logged, never thrown. That includes a
 * failure to close the archive, which previously escaped as a rejection even
 * though the read error had already been handled.
 *
 * @param {string} epubPath path of the epub file on disk
 * @param {string} filepath path of the entry inside the archive
 * @returns {Promise<string|undefined>} entry contents, or undefined when the entry could not be read
 */
async function extractFileFromEpub(epubPath, filepath) {
  const zip = new StreamZip.async({ file: epubPath })

  try {
    const data = await zip.entryData(filepath)
    return data?.toString('utf8')
  } catch (error) {
    Logger.error(`[parseEpubMetadata] Failed to extract ${filepath} from epub at "${epubPath}"`, error)
    return undefined
  } finally {
    // close() returns a promise that can reject as well (for instance when the
    // archive could not be opened), so it must not be awaited unguarded.
    await zip.close().catch((error) => {
      Logger.error(`[parseEpubMetadata] Failed to close epub at "${epubPath}"`, error)
    })
  }
}
|
||||
|
||||
/**
 * Pull an XML entry out of an epub and convert it to a JSON object.
 *
 * @param {string} epubPath path of the epub file on disk
 * @param {string} xmlFilepath path of the XML entry inside the epub
 * @returns {Promise<Object>} result of xmlToJSON, or null when the entry was missing or empty
 */
async function extractXmlToJson(epubPath, xmlFilepath) {
  const xml = await extractFileFromEpub(epubPath, xmlFilepath)
  return xml ? xmlToJSON(xml) : null
}
|
||||
|
||||
/**
 * Extract an image entry from an epub (zip) archive to a file on disk.
 *
 * Failures are logged and reported through the return value, never thrown.
 * That includes a failure to close the archive, which previously escaped as
 * a rejection instead of the documented boolean.
 *
 * @param {string} epubPath path of the epub file on disk
 * @param {string} epubImageFilepath path of the image entry inside the epub
 * @param {string} outputCoverPath destination path for the extracted image
 * @returns {Promise<boolean>} true if the image was extracted, false otherwise
 */
async function extractCoverImage(epubPath, epubImageFilepath, outputCoverPath) {
  const zip = new StreamZip.async({ file: epubPath })

  try {
    await zip.extract(epubImageFilepath, outputCoverPath)
    return true
  } catch (error) {
    Logger.error(`[parseEpubMetadata] Failed to extract image ${epubImageFilepath} from epub at "${epubPath}"`, error)
    return false
  } finally {
    // close() returns a promise that can reject as well (for instance when the
    // archive could not be opened), so it must not be awaited unguarded.
    await zip.close().catch((error) => {
      Logger.error(`[parseEpubMetadata] Failed to close epub at "${epubPath}"`, error)
    })
  }
}
|
||||
module.exports.extractCoverImage = extractCoverImage
|
||||
|
||||
/**
 * Parse metadata from an epub.
 *
 * Reads META-INF/container.xml to locate the package document (opf file),
 * parses the metadata from that document, and records the first image listed
 * in the manifest as the cover image path.
 *
 * @param {string} epubPath path of the epub file on disk
 * @returns {Promise<import('./parseEbookMetadata').EBookFileScanData|null>} null when the container or
 *   package document cannot be read, or when no metadata could be parsed from it
 */
async function parse(epubPath) {
  Logger.debug(`Parsing metadata from epub at "${epubPath}"`)

  // Entrypoint of the epub that contains the filepath to the package document (opf file)
  const containerJson = await extractXmlToJson(epubPath, 'META-INF/container.xml')
  if (!containerJson) {
    // extractXmlToJson returns null when the entry is missing or unreadable;
    // without this guard the property access below throws a TypeError.
    Logger.error(`Failed to read META-INF/container.xml from epub at "${epubPath}"`)
    return null
  }

  // Get package document opf filepath from container.xml
  const packageDocPath = containerJson.container?.rootfiles?.[0]?.rootfile?.[0]?.$?.['full-path']
  if (!packageDocPath) {
    Logger.error(`Failed to get package doc path in Container.xml`, JSON.stringify(containerJson, null, 2))
    return null
  }

  // Extract package document to JSON
  const packageJson = await extractXmlToJson(epubPath, packageDocPath)
  if (!packageJson) {
    return null
  }

  // Parse metadata from package document opf file
  const opfMetadata = parseOpfMetadata.parseOpfMetadataJson(packageJson)
  if (!opfMetadata) {
    Logger.error(`Unable to parse metadata in package doc with json`, JSON.stringify(packageJson, null, 2))
    return null
  }

  const payload = {
    path: epubPath,
    ebookFormat: 'epub',
    metadata: opfMetadata
  }

  // Attempt to find filepath to cover image: the first manifest item with an image media type
  const manifestItems = packageJson.package?.manifest?.[0]?.item
  const manifestFirstImage = manifestItems?.find((item) => item.$?.['media-type']?.startsWith('image/'))
  const coverImagePath = manifestFirstImage?.$?.href
  if (coverImagePath) {
    // Manifest hrefs are joined onto the package document's directory. Both are
    // paths inside the zip archive, so the posix path functions are used for
    // both steps regardless of host platform.
    const packageDirname = Path.posix.dirname(packageDocPath)
    payload.ebookCoverPath = Path.posix.join(packageDirname, coverImagePath)
  } else {
    Logger.warn(`Cover image not found in manifest for epub at "${epubPath}"`)
  }

  return payload
}
|
||||
module.exports.parse = parse
|
|
@ -136,11 +136,7 @@ function stripPrefix(str) {
|
|||
return str.split(':').pop()
|
||||
}
|
||||
|
||||
module.exports.parseOpfMetadataXML = async (xml) => {
|
||||
const json = await xmlToJSON(xml)
|
||||
|
||||
if (!json) return null
|
||||
|
||||
module.exports.parseOpfMetadataJson = (json) => {
|
||||
// Handle <package ...> or with prefix <ns0:package ...>
|
||||
const packageKey = Object.keys(json).find(key => stripPrefix(key) === 'package')
|
||||
if (!packageKey) return null
|
||||
|
@ -167,7 +163,7 @@ module.exports.parseOpfMetadataXML = async (xml) => {
|
|||
const creators = parseCreators(metadata)
|
||||
const authors = (fetchCreators(creators, 'aut') || []).map(au => au?.trim()).filter(au => au)
|
||||
const narrators = (fetchNarrators(creators, metadata) || []).map(nrt => nrt?.trim()).filter(nrt => nrt)
|
||||
const data = {
|
||||
return {
|
||||
title: fetchTitle(metadata),
|
||||
subtitle: fetchSubtitle(metadata),
|
||||
authors,
|
||||
|
@ -182,5 +178,10 @@ module.exports.parseOpfMetadataXML = async (xml) => {
|
|||
series: fetchSeries(metadataMeta),
|
||||
tags: fetchTags(metadata)
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
/**
 * Convert OPF package document XML to JSON and parse metadata from it.
 *
 * @param {*} xml OPF document contents, passed straight to xmlToJSON (presumably an XML string — confirm)
 * @returns {Promise<Object|null>} null when xmlToJSON yields nothing, otherwise the result of parseOpfMetadataJson
 */
module.exports.parseOpfMetadataXML = async (xml) => {
  const opfJson = await xmlToJSON(xml)
  // Refer to the export explicitly rather than through the module-level `this`
  // captured by the arrow function; in CommonJS both are the same object.
  return opfJson ? module.exports.parseOpfMetadataJson(opfJson) : null
}
|
Loading…
Add table
Add a link
Reference in a new issue