Add:Scanner extracts cover from comic files #1837 and ComicInfo.xml parser

This commit is contained in:
advplyr 2024-01-14 17:51:26 -06:00
parent e76af3bfc2
commit f5545cd3f4
10 changed files with 762 additions and 4 deletions

View file

@ -0,0 +1,35 @@
/**
 * Build book metadata from the JSON form of a ComicInfo.xml document.
 *
 * Only Series, Number and Summary are read. When a series is present the
 * title is "<Series>" or "<Series> <Number>"; otherwise the title is null.
 *
 * TODO: Add more fields
 * @see https://anansi-project.github.io/docs/comicinfo/intro
 *
 * @param {Object} comicInfoJson - parsed XML; each field is read as the first item of an array
 * @returns {import('../../scanner/BookScanner').BookMetadataObject|null} null when there is no ComicInfo root
 */
module.exports.parse = (comicInfoJson) => {
  if (!comicInfoJson?.ComicInfo) return null

  /**
   * Trimmed text of the first value of a field, or null when missing/empty.
   * Never throws on non-string values, so a malformed ComicInfo.xml cannot abort the scan.
   *
   * @param {*} values
   * @returns {string|null}
   */
  const firstText = (values) => {
    const first = values?.[0]
    // NOTE(review): presumably an element carrying attributes is parsed into an object
    // with its text under `_` (xml2js default) - confirm against xmlToJSON's configuration
    const text = typeof first === 'string' ? first : first?._
    return typeof text === 'string' ? text.trim() || null : null
  }

  const { Series, Number: issueNumber, Summary } = comicInfoJson.ComicInfo
  const seriesName = firstText(Series)
  const sequence = firstText(issueNumber)

  let title = null
  const series = []
  if (seriesName) {
    series.push({ name: seriesName, sequence })
    title = sequence ? `${seriesName} ${sequence}` : seriesName
  }

  return {
    title,
    series,
    description: firstText(Summary)
  }
}

View file

@ -0,0 +1,109 @@
const Path = require('path')
const globals = require('../globals')
const fs = require('../../libs/fsExtra')
const Logger = require('../../Logger')
const Archive = require('../../libs/libarchive/archive')
const { xmlToJSON } = require('../index')
const parseComicInfoMetadata = require('./parseComicInfoMetadata')
/**
 * Read a whole comic file from disk.
 *
 * Failures are logged and reported as null instead of being thrown.
 *
 * @param {string} filepath - path of the comic file
 * @returns {Promise<Buffer|null>} file contents, or null when the path does not exist or the read fails
 */
async function getComicFileBuffer(filepath) {
  if (!(await fs.pathExists(filepath))) {
    Logger.error(`Comic path does not exist "${filepath}"`)
    return null
  }

  try {
    // The await is required: returning the bare promise would let a read
    // rejection escape this try/catch, skipping the log and the null result
    return await fs.readFile(filepath)
  } catch (error) {
    Logger.error(`Failed to read comic at "${filepath}"`, error)
    return null
  }
}
/**
 * Extract one image from a comic archive and write it to outputCoverPath.
 *
 * Every failure (unreadable comic, archive error, missing entry, write error)
 * is logged and reported as false; this function does not throw for them.
 *
 * @param {string} comicPath - path of the comic archive on disk
 * @param {string} comicImageFilepath - path of the image entry inside the archive
 * @param {string} outputCoverPath - destination file for the extracted image
 * @returns {Promise<boolean>} true when the image was written
 */
async function extractCoverImage(comicPath, comicImageFilepath, outputCoverPath) {
  const comicFileBuffer = await getComicFileBuffer(comicPath)
  // getComicFileBuffer has already logged the reason
  if (!comicFileBuffer) return false

  try {
    // Opening and extracting sit inside the try so an archive error is
    // logged and reported as false like any other failure
    const archive = await Archive.open(comicFileBuffer)
    const fileEntry = await archive.extractSingleFile(comicImageFilepath)
    if (!fileEntry?.fileData) {
      Logger.error(`[parseComicMetadata] Invalid file entry data for comicPath "${comicPath}"/${comicImageFilepath}`)
      return false
    }

    await fs.writeFile(outputCoverPath, fileEntry.fileData)
    return true
  } catch (error) {
    Logger.error(`[parseComicMetadata] Failed to extract image from comicPath "${comicPath}"`, error)
    return false
  }
}
module.exports.extractCoverImage = extractCoverImage
/**
 * Scan a comic archive for metadata and a cover candidate.
 *
 * Archive entries are sorted by name (numeric-aware, base sensitivity). An entry
 * named exactly "ComicInfo.xml" is decoded and parsed into metadata, and the first
 * entry in sorted order with a supported image extension is recorded as the cover.
 *
 * @param {import('../../models/Book').EBookFileObject} ebookFile
 * @returns {Promise<import('./parseEbookMetadata').EBookFileScanData>} null when the comic file cannot be read
 */
async function parse(ebookFile) {
  const comicPath = ebookFile.metadata.path
  Logger.debug(`Parsing metadata from comic at "${comicPath}"`)

  const buffer = await getComicFileBuffer(comicPath)
  if (!buffer) return null

  const archive = await Archive.open(buffer)
  const entries = await archive.getFilesArray()

  // Sort in place so that e.g. "page2" comes before "page10"
  const compareOptions = { numeric: true, sensitivity: 'base' }
  entries.sort((left, right) => left.file.name.localeCompare(right.file.name, undefined, compareOptions))

  let metadata = null
  const comicInfoEntry = entries.find((entry) => entry.file.name === 'ComicInfo.xml')
  const extractedInfo = comicInfoEntry ? await comicInfoEntry.file.extract() : null
  if (extractedInfo?.fileData) {
    const xml = new TextDecoder().decode(extractedInfo.fileData)
    const json = await xmlToJSON(xml)
    if (json) metadata = parseComicInfoMetadata.parse(json)
  }

  const payload = {
    path: comicPath,
    ebookFormat: ebookFile.ebookFormat,
    metadata
  }

  // The extension is compared lowercased and without its leading dot
  const hasImageExtension = (entry) => {
    const extension = Path.extname(entry.file.name).toLowerCase().slice(1)
    return globals.SupportedImageTypes.includes(extension)
  }
  const coverPath = entries.find(hasImageExtension)?.file?._path
  if (!coverPath) {
    Logger.warn(`Cover image not found in comic at "${comicPath}"`)
    return payload
  }

  payload.ebookCoverPath = coverPath
  return payload
}
module.exports.parse = parse

View file

@ -1,4 +1,5 @@
const parseEpubMetadata = require('./parseEpubMetadata')
const parseComicMetadata = require('./parseComicMetadata')
/**
* @typedef EBookFileScanData
@ -18,7 +19,9 @@ async function parse(ebookFile) {
if (!ebookFile) return null
if (ebookFile.ebookFormat === 'epub') {
return parseEpubMetadata.parse(ebookFile.metadata.path)
return parseEpubMetadata.parse(ebookFile)
} else if (['cbz', 'cbr'].includes(ebookFile.ebookFormat)) {
return parseComicMetadata.parse(ebookFile)
}
return null
}
@ -36,6 +39,8 @@ async function extractCoverImage(ebookFileScanData, outputCoverPath) {
if (ebookFileScanData.ebookFormat === 'epub') {
return parseEpubMetadata.extractCoverImage(ebookFileScanData.path, ebookFileScanData.ebookCoverPath, outputCoverPath)
} else if (['cbz', 'cbr'].includes(ebookFileScanData.ebookFormat)) {
return parseComicMetadata.extractCoverImage(ebookFileScanData.path, ebookFileScanData.ebookCoverPath, outputCoverPath)
}
return false
}

View file

@ -60,10 +60,11 @@ module.exports.extractCoverImage = extractCoverImage
/**
* Parse metadata from epub
*
* @param {string} epubPath
* @param {import('../../models/Book').EBookFileObject} ebookFile
* @returns {Promise<import('./parseEbookMetadata').EBookFileScanData>}
*/
async function parse(epubPath) {
async function parse(ebookFile) {
const epubPath = ebookFile.metadata.path
Logger.debug(`Parsing metadata from epub at "${epubPath}"`)
// Entrypoint of the epub that contains the filepath to the package document (opf file)
const containerJson = await extractXmlToJson(epubPath, 'META-INF/container.xml')