Add:Scanner extracts cover from comic files #1837 and ComicInfo.xml parser

2025-08-03 09:44:41 +02:00 · 2024-01-14 17:51:26 -06:00 · 2024-01-14 17:51:26 -06:00 · f5545cd3f4
commit f5545cd3f4
parent e76af3bfc2
10 changed files with 762 additions and 4 deletions
--- a/server/utils/parsers/parseComicInfoMetadata.js
+++ b/server/utils/parsers/parseComicInfoMetadata.js
@ -0,0 +1,35 @@
+
+/**
+ * TODO: Add more fields
+ * @see https://anansi-project.github.io/docs/comicinfo/intro
+ * 
+ * @param {Object} comicInfoJson 
+ * @returns {import('../../scanner/BookScanner').BookMetadataObject}
+ */
+module.exports.parse = (comicInfoJson) => {
+  if (!comicInfoJson?.ComicInfo) return null
+
+  const ComicSeries = comicInfoJson.ComicInfo.Series?.[0]?.trim() || null
+  const ComicNumber = comicInfoJson.ComicInfo.Number?.[0]?.trim() || null
+  const ComicSummary = comicInfoJson.ComicInfo.Summary?.[0]?.trim() || null
+
+  let title = null
+  const series = []
+  if (ComicSeries) {
+    series.push({
+      name: ComicSeries,
+      sequence: ComicNumber
+    })
+
+    title = ComicSeries
+    if (ComicNumber) {
+      title += ` ${ComicNumber}`
+    }
+  }
+
+  return {
+    title,
+    series,
+    description: ComicSummary
+  }
+}
--- a/server/utils/parsers/parseComicMetadata.js
+++ b/server/utils/parsers/parseComicMetadata.js
@ -0,0 +1,109 @@
+const Path = require('path')
+const globals = require('../globals')
+const fs = require('../../libs/fsExtra')
+const Logger = require('../../Logger')
+const Archive = require('../../libs/libarchive/archive')
+const { xmlToJSON } = require('../index')
+const parseComicInfoMetadata = require('./parseComicInfoMetadata')
+
+/**
+ * 
+ * @param {string} filepath 
+ * @returns {Promise<Buffer>}
+ */
+async function getComicFileBuffer(filepath) {
+  if (!await fs.pathExists(filepath)) {
+    Logger.error(`Comic path does not exist "${filepath}"`)
+    return null
+  }
+  try {
+    return fs.readFile(filepath)
+  } catch (error) {
+    Logger.error(`Failed to read comic at "${filepath}"`, error)
+    return null
+  }
+}
+
+/**
+ * Extract cover image from comic return true if success
+ * 
+ * @param {string} comicPath 
+ * @param {string} comicImageFilepath 
+ * @param {string} outputCoverPath 
+ * @returns {Promise<boolean>}
+ */
+async function extractCoverImage(comicPath, comicImageFilepath, outputCoverPath) {
+  const comicFileBuffer = await getComicFileBuffer(comicPath)
+  if (!comicFileBuffer) return null
+
+  const archive = await Archive.open(comicFileBuffer)
+  const fileEntry = await archive.extractSingleFile(comicImageFilepath)
+
+  if (!fileEntry?.fileData) {
+    Logger.error(`[parseComicMetadata] Invalid file entry data for comicPath "${comicPath}"/${comicImageFilepath}`)
+    return false
+  }
+
+  try {
+    await fs.writeFile(outputCoverPath, fileEntry.fileData)
+    return true
+  } catch (error) {
+    Logger.error(`[parseComicMetadata] Failed to extract image from comicPath "${comicPath}"`, error)
+    return false
+  }
+}
+module.exports.extractCoverImage = extractCoverImage
+
+/**
+ * Parse metadata from comic
+ * 
+ * @param {import('../../models/Book').EBookFileObject} ebookFile 
+ * @returns {Promise<import('./parseEbookMetadata').EBookFileScanData>}
+ */
+async function parse(ebookFile) {
+  const comicPath = ebookFile.metadata.path
+  Logger.debug(`Parsing metadata from comic at "${comicPath}"`)
+
+  const comicFileBuffer = await getComicFileBuffer(comicPath)
+  if (!comicFileBuffer) return null
+
+  const archive = await Archive.open(comicFileBuffer)
+
+  const fileObjects = await archive.getFilesArray()
+
+  fileObjects.sort((a, b) => {
+    return a.file.name.localeCompare(b.file.name, undefined, {
+      numeric: true,
+      sensitivity: 'base'
+    })
+  })
+
+  let metadata = null
+  const comicInfo = fileObjects.find(fo => fo.file.name === 'ComicInfo.xml')
+  if (comicInfo) {
+    const comicInfoEntry = await comicInfo.file.extract()
+    if (comicInfoEntry?.fileData) {
+      const comicInfoStr = new TextDecoder().decode(comicInfoEntry.fileData)
+      const comicInfoJson = await xmlToJSON(comicInfoStr)
+      if (comicInfoJson) {
+        metadata = parseComicInfoMetadata.parse(comicInfoJson)
+      }
+    }
+  }
+
+  const payload = {
+    path: comicPath,
+    ebookFormat: ebookFile.ebookFormat,
+    metadata
+  }
+
+  const firstImage = fileObjects.find(fo => globals.SupportedImageTypes.includes(Path.extname(fo.file.name).toLowerCase().slice(1)))
+  if (firstImage?.file?._path) {
+    payload.ebookCoverPath = firstImage.file._path
+  } else {
+    Logger.warn(`Cover image not found in comic at "${comicPath}"`)
+  }
+
+  return payload
+}
+module.exports.parse = parse
--- a/server/utils/parsers/parseEbookMetadata.js
+++ b/server/utils/parsers/parseEbookMetadata.js
@ -1,4 +1,5 @@
 const parseEpubMetadata = require('./parseEpubMetadata')
+const parseComicMetadata = require('./parseComicMetadata')

 /**
 * @typedef EBookFileScanData
@ -18,7 +19,9 @@ async function parse(ebookFile) {
  if (!ebookFile) return null

  if (ebookFile.ebookFormat === 'epub') {
-    return parseEpubMetadata.parse(ebookFile.metadata.path)
+    return parseEpubMetadata.parse(ebookFile)
+  } else if (['cbz', 'cbr'].includes(ebookFile.ebookFormat)) {
+    return parseComicMetadata.parse(ebookFile)
  }
  return null
 }
@ -36,6 +39,8 @@ async function extractCoverImage(ebookFileScanData, outputCoverPath) {

  if (ebookFileScanData.ebookFormat === 'epub') {
    return parseEpubMetadata.extractCoverImage(ebookFileScanData.path, ebookFileScanData.ebookCoverPath, outputCoverPath)
+  } else if (['cbz', 'cbr'].includes(ebookFileScanData.ebookFormat)) {
+    return parseComicMetadata.extractCoverImage(ebookFileScanData.path, ebookFileScanData.ebookCoverPath, outputCoverPath)
  }
  return false
 }
--- a/server/utils/parsers/parseEpubMetadata.js
+++ b/server/utils/parsers/parseEpubMetadata.js
@ -60,10 +60,11 @@ module.exports.extractCoverImage = extractCoverImage
 /**
 * Parse metadata from epub
 * 
- * @param {string} epubPath 
+ * @param {import('../../models/Book').EBookFileObject} ebookFile 
 * @returns {Promise<import('./parseEbookMetadata').EBookFileScanData>}
 */
-async function parse(epubPath) {
+async function parse(ebookFile) {
+  const epubPath = ebookFile.metadata.path
  Logger.debug(`Parsing metadata from epub at "${epubPath}"`)
  // Entrypoint of the epub that contains the filepath to the package document (opf file)
  const containerJson = await extractXmlToJson(epubPath, 'META-INF/container.xml')