Added Metadata editor class

2026-01-27 15:37:28 +00:00 · 2015-01-24 18:36:15 +01:00
parent ad3ff35aaa
commit 7d529a2acc
3 changed files with 191 additions and 31 deletions
--- a/kcc/comic2ebook.py
+++ b/kcc/comic2ebook.py
@@ -24,14 +24,13 @@ from copy import copy
 from glob import glob
 from json import loads
 from urllib.request import Request, urlopen
-from re import split, sub, compile
+from re import split, sub
 from stat import S_IWRITE, S_IREAD, S_IEXEC
 from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED
 from tempfile import mkdtemp
 from shutil import move, copytree, rmtree
 from optparse import OptionParser, OptionGroup
 from multiprocessing import Pool
-from xml.dom.minidom import parse
 from uuid import uuid4
 from slugify import slugify as slugifyExt
 from PIL import Image
@@ -48,6 +47,7 @@ from . import image
 from . import cbxarchive
 from . import pdfjpgextract
 from . import dualmetafix
+from . import metadata
 from . import __version__


@@ -165,7 +165,7 @@ def buildHTML(path, imgfile, imgfilepath):
                          "'{\"targetId\":\"" + boxes[i] + "-Panel-Parent\", \"ordinal\":" + str(order[i]),
                          "}'></a></div>\n"])
        if options.quality == 2:
-            imgfilepv = str.split(imgfile, ".")
+            imgfilepv = imgfile.split(".")
            imgfilepv[0] += "-hq"
            imgfilepv = ".".join(imgfilepv)
        else:
@@ -641,45 +641,29 @@ def getComicInfo(path, originalPath):
        defaultTitle = False
    if os.path.exists(xmlPath):
        try:
-            xml = parse(xmlPath)
+            xml = metadata.MetadataParser(xmlPath)
        except Exception:
            os.remove(xmlPath)
            return
        options.authors = []
        if defaultTitle:
-            if len(xml.getElementsByTagName('Series')) != 0:
-                options.title = xml.getElementsByTagName('Series')[0].firstChild.nodeValue
-            if len(xml.getElementsByTagName('Volume')) != 0:
-                titleSuffix += ' V' + xml.getElementsByTagName('Volume')[0].firstChild.nodeValue
-            if len(xml.getElementsByTagName('Number')) != 0:
-                titleSuffix += ' #' + xml.getElementsByTagName('Number')[0].firstChild.nodeValue
+            if xml.data['Series']:
+                options.title = xml.data['Series']
+            if xml.data['Volume']:
+                titleSuffix += ' V' + xml.data['Volume']
+            if xml.data['Number']:
+                titleSuffix += ' #' + xml.data['Number']
            options.title += titleSuffix
-        if len(xml.getElementsByTagName('Writer')) != 0:
-            authorsTemp = str.split(xml.getElementsByTagName('Writer')[0].firstChild.nodeValue, ', ')
-            for author in authorsTemp:
-                options.authors.append(author)
-        if len(xml.getElementsByTagName('Penciller')) != 0:
-            authorsTemp = str.split(xml.getElementsByTagName('Penciller')[0].firstChild.nodeValue, ', ')
-            for author in authorsTemp:
-                options.authors.append(author)
-        if len(xml.getElementsByTagName('Inker')) != 0:
-            authorsTemp = str.split(xml.getElementsByTagName('Inker')[0].firstChild.nodeValue, ', ')
-            for author in authorsTemp:
-                options.authors.append(author)
-        if len(xml.getElementsByTagName('Colorist')) != 0:
-            authorsTemp = str.split(xml.getElementsByTagName('Colorist')[0].firstChild.nodeValue, ', ')
-            for author in authorsTemp:
-                options.authors.append(author)
+        for field in ['Writers', 'Pencillers', 'Inkers', 'Colorists']:
+            for person in xml.data[field]:
+                options.authors.append(person)
        if len(options.authors) > 0:
            options.authors = list(set(options.authors))
            options.authors.sort()
        else:
            options.authors = ['KCC']
-        if len(xml.getElementsByTagName('ScanInformation')) != 0:
-            coverId = xml.getElementsByTagName('ScanInformation')[0].firstChild.nodeValue
-            coverId = compile('(MCD\\()(\\d+)(\\))').search(coverId)
-            if coverId:
-                options.remoteCovers = getCoversFromMCB(coverId.group(2))
+        if xml.data['MUid']:
+            options.remoteCovers = getCoversFromMCB(xml.data['MUid'])
        os.remove(xmlPath)


--- a/kcc/metadata.py
+++ b/kcc/metadata.py
@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013-2015 Pawel Jastrzebski <pawelj@iosphe.re>
+#
+# Permission to use, copy, modify, and/or distribute this software for
+# any purpose with or without fee is hereby granted, provided that the
+# above copyright notice and this permission notice appear in all
+# copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
+# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+import os
+from xml.dom.minidom import parse, Document
+from re import compile
+from zipfile import is_zipfile, ZipFile, ZIP_DEFLATED
+from subprocess import STDOUT, PIPE
+from psutil import Popen
+from tempfile import mkdtemp
+from shutil import rmtree
+from .shared import removeFromZIP, check7ZFile as is_7zfile
+from . import rarfile
+
+
+class MetadataParser:
+    """Read and write the ComicInfo.xml metadata of a comic.
+
+    ``source`` is either a standalone ``.xml`` file or a ZIP/RAR/7z archive
+    that may hold a member named exactly ``ComicInfo.xml``. Parsed values are
+    exposed through ``self.data``; edit that dictionary and call ``saveXML()``
+    to write the changes back.
+    """
+
+    def __init__(self, source):
+        """Load metadata from *source*.
+
+        An archive without ``ComicInfo.xml`` leaves ``self.data`` at its empty
+        defaults and ``self.rawdata`` as ``None``.
+
+        Raises:
+            OSError: *source* is neither an ``.xml`` file nor a recognised
+                archive, or 7za did not report success on a 7z archive.
+        """
+        self.source = source
+        self.data = {'Series': '',
+                     'Volume': '',
+                     'Number': '',
+                     'Writers': [],
+                     'Pencillers': [],
+                     'Inkers': [],
+                     'Colorists': [],
+                     'MUid': ''}
+        # minidom Document of the loaded ComicInfo.xml, or None if there is none.
+        self.rawdata = None
+        if self.source.endswith('.xml'):
+            self.rawdata = parse(self.source)
+            self.parseXML()
+        else:
+            if is_zipfile(self.source):
+                with ZipFile(self.source) as zip_file:
+                    if 'ComicInfo.xml' in zip_file.namelist():
+                        with zip_file.open('ComicInfo.xml') as xml_file:
+                            self.rawdata = parse(xml_file)
+            elif rarfile.is_rarfile(self.source):
+                with rarfile.RarFile(self.source) as rar_file:
+                    if 'ComicInfo.xml' in rar_file.namelist():
+                        with rar_file.open('ComicInfo.xml') as xml_file:
+                            self.rawdata = parse(xml_file)
+            elif is_7zfile(self.source):
+                workdir = mkdtemp('', 'KCC-TMP-')
+                try:
+                    tmpXML = os.path.join(workdir, 'ComicInfo.xml')
+                    # Argument list instead of a shell string: quotes or shell
+                    # metacharacters in the file name cannot alter the command.
+                    output = Popen(['7za', 'e', self.source, 'ComicInfo.xml', '-o' + workdir],
+                                   stdout=PIPE, stderr=STDOUT)
+                    # communicate() drains the pipe and waits for 7za to exit.
+                    if b"Everything is Ok" not in output.communicate()[0]:
+                        raise OSError
+                    if os.path.isfile(tmpXML):
+                        self.rawdata = parse(tmpXML)
+                finally:
+                    # Remove the scratch directory even when parsing fails.
+                    rmtree(workdir)
+            else:
+                raise OSError
+            if self.rawdata:
+                self.parseXML()
+
+    def _elementText(self, tag):
+        """Return the text of the first ``<tag>`` element, or ``''`` when it is absent or empty."""
+        nodes = self.rawdata.getElementsByTagName(tag)
+        # An empty element (<Series/>) has no child node at all.
+        if not nodes or nodes[0].firstChild is None:
+            return ''
+        return nodes[0].firstChild.nodeValue or ''
+
+    def _xmlRows(self):
+        """Return ``(tag, text)`` pairs describing ``self.data`` as ComicInfo.xml elements."""
+        muid = self.data['MUid']
+        return (('Series', self.data['Series']),
+                ('Volume', self.data['Volume']),
+                ('Number', self.data['Number']),
+                ('Writer', ', '.join(self.data['Writers'])),
+                ('Penciller', ', '.join(self.data['Pencillers'])),
+                ('Inker', ', '.join(self.data['Inkers'])),
+                ('Colorist', ', '.join(self.data['Colorists'])),
+                ('ScanInformation', ('MCD(' + muid + ')') if muid else ''))
+
+    def parseXML(self):
+        """Populate ``self.data`` from the DOM stored in ``self.rawdata``.
+
+        Missing or empty elements leave the corresponding entry untouched.
+        Each people list ends up de-duplicated and sorted.
+        """
+        for field in ('Series', 'Volume', 'Number'):
+            value = self._elementText(field)
+            if value:
+                self.data[field] = value
+        for field in ('Writer', 'Penciller', 'Inker', 'Colorist'):
+            people = set(self.data[field + 's'])
+            value = self._elementText(field)
+            if value:
+                people.update(value.split(', '))
+            self.data[field + 's'] = sorted(people)
+        scanInfo = self._elementText('ScanInformation')
+        if scanInfo:
+            # The numeric id is the digit run inside "MCD(<digits>)".
+            coverId = compile('(MCD\\()(\\d+)(\\))').search(scanInfo)
+            if coverId:
+                self.data['MUid'] = coverId.group(2)
+
+    def saveXML(self):
+        """Write ``self.data`` back to ``self.source``.
+
+        An existing document is updated in place: elements with a value are
+        rewritten or created, elements whose value became empty are removed.
+        Without a loaded document a new ``ComicInfo`` document is built.
+        The XML is always written as UTF-8 with a matching declaration.
+
+        Raises:
+            NotImplementedError: *source* is a RAR archive.
+            OSError: 7za did not report success while updating a 7z archive.
+        """
+        if self.rawdata:
+            root = self.rawdata.getElementsByTagName('ComicInfo')[0]
+            for tag, value in self._xmlRows():
+                nodes = self.rawdata.getElementsByTagName(tag)
+                if nodes:
+                    node = nodes[0]
+                    if value:
+                        # Replace the whole content; also works for empty elements.
+                        while node.firstChild:
+                            node.removeChild(node.firstChild)
+                        node.appendChild(self.rawdata.createTextNode(value))
+                    else:
+                        # Detach from the real parent, which need not be the root.
+                        node.parentNode.removeChild(node)
+                elif value:
+                    node = self.rawdata.createElement(tag)
+                    root.appendChild(node)
+                    node.appendChild(self.rawdata.createTextNode(value))
+        else:
+            doc = Document()
+            root = doc.createElement('ComicInfo')
+            root.setAttribute('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema')
+            root.setAttribute('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
+            doc.appendChild(root)
+            for tag, value in self._xmlRows():
+                if value:
+                    node = doc.createElement(tag)
+                    root.appendChild(node)
+                    node.appendChild(doc.createTextNode(value))
+            self.rawdata = doc
+        if self.source.endswith('.xml'):
+            with open(self.source, 'w', encoding='utf-8') as f:
+                self.rawdata.writexml(f, encoding='utf-8')
+        else:
+            workdir = mkdtemp('', 'KCC-TMP-')
+            try:
+                tmpXML = os.path.join(workdir, 'ComicInfo.xml')
+                with open(tmpXML, 'w', encoding='utf-8') as f:
+                    self.rawdata.writexml(f, encoding='utf-8')
+                if is_zipfile(self.source):
+                    # Drop the stale member first so the archive never holds two copies.
+                    removeFromZIP(self.source, 'ComicInfo.xml')
+                    with ZipFile(self.source, mode='a', compression=ZIP_DEFLATED) as zip_file:
+                        zip_file.write(tmpXML, arcname='ComicInfo.xml')
+                elif rarfile.is_rarfile(self.source):
+                    raise NotImplementedError
+                elif is_7zfile(self.source):
+                    # Argument list instead of a shell string, see __init__.
+                    output = Popen(['7za', 'a', self.source, tmpXML], stdout=PIPE, stderr=STDOUT)
+                    if b"Everything is Ok" not in output.communicate()[0]:
+                        raise OSError
+            finally:
+                # Remove the scratch directory on every exit path.
+                rmtree(workdir)
--- a/kcc/shared.py
+++ b/kcc/shared.py
@@ -22,6 +22,9 @@ from html.parser import HTMLParser
 from distutils.version import StrictVersion
 from scandir import walk
 from time import sleep
+from shutil import rmtree, move
+from tempfile import mkdtemp
+from zipfile import ZipFile, ZIP_DEFLATED


 class HTMLStripper(HTMLParser):
@@ -87,6 +90,20 @@ def saferReplace(old, new):
        raise PermissionError


+def removeFromZIP(zipfname, *filenames):
+    """Rewrite the archive *zipfname* without the members named in *filenames*.
+
+    The remaining members are copied into a new archive inside a scratch
+    directory, which is then moved over the original file. The scratch
+    directory is removed whether or not the rewrite succeeds.
+    """
+    scratch = mkdtemp('', 'KCC-TMP-')
+    try:
+        rebuilt = os.path.join(scratch, 'KCC-TMP.zip')
+        with ZipFile(zipfname, 'r') as src, ZipFile(rebuilt, 'w', compression=ZIP_DEFLATED) as dst:
+            for entry in src.infolist():
+                if entry.filename in filenames:
+                    continue
+                # Each kept entry is written using its original ZipInfo record.
+                dst.writestr(entry, src.read(entry.filename))
+        move(rebuilt, zipfname)
+    finally:
+        rmtree(scratch)
+
+
 # noinspection PyUnresolvedReferences
 def dependencyCheck(level):
    missing = []