1
0
mirror of https://github.com/ciromattia/kcc synced 2026-01-27 15:37:28 +00:00

Added Metadata editor class

This commit is contained in:
Paweł Jastrzębski
2015-01-24 18:36:15 +01:00
parent ad3ff35aaa
commit 7d529a2acc
3 changed files with 191 additions and 31 deletions

View File

@@ -24,14 +24,13 @@ from copy import copy
from glob import glob
from json import loads
from urllib.request import Request, urlopen
from re import split, sub, compile
from re import split, sub
from stat import S_IWRITE, S_IREAD, S_IEXEC
from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED
from tempfile import mkdtemp
from shutil import move, copytree, rmtree
from optparse import OptionParser, OptionGroup
from multiprocessing import Pool
from xml.dom.minidom import parse
from uuid import uuid4
from slugify import slugify as slugifyExt
from PIL import Image
@@ -48,6 +47,7 @@ from . import image
from . import cbxarchive
from . import pdfjpgextract
from . import dualmetafix
from . import metadata
from . import __version__
@@ -165,7 +165,7 @@ def buildHTML(path, imgfile, imgfilepath):
"'{\"targetId\":\"" + boxes[i] + "-Panel-Parent\", \"ordinal\":" + str(order[i]),
"}'></a></div>\n"])
if options.quality == 2:
imgfilepv = str.split(imgfile, ".")
imgfilepv = imgfile.split(".")
imgfilepv[0] += "-hq"
imgfilepv = ".".join(imgfilepv)
else:
@@ -641,45 +641,29 @@ def getComicInfo(path, originalPath):
defaultTitle = False
if os.path.exists(xmlPath):
try:
xml = parse(xmlPath)
xml = metadata.MetadataParser(xmlPath)
except Exception:
os.remove(xmlPath)
return
options.authors = []
if defaultTitle:
if len(xml.getElementsByTagName('Series')) != 0:
options.title = xml.getElementsByTagName('Series')[0].firstChild.nodeValue
if len(xml.getElementsByTagName('Volume')) != 0:
titleSuffix += ' V' + xml.getElementsByTagName('Volume')[0].firstChild.nodeValue
if len(xml.getElementsByTagName('Number')) != 0:
titleSuffix += ' #' + xml.getElementsByTagName('Number')[0].firstChild.nodeValue
if xml.data['Series']:
options.title = xml.data['Series']
if xml.data['Volume']:
titleSuffix += ' V' + xml.data['Volume']
if xml.data['Number']:
titleSuffix += ' #' + xml.data['Number']
options.title += titleSuffix
if len(xml.getElementsByTagName('Writer')) != 0:
authorsTemp = str.split(xml.getElementsByTagName('Writer')[0].firstChild.nodeValue, ', ')
for author in authorsTemp:
options.authors.append(author)
if len(xml.getElementsByTagName('Penciller')) != 0:
authorsTemp = str.split(xml.getElementsByTagName('Penciller')[0].firstChild.nodeValue, ', ')
for author in authorsTemp:
options.authors.append(author)
if len(xml.getElementsByTagName('Inker')) != 0:
authorsTemp = str.split(xml.getElementsByTagName('Inker')[0].firstChild.nodeValue, ', ')
for author in authorsTemp:
options.authors.append(author)
if len(xml.getElementsByTagName('Colorist')) != 0:
authorsTemp = str.split(xml.getElementsByTagName('Colorist')[0].firstChild.nodeValue, ', ')
for author in authorsTemp:
options.authors.append(author)
for field in ['Writers', 'Pencillers', 'Inkers', 'Colorists']:
for person in xml.data[field]:
options.authors.append(person)
if len(options.authors) > 0:
options.authors = list(set(options.authors))
options.authors.sort()
else:
options.authors = ['KCC']
if len(xml.getElementsByTagName('ScanInformation')) != 0:
coverId = xml.getElementsByTagName('ScanInformation')[0].firstChild.nodeValue
coverId = compile('(MCD\\()(\\d+)(\\))').search(coverId)
if coverId:
options.remoteCovers = getCoversFromMCB(coverId.group(2))
if xml.data['MUid']:
options.remoteCovers = getCoversFromMCB(xml.data['MUid'])
os.remove(xmlPath)

159
kcc/metadata.py Normal file
View File

@@ -0,0 +1,159 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013-2015 Pawel Jastrzebski <pawelj@iosphe.re>
#
# Permission to use, copy, modify, and/or distribute this software for
# any purpose with or without fee is hereby granted, provided that the
# above copyright notice and this permission notice appear in all
# copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
import os
from xml.dom.minidom import parse, Document
from re import compile
from zipfile import is_zipfile, ZipFile, ZIP_DEFLATED
from subprocess import STDOUT, PIPE
from psutil import Popen
from tempfile import mkdtemp
from shutil import rmtree
from .shared import removeFromZIP, check7ZFile as is_7zfile
from . import rarfile
class MetadataParser:
    """Read and write the ComicInfo.xml metadata of a comic.

    ``source`` is either the path of a standalone ``.xml`` file or the path
    of a ZIP, RAR or 7z archive that may contain a ``ComicInfo.xml`` member.
    Any other kind of file makes the constructor raise ``OSError``.

    After construction ``data`` holds the parsed fields (``Series``,
    ``Volume``, ``Number`` and ``MUid`` as strings; ``Writers``,
    ``Pencillers``, ``Inkers`` and ``Colorists`` as sorted lists of names) and
    ``rawdata`` holds the minidom document, or ``None`` when the archive has
    no ``ComicInfo.xml``.
    """

    # Captures the digits of an "MCD(<digits>)" marker inside ScanInformation.
    _MUID_PATTERN = compile('(MCD\\()(\\d+)(\\))')
    # XML tags holding a ', '-separated list of people; the matching key in
    # ``data`` is the tag name with an 's' appended.
    _PEOPLE_TAGS = ('Writer', 'Penciller', 'Inker', 'Colorist')

    def __init__(self, source):
        """Load the metadata found at ``source``.

        Raises ``OSError`` when ``source`` is neither an ``.xml`` file nor a
        recognised archive, or when 7za does not report success.
        """
        self.source = source
        self.data = {'Series': '',
                     'Volume': '',
                     'Number': '',
                     'Writers': [],
                     'Pencillers': [],
                     'Inkers': [],
                     'Colorists': [],
                     'MUid': ''}
        self.rawdata = None
        if self.source.endswith('.xml'):
            self.rawdata = parse(self.source)
        elif is_zipfile(self.source):
            with ZipFile(self.source) as zip_file:
                self.rawdata = self._parseMember(zip_file)
        elif rarfile.is_rarfile(self.source):
            with rarfile.RarFile(self.source) as rar_file:
                self.rawdata = self._parseMember(rar_file)
        elif is_7zfile(self.source):
            workdir = mkdtemp('', 'KCC-TMP-')
            try:
                tmpXML = os.path.join(workdir, 'ComicInfo.xml')
                self._run7za(['e', self.source, 'ComicInfo.xml', '-o' + workdir])
                # 7za can succeed without extracting anything when the
                # archive has no ComicInfo.xml; rawdata then stays None.
                if os.path.isfile(tmpXML):
                    self.rawdata = parse(tmpXML)
            finally:
                # Remove the scratch directory even when parsing fails.
                rmtree(workdir)
        else:
            raise OSError
        if self.rawdata is not None:
            self.parseXML()

    @staticmethod
    def _parseMember(archive):
        """Return the parsed ``ComicInfo.xml`` of an open archive, or ``None`` if absent."""
        if 'ComicInfo.xml' not in archive.namelist():
            return None
        with archive.open('ComicInfo.xml') as xml_file:
            return parse(xml_file)

    @staticmethod
    def _run7za(arguments):
        """Run ``7za`` with ``arguments``; raise ``OSError`` unless it prints "Everything is Ok"."""
        # An argument list (no shell) keeps quotes and shell metacharacters in
        # file names from being interpreted as part of the command line.
        process = Popen(['7za'] + arguments, stdout=PIPE, stderr=STDOUT)
        succeeded = False
        for line in process.stdout:
            if b"Everything is Ok" in line:
                succeeded = True
        process.wait()
        if not succeeded:
            raise OSError

    def _nodeText(self, tag):
        """Return the text of the first ``tag`` element, or '' when it is missing or empty."""
        nodes = self.rawdata.getElementsByTagName(tag)
        # An empty element such as <Series/> has no child node at all.
        if not nodes or nodes[0].firstChild is None:
            return ''
        return nodes[0].firstChild.nodeValue or ''

    def _fieldRows(self):
        """Return ``(tag, text)`` pairs describing how ``data`` maps onto XML elements."""
        return (('Series', self.data['Series']),
                ('Volume', self.data['Volume']),
                ('Number', self.data['Number']),
                ('Writer', ', '.join(self.data['Writers'])),
                ('Penciller', ', '.join(self.data['Pencillers'])),
                ('Inker', ', '.join(self.data['Inkers'])),
                ('Colorist', ', '.join(self.data['Colorists'])),
                ('ScanInformation', 'MCD(' + self.data['MUid'] + ')' if self.data['MUid'] else ''))

    def _writeDocument(self, path):
        """Serialise ``rawdata`` to ``path`` as UTF-8 with a matching XML declaration."""
        # An explicit encoding keeps non-ASCII names valid regardless of the
        # platform's default text encoding.
        with open(path, 'w', encoding='UTF-8') as f:
            self.rawdata.writexml(f, encoding='UTF-8')

    def parseXML(self):
        """Fill ``data`` from ``rawdata``; missing or empty elements leave their field untouched."""
        for tag in ('Series', 'Volume', 'Number'):
            value = self._nodeText(tag)
            if value:
                self.data[tag] = value
        for tag in self._PEOPLE_TAGS:
            names = self._nodeText(tag)
            if names:
                key = tag + 's'
                # Merge with names already collected, dropping duplicates.
                self.data[key] = sorted(set(self.data[key]).union(names.split(', ')))
        match = self._MUID_PATTERN.search(self._nodeText('ScanInformation'))
        if match:
            self.data['MUid'] = match.group(2)

    def saveXML(self):
        """Write ``data`` back to ``source``.

        An existing document is updated in place: fields with a value are
        set or created, fields without a value have their first element
        removed. Note that ScanInformation is rewritten as ``MCD(<MUid>)``, or
        removed when ``MUid`` is empty. When there is no document yet, a new
        ``ComicInfo`` document is built from the non-empty fields.

        Raises ``NotImplementedError`` for RAR archives and ``OSError`` when
        7za does not report success.
        """
        if self.rawdata is not None:
            root = self.rawdata.getElementsByTagName('ComicInfo')[0]
            for tag, value in self._fieldRows():
                nodes = self.rawdata.getElementsByTagName(tag)
                if nodes:
                    node = nodes[0]
                    if not value:
                        # Detach from the real parent; the element is not
                        # guaranteed to be a direct child of the root.
                        node.parentNode.removeChild(node)
                    elif node.firstChild is None:
                        # Empty element: there is no text node to replace.
                        node.appendChild(self.rawdata.createTextNode(value))
                    else:
                        node.firstChild.replaceWholeText(value)
                elif value:
                    element = self.rawdata.createElement(tag)
                    root.appendChild(element)
                    element.appendChild(self.rawdata.createTextNode(value))
        else:
            doc = Document()
            root = doc.createElement('ComicInfo')
            root.setAttribute('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema')
            root.setAttribute('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
            doc.appendChild(root)
            for tag, value in self._fieldRows():
                if value:
                    element = doc.createElement(tag)
                    root.appendChild(element)
                    element.appendChild(doc.createTextNode(value))
            self.rawdata = doc
        if self.source.endswith('.xml'):
            self._writeDocument(self.source)
            return
        workdir = mkdtemp('', 'KCC-TMP-')
        try:
            tmpXML = os.path.join(workdir, 'ComicInfo.xml')
            self._writeDocument(tmpXML)
            if is_zipfile(self.source):
                # Drop any previous copy first so the archive does not end up
                # with two ComicInfo.xml entries.
                removeFromZIP(self.source, 'ComicInfo.xml')
                with ZipFile(self.source, mode='a', compression=ZIP_DEFLATED) as zip_file:
                    zip_file.write(tmpXML, arcname='ComicInfo.xml')
            elif rarfile.is_rarfile(self.source):
                raise NotImplementedError
            elif is_7zfile(self.source):
                self._run7za(['a', self.source, tmpXML])
        finally:
            # Clean up on every path, including the RAR and failure cases.
            rmtree(workdir)

View File

@@ -22,6 +22,9 @@ from html.parser import HTMLParser
from distutils.version import StrictVersion
from scandir import walk
from time import sleep
from shutil import rmtree, move
from tempfile import mkdtemp
from zipfile import ZipFile, ZIP_DEFLATED
class HTMLStripper(HTMLParser):
@@ -87,6 +90,20 @@ def saferReplace(old, new):
raise PermissionError
def removeFromZIP(zipfname, *filenames):
    """Rewrite the ZIP archive ``zipfname`` without the members named in ``filenames``.

    The surviving entries are copied into a temporary archive, which then
    replaces the original file. Names in ``filenames`` that are not present
    in the archive are ignored. The temporary directory is removed whether
    or not the rewrite succeeds.
    """
    tempdir = mkdtemp('', 'KCC-TMP-')
    try:
        tempname = os.path.join(tempdir, 'KCC-TMP.zip')
        with ZipFile(zipfname, 'r') as zipread:
            with ZipFile(tempname, 'w', compression=ZIP_DEFLATED) as zipwrite:
                for item in zipread.infolist():
                    if item.filename not in filenames:
                        # Read through the ZipInfo rather than the name: a
                        # lookup by name returns the last entry with that
                        # name, so duplicated entries would all receive the
                        # same data.
                        zipwrite.writestr(item, zipread.read(item))
        move(tempname, zipfname)
    finally:
        rmtree(tempdir)
# noinspection PyUnresolvedReferences
def dependencyCheck(level):
missing = []