mirror of
https://github.com/ciromattia/kcc
synced 2026-05-16 04:21:42 +00:00
experimental epub input (#1090)
* experimental epub input
* fix missing spine items
* only extract first image on page
* re-organize
* fallback if naive spine extraction fails
* apply legacy extract option for epub too
This commit is contained in:
@@ -247,7 +247,7 @@ MAIN:
|
||||
|
||||
PROCESSING:
|
||||
-n, --noprocessing Do not modify image and ignore any profile or processing option
|
||||
--pdfextract Use legacy PDF image extraction method from KCC 8 and earlier.
|
||||
--legacyextract Use legacy PDF/EPUB image extraction method from earlier KCC versions.
|
||||
--pdfwidth Render vector PDFs based on device width instead of height.
|
||||
-u, --upscale Resize images smaller than device's resolution
|
||||
-s, --stretch Stretch images to device's resolution
|
||||
|
||||
@@ -750,14 +750,12 @@ Higher values are larger and higher quality, and may resolve blank page issues.<
|
||||
</widget>
|
||||
</item>
|
||||
<item row="9" column="0">
|
||||
<widget class="QCheckBox" name="pdfExtractBox">
|
||||
<widget class="QCheckBox" name="legacyExtractBox">
|
||||
<property name="toolTip">
|
||||
<string>Use the PDF image extraction method from KCC 8 and earlier.
|
||||
|
||||
Useful for really weird PDFs.</string>
|
||||
<string><html><head/><body><p>Use the PDF/EPUB image extraction method from older KCC versions.</p><p><br/></p><p>Use if standard extraction fails for whatever reason.</p></body></html></string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>PDF Legacy Extract</string>
|
||||
<string>Legacy Extract</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
|
||||
@@ -326,8 +326,8 @@ class WorkerThread(QThread):
|
||||
options.maximizestrips = True
|
||||
if GUI.disableProcessingBox.isChecked():
|
||||
options.noprocessing = True
|
||||
if GUI.pdfExtractBox.isChecked():
|
||||
options.pdfextract = True
|
||||
if GUI.legacyExtractBox.isChecked():
|
||||
options.legacyextract = True
|
||||
if GUI.pdfWidthBox.isChecked():
|
||||
options.pdfwidth = True
|
||||
if GUI.smartCoverCropBox.isChecked():
|
||||
@@ -625,7 +625,7 @@ class KCCGUI(KCC_ui.Ui_mainWindow):
|
||||
GUI.jobList.clear()
|
||||
if self.tar or self.sevenzip:
|
||||
fnames = QFileDialog.getOpenFileNames(MW, 'Select file', self.lastPath,
|
||||
'Comic (*.cbz *.cbr *.cb7 *.zip *.rar *.7z *.pdf);;All (*.*)')
|
||||
'Comic (*.cbz *.cbr *.cb7 *.zip *.rar *.7z *.epub *.pdf);;All (*.*)')
|
||||
else:
|
||||
fnames = QFileDialog.getOpenFileNames(MW, 'Select file', self.lastPath,
|
||||
'Comic (*.pdf);;All (*.*)')
|
||||
@@ -1080,7 +1080,7 @@ class KCCGUI(KCC_ui.Ui_mainWindow):
|
||||
'colorBox': GUI.colorBox.checkState(),
|
||||
'eraseRainbowBox': GUI.eraseRainbowBox.checkState(),
|
||||
'disableProcessingBox': GUI.disableProcessingBox.checkState(),
|
||||
'pdfExtractBox': GUI.pdfExtractBox.checkState(),
|
||||
'legacyExtractBox': GUI.legacyExtractBox.checkState(),
|
||||
'pdfWidthBox': GUI.pdfWidthBox.checkState(),
|
||||
'smartCoverCropBox': GUI.smartCoverCropBox.checkState(),
|
||||
'coverFillBox': GUI.coverFillBox.checkState(),
|
||||
@@ -1120,7 +1120,7 @@ class KCCGUI(KCC_ui.Ui_mainWindow):
|
||||
GUI.jobList.clear()
|
||||
formats = ['.pdf']
|
||||
if self.tar or self.sevenzip:
|
||||
formats.extend(['.cb7', '.7z', '.cbz', '.zip', '.cbr', '.rar'])
|
||||
formats.extend(['.cb7', '.7z', '.cbz', '.zip', '.cbr', '.rar', '.epub'])
|
||||
if os.path.isdir(message):
|
||||
GUI.jobList.addItem(message)
|
||||
GUI.jobList.scrollToBottom()
|
||||
|
||||
@@ -389,10 +389,10 @@ class Ui_mainWindow(object):
|
||||
|
||||
self.gridLayout_2.addWidget(self.qualityBox, 1, 2, 1, 1)
|
||||
|
||||
self.pdfExtractBox = QCheckBox(self.optionWidget)
|
||||
self.pdfExtractBox.setObjectName(u"pdfExtractBox")
|
||||
self.legacyExtractBox = QCheckBox(self.optionWidget)
|
||||
self.legacyExtractBox.setObjectName(u"legacyExtractBox")
|
||||
|
||||
self.gridLayout_2.addWidget(self.pdfExtractBox, 9, 0, 1, 1)
|
||||
self.gridLayout_2.addWidget(self.legacyExtractBox, 9, 0, 1, 1)
|
||||
|
||||
self.colorBox = QCheckBox(self.optionWidget)
|
||||
self.colorBox.setObjectName(u"colorBox")
|
||||
@@ -785,11 +785,9 @@ class Ui_mainWindow(object):
|
||||
#endif // QT_CONFIG(tooltip)
|
||||
self.qualityBox.setText(QCoreApplication.translate("mainWindow", u"Panel View 4/2/HQ", None))
|
||||
#if QT_CONFIG(tooltip)
|
||||
self.pdfExtractBox.setToolTip(QCoreApplication.translate("mainWindow", u"Use the PDF image extraction method from KCC 8 and earlier.\n"
|
||||
"\n"
|
||||
"Useful for really weird PDFs.", None))
|
||||
self.legacyExtractBox.setToolTip(QCoreApplication.translate("mainWindow", u"<html><head/><body><p>Use the PDF/EPUB image extraction method from older KCC versions.</p><p><br/></p><p>Use if standard extraction fails for whatever reason.</p></body></html>", None))
|
||||
#endif // QT_CONFIG(tooltip)
|
||||
self.pdfExtractBox.setText(QCoreApplication.translate("mainWindow", u"PDF Legacy Extract", None))
|
||||
self.legacyExtractBox.setText(QCoreApplication.translate("mainWindow", u"Legacy Extract", None))
|
||||
#if QT_CONFIG(tooltip)
|
||||
self.colorBox.setToolTip(QCoreApplication.translate("mainWindow", u"<html><head/><body><p style='white-space:pre'>Disable conversion to grayscale.</p></body></html>", None))
|
||||
#endif // QT_CONFIG(tooltip)
|
||||
|
||||
@@ -22,7 +22,9 @@ from collections import Counter
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
from argparse import ArgumentParser
|
||||
from time import perf_counter, strftime, gmtime
|
||||
from copy import copy
|
||||
@@ -917,7 +919,7 @@ def getWorkFolder(afile, workdir=None):
|
||||
os.makedirs(fullPath)
|
||||
path = workdir
|
||||
sanitizePermissions(path)
|
||||
if options.pdfextract:
|
||||
if options.legacyextract:
|
||||
pdf = pdfjpgextract.PdfJpgExtract(afile, fullPath)
|
||||
njpg = pdf.extract()
|
||||
if njpg == 0:
|
||||
@@ -956,11 +958,61 @@ def getWorkFolder(afile, workdir=None):
|
||||
for file in os.listdir(os.path.join(fullPath, tdir[0])):
|
||||
move(os.path.join(fullPath, tdir[0], file), fullPath)
|
||||
os.rmdir(os.path.join(fullPath, tdir[0]))
|
||||
|
||||
if options.legacyextract:
|
||||
return workdir
|
||||
|
||||
if afile.lower().endswith('.epub'):
|
||||
container = ET.parse(os.path.join(path, 'META-INF', 'container.xml'))
|
||||
opf_path = container.find(r'.//{*}rootfile').attrib['full-path']
|
||||
opf_path = os.path.join(path, opf_path)
|
||||
opf = ET.parse(opf_path)
|
||||
spine = []
|
||||
for spine_item in opf.findall(r'.//{*}itemref'):
|
||||
spine.append(spine_item.attrib.get('idref'))
|
||||
manifest_dict = {}
|
||||
for manifest_item in opf.findall(".//*[@media-type='application/xhtml+xml']"):
|
||||
manifest_dict[manifest_item.attrib.get('id')] = manifest_item.attrib.get('href')
|
||||
ordered_image_paths = []
|
||||
for i, spine_item in enumerate(spine):
|
||||
if spine_item not in manifest_dict:
|
||||
continue
|
||||
page_path = os.path.join(os.path.dirname(opf_path), manifest_dict[spine_item])
|
||||
page = ET.parse(page_path)
|
||||
imgs = page.findall(r'.//{*}img') + page.findall(r'.//{*}image')
|
||||
img_path = None
|
||||
# TODO handle more than first image
|
||||
for img in imgs:
|
||||
for key in img.attrib:
|
||||
if 'src' in key or 'href' in key:
|
||||
img_path = img.attrib[key]
|
||||
if img_path.startswith('..'):
|
||||
img_path = os.path.join(os.path.dirname(opf_path), os.path.dirname(manifest_dict[spine_item]), img_path)
|
||||
else:
|
||||
img_path = os.path.join(os.path.dirname(opf_path), os.path.dirname(manifest_dict[spine_item]), img_path)
|
||||
break
|
||||
# TODO empty image
|
||||
if img_path:
|
||||
ordered_image_paths.append(img_path)
|
||||
# fallback if naive spine extraction fails
|
||||
if not ordered_image_paths:
|
||||
return workdir
|
||||
|
||||
if options.tempdir:
|
||||
workdir2 = mkdtemp('', 'KCC-', os.path.dirname(afile))
|
||||
else:
|
||||
workdir2 = mkdtemp('', 'KCC-')
|
||||
for i, img_path in enumerate(ordered_image_paths):
|
||||
_, ext = os.path.splitext(img_path)
|
||||
fullpath2 = os.path.join(workdir2, 'OEBPS', 'Images')
|
||||
os.makedirs(fullpath2, exist_ok=True)
|
||||
shutil.copyfile(img_path, os.path.join(fullpath2, f"{i}{ext}"))
|
||||
rmtree(workdir, True)
|
||||
return workdir2
|
||||
|
||||
return workdir
|
||||
|
||||
except OSError as e:
|
||||
rmtree(workdir, True)
|
||||
raise UserWarning(e)
|
||||
finally:
|
||||
pass
|
||||
else:
|
||||
raise UserWarning("Failed to open source file/directory.")
|
||||
|
||||
@@ -1406,8 +1458,8 @@ def makeParser():
|
||||
|
||||
processing_options.add_argument("-n", "--noprocessing", action="store_true", dest="noprocessing", default=False,
|
||||
help="Do not modify image and ignore any profile or processing option")
|
||||
processing_options.add_argument("--pdfextract", action="store_true", dest="pdfextract", default=False,
|
||||
help="Use the legacy PDF image extraction method from KCC 8 and earlier")
|
||||
processing_options.add_argument("--legacyextract", action="store_true", dest="legacyextract", default=False,
|
||||
help="Use the legacy PDF/EPUB image extraction method from older KCC versions")
|
||||
processing_options.add_argument("--pdfwidth", action="store_true", dest="pdfwidth", default=False,
|
||||
help="Render vector PDFs to device width instead of height.")
|
||||
processing_options.add_argument("--smartcovercrop", action="store_true", dest="smartcovercrop", default=False,
|
||||
|
||||
Reference in New Issue
Block a user