mirror of
https://github.com/ciromattia/kcc
synced 2026-01-25 22:47:28 +00:00
Add legacy pdf image extract option (#1225)
This commit is contained in:
@@ -238,6 +238,7 @@ MAIN:
|
||||
|
||||
PROCESSING:
|
||||
-n, --noprocessing Do not modify image and ignore any profile or processing option
|
||||
--pdfextract Use the legacy PDF image extraction method from KCC 8 and earlier
|
||||
-u, --upscale Resize images smaller than device's resolution
|
||||
-s, --stretch Stretch images to device's resolution
|
||||
-r SPLITTER, --splitter SPLITTER
|
||||
|
||||
12
gui/KCC.ui
12
gui/KCC.ui
@@ -896,6 +896,18 @@ Higher values are larger and higher quality, and may resolve blank page issues.<
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="9" column="0">
|
||||
<widget class="QCheckBox" name="pdfExtractBox">
|
||||
<property name="toolTip">
|
||||
<string>Use the PDF image extraction method from KCC 8 and earlier.
|
||||
|
||||
Useful for really weird PDFs.</string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>PDF Legacy Extract</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
|
||||
@@ -327,6 +327,8 @@ class WorkerThread(QThread):
|
||||
options.maximizestrips = True
|
||||
if GUI.disableProcessingBox.isChecked():
|
||||
options.noprocessing = True
|
||||
if GUI.pdfExtractBox.isChecked():
|
||||
options.pdfextract = True
|
||||
if GUI.metadataTitleBox.checkState() == Qt.CheckState.PartiallyChecked:
|
||||
options.metadatatitle = 1
|
||||
elif GUI.metadataTitleBox.checkState() == Qt.CheckState.Checked:
|
||||
@@ -1032,6 +1034,7 @@ class KCCGUI(KCC_ui.Ui_mainWindow):
|
||||
'colorBox': GUI.colorBox.checkState(),
|
||||
'eraseRainbowBox': GUI.eraseRainbowBox.checkState(),
|
||||
'disableProcessingBox': GUI.disableProcessingBox.checkState(),
|
||||
'pdfExtractBox': GUI.pdfExtractBox.checkState(),
|
||||
'metadataTitleBox': GUI.metadataTitleBox.checkState(),
|
||||
'mozJpegBox': GUI.mozJpegBox.checkState(),
|
||||
'jpegQualityBox': GUI.jpegQualityBox.checkState(),
|
||||
|
||||
@@ -462,6 +462,11 @@ class Ui_mainWindow(object):
|
||||
|
||||
self.gridLayout_2.addWidget(self.jpegQualityBox, 8, 0, 1, 1)
|
||||
|
||||
self.pdfExtractBox = QCheckBox(self.optionWidget)
|
||||
self.pdfExtractBox.setObjectName(u"pdfExtractBox")
|
||||
|
||||
self.gridLayout_2.addWidget(self.pdfExtractBox, 9, 0, 1, 1)
|
||||
|
||||
|
||||
self.gridLayout.addWidget(self.optionWidget, 5, 0, 1, 2)
|
||||
|
||||
@@ -733,6 +738,12 @@ class Ui_mainWindow(object):
|
||||
"Higher values are larger and higher quality, and may resolve blank page issues.", None))
|
||||
#endif // QT_CONFIG(tooltip)
|
||||
self.jpegQualityBox.setText(QCoreApplication.translate("mainWindow", u"Custom JPEG Quality", None))
|
||||
#if QT_CONFIG(tooltip)
|
||||
self.pdfExtractBox.setToolTip(QCoreApplication.translate("mainWindow", u"Use the PDF image extraction method from KCC 8 and earlier.\n"
|
||||
"\n"
|
||||
"Useful for really weird PDFs.", None))
|
||||
#endif // QT_CONFIG(tooltip)
|
||||
self.pdfExtractBox.setText(QCoreApplication.translate("mainWindow", u"PDF Legacy Extract", None))
|
||||
self.gammaLabel.setText(QCoreApplication.translate("mainWindow", u"Gamma: Auto", None))
|
||||
self.jpegQualityLabel.setText(QCoreApplication.translate("mainWindow", u"JPEG Quality:", None))
|
||||
# retranslateUi
|
||||
|
||||
@@ -48,6 +48,7 @@ from .comicarchive import SEVENZIP, available_archive_tools
|
||||
from . import comic2panel
|
||||
from . import image
|
||||
from . import comicarchive
|
||||
from . import pdfjpgextract
|
||||
from . import dualmetafix
|
||||
from . import metadata
|
||||
from . import kindle
|
||||
@@ -875,6 +876,12 @@ def getWorkFolder(afile, workdir=None):
|
||||
os.makedirs(fullPath)
|
||||
path = workdir
|
||||
sanitizePermissions(path)
|
||||
if options.pdfextract:
|
||||
pdf = pdfjpgextract.PdfJpgExtract(afile, fullPath)
|
||||
njpg = pdf.extract()
|
||||
if njpg == 0:
|
||||
raise UserWarning("Failed to extract images from PDF file.")
|
||||
return workdir
|
||||
target_height = options.profileData[1][1]
|
||||
if options.cropping == 1:
|
||||
target_height = target_height + target_height*0.20 #Account for possible margin at the top and bottom
|
||||
@@ -1342,6 +1349,8 @@ def makeParser():
|
||||
|
||||
processing_options.add_argument("-n", "--noprocessing", action="store_true", dest="noprocessing", default=False,
|
||||
help="Do not modify image and ignore any profile or processing option")
|
||||
processing_options.add_argument("--pdfextract", action="store_true", dest="pdfextract", default=False,
|
||||
help="Use the legacy PDF image extraction method from KCC 8 and earlier")
|
||||
processing_options.add_argument("-u", "--upscale", action="store_true", dest="upscale", default=False,
|
||||
help="Resize images smaller than device's resolution")
|
||||
processing_options.add_argument("-s", "--stretch", action="store_true", dest="stretch", default=False,
|
||||
|
||||
75
kindlecomicconverter/pdfjpgextract.py
Normal file
75
kindlecomicconverter/pdfjpgextract.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2012-2014 Ciro Mattia Gonano <ciromattia@gmail.com>
|
||||
# Copyright (c) 2013-2019 Pawel Jastrzebski <pawelj@iosphe.re>
|
||||
#
|
||||
# Based upon the code snippet by Ned Batchelder
|
||||
# (http://nedbatchelder.com/blog/200712/extracting_jpgs_from_pdfs.html)
|
||||
#
|
||||
# Permission to use, copy, modify, and/or distribute this software for
|
||||
# any purpose with or without fee is hereby granted, provided that the
|
||||
# above copyright notice and this permission notice appear in all
|
||||
# copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
||||
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
||||
# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
|
||||
# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
|
||||
# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
# PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
# Skip stray images a few pixels in size that appear in some PDFs;
# typical images are many thousands of bytes in length.
# https://github.com/ciromattia/kcc/pull/546
STRAY_IMAGE_LENGTH_THRESHOLD = 300


class PdfJpgExtract:
    """Extract embedded JPEG images from a PDF by scanning its raw bytes.

    The file is never parsed as a PDF. Instead, every ``stream`` keyword is
    located and, when a JPEG start-of-image marker (``FF D8``) follows it
    within 20 bytes, the data up to the end-of-image marker (``FF D9``) found
    near the matching ``endstream`` keyword is written out as a ``.jpg`` file.
    """

    def __init__(self, fname, fullPath):
        """Remember the source PDF and the output directory.

        :param fname: path of the PDF file to scan.
        :param fullPath: directory the extracted images are written to;
            it is not created here.
        """
        self.fname = fname
        self.path = fullPath

    def getPath(self):
        """Return the directory the extracted images are written to."""
        return self.path

    def extract(self):
        """Write every embedded JPEG to ``jpg<N>.jpg`` in the output directory.

        Candidates shorter than ``STRAY_IMAGE_LENGTH_THRESHOLD`` bytes are
        skipped and do not consume an index.

        :return: number of image files written.
        :raises Exception: if a stream that starts with a JPEG marker has no
            ``endstream`` keyword or no end-of-image marker after it.
        """
        # The whole file is searched in memory; the context manager makes
        # sure the handle is released even if reading fails.
        with open(self.fname, "rb") as pdffile:
            pdf = pdffile.read()

        startmark = b"\xff\xd8"
        endmark = b"\xff\xd9"
        i = 0
        njpg = 0
        while True:
            istream = pdf.find(b"stream", i)
            if istream < 0:
                break
            # The JPEG must begin right after the keyword (allowing for the
            # line break); otherwise this stream is not a JPEG image.
            istart = pdf.find(startmark, istream, istream + 20)
            if istart < 0:
                i = istream + 20
                continue
            iend = pdf.find(b"endstream", istart)
            if iend < 0:
                raise Exception("Didn't find end of stream!")
            # Start looking for the end marker shortly before "endstream".
            # Clamp at 0: a negative start would be counted from the END of
            # the buffer and could skip past the marker entirely.
            iend = pdf.find(endmark, max(iend - 20, 0))
            if iend < 0:
                raise Exception("Didn't find end of JPG!")
            iend += len(endmark)  # include the end marker in the image
            i = iend

            if iend - istart < STRAY_IMAGE_LENGTH_THRESHOLD:
                continue

            with open(os.path.join(self.path, "jpg%d.jpg" % njpg), "wb") as jpgfile:
                jpgfile.write(pdf[istart:iend])
            njpg += 1

        return njpg
|
||||
Reference in New Issue
Block a user