1
0
mirror of https://github.com/ciromattia/kcc synced 2026-04-15 05:28:49 +00:00

Compare commits

...

14 Commits

Author SHA1 Message Date
Alex Xu
e787dd2897 bump 7.3.0 2025-03-04 11:29:44 -08:00
Alex Xu
01625904d1 huge speed optimization on HDD by removing md5 (#845)
* Eliminate unnecessary use of MD5 checksum

md5checksum() computes the actual checksum of a specified file, which is appropriately expensive, but the code seemed to be using the checksum result as a key into the imgMetadata dictionary to avoid handling image files being renamed during processing steps. This seems like a very expensive way to handle the rename so instead, I now update the imgMetadata keys with the new filename in the one place that the rename happens, and avoid MD5 checksums entirely.

* merge conflicts

* Add missing handling for image path renames due to nested chapter folder name

* merge conflicts

* merge conflicts

* add perf_counters

* imgFileProcessing perf_counter

* use startswith and removeprefix

---------

Co-authored-by: utopiafallen <utopiafallen@gmail.com>
2025-03-04 11:28:23 -08:00
Alex Xu
5f8526da44 all image files have unique ordered names (#848) 2025-03-03 07:50:21 -08:00
Alex Xu
1159e737a0 small correction to 7z (#847) 2025-03-02 19:55:45 -08:00
Alex Xu
5bbdb715e9 7z is faster note 2025-03-02 12:39:51 -08:00
Alex Xu
1a3cd6c916 add perf_counter to makeBook (#846) 2025-03-02 12:24:49 -08:00
Alex Xu
e1e6d587f4 makeZIP now prefers 7z for SPEED (#844) 2025-03-02 11:08:27 -08:00
Alex Xu
ca5c0bdd61 fix error messages that only say a single number (#843) 2025-03-01 19:40:55 -08:00
Alex Xu
c6f491d27e bump 7.2.3 2025-02-28 20:44:45 -08:00
Alex Xu
c9ed3feef1 deprioritize tar (#842) 2025-02-28 20:17:41 -08:00
Alex Xu
be147fe7e5 saves several seconds per file (#841) 2025-02-28 20:11:11 -08:00
utopiafallen
62ffa2bc80 Improve processing performance by partially undoing "Refactored detection of corrupted files"
Commit f952634971 moved image corruption detection out from the ComicPage constructor and into a standalone detectCorruption() function. This led to a performance regression because now corruption detection happens in a single thread when it used to be distributed across worker threads, and because a source image is now loaded twice in memory: once during corruption detection and once when actually going to process the image.

Image file corruption detection is now back inside the ComicPage constructor and the extra load() has been removed because the convert() call will automatically invoke load() and most likely throw the same exceptions.
2025-02-28 19:49:13 -08:00
Alex Xu
11186d07c0 fix file splitting/chunking for real in certain situations (#839)
* fix file splitting without ComicInfo.xml

* remove dead var
2025-02-28 19:19:11 -08:00
Alex Xu
4b3cd6882a Revert "Build windows GUI version normally without docker" (#835)
* Revert "remove GUI windows docker"

This reverts commit 4fc5cc9dfb.

* build windows gui version with docker

* cffi

* bump 7.2.2
2025-02-25 08:57:40 -08:00
9 changed files with 138 additions and 74 deletions

View File

@@ -25,6 +25,11 @@ jobs:
# - name: build binary
# run: |
# pyi-makespec -F -i icons\\comic2ebook.ico -n KCC_test -w --noupx kcc.py
- name: Package Application
uses: JackMcKew/pyinstaller-action-windows@main
with:
path: .
spec: ./kcc.spec
- name: Package Application
uses: JackMcKew/pyinstaller-action-windows@main
with:
@@ -38,6 +43,7 @@ jobs:
- name: rename binaries
run: |
version_built=$(cat kindlecomicconverter/__init__.py | grep version | awk '{print $3}' | sed "s/[^.0-9b]//g")
mv dist/windows/kcc.exe dist/windows/KCC_${version_built}.exe
mv dist/windows/kcc-c2e.exe dist/windows/KCC_c2e_${version_built}.exe
mv dist/windows/kcc-c2p.exe dist/windows/KCC_c2p_${version_built}.exe
- name: upload build

View File

@@ -68,9 +68,11 @@ If you have issues detecting it, get stuck on the MOBI conversion step, or use L
### 7-Zip
This is only required for certain files and advanced features.
This is optional but will make conversions much faster.
KCC will ask you to install if needed.
This is required for certain files and advanced features.
KCC will ask you to install if needed.
Refer to the wiki to install: https://github.com/ciromattia/kcc/wiki/Installation#7-zip

39
kcc.spec Normal file
View File

@@ -0,0 +1,39 @@
# -*- mode: python ; coding: utf-8 -*-
block_cipher = None
a = Analysis(['kcc.py'],
pathex=['.'],
binaries=[],
datas=[],
hiddenimports=['_cffi_backend'],
hookspath=[],
runtime_hooks=[],
excludes=[],
win_no_prefer_redirects=False,
win_private_assemblies=False,
cipher=block_cipher,
noarchive=False)
pyz = PYZ(a.pure, a.zipped_data,
cipher=block_cipher)
exe = EXE(pyz,
a.scripts,
a.binaries,
a.zipfiles,
a.datas,
[],
name='kcc',
debug=False,
bootloader_ignore_signals=False,
strip=False,
upx=False,
upx_exclude=[],
runtime_tmpdir=None,
console=False,
disable_windowed_traceback=False,
target_arch=None,
codesign_identity=None,
entitlements_file=None , icon='icons\\comic2ebook.ico')

View File

@@ -37,7 +37,7 @@ from packaging.version import Version
from raven import Client
from tempfile import gettempdir
from .shared import HTMLStripper, sanitizeTrace, walkLevel, subprocess_run
from .shared import HTMLStripper, available_archive_tools, sanitizeTrace, walkLevel, subprocess_run
from . import __version__
from . import comic2ebook
from . import metadata
@@ -317,13 +317,8 @@ class WorkerThread(QThread):
GUI.progress.content = ''
self.errors = True
_, _, traceback = sys.exc_info()
if len(err.args) == 1:
MW.showDialog.emit("Error during conversion %s:\n\n%s\n\nTraceback:\n%s"
% (jobargv[-1], str(err), sanitizeTrace(traceback)), 'error')
else:
MW.showDialog.emit("Error during conversion %s:\n\n%s\n\nTraceback:\n%s"
% (jobargv[-1], str(err.args[0]), err.args[1]), 'error')
GUI.sentry.extra_context({'realTraceback': err.args[1]})
MW.showDialog.emit("Error during conversion %s:\n\n%s\n\nTraceback:\n%s"
% (jobargv[-1], str(err), sanitizeTrace(traceback)), 'error')
if ' is corrupted.' not in str(err):
GUI.sentry.captureException()
MW.addMessage.emit('Error during conversion! Please consult '
@@ -1071,19 +1066,12 @@ class KCCGUI(KCC_ui.Ui_mainWindow):
self.addMessage('Since you are a new user of <b>KCC</b> please see few '
'<a href="https://github.com/ciromattia/kcc/wiki/Important-tips">important tips</a>.',
'info')
try:
subprocess_run(['tar'], stdout=PIPE, stderr=STDOUT)
self.tar = True
except FileNotFoundError:
self.tar = False
try:
subprocess_run(['7z'], stdout=PIPE, stderr=STDOUT)
self.sevenzip = True
except FileNotFoundError:
self.sevenzip = False
if not self.tar:
self.addMessage('<a href="https://github.com/ciromattia/kcc#7-zip">Install 7z (link)</a>'
' to enable CBZ/CBR/ZIP/etc processing.', 'warning')
self.tar = 'tar' in available_archive_tools()
self.sevenzip = '7z' in available_archive_tools()
if not any([self.tar, self.sevenzip]):
self.addMessage('<a href="https://github.com/ciromattia/kcc#7-zip">Install 7z (link)</a>'
' to enable CBZ/CBR/ZIP/etc processing.', 'warning')
self.detectKindleGen(True)
APP.messageFromOtherInstance.connect(self.handleMessage)

View File

@@ -1,4 +1,4 @@
__version__ = '7.2.1'
__version__ = '7.3.0'
__license__ = 'ISC'
__copyright__ = '2012-2022, Ciro Mattia Gonano <ciromattia@gmail.com>, Pawel Jastrzebski <pawelj@iosphe.re>, darodi'
__docformat__ = 'restructuredtext en'

View File

@@ -23,7 +23,7 @@ import pathlib
import re
import sys
from argparse import ArgumentParser
from time import strftime, gmtime
from time import perf_counter, strftime, gmtime
from copy import copy
from glob import glob, escape
from re import sub
@@ -33,14 +33,14 @@ from tempfile import mkdtemp, gettempdir, TemporaryFile
from shutil import move, copytree, rmtree, copyfile
from multiprocessing import Pool
from uuid import uuid4
from natsort import os_sorted
from natsort import os_sort_keygen
from slugify import slugify as slugify_ext
from PIL import Image, ImageFile
from subprocess import STDOUT, PIPE
from psutil import virtual_memory, disk_usage
from html import escape as hescape
from .shared import md5Checksum, getImageFileName, walkSort, walkLevel, sanitizeTrace, subprocess_run
from .shared import available_archive_tools, getImageFileName, walkSort, walkLevel, sanitizeTrace, subprocess_run
from . import comic2panel
from . import image
from . import comicarchive
@@ -51,7 +51,7 @@ from . import kindle
from . import __version__
ImageFile.LOAD_TRUNCATED_IMAGES = True
OS_SORT_KEY = os_sort_keygen()
def main(argv=None):
global options
@@ -78,7 +78,6 @@ def main(argv=None):
def buildHTML(path, imgfile, imgfilepath):
imgfilepath = md5Checksum(imgfilepath)
filename = getImageFileName(imgfile)
deviceres = options.profileData[1]
if not options.noprocessing and "Rotated" in options.imgMetadata[imgfilepath]:
@@ -425,7 +424,6 @@ def buildOPF(dstdir, title, filelist, cover=None):
"</container>"])
f.close()
def buildEPUB(path, chapternames, tomenumber, ischunked):
filelist = []
chapterlist = []
@@ -506,6 +504,7 @@ def buildEPUB(path, chapternames, tomenumber, ischunked):
"display: none;\n",
"}\n"])
f.close()
build_html_start = perf_counter()
for dirpath, dirnames, filenames in os.walk(os.path.join(path, 'OEBPS', 'Images')):
chapter = False
dirnames, filenames = walkSort(dirnames, filenames)
@@ -515,10 +514,12 @@ def buildEPUB(path, chapternames, tomenumber, ischunked):
'cover' + getImageFileName(afile)[1])
options.covers.append((image.Cover(os.path.join(dirpath, afile), cover, options,
tomenumber), options.uuid))
filelist.append(buildHTML(dirpath, afile, os.path.join(dirpath, afile)))
if not chapter:
chapterlist.append((dirpath.replace('Images', 'Text'), filelist[-1][1]))
chapterlist.append((dirpath.replace('Images', 'Text'), afile))
chapter = True
filelist.append(buildHTML(dirpath, afile, os.path.join(dirpath, afile)))
build_html_end = perf_counter()
print(f"buildHTML: {build_html_end - build_html_start} seconds")
# Overwrite chapternames if tree is flat and ComicInfo.xml has bookmarks
if not chapternames and options.chapters and not ischunked:
chapterlist = []
@@ -566,10 +567,13 @@ def imgDirectoryProcessing(path):
if GUI:
GUI.progressBarTick.emit(str(pagenumber))
if len(work) > 0:
img_processing_start = perf_counter()
for i in work:
workerPool.apply_async(func=imgFileProcessing, args=(i,), callback=imgFileProcessingTick)
workerPool.close()
workerPool.join()
img_processing_end = perf_counter()
print(f"imgFileProcessing: {img_processing_end - img_processing_start} seconds")
if GUI and not GUI.conversionAlive:
rmtree(os.path.join(path, '..', '..'), True)
raise UserWarning("Conversion interrupted.")
@@ -657,16 +661,14 @@ def getWorkFolder(afile):
path = cbx.extract(workdir)
sanitizePermissions(path)
tdir = os.listdir(workdir)
is_nested_single_dir = False
if len(tdir) == 2 and 'ComicInfo.xml' in tdir:
tdir.remove('ComicInfo.xml')
is_nested_single_dir = os.path.isdir(os.path.join(workdir, tdir[0]))
if is_nested_single_dir:
if os.path.isdir(os.path.join(workdir, tdir[0])):
os.replace(
os.path.join(workdir, 'ComicInfo.xml'),
os.path.join(workdir, tdir[0], 'ComicInfo.xml')
)
if len(tdir) == 1 and is_nested_single_dir:
if len(tdir) == 1 and os.path.isdir(os.path.join(workdir, tdir[0])):
path = os.path.join(workdir, tdir[0])
except OSError as e:
rmtree(workdir, True)
@@ -674,8 +676,7 @@ def getWorkFolder(afile):
else:
raise UserWarning("Failed to open source file/directory.")
newpath = mkdtemp('', 'KCC-', os.path.dirname(afile))
copytree(path, os.path.join(newpath, 'OEBPS', 'Images'))
rmtree(workdir, True)
os.renames(path, os.path.join(newpath, 'OEBPS', 'Images'))
return newpath
@@ -793,12 +794,16 @@ def getPanelViewSize(deviceres, size):
def sanitizeTree(filetree):
chapterNames = {}
for root, dirs, files in os.walk(filetree, False):
for i, name in enumerate(os_sorted(files)):
page = 1
for root, dirs, files in os.walk(filetree):
dirs.sort(key=OS_SORT_KEY)
files.sort(key=OS_SORT_KEY)
for name in files:
splitname = os.path.splitext(name)
# file needs kcc at front AND back to avoid renaming issues
slugified = f'kcc-{i:04}'
slugified = f'kcc-{page:04}'
page += 1
for suffix in '-KCC', '-KCC-A', '-KCC-B', '-KCC-C':
if splitname[0].endswith(suffix):
slugified += suffix.lower()
@@ -808,7 +813,8 @@ def sanitizeTree(filetree):
key = os.path.join(root, name)
if key != newKey:
os.replace(key, newKey)
for name in dirs:
options.imgMetadata[newKey] = options.imgMetadata.pop(key)
for i, name in enumerate(dirs):
tmpName = name
slugified = slugify(name)
while os.path.exists(os.path.join(root, slugified)) and name.upper() != slugified.upper():
@@ -818,6 +824,11 @@ def sanitizeTree(filetree):
key = os.path.join(root, name)
if key != newKey:
os.replace(key, newKey)
dirs[i] = newKey
existingImgPathKeys = list(options.imgMetadata.keys())
for imgPath in existingImgPathKeys:
if imgPath.startswith(key):
options.imgMetadata[newKey + imgPath.removeprefix(key)] = options.imgMetadata.pop(imgPath)
return chapterNames
@@ -890,8 +901,7 @@ def chunk_process(path, mode, parent):
firstTome = False
return output
def detectCorruption(tmppath, orgpath):
def detectSuboptimalProcessing(tmppath, orgpath):
imageNumber = 0
imageSmaller = 0
alreadyProcessed = False
@@ -907,9 +917,6 @@ def detectCorruption(tmppath, orgpath):
raise RuntimeError('Image file %s is corrupted.' % pathOrg)
try:
img = Image.open(path)
img.verify()
img = Image.open(path)
img.load()
imageNumber += 1
if options.profileData[1][0] > img.size[0] and options.profileData[1][1] > img.size[1]:
imageSmaller += 1
@@ -953,17 +960,27 @@ def slugify(value):
def makeZIP(zipfilename, basedir, isepub=False):
start = perf_counter()
zipfilename = os.path.abspath(zipfilename) + '.zip'
zipOutput = ZipFile(zipfilename, 'w', ZIP_DEFLATED)
if isepub:
zipOutput.writestr('mimetype', 'application/epub+zip', ZIP_STORED)
for dirpath, _, filenames in os.walk(basedir):
for name in filenames:
path = os.path.normpath(os.path.join(dirpath, name))
aPath = os.path.normpath(os.path.join(dirpath.replace(basedir, ''), name))
if os.path.isfile(path):
zipOutput.write(path, aPath)
zipOutput.close()
if '7z' in available_archive_tools():
if isepub:
mimetypeFile = open(os.path.join(basedir, 'mimetype'), 'w')
mimetypeFile.write('application/epub+zip')
mimetypeFile.close()
subprocess_run(['7z', 'a', '-tzip', zipfilename, os.path.join(basedir, "*")], capture_output=True, check=True)
else:
zipOutput = ZipFile(zipfilename, 'w', ZIP_DEFLATED)
if isepub:
zipOutput.writestr('mimetype', 'application/epub+zip', ZIP_STORED)
for dirpath, _, filenames in os.walk(basedir):
for name in filenames:
path = os.path.normpath(os.path.join(dirpath, name))
aPath = os.path.normpath(os.path.join(dirpath.replace(basedir, ''), name))
if os.path.isfile(path):
zipOutput.write(path, aPath)
zipOutput.close()
end = perf_counter()
print(f"makeZIP time: {end - start} seconds")
return zipfilename
@@ -1138,9 +1155,7 @@ def checkTools(source):
source = source.upper()
if source.endswith('.CB7') or source.endswith('.7Z') or source.endswith('.RAR') or source.endswith('.CBR') or \
source.endswith('.ZIP') or source.endswith('.CBZ'):
try:
subprocess_run(['7z'], stdout=PIPE, stderr=STDOUT)
except FileNotFoundError:
if '7z' not in available_archive_tools():
print('ERROR: 7z is missing!')
sys.exit(1)
if options.format == 'MOBI':
@@ -1170,6 +1185,7 @@ def checkPre(source):
def makeBook(source, qtgui=None):
start = perf_counter()
global GUI
GUI = qtgui
if GUI:
@@ -1181,7 +1197,7 @@ def makeBook(source, qtgui=None):
path = getWorkFolder(source)
print("Checking images...")
getComicInfo(os.path.join(path, "OEBPS", "Images"), source)
detectCorruption(os.path.join(path, "OEBPS", "Images"), source)
detectSuboptimalProcessing(os.path.join(path, "OEBPS", "Images"), source)
if options.webtoon:
y = image.ProfileData.Profiles[options.profile][1][1]
comic2panel.main(['-y ' + str(y), '-i', '-m', path], qtgui)
@@ -1272,6 +1288,8 @@ def makeBook(source, qtgui=None):
elif os.path.isdir(source):
rmtree(source)
end = perf_counter()
print(f"makeBook: {end - start} seconds")
return filepath

View File

@@ -76,6 +76,8 @@ class ComicArchive:
['unar', self.filepath, '-f', '-o', targetdir]
)
extraction_commands.reverse()
if distro.id() == 'fedora' or distro.like() == 'fedora':
extraction_commands.append(
['unrar', 'x', '-y', '-x__MACOSX', '-x.DS_Store', '-xthumbs.db', '-xThumbs.db', self.filepath, targetdir]

View File

@@ -22,7 +22,6 @@ import io
import os
import mozjpeg_lossless_optimization
from PIL import Image, ImageOps, ImageStat, ImageChops, ImageFilter
from .shared import md5Checksum
from .page_number_crop_alg import get_bbox_crop_margin_page_number, get_bbox_crop_margin
from .inter_panel_crop_alg import crop_empty_inter_panel
@@ -141,7 +140,13 @@ class ComicPageParser:
self.source = source
self.size = self.opt.profileData[1]
self.payload = []
self.image = Image.open(os.path.join(source[0], source[1])).convert('RGB')
# Detect corruption in source image, let caller catch any exceptions triggered.
srcImgPath = os.path.join(source[0], source[1])
self.image = Image.open(srcImgPath)
self.image.verify()
self.image = Image.open(srcImgPath).convert('RGB')
self.color = self.colorCheck()
self.fill = self.fillCheck()
# backwards compatibility for Pillow >9.1.0
@@ -315,7 +320,7 @@ class ComicPage:
output_jpeg_file.write(output_jpeg_bytes)
else:
self.image.save(self.targetPath, 'JPEG', optimize=1, quality=85)
return [md5Checksum(self.targetPath), flags, self.orgPath]
return [self.targetPath, flags, self.orgPath]
except IOError as err:
raise RuntimeError('Cannot save image. ' + str(err))

View File

@@ -18,6 +18,7 @@
# PERFORMANCE OF THIS SOFTWARE.
#
from functools import lru_cache
import os
from hashlib import md5
from html.parser import HTMLParser
@@ -74,16 +75,6 @@ def walkLevel(some_dir, level=1):
del dirs[:]
def md5Checksum(fpath):
with open(fpath, 'rb') as fh:
m = md5()
while True:
data = fh.read(8192)
if not data:
break
m.update(data)
return m.hexdigest()
def sanitizeTrace(traceback):
return ''.join(format_tb(traceback))\
@@ -137,6 +128,19 @@ def dependencyCheck(level):
print('ERROR: ' + ', '.join(missing) + ' is not installed!')
sys.exit(1)
@lru_cache
def available_archive_tools():
available = []
for tool in ['tar', '7z', 'unar', 'unrar']:
try:
subprocess_run([tool], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
available.append(tool)
except FileNotFoundError:
pass
return available
def subprocess_run(command, **kwargs):
if (os.name == 'nt'):
kwargs.setdefault('creationflags', subprocess.CREATE_NO_WINDOW)