1
0
mirror of https://github.com/ciromattia/kcc synced 2026-04-18 06:58:58 +00:00

Compare commits

...

8 Commits

Author SHA1 Message Date
Alex Xu
e787dd2897 bump 7.3.0 2025-03-04 11:29:44 -08:00
Alex Xu
01625904d1 huge speed optimization on HDD by removing md5 (#845)
* Eliminate unnecessary use of MD5 checksum

md5checksum() computes the actual checksum of a specified file, which is appropriately expensive, but the code seemed to be using the checksum result as a key into the imgMetadata dictionary to avoid handling image files being renamed during processing steps. This seems like a very expensive way to handle the rename so instead, I now update the imgMetadata keys with the new filename in the one place that the rename happens, and avoid MD5 checksums entirely.

* merge conflicts

* Add missing handling for image path renames due to nested chapter folder name

* merge conflicts

* merge conflicts

* add perf_counters

* imgFileProcessing perf_counter

* use startswith and removeprefix

---------

Co-authored-by: utopiafallen <utopiafallen@gmail.com>
2025-03-04 11:28:23 -08:00
Alex Xu
5f8526da44 all image files have unique ordered names (#848) 2025-03-03 07:50:21 -08:00
Alex Xu
1159e737a0 small correction to 7z (#847) 2025-03-02 19:55:45 -08:00
Alex Xu
5bbdb715e9 7z is faster note 2025-03-02 12:39:51 -08:00
Alex Xu
1a3cd6c916 add perf_counter to makeBook (#846) 2025-03-02 12:24:49 -08:00
Alex Xu
e1e6d587f4 makeZIP now prefers 7z for SPEED (#844) 2025-03-02 11:08:27 -08:00
Alex Xu
ca5c0bdd61 fix error messages that only say a single number (#843) 2025-03-01 19:40:55 -08:00
6 changed files with 79 additions and 61 deletions

View File

@@ -68,7 +68,9 @@ If you have issues detecting it, get stuck on the MOBI conversion step, or use L
### 7-Zip ### 7-Zip
This is only required for certain files and advanced features. This is optional but will make conversions much faster.
This is required for certain files and advanced features.
KCC will ask you to install if needed. KCC will ask you to install if needed.

View File

@@ -37,7 +37,7 @@ from packaging.version import Version
from raven import Client from raven import Client
from tempfile import gettempdir from tempfile import gettempdir
from .shared import HTMLStripper, sanitizeTrace, walkLevel, subprocess_run from .shared import HTMLStripper, available_archive_tools, sanitizeTrace, walkLevel, subprocess_run
from . import __version__ from . import __version__
from . import comic2ebook from . import comic2ebook
from . import metadata from . import metadata
@@ -317,13 +317,8 @@ class WorkerThread(QThread):
GUI.progress.content = '' GUI.progress.content = ''
self.errors = True self.errors = True
_, _, traceback = sys.exc_info() _, _, traceback = sys.exc_info()
if len(err.args) == 1: MW.showDialog.emit("Error during conversion %s:\n\n%s\n\nTraceback:\n%s"
MW.showDialog.emit("Error during conversion %s:\n\n%s\n\nTraceback:\n%s" % (jobargv[-1], str(err), sanitizeTrace(traceback)), 'error')
% (jobargv[-1], str(err), sanitizeTrace(traceback)), 'error')
else:
MW.showDialog.emit("Error during conversion %s:\n\n%s\n\nTraceback:\n%s"
% (jobargv[-1], str(err.args[0]), err.args[1]), 'error')
GUI.sentry.extra_context({'realTraceback': err.args[1]})
if ' is corrupted.' not in str(err): if ' is corrupted.' not in str(err):
GUI.sentry.captureException() GUI.sentry.captureException()
MW.addMessage.emit('Error during conversion! Please consult ' MW.addMessage.emit('Error during conversion! Please consult '
@@ -1071,19 +1066,12 @@ class KCCGUI(KCC_ui.Ui_mainWindow):
self.addMessage('Since you are a new user of <b>KCC</b> please see few ' self.addMessage('Since you are a new user of <b>KCC</b> please see few '
'<a href="https://github.com/ciromattia/kcc/wiki/Important-tips">important tips</a>.', '<a href="https://github.com/ciromattia/kcc/wiki/Important-tips">important tips</a>.',
'info') 'info')
try:
subprocess_run(['tar'], stdout=PIPE, stderr=STDOUT) self.tar = 'tar' in available_archive_tools()
self.tar = True self.sevenzip = '7z' in available_archive_tools()
except FileNotFoundError: if not any([self.tar, self.sevenzip]):
self.tar = False self.addMessage('<a href="https://github.com/ciromattia/kcc#7-zip">Install 7z (link)</a>'
try: ' to enable CBZ/CBR/ZIP/etc processing.', 'warning')
subprocess_run(['7z'], stdout=PIPE, stderr=STDOUT)
self.sevenzip = True
except FileNotFoundError:
self.sevenzip = False
if not self.tar:
self.addMessage('<a href="https://github.com/ciromattia/kcc#7-zip">Install 7z (link)</a>'
' to enable CBZ/CBR/ZIP/etc processing.', 'warning')
self.detectKindleGen(True) self.detectKindleGen(True)
APP.messageFromOtherInstance.connect(self.handleMessage) APP.messageFromOtherInstance.connect(self.handleMessage)

View File

@@ -1,4 +1,4 @@
__version__ = '7.2.3' __version__ = '7.3.0'
__license__ = 'ISC' __license__ = 'ISC'
__copyright__ = '2012-2022, Ciro Mattia Gonano <ciromattia@gmail.com>, Pawel Jastrzebski <pawelj@iosphe.re>, darodi' __copyright__ = '2012-2022, Ciro Mattia Gonano <ciromattia@gmail.com>, Pawel Jastrzebski <pawelj@iosphe.re>, darodi'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'

View File

@@ -23,7 +23,7 @@ import pathlib
import re import re
import sys import sys
from argparse import ArgumentParser from argparse import ArgumentParser
from time import strftime, gmtime from time import perf_counter, strftime, gmtime
from copy import copy from copy import copy
from glob import glob, escape from glob import glob, escape
from re import sub from re import sub
@@ -33,14 +33,14 @@ from tempfile import mkdtemp, gettempdir, TemporaryFile
from shutil import move, copytree, rmtree, copyfile from shutil import move, copytree, rmtree, copyfile
from multiprocessing import Pool from multiprocessing import Pool
from uuid import uuid4 from uuid import uuid4
from natsort import os_sorted from natsort import os_sort_keygen
from slugify import slugify as slugify_ext from slugify import slugify as slugify_ext
from PIL import Image, ImageFile from PIL import Image, ImageFile
from subprocess import STDOUT, PIPE from subprocess import STDOUT, PIPE
from psutil import virtual_memory, disk_usage from psutil import virtual_memory, disk_usage
from html import escape as hescape from html import escape as hescape
from .shared import md5Checksum, getImageFileName, walkSort, walkLevel, sanitizeTrace, subprocess_run from .shared import available_archive_tools, getImageFileName, walkSort, walkLevel, sanitizeTrace, subprocess_run
from . import comic2panel from . import comic2panel
from . import image from . import image
from . import comicarchive from . import comicarchive
@@ -51,7 +51,7 @@ from . import kindle
from . import __version__ from . import __version__
ImageFile.LOAD_TRUNCATED_IMAGES = True ImageFile.LOAD_TRUNCATED_IMAGES = True
OS_SORT_KEY = os_sort_keygen()
def main(argv=None): def main(argv=None):
global options global options
@@ -78,7 +78,6 @@ def main(argv=None):
def buildHTML(path, imgfile, imgfilepath): def buildHTML(path, imgfile, imgfilepath):
imgfilepath = md5Checksum(imgfilepath)
filename = getImageFileName(imgfile) filename = getImageFileName(imgfile)
deviceres = options.profileData[1] deviceres = options.profileData[1]
if not options.noprocessing and "Rotated" in options.imgMetadata[imgfilepath]: if not options.noprocessing and "Rotated" in options.imgMetadata[imgfilepath]:
@@ -425,7 +424,6 @@ def buildOPF(dstdir, title, filelist, cover=None):
"</container>"]) "</container>"])
f.close() f.close()
def buildEPUB(path, chapternames, tomenumber, ischunked): def buildEPUB(path, chapternames, tomenumber, ischunked):
filelist = [] filelist = []
chapterlist = [] chapterlist = []
@@ -506,6 +504,7 @@ def buildEPUB(path, chapternames, tomenumber, ischunked):
"display: none;\n", "display: none;\n",
"}\n"]) "}\n"])
f.close() f.close()
build_html_start = perf_counter()
for dirpath, dirnames, filenames in os.walk(os.path.join(path, 'OEBPS', 'Images')): for dirpath, dirnames, filenames in os.walk(os.path.join(path, 'OEBPS', 'Images')):
chapter = False chapter = False
dirnames, filenames = walkSort(dirnames, filenames) dirnames, filenames = walkSort(dirnames, filenames)
@@ -515,10 +514,12 @@ def buildEPUB(path, chapternames, tomenumber, ischunked):
'cover' + getImageFileName(afile)[1]) 'cover' + getImageFileName(afile)[1])
options.covers.append((image.Cover(os.path.join(dirpath, afile), cover, options, options.covers.append((image.Cover(os.path.join(dirpath, afile), cover, options,
tomenumber), options.uuid)) tomenumber), options.uuid))
filelist.append(buildHTML(dirpath, afile, os.path.join(dirpath, afile)))
if not chapter: if not chapter:
chapterlist.append((dirpath.replace('Images', 'Text'), filelist[-1][1])) chapterlist.append((dirpath.replace('Images', 'Text'), afile))
chapter = True chapter = True
filelist.append(buildHTML(dirpath, afile, os.path.join(dirpath, afile)))
build_html_end = perf_counter()
print(f"buildHTML: {build_html_end - build_html_start} seconds")
# Overwrite chapternames if tree is flat and ComicInfo.xml has bookmarks # Overwrite chapternames if tree is flat and ComicInfo.xml has bookmarks
if not chapternames and options.chapters and not ischunked: if not chapternames and options.chapters and not ischunked:
chapterlist = [] chapterlist = []
@@ -566,10 +567,13 @@ def imgDirectoryProcessing(path):
if GUI: if GUI:
GUI.progressBarTick.emit(str(pagenumber)) GUI.progressBarTick.emit(str(pagenumber))
if len(work) > 0: if len(work) > 0:
img_processing_start = perf_counter()
for i in work: for i in work:
workerPool.apply_async(func=imgFileProcessing, args=(i,), callback=imgFileProcessingTick) workerPool.apply_async(func=imgFileProcessing, args=(i,), callback=imgFileProcessingTick)
workerPool.close() workerPool.close()
workerPool.join() workerPool.join()
img_processing_end = perf_counter()
print(f"imgFileProcessing: {img_processing_end - img_processing_start} seconds")
if GUI and not GUI.conversionAlive: if GUI and not GUI.conversionAlive:
rmtree(os.path.join(path, '..', '..'), True) rmtree(os.path.join(path, '..', '..'), True)
raise UserWarning("Conversion interrupted.") raise UserWarning("Conversion interrupted.")
@@ -790,12 +794,16 @@ def getPanelViewSize(deviceres, size):
def sanitizeTree(filetree): def sanitizeTree(filetree):
chapterNames = {} chapterNames = {}
for root, dirs, files in os.walk(filetree, False): page = 1
for i, name in enumerate(os_sorted(files)): for root, dirs, files in os.walk(filetree):
dirs.sort(key=OS_SORT_KEY)
files.sort(key=OS_SORT_KEY)
for name in files:
splitname = os.path.splitext(name) splitname = os.path.splitext(name)
# file needs kcc at front AND back to avoid renaming issues # file needs kcc at front AND back to avoid renaming issues
slugified = f'kcc-{i:04}' slugified = f'kcc-{page:04}'
page += 1
for suffix in '-KCC', '-KCC-A', '-KCC-B', '-KCC-C': for suffix in '-KCC', '-KCC-A', '-KCC-B', '-KCC-C':
if splitname[0].endswith(suffix): if splitname[0].endswith(suffix):
slugified += suffix.lower() slugified += suffix.lower()
@@ -805,7 +813,8 @@ def sanitizeTree(filetree):
key = os.path.join(root, name) key = os.path.join(root, name)
if key != newKey: if key != newKey:
os.replace(key, newKey) os.replace(key, newKey)
for name in dirs: options.imgMetadata[newKey] = options.imgMetadata.pop(key)
for i, name in enumerate(dirs):
tmpName = name tmpName = name
slugified = slugify(name) slugified = slugify(name)
while os.path.exists(os.path.join(root, slugified)) and name.upper() != slugified.upper(): while os.path.exists(os.path.join(root, slugified)) and name.upper() != slugified.upper():
@@ -815,6 +824,11 @@ def sanitizeTree(filetree):
key = os.path.join(root, name) key = os.path.join(root, name)
if key != newKey: if key != newKey:
os.replace(key, newKey) os.replace(key, newKey)
dirs[i] = newKey
existingImgPathKeys = list(options.imgMetadata.keys())
for imgPath in existingImgPathKeys:
if imgPath.startswith(key):
options.imgMetadata[newKey + imgPath.removeprefix(key)] = options.imgMetadata.pop(imgPath)
return chapterNames return chapterNames
@@ -946,17 +960,27 @@ def slugify(value):
def makeZIP(zipfilename, basedir, isepub=False): def makeZIP(zipfilename, basedir, isepub=False):
start = perf_counter()
zipfilename = os.path.abspath(zipfilename) + '.zip' zipfilename = os.path.abspath(zipfilename) + '.zip'
zipOutput = ZipFile(zipfilename, 'w', ZIP_DEFLATED) if '7z' in available_archive_tools():
if isepub: if isepub:
zipOutput.writestr('mimetype', 'application/epub+zip', ZIP_STORED) mimetypeFile = open(os.path.join(basedir, 'mimetype'), 'w')
for dirpath, _, filenames in os.walk(basedir): mimetypeFile.write('application/epub+zip')
for name in filenames: mimetypeFile.close()
path = os.path.normpath(os.path.join(dirpath, name)) subprocess_run(['7z', 'a', '-tzip', zipfilename, os.path.join(basedir, "*")], capture_output=True, check=True)
aPath = os.path.normpath(os.path.join(dirpath.replace(basedir, ''), name)) else:
if os.path.isfile(path): zipOutput = ZipFile(zipfilename, 'w', ZIP_DEFLATED)
zipOutput.write(path, aPath) if isepub:
zipOutput.close() zipOutput.writestr('mimetype', 'application/epub+zip', ZIP_STORED)
for dirpath, _, filenames in os.walk(basedir):
for name in filenames:
path = os.path.normpath(os.path.join(dirpath, name))
aPath = os.path.normpath(os.path.join(dirpath.replace(basedir, ''), name))
if os.path.isfile(path):
zipOutput.write(path, aPath)
zipOutput.close()
end = perf_counter()
print(f"makeZIP time: {end - start} seconds")
return zipfilename return zipfilename
@@ -1131,9 +1155,7 @@ def checkTools(source):
source = source.upper() source = source.upper()
if source.endswith('.CB7') or source.endswith('.7Z') or source.endswith('.RAR') or source.endswith('.CBR') or \ if source.endswith('.CB7') or source.endswith('.7Z') or source.endswith('.RAR') or source.endswith('.CBR') or \
source.endswith('.ZIP') or source.endswith('.CBZ'): source.endswith('.ZIP') or source.endswith('.CBZ'):
try: if '7z' not in available_archive_tools():
subprocess_run(['7z'], stdout=PIPE, stderr=STDOUT)
except FileNotFoundError:
print('ERROR: 7z is missing!') print('ERROR: 7z is missing!')
sys.exit(1) sys.exit(1)
if options.format == 'MOBI': if options.format == 'MOBI':
@@ -1163,6 +1185,7 @@ def checkPre(source):
def makeBook(source, qtgui=None): def makeBook(source, qtgui=None):
start = perf_counter()
global GUI global GUI
GUI = qtgui GUI = qtgui
if GUI: if GUI:
@@ -1265,6 +1288,8 @@ def makeBook(source, qtgui=None):
elif os.path.isdir(source): elif os.path.isdir(source):
rmtree(source) rmtree(source)
end = perf_counter()
print(f"makeBook: {end - start} seconds")
return filepath return filepath

View File

@@ -22,7 +22,6 @@ import io
import os import os
import mozjpeg_lossless_optimization import mozjpeg_lossless_optimization
from PIL import Image, ImageOps, ImageStat, ImageChops, ImageFilter from PIL import Image, ImageOps, ImageStat, ImageChops, ImageFilter
from .shared import md5Checksum
from .page_number_crop_alg import get_bbox_crop_margin_page_number, get_bbox_crop_margin from .page_number_crop_alg import get_bbox_crop_margin_page_number, get_bbox_crop_margin
from .inter_panel_crop_alg import crop_empty_inter_panel from .inter_panel_crop_alg import crop_empty_inter_panel
@@ -321,7 +320,7 @@ class ComicPage:
output_jpeg_file.write(output_jpeg_bytes) output_jpeg_file.write(output_jpeg_bytes)
else: else:
self.image.save(self.targetPath, 'JPEG', optimize=1, quality=85) self.image.save(self.targetPath, 'JPEG', optimize=1, quality=85)
return [md5Checksum(self.targetPath), flags, self.orgPath] return [self.targetPath, flags, self.orgPath]
except IOError as err: except IOError as err:
raise RuntimeError('Cannot save image. ' + str(err)) raise RuntimeError('Cannot save image. ' + str(err))

View File

@@ -18,6 +18,7 @@
# PERFORMANCE OF THIS SOFTWARE. # PERFORMANCE OF THIS SOFTWARE.
# #
from functools import lru_cache
import os import os
from hashlib import md5 from hashlib import md5
from html.parser import HTMLParser from html.parser import HTMLParser
@@ -74,16 +75,6 @@ def walkLevel(some_dir, level=1):
del dirs[:] del dirs[:]
def md5Checksum(fpath):
with open(fpath, 'rb') as fh:
m = md5()
while True:
data = fh.read(8192)
if not data:
break
m.update(data)
return m.hexdigest()
def sanitizeTrace(traceback): def sanitizeTrace(traceback):
return ''.join(format_tb(traceback))\ return ''.join(format_tb(traceback))\
@@ -137,6 +128,19 @@ def dependencyCheck(level):
print('ERROR: ' + ', '.join(missing) + ' is not installed!') print('ERROR: ' + ', '.join(missing) + ' is not installed!')
sys.exit(1) sys.exit(1)
@lru_cache
def available_archive_tools():
available = []
for tool in ['tar', '7z', 'unar', 'unrar']:
try:
subprocess_run([tool], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
available.append(tool)
except FileNotFoundError:
pass
return available
def subprocess_run(command, **kwargs): def subprocess_run(command, **kwargs):
if (os.name == 'nt'): if (os.name == 'nt'):
kwargs.setdefault('creationflags', subprocess.CREATE_NO_WINDOW) kwargs.setdefault('creationflags', subprocess.CREATE_NO_WINDOW)