1
0
mirror of https://github.com/ciromattia/kcc synced 2026-04-15 05:28:49 +00:00

Compare commits

...

8 Commits

Author SHA1 Message Date
Alex Xu
e787dd2897 bump 7.3.0 2025-03-04 11:29:44 -08:00
Alex Xu
01625904d1 huge speed optimization on HDD by removing md5 (#845)
* Eliminate unnecessary use of MD5 checksum

md5checksum() computes the actual checksum of a specified file, which is appropriately expensive, but the code seemed to be using the checksum result as a key into the imgMetadata dictionary to avoid handling image files being renamed during processing steps. This seems like a very expensive way to handle the rename so instead, I now update the imgMetadata keys with the new filename in the one place that the rename happens, and avoid MD5 checksums entirely.

* merge conflicts

* Add missing handling for image path renames due to nested chapter folder name

* merge conflicts

* merge conflicts

* add perf_counters

* imgFileProcessing perf_counter

* use startswith and removeprefix

---------

Co-authored-by: utopiafallen <utopiafallen@gmail.com>
2025-03-04 11:28:23 -08:00
Alex Xu
5f8526da44 all image files have unique ordered names (#848) 2025-03-03 07:50:21 -08:00
Alex Xu
1159e737a0 small correction to 7z (#847) 2025-03-02 19:55:45 -08:00
Alex Xu
5bbdb715e9 7z is faster note 2025-03-02 12:39:51 -08:00
Alex Xu
1a3cd6c916 add perf_counter to makeBook (#846) 2025-03-02 12:24:49 -08:00
Alex Xu
e1e6d587f4 makeZIP now prefers 7z for SPEED (#844) 2025-03-02 11:08:27 -08:00
Alex Xu
ca5c0bdd61 fix error messages that only say a single number (#843) 2025-03-01 19:40:55 -08:00
6 changed files with 79 additions and 61 deletions

View File

@@ -68,9 +68,11 @@ If you have issues detecting it, get stuck on the MOBI conversion step, or use L
### 7-Zip
This is only required for certain files and advanced features.
This is optional but will make conversions much faster.
KCC will ask you to install if needed.
This is required for certain files and advanced features.
KCC will ask you to install if needed.
Refer to the wiki to install: https://github.com/ciromattia/kcc/wiki/Installation#7-zip

View File

@@ -37,7 +37,7 @@ from packaging.version import Version
from raven import Client
from tempfile import gettempdir
from .shared import HTMLStripper, sanitizeTrace, walkLevel, subprocess_run
from .shared import HTMLStripper, available_archive_tools, sanitizeTrace, walkLevel, subprocess_run
from . import __version__
from . import comic2ebook
from . import metadata
@@ -317,13 +317,8 @@ class WorkerThread(QThread):
GUI.progress.content = ''
self.errors = True
_, _, traceback = sys.exc_info()
if len(err.args) == 1:
MW.showDialog.emit("Error during conversion %s:\n\n%s\n\nTraceback:\n%s"
% (jobargv[-1], str(err), sanitizeTrace(traceback)), 'error')
else:
MW.showDialog.emit("Error during conversion %s:\n\n%s\n\nTraceback:\n%s"
% (jobargv[-1], str(err.args[0]), err.args[1]), 'error')
GUI.sentry.extra_context({'realTraceback': err.args[1]})
MW.showDialog.emit("Error during conversion %s:\n\n%s\n\nTraceback:\n%s"
% (jobargv[-1], str(err), sanitizeTrace(traceback)), 'error')
if ' is corrupted.' not in str(err):
GUI.sentry.captureException()
MW.addMessage.emit('Error during conversion! Please consult '
@@ -1071,19 +1066,12 @@ class KCCGUI(KCC_ui.Ui_mainWindow):
self.addMessage('Since you are a new user of <b>KCC</b> please see few '
'<a href="https://github.com/ciromattia/kcc/wiki/Important-tips">important tips</a>.',
'info')
try:
subprocess_run(['tar'], stdout=PIPE, stderr=STDOUT)
self.tar = True
except FileNotFoundError:
self.tar = False
try:
subprocess_run(['7z'], stdout=PIPE, stderr=STDOUT)
self.sevenzip = True
except FileNotFoundError:
self.sevenzip = False
if not self.tar:
self.addMessage('<a href="https://github.com/ciromattia/kcc#7-zip">Install 7z (link)</a>'
' to enable CBZ/CBR/ZIP/etc processing.', 'warning')
self.tar = 'tar' in available_archive_tools()
self.sevenzip = '7z' in available_archive_tools()
if not any([self.tar, self.sevenzip]):
self.addMessage('<a href="https://github.com/ciromattia/kcc#7-zip">Install 7z (link)</a>'
' to enable CBZ/CBR/ZIP/etc processing.', 'warning')
self.detectKindleGen(True)
APP.messageFromOtherInstance.connect(self.handleMessage)

View File

@@ -1,4 +1,4 @@
__version__ = '7.2.3'
__version__ = '7.3.0'
__license__ = 'ISC'
__copyright__ = '2012-2022, Ciro Mattia Gonano <ciromattia@gmail.com>, Pawel Jastrzebski <pawelj@iosphe.re>, darodi'
__docformat__ = 'restructuredtext en'

View File

@@ -23,7 +23,7 @@ import pathlib
import re
import sys
from argparse import ArgumentParser
from time import strftime, gmtime
from time import perf_counter, strftime, gmtime
from copy import copy
from glob import glob, escape
from re import sub
@@ -33,14 +33,14 @@ from tempfile import mkdtemp, gettempdir, TemporaryFile
from shutil import move, copytree, rmtree, copyfile
from multiprocessing import Pool
from uuid import uuid4
from natsort import os_sorted
from natsort import os_sort_keygen
from slugify import slugify as slugify_ext
from PIL import Image, ImageFile
from subprocess import STDOUT, PIPE
from psutil import virtual_memory, disk_usage
from html import escape as hescape
from .shared import md5Checksum, getImageFileName, walkSort, walkLevel, sanitizeTrace, subprocess_run
from .shared import available_archive_tools, getImageFileName, walkSort, walkLevel, sanitizeTrace, subprocess_run
from . import comic2panel
from . import image
from . import comicarchive
@@ -51,7 +51,7 @@ from . import kindle
from . import __version__
ImageFile.LOAD_TRUNCATED_IMAGES = True
OS_SORT_KEY = os_sort_keygen()
def main(argv=None):
global options
@@ -78,7 +78,6 @@ def main(argv=None):
def buildHTML(path, imgfile, imgfilepath):
imgfilepath = md5Checksum(imgfilepath)
filename = getImageFileName(imgfile)
deviceres = options.profileData[1]
if not options.noprocessing and "Rotated" in options.imgMetadata[imgfilepath]:
@@ -425,7 +424,6 @@ def buildOPF(dstdir, title, filelist, cover=None):
"</container>"])
f.close()
def buildEPUB(path, chapternames, tomenumber, ischunked):
filelist = []
chapterlist = []
@@ -506,6 +504,7 @@ def buildEPUB(path, chapternames, tomenumber, ischunked):
"display: none;\n",
"}\n"])
f.close()
build_html_start = perf_counter()
for dirpath, dirnames, filenames in os.walk(os.path.join(path, 'OEBPS', 'Images')):
chapter = False
dirnames, filenames = walkSort(dirnames, filenames)
@@ -515,10 +514,12 @@ def buildEPUB(path, chapternames, tomenumber, ischunked):
'cover' + getImageFileName(afile)[1])
options.covers.append((image.Cover(os.path.join(dirpath, afile), cover, options,
tomenumber), options.uuid))
filelist.append(buildHTML(dirpath, afile, os.path.join(dirpath, afile)))
if not chapter:
chapterlist.append((dirpath.replace('Images', 'Text'), filelist[-1][1]))
chapterlist.append((dirpath.replace('Images', 'Text'), afile))
chapter = True
filelist.append(buildHTML(dirpath, afile, os.path.join(dirpath, afile)))
build_html_end = perf_counter()
print(f"buildHTML: {build_html_end - build_html_start} seconds")
# Overwrite chapternames if tree is flat and ComicInfo.xml has bookmarks
if not chapternames and options.chapters and not ischunked:
chapterlist = []
@@ -566,10 +567,13 @@ def imgDirectoryProcessing(path):
if GUI:
GUI.progressBarTick.emit(str(pagenumber))
if len(work) > 0:
img_processing_start = perf_counter()
for i in work:
workerPool.apply_async(func=imgFileProcessing, args=(i,), callback=imgFileProcessingTick)
workerPool.close()
workerPool.join()
img_processing_end = perf_counter()
print(f"imgFileProcessing: {img_processing_end - img_processing_start} seconds")
if GUI and not GUI.conversionAlive:
rmtree(os.path.join(path, '..', '..'), True)
raise UserWarning("Conversion interrupted.")
@@ -790,12 +794,16 @@ def getPanelViewSize(deviceres, size):
def sanitizeTree(filetree):
chapterNames = {}
for root, dirs, files in os.walk(filetree, False):
for i, name in enumerate(os_sorted(files)):
page = 1
for root, dirs, files in os.walk(filetree):
dirs.sort(key=OS_SORT_KEY)
files.sort(key=OS_SORT_KEY)
for name in files:
splitname = os.path.splitext(name)
# file needs kcc at front AND back to avoid renaming issues
slugified = f'kcc-{i:04}'
slugified = f'kcc-{page:04}'
page += 1
for suffix in '-KCC', '-KCC-A', '-KCC-B', '-KCC-C':
if splitname[0].endswith(suffix):
slugified += suffix.lower()
@@ -805,7 +813,8 @@ def sanitizeTree(filetree):
key = os.path.join(root, name)
if key != newKey:
os.replace(key, newKey)
for name in dirs:
options.imgMetadata[newKey] = options.imgMetadata.pop(key)
for i, name in enumerate(dirs):
tmpName = name
slugified = slugify(name)
while os.path.exists(os.path.join(root, slugified)) and name.upper() != slugified.upper():
@@ -815,6 +824,11 @@ def sanitizeTree(filetree):
key = os.path.join(root, name)
if key != newKey:
os.replace(key, newKey)
dirs[i] = newKey
existingImgPathKeys = list(options.imgMetadata.keys())
for imgPath in existingImgPathKeys:
if imgPath.startswith(key):
options.imgMetadata[newKey + imgPath.removeprefix(key)] = options.imgMetadata.pop(imgPath)
return chapterNames
@@ -946,17 +960,27 @@ def slugify(value):
def makeZIP(zipfilename, basedir, isepub=False):
start = perf_counter()
zipfilename = os.path.abspath(zipfilename) + '.zip'
zipOutput = ZipFile(zipfilename, 'w', ZIP_DEFLATED)
if isepub:
zipOutput.writestr('mimetype', 'application/epub+zip', ZIP_STORED)
for dirpath, _, filenames in os.walk(basedir):
for name in filenames:
path = os.path.normpath(os.path.join(dirpath, name))
aPath = os.path.normpath(os.path.join(dirpath.replace(basedir, ''), name))
if os.path.isfile(path):
zipOutput.write(path, aPath)
zipOutput.close()
if '7z' in available_archive_tools():
if isepub:
mimetypeFile = open(os.path.join(basedir, 'mimetype'), 'w')
mimetypeFile.write('application/epub+zip')
mimetypeFile.close()
subprocess_run(['7z', 'a', '-tzip', zipfilename, os.path.join(basedir, "*")], capture_output=True, check=True)
else:
zipOutput = ZipFile(zipfilename, 'w', ZIP_DEFLATED)
if isepub:
zipOutput.writestr('mimetype', 'application/epub+zip', ZIP_STORED)
for dirpath, _, filenames in os.walk(basedir):
for name in filenames:
path = os.path.normpath(os.path.join(dirpath, name))
aPath = os.path.normpath(os.path.join(dirpath.replace(basedir, ''), name))
if os.path.isfile(path):
zipOutput.write(path, aPath)
zipOutput.close()
end = perf_counter()
print(f"makeZIP time: {end - start} seconds")
return zipfilename
@@ -1131,9 +1155,7 @@ def checkTools(source):
source = source.upper()
if source.endswith('.CB7') or source.endswith('.7Z') or source.endswith('.RAR') or source.endswith('.CBR') or \
source.endswith('.ZIP') or source.endswith('.CBZ'):
try:
subprocess_run(['7z'], stdout=PIPE, stderr=STDOUT)
except FileNotFoundError:
if '7z' not in available_archive_tools():
print('ERROR: 7z is missing!')
sys.exit(1)
if options.format == 'MOBI':
@@ -1163,6 +1185,7 @@ def checkPre(source):
def makeBook(source, qtgui=None):
start = perf_counter()
global GUI
GUI = qtgui
if GUI:
@@ -1265,6 +1288,8 @@ def makeBook(source, qtgui=None):
elif os.path.isdir(source):
rmtree(source)
end = perf_counter()
print(f"makeBook: {end - start} seconds")
return filepath

View File

@@ -22,7 +22,6 @@ import io
import os
import mozjpeg_lossless_optimization
from PIL import Image, ImageOps, ImageStat, ImageChops, ImageFilter
from .shared import md5Checksum
from .page_number_crop_alg import get_bbox_crop_margin_page_number, get_bbox_crop_margin
from .inter_panel_crop_alg import crop_empty_inter_panel
@@ -321,7 +320,7 @@ class ComicPage:
output_jpeg_file.write(output_jpeg_bytes)
else:
self.image.save(self.targetPath, 'JPEG', optimize=1, quality=85)
return [md5Checksum(self.targetPath), flags, self.orgPath]
return [self.targetPath, flags, self.orgPath]
except IOError as err:
raise RuntimeError('Cannot save image. ' + str(err))

View File

@@ -18,6 +18,7 @@
# PERFORMANCE OF THIS SOFTWARE.
#
from functools import lru_cache
import os
from hashlib import md5
from html.parser import HTMLParser
@@ -74,16 +75,6 @@ def walkLevel(some_dir, level=1):
del dirs[:]
def md5Checksum(fpath):
with open(fpath, 'rb') as fh:
m = md5()
while True:
data = fh.read(8192)
if not data:
break
m.update(data)
return m.hexdigest()
def sanitizeTrace(traceback):
return ''.join(format_tb(traceback))\
@@ -137,6 +128,19 @@ def dependencyCheck(level):
print('ERROR: ' + ', '.join(missing) + ' is not installed!')
sys.exit(1)
@lru_cache
def available_archive_tools():
available = []
for tool in ['tar', '7z', 'unar', 'unrar']:
try:
subprocess_run([tool], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
available.append(tool)
except FileNotFoundError:
pass
return available
def subprocess_run(command, **kwargs):
if (os.name == 'nt'):
kwargs.setdefault('creationflags', subprocess.CREATE_NO_WINDOW)