From f33d35502411169f002d2bcdf47c23a8e2d7df9f Mon Sep 17 00:00:00 2001 From: Ciro Mattia Gonano Date: Thu, 11 Apr 2013 10:34:33 +0200 Subject: [PATCH 1/5] Filenames slugifications (#28, #31, #9, #8) --- README.md | 4 +++- kcc/comic2ebook.py | 26 +++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b54a166..b223b74 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,9 @@ The app relies and includes the following scripts/binaries: Added generic CSS file Optimized archive extraction for zip/rar files (#40) - 2.9: Added support for generating a plain CBZ (skipping all the EPUB/Mobi generation) (#45) - Prevent output file overwriting the source one: if a duplicate name is detected, append _kcc to the name + Prevent output file overwriting the source one: if a duplicate name is detected, append _kcc to the name + Rarfile library updated to 2.6 + Filenames slugifications (#28, #31, #9, #8) ## COPYRIGHT diff --git a/kcc/comic2ebook.py b/kcc/comic2ebook.py index 51c1b4b..58d614d 100755 --- a/kcc/comic2ebook.py +++ b/kcc/comic2ebook.py @@ -128,7 +128,7 @@ def buildNCX(dstdir, title, chapters): f = open(ncxfile, "w") f.writelines(["\n", "\n", + "\"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd\">\n", "\n", "\n", "\n", @@ -356,6 +356,7 @@ def genEpubStruct(path): chapterlist = [] cover = None _, deviceres, _, _, panelviewsize = image.ProfileData.Profiles[options.profile] + slugifyFileTree(path) os.mkdir(os.path.join(path, 'OEBPS', 'Text')) f = open(os.path.join(path, 'OEBPS', 'Text', 'style.css'), 'w') #DON'T COMPRESS CSS. KINDLE WILL FAIL TO PARSE IT. @@ -535,6 +536,29 @@ def getWorkFolder(afile): return path +def slugify(value): + """ + Normalizes string, converts to lowercase, removes non-alpha characters, + and converts spaces to hyphens. + """ + import unicodedata + value = unicodedata.normalize('NFKD', unicode(value)).encode('ascii', 'ignore') + value = re.sub('[^\w\s-]', '', value).strip() + value = re.sub('[-\s]+', '-', value) + return value + + +def slugifyFileTree(filetree): + for root, dirs, files in os.walk(filetree): + for name in files: + splitname = os.path.splitext(name) + os.rename(os.path.join(root, name), + os.path.join(root, slugify(splitname[0]) + splitname[1])) + for name in dirs: + slugifyFileTree(os.path.join(root, name)) + os.rename(os.path.join(root, name), os.path.join(root, slugify(name))) + + def Copyright(): print ('comic2ebook v%(__version__)s. ' 'Written 2012 by Ciro Mattia Gonano.' % globals()) From be270aa7971baa775970380a2656aa44924dee47 Mon Sep 17 00:00:00 2001 From: Ciro Mattia Gonano Date: Thu, 11 Apr 2013 11:49:29 +0200 Subject: [PATCH 2/5] Add number padding and lowering for file names (not directory) --- kcc/comic2ebook.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kcc/comic2ebook.py b/kcc/comic2ebook.py index 58d614d..bebbcb3 100755 --- a/kcc/comic2ebook.py +++ b/kcc/comic2ebook.py @@ -536,15 +536,20 @@ def getWorkFolder(afile): return path -def slugify(value): +def slugify(value, lower=True, digitpadding=True): """ Normalizes string, converts to lowercase, removes non-alpha characters, and converts spaces to hyphens. """ import unicodedata - value = unicodedata.normalize('NFKD', unicode(value)).encode('ascii', 'ignore') + value = unicodedata.normalize('NFKD', unicode(value, 'UTF-8')).encode('ascii', 'ignore') value = re.sub('[^\w\s-]', '', value).strip() value = re.sub('[-\s]+', '-', value) + if lower: + value = value.lower() + if digitpadding: + value = re.sub(r'([0-9]+)', r'00000\1', value) + value = re.sub(r'0*([0-9]{6,})', r'\1', value) return value @@ -556,7 +561,7 @@ def slugifyFileTree(filetree): os.path.join(root, slugify(splitname[0]) + splitname[1])) for name in dirs: slugifyFileTree(os.path.join(root, name)) - os.rename(os.path.join(root, name), os.path.join(root, slugify(name))) + os.rename(os.path.join(root, name), os.path.join(root, slugify(name, False))) def Copyright(): From f0afa1fff2f780d987441c96bdaad38dc3e63920 Mon Sep 17 00:00:00 2001 From: Ciro Mattia Gonano Date: Thu, 11 Apr 2013 12:18:02 +0200 Subject: [PATCH 3/5] Convert dot char to hyphen. Removes UNIX-hidden files and dirs from the final archive (prevents .DS_Store and stuff) --- kcc/comic2ebook.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/kcc/comic2ebook.py b/kcc/comic2ebook.py index b3d7f4a..0e92f5d 100755 --- a/kcc/comic2ebook.py +++ b/kcc/comic2ebook.py @@ -360,7 +360,7 @@ def genEpubStruct(path): chapterlist = [] cover = None _, deviceres, _, _, panelviewsize = image.ProfileData.Profiles[options.profile] - slugifyFileTree(path) + sanitizeTree(os.path.join(path, 'OEBPS', 'Images')) os.mkdir(os.path.join(path, 'OEBPS', 'Text')) f = open(os.path.join(path, 'OEBPS', 'Text', 'style.css'), 'w') #DON'T COMPRESS CSS. KINDLE WILL FAIL TO PARSE IT. @@ -540,32 +540,35 @@ def getWorkFolder(afile): return path -def slugify(value, lower=True, digitpadding=True): +def slugify(value): """ Normalizes string, converts to lowercase, removes non-alpha characters, and converts spaces to hyphens. """ import unicodedata value = unicodedata.normalize('NFKD', unicode(value, 'UTF-8')).encode('ascii', 'ignore') - value = re.sub('[^\w\s-]', '', value).strip() - value = re.sub('[-\s]+', '-', value) - if lower: - value = value.lower() - if digitpadding: - value = re.sub(r'([0-9]+)', r'00000\1', value) - value = re.sub(r'0*([0-9]{6,})', r'\1', value) + value = re.sub('[^\w\s\.-]', '', value).strip().lower() + value = re.sub('[-\.\s]+', '-', value) + value = re.sub(r'([0-9]+)', r'00000\1', value) + value = re.sub(r'0*([0-9]{6,})', r'\1', value) return value -def slugifyFileTree(filetree): +def sanitizeTree(filetree): for root, dirs, files in os.walk(filetree): for name in files: - splitname = os.path.splitext(name) - os.rename(os.path.join(root, name), - os.path.join(root, slugify(splitname[0]) + splitname[1])) + if name.startswith('.'): + os.remove(os.path.join(root, name)) + else: + splitname = os.path.splitext(name) + os.rename(os.path.join(root, name), + os.path.join(root, slugify(splitname[0]) + splitname[1])) for name in dirs: - slugifyFileTree(os.path.join(root, name)) - os.rename(os.path.join(root, name), os.path.join(root, slugify(name, False))) + if name.startswith('.'): + os.remove(os.path.join(root, name)) + else: + sanitizeTree(os.path.join(root, name)) + os.rename(os.path.join(root, name), os.path.join(root, slugify(name))) def Copyright(): From b972e4c74600276852324d95a67b333f72c4641b Mon Sep 17 00:00:00 2001 From: Ciro Mattia Gonano Date: Thu, 11 Apr 2013 12:33:14 +0200 Subject: [PATCH 4/5] Remove Windows silly 'thumbs.db' too --- kcc/comic2ebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kcc/comic2ebook.py b/kcc/comic2ebook.py index 0e92f5d..2c47d81 100755 --- a/kcc/comic2ebook.py +++ b/kcc/comic2ebook.py @@ -557,7 +557,7 @@ def slugify(value): def sanitizeTree(filetree): for root, dirs, files in os.walk(filetree): for name in files: - if name.startswith('.'): + if name.startswith('.') or name.lower() == 'thumbs.db': os.remove(os.path.join(root, name)) else: splitname = os.path.splitext(name) From 724156c554af01d96a2dde0589cae86d67ddafb6 Mon Sep 17 00:00:00 2001 From: Ciro Mattia Gonano Date: Fri, 12 Apr 2013 01:36:51 +0200 Subject: [PATCH 5/5] Small fixes --- kcc/comic2ebook.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kcc/comic2ebook.py b/kcc/comic2ebook.py index 2c47d81..2338196 100755 --- a/kcc/comic2ebook.py +++ b/kcc/comic2ebook.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # # Copyright (c) 2012 Ciro Mattia Gonano # @@ -546,7 +547,7 @@ def slugify(value): and converts spaces to hyphens. """ import unicodedata - value = unicodedata.normalize('NFKD', unicode(value, 'UTF-8')).encode('ascii', 'ignore') + value = unicodedata.normalize('NFKD', unicode(value, 'latin1')).encode('ascii', 'ignore') value = re.sub('[^\w\s\.-]', '', value).strip().lower() value = re.sub('[-\.\s]+', '-', value) value = re.sub(r'([0-9]+)', r'00000\1', value) @@ -567,7 +568,6 @@ def sanitizeTree(filetree): if name.startswith('.'): os.remove(os.path.join(root, name)) else: - sanitizeTree(os.path.join(root, name)) os.rename(os.path.join(root, name), os.path.join(root, slugify(name))) @@ -578,7 +578,6 @@ def Copyright(): def Usage(): print "Generates HTML, NCX and OPF for a Comic ebook from a bunch of images." - print "Optimized for creating MOBI files to be read on Kindle Paperwhite." parser.print_help()