# Helpers introduced by the "filename slugification" patch to
# kcc/comic2ebook.py (README entry: "Filenames slugifications
# (#28, #31, #9, #8)").  Besides adding the two functions below, that patch:
#   - adds a "# -*- coding: utf-8 -*-" line after the shebang;
#   - adds the "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd" string line to
#     the list written by buildNCX();
#   - calls sanitizeTree(os.path.join(path, 'OEBPS', 'Images')) in
#     genEpubStruct(), right before the 'OEBPS/Text' directory is created;
#   - removes the "Optimized for creating MOBI files to be read on Kindle
#     Paperwhite." line from Usage().


def slugify(value):
    """
    Return a lowercase, ASCII-only, hyphen-separated slug of *value*.

    The transformation is:
      1. byte strings are decoded as latin1, text is taken as is;
      2. the text is NFKD-normalized and every non-ASCII character dropped;
      3. characters other than word characters, whitespace, dots and hyphens
         are removed, surrounding whitespace is stripped, and the result is
         lowercased;
      4. each run of hyphens, dots and whitespace becomes a single hyphen;
      5. each run of digits loses its leading zeros and is left-padded with
         zeros to at least six digits ("2" -> "000002").

    The result is a native ``str`` and may be empty when *value* contains no
    usable character.
    """
    import re
    import unicodedata

    # Only byte strings need decoding; decoding text that is already unicode
    # raises TypeError.
    if isinstance(value, bytes):
        value = value.decode('latin1')
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
    if not isinstance(value, str):
        # On Python 3 encode() yields bytes; convert back to the native str.
        value = value.decode('ascii')
    value = re.sub(r'[^\w\s\.-]', '', value).strip().lower()
    value = re.sub(r'[-\.\s]+', '-', value)
    # Normalize every digit run to "no extra leading zeros, minimum width 6".
    return re.sub(r'[0-9]+',
                  lambda match: match.group(0).lstrip('0').zfill(6),
                  value)


def _uniqueSlugName(slug, ext, taken):
    """Return slug + ext, made non-empty and distinct from names in *taken*."""
    # An empty slug would yield a hidden ".ext" file or an empty directory
    # name, so fall back to a fixed placeholder.
    base = slug or 'unnamed'
    candidate = base + ext
    counter = 1
    while candidate in taken:
        candidate = '%s-%d%s' % (base, counter, ext)
        counter += 1
    return candidate


def sanitizeTree(filetree):
    """
    Delete junk entries and slugify every other name below *filetree*.

    For each directory visited by os.walk(filetree):
      - files whose name starts with '.' and files named 'thumbs.db'
        (case-insensitive) are deleted;
      - every other file is renamed to slugify(stem) + original extension;
      - directories whose name starts with '.' are deleted together with
        their contents;
      - every other directory is renamed to slugify(name).

    When a slugified name is already used by another entry of the same
    directory, a "-<n>" suffix is inserted before the extension instead of
    replacing that entry.  *filetree* itself is never renamed.
    """
    import os
    import shutil

    for root, dirs, files in os.walk(filetree):
        # Names currently present in this directory, so that a rename never
        # lands on another entry.
        taken = set(files + dirs)

        for name in files:
            taken.discard(name)
            source = os.path.join(root, name)
            if name.startswith('.') or name.lower() == 'thumbs.db':
                os.remove(source)
                continue
            stem, ext = os.path.splitext(name)
            newname = _uniqueSlugName(slugify(stem), ext, taken)
            if newname != name:
                os.rename(source, os.path.join(root, newname))
            taken.add(newname)

        kept = []
        for name in dirs:
            taken.discard(name)
            source = os.path.join(root, name)
            if name.startswith('.'):
                # os.remove() cannot delete a directory; a symlink to a
                # directory is the one case where it is the right call.
                if os.path.islink(source):
                    os.remove(source)
                else:
                    shutil.rmtree(source)
                continue
            newname = _uniqueSlugName(slugify(name), '', taken)
            if newname != name:
                os.rename(source, os.path.join(root, newname))
            taken.add(newname)
            kept.append(newname)
        # os.walk() (top-down) descends into whatever names remain in `dirs`;
        # hand it the new names so renamed subtrees are still visited and
        # deleted ones are skipped.
        dirs[:] = kept