From f33d35502411169f002d2bcdf47c23a8e2d7df9f Mon Sep 17 00:00:00 2001
From: Ciro Mattia Gonano <ciromattia@gmail.com>
Date: Thu, 11 Apr 2013 10:34:33 +0200
Subject: [PATCH 1/5] Filename slugification (#28, #31, #9, #8)

---
 README.md          |  4 +++-
 kcc/comic2ebook.py | 26 +++++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index b54a166..b223b74 100644
--- a/README.md
+++ b/README.md
@@ -123,7 +123,9 @@ The app relies and includes the following scripts/binaries:
         Added generic CSS file  
         Optimized archive extraction for zip/rar files (#40)  
   - 2.9: Added support for generating a plain CBZ (skipping all the EPUB/Mobi generation) (#45)  
-        Prevent output file overwriting the source one: if a duplicate name is detected, append _kcc to the name
+        Prevent output file overwriting the source one: if a duplicate name is detected, append _kcc to the name  
+        Rarfile library updated to 2.6  
+        Filenames slugifications (#28, #31, #9, #8)
         
 
 ## COPYRIGHT
diff --git a/kcc/comic2ebook.py b/kcc/comic2ebook.py
index 51c1b4b..58d614d 100755
--- a/kcc/comic2ebook.py
+++ b/kcc/comic2ebook.py
@@ -128,7 +128,7 @@ def buildNCX(dstdir, title, chapters):
     f = open(ncxfile, "w")
     f.writelines(["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
                   "<!DOCTYPE ncx PUBLIC \"-//NISO//DTD ncx 2005-1//EN\" ",
-                  "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd\">\n",
+                  "\"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd\">\n",
                   "<ncx version=\"2005-1\" xml:lang=\"en-US\" xmlns=\"http://www.daisy.org/z3986/2005/ncx/\">\n",
                   "<head>\n",
                   "<meta name=\"dtb:uid\" content=\"015ffaec-9340-42f8-b163-a0c5ab7d0611\"/>\n",
@@ -356,6 +356,7 @@ def genEpubStruct(path):
     chapterlist = []
     cover = None
     _, deviceres, _, _, panelviewsize = image.ProfileData.Profiles[options.profile]
+    slugifyFileTree(path)
     os.mkdir(os.path.join(path, 'OEBPS', 'Text'))
     f = open(os.path.join(path, 'OEBPS', 'Text', 'style.css'), 'w')
     #DON'T COMPRESS CSS. KINDLE WILL FAIL TO PARSE IT.
@@ -535,6 +536,29 @@ def getWorkFolder(afile):
     return path
 
 
+def slugify(value):
+    """
+    Normalizes string, converts to lowercase, removes non-alpha characters,
+    and converts spaces to hyphens.
+    """
+    import unicodedata
+    value = unicodedata.normalize('NFKD', unicode(value)).encode('ascii', 'ignore')
+    value = re.sub('[^\w\s-]', '', value).strip()
+    value = re.sub('[-\s]+', '-', value)
+    return value
+
+
+def slugifyFileTree(filetree):
+    for root, dirs, files in os.walk(filetree):
+        for name in files:
+            splitname = os.path.splitext(name)
+            os.rename(os.path.join(root, name),
+                      os.path.join(root, slugify(splitname[0]) + splitname[1]))
+        for name in dirs:
+            slugifyFileTree(os.path.join(root, name))
+            os.rename(os.path.join(root, name), os.path.join(root, slugify(name)))
+
+
 def Copyright():
     print ('comic2ebook v%(__version__)s. '
            'Written 2012 by Ciro Mattia Gonano.' % globals())

From be270aa7971baa775970380a2656aa44924dee47 Mon Sep 17 00:00:00 2001
From: Ciro Mattia Gonano <ciromattia@gmail.com>
Date: Thu, 11 Apr 2013 11:49:29 +0200
Subject: [PATCH 2/5] Add number padding; lowercase file names (but not
 directory names)

---
 kcc/comic2ebook.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/kcc/comic2ebook.py b/kcc/comic2ebook.py
index 58d614d..bebbcb3 100755
--- a/kcc/comic2ebook.py
+++ b/kcc/comic2ebook.py
@@ -536,15 +536,20 @@ def getWorkFolder(afile):
     return path
 
 
-def slugify(value):
+def slugify(value, lower=True, digitpadding=True):
     """
     Normalizes string, converts to lowercase, removes non-alpha characters,
     and converts spaces to hyphens.
     """
     import unicodedata
-    value = unicodedata.normalize('NFKD', unicode(value)).encode('ascii', 'ignore')
+    value = unicodedata.normalize('NFKD', unicode(value, 'UTF-8')).encode('ascii', 'ignore')
     value = re.sub('[^\w\s-]', '', value).strip()
     value = re.sub('[-\s]+', '-', value)
+    if lower:
+        value = value.lower()
+    if digitpadding:
+        value = re.sub(r'([0-9]+)', r'00000\1', value)
+        value = re.sub(r'0*([0-9]{6,})', r'\1', value)
     return value
 
 
@@ -556,7 +561,7 @@ def slugifyFileTree(filetree):
                       os.path.join(root, slugify(splitname[0]) + splitname[1]))
         for name in dirs:
             slugifyFileTree(os.path.join(root, name))
-            os.rename(os.path.join(root, name), os.path.join(root, slugify(name)))
+            os.rename(os.path.join(root, name), os.path.join(root, slugify(name, False)))
 
 
 def Copyright():

From f0afa1fff2f780d987441c96bdaad38dc3e63920 Mon Sep 17 00:00:00 2001
From: Ciro Mattia Gonano <ciromattia@gmail.com>
Date: Thu, 11 Apr 2013 12:18:02 +0200
Subject: [PATCH 3/5] Convert dot char to hyphen. Remove UNIX-hidden files and
 dirs from the final archive (prevents .DS_Store and similar clutter)

---
 kcc/comic2ebook.py | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/kcc/comic2ebook.py b/kcc/comic2ebook.py
index b3d7f4a..0e92f5d 100755
--- a/kcc/comic2ebook.py
+++ b/kcc/comic2ebook.py
@@ -360,7 +360,7 @@ def genEpubStruct(path):
     chapterlist = []
     cover = None
     _, deviceres, _, _, panelviewsize = image.ProfileData.Profiles[options.profile]
-    slugifyFileTree(path)
+    sanitizeTree(os.path.join(path, 'OEBPS', 'Images'))
     os.mkdir(os.path.join(path, 'OEBPS', 'Text'))
     f = open(os.path.join(path, 'OEBPS', 'Text', 'style.css'), 'w')
     #DON'T COMPRESS CSS. KINDLE WILL FAIL TO PARSE IT.
@@ -540,32 +540,35 @@ def getWorkFolder(afile):
     return path
 
 
-def slugify(value, lower=True, digitpadding=True):
+def slugify(value):
     """
     Normalizes string, converts to lowercase, removes non-alpha characters,
     and converts spaces to hyphens.
     """
     import unicodedata
     value = unicodedata.normalize('NFKD', unicode(value, 'UTF-8')).encode('ascii', 'ignore')
-    value = re.sub('[^\w\s-]', '', value).strip()
-    value = re.sub('[-\s]+', '-', value)
-    if lower:
-        value = value.lower()
-    if digitpadding:
-        value = re.sub(r'([0-9]+)', r'00000\1', value)
-        value = re.sub(r'0*([0-9]{6,})', r'\1', value)
+    value = re.sub('[^\w\s\.-]', '', value).strip().lower()
+    value = re.sub('[-\.\s]+', '-', value)
+    value = re.sub(r'([0-9]+)', r'00000\1', value)
+    value = re.sub(r'0*([0-9]{6,})', r'\1', value)
     return value
 
 
-def slugifyFileTree(filetree):
+def sanitizeTree(filetree):
     for root, dirs, files in os.walk(filetree):
         for name in files:
-            splitname = os.path.splitext(name)
-            os.rename(os.path.join(root, name),
-                      os.path.join(root, slugify(splitname[0]) + splitname[1]))
+            if name.startswith('.'):
+                os.remove(os.path.join(root, name))
+            else:
+                splitname = os.path.splitext(name)
+                os.rename(os.path.join(root, name),
+                          os.path.join(root, slugify(splitname[0]) + splitname[1]))
         for name in dirs:
-            slugifyFileTree(os.path.join(root, name))
-            os.rename(os.path.join(root, name), os.path.join(root, slugify(name, False)))
+            if name.startswith('.'):
+                os.remove(os.path.join(root, name))
+            else:
+                sanitizeTree(os.path.join(root, name))
+                os.rename(os.path.join(root, name), os.path.join(root, slugify(name)))
 
 
 def Copyright():

From b972e4c74600276852324d95a67b333f72c4641b Mon Sep 17 00:00:00 2001
From: Ciro Mattia Gonano <ciromattia@gmail.com>
Date: Thu, 11 Apr 2013 12:33:14 +0200
Subject: [PATCH 4/5] Remove Windows silly 'thumbs.db' too

---
 kcc/comic2ebook.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kcc/comic2ebook.py b/kcc/comic2ebook.py
index 0e92f5d..2c47d81 100755
--- a/kcc/comic2ebook.py
+++ b/kcc/comic2ebook.py
@@ -557,7 +557,7 @@ def slugify(value):
 def sanitizeTree(filetree):
     for root, dirs, files in os.walk(filetree):
         for name in files:
-            if name.startswith('.'):
+            if name.startswith('.') or name.lower() == 'thumbs.db':
                 os.remove(os.path.join(root, name))
             else:
                 splitname = os.path.splitext(name)

From 724156c554af01d96a2dde0589cae86d67ddafb6 Mon Sep 17 00:00:00 2001
From: Ciro Mattia Gonano <ciro@winged.it>
Date: Fri, 12 Apr 2013 01:36:51 +0200
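Subject: [PATCH 5/5] Small fixes

Declare the source file encoding as UTF-8, decode file names as latin1
instead of UTF-8 in slugify(), drop the explicit recursive sanitizeTree()
call for subdirectories, and remove the Kindle Paperwhite line from the
usage text.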
---
 kcc/comic2ebook.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/kcc/comic2ebook.py b/kcc/comic2ebook.py
index 2c47d81..2338196 100755
--- a/kcc/comic2ebook.py
+++ b/kcc/comic2ebook.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 #
 # Copyright (c) 2012 Ciro Mattia Gonano <ciromattia@gmail.com>
 #
@@ -546,7 +547,7 @@ def slugify(value):
     and converts spaces to hyphens.
     """
     import unicodedata
-    value = unicodedata.normalize('NFKD', unicode(value, 'UTF-8')).encode('ascii', 'ignore')
+    value = unicodedata.normalize('NFKD', unicode(value, 'latin1')).encode('ascii', 'ignore')
     value = re.sub('[^\w\s\.-]', '', value).strip().lower()
     value = re.sub('[-\.\s]+', '-', value)
     value = re.sub(r'([0-9]+)', r'00000\1', value)
@@ -567,7 +568,6 @@ def sanitizeTree(filetree):
             if name.startswith('.'):
                 os.remove(os.path.join(root, name))
             else:
-                sanitizeTree(os.path.join(root, name))
                 os.rename(os.path.join(root, name), os.path.join(root, slugify(name)))
 
 
@@ -578,7 +578,6 @@ def Copyright():
 
 def Usage():
     print "Generates HTML, NCX and OPF for a Comic ebook from a bunch of images."
-    print "Optimized for creating MOBI files to be read on Kindle Paperwhite."
     parser.print_help()