1
0
mirror of https://github.com/ciromattia/kcc synced 2025-12-13 17:56:30 +00:00

Valid ePub 2.0 output

This commit is contained in:
Ciro Mattia Gonano
2013-01-28 19:28:25 +01:00
parent 74a93f1e50
commit 6029aa2606
3 changed files with 84 additions and 61 deletions

17
kcc.py
View File

@@ -16,22 +16,7 @@
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE. # PERFORMANCE OF THIS SOFTWARE.
# #
# Changelog __version__ = '2.2'
# 1.00 - Initial version
# 1.10 - Added support for CBZ/CBR files
# 1.11 - Added support for ZIP/RAR extensions
# 1.20 - Comic optimizations! Split pages not target-oriented (landscape
# with portrait target or portrait with landscape target), add palette
# and other image optimizations from Mangle.
# WARNING: PIL is required for all image mangling!
#
# Todo:
# - Add gracefully exit for CBR if no rarfile.py and no unrar
# executable are found
# - Improve error reporting
# - recurse into dirtree for multiple comics
__version__ = '2.0'
__license__ = 'ISC' __license__ = 'ISC'
__copyright__ = '2012-2013, Ciro Mattia Gonano <ciromattia@gmail.com>' __copyright__ = '2012-2013, Ciro Mattia Gonano <ciromattia@gmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'

View File

@@ -36,29 +36,29 @@
# executable are found # executable are found
# - Improve error reporting # - Improve error reporting
__version__ = '1.50' __version__ = '2.10'
__license__ = 'ISC' __license__ = 'ISC'
__copyright__ = '2012-2013, Ciro Mattia Gonano <ciromattia@gmail.com>' __copyright__ = '2012-2013, Ciro Mattia Gonano <ciromattia@gmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os, sys
import sys from shutil import move,copyfile,make_archive,rmtree
from optparse import OptionParser from optparse import OptionParser
import image, cbxarchive, pdfjpgextract import image, cbxarchive, pdfjpgextract
def buildHTML(path,file): def buildHTML(path,file):
filename = getImageFileName(file) filename = getImageFileName(file)
if filename is not None: if filename is not None:
htmlfile = os.path.join(path,filename[0] + '.html') htmlfile = os.path.join(path.replace('/Images','/Text'),filename[0] + '.html')
f = open(htmlfile, "w") f = open(htmlfile, "w")
f.writelines(["<!DOCTYPE html SYSTEM \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n", f.writelines(["<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n",
"<html xmlns=\"http://www.w3.org/1999/xhtml\">\n", "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n",
"<head>\n", "<head>\n",
"<title>",filename[0],"</title>\n", "<title>",filename[0],"</title>\n",
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>\n", "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>\n",
"</head>\n", "</head>\n",
"<body>\n", "<body>\n",
"<div><img src=\"",file,"\" /></div>\n", "<div><img src=\"../Images/",file,"\" alt=\"",file,"\" /></div>\n",
"</body>\n", "</body>\n",
"</html>" "</html>"
]) ])
@@ -66,36 +66,42 @@ def buildHTML(path,file):
return path,file return path,file
def buildNCX(dstdir, title, chapters): def buildNCX(dstdir, title, chapters):
ncxfile = dstdir + '/toc.ncx' ncxfile = dstdir + '/OEBPS/toc.ncx'
f = open(ncxfile, "w") f = open(ncxfile, "w")
f.writelines(["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", f.writelines(["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
"<!DOCTYPE ncx PUBLIC \"-//NISO//DTD ncx 2005-1//EN\" \"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd\">\n", "<!DOCTYPE ncx PUBLIC \"-//NISO//DTD ncx 2005-1//EN\" \"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd\">\n",
"<ncx version=\"2005-1\" xml:lang=\"en-US\" xmlns=\"http://www.daisy.org/z3986/2005/ncx/\">\n", "<ncx version=\"2005-1\" xml:lang=\"en-US\" xmlns=\"http://www.daisy.org/z3986/2005/ncx/\">\n",
"<head>\n</head>\n", "<head>\n",
"<meta name=\"dtb:uid\" content=\"015ffaec-9340-42f8-b163-a0c5ab7d0611\"/>\n",
"<meta name=\"dtb:depth\" content=\"2\"/>\n",
"<meta name=\"dtb:totalPageCount\" content=\"0\"/>\n",
"<meta name=\"dtb:maxPageNumber\" content=\"0\"/>\n",
"</head>\n",
"<docTitle><text>",title,"</text></docTitle>\n", "<docTitle><text>",title,"</text></docTitle>\n",
"<navMap>" "<navMap>"
]) ])
for chapter in chapters: for chapter in chapters:
folder = chapter[0].replace(dstdir,'').lstrip('/') folder = chapter[0].replace(dstdir + '/OEBPS','').lstrip('/')
title = os.path.basename(folder) title = os.path.basename(folder)
filename = getImageFileName(os.path.join(folder,chapter[1])) filename = getImageFileName(os.path.join(folder,chapter[1]))
f.write("<navPoint id=\"" + folder + "\"><navLabel><text>" + title f.write("<navPoint id=\"" + folder.replace('/','_') + "\"><navLabel><text>" + title
+ "</text></navLabel><content src=\"" + filename[0] + ".html\"/></navPoint>\n") + "</text></navLabel><content src=\"" + filename[0] + ".html\"/></navPoint>\n")
f.write("</navMap>\n</ncx>") f.write("</navMap>\n</ncx>")
f.close() f.close()
return return
def buildOPF(profile, dstdir, title, filelist, cover=None): def buildOPF(profile, dstdir, title, filelist, cover=None):
opffile = dstdir + '/content.opf' opffile = dstdir + '/OEBPS/content.opf'
# read the first file resolution # read the first file resolution
profilelabel, deviceres, palette = image.ProfileData.Profiles[profile] profilelabel, deviceres, palette = image.ProfileData.Profiles[profile]
imgres = str(deviceres[0]) + "x" + str(deviceres[1]) imgres = str(deviceres[0]) + "x" + str(deviceres[1])
f = open(opffile, "w") f = open(opffile, "w")
f.writelines(["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", f.writelines(["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
"<package version=\"2.0\" unique-identifier=\"PrimaryID\" xmlns=\"http://www.idpf.org/2007/opf\">\n", "<package version=\"2.0\" unique-identifier=\"BookID\" xmlns=\"http://www.idpf.org/2007/opf\">\n",
"<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:opf=\"http://www.idpf.org/2007/opf\">\n", "<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:opf=\"http://www.idpf.org/2007/opf\">\n",
"<dc:title>",title,"</dc:title>\n", "<dc:title>",title,"</dc:title>\n",
"<dc:language>en-US</dc:language>\n", "<dc:language>en-US</dc:language>\n",
"<dc:identifier id=\"BookID\" opf:scheme=\"UUID\">015ffaec-9340-42f8-b163-a0c5ab7d0611</dc:identifier>\n",
"<meta name=\"cover\" content=\"cover\"/>\n", "<meta name=\"cover\" content=\"cover\"/>\n",
"<meta name=\"book-type\" content=\"comic\"/>\n", "<meta name=\"book-type\" content=\"comic\"/>\n",
"<meta name=\"zero-gutter\" content=\"true\"/>\n", "<meta name=\"zero-gutter\" content=\"true\"/>\n",
@@ -107,18 +113,19 @@ def buildOPF(profile, dstdir, title, filelist, cover=None):
]) ])
# set cover # set cover
if cover is not None: if cover is not None:
folder = cover[0].replace(dstdir,'').lstrip('/') filename = getImageFileName(cover.replace(dstdir + '/OEBPS','').lstrip('/'))
filename = getImageFileName(cover[1])
if '.png' == filename[1]: if '.png' == filename[1]:
mt = 'image/png' mt = 'image/png'
else: else:
mt = 'image/jpeg' mt = 'image/jpeg'
f.write("<item id=\"cover\" href=\"" + os.path.join(folder,cover[1]) + "\" media-type=\"" + mt + "\"/>\n") f.write("<item id=\"cover\" href=\"" + filename[0] + filename[1] + "\" media-type=\"" + mt + "\"/>\n")
reflist = []
for path in filelist: for path in filelist:
folder = path[0].replace(dstdir,'').lstrip('/') folder = path[0].replace(dstdir + '/OEBPS','').lstrip('/')
filename = getImageFileName(path[1]) filename = getImageFileName(path[1])
uniqueid = os.path.join(folder,filename[0]).replace('/','_') uniqueid = os.path.join(folder,filename[0]).replace('/','_')
f.write("<item id=\"page_" + uniqueid + "\" href=\"" + os.path.join(folder,filename[0]) reflist.append(uniqueid)
f.write("<item id=\"page_" + uniqueid + "\" href=\"" + os.path.join(folder.replace('Images/','Text/'),filename[0])
+ ".html\" media-type=\"application/xhtml+xml\"/>\n") + ".html\" media-type=\"application/xhtml+xml\"/>\n")
if '.png' == filename[1]: if '.png' == filename[1]:
mt = 'image/png' mt = 'image/png'
@@ -126,13 +133,23 @@ def buildOPF(profile, dstdir, title, filelist, cover=None):
mt = 'image/jpeg' mt = 'image/jpeg'
f.write("<item id=\"img_" + uniqueid + "\" href=\"" + os.path.join(folder,path[1]) + "\" media-type=\"" + mt + "\"/>\n") f.write("<item id=\"img_" + uniqueid + "\" href=\"" + os.path.join(folder,path[1]) + "\" media-type=\"" + mt + "\"/>\n")
f.write("</manifest>\n<spine toc=\"ncx\">\n") f.write("</manifest>\n<spine toc=\"ncx\">\n")
for path in filelist: for entry in reflist:
folder = path[0].replace(dstdir,'').lstrip('/') f.write("<itemref idref=\"page_" + entry + "\" />\n")
filename = getImageFileName(path[1])
uniqueid = os.path.join(folder,filename[0]).replace('/','_')
f.write("<itemref idref=\"page_" + uniqueid + "\" />\n")
f.write("</spine>\n<guide>\n</guide>\n</package>\n") f.write("</spine>\n<guide>\n</guide>\n</package>\n")
f.close() f.close()
# finish with standard ePub folders
os.mkdir(dstdir + '/META-INF')
f = open(dstdir + '/mimetype', 'w')
f.write('application/epub+zip')
f.close()
f = open(dstdir + '/META-INF/container.xml', 'w')
f.writelines(["<?xml version=\"1.0\"?>\n",
"<container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\n",
"<rootfiles>\n",
"<rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\n",
"</rootfiles>\n",
"</container>"])
f.close()
return return
def getImageFileName(file): def getImageFileName(file):
@@ -188,10 +205,12 @@ def genEpubStruct(path):
filelist = [] filelist = []
chapterlist = [] chapterlist = []
cover = None cover = None
for (dirpath, dirnames, filenames) in os.walk(path): os.mkdir(path + "/OEBPS/Text")
for (dirpath, dirnames, filenames) in os.walk(path + "/OEBPS/Images/"):
chapter = False chapter = False
for file in filenames: for file in filenames:
if getImageFileName(file) is not None: filename = getImageFileName(file)
if filename is not None:
# put credits at the end # put credits at the end
if "credit" in file.lower(): if "credit" in file.lower():
os.rename(os.path.join(dirpath,file), os.path.join(dirpath,'ZZZ999_'+file)) os.rename(os.path.join(dirpath,file), os.path.join(dirpath,'ZZZ999_'+file))
@@ -202,10 +221,11 @@ def genEpubStruct(path):
file = newfilename file = newfilename
filelist.append(buildHTML(dirpath,file)) filelist.append(buildHTML(dirpath,file))
if not chapter: if not chapter:
chapterlist.append((dirpath,filelist[-1][1])) chapterlist.append((dirpath.replace('/Images','/Text'),filelist[-1][1]))
chapter = True chapter = True
if cover is None: if cover is None:
cover = filelist[-1] cover = os.path.join(filelist[-1][0],'cover' + getImageFileName(filelist[-1][1])[1])
copyfile(os.path.join(filelist[-1][0],filelist[-1][1]), cover)
if options.title == 'defaulttitle': if options.title == 'defaulttitle':
options.title = os.path.basename(path) options.title = os.path.basename(path)
buildNCX(path,options.title,chapterlist) buildNCX(path,options.title,chapterlist)
@@ -218,20 +238,26 @@ def getWorkFolder(file):
if fname[1].lower() == '.pdf': if fname[1].lower() == '.pdf':
pdf = pdfjpgextract.PdfJpgExtract(file) pdf = pdfjpgextract.PdfJpgExtract(file)
pdf.extract() pdf.extract()
return pdf.getPath() path = pdf.getPath()
else: else:
if fname[1].lower() == '.zip':
move(file,fname[0] + '.cbz')
file = fname[0] + '.cbz'
cbx = cbxarchive.CBxArchive(file) cbx = cbxarchive.CBxArchive(file)
if cbx.isCbxFile(): if cbx.isCbxFile():
cbx.extract() cbx.extract()
return cbx.getPath() path = cbx.getPath()
else: else:
try: try:
import shutil import shutil
if not os.path.isdir(file + "_orig"): if not os.path.isdir(file + "_orig"):
shutil.copytree(file, file + "_orig") shutil.copytree(file, file + "_orig")
return file path = file
except OSError: except OSError:
raise raise
move(path,path + "_temp")
move(path + "_temp",path + "/OEBPS/Images/")
return path
def Copyright(): def Copyright():
print ('comic2ebook v%(__version__)s. ' print ('comic2ebook v%(__version__)s. '
@@ -270,10 +296,14 @@ def main(argv=None):
path = getWorkFolder(path) path = getWorkFolder(path)
if options.imgproc: if options.imgproc:
print "Processing images..." print "Processing images..."
dirImgProcess(path) dirImgProcess(path + "/OEBPS/Images/")
print "Creating ePub structure..." print "Creating ePub structure..."
genEpubStruct(path) genEpubStruct(path)
epub_path = path # actually zip the ePub
make_archive(path,'zip',path)
move(path + '.zip', path + '.epub')
rmtree(path)
epub_path = path + '.epub'
def getEpubPath(): def getEpubPath():
global epub_path global epub_path

View File

@@ -79,6 +79,8 @@ class MainWindow:
w = apply(OptionMenu, (self.master, self.profile) + tuple(options)) w = apply(OptionMenu, (self.master, self.profile) + tuple(options))
w.grid(row=1,column=3) w.grid(row=1,column=3)
self.epub_only = IntVar()
self.epub_only = 0
self.image_preprocess = IntVar() self.image_preprocess = IntVar()
self.image_preprocess = 1 self.image_preprocess = 1
self.cut_page_numbers = IntVar() self.cut_page_numbers = IntVar()
@@ -89,28 +91,31 @@ class MainWindow:
self.image_upscale = 0 self.image_upscale = 0
self.image_stretch = IntVar() self.image_stretch = IntVar()
self.image_stretch = 0 self.image_stretch = 0
self.c = Checkbutton(self.master, text="Generate ePub only (does not call 'kindlegen')",
variable=self.epub_only)
self.c.grid(row=3,column=3,sticky=W)
self.c = Checkbutton(self.master, text="Apply image optimizations", self.c = Checkbutton(self.master, text="Apply image optimizations",
variable=self.image_preprocess) variable=self.image_preprocess)
self.c.select() self.c.select()
self.c.grid(row=2,column=3,sticky=W) self.c.grid(row=4,column=3,sticky=W)
self.c = Checkbutton(self.master, text="Cut page numbers", self.c = Checkbutton(self.master, text="Cut page numbers",
variable=self.cut_page_numbers) variable=self.cut_page_numbers)
self.c.grid(row=3,column=3,sticky=W) self.c.grid(row=5,column=3,sticky=W)
self.c = Checkbutton(self.master, text="Split manga-style (right-to-left reading)", self.c = Checkbutton(self.master, text="Split manga-style (right-to-left reading)",
variable=self.mangastyle) variable=self.mangastyle)
self.c.grid(row=4,column=3,sticky=W) self.c.grid(row=6,column=3,sticky=W)
self.c = Checkbutton(self.master, text="Allow image upscaling", self.c = Checkbutton(self.master, text="Allow image upscaling",
variable=self.image_upscale) variable=self.image_upscale)
self.c.grid(row=5,column=3,sticky=W) self.c.grid(row=7,column=3,sticky=W)
self.c = Checkbutton(self.master, text="Stretch images", self.c = Checkbutton(self.master, text="Stretch images",
variable=self.image_stretch) variable=self.image_stretch)
self.c.grid(row=6,column=3,sticky=W) self.c.grid(row=8,column=3,sticky=W)
self.progressbar = ttk.Progressbar(orient=HORIZONTAL, length=200, mode='determinate') self.progressbar = ttk.Progressbar(orient=HORIZONTAL, length=200, mode='determinate')
self.submit = Button(self.master, text="Execute!", command=self.start_conversion, fg="red") self.submit = Button(self.master, text="Execute!", command=self.start_conversion, fg="red")
self.submit.grid(row=7,column=3) self.submit.grid(row=9,column=3)
self.progressbar.grid(row=8,column=0,columnspan=4,sticky=W+E+N+S) self.progressbar.grid(row=10,column=0,columnspan=4,sticky=W+E+N+S)
# self.debug = Listbox(self.master) # self.debug = Listbox(self.master)
# self.debug.grid(row=9,columnspan=4,sticky=W+E+N+S) # self.debug.grid(row=9,columnspan=4,sticky=W+E+N+S)
@@ -141,27 +146,30 @@ class MainWindow:
subargv = list(argv) subargv = list(argv)
subargv.append(entry) subargv.append(entry)
comic2ebook.main(subargv) comic2ebook.main(subargv)
path = comic2ebook.getEpubPath() epub_path = comic2ebook.getEpubPath()
except Exception, err: except Exception, err:
tkMessageBox.showerror('Error comic2ebook', "Error on file %s:\n%s" % (subargv[-1], str(err))) tkMessageBox.showerror('Error comic2ebook', "Error on file %s:\n%s" % (subargv[-1], str(err)))
errors = True errors = True
continue continue
if self.epub_only == 1:
continue;
try: try:
retcode = call("kindlegen " + path + "/content.opf", shell=True) retcode = call("kindlegen " + epub_path, shell=True)
if retcode < 0: if retcode < 0:
print >>sys.stderr, "Child was terminated by signal", -retcode print >>sys.stderr, "Child was terminated by signal", -retcode
else: else:
print >>sys.stderr, "Child returned", retcode print >>sys.stderr, "Child returned", retcode
except OSError as e: except OSError as e:
tkMessageBox.showerror('Error kindlegen', "Error on file %s:\n%s" % (path + "/content.opf", e)) tkMessageBox.showerror('Error kindlegen', "Error on file %s:\n%s" % (epub_path, e))
errors = True errors = True
continue continue
try: try:
kindlestrip.main((path + "/content.mobi", path + '.mobi')) mobifile = epub_path.replace('.epub','.mobi')
# try to clean up temp files... may be destructive!!! shutil.move(mobifile,mobifile + '_tostrip')
shutil.rmtree(path, onerror=self.remove_readonly) kindlestrip.main((mobifile + '_tostrip', mobifile))
os.remove(mobifile + '_tostrip')
except Exception, err: except Exception, err:
tkMessageBox.showerror('Error', "Error on file %s:\n%s" % (path + "/content.mobi", str(err))) tkMessageBox.showerror('Error', "Error on file %s:\n%s" % (mobifile, str(err)))
errors = True errors = True
continue continue
if errors: if errors: