mirror of
https://github.com/ciromattia/kcc
synced 2026-02-22 03:59:11 +00:00
Preparing the field for switching towards a full-fledged app (get rid of AppleScript) via py2app/py2exe
This commit is contained in:
0
kcc/__init__.py
Normal file
0
kcc/__init__.py
Normal file
82
kcc/cbxarchive.py
Normal file
82
kcc/cbxarchive.py
Normal file
@@ -0,0 +1,82 @@
|
||||
# Copyright (c) 2012 Ciro Mattia Gonano <ciromattia@gmail.com>
|
||||
#
|
||||
# Permission to use, copy, modify, and/or distribute this software for
|
||||
# any purpose with or without fee is hereby granted, provided that the
|
||||
# above copyright notice and this permission notice appear in all
|
||||
# copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
||||
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
||||
# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
|
||||
# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
|
||||
# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
# PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
__version__ = '1.0'
|
||||
|
||||
import os
|
||||
|
||||
class CBxArchive:
    """Comic book archive (CBZ/ZIP or CBR/RAR) that can be unpacked to a folder.

    The extraction folder is the archive path with its extension removed.
    """

    def __init__(self, origFileName):
        """Remember the archive path and derive the extraction folder from it."""
        self.cbxexts = ['.zip', '.cbz', '.rar', '.cbr']
        self.origFileName = origFileName
        # (root, ext) pair; the root doubles as the extraction folder.
        self.filename = os.path.splitext(origFileName)
        self.path = self.filename[0]

    def isCbxFile(self):
        """Return True when the file extension is one of the supported ones."""
        return self.filename[1].lower() in self.cbxexts

    def getPath(self):
        """Return the folder the archive is (or will be) extracted into."""
        return self.path

    def _extractMembers(self, archive):
        """Extract every member of an opened archive into self.path.

        `archive` only needs `namelist()` and `extract(name, path)`, which is
        what both the zip and the rar objects used below provide.
        """
        for member in archive.namelist():
            if member.startswith('__MACOSX') or member.endswith('.DS_Store'):
                continue  # skip MacOS special files
            if member.endswith('/'):
                try:
                    os.makedirs(self.path + '/' + member)
                except OSError:
                    # the dir exists so we are going to extract the images only.
                    pass
            else:
                archive.extract(member, self.path)

    def extractCBZ(self):
        """Unpack a zip-based archive; does nothing if zipfile is unavailable."""
        try:
            from zipfile import ZipFile
        except ImportError:
            self.cbzFile = None
            return  # without this the code below would hit a NameError
        cbzFile = ZipFile(self.origFileName)
        try:
            self._extractMembers(cbzFile)
        finally:
            cbzFile.close()

    def extractCBR(self):
        """Unpack a rar-based archive; does nothing if rarfile is unavailable."""
        try:
            import rarfile
        except ImportError:
            self.cbrFile = None
            return
        cbrFile = rarfile.RarFile(self.origFileName)
        self._extractMembers(cbrFile)

    def extract(self):
        """Unpack the archive and flatten a single wrapping folder, if any.

        When the extraction folder ends up holding exactly one directory, its
        contents are moved one level up and the empty directory is removed.
        A single extracted *file* is left where it is.
        """
        ext = self.filename[1].lower()
        if ext in ('.cbr', '.rar'):
            self.extractCBR()
        elif ext in ('.cbz', '.zip'):
            self.extractCBZ()
        entries = os.listdir(self.path)
        if len(entries) != 1:
            return
        wrapper = self.path + "/" + entries[0]
        if not os.path.isdir(wrapper):
            # a one-page archive: listing a file as a directory would raise
            return
        import shutil
        for f in os.listdir(wrapper):
            shutil.move(wrapper + "/" + f, self.path)
        os.rmdir(wrapper)
|
||||
185
kcc/comic2ebook.py
Executable file
185
kcc/comic2ebook.py
Executable file
@@ -0,0 +1,185 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright (c) 2012 Ciro Mattia Gonano <ciromattia@gmail.com>
|
||||
#
|
||||
# Permission to use, copy, modify, and/or distribute this software for
|
||||
# any purpose with or without fee is hereby granted, provided that the
|
||||
# above copyright notice and this permission notice appear in all
|
||||
# copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
||||
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
||||
# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
|
||||
# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
|
||||
# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
# PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
# Changelog
|
||||
# 1.00 - Initial version
|
||||
# 1.10 - Added support for CBZ/CBR files
|
||||
# 1.11 - Added support for ZIP/RAR extensions
|
||||
# 1.20 - Comic optimizations! Split pages not target-oriented (landscape
|
||||
# with portrait target or portrait with landscape target), add palette
|
||||
# and other image optimizations from Mangle.
|
||||
# WARNING: PIL is required for all image mangling!
|
||||
#
|
||||
# Todo:
|
||||
# - Add graceful exit for CBR if no rarfile.py and no unrar
|
||||
# executable are found
|
||||
# - Improve error reporting
|
||||
# - recurse into dirtree for multiple comics
|
||||
|
||||
__version__ = '1.20'
|
||||
|
||||
import os
|
||||
import sys
|
||||
import image, cbxarchive
|
||||
|
||||
class HTMLbuilder:
    """Write a minimal XHTML page that wraps a single comic image.

    Constructing the object performs the write: for an accepted image file
    name, `<dstdir>/<name without extension>.html` is created. Files rejected
    by getImageFileName() produce no output.
    """

    def getResult(self):
        """Return getImageFileName() for the file given to the constructor."""
        return getImageFileName(self.file)

    def __init__(self, dstdir, file):
        """Create the page for `file` inside `dstdir`.

        dstdir -- folder the .html file is written to
        file   -- image file name, used as the <img> source as given
        """
        from xml.sax.saxutils import escape
        self.file = file
        filename = getImageFileName(file)
        if filename is None:
            return
        htmlfile = dstdir + '/' + filename[0] + '.html'
        # File names may contain '&', '<' or '"'; escape them so the page
        # stays well-formed XHTML.
        lines = ["<!DOCTYPE html SYSTEM \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n",
                 "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n",
                 "<head>\n",
                 "<title>", escape(filename[0]), "</title>\n",
                 "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>\n",
                 "</head>\n",
                 "<body>\n",
                 "<div><img src=\"", escape(file, {'"': '&quot;'}), "\" /></div>\n",
                 "</body>\n",
                 "</html>"
                 ]
        # `with` closes the handle even if the write fails.
        with open(htmlfile, "w") as f:
            f.writelines(lines)
|
||||
|
||||
class NCXbuilder:
    """Write `content.ncx` (title plus an empty navMap) into a folder.

    Constructing the object performs the write.
    """

    def __init__(self, dstdir, title):
        """Create `<dstdir>/content.ncx` carrying `title` as the docTitle."""
        from xml.sax.saxutils import escape
        ncxfile = dstdir + '/content.ncx'
        # `with` closes the handle even if the write fails; the title is
        # escaped so characters such as '&' cannot break the XML.
        with open(ncxfile, "w") as f:
            f.writelines(["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
                          "<!DOCTYPE ncx PUBLIC \"-//NISO//DTD ncx 2005-1//EN\" \"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd\">\n",
                          "<ncx version=\"2005-1\" xml:lang=\"en-US\" xmlns=\"http://www.daisy.org/z3986/2005/ncx/\">\n",
                          "<head>\n</head>\n",
                          "<docTitle><text>", escape(title), "</text></docTitle>\n",
                          "<navMap></navMap>\n</ncx>"
                          ])
|
||||
|
||||
class OPFBuilder:
    """Write `content.opf` (metadata, manifest and spine) into a folder.

    Constructing the object performs the write.
    """

    def __init__(self, dstdir, title, filelist):
        """Create `<dstdir>/content.opf`.

        dstdir   -- folder holding the pages; the OPF is written here
        title    -- book title stored in <dc:title>
        filelist -- sequence of (name, extension) pairs, one per page, in
                    reading order; each page contributes an .html item, an
                    image item and a spine entry
        """
        from xml.sax.saxutils import escape

        def attr(text):
            # escape for use inside a double-quoted XML attribute
            return escape(text, {'"': '&quot;'})

        opffile = dstdir + '/content.opf'
        # read the first file resolution, keeping the HD default when that
        # is not possible
        imgres = "758x1024"
        try:
            from PIL import Image
            im = Image.open(dstdir + "/" + filelist[0][0] + filelist[0][1])
            width, height = im.size
            imgres = str(width) + "x" + str(height)
        except ImportError:
            print("Could not load PIL, falling back on default HD")
        except (IndexError, IOError, OSError):
            # empty filelist or unreadable first page
            print("Could not read the first page, falling back on default HD")
        with open(opffile, "w") as f:
            f.writelines(["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
                          "<package version=\"2.0\" unique-identifier=\"PrimaryID\" xmlns=\"http://www.idpf.org/2007/opf\">\n",
                          "<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:opf=\"http://www.idpf.org/2007/opf\">\n",
                          "<dc:title>", escape(title), "</dc:title>\n",
                          "<dc:language>en-US</dc:language>\n",
                          "<meta name=\"book-type\" content=\"comic\"/>\n",
                          "<meta name=\"zero-gutter\" content=\"true\"/>\n",
                          "<meta name=\"zero-margin\" content=\"true\"/>\n",
                          "<meta name=\"fixed-layout\" content=\"true\"/>\n",
                          "<meta name=\"orientation-lock\" content=\"portrait\"/>\n",
                          "<meta name=\"original-resolution\" content=\"" + imgres + "\"/>\n",
                          "</metadata><manifest><item id=\"ncx\" href=\"content.ncx\" media-type=\"application/x-dtbncx+xml\"/>\n"])
            for filename in filelist:
                page = attr(filename[0])
                f.write("<item id=\"page_" + page + "\" href=\"" + page + ".html\" media-type=\"application/xhtml+xml\"/>\n")
            for filename in filelist:
                # extensions are accepted case-insensitively elsewhere in
                # this file, so compare the same way here
                if filename[1].lower() == '.png':
                    mt = 'image/png'
                else:
                    mt = 'image/jpeg'
                f.write("<item id=\"img_" + attr(filename[0]) + "\" href=\"" + attr(filename[0] + filename[1]) + "\" media-type=\"" + mt + "\"/>\n")
            f.write("</manifest>\n<spine toc=\"ncx\">\n")
            for filename in filelist:
                f.write("<itemref idref=\"page_" + attr(filename[0]) + "\" />\n")
            f.write("</spine>\n<guide>\n</guide>\n</package>\n")
|
||||
|
||||
def getImageFileName(file):
    """Split an image file name into (root, extension), or return None.

    None is returned for hidden files (base name starting with '.') and for
    anything whose extension is not .png, .jpg or .jpeg (case-insensitive).
    The hidden-file test looks at the base name only, so a leading './' or a
    directory prefix does not change the verdict.
    """
    filename = os.path.splitext(file)
    if os.path.basename(filename[0]).startswith('.'):
        return None
    if filename[1].lower() not in ('.png', '.jpg', '.jpeg'):
        return None
    return filename
|
||||
|
||||
def isInFilelist(file,list):
    """Return True if `file`, minus its extension, is already in `list`.

    file -- file name whose root (text before the extension) is looked up
    list -- sequence of (root, extension) pairs
    """
    root = os.path.splitext(file)[0]
    # any() stops at the first match instead of scanning the whole list
    return any(root == item[0] for item in list)
|
||||
|
||||
# Command-line entry point: <profile> <dir or archive> [<title>].
# Unpacks the archive when one is given, splits and optimizes the page
# images, then writes one HTML page per image plus content.ncx/content.opf.
if __name__ == "__main__":
    print('comic2ebook v%(__version__)s. '
          'Written 2012 by Ciro Mattia Gonano.' % globals())
    if len(sys.argv) < 3 or len(sys.argv) > 4:
        print("Generates HTML, NCX and OPF for a Comic ebook from a bunch of images")
        print("Optimized for creating Mobipockets to be read into Kindle Paperwhite")
        print("Usage:")
        print(" %s <profile> <dir> <title>" % sys.argv[0])
        print(" <title> is optional")
        sys.exit(1)
    else:
        profile = sys.argv[1]
        srcdir = sys.argv[2]
        cbx = cbxarchive.CBxArchive(srcdir)
        if cbx.isCbxFile():
            cbx.extract()
            srcdir = cbx.getPath()
        if len(sys.argv) == 4:
            title = sys.argv[3]
        else:
            title = "comic"
        filelist = []
        # NOTE(review): `image` is imported at the top of this file, so a
        # missing PIL would presumably fail there first -- confirm whether
        # this ImportError handler is still reachable.
        try:
            print("Splitting double pages...")
            for fname in os.listdir(srcdir):
                if getImageFileName(fname) is not None:
                    img = image.ComicPage(srcdir + '/' + fname, profile)
                    img.splitPage(srcdir)
            # list again: splitting replaces a page with its two halves
            for fname in os.listdir(srcdir):
                if getImageFileName(fname) is not None:
                    print("Optimizing " + fname + " for " + profile)
                    img = image.ComicPage(srcdir + '/' + fname, profile)
                    img.resizeImage()
                    #img.frameImage()
                    img.quantizeImage()
                    img.saveToDir(srcdir)
        except ImportError:
            print("Could not load PIL, not optimizing image")

        for fname in os.listdir(srcdir):
            if getImageFileName(fname) is not None and not isInFilelist(fname, filelist):
                # put credits at the end; skip files already renamed by an
                # earlier run so the prefix is not stacked
                if "credits" in fname.lower() and not fname.startswith('ZZZ999_'):
                    os.rename(srcdir + '/' + fname, srcdir + '/ZZZ999_' + fname)
                    fname = 'ZZZ999_' + fname
                filename = HTMLbuilder(srcdir, fname).getResult()
                if filename is not None:
                    filelist.append(filename)
        NCXbuilder(srcdir, title)
        # ensure we're sorting files alphabetically
        filelist = sorted(filelist, key=lambda name: name[0])
        OPFBuilder(srcdir, title, filelist)
    sys.exit(0)
|
||||
207
kcc/image.py
Executable file
207
kcc/image.py
Executable file
@@ -0,0 +1,207 @@
|
||||
# Copyright (C) 2010 Alex Yatskov
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import os
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
class ImageFlags:
    """Single-bit flag constants; each name owns one distinct bit."""
    Orient = 0x01
    Resize = 0x02
    Frame = 0x04
    Quantize = 0x08
    Stretch = 0x10
|
||||
|
||||
|
||||
def _grayPalette(levels):
    """Expand gray levels into a flat [r, g, b, r, g, b, ...] list."""
    flat = []
    for level in levels:
        flat.extend((level, level, level))
    return flat


class KindleData:
    """Gray palettes and per-device (screen size, palette) profiles."""

    # 4 gray levels
    Palette4 = _grayPalette((0x00, 0x55, 0xaa, 0xff))

    # 15 gray levels, 0xee left out
    Palette15a = _grayPalette((
        0x00, 0x11, 0x22, 0x33, 0x44,
        0x55, 0x66, 0x77, 0x88, 0x99,
        0xaa, 0xbb, 0xcc, 0xdd, 0xff,
    ))

    # 15 gray levels, 0x66 left out
    Palette15b = _grayPalette((
        0x00, 0x11, 0x22, 0x33, 0x44,
        0x55, 0x77, 0x88, 0x99, 0xaa,
        0xbb, 0xcc, 0xdd, 0xee, 0xff,
    ))

    # profile name -> ((width, height), palette)
    Profiles = {
        'K1': ((600, 800), Palette4),
        'K2': ((600, 800), Palette15a),
        'K3': ((600, 800), Palette15a),
        'K4': ((600, 800), Palette15b),
        'KHD': ((758, 1024), Palette15b),
        'KDX': ((824, 1200), Palette15a)
    }
|
||||
|
||||
class ComicPage:
    """One comic page image plus the operations that fit it to a device.

    The target screen size and palette come from KindleData.Profiles.
    """

    def __init__(self,source,device):
        """Open `source` and convert it to RGB.

        source -- path of the image file
        device -- key of KindleData.Profiles

        Raises RuntimeError for an unknown device or an unreadable image.
        """
        try:
            self.size, self.palette = KindleData.Profiles[device]
        except KeyError:
            raise RuntimeError('Unexpected output device %s' % device)
        try:
            self.origFileName = source
            self.image = Image.open(source)
        except IOError:
            raise RuntimeError('Cannot read image file %s' % source)
        self.image = self.image.convert('RGB')

    @staticmethod
    def _resampleFilter():
        """Return the high-quality downscaling filter this PIL build offers."""
        # Image.ANTIALIAS is gone from recent Pillow releases; LANCZOS is
        # its current name. Very old PIL only knows ANTIALIAS.
        lanczos = getattr(Image, 'LANCZOS', None)
        if lanczos is not None:
            return lanczos
        return Image.ANTIALIAS

    def saveToDir(self,targetdir):
        """Convert to grayscale and save under the original base name.

        Raises RuntimeError if the file cannot be written.
        """
        filename = os.path.basename(self.origFileName)
        print("Saving to " + targetdir + '/' + filename)
        try:
            self.image = self.image.convert('L') # convert to grayscale
            # NOTE(review): the data is always written as JPEG, even when
            # the file name ends in .png -- confirm that this is intended.
            self.image.save(targetdir + '/' + filename,"JPEG")
        except IOError as e:
            raise RuntimeError('Cannot write image in directory %s: %s' %(targetdir,e))

    def quantizeImage(self):
        """Reduce the image to the colours of the device palette."""
        # integer division: the count is used as a list repeat factor
        colors = len(self.palette) // 3
        palette = self.palette
        if colors < 256:
            # pad to 256 entries by repeating the first colour
            palette = self.palette + self.palette[:3] * (256 - colors)
        palImg = Image.new('P', (1, 1))
        palImg.putpalette(palette)
        self.image = self.image.quantize(palette=palImg)

    def stretchImage(self):
        """Resize to exactly the device size, ignoring the aspect ratio."""
        widthDev, heightDev = self.size
        self.image = self.image.resize((widthDev, heightDev), self._resampleFilter())

    def resizeImage(self):
        """Shrink the image to fit the device, keeping the aspect ratio.

        Images that already fit are left untouched. Returns the current
        image in both cases.
        """
        widthDev, heightDev = self.size
        widthImg, heightImg = self.image.size
        if widthImg <= widthDev and heightImg <= heightDev:
            return self.image
        ratioImg = float(widthImg) / float(heightImg)
        ratioWidth = float(widthImg) / float(widthDev)
        ratioHeight = float(heightImg) / float(heightDev)
        if ratioWidth > ratioHeight:
            widthImg = widthDev
            # never let an extreme aspect ratio round a side down to 0
            heightImg = max(1, int(widthDev / ratioImg))
        elif ratioWidth < ratioHeight:
            heightImg = heightDev
            widthImg = max(1, int(heightDev * ratioImg))
        else:
            widthImg, heightImg = self.size
        self.image = self.image.resize((widthImg, heightImg), self._resampleFilter())
        return self.image

    def orientImage(self):
        """Rotate by 90 degrees when image and device orientation differ."""
        widthDev, heightDev = self.size
        widthImg, heightImg = self.image.size
        if (widthImg > heightImg) != (widthDev > heightDev):
            self.image = self.image.rotate(90, Image.BICUBIC, True)

    def splitPage(self, targetdir, righttoleft=False):
        """Split a page whose orientation differs from the device in two.

        The halves are saved in `targetdir` as `<name>-1<ext>` and
        `<name>-2<ext>` and the original file is deleted.

        targetdir   -- folder receiving the two halves
        righttoleft -- when True the right (or bottom) half becomes page 1

        Returns (fileone, filetwo), or None when no split was needed.
        Raises RuntimeError if a half cannot be written.
        """
        width, height = self.image.size
        dstwidth, dstheight = self.size
        print("Image is %d x %d" % (width,height))
        # only split if origin is not oriented the same as target
        if (width > height) == (dstwidth > dstheight):
            return None
        # integer division keeps the crop boxes on whole pixels
        if width > height:
            # source is landscape, so split by the width
            leftbox = (0, 0, width // 2, height)
            rightbox = (width // 2, 0, width, height)
        else:
            # source is portrait and target is landscape, so split by the height
            leftbox = (0, 0, width, height // 2)
            rightbox = (0, height // 2, width, height)
        filename = os.path.splitext(os.path.basename(self.origFileName))
        fileone = targetdir + '/' + filename[0] + '-1' + filename[1]
        filetwo = targetdir + '/' + filename[0] + '-2' + filename[1]
        try:
            if righttoleft:
                pageone = self.image.crop(rightbox)
                pagetwo = self.image.crop(leftbox)
            else:
                pageone = self.image.crop(leftbox)
                pagetwo = self.image.crop(rightbox)
            pageone.save(fileone)
            pagetwo.save(filetwo)
            os.remove(self.origFileName)
        except IOError as e:
            raise RuntimeError('Cannot write image in directory %s: %s' %(targetdir,e))
        return (fileone,filetwo)

    def frameImage(self):
        """Centre the image on a device-sized canvas and outline it.

        The canvas uses the last palette colour, the outline the first.
        """
        foreground = tuple(self.palette[:3])
        background = tuple(self.palette[-3:])
        widthDev, heightDev = self.size
        widthImg, heightImg = self.image.size
        # integer division: paste positions must be whole pixels
        pastePt = (
            max(0, (widthDev - widthImg) // 2),
            max(0, (heightDev - heightImg) // 2)
        )
        corner1 = (
            pastePt[0] - 1,
            pastePt[1] - 1
        )
        corner2 = (
            pastePt[0] + widthImg + 1,
            pastePt[1] + heightImg + 1
        )
        imageBg = Image.new(self.image.mode, self.size, background)
        imageBg.paste(self.image, pastePt)
        draw = ImageDraw.Draw(imageBg)
        draw.rectangle([corner1, corner2], outline=foreground)
        self.image = imageBg
|
||||
|
||||
# for debug purposes (this file is not meant to be called directly)
|
||||
if __name__ == "__main__":
    import sys
    # fail with a usage line instead of an IndexError when no file is given
    if len(sys.argv) < 2:
        print("Usage: %s <imagefile>" % sys.argv[0])
        sys.exit(1)
    imgfile = sys.argv[1]
    img = ComicPage(imgfile, "KHD")
    pages = img.splitPage('temp/',False)
    if pages is not None:
        # the page was split in two; report both halves and stop
        print("%s, %s" % pages)
        sys.exit(0)
    img.orientImage()
    img.resizeImage()
    img.frameImage()
    img.quantizeImage()
    img.saveToDir("temp/")
    sys.exit(0)
|
||||
233
kcc/kindlestrip.py
Executable file
233
kcc/kindlestrip.py
Executable file
@@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
#
|
||||
# This is a python script. You need a Python interpreter to run it.
|
||||
# For example, ActiveState Python, which exists for windows.
|
||||
#
|
||||
# This script strips the penultimate record from a Mobipocket file.
|
||||
# This is useful because the current KindleGen adds a compressed copy
|
||||
# of the source files used in this record, making the ebook produced
|
||||
# about twice as big as it needs to be.
|
||||
#
|
||||
#
|
||||
# This is free and unencumbered software released into the public domain.
|
||||
#
|
||||
# Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
# distribute this software, either in source code form or as a compiled
|
||||
# binary, for any purpose, commercial or non-commercial, and by any
|
||||
# means.
|
||||
#
|
||||
# In jurisdictions that recognize copyright laws, the author or authors
|
||||
# of this software dedicate any and all copyright interest in the
|
||||
# software to the public domain. We make this dedication for the benefit
|
||||
# of the public at large and to the detriment of our heirs and
|
||||
# successors. We intend this dedication to be an overt act of
|
||||
# relinquishment in perpetuity of all present and future rights to this
|
||||
# software under copyright law.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# For more information, please refer to <http://unlicense.org/>
|
||||
#
|
||||
# Written by Paul Durrant, 2010-2011, paul@durrant.co.uk, pdurrant on mobileread.com
|
||||
# With enhancements by Kevin Hendricks, KevinH on mobileread.com
|
||||
#
|
||||
# Changelog
|
||||
# 1.00 - Initial version
|
||||
# 1.10 - Added an option to output the stripped data
|
||||
# 1.20 - Added check for source files section (thanks Piquan)
|
||||
# 1.30 - Added prelim Support for K8 style mobis
|
||||
# 1.31 - removed the SRCS section but kept a 0 size entry for it
|
||||
# 1.32 - removes the SRCS section and its entry, now updates metadata 121 if needed
|
||||
# 1.33 - now uses and modifies mobiheader SRCS and CNT
|
||||
# 1.34 - added credit for Kevin Hendricks
|
||||
# 1.35 - fixed bug when more than one compilation (SRCS/CMET) records
|
||||
|
||||
__version__ = '1.35'
|
||||
|
||||
import sys
|
||||
import struct
|
||||
import binascii
|
||||
|
||||
class Unbuffered:
    """Stream proxy that flushes the wrapped stream after every write."""

    def __init__(self, stream):
        self.stream = stream

    def __getattr__(self, attr):
        # Only consulted for names the proxy does not define itself, so
        # everything except write() is served by the wrapped stream.
        return getattr(self.stream, attr)

    def write(self, data):
        target = self.stream
        target.write(data)
        target.flush()
|
||||
|
||||
|
||||
class StripException(Exception):
    """Error raised while stripping a Mobipocket file."""
|
||||
|
||||
|
||||
class SectionStripper:
    """Remove the SRCS record(s) from a Mobipocket file held in memory.

    The constructor does all the work: it rebuilds the record table without
    the SRCS entries, drops their data, and resets the SRCS fields of the
    mobiheader. The results are available through getResult(),
    getStrippedData() and getHeader().

    After construction `num_sections` is the record count of the stripped
    file and `sections` lists its (offset, flags) table entries.
    """

    @staticmethod
    def _recordOffset(data, index, count):
        """Return the start offset of record `index` from the table in `data`.

        For `index >= count` (one past the last record) the end of the data
        is returned, since the table has no entry to read there.
        """
        if index >= count:
            return len(data)
        offset, = struct.unpack_from('>L', data, 78 + index * 8)
        return offset

    def _sectionBounds(self, section):
        """Return (start, end) offsets of a record of the stripped file."""
        off = self.sections[section][0]
        if section + 1 == self.num_sections:
            endoff = len(self.data_file)
        else:
            endoff = self.sections[section + 1][0]
        return off, endoff

    def loadSection(self, section):
        """Return the bytes of record number `section`."""
        off, endoff = self._sectionBounds(section)
        return self.data_file[off:endoff]

    def patch(self, off, new):
        """Overwrite len(new) bytes at offset `off` with `new`."""
        self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]

    def strip(self, off, len):
        """Remove `len` bytes starting at offset `off`."""
        self.data_file = self.data_file[:off] + self.data_file[off+len:]

    def patchSection(self, section, new, in_off = 0):
        """Overwrite part of a record, starting `in_off` bytes into it.

        The new data must fit inside the record.
        """
        off, endoff = self._sectionBounds(section)
        assert off + in_off + len(new) <= endoff
        self.patch(off + in_off, new)

    def updateEXTH121(self, srcs_secnum, srcs_cnt, mobiheader):
        """Return `mobiheader` with EXTH record 121 adjusted for the removal.

        When the header carries an EXTH block with a record of type 121
        whose value is a record number at or after the SRCS record, that
        value is lowered by `srcs_cnt`. Malformed EXTH data ends the scan
        and the header is returned as patched so far.
        """
        mobi_length, = struct.unpack('>L',mobiheader[0x14:0x18])
        exth_flag, = struct.unpack('>L', mobiheader[0x80:0x84])
        if not exth_flag & 0x40:
            return mobiheader
        exth_start = 16 + mobi_length
        exth = mobiheader[exth_start:]
        if exth[:4] != b'EXTH':
            return mobiheader
        try:
            nitems, = struct.unpack('>I', exth[8:12])
            pos = 12
            for _ in range(nitems):
                rec_type, size = struct.unpack('>II', exth[pos: pos + 8])
                if rec_type == 121:
                    boundaryptr, = struct.unpack('>L', exth[pos + 8: pos + size])
                    if srcs_secnum <= boundaryptr:
                        boundaryptr -= srcs_cnt
                        # the 4-byte value follows the 8-byte record header
                        value_off = exth_start + pos + 8
                        nval = struct.pack('>L', boundaryptr)
                        mobiheader = mobiheader[:value_off] + nval + mobiheader[value_off + 4:]
                pos += size
        except struct.error:
            # truncated or inconsistent EXTH data: keep what we have
            pass
        return mobiheader

    def __init__(self, datain):
        """Strip the SRCS records from `datain` (the whole file as bytes).

        Raises StripException when the data is not a BOOKMOBI file, has no
        SRCS record, or its SRCS fields are inconsistent.
        """
        if datain[0x3C:0x3C+8] != b'BOOKMOBI':
            raise StripException("invalid file format")
        self.num_sections, = struct.unpack('>H', datain[76:78])
        num_sections = self.num_sections

        # get mobiheader and check SRCS section number and count
        offset0 = self._recordOffset(datain, 0, num_sections)
        offset1 = self._recordOffset(datain, 1, num_sections)
        mobiheader = datain[offset0:offset1]
        if len(mobiheader) < 0xe8:
            # header too short to carry the SRCS fields at 0xe0
            raise StripException("File doesn't contain the sources section.")
        srcs_secnum, srcs_cnt = struct.unpack_from('>2L', mobiheader, 0xe0)
        if srcs_secnum == 0xffffffff or srcs_cnt == 0:
            raise StripException("File doesn't contain the sources section.")

        print("Found SRCS section number %d, and count %d" % (srcs_secnum, srcs_cnt))
        # find its offset and length
        next_sec = srcs_secnum + srcs_cnt
        if next_sec > num_sections:
            raise StripException("SRCS section range exceeds the section table.")
        srcs_offset = self._recordOffset(datain, srcs_secnum, num_sections)
        # when SRCS is the last record its data runs to the end of the file
        next_offset = self._recordOffset(datain, next_sec, num_sections)
        srcs_length = next_offset - srcs_offset
        if datain[srcs_offset:srcs_offset+4] != b'SRCS':
            raise StripException("SRCS section num does not point to SRCS.")
        print(" beginning at offset %0x and ending at offset %0x" % (srcs_offset, srcs_offset + srcs_length))

        # it appears bytes 68-71 always contain (2*num_sections) + 1
        # this is not documented anyplace at all but it appears to be some sort of next
        # available unique_id used to identify specific sections in the palm db
        self.data_file = datain[:68] + struct.pack('>L',((num_sections-srcs_cnt)*2+1))
        self.data_file += datain[72:76]

        # write out the number of sections reduced by srcs_cnt
        self.data_file = self.data_file + struct.pack('>H',num_sections-srcs_cnt)

        # we are going to remove srcs_cnt SRCS sections so the offset of every entry in the table
        # up to the srcs secnum must begin 8 bytes earlier per section removed (each table entry is 8 )
        delta = -8 * srcs_cnt
        for i in range(srcs_secnum):
            offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8))
            offset += delta
            self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval)

        # for every record after the srcs_cnt SRCS records we must start it
        # earlier by 8*srcs_cnt + the length of the srcs sections themselves)
        delta = delta - srcs_length
        for i in range(srcs_secnum+srcs_cnt,num_sections):
            offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8))
            offset += delta
            flgval = 2 * (i - srcs_cnt)
            self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval)

        # now pad it out to begin right at the first offset
        # typically this is 2 bytes of nulls
        first_offset, flgval = struct.unpack_from('>2L', self.data_file, 78)
        self.data_file += b'\0' * (first_offset - len(self.data_file))

        # now finally add on every thing up to the original src_offset
        self.data_file += datain[offset0: srcs_offset]

        # and everything afterwards
        self.data_file += datain[srcs_offset+srcs_length:]

        #store away the SRCS section in case the user wants it output
        self.stripped_data_header = datain[srcs_offset:srcs_offset+16]
        self.stripped_data = datain[srcs_offset+16:srcs_offset+srcs_length]

        # update the number of sections count
        new_count = num_sections - srcs_cnt
        self.num_sections = new_count
        self.num_section = new_count  # misspelled name kept for old callers

        # update the srcs_secnum and srcs_cnt in the mobiheader
        offset0 = self._recordOffset(self.data_file, 0, new_count)
        offset1 = self._recordOffset(self.data_file, 1, new_count)
        mobiheader = self.data_file[offset0:offset1]
        mobiheader = mobiheader[:0xe0]+ struct.pack('>L', 0xffffffff) + struct.pack('>L', 0) + mobiheader[0xe8:]

        # if K8 mobi, handle metadata 121 in old mobiheader
        mobiheader = self.updateEXTH121(srcs_secnum, srcs_cnt, mobiheader)
        self.data_file = self.data_file[0:offset0] + mobiheader + self.data_file[offset1:]

        # record table of the stripped file, used by loadSection/patchSection
        self.sections = [struct.unpack_from('>2L', self.data_file, 78 + i * 8)
                         for i in range(new_count)]
        print("done")

    def getResult(self):
        """Return the stripped file contents."""
        return self.data_file

    def getStrippedData(self):
        """Return the removed SRCS data without its first 16 bytes."""
        return self.stripped_data

    def getHeader(self):
        """Return the first 16 bytes of the removed SRCS data."""
        return self.stripped_data_header
|
||||
|
||||
# Command-line entry point: <infile> <outfile> [<strippeddatafile>].
if __name__ == "__main__":
    sys.stdout = Unbuffered(sys.stdout)
    print('KindleStrip v%(__version__)s. '
          'Written 2010-2012 by Paul Durrant and Kevin Hendricks.' % globals())
    if len(sys.argv) < 3 or len(sys.argv) > 4:
        print("Strips the Sources record from Mobipocket ebooks")
        print("For ebooks generated using KindleGen 1.1 and later that add the source")
        print("Usage:")
        print(" %s <infile> <outfile> <strippeddatafile>" % sys.argv[0])
        print("<strippeddatafile> is optional.")
        sys.exit(1)
    else:
        infile = sys.argv[1]
        outfile = sys.argv[2]
        # `with` closes each handle as soon as it is no longer needed
        with open(infile, 'rb') as fin:
            data_file = fin.read()
        try:
            strippedFile = SectionStripper(data_file)
            with open(outfile, 'wb') as fout:
                fout.write(strippedFile.getResult())
            # b2a_hex returns bytes; decode before joining with text
            print("Header Bytes: " + binascii.b2a_hex(strippedFile.getHeader()).decode('ascii'))
            if len(sys.argv) == 4:
                with open(sys.argv[3], 'wb') as fsrc:
                    fsrc.write(strippedFile.getStrippedData())
        except StripException as e:
            print("Error: %s" % e)
            sys.exit(1)
    sys.exit(0)
|
||||
1706
kcc/rarfile.py
Normal file
1706
kcc/rarfile.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user