1
0
mirror of https://github.com/ciromattia/kcc synced 2025-12-21 05:31:49 +00:00

Replaced KindleStrip with KindleUnpack (close #6)

This commit is contained in:
Paweł Jastrzębski
2013-09-23 10:21:45 +02:00
parent 7907a45ca2
commit 4890727692
4 changed files with 403 additions and 254 deletions

View File

@@ -130,11 +130,10 @@ This script born as a cross-platform alternative to `KindleComicParser` by **Dc5
The app relies and includes the following scripts/binaries: The app relies and includes the following scripts/binaries:
- `KindleStrip` script © 2010-2012 by **Paul Durrant** and released in public domain - `KindleUnpack` script by Charles **M. Hannum, P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding**. Released with GPLv3 License.
([forum thread](http://www.mobileread.com/forums/showthread.php?t=96903)) - `rarfile.py` script &copy; 2005-2011 **Marko Kreen** <markokr@gmail.com>. Released with ISC License.
- `rarfile.py` script &copy; 2005-2011 **Marko Kreen** <markokr@gmail.com>, released with ISC License - `image.py` class from **Alex Yatskov**'s [Mangle](http://foosoft.net/mangle/) with subsequent [proDOOMman](https://github.com/proDOOMman/Mangle)'s and [Birua](https://github.com/Birua/Mangle)'s patches.
- `image.py` class from **Alex Yatskov**'s [Mangle](http://foosoft.net/mangle/) with subsequent [proDOOMman](https://github.com/proDOOMman/Mangle)'s and [Birua](https://github.com/Birua/Mangle)'s patches - Icon is by **Nikolay Verin** ([http://ncrow.deviantart.com/](http://ncrow.deviantart.com/)) and released under [CC BY-NC-SA 3.0](http://creativecommons.org/licenses/by-nc-sa/3.0/) License.
- Icon is by **Nikolay Verin** ([http://ncrow.deviantart.com/](http://ncrow.deviantart.com/)) and released under [CC BY-NC-SA 3.0](http://creativecommons.org/licenses/by-nc-sa/3.0/) License
## SAMPLE FILES CREATED BY KCC ## SAMPLE FILES CREATED BY KCC
* [Kindle Paperwhite](http://kcc.vulturis.eu/Samples/Ubunchu!-KPW.mobi) * [Kindle Paperwhite](http://kcc.vulturis.eu/Samples/Ubunchu!-KPW.mobi)
@@ -255,6 +254,7 @@ The app relies and includes the following scripts/binaries:
* Hotfixed crash occurring on OS with Russian locale * Hotfixed crash occurring on OS with Russian locale
####3.3: ####3.3:
* Created MOBI files are not longer marked as _Personal_ on newer Kindle models
* Margins are now automatically omitted in Panel View mode * Margins are now automatically omitted in Panel View mode
* Layout of panels in Panel View mode is now automatically adjusted to content * Layout of panels in Panel View mode is now automatically adjusted to content
* Support for Virtual Panel View was removed * Support for Virtual Panel View was removed
@@ -264,9 +264,6 @@ The app relies and includes the following scripts/binaries:
* Windows release is now bundled with UnRAR and 7za * Windows release is now bundled with UnRAR and 7za
* Small GUI tweaks * Small GUI tweaks
## KNOWN ISSUES
* Removing SRCS headers sometimes fail in 32bit enviroments. Due to memory limitations.
## COPYRIGHT ## COPYRIGHT
Copyright (c) 2012-2013 Ciro Mattia Gonano and Paweł Jastrzębski. Copyright (c) 2012-2013 Ciro Mattia Gonano and Paweł Jastrzębski.

View File

@@ -30,7 +30,7 @@ import traceback
import urllib2 import urllib2
import time import time
import comic2ebook import comic2ebook
import kindlestrip import kindlesplit
from image import ProfileData from image import ProfileData
from subprocess import call, Popen, STDOUT, PIPE from subprocess import call, Popen, STDOUT, PIPE
from PyQt4 import QtGui, QtCore from PyQt4 import QtGui, QtCore
@@ -245,23 +245,27 @@ class WorkerThread(QtCore.QThread):
True) True)
else: else:
self.emit(QtCore.SIGNAL("addMessage"), 'Creating MOBI file... Done!', 'info', True) self.emit(QtCore.SIGNAL("addMessage"), 'Creating MOBI file... Done!', 'info', True)
self.emit(QtCore.SIGNAL("addMessage"), 'Removing SRCS header...', 'info') self.emit(QtCore.SIGNAL("addMessage"), 'Cleaning MOBI file...', 'info')
os.remove(item) os.remove(item)
mobiPath = item.replace('.epub', '.mobi') mobiPath = item.replace('.epub', '.mobi')
shutil.move(mobiPath, mobiPath + '_tostrip') shutil.move(mobiPath, mobiPath + '_toclean')
try: try:
kindlestrip.main((mobiPath + '_tostrip', mobiPath)) if profile in ['K345', 'KHD', 'KF', 'KFHD', 'KFHD8', 'KFA']:
newKindle = True
else:
newKindle = False
mobisplit = kindlesplit.mobi_split(mobiPath + '_toclean', newKindle)
open(mobiPath, 'wb').write(mobisplit.getResult())
except Exception: except Exception:
self.errors = True self.errors = True
if not self.errors: if not self.errors:
os.remove(mobiPath + '_tostrip') os.remove(mobiPath + '_toclean')
self.emit(QtCore.SIGNAL("addMessage"), 'Removing SRCS header... Done!', 'info', True) self.emit(QtCore.SIGNAL("addMessage"), 'Cleaning MOBI file... Done!', 'info', True)
else: else:
shutil.move(mobiPath + '_tostrip', mobiPath) os.remove(mobiPath + '_toclean')
os.remove(mobiPath)
self.emit(QtCore.SIGNAL("addMessage"), self.emit(QtCore.SIGNAL("addMessage"),
'KindleStrip failed to remove SRCS header!', 'warning') 'KindleUnpack failed to clean MOBI file!', 'error')
self.emit(QtCore.SIGNAL("addMessage"),
'MOBI file will work correctly but it will be highly oversized.', 'warning')
else: else:
epubSize = (os.path.getsize(item))/1024/1024 epubSize = (os.path.getsize(item))/1024/1024
os.remove(item) os.remove(item)

384
kcc/kindlesplit.py Normal file
View File

@@ -0,0 +1,384 @@
# Based on initial version of KindleUnpack. Copyright (C) 2009 Charles M. Hannum <root@ihack.net>
# Improvements Copyright (C) 2009-2012 P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding
# Copyright (C) 2013 Pawel Jastrzebski <pawelj@vulturis.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
__license__ = 'ISC'
__copyright__ = '2012-2013, Ciro Mattia Gonano <ciromattia@gmail.com>, Pawel Jastrzebski <pawelj@vulturis.eu>'
__docformat__ = 'restructuredtext en'
import struct
from uuid import uuid4
# important pdb header offsets
unique_id_seed = 68
number_of_pdb_records = 76
# important palmdoc header offsets
book_length = 4
book_record_count = 8
first_pdb_record = 78
# important rec0 offsets
length_of_book = 4
mobi_header_base = 16
mobi_header_length = 20
mobi_type = 24
mobi_version = 36
first_non_text = 80
title_offset = 84
first_image_record = 108
first_content_index = 192
last_content_index = 194
kf8_last_content_index = 192 # for KF8 mobi headers
fcis_index = 200
flis_index = 208
srcs_index = 224
srcs_count = 228
primary_index = 244
datp_index = 256
huffoff = 112
hufftbloff = 120
def getint(datain, ofs, sz='L'):
i, = struct.unpack_from('>'+sz, datain, ofs)
return i
def writeint(datain, ofs, n, length='L'):
if length == 'L':
return datain[:ofs]+struct.pack('>L', n)+datain[ofs+4:]
else:
return datain[:ofs]+struct.pack('>H', n)+datain[ofs+2:]
def getsecaddr(datain, secno):
nsec = getint(datain, number_of_pdb_records, 'H')
assert secno >= 0 & secno < nsec, 'secno %d out of range (nsec=%d)' % (secno, nsec)
secstart = getint(datain, first_pdb_record+secno*8)
if secno == nsec-1:
secend = len(datain)
else:
secend = getint(datain, first_pdb_record+(secno+1)*8)
return secstart, secend
def readsection(datain, secno):
secstart, secend = getsecaddr(datain, secno)
return datain[secstart:secend]
def writesection(datain, secno, secdata): # overwrite, accounting for different length
dataout = deletesectionrange(datain, secno, secno)
return insertsection(dataout, secno, secdata)
def nullsection(datain, secno): # make it zero-length without deleting it
datalst = []
nsec = getint(datain, number_of_pdb_records, 'H')
secstart, secend = getsecaddr(datain, secno)
zerosecstart, zerosecend = getsecaddr(datain, 0)
dif = secend-secstart
datalst.append(datain[:first_pdb_record])
for i in range(0, secno+1):
ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
for i in range(secno+1, nsec):
ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
ofs -= dif
datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
lpad = zerosecstart - (first_pdb_record + 8*nsec)
if lpad > 0:
datalst.append('\0' * lpad)
datalst.append(datain[zerosecstart: secstart])
datalst.append(datain[secend:])
dataout = "".join(datalst)
return dataout
def deletesectionrange(datain, firstsec, lastsec): # delete a range of sections
datalst = []
firstsecstart, firstsecend = getsecaddr(datain, firstsec)
lastsecstart, lastsecend = getsecaddr(datain, lastsec)
zerosecstart, zerosecend = getsecaddr(datain, 0)
dif = lastsecend - firstsecstart + 8*(lastsec-firstsec+1)
nsec = getint(datain, number_of_pdb_records, 'H')
datalst.append(datain[:unique_id_seed])
datalst.append(struct.pack('>L', 2*(nsec-(lastsec-firstsec+1))+1))
datalst.append(datain[unique_id_seed+4:number_of_pdb_records])
datalst.append(struct.pack('>H', nsec-(lastsec-firstsec+1)))
newstart = zerosecstart - 8*(lastsec-firstsec+1)
for i in range(0, firstsec):
ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
ofs -= 8 * (lastsec - firstsec + 1)
datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
for i in range(lastsec+1, nsec):
ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
ofs -= dif
flgval = 2*(i-(lastsec-firstsec+1))
datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
lpad = newstart - (first_pdb_record + 8*(nsec - (lastsec - firstsec + 1)))
if lpad > 0:
datalst.append('\0' * lpad)
datalst.append(datain[zerosecstart:firstsecstart])
datalst.append(datain[lastsecend:])
dataout = "".join(datalst)
return dataout
def insertsection(datain, secno, secdata): # insert a new section
datalst = []
nsec = getint(datain, number_of_pdb_records, 'H')
secstart, secend = getsecaddr(datain, secno)
zerosecstart, zerosecend = getsecaddr(datain, 0)
dif = len(secdata)
datalst.append(datain[:unique_id_seed])
datalst.append(struct.pack('>L', 2*(nsec+1)+1))
datalst.append(datain[unique_id_seed+4:number_of_pdb_records])
datalst.append(struct.pack('>H', nsec+1))
newstart = zerosecstart + 8
for i in range(0, secno):
ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
ofs += 8
datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
datalst.append(struct.pack('>L', secstart + 8) + struct.pack('>L', (2*secno)))
for i in range(secno, nsec):
ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
ofs = ofs + dif + 8
flgval = 2*(i+1)
datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
lpad = newstart - (first_pdb_record + 8*(nsec + 1))
if lpad > 0:
datalst.append('\0' * lpad)
datalst.append(datain[zerosecstart:secstart])
datalst.append(secdata)
datalst.append(datain[secstart:])
dataout = "".join(datalst)
return dataout
def insertsectionrange(sectionsource, firstsec, lastsec, sectiontarget, targetsec): # insert a range of sections
dataout = sectiontarget
for idx in range(lastsec, firstsec-1, -1):
dataout = insertsection(dataout, targetsec, readsection(sectionsource, idx))
return dataout
def get_exth_params(rec0):
ebase = mobi_header_base + getint(rec0, mobi_header_length)
elen = getint(rec0, ebase+4)
enum = getint(rec0, ebase+8)
return ebase, elen, enum
def add_exth(rec0, exth_num, exth_bytes):
ebase, elen, enum = get_exth_params(rec0)
newrecsize = 8+len(exth_bytes)
newrec0 = rec0[0:ebase+4]+struct.pack('>L', elen+newrecsize)+struct.pack('>L', enum+1) +\
struct.pack('>L', exth_num) + struct.pack('>L', newrecsize)+exth_bytes+rec0[ebase+12:]
newrec0 = writeint(newrec0, title_offset, getint(newrec0, title_offset)+newrecsize)
return newrec0
def read_exth(rec0, exth_num):
exth_values = []
ebase, elen, enum = get_exth_params(rec0)
ebase += 12
while enum > 0:
exth_id = getint(rec0, ebase)
if exth_id == exth_num:
# We might have multiple exths, so build a list.
exth_values.append(rec0[ebase+8:ebase+getint(rec0, ebase+4)])
enum -= 1
ebase = ebase+getint(rec0, ebase+4)
return exth_values
def write_exth(rec0, exth_num, exth_bytes):
ebase, elen, enum = get_exth_params(rec0)
ebase_idx = ebase+12
enum_idx = enum
while enum_idx > 0:
exth_id = getint(rec0, ebase_idx)
if exth_id == exth_num:
dif = len(exth_bytes)+8-getint(rec0, ebase_idx+4)
newrec0 = rec0
if dif != 0:
newrec0 = writeint(newrec0, title_offset, getint(newrec0, title_offset)+dif)
return newrec0[:ebase+4]+struct.pack('>L', elen+len(exth_bytes)+8-getint(rec0, ebase_idx+4)) +\
struct.pack('>L', enum)+rec0[ebase+12:ebase_idx+4] +\
struct.pack('>L', len(exth_bytes)+8)+exth_bytes +\
rec0[ebase_idx+getint(rec0, ebase_idx+4):]
enum_idx -= 1
ebase_idx = ebase_idx+getint(rec0, ebase_idx+4)
return rec0
def del_exth(rec0, exth_num):
ebase, elen, enum = get_exth_params(rec0)
ebase_idx = ebase+12
enum_idx = 0
while enum_idx < enum:
exth_id = getint(rec0, ebase_idx)
exth_size = getint(rec0, ebase_idx+4)
if exth_id == exth_num:
newrec0 = rec0
newrec0 = writeint(newrec0, title_offset, getint(newrec0, title_offset)-exth_size)
newrec0 = newrec0[:ebase_idx]+newrec0[ebase_idx+exth_size:]
newrec0 = newrec0[0:ebase+4]+struct.pack('>L', elen-exth_size)+struct.pack('>L', enum-1)+newrec0[ebase+12:]
return newrec0
enum_idx += 1
ebase_idx = ebase_idx+exth_size
return rec0
class mobi_split:
def __init__(self, infile, newKindle):
try:
datain = open(infile, 'rb').read()
datain_rec0 = readsection(datain, 0)
ver = getint(datain_rec0, mobi_version)
fake_asin = str(uuid4())
self.combo = (ver != 8)
if not self.combo:
return
exth121 = read_exth(datain_rec0, 121)
if len(exth121) == 0:
self.combo = False
return
else:
# only pay attention to first exth121
# (there should only be one)
datain_kf8, = struct.unpack_from('>L', exth121[0], 0)
if datain_kf8 == 0xffffffff:
self.combo = False
return
datain_kfrec0 = readsection(datain, datain_kf8)
firstimage = getint(datain_rec0, first_image_record)
lastimage = getint(datain_rec0, last_content_index, 'H')
if not newKindle:
# create the standalone mobi7
num_sec = getint(datain, number_of_pdb_records, 'H')
# remove BOUNDARY up to but not including ELF record
self.result_file = deletesectionrange(datain, datain_kf8-1, num_sec-2)
# check if there are SRCS records and delete them
srcs = getint(datain_rec0, srcs_index)
num_srcs = getint(datain_rec0, srcs_count)
if srcs != 0xffffffff and num_srcs > 0:
self.result_file = deletesectionrange(self.result_file, srcs, srcs+num_srcs-1)
datain_rec0 = writeint(datain_rec0, srcs_index, 0xffffffff)
datain_rec0 = writeint(datain_rec0, srcs_count, 0)
# reset the EXTH 121 KF8 Boundary meta data to 0xffffffff
datain_rec0 = write_exth(datain_rec0, 121, struct.pack('>L', 0xffffffff))
# datain_rec0 = del_exth(datain_rec0,121)
# datain_rec0 = del_exth(datain_rec0,534)
# don't remove the EXTH 125 KF8 Count of Resources, seems to be present in mobi6 files as well
# set the EXTH 129 KF8 Masthead / Cover Image string to the null string
datain_rec0 = write_exth(datain_rec0, 129, '')
# don't remove the EXTH 131 KF8 Unidentified Count, seems to be present in mobi6 files as well
# Make sure we have an ASIN & cdeType set...
if len(read_exth(datain_rec0, 113)) == 0:
datain_rec0 = add_exth(datain_rec0, 113, fake_asin)
if len(read_exth(datain_rec0, 504)) == 0:
datain_rec0 = add_exth(datain_rec0, 504, fake_asin)
if len(read_exth(datain_rec0, 501)) == 0:
datain_rec0 = add_exth(datain_rec0, 501, b'EBOK')
# need to reset flags stored in 0x80-0x83
# old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050
# Bit Flags
# 0x1000 = Bit 12 indicates if embedded fonts are used or not
# 0x0800 = means this Header points to *shared* images/resource/fonts ??
# 0x0080 = unknown new flag, why is this now being set by Kindlegen 2.8?
# 0x0040 = exth exists
# 0x0010 = Not sure but this is always set so far
fval, = struct.unpack_from('>L', datain_rec0, 0x80)
# need to remove flag 0x0800 for KindlePreviewer 2.8 and unset Bit 12 for embedded fonts
fval &= 0x07FF
datain_rec0 = datain_rec0[:0x80] + struct.pack('>L', fval) + datain_rec0[0x84:]
self.result_file = writesection(self.result_file, 0, datain_rec0)
if lastimage == 0xffff:
# find the lowest of the next sections and copy up to that.
ofs_list = [(kf8_last_content_index, 'L'), (fcis_index, 'L'), (flis_index, 'L'), (datp_index, 'L'),
(hufftbloff, 'L')]
for ofs, sz in ofs_list:
n = getint(datain_kfrec0, ofs, sz)
if 0 < n < lastimage:
lastimage = n-1
# Try to null out FONT and RES, but leave the (empty) PDB record so image refs remain valid
for i in range(firstimage, lastimage):
imgsec = readsection(self.result_file, i)
if imgsec[0:4] in ['RESC', 'FONT']:
self.result_file = nullsection(self.result_file, i)
# mobi7 finished
else:
# create standalone mobi8
self.result_file = deletesectionrange(datain, 0, datain_kf8-1)
target = getint(datain_kfrec0, first_image_record)
self.result_file = insertsectionrange(datain, firstimage, lastimage, self.result_file, target)
datain_kfrec0 = readsection(self.result_file, 0)
# Only keep the correct EXTH 116 StartOffset, KG 2.5 carries over the one from the mobi7 part,
# which then points at garbage in the mobi8 part, and confuses FW 3.4
kf8starts = read_exth(datain_kfrec0, 116)
# If we have multiple StartOffset, keep only the last one
kf8start_count = len(kf8starts)
while kf8start_count > 1:
kf8start_count -= 1
datain_kfrec0 = del_exth(datain_kfrec0, 116)
# update the EXTH 125 KF8 Count of Images/Fonts/Resources
datain_kfrec0 = write_exth(datain_kfrec0, 125, struct.pack('>L', lastimage-firstimage+1))
# Same dance for the KF8, we want an ASIN & cdeType :)
if len(read_exth(datain_kfrec0, 113)) == 0:
datain_kfrec0 = add_exth(datain_kfrec0, 113, fake_asin)
if len(read_exth(datain_kfrec0, 504)) == 0:
datain_kfrec0 = add_exth(datain_kfrec0, 504, fake_asin)
if len(read_exth(datain_kfrec0, 501)) == 0:
datain_kfrec0 = add_exth(datain_kfrec0, 501, b'EBOK')
# need to reset flags stored in 0x80-0x83
# old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050
# standalone mobi8 with exth: 0x0050
# Bit Flags
# 0x1000 = Bit 12 indicates if embedded fonts are used or not
# 0x0800 = means this Header points to *shared* images/resource/fonts ??
# 0x0080 = unknown new flag, why is this now being set by Kindlegen 2.8?
# 0x0040 = exth exists
# 0x0010 = Not sure but this is always set so far
fval, = struct.unpack_from('>L', datain_kfrec0, 0x80)
fval &= 0x1FFF
fval |= 0x0800
datain_kfrec0 = datain_kfrec0[:0x80] + struct.pack('>L', fval) + datain_kfrec0[0x84:]
# properly update other index pointers that have been shifted by the insertion of images
ofs_list = [(kf8_last_content_index, 'L'), (fcis_index, 'L'), (flis_index, 'L'), (datp_index, 'L'),
(hufftbloff, 'L')]
for ofs, sz in ofs_list:
n = getint(datain_kfrec0, ofs, sz)
if n != 0xffffffff:
datain_kfrec0 = writeint(datain_kfrec0, ofs, n+lastimage-firstimage+1, sz)
self.result_file = writesection(self.result_file, 0, datain_kfrec0)
# mobi8 finished
except Exception:
raise
def getResult(self):
return self.result_file

View File

@@ -1,236 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
#
# This is a python script. You need a Python interpreter to run it.
# For example, ActiveState Python, which exists for windows.
#
# This script strips the penultimate record from a Mobipocket file.
# This is useful because the current KindleGen add a compressed copy
# of the source files used in this record, making the ebook produced
# about twice as big as it needs to be.
#
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# For more information, please refer to <http://unlicense.org/>
#
# Written by Paul Durrant, 2010-2011, paul@durrant.co.uk, pdurrant on mobileread.com
# With enhancements by Kevin Hendricks, KevinH on mobileread.com
#
# Changelog
# 1.00 - Initial version
# 1.10 - Added an option to output the stripped data
# 1.20 - Added check for source files section (thanks Piquan)
# 1.30 - Added prelim Support for K8 style mobis
# 1.31 - removed the SRCS section but kept a 0 size entry for it
# 1.32 - removes the SRCS section and its entry, now updates metadata 121 if needed
# 1.33 - now uses and modifies mobiheader SRCS and CNT
# 1.34 - added credit for Kevin Hendricks
# 1.35 - fixed bug when more than one compilation (SRCS/CMET) records
__version__ = '1.35'
import sys
import struct
import binascii
class Unbuffered:
def __init__(self, stream):
self.stream = stream
def write(self, data):
self.stream.write(data)
self.stream.flush()
def __getattr__(self, attr):
return getattr(self.stream, attr)
class StripException(Exception):
pass
class SectionStripper:
def loadSection(self, section):
if (section + 1 == self.num_sections):
endoff = len(self.data_file)
else:
endoff = self.sections[section + 1][0]
off = self.sections[section][0]
return self.data_file[off:endoff]
def patch(self, off, new):
self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]
def strip(self, off, len):
self.data_file = self.data_file[:off] + self.data_file[off+len:]
def patchSection(self, section, new, in_off = 0):
if (section + 1 == self.num_sections):
endoff = len(self.data_file)
else:
endoff = self.sections[section + 1][0]
off = self.sections[section][0]
assert off + in_off + len(new) <= endoff
self.patch(off + in_off, new)
def updateEXTH121(self, srcs_secnum, srcs_cnt, mobiheader):
mobi_length, = struct.unpack('>L',mobiheader[0x14:0x18])
exth_flag, = struct.unpack('>L', mobiheader[0x80:0x84])
exth = 'NONE'
try:
if exth_flag & 0x40:
exth = mobiheader[16 + mobi_length:]
if (len(exth) >= 4) and (exth[:4] == 'EXTH'):
nitems, = struct.unpack('>I', exth[8:12])
pos = 12
for i in xrange(nitems):
type, size = struct.unpack('>II', exth[pos: pos + 8])
# print type, size
if type == 121:
boundaryptr, =struct.unpack('>L',exth[pos+8: pos + size])
if srcs_secnum <= boundaryptr:
boundaryptr -= srcs_cnt
prefix = mobiheader[0:16 + mobi_length + pos + 8]
suffix = mobiheader[16 + mobi_length + pos + 8 + 4:]
nval = struct.pack('>L',boundaryptr)
mobiheader = prefix + nval + suffix
pos += size
except:
pass
return mobiheader
def __init__(self, datain):
if datain[0x3C:0x3C+8] != 'BOOKMOBI':
raise StripException("invalid file format")
self.num_sections, = struct.unpack('>H', datain[76:78])
# get mobiheader and check SRCS section number and count
offset0, = struct.unpack_from('>L', datain, 78)
offset1, = struct.unpack_from('>L', datain, 86)
mobiheader = datain[offset0:offset1]
srcs_secnum, srcs_cnt = struct.unpack_from('>2L', mobiheader, 0xe0)
if srcs_secnum == 0xffffffff or srcs_cnt == 0:
raise StripException("File doesn't contain the sources section.")
print "Found SRCS section number %d, and count %d" % (srcs_secnum, srcs_cnt)
# find its offset and length
next = srcs_secnum + srcs_cnt
srcs_offset, flgval = struct.unpack_from('>2L', datain, 78+(srcs_secnum*8))
next_offset, flgval = struct.unpack_from('>2L', datain, 78+(next*8))
srcs_length = next_offset - srcs_offset
if datain[srcs_offset:srcs_offset+4] != 'SRCS':
raise StripException("SRCS section num does not point to SRCS.")
print " beginning at offset %0x and ending at offset %0x" % (srcs_offset, srcs_length)
# it appears bytes 68-71 always contain (2*num_sections) + 1
# this is not documented anyplace at all but it appears to be some sort of next
# available unique_id used to identify specific sections in the palm db
self.data_file = datain[:68] + struct.pack('>L',((self.num_sections-srcs_cnt)*2+1))
self.data_file += datain[72:76]
# write out the number of sections reduced by srtcs_cnt
self.data_file = self.data_file + struct.pack('>H',self.num_sections-srcs_cnt)
# we are going to remove srcs_cnt SRCS sections so the offset of every entry in the table
# up to the srcs secnum must begin 8 bytes earlier per section removed (each table entry is 8 )
delta = -8 * srcs_cnt
for i in xrange(srcs_secnum):
offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8))
offset += delta
self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval)
# for every record after the srcs_cnt SRCS records we must start it
# earlier by 8*srcs_cnt + the length of the srcs sections themselves)
delta = delta - srcs_length
for i in xrange(srcs_secnum+srcs_cnt,self.num_sections):
offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8))
offset += delta
flgval = 2 * (i - srcs_cnt)
self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval)
# now pad it out to begin right at the first offset
# typically this is 2 bytes of nulls
first_offset, flgval = struct.unpack_from('>2L', self.data_file, 78)
self.data_file += '\0' * (first_offset - len(self.data_file))
# now finally add on every thing up to the original src_offset
self.data_file += datain[offset0: srcs_offset]
# and everything afterwards
self.data_file += datain[srcs_offset+srcs_length:]
#store away the SRCS section in case the user wants it output
self.stripped_data_header = datain[srcs_offset:srcs_offset+16]
self.stripped_data = datain[srcs_offset+16:srcs_offset+srcs_length]
# update the number of sections count
self.num_section = self.num_sections - srcs_cnt
# update the srcs_secnum and srcs_cnt in the mobiheader
offset0, flgval0 = struct.unpack_from('>2L', self.data_file, 78)
offset1, flgval1 = struct.unpack_from('>2L', self.data_file, 86)
mobiheader = self.data_file[offset0:offset1]
mobiheader = mobiheader[:0xe0]+ struct.pack('>L', 0xffffffff) + struct.pack('>L', 0) + mobiheader[0xe8:]
# if K8 mobi, handle metadata 121 in old mobiheader
mobiheader = self.updateEXTH121(srcs_secnum, srcs_cnt, mobiheader)
self.data_file = self.data_file[0:offset0] + mobiheader + self.data_file[offset1:]
print "done"
def getResult(self):
return self.data_file
def getStrippedData(self):
return self.stripped_data
def getHeader(self):
return self.stripped_data_header
def main(argv=None):
infile = argv[0]
outfile = argv[1]
data_file = file(infile, 'rb').read()
try:
strippedFile = SectionStripper(data_file)
file(outfile, 'wb').write(strippedFile.getResult())
print "Header Bytes: " + binascii.b2a_hex(strippedFile.getHeader())
if len(argv)==3:
file(argv[2], 'wb').write(strippedFile.getStrippedData())
except StripException, e:
print "Error: %s" % e
sys.exit(1)
if __name__ == "__main__":
sys.stdout=Unbuffered(sys.stdout)
print ('KindleStrip v%(__version__)s. '
'Written 2010-2012 by Paul Durrant and Kevin Hendricks.' % globals())
if len(sys.argv)<3 or len(sys.argv)>4:
print "Strips the Sources record from Mobipocket ebooks"
print "For ebooks generated using KindleGen 1.1 and later that add the source"
print "Usage:"
print " %s <infile> <outfile> <strippeddatafile>" % sys.argv[0]
print "<strippeddatafile> is optional."
sys.exit(1)
else:
main(sys.argv[1:])
sys.exit(0)