Mirror of https://github.com/ciromattia/kcc (synced 2025-12-13 09:46:25 +00:00)
Replaced KindleStrip with KindleUnpack (close #6)
13
README.md
@@ -130,11 +130,10 @@ This script born as a cross-platform alternative to `KindleComicParser` by **Dc5

The app relies and includes the following scripts/binaries:

- `KindleStrip` script © 2010-2012 by **Paul Durrant** and released in public domain
([forum thread](http://www.mobileread.com/forums/showthread.php?t=96903))
- `rarfile.py` script © 2005-2011 **Marko Kreen** <markokr@gmail.com>, released with ISC License
- `image.py` class from **Alex Yatskov**'s [Mangle](http://foosoft.net/mangle/) with subsequent [proDOOMman](https://github.com/proDOOMman/Mangle)'s and [Birua](https://github.com/Birua/Mangle)'s patches
- Icon is by **Nikolay Verin** ([http://ncrow.deviantart.com/](http://ncrow.deviantart.com/)) and released under [CC BY-NC-SA 3.0](http://creativecommons.org/licenses/by-nc-sa/3.0/) License
- `KindleUnpack` script by Charles **M. Hannum, P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding**. Released with GPLv3 License.
- `rarfile.py` script © 2005-2011 **Marko Kreen** <markokr@gmail.com>. Released with ISC License.
- `image.py` class from **Alex Yatskov**'s [Mangle](http://foosoft.net/mangle/) with subsequent [proDOOMman](https://github.com/proDOOMman/Mangle)'s and [Birua](https://github.com/Birua/Mangle)'s patches.
- Icon is by **Nikolay Verin** ([http://ncrow.deviantart.com/](http://ncrow.deviantart.com/)) and released under [CC BY-NC-SA 3.0](http://creativecommons.org/licenses/by-nc-sa/3.0/) License.

## SAMPLE FILES CREATED BY KCC
* [Kindle Paperwhite](http://kcc.vulturis.eu/Samples/Ubunchu!-KPW.mobi)
@@ -255,6 +254,7 @@ The app relies and includes the following scripts/binaries:
* Hotfixed crash occurring on OS with Russian locale

####3.3:
* Created MOBI files are no longer marked as _Personal_ on newer Kindle models
* Margins are now automatically omitted in Panel View mode
* Layout of panels in Panel View mode is now automatically adjusted to content
* Support for Virtual Panel View was removed
@@ -264,9 +264,6 @@ The app relies and includes the following scripts/binaries:
* Windows release is now bundled with UnRAR and 7za
* Small GUI tweaks

## KNOWN ISSUES
* Removing SRCS headers sometimes fails in 32-bit environments due to memory limitations.

## COPYRIGHT

Copyright (c) 2012-2013 Ciro Mattia Gonano and Paweł Jastrzębski.

@@ -30,7 +30,7 @@ import traceback
import urllib2
import time
import comic2ebook
import kindlestrip
import kindlesplit
from image import ProfileData
from subprocess import call, Popen, STDOUT, PIPE
from PyQt4 import QtGui, QtCore
@@ -245,23 +245,27 @@ class WorkerThread(QtCore.QThread):
True)
else:
self.emit(QtCore.SIGNAL("addMessage"), 'Creating MOBI file... Done!', 'info', True)
self.emit(QtCore.SIGNAL("addMessage"), 'Removing SRCS header...', 'info')
self.emit(QtCore.SIGNAL("addMessage"), 'Cleaning MOBI file...', 'info')
os.remove(item)
mobiPath = item.replace('.epub', '.mobi')
shutil.move(mobiPath, mobiPath + '_tostrip')
shutil.move(mobiPath, mobiPath + '_toclean')
try:
kindlestrip.main((mobiPath + '_tostrip', mobiPath))
if profile in ['K345', 'KHD', 'KF', 'KFHD', 'KFHD8', 'KFA']:
newKindle = True
else:
newKindle = False
mobisplit = kindlesplit.mobi_split(mobiPath + '_toclean', newKindle)
open(mobiPath, 'wb').write(mobisplit.getResult())
except Exception:
self.errors = True
if not self.errors:
os.remove(mobiPath + '_tostrip')
self.emit(QtCore.SIGNAL("addMessage"), 'Removing SRCS header... Done!', 'info', True)
os.remove(mobiPath + '_toclean')
self.emit(QtCore.SIGNAL("addMessage"), 'Cleaning MOBI file... Done!', 'info', True)
else:
shutil.move(mobiPath + '_tostrip', mobiPath)
os.remove(mobiPath + '_toclean')
os.remove(mobiPath)
self.emit(QtCore.SIGNAL("addMessage"),
'KindleStrip failed to remove SRCS header!', 'warning')
self.emit(QtCore.SIGNAL("addMessage"),
'MOBI file will work correctly but it will be highly oversized.', 'warning')
'KindleUnpack failed to clean MOBI file!', 'error')
else:
epubSize = (os.path.getsize(item))/1024/1024
os.remove(item)

384
kcc/kindlesplit.py
Normal file
@@ -0,0 +1,384 @@
# Based on initial version of KindleUnpack. Copyright (C) 2009 Charles M. Hannum <root@ihack.net>
# Improvements Copyright (C) 2009-2012 P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding
# Copyright (C) 2013 Pawel Jastrzebski <pawelj@vulturis.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__license__ = 'ISC'
__copyright__ = '2012-2013, Ciro Mattia Gonano <ciromattia@gmail.com>, Pawel Jastrzebski <pawelj@vulturis.eu>'
__docformat__ = 'restructuredtext en'

import struct
from uuid import uuid4

# important pdb header offsets
unique_id_seed = 68
number_of_pdb_records = 76

# important palmdoc header offsets
book_length = 4
book_record_count = 8
first_pdb_record = 78

# important rec0 offsets
length_of_book = 4
mobi_header_base = 16
mobi_header_length = 20
mobi_type = 24
mobi_version = 36
first_non_text = 80
title_offset = 84
first_image_record = 108
first_content_index = 192
last_content_index = 194
kf8_last_content_index = 192 # for KF8 mobi headers
fcis_index = 200
flis_index = 208
srcs_index = 224
srcs_count = 228
primary_index = 244
datp_index = 256
huffoff = 112
hufftbloff = 120


def getint(datain, ofs, sz='L'):
    i, = struct.unpack_from('>'+sz, datain, ofs)
    return i


def writeint(datain, ofs, n, length='L'):
    if length == 'L':
        return datain[:ofs]+struct.pack('>L', n)+datain[ofs+4:]
    else:
        return datain[:ofs]+struct.pack('>H', n)+datain[ofs+2:]


def getsecaddr(datain, secno):
    nsec = getint(datain, number_of_pdb_records, 'H')
    assert secno >= 0 & secno < nsec, 'secno %d out of range (nsec=%d)' % (secno, nsec)
    secstart = getint(datain, first_pdb_record+secno*8)
    if secno == nsec-1:
        secend = len(datain)
    else:
        secend = getint(datain, first_pdb_record+(secno+1)*8)
    return secstart, secend


def readsection(datain, secno):
    secstart, secend = getsecaddr(datain, secno)
    return datain[secstart:secend]


def writesection(datain, secno, secdata): # overwrite, accounting for different length
    dataout = deletesectionrange(datain, secno, secno)
    return insertsection(dataout, secno, secdata)


def nullsection(datain, secno): # make it zero-length without deleting it
    datalst = []
    nsec = getint(datain, number_of_pdb_records, 'H')
    secstart, secend = getsecaddr(datain, secno)
    zerosecstart, zerosecend = getsecaddr(datain, 0)
    dif = secend-secstart
    datalst.append(datain[:first_pdb_record])
    for i in range(0, secno+1):
        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
        datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
    for i in range(secno+1, nsec):
        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
        ofs -= dif
        datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
    lpad = zerosecstart - (first_pdb_record + 8*nsec)
    if lpad > 0:
        datalst.append('\0' * lpad)
    datalst.append(datain[zerosecstart: secstart])
    datalst.append(datain[secend:])
    dataout = "".join(datalst)
    return dataout


def deletesectionrange(datain, firstsec, lastsec): # delete a range of sections
    datalst = []
    firstsecstart, firstsecend = getsecaddr(datain, firstsec)
    lastsecstart, lastsecend = getsecaddr(datain, lastsec)
    zerosecstart, zerosecend = getsecaddr(datain, 0)
    dif = lastsecend - firstsecstart + 8*(lastsec-firstsec+1)
    nsec = getint(datain, number_of_pdb_records, 'H')
    datalst.append(datain[:unique_id_seed])
    datalst.append(struct.pack('>L', 2*(nsec-(lastsec-firstsec+1))+1))
    datalst.append(datain[unique_id_seed+4:number_of_pdb_records])
    datalst.append(struct.pack('>H', nsec-(lastsec-firstsec+1)))
    newstart = zerosecstart - 8*(lastsec-firstsec+1)
    for i in range(0, firstsec):
        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
        ofs -= 8 * (lastsec - firstsec + 1)
        datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
    for i in range(lastsec+1, nsec):
        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
        ofs -= dif
        flgval = 2*(i-(lastsec-firstsec+1))
        datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
    lpad = newstart - (first_pdb_record + 8*(nsec - (lastsec - firstsec + 1)))
    if lpad > 0:
        datalst.append('\0' * lpad)
    datalst.append(datain[zerosecstart:firstsecstart])
    datalst.append(datain[lastsecend:])
    dataout = "".join(datalst)
    return dataout


def insertsection(datain, secno, secdata): # insert a new section
    datalst = []
    nsec = getint(datain, number_of_pdb_records, 'H')
    secstart, secend = getsecaddr(datain, secno)
    zerosecstart, zerosecend = getsecaddr(datain, 0)
    dif = len(secdata)
    datalst.append(datain[:unique_id_seed])
    datalst.append(struct.pack('>L', 2*(nsec+1)+1))
    datalst.append(datain[unique_id_seed+4:number_of_pdb_records])
    datalst.append(struct.pack('>H', nsec+1))
    newstart = zerosecstart + 8
    for i in range(0, secno):
        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
        ofs += 8
        datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
    datalst.append(struct.pack('>L', secstart + 8) + struct.pack('>L', (2*secno)))
    for i in range(secno, nsec):
        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
        ofs = ofs + dif + 8
        flgval = 2*(i+1)
        datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
    lpad = newstart - (first_pdb_record + 8*(nsec + 1))
    if lpad > 0:
        datalst.append('\0' * lpad)
    datalst.append(datain[zerosecstart:secstart])
    datalst.append(secdata)
    datalst.append(datain[secstart:])
    dataout = "".join(datalst)
    return dataout


def insertsectionrange(sectionsource, firstsec, lastsec, sectiontarget, targetsec): # insert a range of sections
    dataout = sectiontarget
    for idx in range(lastsec, firstsec-1, -1):
        dataout = insertsection(dataout, targetsec, readsection(sectionsource, idx))
    return dataout


def get_exth_params(rec0):
    ebase = mobi_header_base + getint(rec0, mobi_header_length)
    elen = getint(rec0, ebase+4)
    enum = getint(rec0, ebase+8)
    return ebase, elen, enum


def add_exth(rec0, exth_num, exth_bytes):
    ebase, elen, enum = get_exth_params(rec0)
    newrecsize = 8+len(exth_bytes)
    newrec0 = rec0[0:ebase+4]+struct.pack('>L', elen+newrecsize)+struct.pack('>L', enum+1) +\
        struct.pack('>L', exth_num) + struct.pack('>L', newrecsize)+exth_bytes+rec0[ebase+12:]
    newrec0 = writeint(newrec0, title_offset, getint(newrec0, title_offset)+newrecsize)
    return newrec0


def read_exth(rec0, exth_num):
    exth_values = []
    ebase, elen, enum = get_exth_params(rec0)
    ebase += 12
    while enum > 0:
        exth_id = getint(rec0, ebase)
        if exth_id == exth_num:
            # We might have multiple exths, so build a list.
            exth_values.append(rec0[ebase+8:ebase+getint(rec0, ebase+4)])
        enum -= 1
        ebase = ebase+getint(rec0, ebase+4)
    return exth_values


def write_exth(rec0, exth_num, exth_bytes):
    ebase, elen, enum = get_exth_params(rec0)
    ebase_idx = ebase+12
    enum_idx = enum
    while enum_idx > 0:
        exth_id = getint(rec0, ebase_idx)
        if exth_id == exth_num:
            dif = len(exth_bytes)+8-getint(rec0, ebase_idx+4)
            newrec0 = rec0
            if dif != 0:
                newrec0 = writeint(newrec0, title_offset, getint(newrec0, title_offset)+dif)
            return newrec0[:ebase+4]+struct.pack('>L', elen+len(exth_bytes)+8-getint(rec0, ebase_idx+4)) +\
                struct.pack('>L', enum)+rec0[ebase+12:ebase_idx+4] +\
                struct.pack('>L', len(exth_bytes)+8)+exth_bytes +\
                rec0[ebase_idx+getint(rec0, ebase_idx+4):]
        enum_idx -= 1
        ebase_idx = ebase_idx+getint(rec0, ebase_idx+4)
    return rec0


def del_exth(rec0, exth_num):
    ebase, elen, enum = get_exth_params(rec0)
    ebase_idx = ebase+12
    enum_idx = 0
    while enum_idx < enum:
        exth_id = getint(rec0, ebase_idx)
        exth_size = getint(rec0, ebase_idx+4)
        if exth_id == exth_num:
            newrec0 = rec0
            newrec0 = writeint(newrec0, title_offset, getint(newrec0, title_offset)-exth_size)
            newrec0 = newrec0[:ebase_idx]+newrec0[ebase_idx+exth_size:]
            newrec0 = newrec0[0:ebase+4]+struct.pack('>L', elen-exth_size)+struct.pack('>L', enum-1)+newrec0[ebase+12:]
            return newrec0
        enum_idx += 1
        ebase_idx = ebase_idx+exth_size
    return rec0


class mobi_split:
    def __init__(self, infile, newKindle):
        try:
            datain = open(infile, 'rb').read()
            datain_rec0 = readsection(datain, 0)
            ver = getint(datain_rec0, mobi_version)
            fake_asin = str(uuid4())
            self.combo = (ver != 8)
            if not self.combo:
                return
            exth121 = read_exth(datain_rec0, 121)
            if len(exth121) == 0:
                self.combo = False
                return
            else:
                # only pay attention to first exth121
                # (there should only be one)
                datain_kf8, = struct.unpack_from('>L', exth121[0], 0)
                if datain_kf8 == 0xffffffff:
                    self.combo = False
                    return
            datain_kfrec0 = readsection(datain, datain_kf8)
            firstimage = getint(datain_rec0, first_image_record)
            lastimage = getint(datain_rec0, last_content_index, 'H')

            if not newKindle:
                # create the standalone mobi7
                num_sec = getint(datain, number_of_pdb_records, 'H')
                # remove BOUNDARY up to but not including ELF record
                self.result_file = deletesectionrange(datain, datain_kf8-1, num_sec-2)
                # check if there are SRCS records and delete them
                srcs = getint(datain_rec0, srcs_index)
                num_srcs = getint(datain_rec0, srcs_count)
                if srcs != 0xffffffff and num_srcs > 0:
                    self.result_file = deletesectionrange(self.result_file, srcs, srcs+num_srcs-1)
                    datain_rec0 = writeint(datain_rec0, srcs_index, 0xffffffff)
                    datain_rec0 = writeint(datain_rec0, srcs_count, 0)
                # reset the EXTH 121 KF8 Boundary meta data to 0xffffffff
                datain_rec0 = write_exth(datain_rec0, 121, struct.pack('>L', 0xffffffff))
                # datain_rec0 = del_exth(datain_rec0,121)
                # datain_rec0 = del_exth(datain_rec0,534)
                # don't remove the EXTH 125 KF8 Count of Resources, seems to be present in mobi6 files as well
                # set the EXTH 129 KF8 Masthead / Cover Image string to the null string
                datain_rec0 = write_exth(datain_rec0, 129, '')
                # don't remove the EXTH 131 KF8 Unidentified Count, seems to be present in mobi6 files as well

                # Make sure we have an ASIN & cdeType set...
                if len(read_exth(datain_rec0, 113)) == 0:
                    datain_rec0 = add_exth(datain_rec0, 113, fake_asin)
                if len(read_exth(datain_rec0, 504)) == 0:
                    datain_rec0 = add_exth(datain_rec0, 504, fake_asin)
                if len(read_exth(datain_rec0, 501)) == 0:
                    datain_rec0 = add_exth(datain_rec0, 501, b'EBOK')

                # need to reset flags stored in 0x80-0x83
                # old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050
                # Bit Flags
                # 0x1000 = Bit 12 indicates if embedded fonts are used or not
                # 0x0800 = means this Header points to *shared* images/resource/fonts ??
                # 0x0080 = unknown new flag, why is this now being set by Kindlegen 2.8?
                # 0x0040 = exth exists
                # 0x0010 = Not sure but this is always set so far
                fval, = struct.unpack_from('>L', datain_rec0, 0x80)
                # need to remove flag 0x0800 for KindlePreviewer 2.8 and unset Bit 12 for embedded fonts
                fval &= 0x07FF
                datain_rec0 = datain_rec0[:0x80] + struct.pack('>L', fval) + datain_rec0[0x84:]
                self.result_file = writesection(self.result_file, 0, datain_rec0)
                if lastimage == 0xffff:
                    # find the lowest of the next sections and copy up to that.
                    ofs_list = [(kf8_last_content_index, 'L'), (fcis_index, 'L'), (flis_index, 'L'), (datp_index, 'L'),
                                (hufftbloff, 'L')]
                    for ofs, sz in ofs_list:
                        n = getint(datain_kfrec0, ofs, sz)
                        if 0 < n < lastimage:
                            lastimage = n-1

                # Try to null out FONT and RES, but leave the (empty) PDB record so image refs remain valid
                for i in range(firstimage, lastimage):
                    imgsec = readsection(self.result_file, i)
                    if imgsec[0:4] in ['RESC', 'FONT']:
                        self.result_file = nullsection(self.result_file, i)
                # mobi7 finished
            else:
                # create standalone mobi8
                self.result_file = deletesectionrange(datain, 0, datain_kf8-1)
                target = getint(datain_kfrec0, first_image_record)
                self.result_file = insertsectionrange(datain, firstimage, lastimage, self.result_file, target)
                datain_kfrec0 = readsection(self.result_file, 0)

                # Only keep the correct EXTH 116 StartOffset, KG 2.5 carries over the one from the mobi7 part,
                # which then points at garbage in the mobi8 part, and confuses FW 3.4
                kf8starts = read_exth(datain_kfrec0, 116)
                # If we have multiple StartOffset, keep only the last one
                kf8start_count = len(kf8starts)
                while kf8start_count > 1:
                    kf8start_count -= 1
                    datain_kfrec0 = del_exth(datain_kfrec0, 116)

                # update the EXTH 125 KF8 Count of Images/Fonts/Resources
                datain_kfrec0 = write_exth(datain_kfrec0, 125, struct.pack('>L', lastimage-firstimage+1))

                # Same dance for the KF8, we want an ASIN & cdeType :)
                if len(read_exth(datain_kfrec0, 113)) == 0:
                    datain_kfrec0 = add_exth(datain_kfrec0, 113, fake_asin)
                if len(read_exth(datain_kfrec0, 504)) == 0:
                    datain_kfrec0 = add_exth(datain_kfrec0, 504, fake_asin)
                if len(read_exth(datain_kfrec0, 501)) == 0:
                    datain_kfrec0 = add_exth(datain_kfrec0, 501, b'EBOK')

                # need to reset flags stored in 0x80-0x83
                # old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050
                # standalone mobi8 with exth: 0x0050
                # Bit Flags
                # 0x1000 = Bit 12 indicates if embedded fonts are used or not
                # 0x0800 = means this Header points to *shared* images/resource/fonts ??
                # 0x0080 = unknown new flag, why is this now being set by Kindlegen 2.8?
                # 0x0040 = exth exists
                # 0x0010 = Not sure but this is always set so far
                fval, = struct.unpack_from('>L', datain_kfrec0, 0x80)
                fval &= 0x1FFF
                fval |= 0x0800
                datain_kfrec0 = datain_kfrec0[:0x80] + struct.pack('>L', fval) + datain_kfrec0[0x84:]

                # properly update other index pointers that have been shifted by the insertion of images
                ofs_list = [(kf8_last_content_index, 'L'), (fcis_index, 'L'), (flis_index, 'L'), (datp_index, 'L'),
                            (hufftbloff, 'L')]
                for ofs, sz in ofs_list:
                    n = getint(datain_kfrec0, ofs, sz)
                    if n != 0xffffffff:
                        datain_kfrec0 = writeint(datain_kfrec0, ofs, n+lastimage-firstimage+1, sz)
                self.result_file = writesection(self.result_file, 0, datain_kfrec0)
                # mobi8 finished
        except Exception:
            raise

    def getResult(self):
        return self.result_file
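
For reference, the WorkerThread hunk earlier in this commit drives the new module roughly as follows. This is a minimal standalone sketch, not part of the commit: the file path and profile value are illustrative, it assumes a KindleGen "combo" MOBI as KCC produces, and only the '_toclean' rename, mobi_split() and getResult() come from the diff above.

import shutil
import kindlesplit

mobiPath = 'comic.mobi'  # assumed KindleGen output path (illustrative)
profile = 'KHD'          # hypothetical device profile string
# newKindle=True keeps the KF8 (mobi8) part, False keeps the mobi7 part
newKindle = profile in ['K345', 'KHD', 'KF', 'KFHD', 'KFHD8', 'KFA']

# Move the original aside, split/clean it, and write the result back
shutil.move(mobiPath, mobiPath + '_toclean')
mobisplit = kindlesplit.mobi_split(mobiPath + '_toclean', newKindle)
open(mobiPath, 'wb').write(mobisplit.getResult())
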
@@ -1,236 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
#
# This is a python script. You need a Python interpreter to run it.
# For example, ActiveState Python, which exists for windows.
#
# This script strips the penultimate record from a Mobipocket file.
# This is useful because the current KindleGen add a compressed copy
# of the source files used in this record, making the ebook produced
# about twice as big as it needs to be.
#
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# For more information, please refer to <http://unlicense.org/>
#
# Written by Paul Durrant, 2010-2011, paul@durrant.co.uk, pdurrant on mobileread.com
# With enhancements by Kevin Hendricks, KevinH on mobileread.com
#
# Changelog
# 1.00 - Initial version
# 1.10 - Added an option to output the stripped data
# 1.20 - Added check for source files section (thanks Piquan)
# 1.30 - Added prelim Support for K8 style mobis
# 1.31 - removed the SRCS section but kept a 0 size entry for it
# 1.32 - removes the SRCS section and its entry, now updates metadata 121 if needed
# 1.33 - now uses and modifies mobiheader SRCS and CNT
# 1.34 - added credit for Kevin Hendricks
# 1.35 - fixed bug when more than one compilation (SRCS/CMET) records

__version__ = '1.35'

import sys
import struct
import binascii

class Unbuffered:
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        self.stream.write(data)
        self.stream.flush()
    def __getattr__(self, attr):
        return getattr(self.stream, attr)


class StripException(Exception):
    pass


class SectionStripper:
    def loadSection(self, section):
        if (section + 1 == self.num_sections):
            endoff = len(self.data_file)
        else:
            endoff = self.sections[section + 1][0]
        off = self.sections[section][0]
        return self.data_file[off:endoff]

    def patch(self, off, new):
        self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]

    def strip(self, off, len):
        self.data_file = self.data_file[:off] + self.data_file[off+len:]

    def patchSection(self, section, new, in_off = 0):
        if (section + 1 == self.num_sections):
            endoff = len(self.data_file)
        else:
            endoff = self.sections[section + 1][0]
        off = self.sections[section][0]
        assert off + in_off + len(new) <= endoff
        self.patch(off + in_off, new)

    def updateEXTH121(self, srcs_secnum, srcs_cnt, mobiheader):
        mobi_length, = struct.unpack('>L',mobiheader[0x14:0x18])
        exth_flag, = struct.unpack('>L', mobiheader[0x80:0x84])
        exth = 'NONE'
        try:
            if exth_flag & 0x40:
                exth = mobiheader[16 + mobi_length:]
            if (len(exth) >= 4) and (exth[:4] == 'EXTH'):
                nitems, = struct.unpack('>I', exth[8:12])
                pos = 12
                for i in xrange(nitems):
                    type, size = struct.unpack('>II', exth[pos: pos + 8])
                    # print type, size
                    if type == 121:
                        boundaryptr, =struct.unpack('>L',exth[pos+8: pos + size])
                        if srcs_secnum <= boundaryptr:
                            boundaryptr -= srcs_cnt
                            prefix = mobiheader[0:16 + mobi_length + pos + 8]
                            suffix = mobiheader[16 + mobi_length + pos + 8 + 4:]
                            nval = struct.pack('>L',boundaryptr)
                            mobiheader = prefix + nval + suffix
                    pos += size
        except:
            pass
        return mobiheader

    def __init__(self, datain):
        if datain[0x3C:0x3C+8] != 'BOOKMOBI':
            raise StripException("invalid file format")
        self.num_sections, = struct.unpack('>H', datain[76:78])

        # get mobiheader and check SRCS section number and count
        offset0, = struct.unpack_from('>L', datain, 78)
        offset1, = struct.unpack_from('>L', datain, 86)
        mobiheader = datain[offset0:offset1]
        srcs_secnum, srcs_cnt = struct.unpack_from('>2L', mobiheader, 0xe0)
        if srcs_secnum == 0xffffffff or srcs_cnt == 0:
            raise StripException("File doesn't contain the sources section.")

        print "Found SRCS section number %d, and count %d" % (srcs_secnum, srcs_cnt)
        # find its offset and length
        next = srcs_secnum + srcs_cnt
        srcs_offset, flgval = struct.unpack_from('>2L', datain, 78+(srcs_secnum*8))
        next_offset, flgval = struct.unpack_from('>2L', datain, 78+(next*8))
        srcs_length = next_offset - srcs_offset
        if datain[srcs_offset:srcs_offset+4] != 'SRCS':
            raise StripException("SRCS section num does not point to SRCS.")
        print " beginning at offset %0x and ending at offset %0x" % (srcs_offset, srcs_length)

        # it appears bytes 68-71 always contain (2*num_sections) + 1
        # this is not documented anyplace at all but it appears to be some sort of next
        # available unique_id used to identify specific sections in the palm db
        self.data_file = datain[:68] + struct.pack('>L',((self.num_sections-srcs_cnt)*2+1))
        self.data_file += datain[72:76]

        # write out the number of sections reduced by srtcs_cnt
        self.data_file = self.data_file + struct.pack('>H',self.num_sections-srcs_cnt)

        # we are going to remove srcs_cnt SRCS sections so the offset of every entry in the table
        # up to the srcs secnum must begin 8 bytes earlier per section removed (each table entry is 8 )
        delta = -8 * srcs_cnt
        for i in xrange(srcs_secnum):
            offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8))
            offset += delta
            self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval)

        # for every record after the srcs_cnt SRCS records we must start it
        # earlier by 8*srcs_cnt + the length of the srcs sections themselves)
        delta = delta - srcs_length
        for i in xrange(srcs_secnum+srcs_cnt,self.num_sections):
            offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8))
            offset += delta
            flgval = 2 * (i - srcs_cnt)
            self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval)

        # now pad it out to begin right at the first offset
        # typically this is 2 bytes of nulls
        first_offset, flgval = struct.unpack_from('>2L', self.data_file, 78)
        self.data_file += '\0' * (first_offset - len(self.data_file))

        # now finally add on every thing up to the original src_offset
        self.data_file += datain[offset0: srcs_offset]

        # and everything afterwards
        self.data_file += datain[srcs_offset+srcs_length:]

        #store away the SRCS section in case the user wants it output
        self.stripped_data_header = datain[srcs_offset:srcs_offset+16]
        self.stripped_data = datain[srcs_offset+16:srcs_offset+srcs_length]

        # update the number of sections count
        self.num_section = self.num_sections - srcs_cnt

        # update the srcs_secnum and srcs_cnt in the mobiheader
        offset0, flgval0 = struct.unpack_from('>2L', self.data_file, 78)
        offset1, flgval1 = struct.unpack_from('>2L', self.data_file, 86)
        mobiheader = self.data_file[offset0:offset1]
        mobiheader = mobiheader[:0xe0]+ struct.pack('>L', 0xffffffff) + struct.pack('>L', 0) + mobiheader[0xe8:]

        # if K8 mobi, handle metadata 121 in old mobiheader
        mobiheader = self.updateEXTH121(srcs_secnum, srcs_cnt, mobiheader)
        self.data_file = self.data_file[0:offset0] + mobiheader + self.data_file[offset1:]
        print "done"

    def getResult(self):
        return self.data_file

    def getStrippedData(self):
        return self.stripped_data

    def getHeader(self):
        return self.stripped_data_header

def main(argv=None):
    infile = argv[0]
    outfile = argv[1]
    data_file = file(infile, 'rb').read()
    try:
        strippedFile = SectionStripper(data_file)
        file(outfile, 'wb').write(strippedFile.getResult())
        print "Header Bytes: " + binascii.b2a_hex(strippedFile.getHeader())
        if len(argv)==3:
            file(argv[2], 'wb').write(strippedFile.getStrippedData())
    except StripException, e:
        print "Error: %s" % e
        sys.exit(1)

if __name__ == "__main__":
    sys.stdout=Unbuffered(sys.stdout)
    print ('KindleStrip v%(__version__)s. '
           'Written 2010-2012 by Paul Durrant and Kevin Hendricks.' % globals())
    if len(sys.argv)<3 or len(sys.argv)>4:
        print "Strips the Sources record from Mobipocket ebooks"
        print "For ebooks generated using KindleGen 1.1 and later that add the source"
        print "Usage:"
        print " %s <infile> <outfile> <strippeddatafile>" % sys.argv[0]
        print "<strippeddatafile> is optional."
        sys.exit(1)
    else:
        main(sys.argv[1:])
        sys.exit(0)