1
0
mirror of https://github.com/ciromattia/kcc synced 2025-12-13 01:36:27 +00:00

Add PDF jpg image extraction (fixes #2)

More work on GUI with Tkinter.
This commit is contained in:
Ciro Mattia Gonano
2013-01-14 12:53:13 +01:00
parent 988a357555
commit 2a7b2c9e3d
5 changed files with 184 additions and 38 deletions

View File

@@ -38,7 +38,7 @@ __version__ = '1.30'
import os
import sys
from optparse import OptionParser
import image, cbxarchive
import image, cbxarchive, pdfjpgextract
class HTMLbuilder:
@@ -149,15 +149,21 @@ def main(argv=None):
help="Comic title")
parser.add_option("-m", "--manga-style", action="store_true", dest="righttoleft", default=False,
help="Split pages 'manga style' (right-to-left reading)")
options, args = parser.parse_args()
options, args = parser.parse_args(argv)
if len(args) != 1:
parser.print_help()
sys.exit(1)
return
dir = args[0]
cbx = cbxarchive.CBxArchive(dir)
if cbx.isCbxFile():
cbx.extract()
dir = cbx.getPath()
fname = os.path.splitext(dir)
if (fname[1].lower() == '.pdf'):
pdf = pdfjpgextract.PdfJpgExtract(dir)
pdf.extract()
dir = pdf.getPath()
else:
cbx = cbxarchive.CBxArchive(dir)
if cbx.isCbxFile():
cbx.extract()
dir = cbx.getPath()
filelist = []
try:
print "Splitting double pages..."
@@ -192,5 +198,5 @@ def main(argv=None):
if __name__ == "__main__":
Copyright()
main()
main(sys.argv[1:])
sys.exit(0)

9
kcc.py
View File

@@ -31,14 +31,11 @@
# - Improve error reporting
# - recurse into dirtree for multiple comics
__version__ = '1.30'
__version__ = '2.0'
from Tkinter import *
from kcc import gui
root = Tk()
app = gui.MainWindow(master=root)
app.master.title("Kindle Comic Converter v" + __version__)
app.master.maxsize(1000, 400)
app.mainloop()
root.destroy()
app = gui.MainWindow(master=root,title="Kindle Comic Converter v" + __version__)
root.mainloop()

View File

@@ -38,7 +38,7 @@ __version__ = '1.30'
import os
import sys
from optparse import OptionParser
import image, cbxarchive
import image, cbxarchive, pdfjpgextract
class HTMLbuilder:
@@ -149,15 +149,21 @@ def main(argv=None):
help="Comic title")
parser.add_option("-m", "--manga-style", action="store_true", dest="righttoleft", default=False,
help="Split pages 'manga style' (right-to-left reading)")
options, args = parser.parse_args()
options, args = parser.parse_args(argv)
if len(args) != 1:
parser.print_help()
sys.exit(1)
return
dir = args[0]
cbx = cbxarchive.CBxArchive(dir)
if cbx.isCbxFile():
cbx.extract()
dir = cbx.getPath()
fname = os.path.splitext(dir)
if (fname[1].lower() == '.pdf'):
pdf = pdfjpgextract.PdfJpgExtract(dir)
pdf.extract()
dir = pdf.getPath()
else:
cbx = cbxarchive.CBxArchive(dir)
if cbx.isCbxFile():
cbx.extract()
dir = cbx.getPath()
filelist = []
try:
print "Splitting double pages..."
@@ -192,5 +198,5 @@ def main(argv=None):
if __name__ == "__main__":
Copyright()
main()
main(sys.argv[1:])
sys.exit(0)

View File

@@ -18,26 +18,86 @@
from Tkinter import *
import tkFileDialog
import comic2ebook
from image import ProfileData
class MainWindow:
def clear_files(self):
# Forget every queued input by rebinding self.files to a new empty list,
# then repopulate the listbox so the display matches.
self.files = []
self.refresh_list()
class MainWindow(Frame):
def open_files(self):
self.files = tkFileDialog.askopenfilename()
filetypes = [('all files', '.*'), ('Comic files', ('*.cbr','*.cbz','*.zip','*.rar'))]
f = tkFileDialog.askopenfilenames(title="Choose a file...",filetypes=filetypes)
if (isinstance(f,tuple) == False):
try:
import re
f = re.findall('\{(.*?)\}', f)
except:
import tkMessageBox
tkMessageBox.showerror(
"Open file",
"askopenfilename() returned other than a tuple and no regex module could be found"
)
sys.exit(1)
self.files.extend(f)
self.refresh_list()
def createWidgets(self):
self.QUIT = Button(self)
self.QUIT["text"] = "Quit"
self.QUIT["fg"] = "red"
self.QUIT["command"] = self.quit
self.QUIT.pack({"side": "right"})
def open_folder(self):
    """Ask the user for a folder and add it to the queued inputs.

    The selected directory is appended to ``self.files`` so that the
    attribute stays a list of paths, like every other method expects;
    assigning the returned string directly would discard earlier entries
    and make later loops iterate over its characters.
    """
    folder = tkFileDialog.askdirectory(title="Choose a folder...")
    # An empty result means nothing was chosen (e.g. the dialog was
    # cancelled), so the current list is left untouched.
    if folder:
        self.files.append(folder)
    self.refresh_list()
self.OPENFILES = Button(self)
self.OPENFILES["text"] = "Open files",
self.OPENFILES["command"] = self.open_files
self.OPENFILES.pack({"side": "left"})
def refresh_list(self):
# Rebuild the rows of the self.filelocation listbox from self.files.
# The widget is switched to NORMAL for the update and back to DISABLED
# afterwards -- presumably because a disabled listbox ignores
# insert/delete; confirm against the Tk documentation.
self.filelocation.config(state=NORMAL)
# Drop every existing row before inserting the current entries.
self.filelocation.delete(0, END)
for file in self.files:
self.filelocation.insert(END, file)
self.filelocation.config(state=DISABLED)
def __init__(self, master=None):
Frame.__init__(self, master)
self.pack()
self.createWidgets()
def initialize(self):
# Create and pack the widgets inside self.master: the file listbox, the
# clear/add buttons, one radio button per entry of ProfileData.Profiles,
# the manga-style checkbox and the button that starts the conversion.
self.filelocation = Listbox(self.master)
self.filelocation.pack(fill=BOTH, expand=1)
# Fill the listbox from the current self.files.
self.refresh_list()
self.clear_file = Button(self.master, text="Clear files", command=self.clear_files)
self.clear_file.pack(side=LEFT)
self.open_file = Button(self.master, text="Add files...", command=self.open_files)
self.open_file.pack(side=LEFT)
# NOTE(review): the attribute assigned here has the same name as the
# open_folder method. The bound method is passed as `command` before the
# assignment happens, but afterwards self.open_folder is the Button.
self.open_folder = Button(self.master, text="Add folder...", command=self.open_folder)
self.open_folder.pack(side=LEFT)
# Holds the selected profile; "KHD" is the initial selection.
self.profile = StringVar()
self.profile.set("KHD")
# Each item of ProfileData.Profiles is used both as the radio button label
# and as the value stored in self.profile (ProfileData comes from the
# image module, which is not visible here).
for text in ProfileData.Profiles:
b = Radiobutton(self.master, text=text,
variable=self.profile, value=text)
b.pack(anchor=W,fill=BOTH)
# NOTE(review): the BooleanVar created on the first line is immediately
# replaced by the plain bool False on the second, so the Checkbutton below
# is given False as its `variable` -- this looks unintended; confirm.
self.mangastyle = BooleanVar()
self.mangastyle = False
self.c = Checkbutton(self.master, text="Split manga-style (right-to-left reading)",
variable=self.mangastyle)
self.c.pack()
# Button that runs self.convert.
self.submit = Button(self.master, text="Execute!", command=self.convert, fg="red")
self.submit.pack()
def convert(self):
# Call comic2ebook.main once per entry of self.files. Every call receives
# "-p <selected profile>", "-m" when self.mangastyle compares equal to
# True, and finally the entry itself as the single positional argument.
argv = ["-p",self.profile.get()]
# NOTE(review): this compares the attribute itself with True; it does not
# call .get() on a Tk variable -- verify against what initialize() stores.
if (self.mangastyle == True):
argv.append("-m")
for entry in self.files:
# Copy the shared options so that each call gets its own argument list.
subargv = list(argv)
subargv.append(entry)
comic2ebook.main(subargv)
print "Done!"
def __init__(self, master, title):
# master: the window object the widgets are placed in; its title() method
#         is called with `title`.
# title:  text passed to master.title().
# Starts with an empty list of queued inputs and then builds the widgets
# through initialize().
self.files = []
self.master = master
self.master.title(title)
self.initialize()

77
kcc/pdfjpgextract.py Normal file
View File

@@ -0,0 +1,77 @@
# Copyright (c) 2012 Ciro Mattia Gonano <ciromattia@gmail.com>
#
# Based upon the code snippet by Ned Batchelder
# (http://nedbatchelder.com/blog/200712/extracting_jpgs_from_pdfs.html)
#
# Permission to use, copy, modify, and/or distribute this software for
# any purpose with or without fee is hereby granted, provided that the
# above copyright notice and this permission notice appear in all
# copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
#
__version__ = '1.0'
import os
class PdfJpgExtract:
    """Pull embedded JPEG images out of a PDF by scanning its raw bytes.

    No PDF parsing is done: the file is searched for ``stream`` keywords
    that are followed within 20 bytes by the JPEG start marker (FF D8),
    and the bytes up to the end marker (FF D9) found near the matching
    ``endstream`` keyword are written out as ``jpg<N>.jpg``.
    """

    def __init__(self, origFileName):
        """Remember the source file and derive the output directory.

        origFileName -- path of the PDF to read. The output directory is
                        this path with its extension removed.
        """
        self.cbxexts = ['.zip','.cbz','.rar','.cbr']
        self.origFileName = origFileName
        self.filename = os.path.splitext(origFileName)
        self.path = self.filename[0]

    def getPath(self):
        """Return the directory the images are extracted into."""
        return self.path

    def extract(self):
        """Write every JPEG found in the PDF to ``getPath()``.

        Files are named ``jpg0.jpg``, ``jpg1.jpg``, ... in the order in
        which they appear in the document.

        Raises OSError if the output directory cannot be created (for
        instance because it already exists) and Exception if a stream
        that starts like a JPEG has no ``endstream`` keyword or no JPEG
        end marker after it.

        The earlier trailing step that tried to flatten a lone
        sub-directory has been removed: the output directory only ever
        contains the files written here, so with exactly one extracted
        image it ran os.listdir() on that file and raised OSError.
        """
        # Read the whole document at once and release the handle right away.
        with open(self.origFileName, "rb") as pdffile:
            pdf = pdffile.read()

        startmark = b"\xff\xd8"
        endmark = b"\xff\xd9"
        # find() returns the offset where the end marker starts; the marker
        # itself belongs to the image, so the slice must end after it.
        endfix = len(endmark)

        i = 0
        njpg = 0
        os.makedirs(self.path)
        while True:
            istream = pdf.find(b"stream", i)
            if istream < 0:
                break
            # Only streams whose data begins with the JPEG start marker are
            # of interest; it has to show up within 20 bytes of the keyword.
            istart = pdf.find(startmark, istream, istream + 20)
            if istart < 0:
                i = istream + 20
                continue
            iend = pdf.find(b"endstream", istart)
            if iend < 0:
                raise Exception("Didn't find end of stream!")
            # Start looking for the end marker 20 bytes before "endstream".
            iend = pdf.find(endmark, iend - 20)
            if iend < 0:
                raise Exception("Didn't find end of JPG!")
            iend += endfix
            print("JPG %d from %d to %d" % (njpg, istart, iend))
            target = os.path.join(self.path, "jpg%d.jpg" % njpg)
            with open(target, "wb") as jpgfile:
                jpgfile.write(pdf[istart:iend])
            njpg += 1
            i = iend