From 15e86b28abac122e91aeee75e4850904dbbd2137 Mon Sep 17 00:00:00 2001 From: Ciro Mattia Gonano Date: Sat, 9 Mar 2013 17:17:40 +0100 Subject: [PATCH] Do not use python-magic, instead use is_zipfile and is_rarfile, falling back on extension for PDFs. --- README.md | 2 +- kcc/cbxarchive.py | 21 ++-- kcc/comic2ebook.py | 27 ++--- kcc/magic.py | 244 --------------------------------------------- 4 files changed, 18 insertions(+), 276 deletions(-) delete mode 100644 kcc/magic.py diff --git a/README.md b/README.md index 57dec6c..dfbf777 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ The app relies and includes the following scripts/binaries: Epub margins support (#30) Fixed no file added if file has no spaces on Windows (#25) Gracefully exit if unrar missing (#15) - Do not call kindlegen if source epub is bigger than 300MB (#17) + Do not call kindlegen if source epub is bigger than 320MB (#17) Get filetype from magic number (#14) PDF conversion works again diff --git a/kcc/cbxarchive.py b/kcc/cbxarchive.py index b0716af..39176bb 100644 --- a/kcc/cbxarchive.py +++ b/kcc/cbxarchive.py @@ -19,17 +19,17 @@ __copyright__ = '2012-2013, Ciro Mattia Gonano ' __docformat__ = 'restructuredtext en' import os -import magic +import zipfile +import rarfile class CBxArchive: def __init__(self, origFileName): self.origFileName = origFileName - mime = magic.from_buffer(open(origFileName).read(1024), mime=True) - if mime == 'application/x-rar': - self.compressor = 'rar' - elif mime == 'application/zip': + if zipfile.is_zipfile(origFileName): self.compressor = 'zip' + elif rarfile.is_rarfile(origFileName): + self.compressor = 'rar' else: self.compressor = None @@ -37,11 +37,7 @@ class CBxArchive: return self.compressor is not None def extractCBZ(self, targetdir): - try: - from zipfile import ZipFile - except ImportError: - self.cbzFile = None - cbzFile = ZipFile(self.origFileName) + cbzFile = zipfile.ZipFile(self.origFileName) for f in cbzFile.namelist(): if f.startswith('__MACOSX') or f.endswith('.DS_Store'): pass # skip MacOS special files @@ -54,11 +50,6 @@ class CBxArchive: cbzFile.extract(f, targetdir) def extractCBR(self, targetdir): - try: - import rarfile - except ImportError: - self.cbrFile = None - return cbrFile = rarfile.RarFile(self.origFileName) for f in cbrFile.namelist(): if f.startswith('__MACOSX') or f.endswith('.DS_Store'): diff --git a/kcc/comic2ebook.py b/kcc/comic2ebook.py index 53cb599..03bd72c 100755 --- a/kcc/comic2ebook.py +++ b/kcc/comic2ebook.py @@ -392,23 +392,18 @@ def getWorkFolder(afile): path = workdir except OSError: raise + elif afile.lower().endswith('.pdf'): + pdf = pdfjpgextract.PdfJpgExtract(afile) + path = pdf.extract() else: - import magic - mime = magic.from_buffer(open(afile).read(1024), mime=True) - if mime == 'application/pdf': - pdf = pdfjpgextract.PdfJpgExtract(afile) - path = pdf.extract() - elif mime == 'application/x-rar' or mime == 'application/zip': - cbx = cbxarchive.CBxArchive(afile) - if cbx.isCbxFile(): - try: - path = cbx.extract(workdir) - except OSError: - print 'Unrar not found, please download from ' + \ - 'http://www.rarlab.com/download.htm and put into your PATH.' - sys.exit(21) - else: - raise TypeError + cbx = cbxarchive.CBxArchive(afile) + if cbx.isCbxFile(): + try: + path = cbx.extract(workdir) + except OSError: + print 'Unrar not found, please download from ' + \ + 'http://www.rarlab.com/download.htm and put into your PATH.' + sys.exit(21) else: raise TypeError move(path, path + "_temp") diff --git a/kcc/magic.py b/kcc/magic.py deleted file mode 100644 index 9df32a1..0000000 --- a/kcc/magic.py +++ /dev/null @@ -1,244 +0,0 @@ -""" -magic is a wrapper around the libmagic file identification library. - -See README for more information. - -Usage: - ->>> import magic ->>> magic.from_file("testdata/test.pdf") -'PDF document, version 1.2' ->>> magic.from_file("testdata/test.pdf", mime=True) -'application/pdf' ->>> magic.from_buffer(open("testdata/test.pdf").read(1024)) -'PDF document, version 1.2' ->>> - - -""" - -import sys -import os.path -import ctypes -import ctypes.util - -from ctypes import c_char_p, c_int, c_size_t, c_void_p - -class MagicException(Exception): pass - -class Magic: - """ - Magic is a wrapper around the libmagic C library. - - """ - - def __init__(self, mime=False, magic_file=None, mime_encoding=False): - """ - Create a new libmagic wrapper. - - mime - if True, mimetypes are returned instead of textual descriptions - mime_encoding - if True, codec is returned - magic_file - use a mime database other than the system default - - """ - flags = MAGIC_NONE - if mime: - flags |= MAGIC_MIME - elif mime_encoding: - flags |= MAGIC_MIME_ENCODING - - self.cookie = magic_open(flags) - - magic_load(self.cookie, magic_file) - - - def from_buffer(self, buf): - """ - Identify the contents of `buf` - """ - return magic_buffer(self.cookie, buf) - - def from_file(self, filename): - """ - Identify the contents of file `filename` - raises IOError if the file does not exist - """ - - if not os.path.exists(filename): - raise IOError("File does not exist: " + filename) - - return magic_file(self.cookie, filename) - - def __del__(self): - # during shutdown magic_close may have been cleared already - if self.cookie and magic_close: - magic_close(self.cookie) - self.cookie = None - -_magic_mime = None -_magic = None - -def _get_magic_mime(): - global _magic_mime - if not _magic_mime: - _magic_mime = Magic(mime=True) - return _magic_mime - -def _get_magic(): - global _magic - if not _magic: - _magic = Magic() - return _magic - -def _get_magic_type(mime): - if mime: - return _get_magic_mime() - else: - return _get_magic() - -def from_file(filename, mime=False): - m = _get_magic_type(mime) - return m.from_file(filename) - -def from_buffer(buffer, mime=False): - m = _get_magic_type(mime) - return m.from_buffer(buffer) - - - - -libmagic = None -# Let's try to find magic or magic1 -dll = ctypes.util.find_library('magic') or ctypes.util.find_library('magic1') - -# This is necessary because find_library returns None if it doesn't find the library -if dll: - libmagic = ctypes.CDLL(dll) - -if not libmagic or not libmagic._name: - import sys - platform_to_lib = {'darwin': ['/opt/local/lib/libmagic.dylib', - '/usr/local/lib/libmagic.dylib', - '/usr/local/Cellar/libmagic/5.10/lib/libmagic.dylib'], - 'win32': ['magic1.dll']} - for dll in platform_to_lib.get(sys.platform, []): - try: - libmagic = ctypes.CDLL(dll) - except OSError: - pass - -if not libmagic or not libmagic._name: - # It is better to raise an ImportError since we are importing magic module - raise ImportError('failed to find libmagic. Check your installation') - -magic_t = ctypes.c_void_p - -def errorcheck(result, func, args): - err = magic_error(args[0]) - if err is not None: - raise MagicException(err) - else: - return result - -def coerce_filename(filename): - if filename is None: - return None - return filename.encode(sys.getfilesystemencoding()) - -magic_open = libmagic.magic_open -magic_open.restype = magic_t -magic_open.argtypes = [c_int] - -magic_close = libmagic.magic_close -magic_close.restype = None -magic_close.argtypes = [magic_t] - -magic_error = libmagic.magic_error -magic_error.restype = c_char_p -magic_error.argtypes = [magic_t] - -magic_errno = libmagic.magic_errno -magic_errno.restype = c_int -magic_errno.argtypes = [magic_t] - -_magic_file = libmagic.magic_file -_magic_file.restype = c_char_p -_magic_file.argtypes = [magic_t, c_char_p] -_magic_file.errcheck = errorcheck - -def magic_file(cookie, filename): - return _magic_file(cookie, coerce_filename(filename)) - -_magic_buffer = libmagic.magic_buffer -_magic_buffer.restype = c_char_p -_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t] -_magic_buffer.errcheck = errorcheck - - -def magic_buffer(cookie, buf): - return _magic_buffer(cookie, buf, len(buf)) - - -_magic_load = libmagic.magic_load -_magic_load.restype = c_int -_magic_load.argtypes = [magic_t, c_char_p] -_magic_load.errcheck = errorcheck - -def magic_load(cookie, filename): - return _magic_load(cookie, coerce_filename(filename)) - -magic_setflags = libmagic.magic_setflags -magic_setflags.restype = c_int -magic_setflags.argtypes = [magic_t, c_int] - -magic_check = libmagic.magic_check -magic_check.restype = c_int -magic_check.argtypes = [magic_t, c_char_p] - -magic_compile = libmagic.magic_compile -magic_compile.restype = c_int -magic_compile.argtypes = [magic_t, c_char_p] - - - -MAGIC_NONE = 0x000000 # No flags - -MAGIC_DEBUG = 0x000001 # Turn on debugging - -MAGIC_SYMLINK = 0x000002 # Follow symlinks - -MAGIC_COMPRESS = 0x000004 # Check inside compressed files - -MAGIC_DEVICES = 0x000008 # Look at the contents of devices - -MAGIC_MIME = 0x000010 # Return a mime string - -MAGIC_MIME_ENCODING = 0x000400 # Return the MIME encoding - -MAGIC_CONTINUE = 0x000020 # Return all matches - -MAGIC_CHECK = 0x000040 # Print warnings to stderr - -MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit - -MAGIC_RAW = 0x000100 # Don't translate unprintable chars - -MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors - -MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files - -MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files - -MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries - -MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type - -MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details - -MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files - -MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff - -MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran - -MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens