mirror of
https://github.com/ciromattia/kcc
synced 2025-12-13 09:46:25 +00:00
Do not use python-magic; instead, use is_zipfile and is_rarfile, falling back on the file extension for PDFs.
This commit is contained in:
@@ -116,7 +116,7 @@ The app relies and includes the following scripts/binaries:
|
|||||||
Epub margins support (#30)
|
Epub margins support (#30)
|
||||||
Fixed no file added if file has no spaces on Windows (#25)
|
Fixed no file added if file has no spaces on Windows (#25)
|
||||||
Gracefully exit if unrar missing (#15)
|
Gracefully exit if unrar missing (#15)
|
||||||
Do not call kindlegen if source epub is bigger than 300MB (#17)
|
Do not call kindlegen if source epub is bigger than 320MB (#17)
|
||||||
Get filetype from magic number (#14)
|
Get filetype from magic number (#14)
|
||||||
PDF conversion works again
|
PDF conversion works again
|
||||||
|
|
||||||
|
|||||||
@@ -19,17 +19,17 @@ __copyright__ = '2012-2013, Ciro Mattia Gonano <ciromattia@gmail.com>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import magic
|
import zipfile
|
||||||
|
import rarfile
|
||||||
|
|
||||||
|
|
||||||
class CBxArchive:
|
class CBxArchive:
|
||||||
def __init__(self, origFileName):
|
def __init__(self, origFileName):
|
||||||
self.origFileName = origFileName
|
self.origFileName = origFileName
|
||||||
mime = magic.from_buffer(open(origFileName).read(1024), mime=True)
|
if zipfile.is_zipfile(origFileName):
|
||||||
if mime == 'application/x-rar':
|
|
||||||
self.compressor = 'rar'
|
|
||||||
elif mime == 'application/zip':
|
|
||||||
self.compressor = 'zip'
|
self.compressor = 'zip'
|
||||||
|
elif rarfile.is_rarfile(origFileName):
|
||||||
|
self.compressor = 'rar'
|
||||||
else:
|
else:
|
||||||
self.compressor = None
|
self.compressor = None
|
||||||
|
|
||||||
@@ -37,11 +37,7 @@ class CBxArchive:
|
|||||||
return self.compressor is not None
|
return self.compressor is not None
|
||||||
|
|
||||||
def extractCBZ(self, targetdir):
|
def extractCBZ(self, targetdir):
|
||||||
try:
|
cbzFile = zipfile.ZipFile(self.origFileName)
|
||||||
from zipfile import ZipFile
|
|
||||||
except ImportError:
|
|
||||||
self.cbzFile = None
|
|
||||||
cbzFile = ZipFile(self.origFileName)
|
|
||||||
for f in cbzFile.namelist():
|
for f in cbzFile.namelist():
|
||||||
if f.startswith('__MACOSX') or f.endswith('.DS_Store'):
|
if f.startswith('__MACOSX') or f.endswith('.DS_Store'):
|
||||||
pass # skip MacOS special files
|
pass # skip MacOS special files
|
||||||
@@ -54,11 +50,6 @@ class CBxArchive:
|
|||||||
cbzFile.extract(f, targetdir)
|
cbzFile.extract(f, targetdir)
|
||||||
|
|
||||||
def extractCBR(self, targetdir):
|
def extractCBR(self, targetdir):
|
||||||
try:
|
|
||||||
import rarfile
|
|
||||||
except ImportError:
|
|
||||||
self.cbrFile = None
|
|
||||||
return
|
|
||||||
cbrFile = rarfile.RarFile(self.origFileName)
|
cbrFile = rarfile.RarFile(self.origFileName)
|
||||||
for f in cbrFile.namelist():
|
for f in cbrFile.namelist():
|
||||||
if f.startswith('__MACOSX') or f.endswith('.DS_Store'):
|
if f.startswith('__MACOSX') or f.endswith('.DS_Store'):
|
||||||
|
|||||||
@@ -392,23 +392,18 @@ def getWorkFolder(afile):
|
|||||||
path = workdir
|
path = workdir
|
||||||
except OSError:
|
except OSError:
|
||||||
raise
|
raise
|
||||||
|
elif afile.lower().endswith('.pdf'):
|
||||||
|
pdf = pdfjpgextract.PdfJpgExtract(afile)
|
||||||
|
path = pdf.extract()
|
||||||
else:
|
else:
|
||||||
import magic
|
cbx = cbxarchive.CBxArchive(afile)
|
||||||
mime = magic.from_buffer(open(afile).read(1024), mime=True)
|
if cbx.isCbxFile():
|
||||||
if mime == 'application/pdf':
|
try:
|
||||||
pdf = pdfjpgextract.PdfJpgExtract(afile)
|
path = cbx.extract(workdir)
|
||||||
path = pdf.extract()
|
except OSError:
|
||||||
elif mime == 'application/x-rar' or mime == 'application/zip':
|
print 'Unrar not found, please download from ' + \
|
||||||
cbx = cbxarchive.CBxArchive(afile)
|
'http://www.rarlab.com/download.htm and put into your PATH.'
|
||||||
if cbx.isCbxFile():
|
sys.exit(21)
|
||||||
try:
|
|
||||||
path = cbx.extract(workdir)
|
|
||||||
except OSError:
|
|
||||||
print 'Unrar not found, please download from ' + \
|
|
||||||
'http://www.rarlab.com/download.htm and put into your PATH.'
|
|
||||||
sys.exit(21)
|
|
||||||
else:
|
|
||||||
raise TypeError
|
|
||||||
else:
|
else:
|
||||||
raise TypeError
|
raise TypeError
|
||||||
move(path, path + "_temp")
|
move(path, path + "_temp")
|
||||||
|
|||||||
244
kcc/magic.py
244
kcc/magic.py
@@ -1,244 +0,0 @@
|
|||||||
"""
|
|
||||||
magic is a wrapper around the libmagic file identification library.
|
|
||||||
|
|
||||||
See README for more information.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
|
|
||||||
>>> import magic
|
|
||||||
>>> magic.from_file("testdata/test.pdf")
|
|
||||||
'PDF document, version 1.2'
|
|
||||||
>>> magic.from_file("testdata/test.pdf", mime=True)
|
|
||||||
'application/pdf'
|
|
||||||
>>> magic.from_buffer(open("testdata/test.pdf").read(1024))
|
|
||||||
'PDF document, version 1.2'
|
|
||||||
>>>
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import os.path
|
|
||||||
import ctypes
|
|
||||||
import ctypes.util
|
|
||||||
|
|
||||||
from ctypes import c_char_p, c_int, c_size_t, c_void_p
|
|
||||||
|
|
||||||
class MagicException(Exception): pass
|
|
||||||
|
|
||||||
class Magic:
|
|
||||||
"""
|
|
||||||
Magic is a wrapper around the libmagic C library.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, mime=False, magic_file=None, mime_encoding=False):
|
|
||||||
"""
|
|
||||||
Create a new libmagic wrapper.
|
|
||||||
|
|
||||||
mime - if True, mimetypes are returned instead of textual descriptions
|
|
||||||
mime_encoding - if True, codec is returned
|
|
||||||
magic_file - use a mime database other than the system default
|
|
||||||
|
|
||||||
"""
|
|
||||||
flags = MAGIC_NONE
|
|
||||||
if mime:
|
|
||||||
flags |= MAGIC_MIME
|
|
||||||
elif mime_encoding:
|
|
||||||
flags |= MAGIC_MIME_ENCODING
|
|
||||||
|
|
||||||
self.cookie = magic_open(flags)
|
|
||||||
|
|
||||||
magic_load(self.cookie, magic_file)
|
|
||||||
|
|
||||||
|
|
||||||
def from_buffer(self, buf):
|
|
||||||
"""
|
|
||||||
Identify the contents of `buf`
|
|
||||||
"""
|
|
||||||
return magic_buffer(self.cookie, buf)
|
|
||||||
|
|
||||||
def from_file(self, filename):
|
|
||||||
"""
|
|
||||||
Identify the contents of file `filename`
|
|
||||||
raises IOError if the file does not exist
|
|
||||||
"""
|
|
||||||
|
|
||||||
if not os.path.exists(filename):
|
|
||||||
raise IOError("File does not exist: " + filename)
|
|
||||||
|
|
||||||
return magic_file(self.cookie, filename)
|
|
||||||
|
|
||||||
def __del__(self):
|
|
||||||
# during shutdown magic_close may have been cleared already
|
|
||||||
if self.cookie and magic_close:
|
|
||||||
magic_close(self.cookie)
|
|
||||||
self.cookie = None
|
|
||||||
|
|
||||||
_magic_mime = None
|
|
||||||
_magic = None
|
|
||||||
|
|
||||||
def _get_magic_mime():
|
|
||||||
global _magic_mime
|
|
||||||
if not _magic_mime:
|
|
||||||
_magic_mime = Magic(mime=True)
|
|
||||||
return _magic_mime
|
|
||||||
|
|
||||||
def _get_magic():
|
|
||||||
global _magic
|
|
||||||
if not _magic:
|
|
||||||
_magic = Magic()
|
|
||||||
return _magic
|
|
||||||
|
|
||||||
def _get_magic_type(mime):
|
|
||||||
if mime:
|
|
||||||
return _get_magic_mime()
|
|
||||||
else:
|
|
||||||
return _get_magic()
|
|
||||||
|
|
||||||
def from_file(filename, mime=False):
|
|
||||||
m = _get_magic_type(mime)
|
|
||||||
return m.from_file(filename)
|
|
||||||
|
|
||||||
def from_buffer(buffer, mime=False):
|
|
||||||
m = _get_magic_type(mime)
|
|
||||||
return m.from_buffer(buffer)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
libmagic = None
|
|
||||||
# Let's try to find magic or magic1
|
|
||||||
dll = ctypes.util.find_library('magic') or ctypes.util.find_library('magic1')
|
|
||||||
|
|
||||||
# This is necessary because find_library returns None if it doesn't find the library
|
|
||||||
if dll:
|
|
||||||
libmagic = ctypes.CDLL(dll)
|
|
||||||
|
|
||||||
if not libmagic or not libmagic._name:
|
|
||||||
import sys
|
|
||||||
platform_to_lib = {'darwin': ['/opt/local/lib/libmagic.dylib',
|
|
||||||
'/usr/local/lib/libmagic.dylib',
|
|
||||||
'/usr/local/Cellar/libmagic/5.10/lib/libmagic.dylib'],
|
|
||||||
'win32': ['magic1.dll']}
|
|
||||||
for dll in platform_to_lib.get(sys.platform, []):
|
|
||||||
try:
|
|
||||||
libmagic = ctypes.CDLL(dll)
|
|
||||||
except OSError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if not libmagic or not libmagic._name:
|
|
||||||
# It is better to raise an ImportError since we are importing magic module
|
|
||||||
raise ImportError('failed to find libmagic. Check your installation')
|
|
||||||
|
|
||||||
magic_t = ctypes.c_void_p
|
|
||||||
|
|
||||||
def errorcheck(result, func, args):
|
|
||||||
err = magic_error(args[0])
|
|
||||||
if err is not None:
|
|
||||||
raise MagicException(err)
|
|
||||||
else:
|
|
||||||
return result
|
|
||||||
|
|
||||||
def coerce_filename(filename):
|
|
||||||
if filename is None:
|
|
||||||
return None
|
|
||||||
return filename.encode(sys.getfilesystemencoding())
|
|
||||||
|
|
||||||
magic_open = libmagic.magic_open
|
|
||||||
magic_open.restype = magic_t
|
|
||||||
magic_open.argtypes = [c_int]
|
|
||||||
|
|
||||||
magic_close = libmagic.magic_close
|
|
||||||
magic_close.restype = None
|
|
||||||
magic_close.argtypes = [magic_t]
|
|
||||||
|
|
||||||
magic_error = libmagic.magic_error
|
|
||||||
magic_error.restype = c_char_p
|
|
||||||
magic_error.argtypes = [magic_t]
|
|
||||||
|
|
||||||
magic_errno = libmagic.magic_errno
|
|
||||||
magic_errno.restype = c_int
|
|
||||||
magic_errno.argtypes = [magic_t]
|
|
||||||
|
|
||||||
_magic_file = libmagic.magic_file
|
|
||||||
_magic_file.restype = c_char_p
|
|
||||||
_magic_file.argtypes = [magic_t, c_char_p]
|
|
||||||
_magic_file.errcheck = errorcheck
|
|
||||||
|
|
||||||
def magic_file(cookie, filename):
|
|
||||||
return _magic_file(cookie, coerce_filename(filename))
|
|
||||||
|
|
||||||
_magic_buffer = libmagic.magic_buffer
|
|
||||||
_magic_buffer.restype = c_char_p
|
|
||||||
_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
|
|
||||||
_magic_buffer.errcheck = errorcheck
|
|
||||||
|
|
||||||
|
|
||||||
def magic_buffer(cookie, buf):
|
|
||||||
return _magic_buffer(cookie, buf, len(buf))
|
|
||||||
|
|
||||||
|
|
||||||
_magic_load = libmagic.magic_load
|
|
||||||
_magic_load.restype = c_int
|
|
||||||
_magic_load.argtypes = [magic_t, c_char_p]
|
|
||||||
_magic_load.errcheck = errorcheck
|
|
||||||
|
|
||||||
def magic_load(cookie, filename):
|
|
||||||
return _magic_load(cookie, coerce_filename(filename))
|
|
||||||
|
|
||||||
magic_setflags = libmagic.magic_setflags
|
|
||||||
magic_setflags.restype = c_int
|
|
||||||
magic_setflags.argtypes = [magic_t, c_int]
|
|
||||||
|
|
||||||
magic_check = libmagic.magic_check
|
|
||||||
magic_check.restype = c_int
|
|
||||||
magic_check.argtypes = [magic_t, c_char_p]
|
|
||||||
|
|
||||||
magic_compile = libmagic.magic_compile
|
|
||||||
magic_compile.restype = c_int
|
|
||||||
magic_compile.argtypes = [magic_t, c_char_p]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
MAGIC_NONE = 0x000000 # No flags
|
|
||||||
|
|
||||||
MAGIC_DEBUG = 0x000001 # Turn on debugging
|
|
||||||
|
|
||||||
MAGIC_SYMLINK = 0x000002 # Follow symlinks
|
|
||||||
|
|
||||||
MAGIC_COMPRESS = 0x000004 # Check inside compressed files
|
|
||||||
|
|
||||||
MAGIC_DEVICES = 0x000008 # Look at the contents of devices
|
|
||||||
|
|
||||||
MAGIC_MIME = 0x000010 # Return a mime string
|
|
||||||
|
|
||||||
MAGIC_MIME_ENCODING = 0x000400 # Return the MIME encoding
|
|
||||||
|
|
||||||
MAGIC_CONTINUE = 0x000020 # Return all matches
|
|
||||||
|
|
||||||
MAGIC_CHECK = 0x000040 # Print warnings to stderr
|
|
||||||
|
|
||||||
MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit
|
|
||||||
|
|
||||||
MAGIC_RAW = 0x000100 # Don't translate unprintable chars
|
|
||||||
|
|
||||||
MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors
|
|
||||||
|
|
||||||
MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files
|
|
||||||
|
|
||||||
MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files
|
|
||||||
|
|
||||||
MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries
|
|
||||||
|
|
||||||
MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type
|
|
||||||
|
|
||||||
MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details
|
|
||||||
|
|
||||||
MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files
|
|
||||||
|
|
||||||
MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff
|
|
||||||
|
|
||||||
MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran
|
|
||||||
|
|
||||||
MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens
|
|
||||||
Reference in New Issue
Block a user