# rarfile.py
#
# Copyright (c) 2005-2011  Marko Kreen
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

r"""RAR archive reader.

This is a Python module for reading RAR archives.  The interface
is made as zipfile-like as possible.

Basic logic:
 - Parse archive structure with Python.
 - Extract non-compressed files with Python.
 - Extract compressed files with unrar.
 - Optionally write compressed data to temp file to speed up unrar,
   otherwise it needs to scan whole archive on each execution.

Example::

    import rarfile

    rf = rarfile.RarFile('myarchive.rar')
    for f in rf.infolist():
        print(f.filename, f.file_size)
        if f.filename == 'README':
            print(rf.read(f))

There are a few module-level parameters to tune behaviour,
here they are with defaults, and the reason to change them::

    import rarfile

    # Set to full path of unrar.exe if it is not in PATH
    rarfile.UNRAR_TOOL = "unrar"

    # Set to 0 if you don't look at comments and want to
    # avoid wasting time for parsing them
    rarfile.NEED_COMMENTS = 1

    # Set to 1 if you don't want to deal with decoding comments
    # from unknown encoding.  rarfile will try a couple of common
    # encodings in sequence.
    rarfile.UNICODE_COMMENTS = 0

    # Set to 1 if you prefer timestamps to be datetime objects
    # instead of tuples
    rarfile.USE_DATETIME = 0

    # Set to '/' to be more compatible with zipfile
    rarfile.PATH_SEP = '\\'

For more details, refer to source.

"""

__version__ = '2.4'

# export only interesting items
__all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile']

##
## Imports and compat - support both Python 2.x and 3.x
##

import sys
import os
import struct
from struct import pack, unpack
from binascii import crc32
from tempfile import mkstemp
from subprocess import Popen, PIPE, STDOUT
from datetime import datetime

# only needed for encrypted headers; the module stays usable without
# PyCrypto, _have_crypto records whether it could be imported
try:
    from Crypto.Cipher import AES
    try:
        from hashlib import sha1
    except ImportError:
        from sha import new as sha1
    _have_crypto = 1
except ImportError:
    _have_crypto = 0

# compat with 2.x
if sys.hexversion < 0x3000000:
    # prefer 3.x behaviour
    range = xrange

    # py2.6 has broken bytes()
    def bytes(s, enc):
        """2.x stand-in for bytes(s, enc): the encoding is ignored."""
        return str(s)

# see if compat bytearray() is needed
try:
    bytearray
except NameError:
    import array

    class bytearray:
        """Minimal bytearray replacement backed by array.array('B')."""

        def __init__(self, val = ''):
            self.arr = array.array('B', val)
            self.append = self.arr.append
            self.__getitem__ = self.arr.__getitem__
            self.__len__ = self.arr.__len__

        def decode(self, *args):
            return self.arr.tostring().decode(*args)

# Optimized .readinto() requires memoryview
try:
    memoryview
    have_memoryview = 1
except NameError:
    have_memoryview = 0

# Struct() for older python
try:
    from struct import Struct
except ImportError:
    class Struct:
        """Fallback for struct.Struct built on the module-level functions."""

        def __init__(self, fmt):
            self.format = fmt
            self.size = struct.calcsize(fmt)

        def unpack(self, buf):
            return unpack(self.format, buf)

        def unpack_from(self, buf, ofs = 0):
            return unpack(self.format, buf[ofs : ofs + self.size])

        def pack(self, *args):
            return pack(self.format, *args)

# file object superclass
try:
    from io import RawIOBase
except ImportError:
    class RawIOBase(object):
        """Fallback base class used when the io module is not available."""

        def close(self):
            pass

##
## Module configuration.  Can be tuned after importing.
##

# default fallback charset
DEFAULT_CHARSET = "windows-1252"

# list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
TRY_ENCODINGS = ('utf8', 'utf-16le')

# 'unrar', 'rar' or full path to either one
UNRAR_TOOL = "unrar"

# Command line args to use for opening file for reading.
OPEN_ARGS = ('p', '-inul')

# Command line args to use for extracting file to disk.
EXTRACT_ARGS = ('x', '-y', '-idq')

# args for testrar()
TEST_ARGS = ('t', '-idq')

# whether to speed up decompression by using tmp archive
USE_EXTRACT_HACK = 1

# limit the filesize for tmp archive usage
HACK_SIZE_LIMIT = 20*1024*1024

# whether to parse file/archive comments.
NEED_COMMENTS = 1

# whether to convert comments to unicode strings
UNICODE_COMMENTS = 0

# When RAR is corrupt, stopping on bad header is better
# On unknown/misparsed RAR headers reporting is better
REPORT_BAD_HEADER = 0

# Convert RAR time tuple into datetime() object
USE_DATETIME = 0

# Separator for path name components.  RAR internally uses '\\'.
# Use '/' to be similar with zipfile.
PATH_SEP = '\\'

##
## rar constants
##

# block types
RAR_BLOCK_MARK          = 0x72 # r
RAR_BLOCK_MAIN          = 0x73 # s
RAR_BLOCK_FILE          = 0x74 # t
RAR_BLOCK_OLD_COMMENT   = 0x75 # u
RAR_BLOCK_OLD_EXTRA     = 0x76 # v
RAR_BLOCK_OLD_SUB       = 0x77 # w
RAR_BLOCK_OLD_RECOVERY  = 0x78 # x
RAR_BLOCK_OLD_AUTH      = 0x79 # y
RAR_BLOCK_SUB           = 0x7a # z
RAR_BLOCK_ENDARC        = 0x7b # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME         = 0x0001
RAR_MAIN_COMMENT        = 0x0002
RAR_MAIN_LOCK           = 0x0004
RAR_MAIN_SOLID          = 0x0008
RAR_MAIN_NEWNUMBERING   = 0x0010
RAR_MAIN_AUTH           = 0x0020
RAR_MAIN_RECOVERY       = 0x0040
RAR_MAIN_PASSWORD       = 0x0080
RAR_MAIN_FIRSTVOLUME    = 0x0100
RAR_MAIN_ENCRYPTVER     = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE   = 0x0001
RAR_FILE_SPLIT_AFTER    = 0x0002
RAR_FILE_PASSWORD       = 0x0004
RAR_FILE_COMMENT        = 0x0008
RAR_FILE_SOLID          = 0x0010
RAR_FILE_DICTMASK       = 0x00e0
RAR_FILE_DICT64         = 0x0000
RAR_FILE_DICT128        = 0x0020
RAR_FILE_DICT256        = 0x0040
RAR_FILE_DICT512        = 0x0060
RAR_FILE_DICT1024       = 0x0080
RAR_FILE_DICT2048       = 0x00a0
RAR_FILE_DICT4096       = 0x00c0
RAR_FILE_DIRECTORY      = 0x00e0
RAR_FILE_LARGE          = 0x0100
RAR_FILE_UNICODE        = 0x0200
RAR_FILE_SALT           = 0x0400
RAR_FILE_VERSION        = 0x0800
RAR_FILE_EXTTIME        = 0x1000
RAR_FILE_EXTFLAGS       = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME  = 0x0001
RAR_ENDARC_DATACRC      = 0x0002
RAR_ENDARC_REVSPACE     = 0x0004
RAR_ENDARC_VOLNR        = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN     = 0x4000
RAR_LONG_BLOCK          = 0x8000

# Host OS types
RAR_OS_MSDOS = 0
RAR_OS_OS2   = 1
RAR_OS_WIN32 = 2
RAR_OS_UNIX  = 3
RAR_OS_MACOS = 4
RAR_OS_BEOS  = 5

# Compression methods - '0'..'5'
RAR_M0 = 0x30
RAR_M1 = 0x31
RAR_M2 = 0x32
RAR_M3 = 0x33
RAR_M4 = 0x34
RAR_M5 = 0x35

##
## internal constants
##

RAR_ID = bytes("Rar!\x1a\x07\x00", 'ascii')
ZERO = bytes("\0", 'ascii')
EMPTY = bytes("", 'ascii')

# NOTE(review): only "S_BLK_HDR = Struct('" survived in the damaged source.
# The format below is derived from its uses in this file: four fields are
# unpacked (crc, type, flags, size) and the packed size must be 7, because
# the 13-byte main header is built as S_BLK_HDR + 6 zero bytes.  Little-endian
# byte order is assumed - confirm against the upstream file.
S_BLK_HDR = Struct('<HBHH')

# NOTE(review): a large span of the original text is missing here.  The code
# below still references these names, whose definitions were in the lost
# span and must be restored from the upstream file before the module works:
#   S_FILE_HDR, S_LONG, S_SHORT, S_BYTE, S_COMMENT_HDR
#   BadRarFile, NotRarFile, BadRarName, NeedFirstVolume, NoCrypto
#   RarInfo (instantiated by _parse_block_header, provides needs_password())
#   the original RarFile class statement, its constructor and public
#   accessors, and the first part of RarFile.open()


class RarFile(object):
    """Parsed RAR archive: header parsing plus extraction helpers.

    NOTE(review): this class statement is reconstructed.  The original class
    header, constructor and the beginning of open() were lost; the methods
    below are the ones that survived intact.  They rely on instance
    attributes set elsewhere: rarfile, comment, _charset, _info_callback,
    _info_list, _info_map, _needs_password, _password, _crc_check, _main.
    """

    # NOTE(review): surviving tail of open(); its head is missing, so the
    # fragment is preserved as a comment instead of guessing the lost part.
    #
    #         ... HACK_SIZE_LIMIT:
    #             use_hack = 0
    #         else:
    #             use_hack = 1
    #
    #         # now extract
    #         if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0:
    #             return self._open_clear(inf)
    #         elif use_hack:
    #             return self._open_hack(inf, psw)
    #         else:
    #             return self._open_unrar(self.rarfile, inf, psw)

    def read(self, fname, psw = None):
        """Return uncompressed data for archive entry.

        For longer files using .open() may be better idea.

        @param fname: filename or RarInfo instance
        @param psw: password to use for extracting.
        """
        f = self.open(fname, 'r', psw)
        try:
            return f.read()
        finally:
            f.close()

    def close(self):
        """Release open resources."""
        pass

    def printdir(self):
        """Print archive file list to stdout."""
        for f in self._info_list:
            print(f.filename)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        @param member: filename or RarInfo instance
        @param path: optional destination path
        @param pwd: optional password to use
        """
        if isinstance(member, RarInfo):
            fname = member.filename
        else:
            fname = member
        self._extract([fname], path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        @param path: optional destination path
        @param members: optional filename or RarInfo instance list to extract
        @param pwd: optional password to use
        """
        fnlist = []
        if members is not None:
            for m in members:
                if isinstance(m, RarInfo):
                    fnlist.append(m.filename)
                else:
                    fnlist.append(m)
        self._extract(fnlist, path, pwd)

    def testrar(self):
        """Let 'unrar' test the archive.

        Raises BadRarFile when the tool exits with a non-zero status.
        """
        cmd = [UNRAR_TOOL] + list(TEST_ARGS)
        if self._password is not None:
            cmd.append('-p' + self._password)
        else:
            cmd.append('-p-')
        cmd.append(self.rarfile)
        p = custom_popen(cmd)
        p.communicate()
        if p.returncode != 0:
            raise BadRarFile("Testing failed")

    ##
    ## private methods
    ##

    # store entry
    def _process_entry(self, item):
        """Record a parsed header: file entries, new-style comments, callback."""
        if item.type == RAR_BLOCK_FILE:
            # use only first part
            if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0:
                self._info_map[item.filename] = item
                self._info_list.append(item)
                # remember if any items require password
                if item.needs_password():
                    self._needs_password = True
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old.compress_size += item.compress_size

        # parse new-style comment
        if item.type == RAR_BLOCK_SUB and item.filename == 'CMT':
            if not NEED_COMMENTS:
                pass
            elif item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
                pass
            elif item.flags & RAR_FILE_SOLID:
                # file comment
                cmt = self._read_comment_v3(item, self._password)
                if len(self._info_list) > 0:
                    old = self._info_list[-1]
                    old.comment = cmt
            else:
                # archive comment
                cmt = self._read_comment_v3(item, self._password)
                self.comment = cmt

        if self._info_callback:
            self._info_callback(item)

    # read rar
    def _parse(self):
        """Parse the archive, always closing the file opened for parsing."""
        self._fd = None
        try:
            self._parse_real()
        finally:
            if self._fd:
                self._fd.close()
                self._fd = None

    def _parse_real(self):
        """Walk all headers of the archive, following volumes when needed.

        Raises NotRarFile when the signature does not match and
        NeedFirstVolume when parsing did not start at the first volume.
        """
        fd = open(self.rarfile, "rb")
        self._fd = fd
        magic = fd.read(len(RAR_ID))
        if magic != RAR_ID:
            raise NotRarFile("Not a Rar archive: "+self.rarfile)

        volume = 0  # first vol (.rar) is 0
        more_vols = 0
        endarc = 0
        volfile = self.rarfile
        while 1:
            if endarc:
                h = None    # don't read past ENDARC
            else:
                h = self._parse_header(fd)
            if not h:
                if more_vols:
                    volume += 1
                    volfile = self._next_volname(volfile)
                    fd.close()
                    fd = open(volfile, "rb")
                    self._fd = fd
                    more_vols = 0
                    endarc = 0
                    continue
                break
            h.volume = volume
            h.volume_file = volfile

            if h.type == RAR_BLOCK_MAIN and not self._main:
                self._main = h
                if h.flags & RAR_MAIN_NEWNUMBERING:
                    # RAR 2.x does not set FIRSTVOLUME,
                    # so check it only if NEWNUMBERING is used
                    if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
                        raise NeedFirstVolume("Need to start from first volume")
                if h.flags & RAR_MAIN_PASSWORD:
                    self._needs_password = True
                    if not self._password:
                        self._main = None
                        break
            elif h.type == RAR_BLOCK_ENDARC:
                more_vols = h.flags & RAR_ENDARC_NEXT_VOLUME
                endarc = 1
            elif h.type == RAR_BLOCK_FILE:
                # RAR 2.x does not write RAR_BLOCK_ENDARC
                if h.flags & RAR_FILE_SPLIT_AFTER:
                    more_vols = 1
                # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
                if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
                    raise NeedFirstVolume("Need to start from first volume")

            # store it
            self._process_entry(h)

            # go to next header
            if h.add_size > 0:
                fd.seek(h.file_offset + h.add_size, 0)

    # AES encrypted headers
    _last_aes_key = (None, None, None) # (salt, key, iv)

    def _decrypt_header(self, fd):
        """Return a decrypting reader over fd, caching the key per salt."""
        if not _have_crypto:
            raise NoCrypto('Cannot parse encrypted headers - no crypto')
        salt = fd.read(8)
        if self._last_aes_key[0] == salt:
            key, iv = self._last_aes_key[1:]
        else:
            key, iv = rar3_s2k(self._password, salt)
            self._last_aes_key = (salt, key, iv)
        return HeaderDecrypt(fd, key, iv)

    # read single header
    def _parse_header(self, fd):
        """Parse one block header from fd; returns None at EOF or on bad data.

        With REPORT_BAD_HEADER set, a struct error raises BadRarFile instead.
        """
        try:
            # handle encrypted headers
            if self._main and self._main.flags & RAR_MAIN_PASSWORD:
                if not self._password:
                    return
                fd = self._decrypt_header(fd)

            # now read actual header
            return self._parse_block_header(fd)
        except struct.error:
            if REPORT_BAD_HEADER:
                raise BadRarFile('Broken header in RAR file')
            return None

    # common header
    def _parse_block_header(self, fd):
        """Read the common block header, dispatch by type and verify its CRC."""
        h = RarInfo()
        h.header_offset = fd.tell()
        h.comment = None

        # read and parse base header
        buf = fd.read(S_BLK_HDR.size)
        if not buf:
            return None
        t = S_BLK_HDR.unpack_from(buf)
        h.header_crc, h.type, h.flags, h.header_size = t
        h.header_base = S_BLK_HDR.size
        pos = S_BLK_HDR.size

        # read full header
        if h.header_size > S_BLK_HDR.size:
            h.header_data = buf + fd.read(h.header_size - S_BLK_HDR.size)
        else:
            h.header_data = buf
        h.file_offset = fd.tell()

        # unexpected EOF?
        if len(h.header_data) != h.header_size:
            if REPORT_BAD_HEADER:
                raise BadRarFile('Unexpected EOF when reading header')
            return None

        # block has data associated with it?
        if h.flags & RAR_LONG_BLOCK:
            h.add_size = S_LONG.unpack_from(h.header_data, pos)[0]
        else:
            h.add_size = 0

        # parse interesting ones, decide header boundaries for crc
        if h.type == RAR_BLOCK_MARK:
            return h
        elif h.type == RAR_BLOCK_MAIN:
            h.header_base += 6
            if h.flags & RAR_MAIN_ENCRYPTVER:
                h.header_base += 1
            if h.flags & RAR_MAIN_COMMENT:
                self._parse_subblocks(h, h.header_base)
                self.comment = h.comment
        elif h.type == RAR_BLOCK_FILE:
            self._parse_file_header(h, pos)
        elif h.type == RAR_BLOCK_SUB:
            self._parse_file_header(h, pos)
            h.header_base = h.header_size
        elif h.type == RAR_BLOCK_OLD_AUTH:
            h.header_base += 8
        elif h.type == RAR_BLOCK_OLD_EXTRA:
            h.header_base += 7
        else:
            h.header_base = h.header_size

        # check crc
        if h.type == RAR_BLOCK_OLD_SUB:
            crcdat = h.header_data[2:] + fd.read(h.add_size)
        else:
            crcdat = h.header_data[2:h.header_base]

        calc_crc = crc32(crcdat) & 0xFFFF

        # return good header
        if h.header_crc == calc_crc:
            return h

        # need to panic?
        if REPORT_BAD_HEADER:
            xlen = len(crcdat)
            crcdat = h.header_data[2:]
            msg = 'Header CRC error (%02x): exp=%x got=%x (xlen = %d)' % (
                    h.type, h.header_crc, calc_crc, xlen)
            # diagnostic aid: report every prefix length whose crc would match
            xlen = len(crcdat)
            while xlen >= S_BLK_HDR.size - 2:
                crc = crc32(crcdat[:xlen]) & 0xFFFF
                if crc == h.header_crc:
                    msg += ' / crc match, xlen = %d' % xlen
                xlen -= 1
            raise BadRarFile(msg)

        # instead of panicking, send eof
        return None

    # read file-specific header
    def _parse_file_header(self, h, pos):
        """Fill file-entry fields of h from its header data; returns end pos."""
        fld = S_FILE_HDR.unpack_from(h.header_data, pos)
        h.compress_size = fld[0]
        h.file_size = fld[1]
        h.host_os = fld[2]
        h.CRC = fld[3]
        h.date_time = parse_dos_time(fld[4])
        h.extract_version = fld[5]
        h.compress_type = fld[6]
        h.name_size = fld[7]
        h.mode = fld[8]
        pos += S_FILE_HDR.size

        if h.flags & RAR_FILE_LARGE:
            # high 32 bits of the two sizes follow the fixed header
            h1 = S_LONG.unpack_from(h.header_data, pos)[0]
            h2 = S_LONG.unpack_from(h.header_data, pos + 4)[0]
            h.compress_size |= h1 << 32
            h.file_size |= h2 << 32
            pos += 8
            h.add_size = h.compress_size

        name = h.header_data[pos : pos + h.name_size ]
        pos += h.name_size
        if h.flags & RAR_FILE_UNICODE:
            nul = name.find(ZERO)
            h.orig_filename = name[:nul]
            u = UnicodeFilename(h.orig_filename, name[nul + 1 : ])
            h.filename = u.decode()

            # if parsing failed fall back to simple name
            if u.failed:
                h.filename = self._decode(h.orig_filename)
        else:
            h.orig_filename = name
            h.filename = self._decode(name)

        # change separator, if requested
        if PATH_SEP != '\\':
            h.filename = h.filename.replace('\\', PATH_SEP)

        if h.flags & RAR_FILE_SALT:
            h.salt = h.header_data[pos : pos + 8]
            pos += 8
        else:
            h.salt = None

        # optional extended time stamps
        if h.flags & RAR_FILE_EXTTIME:
            pos = self._parse_ext_time(h, pos)
        else:
            h.mtime = h.atime = h.ctime = h.arctime = None

        # base header end
        h.header_base = pos

        if h.flags & RAR_FILE_COMMENT:
            self._parse_subblocks(h, pos)

        # convert timestamps
        if USE_DATETIME:
            h.date_time = to_datetime(h.date_time)
            h.mtime = to_datetime(h.mtime)
            h.atime = to_datetime(h.atime)
            h.ctime = to_datetime(h.ctime)
            h.arctime = to_datetime(h.arctime)

        # .mtime is .date_time with more precision
        if h.mtime:
            if USE_DATETIME:
                h.date_time = h.mtime
            else:
                # keep seconds int
                h.date_time = h.mtime[:5] + (int(h.mtime[5]),)

        return pos

    # find old-style comment subblock
    def _parse_subblocks(self, h, pos):
        """Scan sub-blocks in h.header_data from pos for an old-style comment."""
        hdata = h.header_data
        while pos < len(hdata):
            # ordinary block header
            t = S_BLK_HDR.unpack_from(hdata, pos)
            scrc, stype, sflags, slen = t
            pos_next = pos + slen
            pos += S_BLK_HDR.size

            # corrupt header
            if pos_next < pos:
                break

            # followed by block-specific header
            if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
                declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
                pos += S_COMMENT_HDR.size
                data = hdata[pos : pos_next]
                cmt = rar_decompress(ver, meth, data, declen, sflags,
                                     crc, self._password)
                if not self._crc_check:
                    h.comment = self._decode_comment(cmt)
                elif crc32(cmt) & 0xFFFF == crc:
                    h.comment = self._decode_comment(cmt)

            pos = pos_next

    def _parse_ext_time(self, h, pos):
        """Parse extended timestamps into h.mtime/ctime/atime/arctime."""
        data = h.header_data

        # flags and rest of data can be missing
        flags = 0
        if pos + 2 <= len(data):
            flags = S_SHORT.unpack_from(data, pos)[0]
            pos += 2

        # one 4-bit flag group per timestamp, mtime in the highest nibble
        h.mtime, pos = self._parse_xtime(flags >> 3*4, data, pos, h.date_time)
        h.ctime, pos = self._parse_xtime(flags >> 2*4, data, pos)
        h.atime, pos = self._parse_xtime(flags >> 1*4, data, pos)
        h.arctime, pos = self._parse_xtime(flags >> 0*4, data, pos)
        return pos

    def _parse_xtime(self, flag, data, pos, dostime = None):
        """Parse one extended timestamp; returns (time tuple or None, new pos)."""
        unit = 10000000.0 # 100 ns units
        if flag & 8:
            if not dostime:
                t = S_LONG.unpack_from(data, pos)[0]
                dostime = parse_dos_time(t)
                pos += 4
            rem = 0
            cnt = flag & 3
            for i in range(cnt):
                b = S_BYTE.unpack_from(data, pos)[0]
                rem = (b << 16) | (rem >> 8)
                pos += 1
            sec = dostime[5] + rem / unit
            if flag & 4:
                sec += 1
            dostime = dostime[:5] + (sec,)
        return dostime, pos

    # given current vol name, construct next one
    def _next_volname(self, volfile):
        """Return the file name of the volume following volfile."""
        if self._main.flags & RAR_MAIN_NEWNUMBERING:
            return self._next_newvol(volfile)
        return self._next_oldvol(volfile)

    # new-style next volume
    def _next_newvol(self, volfile):
        """Increment the last run of digits in volfile (name.partNN.rar)."""
        i = len(volfile) - 1
        while i >= 0:
            if volfile[i] >= '0' and volfile[i] <= '9':
                return self._inc_volname(volfile, i)
            i -= 1
        raise BadRarName("Cannot construct volume name: "+volfile)

    # old-style next volume
    def _next_oldvol(self, volfile):
        """Old-style numbering: .rar -> .r00 -> .r01 ..."""
        # rar -> r00
        if volfile[-4:].lower() == '.rar':
            return volfile[:-2] + '00'
        return self._inc_volname(volfile, len(volfile) - 1)

    # increase digits with carry, otherwise just increment char
    def _inc_volname(self, volfile, i):
        """Increment the character at index i, carrying leftwards over '9'."""
        fn = list(volfile)
        while i >= 0:
            if fn[i] != '9':
                fn[i] = chr(ord(fn[i]) + 1)
                break
            fn[i] = '0'
            i -= 1
        return ''.join(fn)

    def _open_clear(self, inf):
        """Open a stored (uncompressed, unencrypted) entry directly."""
        return DirectReader(self, inf)

    # put file compressed data into temporary .rar archive, and run
    # unrar on that, thus avoiding unrar going over whole archive
    def _open_hack(self, inf, psw = None):
        """Copy one entry into a temporary single-file archive and open that."""
        BSIZE = 32*1024

        size = inf.compress_size + inf.header_size
        rf = open(inf.volume_file, "rb", 0)
        rf.seek(inf.header_offset)

        tmpfd, tmpname = mkstemp(suffix='.rar')
        tmpf = os.fdopen(tmpfd, "wb")

        try:
            # create main header: crc, type, flags, size, res1, res2
            mh = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2+4)
            tmpf.write(RAR_ID + mh)
            while size > 0:
                if size > BSIZE:
                    buf = rf.read(BSIZE)
                else:
                    buf = rf.read(size)
                if not buf:
                    raise BadRarFile('read failed: ' + inf.filename)
                tmpf.write(buf)
                size -= len(buf)
            tmpf.close()
            rf.close()
        except:
            # cleanup only, the original exception is re-raised
            rf.close()
            tmpf.close()
            os.unlink(tmpname)
            raise
        return self._open_unrar(tmpname, inf, psw, tmpname)

    def _read_comment_v3(self, inf, psw=None):
        """Read, decompress and CRC-check a new-style comment block.

        Returns the (optionally decoded) comment, or None on CRC mismatch.
        """
        # read data
        rf = open(inf.volume_file, "rb")
        try:
            rf.seek(inf.file_offset)
            data = rf.read(inf.compress_size)
        finally:
            rf.close()

        # decompress
        cmt = rar_decompress(inf.extract_version, inf.compress_type, data,
                             inf.file_size, inf.flags, inf.CRC, psw, inf.salt)

        # check crc
        if self._crc_check:
            # mask instead of long(): crc32() may be signed on Python 2,
            # and long() does not exist on Python 3
            crc = crc32(cmt) & 0xFFFFFFFF
            if crc != inf.CRC:
                return None

        return self._decode_comment(cmt)

    # extract using unrar
    def _open_unrar(self, rarfile, inf, psw = None, tmpfile = None):
        """Start unrar printing the entry to stdout; returns a PipeReader."""
        cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
        if psw is not None:
            cmd.append("-p" + psw)
        cmd.append(rarfile)

        # not giving filename avoids encoding related problems
        if not tmpfile:
            fn = inf.filename
            if PATH_SEP != os.sep:
                fn = fn.replace(PATH_SEP, os.sep)
            cmd.append(fn)

        # read from unrar pipe
        return PipeReader(self, inf, cmd, tmpfile)

    def _decode(self, val):
        """Decode bytes using TRY_ENCODINGS, then the archive charset."""
        for c in TRY_ENCODINGS:
            try:
                return val.decode(c)
            except UnicodeError:
                pass
        return val.decode(self._charset, 'replace')

    def _decode_comment(self, val):
        """Decode a comment only when UNICODE_COMMENTS is enabled."""
        if UNICODE_COMMENTS:
            return self._decode(val)
        return val

    # call unrar to extract a file
    def _extract(self, fnlist, path=None, psw=None):
        """Run unrar to extract the named entries (all when fnlist is empty)."""
        cmd = [UNRAR_TOOL] + list(EXTRACT_ARGS)

        # password
        psw = psw or self._password
        if psw is not None:
            cmd.append('-p' + psw)
        else:
            cmd.append('-p-')

        # rar file
        cmd.append(self.rarfile)

        # file list
        for fn in fnlist:
            if os.sep != PATH_SEP:
                fn = fn.replace(PATH_SEP, os.sep)
            cmd.append(fn)

        # destination path
        if path is not None:
            cmd.append(path + os.sep)

        # call
        p = custom_popen(cmd)
        p.communicate()

##
## Utility classes
##

class UnicodeFilename:
    """Handle unicode filename decompression"""

    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Next byte of the encoded data; sets .failed and returns 0 at end."""
        try:
            c = self.encdata[self.encpos]
            self.encpos += 1
            return c
        except IndexError:
            self.failed = 1
            return 0

    def std_byte(self):
        """Byte of the plain name at the output position; '?' when missing."""
        try:
            return self.std_name[self.pos]
        except IndexError:
            self.failed = 1
            return ord('?')

    def put(self, lo, hi):
        """Append one UTF-16LE code unit and advance the output position."""
        self.buf.append(lo)
        self.buf.append(hi)
        self.pos += 1

    def decode(self):
        """Decode the encoded data and return the unicode filename."""
        hi = self.enc_byte()
        flagbits = 0
        while self.encpos < len(self.encdata):
            if flagbits == 0:
                flags = self.enc_byte()
                flagbits = 8
            # every flag byte holds four 2-bit opcodes, highest bits first
            flagbits -= 2
            t = (flags >> flagbits) & 3
            if t == 0:
                self.put(self.enc_byte(), 0)
            elif t == 1:
                self.put(self.enc_byte(), hi)
            elif t == 2:
                self.put(self.enc_byte(), self.enc_byte())
            else:
                n = self.enc_byte()
                if n & 0x80:
                    c = self.enc_byte()
                    for i in range((n & 0x7f) + 2):
                        lo = (self.std_byte() + c) & 0xFF
                        self.put(lo, hi)
                else:
                    for i in range(n + 2):
                        self.put(self.std_byte(), 0)
        return self.buf.decode("utf-16le", "replace")


class RarExtFile(RawIOBase):
    """Base class for 'file-like' object that RarFile.open() returns.

    Provides public methods and common crc checking.

    Behaviour:
     - no short reads - .read() and .readinfo() read as much as requested.
     - no internal buffer, use io.BufferedReader for that.

    @ivar name:
      filename of the archive entry.
    """

    def __init__(self, rf, inf):
        """Fill common fields"""

        RawIOBase.__init__(self)

        # standard io.* properties
        self.name = inf.filename
        self.mode = 'rb'

        self.rf = rf
        self.inf = inf
        self.crc_check = rf._crc_check
        self.fd = None
        self.CRC = 0
        self.remain = 0

        self._open()

    def _open(self):
        """Reset reading state; subclasses extend this to open the data source."""
        if self.fd:
            self.fd.close()
        self.fd = None
        self.CRC = 0
        self.remain = self.inf.file_size

    def read(self, cnt = None):
        """Read all or specified amount of data from archive entry."""

        # sanitize cnt
        if cnt is None or cnt < 0:
            cnt = self.remain
        elif cnt > self.remain:
            cnt = self.remain
        if cnt == 0:
            return EMPTY

        # actual read
        data = self._read(cnt)
        if data:
            self.CRC = crc32(data, self.CRC)
            self.remain -= len(data)

        # done?
        if not data or self.remain == 0:
            #self.close()
            self._check()
        return data

    def _check(self):
        """Check final CRC."""
        if not self.crc_check:
            return
        if self.remain != 0:
            raise BadRarFile("Failed the read enough data")
        # mask instead of long(): crc32() may be signed on Python 2,
        # and long() does not exist on Python 3
        crc = self.CRC & 0xFFFFFFFF
        if crc != self.inf.CRC:
            raise BadRarFile("Corrupt file - CRC check failed: " + self.inf.filename)

    def _read(self, cnt):
        """Actual read that gets sanitized cnt."""

    def close(self):
        """Close open resources."""

        RawIOBase.close(self)

        if self.fd:
            self.fd.close()
            self.fd = None

    def __del__(self):
        """Hook delete to make sure tempfile is removed."""
        self.close()

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns bytes read.
        """

        data = self.read(len(buf))
        n = len(data)
        try:
            buf[:n] = data
        except TypeError:
            import array
            if not isinstance(buf, array.array):
                raise
            buf[:n] = array.array(buf.typecode, data)
        return n

    def tell(self):
        """Return current reading position in uncompressed data."""
        return self.inf.file_size - self.remain

    def seek(self, ofs, whence = 0):
        """Seek in data.

        Seeking disables the CRC check; seeking backwards reopens the
        entry and skips forward from its start.
        """

        # disable crc check when seeking
        self.crc_check = 0

        fsize = self.inf.file_size
        cur_ofs = self.tell()

        if whence == 0:     # seek from beginning of file
            new_ofs = ofs
        elif whence == 1:   # seek from current position
            new_ofs = cur_ofs + ofs
        elif whence == 2:   # seek from end of file
            new_ofs = fsize + ofs
        else:
            raise ValueError('Invalid value for whence')

        # sanity check
        if new_ofs < 0:
            new_ofs = 0
        elif new_ofs > fsize:
            new_ofs = fsize

        # do the actual seek
        if new_ofs >= cur_ofs:
            self._skip(new_ofs - cur_ofs)
        else:
            # process old data ?
            #self._skip(fsize - cur_ofs)
            # reopen and seek
            self._open()
            self._skip(new_ofs)
        return self.tell()

    def _skip(self, cnt):
        """Read and discard data"""
        while cnt > 0:
            if cnt > 8192:
                buf = self.read(8192)
            else:
                buf = self.read(cnt)
            if not buf:
                break
            cnt -= len(buf)

    def readable(self):
        """Returns True"""
        return True

    def seekable(self):
        """Returns True"""
        return True

    def readall(self):
        """Read all remaining data"""
        # avoid RawIOBase default impl
        return self.read()


class PipeReader(RarExtFile):
    """Read data from pipe, handle tempfile cleanup."""

    def __init__(self, rf, inf, cmd, tempfile=None):
        self.cmd = cmd
        self.proc = None
        self.tempfile = tempfile
        RarExtFile.__init__(self, rf, inf)

    def _close_proc(self):
        """Close the pipes of the running process and wait for it to exit."""
        if not self.proc:
            return
        if self.proc.stdout:
            self.proc.stdout.close()
        if self.proc.stdin:
            self.proc.stdin.close()
        if self.proc.stderr:
            self.proc.stderr.close()
        self.proc.wait()
        self.proc = None

    def _open(self):
        """(Re)start the extraction command and read from its stdout."""
        RarExtFile._open(self)

        # stop old process
        self._close_proc()

        # launch new process
        self.proc = custom_popen(self.cmd)
        self.fd = self.proc.stdout

        # avoid situation where unrar waits on stdin
        if self.proc.stdin:
            self.proc.stdin.close()

    def _read(self, cnt):
        """Read from pipe.

        An unbuffered pipe may return fewer bytes than requested, so keep
        reading until cnt bytes arrived or the pipe reports EOF.
        """
        parts = []
        while cnt > 0:
            data = self.fd.read(cnt)
            if not data:
                break
            parts.append(data)
            cnt -= len(data)
        return EMPTY.join(parts)

    def close(self):
        """Close open resources."""

        self._close_proc()
        RarExtFile.close(self)

        if self.tempfile:
            try:
                os.unlink(self.tempfile)
            except OSError:
                pass
            self.tempfile = None

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        # without memoryview fall back to the generic implementation
        if not have_memoryview:
            return RarExtFile.readinto(self, buf)
        cnt = len(buf)
        if cnt > self.remain:
            cnt = self.remain
        vbuf = memoryview(buf)
        res = self.fd.readinto(vbuf[0:cnt])
        if res:
            if self.crc_check:
                self.CRC = crc32(vbuf[:res], self.CRC)
            self.remain -= res
        return res


class DirectReader(RarExtFile):
    """Read uncompressed data directly from archive."""

    def _open(self):
        """Open the volume file and position it at the entry's data."""
        RarExtFile._open(self)

        self.volfile = self.inf.volume_file
        self.fd = open(self.volfile, "rb", 0)
        self.fd.seek(self.inf.header_offset, 0)
        self.cur = self.rf._parse_header(self.fd)
        self.cur_avail = self.cur.add_size

    def _skip(self, cnt):
        """RAR Seek, skipping through rar files to get to correct position
        """

        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0:
                if not self._open_next():
                    break

            # fd is in read pos, do the read
            if cnt > self.cur_avail:
                cnt -= self.cur_avail
                self.remain -= self.cur_avail
                self.cur_avail = 0
            else:
                self.fd.seek(cnt, 1)
                self.cur_avail -= cnt
                self.remain -= cnt
                cnt = 0

    def _read(self, cnt):
        """Read from potentially multi-volume archive."""

        buf = EMPTY
        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0:
                if not self._open_next():
                    break

            # fd is in read pos, do the read
            if cnt > self.cur_avail:
                data = self.fd.read(self.cur_avail)
            else:
                data = self.fd.read(cnt)
            if not data:
                break

            # got some data
            cnt -= len(data)
            self.cur_avail -= len(data)
            if buf:
                buf += data
            else:
                buf = data

        return buf

    def _open_next(self):
        """Proceed to next volume."""

        # is the file split over archives?
        if (self.cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
            return False

        if self.fd:
            self.fd.close()
            self.fd = None

        # open next part
        self.volfile = self.rf._next_volname(self.volfile)
        fd = open(self.volfile, "rb", 0)
        self.fd = fd

        # loop until first file header
        while 1:
            cur = self.rf._parse_header(fd)
            if not cur:
                raise BadRarFile("Unexpected EOF")
            if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                if cur.add_size:
                    fd.seek(cur.add_size, 1)
                continue
            if cur.orig_filename != self.inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self.cur = cur
            self.cur_avail = cur.add_size
            return True

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        # without memoryview fall back to the generic implementation
        if not have_memoryview:
            return RarExtFile.readinto(self, buf)
        got = 0
        vbuf = memoryview(buf)
        while got < len(buf):
            # next vol needed?
            if self.cur_avail == 0:
                if not self._open_next():
                    break

            # length for next read
            cnt = len(buf) - got
            if cnt > self.cur_avail:
                cnt = self.cur_avail

            # read into temp view
            res = self.fd.readinto(vbuf[got : got + cnt])
            if not res:
                break
            if self.crc_check:
                self.CRC = crc32(vbuf[got : got + res], self.CRC)
            self.cur_avail -= res
            self.remain -= res
            got += res
        return got


class HeaderDecrypt:
    """File-like object that decrypts from another file"""

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES.new(key, AES.MODE_CBC, iv)
        self.buf = EMPTY

    def tell(self):
        """Position in the underlying (encrypted) file."""
        return self.f.tell()

    def read(self, cnt=None):
        """Return up to cnt decrypted bytes, buffering block leftovers."""
        if cnt > 8*1024:
            raise BadRarFile('Bad count to header decrypt - wrong password?')

        # consume old data
        if cnt <= len(self.buf):
            res = self.buf[:cnt]
            self.buf = self.buf[cnt:]
            return res
        res = self.buf
        self.buf = EMPTY
        cnt -= len(res)

        # decrypt new data
        BLK = self.ciph.block_size
        while cnt > 0:
            enc = self.f.read(BLK)
            if len(enc) < BLK:
                break
            dec = self.ciph.decrypt(enc)
            if cnt >= len(dec):
                res += dec
                cnt -= len(dec)
            else:
                res += dec[:cnt]
                self.buf = dec[cnt:]
                cnt = 0

        return res

##
## Utility functions
##

def rar3_s2k(psw, salt):
    """String-to-key hash for RAR3.

    Returns (key, iv) derived from the password string and the salt bytes.
    """

    seed = psw.encode('utf-16le') + salt
    iv = EMPTY
    h = sha1()
    for i in range(16):
        for j in range(0x4000):
            cnt = S_LONG.pack(i*0x4000 + j)
            h.update(seed + cnt[:3])
            if j == 0:
                iv += h.digest()[19:20]
    key_be = h.digest()[:16]
    # NOTE(review): this statement was damaged in the source (unbalanced
    # parenthesis).  Restored as a big-endian -> little-endian swap of four
    # 32-bit words, which is what the key_be/key_le names describe - confirm
    # against the upstream file.
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, iv

def rar_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    The blob is wrapped into a minimal temporary archive which the unrar
    tool prints to stdout.  Stored, unencrypted data is returned as-is.
    """

    # already uncompressed?
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # take only necessary flags
    flags = flags & (RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK)
    flags |= RAR_LONG_BLOCK

    # file header
    fname = bytes('data', 'ascii')
    date = 0
    mode = 0x20
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           date, vers, meth, len(fname), mode)
    fhdr += fname
    if flags & RAR_FILE_SALT:
        if not salt:
            return EMPTY
        fhdr += salt

    # full header
    hlen = S_BLK_HDR.size + len(fhdr)
    hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = crc32(hdr[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2+4)

    # decompress via temp rar
    tmpfd, tmpname = mkstemp(suffix='.rar')
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + mh + hdr + data)
        tmpf.close()

        cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
        if psw is not None and (flags & RAR_FILE_PASSWORD):
            cmd.append("-p" + psw)
        else:
            cmd.append("-p-")
        cmd.append(tmpname)

        p = custom_popen(cmd)
        return p.communicate()[0]
    finally:
        tmpf.close()
        os.unlink(tmpname)

def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    Returns None for None.  Out-of-range fields are clamped to the nearest
    valid value instead of raising.
    """

    if t is None:
        return None

    # extract values
    year, mon, day, h, m, xs = t
    s = int(xs)
    us = int(1000000 * (xs - s))

    # assume the values are valid
    try:
        return datetime(year, mon, day, h, m, s, us)
    except ValueError:
        pass

    # sanitize invalid values
    MDAY = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    if mon < 1:
        mon = 1
    if mon > 12:
        mon = 12
    if day < 1:
        day = 1
    if day > MDAY[mon]:
        day = MDAY[mon]
    if h > 23:
        h = 23
    if m > 59:
        m = 59
    if s > 59:
        s = 59
    if mon == 2 and day == 29:
        # Feb 29 is valid only in leap years
        try:
            return datetime(year, mon, day, h, m, s, us)
        except ValueError:
            day = 28
    return datetime(year, mon, day, h, m, s, us)

def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns (year, month, day, hour, minute, second); seconds are stored
    with 2-second resolution.
    """

    sec = stamp & 0x1F
    stamp = stamp >> 5
    min = stamp & 0x3F
    stamp = stamp >> 6
    hr = stamp & 0x1F
    stamp = stamp >> 5
    day = stamp & 0x1F
    stamp = stamp >> 5
    mon = stamp & 0x0F
    stamp = stamp >> 4
    yr = (stamp & 0x7F) + 1980
    return (yr, mon, day, hr, min, sec * 2)

def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout."""

    # needed for py2exe
    creationflags = 0
    if sys.platform == 'win32':
        creationflags = 0x08000000 # CREATE_NO_WINDOW

    # run command
    p = Popen(cmd, bufsize = 0, stdout = PIPE, stdin = PIPE, stderr = STDOUT,
              creationflags = creationflags)
    return p