1
0
mirror of https://github.com/ciromattia/kcc synced 2025-12-23 22:51:45 +00:00

Great Index: Using MD5 checksums instead of file paths.

The performance impact is negligible; it simplifies the code and is much more error-resistant.
This commit is contained in:
Paweł Jastrzębski
2014-01-24 22:43:24 +01:00
parent bd53c6108d
commit 3bb8cc7778
2 changed files with 26 additions and 12 deletions

View File

@@ -35,6 +35,7 @@ from xml.dom.minidom import parse
from uuid import uuid4 from uuid import uuid4
from slugify import slugify as slugifyExt from slugify import slugify as slugifyExt
from PIL import Image from PIL import Image
from hashlib import md5
try: try:
from PyQt5 import QtCore from PyQt5 import QtCore
except ImportError: except ImportError:
@@ -45,7 +46,19 @@ from . import cbxarchive
from . import pdfjpgextract from . import pdfjpgextract
def md5Checksum(filePath, chunkSize=8192):
    """Return the hexadecimal MD5 digest of the file at *filePath*.

    The file is opened in binary mode and hashed in fixed-size chunks, so
    the whole file is never held in memory at once.

    :param filePath: path of the file to hash.
    :param chunkSize: number of bytes requested per read; must be positive.
    :returns: the digest as a 32-character lowercase hexadecimal string.
    :raises ValueError: if *chunkSize* is not positive.
    :raises OSError: if the file cannot be opened or read.
    """
    if chunkSize <= 0:
        # read(0) returns b'' immediately, which would silently produce the
        # digest of empty input instead of the digest of the file.
        raise ValueError('chunkSize must be a positive integer')
    digest = md5()
    with open(filePath, 'rb') as fh:
        # iter(callable, sentinel) keeps reading until read() returns b'' (EOF).
        for data in iter(lambda: fh.read(chunkSize), b''):
            digest.update(data)
    return digest.hexdigest()
def buildHTML(path, imgfile, imgfilepath): def buildHTML(path, imgfile, imgfilepath):
imgfilepath = md5Checksum(imgfilepath)
filename = getImageFileName(imgfile) filename = getImageFileName(imgfile)
if filename is not None: if filename is not None:
if "Rotated" in theGreatIndex[imgfilepath]: if "Rotated" in theGreatIndex[imgfilepath]:
@@ -671,7 +684,6 @@ def slugify(value):
def sanitizeTree(filetree): def sanitizeTree(filetree):
global theGreatIndex
chapterNames = {} chapterNames = {}
for root, dirs, files in os.walk(filetree, False): for root, dirs, files in os.walk(filetree, False):
for name in files: for name in files:
@@ -687,8 +699,6 @@ def sanitizeTree(filetree):
key = os.path.join(root, name) key = os.path.join(root, name)
if key != newKey: if key != newKey:
os.rename(key, newKey) os.rename(key, newKey)
theGreatIndex[newKey] = theGreatIndex[key]
del theGreatIndex[key]
for name in dirs: for name in dirs:
if name.startswith('.'): if name.startswith('.'):
os.remove(os.path.join(root, name)) os.remove(os.path.join(root, name))
@@ -702,14 +712,6 @@ def sanitizeTree(filetree):
key = os.path.join(root, name) key = os.path.join(root, name)
if key != newKey: if key != newKey:
os.rename(key, newKey) os.rename(key, newKey)
theGreatIndexTmp = theGreatIndex.copy()
for keyI in theGreatIndex:
if key in keyI:
newKeyI = keyI.replace(key, newKey)
if newKeyI != keyI:
theGreatIndexTmp[newKeyI] = theGreatIndexTmp[keyI]
del theGreatIndexTmp[keyI]
theGreatIndex = theGreatIndexTmp.copy()
return chapterNames return chapterNames

View File

@@ -21,10 +21,22 @@ __copyright__ = '2012-2013, Ciro Mattia Gonano <ciromattia@gmail.com>, Pawel Jas
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os
from hashlib import md5
from functools import reduce from functools import reduce
from PIL import Image, ImageOps, ImageStat, ImageChops from PIL import Image, ImageOps, ImageStat, ImageChops
def md5Checksum(filePath, chunkSize=8192):
    """Return the hexadecimal MD5 digest of the file at *filePath*.

    The file is opened in binary mode and hashed in fixed-size chunks, so
    the whole file is never held in memory at once.

    :param filePath: path of the file to hash.
    :param chunkSize: number of bytes requested per read; must be positive.
    :returns: the digest as a 32-character lowercase hexadecimal string.
    :raises ValueError: if *chunkSize* is not positive.
    :raises OSError: if the file cannot be opened or read.
    """
    if chunkSize <= 0:
        # read(0) returns b'' immediately, which would silently produce the
        # digest of empty input instead of the digest of the file.
        raise ValueError('chunkSize must be a positive integer')
    digest = md5()
    with open(filePath, 'rb') as fh:
        # iter(callable, sentinel) keeps reading until read() returns b'' (EOF).
        for data in iter(lambda: fh.read(chunkSize), b''):
            digest.update(data)
    return digest.hexdigest()
class ProfileData: class ProfileData:
def __init__(self): def __init__(self):
pass pass
@@ -146,7 +158,7 @@ class ComicPage:
else: else:
filename += ".jpg" filename += ".jpg"
self.image.save(filename, "JPEG", optimize=1) self.image.save(filename, "JPEG", optimize=1)
return [filename, flags] return [md5Checksum(filename), flags]
else: else:
return None return None
except IOError as e: except IOError as e: