1
0
mirror of https://github.com/ciromattia/kcc synced 2026-05-17 21:11:47 +00:00

epub input: extract biggest image per page (#1342)

* extract biggest image on page

* fix largest_size location
This commit is contained in:
Alex Xu
2026-05-17 09:12:44 -07:00
committed by GitHub
parent 9827f11944
commit c385ef7ae0

View File

@@ -980,17 +980,24 @@ def getWorkFolder(afile, workdir=None):
page_path = os.path.join(os.path.dirname(opf_path), manifest_dict[spine_item])
page = ET.parse(page_path)
imgs = page.findall(r'.//{*}img') + page.findall(r'.//{*}image')
largest_size = 0
img_path = None
# TODO keep more than one image per page (currently only the largest file is used)
for img in imgs:
for key in img.attrib:
if 'src' in key or 'href' in key:
img_path = img.attrib[key]
if img_path.startswith('..'):
img_path = os.path.join(os.path.dirname(opf_path), os.path.dirname(manifest_dict[spine_item]), img_path)
temp_img_path = img.attrib[key]
if temp_img_path.startswith('..'):
temp_img_path = os.path.join(os.path.dirname(opf_path), os.path.dirname(manifest_dict[spine_item]), temp_img_path)
else:
img_path = os.path.join(os.path.dirname(opf_path), os.path.dirname(manifest_dict[spine_item]), img_path)
break
temp_img_path = os.path.join(os.path.dirname(opf_path), os.path.dirname(manifest_dict[spine_item]), temp_img_path)
try:
temp_size = os.path.getsize(temp_img_path)
if temp_size > largest_size:
largest_size = temp_size
img_path = temp_img_path
except OSError:
pass
# TODO empty image
if img_path:
ordered_image_paths.append(img_path)