1
0
mirror of https://github.com/ciromattia/kcc synced 2025-12-13 01:36:27 +00:00

fix mars pdf input (#1081)

This commit is contained in:
Alex Xu
2025-09-11 13:03:27 -07:00
committed by GitHub
parent bc92c2dd85
commit 420bed995b

View File

@@ -785,16 +785,16 @@ def extract_page(vector):
width, height = int(page.rect.width), int(page.rect.height) width, height = int(page.rect.width), int(page.rect.height)
blank_page = Image.new("RGB", (width, height), "white") blank_page = Image.new("RGB", (width, height), "white")
blank_page.save(output_path) blank_page.save(output_path)
xref = image_list[0][0]
d = doc.extract_image(xref)
if d['cs-name'] == 'DeviceCMYK':
pix = pymupdf.Pixmap(doc, xref)
pix = pymupdf.Pixmap(pymupdf.csRGB, pix)
pix.save(output_path)
else: else:
with open(Path(output_path).with_suffix('.' + d['ext']), "wb") as imgout: xref = image_list[0][0]
imgout.write(d["image"]) d = doc.extract_image(xref)
if d['cs-name'] == 'DeviceCMYK':
pix = pymupdf.Pixmap(doc, xref)
pix = pymupdf.Pixmap(pymupdf.csRGB, pix)
pix.save(output_path)
else:
with open(Path(output_path).with_suffix('.' + d['ext']), "wb") as imgout:
imgout.write(d["image"])
print("Processed page numbers %i through %i" % (seg_from, seg_to - 1)) print("Processed page numbers %i through %i" % (seg_from, seg_to - 1))
@@ -810,6 +810,11 @@ def mupdf_pdf_process_pages_parallel(filename, output_dir, target_height):
if len(page.get_images()) > 1: if len(page.get_images()) > 1:
render = True render = True
break break
if len(page.get_images()) == 1:
image = page.get_images()[0]
if not image[5] or image[8] == 'CCITTFaxDecode':
render = True
break
cpu = cpu_count() cpu = cpu_count()