mirror of
https://github.com/ciromattia/kcc
synced 2025-12-15 18:56:28 +00:00
Use extract_image instead of Pixmap when possible (about 20x faster)
This commit is contained in:
@@ -754,21 +754,15 @@ def extract_page(vector):
|
|||||||
blank_page = Image.new("RGB", (width, height), "white")
|
blank_page = Image.new("RGB", (width, height), "white")
|
||||||
blank_page.save(output_path)
|
blank_page.save(output_path)
|
||||||
xref = image_list[0][0]
|
xref = image_list[0][0]
|
||||||
pix = pymupdf.Pixmap(doc, xref)
|
d = doc.extract_image(xref)
|
||||||
if pix.colorspace is None:
|
if d['cs-name'] == 'DeviceCMYK':
|
||||||
# It's a stencil mask (grayscale image with inverted colors)
|
pix = pymupdf.Pixmap(doc, xref)
|
||||||
mask_array = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width)
|
|
||||||
inverted = 255 - mask_array
|
|
||||||
img = Image.fromarray(inverted, mode="L")
|
|
||||||
img.save(output_path)
|
|
||||||
if pix.colorspace.name.startswith("Colorspace(CS_GRAY)"):
|
|
||||||
# Make sure that an image is just grayscale and not something like "Colorspace(CS_GRAY) - Separation(DeviceCMYK,Black)"
|
|
||||||
pix = pymupdf.Pixmap(pymupdf.csGRAY, pix)
|
|
||||||
else:
|
|
||||||
pix = pymupdf.Pixmap(pymupdf.csRGB, pix)
|
pix = pymupdf.Pixmap(pymupdf.csRGB, pix)
|
||||||
if pix.alpha:
|
pix.save(output_path)
|
||||||
pix = pymupdf.Pixmap(pix, alpha=0)
|
|
||||||
pix.save(output_path)
|
else:
|
||||||
|
with open(Path(output_path).with_suffix('.' + d['ext']), "wb") as imgout:
|
||||||
|
imgout.write(d["image"])
|
||||||
print("Processed page numbers %i through %i" % (seg_from, seg_to - 1))
|
print("Processed page numbers %i through %i" % (seg_from, seg_to - 1))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user