Use extract_image instead of Pixmap when possible (20x faster)

2025-12-15 18:56:28 +00:00 · 2025-07-20 12:12:02 -07:00
parent cb5f4db5c4
commit 3cd6e09bcb
1 changed file with 8 additions and 14 deletions
--- a/kindlecomicconverter/comic2ebook.py
+++ b/kindlecomicconverter/comic2ebook.py
@@ -754,21 +754,15 @@ def extract_page(vector):
                blank_page = Image.new("RGB", (width, height), "white")
                blank_page.save(output_path)
            xref = image_list[0][0]
-            pix = pymupdf.Pixmap(doc, xref)
+            d = doc.extract_image(xref)
-            if pix.colorspace is None:
+            if d['cs-name'] == 'DeviceCMYK':
-                # It's a stencil mask (grayscale image with inverted colors)
+                pix = pymupdf.Pixmap(doc, xref)
                mask_array = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width)
                inverted = 255 - mask_array
                img = Image.fromarray(inverted, mode="L")
                img.save(output_path)
            if pix.colorspace.name.startswith("Colorspace(CS_GRAY)"):
                # Make sure that an image is just grayscale and not smth like "Colorspace(CS_GRAY) - Separation(DeviceCMYK,Black)"
                pix = pymupdf.Pixmap(pymupdf.csGRAY, pix)
            else:
                pix = pymupdf.Pixmap(pymupdf.csRGB, pix)
-            if pix.alpha: 
+                pix.save(output_path)
-                pix = pymupdf.Pixmap(pix, alpha=0)
+                
-            pix.save(output_path)
+            else:
                with open(Path(output_path).with_suffix('.' + d['ext']), "wb") as imgout:
                    imgout.write(d["image"])
        print("Processed page numbers %i through %i" % (seg_from, seg_to - 1))