1
0
mirror of https://github.com/ciromattia/kcc synced 2025-12-15 18:56:28 +00:00

Don't wrap PDF processing in so many layers of try/except

This commit is contained in:
Alex Xu
2025-07-20 12:02:42 -07:00
parent f1ffb2c4e8
commit cb5f4db5c4

View File

@@ -691,7 +691,6 @@ def render_page(vector):
filename = vector[2] # document filename
output_dir = vector[3]
target_height = vector[4]
try:
with pymupdf.open(filename) as doc: # open the document
num_pages = doc.page_count # get number of pages
@@ -708,8 +707,7 @@ def render_page(vector):
pix = page.get_pixmap(matrix=mat, colorspace='RGB', alpha=False)
pix.save(os.path.join(output_dir, "p-%i.png" % i))
print("Processed page numbers %i through %i" % (seg_from, seg_to - 1))
except Exception as e:
raise UserWarning(f"Error rendering {filename}: {e}")
def extract_page(vector):
@@ -736,7 +734,7 @@ def extract_page(vector):
filename = vector[2] # document filename
output_dir = vector[3]
try:
with pymupdf.open(filename) as doc: # open the document
num_pages = doc.page_count # get number of pages
@@ -772,8 +770,7 @@ def extract_page(vector):
pix = pymupdf.Pixmap(pix, alpha=0)
pix.save(output_path)
print("Processed page numbers %i through %i" % (seg_from, seg_to - 1))
except Exception as e:
raise UserWarning(f"Error exporting {filename}: {e}")
def mupdf_pdf_process_pages_parallel(filename, output_dir, target_height):
@@ -794,7 +791,7 @@ def mupdf_pdf_process_pages_parallel(filename, output_dir, target_height):
vectors = [(i, cpu, filename, output_dir, target_height) for i in range(cpu)]
print("Starting %i processes for '%s'." % (cpu, filename))
try:
start = perf_counter()
with Pool() as pool:
results = pool.map(
@@ -802,8 +799,7 @@ def mupdf_pdf_process_pages_parallel(filename, output_dir, target_height):
)
end = perf_counter()
print(f"MuPDF: {end - start} sec")
except Exception as e:
raise UserWarning(f"Error while processing PDF pages: {e}")
def getWorkFolder(afile):