mirror of
https://github.com/ciromattia/kcc
synced 2025-12-20 13:11:47 +00:00
Don't wrap so many layers of PDF processing in try/except
This commit is contained in:
@@ -691,7 +691,6 @@ def render_page(vector):
|
|||||||
filename = vector[2] # document filename
|
filename = vector[2] # document filename
|
||||||
output_dir = vector[3]
|
output_dir = vector[3]
|
||||||
target_height = vector[4]
|
target_height = vector[4]
|
||||||
try:
|
|
||||||
with pymupdf.open(filename) as doc: # open the document
|
with pymupdf.open(filename) as doc: # open the document
|
||||||
num_pages = doc.page_count # get number of pages
|
num_pages = doc.page_count # get number of pages
|
||||||
|
|
||||||
@@ -708,8 +707,7 @@ def render_page(vector):
|
|||||||
pix = page.get_pixmap(matrix=mat, colorspace='RGB', alpha=False)
|
pix = page.get_pixmap(matrix=mat, colorspace='RGB', alpha=False)
|
||||||
pix.save(os.path.join(output_dir, "p-%i.png" % i))
|
pix.save(os.path.join(output_dir, "p-%i.png" % i))
|
||||||
print("Processed page numbers %i through %i" % (seg_from, seg_to - 1))
|
print("Processed page numbers %i through %i" % (seg_from, seg_to - 1))
|
||||||
except Exception as e:
|
|
||||||
raise UserWarning(f"Error rendering {filename}: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
def extract_page(vector):
|
def extract_page(vector):
|
||||||
@@ -736,7 +734,7 @@ def extract_page(vector):
|
|||||||
filename = vector[2] # document filename
|
filename = vector[2] # document filename
|
||||||
output_dir = vector[3]
|
output_dir = vector[3]
|
||||||
|
|
||||||
try:
|
|
||||||
with pymupdf.open(filename) as doc: # open the document
|
with pymupdf.open(filename) as doc: # open the document
|
||||||
num_pages = doc.page_count # get number of pages
|
num_pages = doc.page_count # get number of pages
|
||||||
|
|
||||||
@@ -772,8 +770,7 @@ def extract_page(vector):
|
|||||||
pix = pymupdf.Pixmap(pix, alpha=0)
|
pix = pymupdf.Pixmap(pix, alpha=0)
|
||||||
pix.save(output_path)
|
pix.save(output_path)
|
||||||
print("Processed page numbers %i through %i" % (seg_from, seg_to - 1))
|
print("Processed page numbers %i through %i" % (seg_from, seg_to - 1))
|
||||||
except Exception as e:
|
|
||||||
raise UserWarning(f"Error exporting {filename}: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
def mupdf_pdf_process_pages_parallel(filename, output_dir, target_height):
|
def mupdf_pdf_process_pages_parallel(filename, output_dir, target_height):
|
||||||
@@ -794,7 +791,7 @@ def mupdf_pdf_process_pages_parallel(filename, output_dir, target_height):
|
|||||||
vectors = [(i, cpu, filename, output_dir, target_height) for i in range(cpu)]
|
vectors = [(i, cpu, filename, output_dir, target_height) for i in range(cpu)]
|
||||||
print("Starting %i processes for '%s'." % (cpu, filename))
|
print("Starting %i processes for '%s'." % (cpu, filename))
|
||||||
|
|
||||||
try:
|
|
||||||
start = perf_counter()
|
start = perf_counter()
|
||||||
with Pool() as pool:
|
with Pool() as pool:
|
||||||
results = pool.map(
|
results = pool.map(
|
||||||
@@ -802,8 +799,7 @@ def mupdf_pdf_process_pages_parallel(filename, output_dir, target_height):
|
|||||||
)
|
)
|
||||||
end = perf_counter()
|
end = perf_counter()
|
||||||
print(f"MuPDF: {end - start} sec")
|
print(f"MuPDF: {end - start} sec")
|
||||||
except Exception as e:
|
|
||||||
raise UserWarning(f"Error while processing PDF pages: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
def getWorkFolder(afile):
|
def getWorkFolder(afile):
|
||||||
|
|||||||
Reference in New Issue
Block a user