mirror of
https://github.com/sismics/docs.git
synced 2025-12-13 17:56:20 +00:00
Force loading of corrupted PDF
This commit is contained in:
@@ -40,7 +40,7 @@ public class FileCreatedAsyncListener {
|
||||
final File file = fileCreatedAsyncEvent.getFile();
|
||||
long startTime = System.currentTimeMillis();
|
||||
final String content = FileUtil.extractContent(fileCreatedAsyncEvent.getDocument(), file);
|
||||
- log.info(MessageFormat.format("File OCR-ized in {0}ms", System.currentTimeMillis() - startTime));
|
||||
+ log.info(MessageFormat.format("File content extracted in {0}ms", System.currentTimeMillis() - startTime));
|
||||
|
||||
// Store the OCR-ization result in the database
|
||||
TransactionUtil.handle(new Runnable() {
|
||||
|
||||
@@ -103,7 +103,7 @@ public class FileUtil {
|
||||
java.io.File storedfile = Paths.get(DirectoryUtil.getStorageDirectory().getPath(), file.getId()).toFile();
|
||||
try {
|
||||
PDFTextStripper stripper = new PDFTextStripper();
|
||||
- pdfDocument = PDDocument.load(storedfile);
|
||||
+ pdfDocument = PDDocument.load(storedfile.getAbsolutePath(), true);
|
||||
content = stripper.getText(pdfDocument);
|
||||
} catch (IOException e) {
|
||||
log.error("Error while extracting text from the PDF " + storedfile, e);
|
||||
@@ -153,7 +153,7 @@ public class FileUtil {
|
||||
image = ImageIO.read(originalFile);
|
||||
} else if(file.getMimeType().equals(MimeType.APPLICATION_PDF)) {
|
||||
// Generate preview from the first page of the PDF
|
||||
- PDDocument pdfDocument = PDDocument.load(originalFile);
|
||||
+ PDDocument pdfDocument = PDDocument.load(originalFile.getAbsolutePath(), true);
|
||||
@SuppressWarnings("unchecked")
|
||||
List<PDPage> pageList = pdfDocument.getDocumentCatalog().getAllPages();
|
||||
if (pageList.size() > 0) {
|
||||
|
||||
Reference in New Issue
Block a user