1
0
mirror of https://github.com/sismics/docs.git synced 2025-12-13 17:56:20 +00:00

Force loading of corrupted PDF

This commit is contained in:
jendib
2013-08-18 14:11:08 +02:00
parent 77c5a10aba
commit 2c7083aa43
5 changed files with 9 additions and 9 deletions

View File

@@ -40,7 +40,7 @@ public class FileCreatedAsyncListener {
final File file = fileCreatedAsyncEvent.getFile();
long startTime = System.currentTimeMillis();
final String content = FileUtil.extractContent(fileCreatedAsyncEvent.getDocument(), file);
log.info(MessageFormat.format("File OCR-ized in {0}ms", System.currentTimeMillis() - startTime));
log.info(MessageFormat.format("File content extracted in {0}ms", System.currentTimeMillis() - startTime));
// Store the extracted text content in the database
TransactionUtil.handle(new Runnable() {

View File

@@ -103,7 +103,7 @@ public class FileUtil {
java.io.File storedfile = Paths.get(DirectoryUtil.getStorageDirectory().getPath(), file.getId()).toFile();
try {
PDFTextStripper stripper = new PDFTextStripper();
pdfDocument = PDDocument.load(storedfile);
pdfDocument = PDDocument.load(storedfile.getAbsolutePath(), true);
content = stripper.getText(pdfDocument);
} catch (IOException e) {
log.error("Error while extracting text from the PDF " + storedfile, e);
@@ -153,7 +153,7 @@ public class FileUtil {
image = ImageIO.read(originalFile);
} else if(file.getMimeType().equals(MimeType.APPLICATION_PDF)) {
// Generate preview from the first page of the PDF
PDDocument pdfDocument = PDDocument.load(originalFile);
PDDocument pdfDocument = PDDocument.load(originalFile.getAbsolutePath(), true);
@SuppressWarnings("unchecked")
List<PDPage> pageList = pdfDocument.getDocumentCatalog().getAllPages();
if (pageList.size() > 0) {