1
0
mirror of https://github.com/sismics/docs.git synced 2025-12-26 08:01:45 +00:00

Closes #373: high quality PDF to image conversion before OCR

This commit is contained in:
bgamard
2020-02-13 17:43:07 +01:00
parent a7423caeb1
commit 90a49efa4a
5 changed files with 41 additions and 21 deletions

View File

@@ -0,0 +1,19 @@
package com.sismics.util.format;
import com.sismics.docs.core.util.format.PdfFormatHandler;
import org.junit.Assert;
import org.junit.Test;
import java.nio.file.Paths;
/**
 * Tests for {@link PdfFormatHandler}.
 */
public class TestPdfFormatHandler {
    /** Classpath-relative location of the PDF fixture used by {@link #testIssue373()}. */
    private static final String ISSUE_373_PDF = "file/issue373.pdf";

    /** Words that must appear in the text extracted from the fixture. */
    private static final String[] ISSUE_373_EXPECTED_WORDS = {
        "Aufrechterhaltung",
        "Außentemperatur",
        "Grundumsatzmessungen",
        "ermitteln"
    };

    /**
     * Extracts the content of the issue 373 fixture with the {@code "deu"} argument
     * (presumably the OCR/document language code -- confirm against
     * {@code PdfFormatHandler.extractContent}) and checks that every expected
     * word is present in the result.
     *
     * @throws Exception if the fixture cannot be resolved to a path or the extraction fails
     */
    @Test
    public void testIssue373() throws Exception {
        // getSystemResource returns null when the resource is absent; fail with a
        // readable message instead of a NullPointerException on toURI().
        java.net.URL resource = ClassLoader.getSystemResource(ISSUE_373_PDF);
        Assert.assertNotNull("Test resource not found on classpath: " + ISSUE_373_PDF, resource);

        PdfFormatHandler formatHandler = new PdfFormatHandler();
        String content = formatHandler.extractContent("deu", Paths.get(resource.toURI()));
        Assert.assertNotNull("Extracted content must not be null", content);

        // Report which word is missing rather than a bare AssertionError.
        for (String expected : ISSUE_373_EXPECTED_WORDS) {
            Assert.assertTrue(
                    "Extracted content does not contain \"" + expected + "\"",
                    content.contains(expected));
        }
    }
}

Binary file not shown.