mirror of
https://github.com/sismics/docs.git
synced 2025-12-13 09:46:17 +00:00
Closes #59: Use TwelveMonkeys' ImageIO plugin for JPEG
This commit is contained in:
@@ -113,11 +113,6 @@
|
||||
<artifactId>bcprov-jdk15on</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.levigo.jbig2</groupId>
|
||||
<artifactId>levigo-jbig2-imageio</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>fr.opensagres.xdocreport</groupId>
|
||||
<artifactId>org.odftoolkit.odfdom.converter.pdf</artifactId>
|
||||
@@ -127,16 +122,26 @@
|
||||
<groupId>fr.opensagres.xdocreport</groupId>
|
||||
<artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- OCR dependencies -->
|
||||
|
||||
<dependency>
|
||||
<groupId>jna</groupId>
|
||||
<groupId>net.java.dev.jna</groupId>
|
||||
<artifactId>jna</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- ImageIO plugins -->
|
||||
<dependency>
|
||||
<groupId>com.levigo.jbig2</groupId>
|
||||
<artifactId>levigo-jbig2-imageio</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.twelvemonkeys.imageio</groupId>
|
||||
<artifactId>imageio-jpeg</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>jai</groupId>
|
||||
<artifactId>imageio</artifactId>
|
||||
<groupId>com.github.jai-imageio</groupId>
|
||||
<artifactId>jai-imageio-core</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Test dependencies -->
|
||||
|
||||
@@ -38,9 +38,9 @@ import javax.imageio.stream.ImageOutputStream;
|
||||
|
||||
import org.w3c.dom.NodeList;
|
||||
|
||||
import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;
|
||||
import com.sun.media.imageioimpl.plugins.tiff.TIFFImageReaderSpi;
|
||||
import com.sun.media.imageioimpl.plugins.tiff.TIFFImageWriterSpi;
|
||||
import com.github.jaiimageio.impl.plugins.tiff.TIFFImageReaderSpi;
|
||||
import com.github.jaiimageio.impl.plugins.tiff.TIFFImageWriterSpi;
|
||||
import com.github.jaiimageio.plugins.tiff.TIFFImageWriteParam;
|
||||
|
||||
public class ImageIOHelper {
|
||||
|
||||
@@ -51,26 +51,26 @@ public class ImageIOHelper {
|
||||
* Gets pixel data of an
|
||||
* <code>IIOImage</code> object.
|
||||
*
|
||||
* @param image an
|
||||
* @param oimage an
|
||||
* <code>IIOImage</code> object
|
||||
* @return a byte buffer of pixel data
|
||||
* @throws IOException
|
||||
*/
|
||||
public static ByteBuffer getImageByteBuffer(IIOImage image) throws IOException {
|
||||
//Set up the writeParam
|
||||
TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
|
||||
tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);
|
||||
|
||||
//Get tif writer and set output to file
|
||||
public static ByteBuffer getImageByteBuffer(BufferedImage oimage) throws IOException {
|
||||
// Get tif writer and set output to file
|
||||
ImageWriter writer = new TIFFImageWriterSpi().createWriterInstance();
|
||||
|
||||
//Get the stream metadata
|
||||
// Set up the writeParam
|
||||
// We are using the old JAI ImageIO plugin because, for some reason, OCR doesn't work with TwelveMonkeys' plugin
|
||||
ImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
|
||||
tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);
|
||||
|
||||
// Get the stream metadata
|
||||
IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam);
|
||||
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
ImageOutputStream ios = ImageIO.createImageOutputStream(outputStream);
|
||||
writer.setOutput(ios);
|
||||
writer.write(streamMetadata, new IIOImage(image.getRenderedImage(), null, null), tiffWriteParam);
|
||||
writer.write(streamMetadata, new IIOImage(oimage, null, null), tiffWriteParam);
|
||||
writer.dispose();
|
||||
|
||||
// Read the written image
|
||||
|
||||
@@ -17,7 +17,6 @@ package com.sismics.tess4j;
|
||||
|
||||
import java.awt.Rectangle;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.image.RenderedImage;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
@@ -25,8 +24,6 @@ import java.util.Enumeration;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import javax.imageio.IIOImage;
|
||||
|
||||
import com.sun.jna.Pointer;
|
||||
|
||||
/**
|
||||
@@ -169,9 +166,8 @@ public class Tesseract {
|
||||
* @throws TesseractException
|
||||
*/
|
||||
public String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException {
|
||||
IIOImage oimage = new IIOImage(bi, null, null);
|
||||
List<IIOImage> imageList = new ArrayList<IIOImage>();
|
||||
imageList.add(oimage);
|
||||
List<BufferedImage> imageList = new ArrayList<BufferedImage>();
|
||||
imageList.add(bi);
|
||||
return doOCR(imageList, rect);
|
||||
}
|
||||
|
||||
@@ -179,23 +175,22 @@ public class Tesseract {
|
||||
* Performs OCR operation.
|
||||
*
|
||||
* @param imageList a list of
|
||||
* <code>IIOImage</code> objects
|
||||
* <code>BufferedImage</code> objects
|
||||
* @param rect the bounding rectangle defines the region of the image to be
|
||||
* recognized. A rectangle of zero dimension or
|
||||
* <code>null</code> indicates the whole image.
|
||||
* @return the recognized text
|
||||
* @throws TesseractException
|
||||
*/
|
||||
public String doOCR(List<IIOImage> imageList, Rectangle rect) throws TesseractException {
|
||||
public String doOCR(List<BufferedImage> imageList, Rectangle rect) throws TesseractException {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
pageNum = 0;
|
||||
|
||||
for (IIOImage oimage : imageList) {
|
||||
for (BufferedImage oimage : imageList) {
|
||||
pageNum++;
|
||||
try {
|
||||
ByteBuffer buf = ImageIOHelper.getImageByteBuffer(oimage);
|
||||
RenderedImage ri = oimage.getRenderedImage();
|
||||
String pageText = doOCR(ri.getWidth(), ri.getHeight(), buf, rect, ri.getColorModel().getPixelSize());
|
||||
String pageText = doOCR(oimage.getWidth(), oimage.getHeight(), buf, rect, oimage.getColorModel().getPixelSize());
|
||||
sb.append(pageText);
|
||||
} catch (IOException ioe) {
|
||||
//skip the problematic image
|
||||
|
||||
Reference in New Issue
Block a user