mirror of
https://github.com/sismics/docs.git
synced 2025-12-17 19:51:39 +00:00
#53: Handle and extract text content from DOCX and ODT files
This commit is contained in:
@@ -18,7 +18,6 @@ import com.google.common.io.ByteStreams;
|
||||
* @author bgamard
|
||||
*/
|
||||
public class TestEncryptUtil {
|
||||
|
||||
/**
|
||||
* Test private key.
|
||||
*/
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
package com.sismics.docs.core.util;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
import junit.framework.Assert;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import com.google.common.io.Resources;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
import com.sismics.util.mime.MimeType;
|
||||
|
||||
/**
|
||||
* Test of the file entity utilities.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class TestFileUtil {
|
||||
@Test
|
||||
public void extractContentOpenDocumentTextTest() throws Exception {
|
||||
try (InputStream inputStream = Resources.getResource("file/document.odt").openStream()) {
|
||||
File file = new File();
|
||||
file.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
|
||||
Assert.assertEquals("Lorem ipsum dolor sit amen.\r\n", FileUtil.extractContent(null, file, inputStream));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void extractContentOfficeDocumentTest() throws Exception {
|
||||
try (InputStream inputStream = Resources.getResource("file/document.docx").openStream()) {
|
||||
File file = new File();
|
||||
file.setMimeType(MimeType.OFFICE_DOCUMENT);
|
||||
Assert.assertEquals("Lorem ipsum dolor sit amen.\r\n", FileUtil.extractContent(null, file, inputStream));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
package com.sismics.util;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.commons.compress.utils.IOUtils;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.google.common.io.Resources;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
import com.sismics.util.mime.MimeType;
|
||||
import com.sismics.util.mime.MimeTypeUtil;
|
||||
|
||||
/**
|
||||
* Test of the utilities to check MIME types.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class TestMimeTypeUtil {
|
||||
|
||||
@Test
|
||||
public void guessOpenDocumentFormatTest() throws Exception {
|
||||
// Detect ODT files
|
||||
try (InputStream inputStream = Resources.getResource("file/document.odt").openStream();
|
||||
InputStream byteArrayInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) {
|
||||
File file = new File();
|
||||
file.setMimeType(MimeType.APPLICATION_ZIP);
|
||||
Assert.assertEquals(MimeType.OPEN_DOCUMENT_TEXT, MimeTypeUtil.guessOpenDocumentFormat(file, byteArrayInputStream));
|
||||
}
|
||||
|
||||
// Detect DOCX files
|
||||
try (InputStream inputStream = Resources.getResource("file/document.docx").openStream();
|
||||
InputStream byteArrayInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) {
|
||||
File file = new File();
|
||||
file.setMimeType(MimeType.APPLICATION_ZIP);
|
||||
Assert.assertEquals(MimeType.OFFICE_DOCUMENT, MimeTypeUtil.guessOpenDocumentFormat(file, byteArrayInputStream));
|
||||
}
|
||||
}
|
||||
}
|
||||
BIN
docs-core/src/test/resources/file/document.docx
Normal file
BIN
docs-core/src/test/resources/file/document.docx
Normal file
Binary file not shown.
BIN
docs-core/src/test/resources/file/document.odt
Normal file
BIN
docs-core/src/test/resources/file/document.odt
Normal file
Binary file not shown.
Reference in New Issue
Block a user