1
0
mirror of https://github.com/sismics/docs.git synced 2025-12-17 19:51:39 +00:00

#53: Handle and extract text content from DOCX and ODT files

This commit is contained in:
jendib
2015-12-07 23:53:30 +01:00
parent 046984a447
commit 1a37d97a61
12 changed files with 248 additions and 4 deletions

View File

@@ -18,7 +18,6 @@ import com.google.common.io.ByteStreams;
* @author bgamard
*/
public class TestEncryptUtil {
/**
* Test private key.
*/

View File

@@ -0,0 +1,36 @@
package com.sismics.docs.core.util;
import java.io.InputStream;
import junit.framework.Assert;
import org.junit.Test;
import com.google.common.io.Resources;
import com.sismics.docs.core.model.jpa.File;
import com.sismics.util.mime.MimeType;
/**
* Test of the file entity utilities.
*
* @author bgamard
*/
public class TestFileUtil {
@Test
public void extractContentOpenDocumentTextTest() throws Exception {
try (InputStream inputStream = Resources.getResource("file/document.odt").openStream()) {
File file = new File();
file.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
Assert.assertEquals("Lorem ipsum dolor sit amen.\r\n", FileUtil.extractContent(null, file, inputStream));
}
}
@Test
public void extractContentOfficeDocumentTest() throws Exception {
try (InputStream inputStream = Resources.getResource("file/document.docx").openStream()) {
File file = new File();
file.setMimeType(MimeType.OFFICE_DOCUMENT);
Assert.assertEquals("Lorem ipsum dolor sit amen.\r\n", FileUtil.extractContent(null, file, inputStream));
}
}
}

View File

@@ -0,0 +1,40 @@
package com.sismics.util;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.junit.Assert;
import org.junit.Test;
import com.google.common.io.Resources;
import com.sismics.docs.core.model.jpa.File;
import com.sismics.util.mime.MimeType;
import com.sismics.util.mime.MimeTypeUtil;
/**
 * Tests the MIME type checking utilities.
 *
 * @author bgamard
 */
public class TestMimeTypeUtil {
    @Test
    public void guessOpenDocumentFormatTest() throws Exception {
        // ODT sample, initially flagged as a plain ZIP archive
        File odtFile = new File();
        odtFile.setMimeType(MimeType.APPLICATION_ZIP);
        try (InputStream odtContent = loadResourceInMemory("file/document.odt")) {
            Assert.assertEquals(MimeType.OPEN_DOCUMENT_TEXT, MimeTypeUtil.guessOpenDocumentFormat(odtFile, odtContent));
        }

        // DOCX sample, initially flagged as a plain ZIP archive
        File docxFile = new File();
        docxFile.setMimeType(MimeType.APPLICATION_ZIP);
        try (InputStream docxContent = loadResourceInMemory("file/document.docx")) {
            Assert.assertEquals(MimeType.OFFICE_DOCUMENT, MimeTypeUtil.guessOpenDocumentFormat(docxFile, docxContent));
        }
    }

    /**
     * Reads a classpath resource completely and exposes its bytes through an
     * in-memory stream.
     *
     * @param resourcePath Classpath location of the resource
     * @return Stream backed by a byte array holding the resource content
     * @throws Exception If the resource cannot be opened or read
     */
    private static InputStream loadResourceInMemory(String resourcePath) throws Exception {
        try (InputStream resourceStream = Resources.getResource(resourcePath).openStream()) {
            return new ByteArrayInputStream(IOUtils.toByteArray(resourceStream));
        }
    }
}

Binary file not shown.

Binary file not shown.