mirror of
https://github.com/sismics/docs.git
synced 2025-12-14 18:26:17 +00:00
Closes #215: handle pptx files
This commit is contained in:
@@ -15,6 +15,7 @@ public class FormatHandlerUtil {
|
||||
*/
|
||||
private static final List<Class<? extends FormatHandler>> FORMAT_HANDLERS = Lists.newArrayList(
|
||||
DocxFormatHandler.class,
|
||||
PptxFormatHandler.class,
|
||||
OdtFormatHandler.class,
|
||||
VideoFormatHandler.class,
|
||||
PdfFormatHandler.class,
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
package com.sismics.docs.core.util.format;
|
||||
|
||||
import com.google.common.io.Closer;
|
||||
import com.sismics.util.mime.MimeType;
|
||||
import org.apache.pdfbox.io.MemoryUsageSetting;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.poi.xslf.usermodel.XSLFSlide;
|
||||
|
||||
import java.awt.*;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
/**
|
||||
* PPTX format handler.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class PptxFormatHandler implements FormatHandler {
|
||||
/**
|
||||
* Cached PPTX loaded file.
|
||||
*/
|
||||
private XMLSlideShow slideShow;
|
||||
|
||||
@Override
|
||||
public boolean accept(String mimeType) {
|
||||
return MimeType.OFFICE_PRESENTATION.equals(mimeType);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BufferedImage generateThumbnail(Path file) throws Exception {
|
||||
XMLSlideShow pptx = loadPPtxFile(file);
|
||||
if (pptx.getSlides().length > 0) {
|
||||
return generateImageFromSlide(pptx, 0);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String extractContent(String language, Path file) throws Exception {
|
||||
XMLSlideShow pptx = loadPPtxFile(file);
|
||||
return new XSLFPowerPointExtractor(pptx).getText();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void appendToPdf(Path file, PDDocument doc, boolean fitImageToPage, int margin, MemoryUsageSetting memUsageSettings, Closer closer) throws Exception {
|
||||
XMLSlideShow pptx = loadPPtxFile(file);
|
||||
XSLFSlide[] slides = pptx.getSlides();
|
||||
Dimension pgsize = pptx.getPageSize();
|
||||
for (int slideIndex = 0; slideIndex < slides.length; slideIndex++) {
|
||||
// One PDF page per slide
|
||||
PDPage page = new PDPage(new PDRectangle(pgsize.width, pgsize.height));
|
||||
try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) {
|
||||
BufferedImage bim = generateImageFromSlide(pptx, slideIndex);
|
||||
PDImageXObject pdImage = LosslessFactory.createFromImage(doc, bim);
|
||||
contentStream.drawImage(pdImage, 0, page.getMediaBox().getHeight() - pdImage.getHeight());
|
||||
}
|
||||
doc.addPage(page);
|
||||
}
|
||||
}
|
||||
|
||||
private XMLSlideShow loadPPtxFile(Path file) throws Exception {
|
||||
if (slideShow == null) {
|
||||
try (InputStream inputStream = Files.newInputStream(file)) {
|
||||
slideShow = new XMLSlideShow(inputStream);
|
||||
}
|
||||
}
|
||||
return slideShow;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate an image from a PPTX slide.
|
||||
*
|
||||
* @param pptx PPTX
|
||||
* @param slideIndex Slide index
|
||||
* @return Image
|
||||
*/
|
||||
private BufferedImage generateImageFromSlide(XMLSlideShow pptx, int slideIndex) {
|
||||
Dimension pgsize = pptx.getPageSize();
|
||||
BufferedImage img = new BufferedImage(pgsize.width, pgsize.height,BufferedImage.TYPE_INT_RGB);
|
||||
Graphics2D graphics = img.createGraphics();
|
||||
graphics.setPaint(Color.white);
|
||||
graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height));
|
||||
pptx.getSlides()[slideIndex].draw(graphics);
|
||||
return img;
|
||||
}
|
||||
}
|
||||
@@ -20,6 +20,10 @@ public class MimeType {
|
||||
|
||||
public static final String OFFICE_DOCUMENT = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
|
||||
|
||||
public static final String OFFICE_PRESENTATION = "application/vnd.openxmlformats-officedocument.presentationml.presentation";
|
||||
|
||||
public static final String OFFICE_SHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
|
||||
|
||||
public static final String TEXT_PLAIN = "text/plain";
|
||||
|
||||
public static final String TEXT_CSV = "text/csv";
|
||||
|
||||
@@ -145,6 +145,9 @@ public class MimeTypeUtil {
|
||||
if (content.contains(MimeType.OFFICE_DOCUMENT)) {
|
||||
mimeType = MimeType.OFFICE_DOCUMENT;
|
||||
break;
|
||||
} else if (content.contains(MimeType.OFFICE_PRESENTATION)) {
|
||||
mimeType = MimeType.OFFICE_PRESENTATION;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -44,6 +44,16 @@ public class TestFileUtil {
|
||||
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void extractContentPowerpointTest() throws Exception {
|
||||
Path path = Paths.get(ClassLoader.getSystemResource("file/apache.pptx").toURI());
|
||||
FormatHandler formatHandler = FormatHandlerUtil.find(MimeTypeUtil.guessMimeType(path, "apache.pptx"));
|
||||
Assert.assertNotNull(formatHandler);
|
||||
Assert.assertTrue(formatHandler instanceof PptxFormatHandler);
|
||||
String content = formatHandler.extractContent("eng", path);
|
||||
Assert.assertTrue(content.contains("Scaling"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void extractContentPdf() throws Exception {
|
||||
Path path = Paths.get(ClassLoader.getSystemResource("file/udhr.pdf").toURI());
|
||||
@@ -70,7 +80,8 @@ public class TestFileUtil {
|
||||
InputStream inputStream1 = Resources.getResource("file/apollo_portrait.jpg").openStream();
|
||||
InputStream inputStream2 = Resources.getResource("file/udhr_encrypted.pdf").openStream();
|
||||
InputStream inputStream3 = Resources.getResource("file/document.docx").openStream();
|
||||
InputStream inputStream4 = Resources.getResource("file/document.odt").openStream()) {
|
||||
InputStream inputStream4 = Resources.getResource("file/document.odt").openStream();
|
||||
InputStream inputStream5 = Resources.getResource("file/apache.pptx").openStream()) {
|
||||
// Document
|
||||
DocumentDto documentDto = new DocumentDto();
|
||||
documentDto.setTitle("My super document 1");
|
||||
@@ -117,9 +128,16 @@ public class TestFileUtil {
|
||||
file4.setId("document_odt");
|
||||
file4.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
|
||||
|
||||
// Sixth file
|
||||
Files.copy(inputStream5, DirectoryUtil.getStorageDirectory().resolve("document_pptx"), StandardCopyOption.REPLACE_EXISTING);
|
||||
File file5 = new File();
|
||||
file5.setId("document_pptx");
|
||||
file5.setMimeType(MimeType.OFFICE_PRESENTATION);
|
||||
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
PdfUtil.convertToPdf(documentDto, Lists.newArrayList(file0, file1, file2, file3, file4), true, true, 10, outputStream);
|
||||
PdfUtil.convertToPdf(documentDto, Lists.newArrayList(file0, file1, file2, file3, file4, file5), true, true, 10, outputStream);
|
||||
Assert.assertTrue(outputStream.toByteArray().length > 0);
|
||||
com.google.common.io.Files.write(outputStream.toByteArray(), new java.io.File("C:\\Users\\Jendib\\Downloads\\test.pdf"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,5 +23,9 @@ public class TestMimeTypeUtil {
|
||||
// Detect DOCX files
|
||||
path = Paths.get(ClassLoader.getSystemResource("file/document.docx").toURI());
|
||||
Assert.assertEquals(MimeType.OFFICE_DOCUMENT, MimeTypeUtil.guessMimeType(path, "document.odt"));
|
||||
|
||||
// Detect PPTX files
|
||||
path = Paths.get(ClassLoader.getSystemResource("file/apache.pptx").toURI());
|
||||
Assert.assertEquals(MimeType.OFFICE_PRESENTATION, MimeTypeUtil.guessMimeType(path, "apache.pptx"));
|
||||
}
|
||||
}
|
||||
|
||||
BIN
docs-core/src/test/resources/file/apache.pptx
Normal file
BIN
docs-core/src/test/resources/file/apache.pptx
Normal file
Binary file not shown.
Reference in New Issue
Block a user