1
0
mirror of https://github.com/sismics/docs.git synced 2025-12-14 18:26:17 +00:00

Closes #215: handle pptx files

This commit is contained in:
Benjamin Gamard
2018-03-20 22:46:56 +01:00
parent 945e619d55
commit 3613f6f8de
11 changed files with 272 additions and 10 deletions

View File

@@ -15,6 +15,7 @@ public class FormatHandlerUtil {
*/
private static final List<Class<? extends FormatHandler>> FORMAT_HANDLERS = Lists.newArrayList(
DocxFormatHandler.class,
PptxFormatHandler.class,
OdtFormatHandler.class,
VideoFormatHandler.class,
PdfFormatHandler.class,

View File

@@ -0,0 +1,97 @@
package com.sismics.docs.core.util.format;
import com.google.common.io.Closer;
import com.sismics.util.mime.MimeType;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import java.awt.*;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
/**
 * PPTX format handler.
 *
 * Handles files whose MIME type is {@link MimeType#OFFICE_PRESENTATION}: renders slides
 * to images (thumbnail and PDF export) and extracts their text.
 *
 * The parsed presentation is cached in the instance, so an instance is not safe to share
 * between threads without external synchronization.
 *
 * @author bgamard
 */
public class PptxFormatHandler implements FormatHandler {
    /**
     * Cached PPTX loaded file.
     */
    private XMLSlideShow slideShow;

    /**
     * Path the cached slide show was loaded from, used to detect a change of input file.
     */
    private Path slideShowFile;

    /**
     * Tell whether this handler supports the given MIME type.
     *
     * @param mimeType MIME type to check (a null value is simply not accepted)
     * @return True only for the PPTX presentation MIME type
     */
    @Override
    public boolean accept(String mimeType) {
        return MimeType.OFFICE_PRESENTATION.equals(mimeType);
    }

    /**
     * Generate a thumbnail by rendering the first slide of the presentation.
     *
     * @param file PPTX file
     * @return Image of the first slide, or null if the presentation has no slide
     * @throws Exception If the file cannot be read or parsed
     */
    @Override
    public BufferedImage generateThumbnail(Path file) throws Exception {
        XMLSlideShow pptx = loadPPtxFile(file);
        XSLFSlide[] slides = pptx.getSlides();
        if (slides.length > 0) {
            return generateImageFromSlide(pptx.getPageSize(), slides[0]);
        }
        return null;
    }

    /**
     * Extract the text of the presentation.
     *
     * @param language Language hint (not used by this handler)
     * @param file PPTX file
     * @return Text returned by the POI PowerPoint extractor
     * @throws Exception If the file cannot be read or parsed
     */
    @Override
    public String extractContent(String language, Path file) throws Exception {
        XMLSlideShow pptx = loadPPtxFile(file);
        return new XSLFPowerPointExtractor(pptx).getText();
    }

    /**
     * Append the presentation to a PDF document, one page per slide.
     *
     * Each page is created with the slide dimensions and the slide is drawn as a lossless
     * image filling it. Consequently fitImageToPage, margin, memUsageSettings and closer
     * are not used by this handler.
     *
     * @param file PPTX file
     * @param doc PDF document to append the pages to
     * @param fitImageToPage Not used
     * @param margin Not used
     * @param memUsageSettings Not used
     * @param closer Not used
     * @throws Exception If the file cannot be read or a page cannot be written
     */
    @Override
    public void appendToPdf(Path file, PDDocument doc, boolean fitImageToPage, int margin, MemoryUsageSetting memUsageSettings, Closer closer) throws Exception {
        XMLSlideShow pptx = loadPPtxFile(file);
        Dimension pgsize = pptx.getPageSize();
        for (XSLFSlide slide : pptx.getSlides()) {
            // One PDF page per slide
            PDPage page = new PDPage(new PDRectangle(pgsize.width, pgsize.height));
            try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) {
                BufferedImage bim = generateImageFromSlide(pgsize, slide);
                PDImageXObject pdImage = LosslessFactory.createFromImage(doc, bim);
                // PDF origin is the bottom-left corner: anchor the image to the top of the page
                contentStream.drawImage(pdImage, 0, page.getMediaBox().getHeight() - pdImage.getHeight());
            }
            doc.addPage(page);
        }
    }

    /**
     * Load a PPTX file, reusing the cached slide show when the same path is requested again.
     *
     * @param file PPTX file
     * @return Parsed slide show
     * @throws Exception If the file cannot be read or parsed
     */
    private XMLSlideShow loadPPtxFile(Path file) throws Exception {
        // Reload when a different file is requested, so a reused handler never serves stale content
        if (slideShow == null || !slideShowFile.equals(file)) {
            try (InputStream inputStream = Files.newInputStream(file)) {
                slideShow = new XMLSlideShow(inputStream);
            }
            slideShowFile = file;
        }
        return slideShow;
    }

    /**
     * Generate an image from a PPTX slide.
     *
     * @param pgsize Page size of the presentation, used as the image size
     * @param slide Slide to render
     * @return Image
     */
    private BufferedImage generateImageFromSlide(Dimension pgsize, XSLFSlide slide) {
        BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB);
        Graphics2D graphics = img.createGraphics();
        try {
            // White background, since the image is created black by default
            graphics.setPaint(Color.white);
            graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height));
            slide.draw(graphics);
        } finally {
            // Release the graphics context even if the rendering fails
            graphics.dispose();
        }
        return img;
    }
}

View File

@@ -20,6 +20,10 @@ public class MimeType {
// Office Open XML word processing document (the test resources use a .docx file for it)
public static final String OFFICE_DOCUMENT = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
// Office Open XML presentation (the test resources use a .pptx file for it)
public static final String OFFICE_PRESENTATION = "application/vnd.openxmlformats-officedocument.presentationml.presentation";
// Office Open XML spreadsheet
public static final String OFFICE_SHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
// Plain text
public static final String TEXT_PLAIN = "text/plain";
// Comma-separated values
public static final String TEXT_CSV = "text/csv";

View File

@@ -145,6 +145,9 @@ public class MimeTypeUtil {
if (content.contains(MimeType.OFFICE_DOCUMENT)) {
mimeType = MimeType.OFFICE_DOCUMENT;
break;
} else if (content.contains(MimeType.OFFICE_PRESENTATION)) {
mimeType = MimeType.OFFICE_PRESENTATION;
break;
}
}

View File

@@ -44,6 +44,16 @@ public class TestFileUtil {
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
}
/**
 * Check that a PPTX file is routed to the PPTX handler and that its text is extracted.
 */
@Test
public void extractContentPowerpointTest() throws Exception {
    // Sample presentation shipped with the test resources
    Path pptxPath = Paths.get(ClassLoader.getSystemResource("file/apache.pptx").toURI());

    // The handler is selected from the guessed MIME type
    FormatHandler handler = FormatHandlerUtil.find(MimeTypeUtil.guessMimeType(pptxPath, "apache.pptx"));
    Assert.assertNotNull(handler);
    Assert.assertTrue(handler instanceof PptxFormatHandler);

    // A word known to be present in the slides must appear in the extracted text
    String extractedText = handler.extractContent("eng", pptxPath);
    Assert.assertTrue(extractedText.contains("Scaling"));
}
@Test
public void extractContentPdf() throws Exception {
Path path = Paths.get(ClassLoader.getSystemResource("file/udhr.pdf").toURI());
@@ -70,7 +80,8 @@ public class TestFileUtil {
InputStream inputStream1 = Resources.getResource("file/apollo_portrait.jpg").openStream();
InputStream inputStream2 = Resources.getResource("file/udhr_encrypted.pdf").openStream();
InputStream inputStream3 = Resources.getResource("file/document.docx").openStream();
InputStream inputStream4 = Resources.getResource("file/document.odt").openStream()) {
InputStream inputStream4 = Resources.getResource("file/document.odt").openStream();
InputStream inputStream5 = Resources.getResource("file/apache.pptx").openStream()) {
// Document
DocumentDto documentDto = new DocumentDto();
documentDto.setTitle("My super document 1");
@@ -117,9 +128,16 @@ public class TestFileUtil {
file4.setId("document_odt");
file4.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
// Sixth file
Files.copy(inputStream5, DirectoryUtil.getStorageDirectory().resolve("document_pptx"), StandardCopyOption.REPLACE_EXISTING);
File file5 = new File();
file5.setId("document_pptx");
file5.setMimeType(MimeType.OFFICE_PRESENTATION);
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
PdfUtil.convertToPdf(documentDto, Lists.newArrayList(file0, file1, file2, file3, file4), true, true, 10, outputStream);
PdfUtil.convertToPdf(documentDto, Lists.newArrayList(file0, file1, file2, file3, file4, file5), true, true, 10, outputStream);
Assert.assertTrue(outputStream.toByteArray().length > 0);
// NOTE(review): hard-coded, developer-specific absolute Windows path; this looks like a debugging leftover.
// The assertion just above already checks that a PDF was produced; consider removing this write
// or targeting a temporary file instead.
com.google.common.io.Files.write(outputStream.toByteArray(), new java.io.File("C:\\Users\\Jendib\\Downloads\\test.pdf"));
}
}
}

View File

@@ -23,5 +23,9 @@ public class TestMimeTypeUtil {
// Detect DOCX files
path = Paths.get(ClassLoader.getSystemResource("file/document.docx").toURI());
Assert.assertEquals(MimeType.OFFICE_DOCUMENT, MimeTypeUtil.guessMimeType(path, "document.odt"));
// Detect PPTX files
path = Paths.get(ClassLoader.getSystemResource("file/apache.pptx").toURI());
Assert.assertEquals(MimeType.OFFICE_PRESENTATION, MimeTypeUtil.guessMimeType(path, "apache.pptx"));
}
}

Binary file not shown.