1
0
mirror of https://github.com/sismics/docs.git synced 2025-12-13 09:46:17 +00:00

DAO/event refactoring

This commit is contained in:
jendib
2016-03-14 01:39:29 +01:00
parent 31fff7e021
commit c2a2e9f585
18 changed files with 119 additions and 167 deletions

View File

@@ -57,7 +57,7 @@ public class DocumentDao {
}
/**
* Returns the list of all documents.
* Returns the list of all active documents.
*
* @return List of documents
*/
@@ -69,7 +69,7 @@ public class DocumentDao {
}
/**
* Returns the list of all documents from a user.
* Returns the list of all active documents from a user.
*
* @param userId User ID
* @return List of documents
@@ -83,21 +83,28 @@ public class DocumentDao {
}
/**
* Returns an active document.
* Returns an active document with permission checking.
*
* @param id Document ID
* @param perm Permission needed
* @param userId User ID
* @return Document
*/
public DocumentDto getDocument(String id) {
public DocumentDto getDocument(String id, PermType perm, String userId) {
EntityManager em = ThreadLocalContext.get().getEntityManager();
StringBuilder sb = new StringBuilder("select d.DOC_ID_C, d.DOC_TITLE_C, d.DOC_DESCRIPTION_C, d.DOC_SUBJECT_C, d.DOC_IDENTIFIER_C, d.DOC_PUBLISHER_C, d.DOC_FORMAT_C, d.DOC_SOURCE_C, d.DOC_TYPE_C, d.DOC_COVERAGE_C, d.DOC_RIGHTS_C, d.DOC_CREATEDATE_D, d.DOC_LANGUAGE_C, ");
sb.append(" (select count(s.SHA_ID_C) from T_SHARE s, T_ACL ac where ac.ACL_SOURCEID_C = d.DOC_ID_C and ac.ACL_TARGETID_C = s.SHA_ID_C and ac.ACL_DELETEDATE_D is null and s.SHA_DELETEDATE_D is null), ");
sb.append(" (select count(f.FIL_ID_C) from T_FILE f where f.FIL_DELETEDATE_D is null and f.FIL_IDDOC_C = d.DOC_ID_C), ");
sb.append(" u.USE_USERNAME_C ");
sb.append(" from T_DOCUMENT d, T_USER u ");
sb.append(" join T_ACL a on a.ACL_SOURCEID_C = d.DOC_ID_C and a.ACL_TARGETID_C = :userId and a.ACL_PERM_C = :perm and a.ACL_DELETEDATE_D is null ");
sb.append(" where d.DOC_IDUSER_C = u.USE_ID_C and d.DOC_ID_C = :id and d.DOC_DELETEDATE_D is null ");
Query q = em.createNativeQuery(sb.toString());
q.setParameter("id", id);
q.setParameter("perm", perm.name());
q.setParameter("userId", userId);
Object[] o = null;
try {
o = (Object[]) q.getSingleResult();
@@ -126,30 +133,6 @@ public class DocumentDao {
return documentDto;
}
/**
* Returns an active document.
*
* @param id Document ID
* @param perm Permission needed
* @param userId User ID
* @return Document
*/
public Document getDocument(String id, PermType perm, String userId) {
EntityManager em = ThreadLocalContext.get().getEntityManager();
StringBuilder sb = new StringBuilder("select d.* from T_DOCUMENT d ");
sb.append(" join T_ACL a on a.ACL_SOURCEID_C = d.DOC_ID_C and a.ACL_TARGETID_C = :userId and a.ACL_PERM_C = :perm and a.ACL_DELETEDATE_D is null ");
sb.append(" where d.DOC_ID_C = :id and d.DOC_DELETEDATE_D is null");
Query q = em.createNativeQuery(sb.toString(), Document.class);
q.setParameter("id", id);
q.setParameter("perm", perm.name());
q.setParameter("userId", userId);
try {
return (Document) q.getSingleResult();
} catch (NoResultException e) {
return null;
}
}
/**
* Deletes a document.
*
@@ -194,15 +177,17 @@ public class DocumentDao {
}
/**
* Gets a document by its ID.
* Gets an active document by its ID.
*
* @param id Document ID
* @return Document
*/
public Document getById(String id) {
EntityManager em = ThreadLocalContext.get().getEntityManager();
Query q = em.createQuery("select d from Document d where d.id = :id and d.deleteDate is null");
q.setParameter("id", id);
try {
return em.find(Document.class, id);
return (Document) q.getSingleResult();
} catch (NoResultException e) {
return null;
}

View File

@@ -35,7 +35,7 @@ public class VocabularyDao {
}
/**
* Get all vocabulary entries sharing a single name
* Get all vocabulary entries sharing a single name.
*
* @param name Name
* @return Vocabulary entries

View File

@@ -1,9 +1,7 @@
package com.sismics.docs.core.dao.lucene;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -46,16 +44,14 @@ public class LuceneDao {
indexWriter.deleteAll();
// Add all documents
Map<String, Document> documentMap = new HashMap<>();
for (Document document : documentList) {
org.apache.lucene.document.Document luceneDocument = getDocumentFromDocument(document);
indexWriter.addDocument(luceneDocument);
documentMap.put(document.getId(), document);
}
// Add all files
for (File file : fileList) {
org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file, documentMap.get(file.getDocumentId()));
org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file);
indexWriter.addDocument(luceneDocument);
}
}
@@ -81,13 +77,12 @@ public class LuceneDao {
* Add file to the index.
*
* @param file File to add
* @param document Document linked to the file
*/
public void createFile(final File file, final Document document) {
public void createFile(final File file) {
LuceneUtil.handle(new LuceneRunnable() {
@Override
public void run(IndexWriter indexWriter) throws Exception {
org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file, document);
org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file);
indexWriter.addDocument(luceneDocument);
}
});
@@ -112,13 +107,12 @@ public class LuceneDao {
* Update file index.
*
* @param file Updated file
* @param document Document linked to the file
*/
public void updateFile(final File file, final Document document) {
public void updateFile(final File file) {
LuceneUtil.handle(new LuceneRunnable() {
@Override
public void run(IndexWriter indexWriter) throws Exception {
org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file, document);
org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file);
indexWriter.updateDocument(new Term("id", file.getId()), luceneDocument);
}
});
@@ -200,7 +194,7 @@ public class LuceneDao {
/**
* Build Lucene document from database document.
*
* @param document Document
* @param document Document
* @return Document
*/
private org.apache.lucene.document.Document getDocumentFromDocument(Document document) {
@@ -243,10 +237,9 @@ public class LuceneDao {
* Build Lucene document from file.
*
* @param file File
* @param document Document linked to the file
* @return Document
*/
private org.apache.lucene.document.Document getDocumentFromFile(File file, Document document) {
private org.apache.lucene.document.Document getDocumentFromFile(File file) {
org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document();
luceneDocument.add(new StringField("id", file.getId(), Field.Store.YES));
luceneDocument.add(new StringField("doctype", "file", Field.Store.YES));

View File

@@ -1,7 +1,6 @@
package com.sismics.docs.core.event;
import com.google.common.base.MoreObjects;
import com.sismics.docs.core.model.jpa.Document;
/**
* Document deleted event.
@@ -10,32 +9,22 @@ import com.sismics.docs.core.model.jpa.Document;
*/
public class DocumentDeletedAsyncEvent extends UserEvent {
/**
* Created document.
* Document ID.
*/
private Document document;
private String documentId;
/**
* Getter of document.
*
* @return the document
*/
public Document getDocument() {
return document;
public String getDocumentId() {
return documentId;
}
/**
* Setter of document.
*
* @param document document
*/
public void setDocument(Document document) {
this.document = document;
public void setDocumentId(String documentId) {
this.documentId = documentId;
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("document", document)
.add("documentId", documentId)
.toString();
}
}

View File

@@ -1,7 +1,6 @@
package com.sismics.docs.core.event;
import com.google.common.base.MoreObjects;
import com.sismics.docs.core.model.jpa.Document;
/**
* Document updated event.
@@ -10,32 +9,22 @@ import com.sismics.docs.core.model.jpa.Document;
*/
public class DocumentUpdatedAsyncEvent extends UserEvent {
/**
* Created document.
* Document ID.
*/
private Document document;
private String documentId;
/**
* Getter of document.
*
* @return the document
*/
public Document getDocument() {
return document;
public String getDocumentId() {
return documentId;
}
/**
* Setter of document.
*
* @param document document
*/
public void setDocument(Document document) {
this.document = document;
public void setDocumentId(String documentId) {
this.documentId = documentId;
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("document", document)
.add("documentId", documentId)
.toString();
}
}

View File

@@ -3,7 +3,6 @@ package com.sismics.docs.core.event;
import java.io.InputStream;
import com.google.common.base.MoreObjects;
import com.sismics.docs.core.model.jpa.Document;
import com.sismics.docs.core.model.jpa.File;
/**
@@ -18,9 +17,9 @@ public class FileCreatedAsyncEvent extends UserEvent {
private File file;
/**
* Document linked to the file.
* Language of the file.
*/
private Document document;
private String language;
/**
* Unencrypted input stream containing the file.
@@ -42,12 +41,12 @@ public class FileCreatedAsyncEvent extends UserEvent {
this.file = file;
}
public Document getDocument() {
return document;
public String getLanguage() {
return language;
}
public void setDocument(Document document) {
this.document = document;
public void setLanguage(String language) {
this.language = language;
}
public InputStream getInputStream() {
@@ -70,7 +69,7 @@ public class FileCreatedAsyncEvent extends UserEvent {
public String toString() {
return MoreObjects.toStringHelper(this)
.add("file", file)
.add("document", document)
.add("language", language)
.toString();
}
}

View File

@@ -14,20 +14,10 @@ public class FileDeletedAsyncEvent extends UserEvent {
*/
private File file;
/**
* Getter of file.
*
* @return the file
*/
public File getFile() {
return file;
}
/**
* Setter of file.
*
* @param file file
*/
public void setFile(File file) {
this.file = file;
}

View File

@@ -32,6 +32,6 @@ public class DocumentDeletedAsyncListener {
// Update Lucene index
LuceneDao luceneDao = new LuceneDao();
luceneDao.deleteDocument(documentDeletedAsyncEvent.getDocument().getId());
luceneDao.deleteDocument(documentDeletedAsyncEvent.getDocumentId());
}
}

View File

@@ -7,6 +7,7 @@ import org.slf4j.LoggerFactory;
import com.google.common.eventbus.Subscribe;
import com.sismics.docs.core.dao.jpa.ContributorDao;
import com.sismics.docs.core.dao.jpa.DocumentDao;
import com.sismics.docs.core.dao.lucene.LuceneDao;
import com.sismics.docs.core.event.DocumentUpdatedAsyncEvent;
import com.sismics.docs.core.model.jpa.Contributor;
@@ -35,12 +36,17 @@ public class DocumentUpdatedAsyncListener {
log.info("Document updated event: " + event.toString());
}
// Update contributors list
TransactionUtil.handle(new Runnable() {
@Override
public void run() {
// Update Lucene index
DocumentDao documentDao = new DocumentDao();
LuceneDao luceneDao = new LuceneDao();
luceneDao.updateDocument(documentDao.getById(event.getDocumentId()));
// Update contributors list
ContributorDao contributorDao = new ContributorDao();
List<Contributor> contributorList = contributorDao.findByDocumentId(event.getDocument().getId());
List<Contributor> contributorList = contributorDao.findByDocumentId(event.getDocumentId());
// Check if the user firing this event is not already a contributor
for (Contributor contributor : contributorList) {
@@ -52,14 +58,10 @@ public class DocumentUpdatedAsyncListener {
// Add a new contributor
Contributor contributor = new Contributor();
contributor.setDocumentId(event.getDocument().getId());
contributor.setDocumentId(event.getDocumentId());
contributor.setUserId(event.getUserId());
contributorDao.create(contributor);
}
});
// Update Lucene index
LuceneDao luceneDao = new LuceneDao();
luceneDao.updateDocument(event.getDocument());
}
}

View File

@@ -41,7 +41,7 @@ public class FileCreatedAsyncListener {
// Extract text content from the file
long startTime = System.currentTimeMillis();
final String content = FileUtil.extractContent(fileCreatedAsyncEvent.getDocument(), file,
final String content = FileUtil.extractContent(fileCreatedAsyncEvent.getLanguage(), file,
fileCreatedAsyncEvent.getInputStream(), fileCreatedAsyncEvent.getPdfInputStream());
fileCreatedAsyncEvent.getInputStream().close();
if (fileCreatedAsyncEvent.getPdfInputStream() != null) {
@@ -66,6 +66,6 @@ public class FileCreatedAsyncListener {
// Update Lucene index
LuceneDao luceneDao = new LuceneDao();
luceneDao.createFile(fileCreatedAsyncEvent.getFile(), fileCreatedAsyncEvent.getDocument());
luceneDao.createFile(fileCreatedAsyncEvent.getFile());
}
}

View File

@@ -18,7 +18,6 @@ import org.imgscalr.Scalr.Mode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.sismics.docs.core.model.jpa.Document;
import com.sismics.docs.core.model.jpa.File;
import com.sismics.tess4j.Tesseract;
import com.sismics.util.ImageUtil;
@@ -37,17 +36,17 @@ public class FileUtil {
/**
* Extract content from a file.
*
* @param document Document linked to the file
* @param language Language to extract
* @param file File to extract
* @param inputStream Unencrypted input stream
* @param pdfInputStream Unencrypted PDF input stream
* @return Content extract
*/
public static String extractContent(Document document, File file, InputStream inputStream, InputStream pdfInputStream) {
public static String extractContent(String language, File file, InputStream inputStream, InputStream pdfInputStream) {
String content = null;
if (ImageUtil.isImage(file.getMimeType())) {
content = ocrFile(inputStream, document);
content = ocrFile(inputStream, language);
} else if (pdfInputStream != null) {
content = PdfUtil.extractPdf(pdfInputStream);
}
@@ -59,10 +58,10 @@ public class FileUtil {
* Optical character recognition on a stream.
*
* @param inputStream Unencrypted input stream
* @param document Document linked to the file
* @param language Language to OCR
* @return Content extracted
*/
private static String ocrFile(InputStream inputStream, Document document) {
private static String ocrFile(InputStream inputStream, String language) {
Tesseract instance = Tesseract.getInstance();
String content = null;
BufferedImage image = null;
@@ -80,7 +79,7 @@ public class FileUtil {
// OCR the file
try {
log.info("Starting OCR with TESSDATA_PREFIX=" + System.getenv("TESSDATA_PREFIX") + ";LC_NUMERIC=" + System.getenv("LC_NUMERIC"));
instance.setLanguage(document.getLanguage());
instance.setLanguage(language);
content = instance.doOCR(image);
} catch (Throwable e) {
log.error("Error while OCR-izing the image", e);

View File

@@ -143,24 +143,37 @@ public class PdfUtil {
*
* @param fileList List of files
* @param fitImageToPage Fit images to the page
* @param metadata Add a page with metadata
* @param comments Add a page with comments
* @param margin Margins in millimeters
* @return PDF input stream
* @throws IOException
*/
public static InputStream convertToPdf(List<File> fileList, boolean fitImageToPage, int margin) throws Exception {
public static InputStream convertToPdf(List<File> fileList, boolean fitImageToPage, boolean metadata, boolean comments, int margin) throws Exception {
// TODO PDF Export: Option to add a front page with:
// document title, document description, creator, date created, language,
// additional dublincore metadata (except relations)
// list of all files (and information if it is in this document or not)
// TODO PDF Export: Option to add the comments
// Create a blank PDF
// Setup PDFBox
Closer closer = Closer.create();
MemoryUsageSetting memUsageSettings = MemoryUsageSetting.setupMixed(1000000); // 1MB max memory usage
memUsageSettings.setTempDir(new java.io.File(System.getProperty("java.io.tmpdir"))); // To OS temp
float mmPerInch = 1 / (10 * 2.54f) * 72f;
// Create a blank PDF
try (PDDocument doc = new PDDocument(memUsageSettings)) {
// Add metadata
if (metadata) {
}
// Add comments
if (comments) {
}
// Add files
for (File file : fileList) {
Path storedFile = DirectoryUtil.getStorageDirectory().resolve(file.getId());

View File

@@ -81,7 +81,7 @@ public class TestFileUtil {
file4.setId("document_odt");
file4.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
PdfUtil.convertToPdf(Lists.newArrayList(file0, file1, file2, file3, file4), true, 10).close();
PdfUtil.convertToPdf(Lists.newArrayList(file0, file1, file2, file3, file4), true, true, true, 10).close();
}
}
}