1
0
mirror of https://github.com/sismics/docs.git synced 2025-12-15 02:36:24 +00:00

Closes #201: reprocess file manually

This commit is contained in:
Benjamin Gamard
2018-03-29 11:34:25 +02:00
parent 0409c2ef79
commit 899f13cb35
16 changed files with 243 additions and 90 deletions

View File

@@ -77,8 +77,20 @@ public class LuceneDao {
}
/**
* Update document index.
* Update file index.
*
* @param file Updated file
*/
public void updateFile(final File file) {
    LuceneUtil.handle(writer -> {
        // Build the fresh Lucene representation first, then swap it in under the file's "id" term
        final org.apache.lucene.document.Document indexedFile = getDocumentFromFile(file);
        final Term idTerm = new Term("id", file.getId());
        writer.updateDocument(idTerm, indexedFile);
    });
}
/**
* Update document index.
*
* @param document Updated document
*/
public void updateDocument(final Document document) {
@@ -87,7 +99,7 @@ public class LuceneDao {
indexWriter.updateDocument(new Term("id", document.getId()), luceneDocument);
});
}
/**
* Delete document from the index.
*
@@ -112,7 +124,7 @@ public class LuceneDao {
// Build search query
StandardQueryParser qpHelper = new StandardQueryParser(new StandardAnalyzer());
qpHelper.setPhraseSlop(100000); // PhraseQuery add terms
qpHelper.setPhraseSlop(100); // PhraseQuery add terms
// Search on documents and files
BooleanQuery query = new BooleanQuery.Builder()
@@ -126,6 +138,7 @@ public class LuceneDao {
.add(qpHelper.parse(searchQuery, "type"), Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "coverage"), Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "rights"), Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "filename"), Occur.SHOULD)
.add(qpHelper.parse(fullSearchQuery, "content"), Occur.SHOULD)
.build();
@@ -150,7 +163,9 @@ public class LuceneDao {
} else if (type.equals("file")) {
documentId = document.get("document_id");
}
documentIdList.add(documentId);
if (documentId != null) {
documentIdList.add(documentId);
}
}
return documentIdList;
@@ -208,7 +223,12 @@ public class LuceneDao {
org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document();
luceneDocument.add(new StringField("id", file.getId(), Field.Store.YES));
luceneDocument.add(new StringField("doctype", "file", Field.Store.YES));
luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES));
if (file.getName() != null) {
luceneDocument.add(new TextField("filename", file.getName(), Field.Store.NO));
}
if (file.getDocumentId() != null) {
luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES));
}
if (file.getContent() != null) {
luceneDocument.add(new TextField("content", file.getContent(), Field.Store.NO));
}

View File

@@ -1,61 +1,9 @@
package com.sismics.docs.core.event;
import com.google.common.base.MoreObjects;
import com.sismics.docs.core.model.jpa.File;
import java.nio.file.Path;
/**
* New file created event.
*
* @author bgamard
*/
public class FileCreatedAsyncEvent extends UserEvent {
/**
* Created file.
*/
private File file;
/**
* Language of the file.
*/
private String language;
/**
* Unencrypted original file.
*/
private Path unencryptedFile;
public File getFile() {
return file;
}
public void setFile(File file) {
this.file = file;
}
public String getLanguage() {
return language;
}
public void setLanguage(String language) {
this.language = language;
}
public Path getUnencryptedFile() {
return unencryptedFile;
}
public FileCreatedAsyncEvent setUnencryptedFile(Path unencryptedFile) {
this.unencryptedFile = unencryptedFile;
return this;
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("file", file)
.add("language", language)
.toString();
}
public class FileCreatedAsyncEvent extends FileEvent {
}

View File

@@ -0,0 +1,61 @@
package com.sismics.docs.core.event;
import com.google.common.base.MoreObjects;
import com.sismics.docs.core.model.jpa.File;
import java.nio.file.Path;
/**
* New file event.
*
* @author bgamard
*/
public abstract class FileEvent extends UserEvent {
/**
* Created file.
*/
private File file;
/**
* Language of the file.
*/
private String language;
/**
* Unencrypted original file.
*/
private Path unencryptedFile;
public File getFile() {
return file;
}
public void setFile(File file) {
this.file = file;
}
public String getLanguage() {
return language;
}
public void setLanguage(String language) {
this.language = language;
}
public Path getUnencryptedFile() {
return unencryptedFile;
}
public FileEvent setUnencryptedFile(Path unencryptedFile) {
this.unencryptedFile = unencryptedFile;
return this;
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("file", file)
.add("language", language)
.toString();
}
}

View File

@@ -0,0 +1,9 @@
package com.sismics.docs.core.event;
/**
 * File updated event.
 *
 * Adds no state of its own: the file, language and unencrypted file
 * are all inherited from {@link FileEvent}.
 *
 * @author bgamard
 */
public class FileUpdatedAsyncEvent extends FileEvent {
}

View File

@@ -5,6 +5,8 @@ import com.sismics.docs.core.dao.jpa.FileDao;
import com.sismics.docs.core.dao.jpa.UserDao;
import com.sismics.docs.core.dao.lucene.LuceneDao;
import com.sismics.docs.core.event.FileCreatedAsyncEvent;
import com.sismics.docs.core.event.FileEvent;
import com.sismics.docs.core.event.FileUpdatedAsyncEvent;
import com.sismics.docs.core.model.jpa.File;
import com.sismics.docs.core.model.jpa.User;
import com.sismics.docs.core.util.DirectoryUtil;
@@ -28,19 +30,19 @@ import java.text.MessageFormat;
import java.util.concurrent.atomic.AtomicReference;
/**
* Listener on file created.
* Listener on file processing.
*
* @author bgamard
*/
public class FileCreatedAsyncListener {
public class FileProcessingAsyncListener {
/**
* Logger.
*/
private static final Logger log = LoggerFactory.getLogger(FileCreatedAsyncListener.class);
private static final Logger log = LoggerFactory.getLogger(FileProcessingAsyncListener.class);
/**
* File created.
*
*
* @param event File created event
*/
@Subscribe
@@ -49,6 +51,41 @@ public class FileCreatedAsyncListener {
log.info("File created event: " + event.toString());
}
processFile(event);
// Update Lucene index
LuceneDao luceneDao = new LuceneDao();
luceneDao.createFile(event.getFile());
FileUtil.endProcessingFile(event.getFile().getId());
}
/**
 * File updated.
 *
 * Re-runs the file processing, then refreshes the file's entry in the Lucene index.
 * The end-of-processing call is made even when processing or indexing fails, so a
 * failure does not skip it (presumably it clears an "in progress" marker for the
 * file; confirm against FileUtil).
 *
 * @param event File updated event
 */
@Subscribe
public void on(final FileUpdatedAsyncEvent event) {
    // Parameterized logging: the message is only built when INFO is enabled
    log.info("File updated event: {}", event);

    try {
        processFile(event);

        // Update Lucene index
        LuceneDao luceneDao = new LuceneDao();
        luceneDao.updateFile(event.getFile());
    } finally {
        // Must run on both success and failure, otherwise an exception above would skip it
        FileUtil.endProcessingFile(event.getFile().getId());
    }
}
/**
* Process the file (create/update).
*
* @param event File event
*/
private void processFile(FileEvent event) {
// Find a format handler
final File file = event.getFile();
FormatHandler formatHandler = FormatHandlerUtil.find(file.getMimeType());
@@ -102,7 +139,7 @@ public class FileCreatedAsyncListener {
try {
content.set(formatHandler.extractContent(event.getLanguage(), event.getUnencryptedFile()));
} catch (Exception e) {
log.error("Error extracting content from: " + event.getFile());
log.error("Error extracting content from: " + event.getFile(), e);
}
log.info(MessageFormat.format("File content extracted in {0}ms", System.currentTimeMillis() - startTime));
@@ -117,13 +154,5 @@ public class FileCreatedAsyncListener {
file.setContent(content.get());
fileDao.update(file);
});
if (file.getDocumentId() != null) {
// Update Lucene index
LuceneDao luceneDao = new LuceneDao();
luceneDao.createFile(event.getFile());
}
FileUtil.endProcessingFile(file.getId());
}
}

View File

@@ -17,7 +17,7 @@ public class TemporaryFileCleanupAsyncListener {
/**
* Logger.
*/
private static final Logger log = LoggerFactory.getLogger(FileCreatedAsyncListener.class);
private static final Logger log = LoggerFactory.getLogger(TemporaryFileCleanupAsyncListener.class);
/**
* Cleanup temporary files.

View File

@@ -117,7 +117,7 @@ public class AppContext {
asyncExecutorList = new ArrayList<>();
asyncEventBus = newAsyncEventBus();
asyncEventBus.register(new FileCreatedAsyncListener());
asyncEventBus.register(new FileProcessingAsyncListener());
asyncEventBus.register(new FileDeletedAsyncListener());
asyncEventBus.register(new DocumentCreatedAsyncListener());
asyncEventBus.register(new DocumentUpdatedAsyncListener());

View File

@@ -232,7 +232,8 @@ public class InboxService extends AbstractScheduledService {
// Add files to the document
for (EmailUtil.FileContent fileContent : mailContent.getFileContentList()) {
FileUtil.createFile(fileContent.getName(), fileContent.getFile(), fileContent.getSize(), "eng", "admin", document.getId());
FileUtil.createFile(fileContent.getName(), fileContent.getFile(), fileContent.getSize(),
document.getLanguage(), "admin", document.getId());
}
}

View File

@@ -54,7 +54,7 @@ public class EncryptionUtil {
* @param is InputStream to decrypt
* @param privateKey Private key
* @return Decrypted stream
* @throws Exception
* @throws Exception e
*/
public static InputStream decryptInputStream(InputStream is, String privateKey) throws Exception {
return new CipherInputStream(is, getCipher(privateKey, Cipher.DECRYPT_MODE));
@@ -66,7 +66,7 @@ public class EncryptionUtil {
* @param file Encrypted file
* @param privateKey Private key
* @return Decrypted temporary file
* @throws Exception
* @throws Exception e
*/
public static Path decryptFile(Path file, String privateKey) throws Exception {
if (privateKey == null) {
@@ -86,7 +86,7 @@ public class EncryptionUtil {
*
* @param privateKey Private key
* @return Encryption cipher
* @throws Exception
* @throws Exception e
*/
public static Cipher getEncryptionCipher(String privateKey) throws Exception {
if (Strings.isNullOrEmpty(privateKey)) {
@@ -101,7 +101,7 @@ public class EncryptionUtil {
* @param privateKey Private key
* @param mode Mode (encrypt or decrypt)
* @return Cipher
* @throws Exception
* @throws Exception e
*/
private static Cipher getCipher(String privateKey, int mode) throws Exception {
PBEKeySpec keySpec = new PBEKeySpec(privateKey.toCharArray(), SALT.getBytes(), 2000, 256);

View File

@@ -45,6 +45,10 @@ public class ImageFormatHandler implements FormatHandler {
@Override
public String extractContent(String language, Path file) throws Exception {
if (language == null) {
return null;
}
try (InputStream inputStream = Files.newInputStream(file)) {
return FileUtil.ocrFile(language, ImageIO.read(inputStream));
}