mirror of
https://github.com/sismics/docs.git
synced 2025-12-15 02:36:24 +00:00
Closes #201: reprocess file manually
This commit is contained in:
@@ -77,8 +77,20 @@ public class LuceneDao {
|
||||
}
|
||||
|
||||
/**
|
||||
* Update document index.
|
||||
* Update file index.
|
||||
*
|
||||
* @param file Updated file
|
||||
*/
|
||||
public void updateFile(final File file) {
|
||||
LuceneUtil.handle(indexWriter -> {
|
||||
org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file);
|
||||
indexWriter.updateDocument(new Term("id", file.getId()), luceneDocument);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Update document index.
|
||||
*
|
||||
* @param document Updated document
|
||||
*/
|
||||
public void updateDocument(final Document document) {
|
||||
@@ -87,7 +99,7 @@ public class LuceneDao {
|
||||
indexWriter.updateDocument(new Term("id", document.getId()), luceneDocument);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Delete document from the index.
|
||||
*
|
||||
@@ -112,7 +124,7 @@ public class LuceneDao {
|
||||
|
||||
// Build search query
|
||||
StandardQueryParser qpHelper = new StandardQueryParser(new StandardAnalyzer());
|
||||
qpHelper.setPhraseSlop(100000); // PhraseQuery add terms
|
||||
qpHelper.setPhraseSlop(100); // PhraseQuery add terms
|
||||
|
||||
// Search on documents and files
|
||||
BooleanQuery query = new BooleanQuery.Builder()
|
||||
@@ -126,6 +138,7 @@ public class LuceneDao {
|
||||
.add(qpHelper.parse(searchQuery, "type"), Occur.SHOULD)
|
||||
.add(qpHelper.parse(searchQuery, "coverage"), Occur.SHOULD)
|
||||
.add(qpHelper.parse(searchQuery, "rights"), Occur.SHOULD)
|
||||
.add(qpHelper.parse(searchQuery, "filename"), Occur.SHOULD)
|
||||
.add(qpHelper.parse(fullSearchQuery, "content"), Occur.SHOULD)
|
||||
.build();
|
||||
|
||||
@@ -150,7 +163,9 @@ public class LuceneDao {
|
||||
} else if (type.equals("file")) {
|
||||
documentId = document.get("document_id");
|
||||
}
|
||||
documentIdList.add(documentId);
|
||||
if (documentId != null) {
|
||||
documentIdList.add(documentId);
|
||||
}
|
||||
}
|
||||
|
||||
return documentIdList;
|
||||
@@ -208,7 +223,12 @@ public class LuceneDao {
|
||||
org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document();
|
||||
luceneDocument.add(new StringField("id", file.getId(), Field.Store.YES));
|
||||
luceneDocument.add(new StringField("doctype", "file", Field.Store.YES));
|
||||
luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES));
|
||||
if (file.getName() != null) {
|
||||
luceneDocument.add(new TextField("filename", file.getName(), Field.Store.NO));
|
||||
}
|
||||
if (file.getDocumentId() != null) {
|
||||
luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES));
|
||||
}
|
||||
if (file.getContent() != null) {
|
||||
luceneDocument.add(new TextField("content", file.getContent(), Field.Store.NO));
|
||||
}
|
||||
|
||||
@@ -1,61 +1,9 @@
|
||||
package com.sismics.docs.core.event;
|
||||
|
||||
import com.google.common.base.MoreObjects;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
||||
/**
|
||||
* New file created event.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class FileCreatedAsyncEvent extends UserEvent {
|
||||
/**
|
||||
* Created file.
|
||||
*/
|
||||
private File file;
|
||||
|
||||
/**
|
||||
* Language of the file.
|
||||
*/
|
||||
private String language;
|
||||
|
||||
/**
|
||||
* Unencrypted original file.
|
||||
*/
|
||||
private Path unencryptedFile;
|
||||
|
||||
public File getFile() {
|
||||
return file;
|
||||
}
|
||||
|
||||
public void setFile(File file) {
|
||||
this.file = file;
|
||||
}
|
||||
|
||||
public String getLanguage() {
|
||||
return language;
|
||||
}
|
||||
|
||||
public void setLanguage(String language) {
|
||||
this.language = language;
|
||||
}
|
||||
|
||||
public Path getUnencryptedFile() {
|
||||
return unencryptedFile;
|
||||
}
|
||||
|
||||
public FileCreatedAsyncEvent setUnencryptedFile(Path unencryptedFile) {
|
||||
this.unencryptedFile = unencryptedFile;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return MoreObjects.toStringHelper(this)
|
||||
.add("file", file)
|
||||
.add("language", language)
|
||||
.toString();
|
||||
}
|
||||
public class FileCreatedAsyncEvent extends FileEvent {
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
package com.sismics.docs.core.event;
|
||||
|
||||
import com.google.common.base.MoreObjects;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
||||
/**
|
||||
* New file event.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public abstract class FileEvent extends UserEvent {
|
||||
/**
|
||||
* Created file.
|
||||
*/
|
||||
private File file;
|
||||
|
||||
/**
|
||||
* Language of the file.
|
||||
*/
|
||||
private String language;
|
||||
|
||||
/**
|
||||
* Unencrypted original file.
|
||||
*/
|
||||
private Path unencryptedFile;
|
||||
|
||||
public File getFile() {
|
||||
return file;
|
||||
}
|
||||
|
||||
public void setFile(File file) {
|
||||
this.file = file;
|
||||
}
|
||||
|
||||
public String getLanguage() {
|
||||
return language;
|
||||
}
|
||||
|
||||
public void setLanguage(String language) {
|
||||
this.language = language;
|
||||
}
|
||||
|
||||
public Path getUnencryptedFile() {
|
||||
return unencryptedFile;
|
||||
}
|
||||
|
||||
public FileEvent setUnencryptedFile(Path unencryptedFile) {
|
||||
this.unencryptedFile = unencryptedFile;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return MoreObjects.toStringHelper(this)
|
||||
.add("file", file)
|
||||
.add("language", language)
|
||||
.toString();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
package com.sismics.docs.core.event;
|
||||
|
||||
/**
|
||||
* New file created event.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class FileUpdatedAsyncEvent extends FileEvent {
|
||||
}
|
||||
@@ -5,6 +5,8 @@ import com.sismics.docs.core.dao.jpa.FileDao;
|
||||
import com.sismics.docs.core.dao.jpa.UserDao;
|
||||
import com.sismics.docs.core.dao.lucene.LuceneDao;
|
||||
import com.sismics.docs.core.event.FileCreatedAsyncEvent;
|
||||
import com.sismics.docs.core.event.FileEvent;
|
||||
import com.sismics.docs.core.event.FileUpdatedAsyncEvent;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
import com.sismics.docs.core.model.jpa.User;
|
||||
import com.sismics.docs.core.util.DirectoryUtil;
|
||||
@@ -28,19 +30,19 @@ import java.text.MessageFormat;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
/**
|
||||
* Listener on file created.
|
||||
* Listener on file processing.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class FileCreatedAsyncListener {
|
||||
public class FileProcessingAsyncListener {
|
||||
/**
|
||||
* Logger.
|
||||
*/
|
||||
private static final Logger log = LoggerFactory.getLogger(FileCreatedAsyncListener.class);
|
||||
private static final Logger log = LoggerFactory.getLogger(FileProcessingAsyncListener.class);
|
||||
|
||||
/**
|
||||
* File created.
|
||||
*
|
||||
*
|
||||
* @param event File created event
|
||||
*/
|
||||
@Subscribe
|
||||
@@ -49,6 +51,41 @@ public class FileCreatedAsyncListener {
|
||||
log.info("File created event: " + event.toString());
|
||||
}
|
||||
|
||||
processFile(event);
|
||||
|
||||
// Update Lucene index
|
||||
LuceneDao luceneDao = new LuceneDao();
|
||||
luceneDao.createFile(event.getFile());
|
||||
|
||||
FileUtil.endProcessingFile(event.getFile().getId());
|
||||
}
|
||||
|
||||
/**
|
||||
* File updated.
|
||||
*
|
||||
* @param event File updated event
|
||||
*/
|
||||
@Subscribe
|
||||
public void on(final FileUpdatedAsyncEvent event) {
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("File updated event: " + event.toString());
|
||||
}
|
||||
|
||||
processFile(event);
|
||||
|
||||
// Update Lucene index
|
||||
LuceneDao luceneDao = new LuceneDao();
|
||||
luceneDao.updateFile(event.getFile());
|
||||
|
||||
FileUtil.endProcessingFile(event.getFile().getId());
|
||||
}
|
||||
|
||||
/**
|
||||
* Process the file (create/update).
|
||||
*
|
||||
* @param event File event
|
||||
*/
|
||||
private void processFile(FileEvent event) {
|
||||
// Find a format handler
|
||||
final File file = event.getFile();
|
||||
FormatHandler formatHandler = FormatHandlerUtil.find(file.getMimeType());
|
||||
@@ -102,7 +139,7 @@ public class FileCreatedAsyncListener {
|
||||
try {
|
||||
content.set(formatHandler.extractContent(event.getLanguage(), event.getUnencryptedFile()));
|
||||
} catch (Exception e) {
|
||||
log.error("Error extracting content from: " + event.getFile());
|
||||
log.error("Error extracting content from: " + event.getFile(), e);
|
||||
}
|
||||
log.info(MessageFormat.format("File content extracted in {0}ms", System.currentTimeMillis() - startTime));
|
||||
|
||||
@@ -117,13 +154,5 @@ public class FileCreatedAsyncListener {
|
||||
file.setContent(content.get());
|
||||
fileDao.update(file);
|
||||
});
|
||||
|
||||
if (file.getDocumentId() != null) {
|
||||
// Update Lucene index
|
||||
LuceneDao luceneDao = new LuceneDao();
|
||||
luceneDao.createFile(event.getFile());
|
||||
}
|
||||
|
||||
FileUtil.endProcessingFile(file.getId());
|
||||
}
|
||||
}
|
||||
@@ -17,7 +17,7 @@ public class TemporaryFileCleanupAsyncListener {
|
||||
/**
|
||||
* Logger.
|
||||
*/
|
||||
private static final Logger log = LoggerFactory.getLogger(FileCreatedAsyncListener.class);
|
||||
private static final Logger log = LoggerFactory.getLogger(TemporaryFileCleanupAsyncListener.class);
|
||||
|
||||
/**
|
||||
* Cleanup temporary files.
|
||||
|
||||
@@ -117,7 +117,7 @@ public class AppContext {
|
||||
asyncExecutorList = new ArrayList<>();
|
||||
|
||||
asyncEventBus = newAsyncEventBus();
|
||||
asyncEventBus.register(new FileCreatedAsyncListener());
|
||||
asyncEventBus.register(new FileProcessingAsyncListener());
|
||||
asyncEventBus.register(new FileDeletedAsyncListener());
|
||||
asyncEventBus.register(new DocumentCreatedAsyncListener());
|
||||
asyncEventBus.register(new DocumentUpdatedAsyncListener());
|
||||
|
||||
@@ -232,7 +232,8 @@ public class InboxService extends AbstractScheduledService {
|
||||
|
||||
// Add files to the document
|
||||
for (EmailUtil.FileContent fileContent : mailContent.getFileContentList()) {
|
||||
FileUtil.createFile(fileContent.getName(), fileContent.getFile(), fileContent.getSize(), "eng", "admin", document.getId());
|
||||
FileUtil.createFile(fileContent.getName(), fileContent.getFile(), fileContent.getSize(),
|
||||
document.getLanguage(), "admin", document.getId());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ public class EncryptionUtil {
|
||||
* @param is InputStream to encrypt
|
||||
* @param privateKey Private key
|
||||
* @return Decrypted stream
|
||||
* @throws Exception
|
||||
* @throws Exception If the decryption cipher cannot be created
|
||||
*/
|
||||
public static InputStream decryptInputStream(InputStream is, String privateKey) throws Exception {
|
||||
return new CipherInputStream(is, getCipher(privateKey, Cipher.DECRYPT_MODE));
|
||||
@@ -66,7 +66,7 @@ public class EncryptionUtil {
|
||||
* @param file Encrypted file
|
||||
* @param privateKey Private key
|
||||
* @return Decrypted temporary file
|
||||
* @throws Exception
|
||||
* @throws Exception e
|
||||
*/
|
||||
public static Path decryptFile(Path file, String privateKey) throws Exception {
|
||||
if (privateKey == null) {
|
||||
@@ -86,7 +86,7 @@ public class EncryptionUtil {
|
||||
*
|
||||
* @param privateKey Private key
|
||||
* @return Encryption cipher
|
||||
* @throws Exception
|
||||
* @throws Exception e
|
||||
*/
|
||||
public static Cipher getEncryptionCipher(String privateKey) throws Exception {
|
||||
if (Strings.isNullOrEmpty(privateKey)) {
|
||||
@@ -101,7 +101,7 @@ public class EncryptionUtil {
|
||||
* @param privateKey Private key
|
||||
* @param mode Mode (encrypt or decrypt)
|
||||
* @return Cipher
|
||||
* @throws Exception
|
||||
* @throws Exception e
|
||||
*/
|
||||
private static Cipher getCipher(String privateKey, int mode) throws Exception {
|
||||
PBEKeySpec keySpec = new PBEKeySpec(privateKey.toCharArray(), SALT.getBytes(), 2000, 256);
|
||||
|
||||
@@ -45,6 +45,10 @@ public class ImageFormatHandler implements FormatHandler {
|
||||
|
||||
@Override
|
||||
public String extractContent(String language, Path file) throws Exception {
|
||||
if (language == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try (InputStream inputStream = Files.newInputStream(file)) {
|
||||
return FileUtil.ocrFile(language, ImageIO.read(inputStream));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user