1
0
mirror of https://github.com/sismics/docs.git synced 2025-12-13 17:56:20 +00:00

Index files OCR-ized content and documents, search on index fields

This commit is contained in:
jendib
2013-08-17 14:16:55 +02:00
parent 5507d4ca57
commit 7ed976b27a
23 changed files with 695 additions and 91 deletions

View File

@@ -1,8 +1,23 @@
package com.sismics.docs.core.dao.jpa;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import javax.persistence.EntityManager;
import javax.persistence.NoResultException;
import javax.persistence.Query;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import com.sismics.docs.core.dao.jpa.criteria.DocumentCriteria;
import com.sismics.docs.core.dao.jpa.dto.DocumentDto;
import com.sismics.docs.core.dao.lucene.LuceneDao;
import com.sismics.docs.core.model.jpa.Document;
import com.sismics.docs.core.util.jpa.PaginatedList;
import com.sismics.docs.core.util.jpa.PaginatedLists;
@@ -10,12 +25,6 @@ import com.sismics.docs.core.util.jpa.QueryParam;
import com.sismics.docs.core.util.jpa.SortCriteria;
import com.sismics.util.context.ThreadLocalContext;
import javax.persistence.EntityManager;
import javax.persistence.NoResultException;
import javax.persistence.Query;
import java.sql.Timestamp;
import java.util.*;
/**
* Document DAO.
*
@@ -40,6 +49,18 @@ public class DocumentDao {
return document.getId();
}
/**
 * Fetches every document whose delete date is not set.
 *
 * @return List of documents matching the query
 */
@SuppressWarnings("unchecked")
public List<Document> findAll() {
    final String jpql = "select d from Document d where d.deleteDate is null";
    EntityManager entityManager = ThreadLocalContext.get().getEntityManager();
    return entityManager.createQuery(jpql).getResultList();
}
/**
* Returns an active document.
*
@@ -118,8 +139,9 @@ public class DocumentDao {
* @param paginatedList List of documents (updated by side effects)
* @param criteria Search criteria
* @return List of document
* @throws Exception
*/
public void findByCriteria(PaginatedList<DocumentDto> paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) {
public void findByCriteria(PaginatedList<DocumentDto> paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception {
Map<String, Object> parameterMap = new HashMap<String, Object>();
List<String> criteriaList = new ArrayList<String>();
@@ -133,9 +155,15 @@ public class DocumentDao {
criteriaList.add("d.DOC_IDUSER_C = :userId");
parameterMap.put("userId", criteria.getUserId());
}
if (criteria.getSearch() != null) {
criteriaList.add("(d.DOC_TITLE_C LIKE :search OR d.DOC_DESCRIPTION_C LIKE :search OR f.FIL_CONTENT_C LIKE :search)");
parameterMap.put("search", "%" + criteria.getSearch() + "%");
if (!Strings.isNullOrEmpty(criteria.getSearch())) {
LuceneDao luceneDao = new LuceneDao();
Set<String> documentIdList = luceneDao.search(criteria.getUserId(), criteria.getSearch());
if (documentIdList.size() == 0) {
// If the search doesn't find any document, the request should return nothing
documentIdList.add(UUID.randomUUID().toString());
}
criteriaList.add("d.DOC_ID_C in :documentIdList");
parameterMap.put("documentIdList", documentIdList);
}
if (criteria.getCreateDateMin() != null) {
criteriaList.add("d.DOC_CREATEDATE_D >= :createDateMin");

View File

@@ -1,8 +1,10 @@
package com.sismics.docs.core.dao.lucene;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Field;
@@ -18,12 +20,12 @@ import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
import com.sismics.docs.core.model.context.AppContext;
import com.sismics.docs.core.model.jpa.Document;
import com.sismics.docs.core.model.jpa.File;
import com.sismics.docs.core.util.LuceneUtil;
import com.sismics.docs.core.util.LuceneUtil.LuceneRunnable;
@@ -40,114 +42,190 @@ public class LuceneDao {
*
* @param fileList
*/
public void rebuildIndex(final List<File> fileList) {
public void rebuildIndex(final List<Document> documentList, final List<File> fileList) {
LuceneUtil.handle(new LuceneRunnable() {
@Override
public void run(IndexWriter indexWriter) throws Exception {
// Empty index
indexWriter.deleteAll();
// Add all files
for (File file : fileList) {
org.apache.lucene.document.Document document = getDocumentFromFile(file);
indexWriter.addDocument(document);
// Add all documents
Map<String, Document> documentMap = new HashMap<>();
for (Document document : documentList) {
org.apache.lucene.document.Document luceneDocument = getDocumentFromDocument(document);
indexWriter.addDocument(luceneDocument);
documentMap.put(document.getId(), document);
}
}
});
}
/**
* Add files to the index.
*
* @param fileList
*/
public void create(final List<File> fileList) {
LuceneUtil.handle(new LuceneRunnable() {
@Override
public void run(IndexWriter indexWriter) throws Exception {
// Add all files
for (File file : fileList) {
org.apache.lucene.document.Document document = getDocumentFromFile(file);
indexWriter.addDocument(document);
org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file, documentMap.get(file.getDocumentId()));
indexWriter.addDocument(luceneDocument);
}
}
});
}
/**
* Update index.
* Add document to the index.
*
* @param fileList File list
* @param document Document to add
*/
public void update(final List<File> fileList) {
public void createDocument(final Document document) {
LuceneUtil.handle(new LuceneRunnable() {
@Override
public void run(IndexWriter indexWriter) throws Exception {
// Update all files
for (File file : fileList) {
org.apache.lucene.document.Document document = getDocumentFromFile(file);
indexWriter.updateDocument(new Term("id", file.getId()), document);
}
org.apache.lucene.document.Document luceneDocument = getDocumentFromDocument(document);
indexWriter.addDocument(luceneDocument);
}
});
}
/**
 * Index a new file.
 *
 * @param file File to add to the index
 * @param document Document the file is attached to
 */
public void createFile(final File file, final Document document) {
    LuceneRunnable indexTask = new LuceneRunnable() {
        @Override
        public void run(IndexWriter indexWriter) throws Exception {
            // Build the index entry for the file and append it to the index
            indexWriter.addDocument(getDocumentFromFile(file, document));
        }
    };
    LuceneUtil.handle(indexTask);
}
/**
 * Replace the index entry of a document.
 *
 * @param document Updated document
 */
public void updateDocument(final Document document) {
    LuceneRunnable indexTask = new LuceneRunnable() {
        @Override
        public void run(IndexWriter indexWriter) throws Exception {
            org.apache.lucene.document.Document refreshedEntry = getDocumentFromDocument(document);
            // The entry is looked up by its "id" term
            Term idTerm = new Term("id", document.getId());
            indexWriter.updateDocument(idTerm, refreshedEntry);
        }
    };
    LuceneUtil.handle(indexTask);
}
/**
 * Replace the index entry of a file.
 *
 * @param file Updated file
 * @param document Document the file is attached to
 */
public void updateFile(final File file, final Document document) {
    LuceneRunnable indexTask = new LuceneRunnable() {
        @Override
        public void run(IndexWriter indexWriter) throws Exception {
            org.apache.lucene.document.Document refreshedEntry = getDocumentFromFile(file, document);
            // The entry is looked up by its "id" term
            Term idTerm = new Term("id", file.getId());
            indexWriter.updateDocument(idTerm, refreshedEntry);
        }
    };
    LuceneUtil.handle(indexTask);
}
/**
 * Remove from the index every entry whose "id" term equals the given ID.
 *
 * @param id ID of the entry to delete
 */
public void deleteDocument(final String id) {
    LuceneRunnable deleteTask = new LuceneRunnable() {
        @Override
        public void run(IndexWriter indexWriter) throws Exception {
            Term idTerm = new Term("id", id);
            indexWriter.deleteDocuments(idTerm);
        }
    };
    LuceneUtil.handle(deleteTask);
}
/**
* Search files.
*
* @param paginatedList
* @param feedList
* @param searchQuery
* @return List of file IDs
* @param userId User ID to filter on
* @param searchQuery Search query
* @return List of document IDs
* @throws Exception
*/
public Set<String> search(String userId, String searchQuery, int limit) throws Exception {
public Set<String> search(String userId, String searchQuery) throws Exception {
// Escape query and add quotes so QueryParser generate a PhraseQuery
searchQuery = "\"" + QueryParserUtil.escape(searchQuery) + "\"";
// Build search query
StandardQueryParser qpHelper = new StandardQueryParser(new DocsStandardAnalyzer(Version.LUCENE_42));
qpHelper.setPhraseSlop(100000); // PhraseQuery add terms
Query contentQuery = qpHelper.parse(searchQuery, "content");
// Search on file content
// Search on documents and files
BooleanQuery query = new BooleanQuery();
query.add(contentQuery, Occur.SHOULD);
query.add(qpHelper.parse(searchQuery, "content"), Occur.SHOULD);
query.add(qpHelper.parse(searchQuery, "title"), Occur.SHOULD);
query.add(qpHelper.parse(searchQuery, "description"), Occur.SHOULD);
// Filter on provided user ID
List<Term> terms = new ArrayList<Term>();
terms.add(new Term("user_id", userId));
TermsFilter feedsFilter = new TermsFilter(terms);
if (userId != null) {
terms.add(new Term("user_id", userId));
}
TermsFilter userFilter = new TermsFilter(terms);
// Search
IndexReader reader = DirectoryReader.open(AppContext.getInstance().getLuceneDirectory());
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs topDocs = searcher.search(query, feedsFilter, limit);
TopDocs topDocs = searcher.search(query, userFilter, Integer.MAX_VALUE);
ScoreDoc[] docs = topDocs.scoreDocs;
// Extract file IDs
Set<String> fileIdList = new HashSet<String>();
// Extract document IDs
Set<String> documentIdList = new HashSet<String>();
for (int i = 0; i < docs.length; i++) {
String id = searcher.doc(docs[i].doc).get("id");
fileIdList.add(id);
org.apache.lucene.document.Document document = searcher.doc(docs[i].doc);
String type = document.get("type");
String documentId = null;
if (type.equals("document")) {
documentId = document.get("id");
} else if (type.equals("file")) {
documentId = document.get("document_id");
}
documentIdList.add(documentId);
}
return fileIdList;
return documentIdList;
}
/**
 * Build Lucene document from database document.
 *
 * The entry stores the document ID, the owner's user ID and the constant
 * type "document"; title and description are indexed as text but not stored.
 *
 * @param document Document
 * @return Lucene document ready to be written to the index
 */
private org.apache.lucene.document.Document getDocumentFromDocument(Document document) {
    org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document();
    luceneDocument.add(new StringField("id", document.getId(), Field.Store.YES));
    luceneDocument.add(new StringField("user_id", document.getUserId(), Field.Store.YES));
    luceneDocument.add(new StringField("type", "document", Field.Store.YES));

    // Lucene fields reject null values with an IllegalArgumentException, so a
    // text field is only added when the document actually carries a value.
    String title = document.getTitle();
    if (title != null) {
        luceneDocument.add(new TextField("title", title, Field.Store.NO));
    }
    String description = document.getDescription();
    if (description != null) {
        luceneDocument.add(new TextField("description", description, Field.Store.NO));
    }

    return luceneDocument;
}
/**
* Build Lucene document from file.
*
* @param file File
* @param document Document linked to the file
* @return Document
*/
private org.apache.lucene.document.Document getDocumentFromFile(File file) {
// Building document
org.apache.lucene.document.Document document = new org.apache.lucene.document.Document();
document.add(new StringField("id", file.getId(), Field.Store.YES));
document.add(new TextField("content", file.getContent(), Field.Store.NO));
private org.apache.lucene.document.Document getDocumentFromFile(File file, Document document) {
org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document();
luceneDocument.add(new StringField("id", file.getId(), Field.Store.YES));
luceneDocument.add(new StringField("user_id", document.getUserId(), Field.Store.YES));
luceneDocument.add(new StringField("type", "file", Field.Store.YES));
luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES));
luceneDocument.add(new TextField("content", file.getContent(), Field.Store.NO));
return document;
return luceneDocument;
}
}

View File

@@ -0,0 +1,41 @@
package com.sismics.docs.core.event;
import com.google.common.base.Objects;
import com.sismics.docs.core.model.jpa.Document;
/**
* Document created event.
*
* @author bgamard
*/
public class DocumentCreatedAsyncEvent {
/**
* Created document.
*/
private Document document;
/**
* Getter of document.
*
* @return the document
*/
public Document getDocument() {
return document;
}
/**
* Setter of document.
*
* @param document document
*/
public void setDocument(Document document) {
this.document = document;
}
@Override
public String toString() {
return Objects.toStringHelper(this)
.add("document", document)
.toString();
}
}

View File

@@ -0,0 +1,41 @@
package com.sismics.docs.core.event;
import com.google.common.base.Objects;
import com.sismics.docs.core.model.jpa.Document;
/**
* Document deleted event.
*
* @author bgamard
*/
public class DocumentDeletedAsyncEvent {
/**
* Created document.
*/
private Document document;
/**
* Getter of document.
*
* @return the document
*/
public Document getDocument() {
return document;
}
/**
* Setter of document.
*
* @param document document
*/
public void setDocument(Document document) {
this.document = document;
}
@Override
public String toString() {
return Objects.toStringHelper(this)
.add("document", document)
.toString();
}
}

View File

@@ -0,0 +1,41 @@
package com.sismics.docs.core.event;
import com.google.common.base.Objects;
import com.sismics.docs.core.model.jpa.Document;
/**
* Document updated event.
*
* @author bgamard
*/
public class DocumentUpdatedAsyncEvent {
/**
* Created document.
*/
private Document document;
/**
* Getter of document.
*
* @return the document
*/
public Document getDocument() {
return document;
}
/**
* Setter of document.
*
* @param document document
*/
public void setDocument(Document document) {
this.document = document;
}
@Override
public String toString() {
return Objects.toStringHelper(this)
.add("document", document)
.toString();
}
}

View File

@@ -0,0 +1,41 @@
package com.sismics.docs.core.event;
import com.google.common.base.Objects;
import com.sismics.docs.core.model.jpa.File;
/**
* File deleted event.
*
* @author bgamard
*/
public class FileDeletedAsyncEvent {
/**
* Deleted file.
*/
private File file;
/**
* Getter of file.
*
* @return the file
*/
public File getFile() {
return file;
}
/**
* Setter of file.
*
* @param file file
*/
public void setFile(File file) {
this.file = file;
}
@Override
public String toString() {
return Objects.toStringHelper(this)
.add("file", file)
.toString();
}
}

View File

@@ -0,0 +1,16 @@
package com.sismics.docs.core.event;
import com.google.common.base.Objects;
/**
* Rebuild index event.
*
* @author bgamard
*/
public class RebuildIndexAsyncEvent {
@Override
public String toString() {
return Objects.toStringHelper(this)
.toString();
}
}

View File

@@ -0,0 +1,57 @@
package com.sismics.docs.core.event;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.eventbus.Subscribe;
import com.sismics.docs.core.dao.jpa.DocumentDao;
import com.sismics.docs.core.dao.jpa.FileDao;
import com.sismics.docs.core.dao.lucene.LuceneDao;
import com.sismics.docs.core.model.jpa.Document;
import com.sismics.docs.core.model.jpa.File;
import com.sismics.docs.core.util.TransactionUtil;
/**
 * Listener reacting to index rebuild requests.
 *
 * @author bgamard
 */
public class RebuildIndexAsyncListener {
    /**
     * Logger.
     */
    private static final Logger log = LoggerFactory.getLogger(RebuildIndexAsyncListener.class);

    /**
     * Reloads all documents and files and hands them to the Lucene DAO
     * so the index is rebuilt from them.
     *
     * NOTE(review): the handler name looks like a leftover from another
     * listener; it is kept unchanged because it is public.
     *
     * @param rebuildIndexAsyncEvent Index rebuild event
     * @throws Exception
     */
    @Subscribe
    public void onArticleCreated(final RebuildIndexAsyncEvent rebuildIndexAsyncEvent) throws Exception {
        if (log.isInfoEnabled()) {
            final String message = "Rebuild index event: " + rebuildIndexAsyncEvent.toString();
            log.info(message);
        }

        // The whole reload + rebuild runs through TransactionUtil
        Runnable rebuildTask = new Runnable() {
            @Override
            public void run() {
                // Load everything that has to be indexed
                List<Document> documentList = new DocumentDao().findAll();
                List<File> fileList = new FileDao().findAll();

                // Recreate the index from the loaded entities
                new LuceneDao().rebuildIndex(documentList, fileList);
            }
        };
        TransactionUtil.handle(rebuildTask);
    }
}

View File

@@ -0,0 +1,37 @@
package com.sismics.docs.core.listener.async;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.eventbus.Subscribe;
import com.sismics.docs.core.dao.lucene.LuceneDao;
import com.sismics.docs.core.event.DocumentCreatedAsyncEvent;
/**
 * Listener reacting to document creation events.
 *
 * @author bgamard
 */
public class DocumentCreatedAsyncListener {
    /**
     * Logger.
     */
    private static final Logger log = LoggerFactory.getLogger(DocumentCreatedAsyncListener.class);

    /**
     * Adds the created document to the Lucene index.
     *
     * @param documentCreatedAsyncEvent Document created event
     * @throws Exception
     */
    @Subscribe
    public void on(final DocumentCreatedAsyncEvent documentCreatedAsyncEvent) throws Exception {
        if (log.isInfoEnabled()) {
            final String message = "Document created event: " + documentCreatedAsyncEvent.toString();
            log.info(message);
        }

        // Push the new document into the index
        new LuceneDao().createDocument(documentCreatedAsyncEvent.getDocument());
    }
}

View File

@@ -0,0 +1,37 @@
package com.sismics.docs.core.listener.async;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.eventbus.Subscribe;
import com.sismics.docs.core.dao.lucene.LuceneDao;
import com.sismics.docs.core.event.DocumentDeletedAsyncEvent;
/**
 * Listener reacting to document deletion events.
 *
 * @author bgamard
 */
public class DocumentDeletedAsyncListener {
    /**
     * Logger.
     */
    private static final Logger log = LoggerFactory.getLogger(DocumentDeletedAsyncListener.class);

    /**
     * Removes the deleted document from the Lucene index, using its ID.
     *
     * @param documentDeletedAsyncEvent Document deleted event
     * @throws Exception
     */
    @Subscribe
    public void on(final DocumentDeletedAsyncEvent documentDeletedAsyncEvent) throws Exception {
        if (log.isInfoEnabled()) {
            final String message = "Document deleted event: " + documentDeletedAsyncEvent.toString();
            log.info(message);
        }

        // Drop the index entry identified by the document ID
        new LuceneDao().deleteDocument(documentDeletedAsyncEvent.getDocument().getId());
    }
}

View File

@@ -0,0 +1,37 @@
package com.sismics.docs.core.listener.async;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.eventbus.Subscribe;
import com.sismics.docs.core.dao.lucene.LuceneDao;
import com.sismics.docs.core.event.DocumentUpdatedAsyncEvent;
/**
 * Listener reacting to document update events.
 *
 * @author bgamard
 */
public class DocumentUpdatedAsyncListener {
    /**
     * Logger.
     */
    private static final Logger log = LoggerFactory.getLogger(DocumentUpdatedAsyncListener.class);

    /**
     * Refreshes the Lucene index entry of the updated document.
     *
     * @param documentUpdatedAsyncEvent Document updated event
     * @throws Exception
     */
    @Subscribe
    public void on(final DocumentUpdatedAsyncEvent documentUpdatedAsyncEvent) throws Exception {
        if (log.isInfoEnabled()) {
            final String message = "Document updated event: " + documentUpdatedAsyncEvent.toString();
            log.info(message);
        }

        // Replace the index entry with the updated document
        new LuceneDao().updateDocument(documentUpdatedAsyncEvent.getDocument());
    }
}

View File

@@ -6,12 +6,13 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.eventbus.Subscribe;
import com.sismics.docs.core.dao.lucene.LuceneDao;
import com.sismics.docs.core.event.FileCreatedAsyncEvent;
import com.sismics.docs.core.util.FileUtil;
import com.sismics.util.ImageUtil;
/**
* Listener on new file.
* Listener on file created.
*
* @author bgamard
*/
@@ -22,13 +23,13 @@ public class FileCreatedAsyncListener {
private static final Logger log = LoggerFactory.getLogger(FileCreatedAsyncListener.class);
/**
* Process new file.
* File created.
*
* @param fileCreatedAsyncEvent New file created event
* @param fileCreatedAsyncEvent File created event
* @throws Exception
*/
@Subscribe
public void onFileCreated(final FileCreatedAsyncEvent fileCreatedAsyncEvent) throws Exception {
public void on(final FileCreatedAsyncEvent fileCreatedAsyncEvent) throws Exception {
if (log.isInfoEnabled()) {
log.info("File created event: " + fileCreatedAsyncEvent.toString());
}
@@ -39,5 +40,9 @@ public class FileCreatedAsyncListener {
FileUtil.ocrFile(fileCreatedAsyncEvent.getDocument(), fileCreatedAsyncEvent.getFile());
log.info(MessageFormat.format("File OCR-ized in {0}ms", System.currentTimeMillis() - startTime));
}
// Update Lucene index
LuceneDao luceneDao = new LuceneDao();
luceneDao.createFile(fileCreatedAsyncEvent.getFile(), fileCreatedAsyncEvent.getDocument());
}
}

View File

@@ -0,0 +1,39 @@
package com.sismics.docs.core.listener.async;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.eventbus.Subscribe;
import com.sismics.docs.core.dao.lucene.LuceneDao;
import com.sismics.docs.core.event.FileDeletedAsyncEvent;
/**
 * Listener reacting to file deletion events.
 *
 * @author bgamard
 */
public class FileDeletedAsyncListener {
    /**
     * Logger.
     */
    private static final Logger log = LoggerFactory.getLogger(FileDeletedAsyncListener.class);

    /**
     * Removes the deleted file from the Lucene index, using its ID.
     *
     * @param fileDeletedAsyncEvent File deleted event
     * @throws Exception
     */
    @Subscribe
    public void on(final FileDeletedAsyncEvent fileDeletedAsyncEvent) throws Exception {
        if (log.isInfoEnabled()) {
            final String message = "File deleted event: " + fileDeletedAsyncEvent.toString();
            log.info(message);
        }

        // TODO Delete the file from storage

        // Drop the index entry identified by the file ID
        new LuceneDao().deleteDocument(fileDeletedAsyncEvent.getFile().getId());
    }
}

View File

@@ -13,7 +13,12 @@ import com.google.common.eventbus.AsyncEventBus;
import com.google.common.eventbus.EventBus;
import com.sismics.docs.core.constant.ConfigType;
import com.sismics.docs.core.dao.jpa.ConfigDao;
import com.sismics.docs.core.event.RebuildIndexAsyncListener;
import com.sismics.docs.core.listener.async.DocumentCreatedAsyncListener;
import com.sismics.docs.core.listener.async.DocumentDeletedAsyncListener;
import com.sismics.docs.core.listener.async.DocumentUpdatedAsyncListener;
import com.sismics.docs.core.listener.async.FileCreatedAsyncListener;
import com.sismics.docs.core.listener.async.FileDeletedAsyncListener;
import com.sismics.docs.core.listener.sync.DeadEventListener;
import com.sismics.docs.core.model.jpa.Config;
import com.sismics.docs.core.service.IndexingService;
@@ -80,6 +85,11 @@ public class AppContext {
asyncEventBus = newAsyncEventBus();
asyncEventBus.register(new FileCreatedAsyncListener());
asyncEventBus.register(new FileDeletedAsyncListener());
asyncEventBus.register(new DocumentCreatedAsyncListener());
asyncEventBus.register(new DocumentUpdatedAsyncListener());
asyncEventBus.register(new DocumentDeletedAsyncListener());
asyncEventBus.register(new RebuildIndexAsyncListener());
}
/**

View File

@@ -1,10 +1,9 @@
package com.sismics.docs.core.service;
import com.google.common.util.concurrent.AbstractScheduledService;
import com.sismics.docs.core.constant.Constants;
import com.sismics.docs.core.model.context.AppContext;
import com.sismics.docs.core.util.DirectoryUtil;
import com.sismics.docs.core.util.TransactionUtil;
import java.io.File;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
@@ -12,9 +11,12 @@ import org.apache.lucene.store.SimpleFSLockFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import com.google.common.util.concurrent.AbstractScheduledService;
import com.sismics.docs.core.constant.Constants;
import com.sismics.docs.core.event.RebuildIndexAsyncEvent;
import com.sismics.docs.core.model.context.AppContext;
import com.sismics.docs.core.util.DirectoryUtil;
import com.sismics.docs.core.util.TransactionUtil;
/**
* Indexing service.
@@ -85,6 +87,16 @@ public class IndexingService extends AbstractScheduledService {
return Scheduler.newFixedDelaySchedule(0, 1, TimeUnit.HOURS);
}
/**
 * Requests a rebuild of the Lucene index by posting a
 * {@link RebuildIndexAsyncEvent} on the application's async event bus.
 *
 * @throws Exception
 */
public void rebuildIndex() throws Exception {
    AppContext.getInstance().getAsyncEventBus().post(new RebuildIndexAsyncEvent());
}
/**
* Getter of directory.
*

View File

@@ -1,6 +1,8 @@
package com.sismics.docs.core.util;
import java.awt.color.ColorSpace;
import java.awt.image.BufferedImage;
import java.awt.image.ColorConvertOp;
import java.io.IOException;
import java.nio.file.Paths;
@@ -46,12 +48,11 @@ public class FileUtil {
log.error("Error reading the image " + storedfile, e);
}
// Upscale the image if it is too small
if (image.getWidth() < 2500 || image.getHeight() < 2500) {
BufferedImage resizedImage = Scalr.resize(image, Method.AUTOMATIC, Mode.AUTOMATIC, 3500);
image.flush();
image = resizedImage;
}
// Upscale and grayscale the image
BufferedImage resizedImage = Scalr.resize(image, Method.AUTOMATIC, Mode.AUTOMATIC, 3500,
new ColorConvertOp(ColorSpace.getInstance(ColorSpace.CS_GRAY), null));
image.flush();
image = resizedImage;
// OCR the file
try {