Make search for documents faster for large dataset (#698)
@@ -4,13 +4,16 @@ import com.sismics.docs.core.constant.AuditLogType;
import com.sismics.docs.core.model.jpa.File;
import com.sismics.docs.core.util.AuditLogUtil;
import com.sismics.util.context.ThreadLocalContext;

import jakarta.persistence.EntityManager;
import jakarta.persistence.NoResultException;
import jakarta.persistence.Query;
import jakarta.persistence.TypedQuery;

import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

/**
@@ -213,6 +216,24 @@ public class FileDao {
        return q.getResultList();
    }

+    /**
+     * Get files count by documents IDs.
+     *
+     * @param documentIds Documents IDs
+     * @return the number of files per document id
+     */
+    public Map<String, Long> countByDocumentsIds(Iterable<String> documentIds) {
+        EntityManager em = ThreadLocalContext.get().getEntityManager();
+        Query q = em.createQuery("select f.documentId, count(*) from File f where f.documentId in :documentIds and f.latestVersion = true and f.deleteDate is null group by (f.documentId)");
+        q.setParameter("documentIds", documentIds);
+        Map<String, Long> result = new HashMap<>();
+        q.getResultList().forEach(o -> {
+            Object[] resultLine = (Object[]) o;
+            result.put((String) resultLine[0], (Long) resultLine[1]);
+        });
+        return result;
+    }
+
    /**
     * Get all files from a version.
     *
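The new DAO method fetches all file counts in a single grouped query, so callers can replace one count query per document with a single round trip; documents with no current files simply have no entry in the returned map. A minimal usage sketch, assuming direct DAO instantiation as elsewhere in the codebase (the document IDs are hypothetical):

    // Hypothetical caller: one grouped query for a whole page of documents.
    FileDao fileDao = new FileDao();
    List<String> documentIds = List.of("doc-1", "doc-2", "doc-3"); // hypothetical IDs
    Map<String, Long> countByDocId = fileDao.countByDocumentsIds(documentIds);
    // An absent key means the document has no latest-version, non-deleted files.
    long fileCount = countByDocId.getOrDefault("doc-2", 0L);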
@@ -26,9 +26,18 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.*;
+import org.apache.lucene.index.CheckIndex;
+import org.apache.lucene.index.ConcurrentMergeScheduler;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.simple.SimpleQueryParser;
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
@@ -47,7 +56,12 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.Timestamp;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;

/**
 * Lucene indexing handler.
@@ -242,32 +256,27 @@ public class LuceneIndexingHandler implements IndexingHandler {

        StringBuilder sb = new StringBuilder("select distinct d.DOC_ID_C c0, d.DOC_TITLE_C c1, d.DOC_DESCRIPTION_C c2, d.DOC_CREATEDATE_D c3, d.DOC_LANGUAGE_C c4, d.DOC_IDFILE_C, ");
        sb.append(" s.count c5, ");
-        sb.append(" f.count c6, ");
        sb.append(" rs2.RTP_ID_C c7, rs2.RTP_NAME_C, d.DOC_UPDATEDATE_D c8 ");
        sb.append(" from T_DOCUMENT d ");
        sb.append(" left join (SELECT count(s.SHA_ID_C) count, ac.ACL_SOURCEID_C " +
                " FROM T_SHARE s, T_ACL ac " +
                " WHERE ac.ACL_TARGETID_C = s.SHA_ID_C AND ac.ACL_DELETEDATE_D IS NULL AND " +
-                " s.SHA_DELETEDATE_D IS NULL group by ac.ACL_SOURCEID_C) s on s.ACL_SOURCEID_C = d.DOC_ID_C " +
-                " left join (SELECT count(f.FIL_ID_C) count, f.FIL_IDDOC_C " +
-                " FROM T_FILE f " +
-                " WHERE f.FIL_DELETEDATE_D is null group by f.FIL_IDDOC_C) f on f.FIL_IDDOC_C = d.DOC_ID_C ");
+                " s.SHA_DELETEDATE_D IS NULL group by ac.ACL_SOURCEID_C) s on s.ACL_SOURCEID_C = d.DOC_ID_C ");
        sb.append(" left join (select rs.*, rs3.idDocument " +
                "from T_ROUTE_STEP rs " +
                "join (select r.RTE_IDDOCUMENT_C idDocument, rs.RTP_IDROUTE_C idRoute, min(rs.RTP_ORDER_N) minOrder from T_ROUTE_STEP rs join T_ROUTE r on r.RTE_ID_C = rs.RTP_IDROUTE_C and r.RTE_DELETEDATE_D is null where rs.RTP_DELETEDATE_D is null and rs.RTP_ENDDATE_D is null group by rs.RTP_IDROUTE_C, r.RTE_IDDOCUMENT_C) rs3 on rs.RTP_IDROUTE_C = rs3.idRoute and rs.RTP_ORDER_N = rs3.minOrder " +
                "where rs.RTP_IDTARGET_C in (:targetIdList)) rs2 on rs2.idDocument = d.DOC_ID_C ");

        // Add search criterias
-        if (criteria.getTargetIdList() != null) {
-            if (!SecurityUtil.skipAclCheck(criteria.getTargetIdList())) {
-                // Read permission is enough for searching
-                sb.append(" left join T_ACL a on a.ACL_TARGETID_C in (:targetIdList) and a.ACL_SOURCEID_C = d.DOC_ID_C and a.ACL_PERM_C = 'READ' and a.ACL_DELETEDATE_D is null ");
-                sb.append(" left join T_DOCUMENT_TAG dta on dta.DOT_IDDOCUMENT_C = d.DOC_ID_C and dta.DOT_DELETEDATE_D is null ");
-                sb.append(" left join T_ACL a2 on a2.ACL_TARGETID_C in (:targetIdList) and a2.ACL_SOURCEID_C = dta.DOT_IDTAG_C and a2.ACL_PERM_C = 'READ' and a2.ACL_DELETEDATE_D is null ");
-                criteriaList.add("(a.ACL_ID_C is not null or a2.ACL_ID_C is not null)");
-            }
-            parameterMap.put("targetIdList", criteria.getTargetIdList());
-        }
+        if (!SecurityUtil.skipAclCheck(criteria.getTargetIdList())) {
+            // Read permission is enough for searching
+            sb.append(" left join T_ACL a on a.ACL_TARGETID_C in (:targetIdList) and a.ACL_SOURCEID_C = d.DOC_ID_C and a.ACL_PERM_C = 'READ' and a.ACL_DELETEDATE_D is null ");
+            sb.append(" left join T_DOCUMENT_TAG dta on dta.DOT_IDDOCUMENT_C = d.DOC_ID_C and dta.DOT_DELETEDATE_D is null ");
+            sb.append(" left join T_ACL a2 on a2.ACL_TARGETID_C in (:targetIdList) and a2.ACL_SOURCEID_C = dta.DOT_IDTAG_C and a2.ACL_PERM_C = 'READ' and a2.ACL_DELETEDATE_D is null ");
+            criteriaList.add("(a.ACL_ID_C is not null or a2.ACL_ID_C is not null)");
+        }
+        parameterMap.put("targetIdList", criteria.getTargetIdList());

        if (!Strings.isNullOrEmpty(criteria.getSearch()) || !Strings.isNullOrEmpty(criteria.getFullSearch())) {
            documentSearchMap = search(criteria.getSearch(), criteria.getFullSearch());
            if (documentSearchMap.isEmpty()) {
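For context, the criteriaList entries and parameterMap values collected above are assembled into the final native query further down in this method, outside the hunk. A hedged sketch of that pattern, assuming the Guava Joiner already used in this class and an EntityManager named em (neither shown in this diff):

    // Sketch only: how the collected criteria presumably reach the query.
    if (!criteriaList.isEmpty()) {
        sb.append(" where ").append(Joiner.on(" and ").join(criteriaList));
    }
    Query q = em.createNativeQuery(sb.toString());
    for (Map.Entry<String, Object> entry : parameterMap.entrySet()) {
        q.setParameter(entry.getKey(), entry.getValue());
    }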
@@ -312,7 +321,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
                criteriaList.add("(" + Joiner.on(" OR ").join(tagCriteriaList) + ")");
            }
        }
-        if (criteria.getExcludedTagIdList() != null && !criteria.getExcludedTagIdList().isEmpty()) {
+        if (!criteria.getExcludedTagIdList().isEmpty()) {
            int index = 0;
            for (List<String> tagIdList : criteria.getExcludedTagIdList()) {
                List<String> tagCriteriaList = Lists.newArrayList();
@@ -367,8 +376,6 @@ public class LuceneIndexingHandler implements IndexingHandler {
            documentDto.setFileId((String) o[i++]);
            Number shareCount = (Number) o[i++];
            documentDto.setShared(shareCount != null && shareCount.intValue() > 0);
-            Number fileCount = (Number) o[i++];
-            documentDto.setFileCount(fileCount == null ? 0 : fileCount.intValue());
            documentDto.setActiveRoute(o[i++] != null);
            documentDto.setCurrentStepName((String) o[i++]);
            documentDto.setUpdateTimestamp(((Timestamp) o[i]).getTime());
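With the f.count c6 column gone from the select list, this mapping no longer reads a per-row file count; presumably the commit fills the counts in one batch afterwards via the new FileDao#countByDocumentsIds. A hedged sketch of that glue code (documentIdList, documentDtoList and DocumentDto#getId are assumptions; the actual wiring is outside this hunk):

    // Hypothetical post-query step: one grouped count query for all results.
    Map<String, Long> fileCountByDocId = new FileDao().countByDocumentsIds(documentIdList);
    for (DocumentDto documentDto : documentDtoList) {
        Long count = fileCountByDocId.get(documentDto.getId()); // getId() assumed
        documentDto.setFileCount(count == null ? 0 : count.intValue());
    }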
@@ -1 +1 @@
-db.version=29
+db.version=30
@@ -0,0 +1,2 @@
+create index IDX_FIL_IDDOC_C ON T_FILE (FIL_IDDOC_C ASC);
+update T_CONFIG set CFG_VALUE_C = '30' where CFG_ID_C = 'DB_VERSION';
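The new index backs lookups of files by their owning document, which the grouped count query now performs in bulk; without it, each search would scan T_FILE. An illustrative shape of the accelerated query (hand-written, not the exact Hibernate output; the latest-version column name is an assumption based on the File.latestVersion field):

    -- illustrative only; IDs hypothetical, FIL_LATESTVERSION_B assumed
    select FIL_IDDOC_C, count(*)
    from T_FILE
    where FIL_IDDOC_C in ('doc-1', 'doc-2')
      and FIL_LATESTVERSION_B = true
      and FIL_DELETEDATE_D is null
    group by FIL_IDDOC_C;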