1
0
mirror of https://github.com/sismics/docs.git synced 2025-12-15 02:36:24 +00:00

feat: add option to disable OCR (#768)

fixes #344
refs #767
This commit is contained in:
Alexander ADAM
2024-09-07 22:27:48 +02:00
committed by GitHub
parent 8f1ff56d34
commit c2d7f3ebc6
16 changed files with 221 additions and 62 deletions

View File

@@ -1,9 +1,9 @@
package com.sismics.docs.core.constant;
/**
* Configuration parameters.
* Configuration parameters.
*
* @author jtremeaux
* @author jtremeaux
*/
public enum ConfigType {
/**
@@ -20,6 +20,11 @@ public enum ConfigType {
*/
GUEST_LOGIN,
/**
* OCR enabled.
*/
OCR_ENABLED,
/**
* Default language.
*/

View File

@@ -8,13 +8,12 @@ import java.util.ResourceBundle;
/**
* Configuration parameter utilities.
*
* @author jtremeaux
*
*/
public class ConfigUtil {
/**
* Returns the textual value of a configuration parameter.
*
*
* @param configType Type of the configuration parameter
* @return Textual value of the configuration parameter
* @throws IllegalStateException Configuration parameter undefined
@@ -30,7 +29,7 @@ public class ConfigUtil {
/**
* Returns the configuration resource bundle.
*
*
* @return Resource bundle
*/
public static ResourceBundle getConfigBundle() {
@@ -39,14 +38,14 @@ public class ConfigUtil {
/**
* Returns the integer value of a configuration parameter.
*
*
* @param configType Type of the configuration parameter
* @return Integer value of the configuration parameter
* @throws IllegalStateException Configuration parameter undefined
*/
public static int getConfigIntegerValue(ConfigType configType) {
    // Look up the textual value and parse it as a decimal integer in one step
    return Integer.parseInt(getConfigStringValue(configType));
}
@@ -65,14 +64,28 @@ public class ConfigUtil {
/**
* Returns the boolean value of a configuration parameter.
*
*
* @param configType Type of the configuration parameter
* @return Boolean value of the configuration parameter
* @throws IllegalStateException Configuration parameter undefined
*/
public static boolean getConfigBooleanValue(ConfigType configType) {
    // Boolean.parseBoolean is true only for "true" (ignoring case); any other text is false
    return Boolean.parseBoolean(getConfigStringValue(configType));
}
/**
* Returns the boolean value of a configuration parameter with a default value.
*
* @param configType Type of the configuration parameter
* @param defaultValue Default value to return if the configuration parameter is undefined
* @return Boolean value of the configuration parameter
*/
public static boolean getConfigBooleanValue(ConfigType configType, boolean defaultValue) {
    // Start from the caller-supplied fallback and overwrite it only if the lookup succeeds
    boolean resolved = defaultValue;
    try {
        resolved = getConfigBooleanValue(configType);
    } catch (IllegalStateException ignored) {
        // The single-argument lookup signalled failure: keep the fallback
    }
    return resolved;
}
}

View File

@@ -3,6 +3,8 @@ package com.sismics.docs.core.util.format;
import com.google.common.io.Closer;
import com.sismics.docs.core.constant.Constants;
import com.sismics.docs.core.util.FileUtil;
import com.sismics.docs.core.util.ConfigUtil;
import com.sismics.docs.core.constant.ConfigType;
import com.sismics.util.mime.MimeType;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
@@ -22,7 +24,6 @@ import java.nio.file.Path;
/**
* Image format handler.
*
* @author bgamard
*/
public class ImageFormatHandler implements FormatHandler {
/**
@@ -45,7 +46,7 @@ public class ImageFormatHandler implements FormatHandler {
@Override
public String extractContent(String language, Path file) throws Exception {
if (language == null) {
if (language == null || !ConfigUtil.getConfigBooleanValue(ConfigType.OCR_ENABLED, true)) {
return null;
}

View File

@@ -2,6 +2,8 @@ package com.sismics.docs.core.util.format;
import com.google.common.io.Closer;
import com.sismics.docs.core.util.FileUtil;
import com.sismics.docs.core.util.ConfigUtil;
import com.sismics.docs.core.constant.ConfigType;
import com.sismics.util.mime.MimeType;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
@@ -53,7 +55,7 @@ public class PdfFormatHandler implements FormatHandler {
}
// No text content, try to OCR it
if (language != null && content != null && content.trim().isEmpty()) {
if (language != null && content != null && content.trim().isEmpty() && ConfigUtil.getConfigBooleanValue(ConfigType.OCR_ENABLED, true)) {
StringBuilder sb = new StringBuilder();
try (InputStream inputStream = Files.newInputStream(file);
PDDocument pdfDocument = PDDocument.load(inputStream)) {

View File

@@ -39,7 +39,7 @@ abstract class DbOpenHelper {
private static final Logger log = LoggerFactory.getLogger(DbOpenHelper.class);
private final JdbcConnectionAccess jdbcConnectionAccess;
private final List<Exception> exceptions = new ArrayList<>();
private Formatter formatter;
@@ -99,7 +99,7 @@ abstract class DbOpenHelper {
onCreate();
oldVersion = 0;
}
// Execute update script
ResourceBundle configBundle = ConfigUtil.getConfigBundle();
Integer currentVersion = Integer.parseInt(configBundle.getString("db.version"));
@@ -126,7 +126,7 @@ abstract class DbOpenHelper {
/**
* Execute all upgrade scripts in ascending order for a given version.
*
*
* @param version Version number
* @throws Exception e
*/
@@ -136,7 +136,7 @@ abstract class DbOpenHelper {
return name.matches("dbupdate-" + versionString + "-\\d+\\.sql");
});
Collections.sort(fileNameList);
for (String fileName : fileNameList) {
if (log.isInfoEnabled()) {
log.info(MessageFormat.format("Executing script: {0}", fileName));
@@ -145,16 +145,16 @@ abstract class DbOpenHelper {
executeScript(is);
}
}
/**
* Execute a SQL script. All statements must be one line only.
*
*
* @param inputScript Script to execute
* @throws IOException e
*/
private void executeScript(InputStream inputScript) throws IOException {
List<String> lines = CharStreams.readLines(new InputStreamReader(inputScript));
for (String sql : lines) {
if (Strings.isNullOrEmpty(sql) || sql.startsWith("--")) {
continue;
@@ -178,13 +178,13 @@ abstract class DbOpenHelper {
}
public abstract void onCreate() throws Exception;
public abstract void onUpgrade(int oldVersion, int newVersion) throws Exception;
/**
* Returns a List of all Exceptions which occured during the export.
* Returns a List of all Exceptions which occurred during the export.
*
* @return A List containig the Exceptions occured during the export
* @return A List containing the Exceptions occurred during the export
*/
public List<?> getExceptions() {
return exceptions;
@@ -192,7 +192,7 @@ abstract class DbOpenHelper {
/**
* Format the output SQL statements.
*
*
* @param format True to format
*/
public void setFormat(boolean format) {

View File

@@ -1 +1 @@
db.version=30
db.version=31

View File

@@ -0,0 +1,7 @@
-- DBUPDATE-031-0.SQL
-- Add the OCR_ENABLED configuration entry, initialised to 'true'
insert into T_CONFIG (CFG_ID_C, CFG_VALUE_C) values ('OCR_ENABLED', 'true');
-- Record that the database is now at version 31
update T_CONFIG set CFG_VALUE_C = '31' where CFG_ID_C = 'DB_VERSION';