1
0
mirror of https://git.tt-rss.org/git/tt-rss.git synced 2025-12-13 13:25:54 +00:00

add text_languagedetect to guess article language for better hyphenation

(bump schema)
This commit is contained in:
Andrew Dolgov
2013-07-31 10:30:14 +04:00
parent f035e6dc82
commit 6b4617970f
13 changed files with 2498 additions and 6 deletions

View File

@@ -354,6 +354,11 @@
$rss->init();
}
// Load the bundled Text_LanguageDetect library and create the detector that is
// used further down to guess the language of each entry's content.
require_once "lib/languagedetect/LanguageDetect.php";
$lang = new Text_LanguageDetect();
// NOTE(review): name mode 2 presumably makes detect() return two-letter
// ISO 639-1 codes instead of full language names - confirm against the
// Text_LanguageDetect documentation.
$lang->setNameMode(2);
// print_r($rss);
$feed = db_escape_string($feed);
@@ -565,6 +570,15 @@
print "\n";
}
// Guess the article language from its content so it can be stored in the
// "lang" column of the entry.
//
// Default to an empty string: when nothing is detected the value must still be
// a plain string, otherwise the raw detect() result (an array) would be
// interpolated into the INSERT statement as the literal text "Array".
$entry_language = "";

// The second argument presumably limits the result to the single best match
// (TODO confirm against the Text_LanguageDetect documentation).
$detected_languages = $lang->detect($entry_content, 1);

// Guard with is_array() as well: some library versions may return a non-array
// (e.g. an error object) on failure - verify against the bundled version.
if (is_array($detected_languages) && count($detected_languages) > 0) {
	// The result is keyed by language, so the first key is the best guess.
	$detected_languages = array_keys($detected_languages);
	$entry_language = db_escape_string($detected_languages[0]);

	_debug("detected language: $entry_language", $debug_enabled);
}
$entry_comments = $item->get_comments_url();
$entry_author = $item->get_author();
@@ -677,6 +691,7 @@
comments,
num_comments,
plugin_data,
lang,
author)
VALUES
('$entry_title',
@@ -691,6 +706,7 @@
'$entry_comments',
'$num_comments',
'$entry_plugin_data',
'$entry_language',
'$entry_author')");
$article_labels = array();