mirror of
https://git.tt-rss.org/git/tt-rss.git
synced 2025-12-13 01:25:56 +00:00
tag-related fixes
1. move tag sanitization to the feedparser common item class
2. enforce a length limit on tags when parsing
3. support multiple tags passed via one dc:subject (and other such elements) by parsing the value as a comma-separated list
4. sort the resulting tag list to prevent a different order between feed updates
5. remove some duplicate code related to tag validation
6. allow the + symbol in tags
This commit is contained in:
@@ -103,20 +103,20 @@ class FeedItem_Atom extends FeedItem_Common {
|
||||
|
||||
// Collects the raw category strings of an Atom entry: the "term" attribute of
// every <category> element plus the text of every dc:subject node.
//
// NOTE(review): this is a rendered diff hunk with the +/- markers stripped, so
// the old and the new version of several statements appear back to back. The
// variants that call trim()/clean() and the bare `return $cats;` look like the
// removed side (the commit message says sanitization moved to the common item
// class) -- confirm against the repository before relying on this.
function get_categories() {
|
||||
$categories = $this->elem->getElementsByTagName("category");
|
||||
// presumably the removed initialisation; the short-array form below replaces it
$cats = array();
|
||||
$cats = [];
|
||||
|
||||
foreach ($categories as $cat) {
|
||||
// only <category> elements that carry a "term" attribute contribute a tag
if ($cat->hasAttribute("term"))
|
||||
// presumably the removed variant: trimmed the attribute value in place
array_push($cats, trim($cat->getAttribute("term")));
|
||||
// presumably the added variant: pushes the raw value
array_push($cats, $cat->getAttribute("term"));
|
||||
}
|
||||
|
||||
// second source of tags: dc:subject children of this entry
$categories = $this->xpath->query("dc:subject", $this->elem);
|
||||
|
||||
foreach ($categories as $cat) {
|
||||
// presumably the removed variant: cleaned and trimmed the node text in place
array_push($cats, clean(trim($cat->nodeValue)));
|
||||
// presumably the added variant: pushes the raw node text
array_push($cats, $cat->nodeValue);
|
||||
}
|
||||
|
||||
// presumably the removed return: handed back the collected list directly
return $cats;
|
||||
// presumably the added return: passes the raw list through normalize_categories()
return $this->normalize_categories($cats);
|
||||
}
|
||||
|
||||
function get_enclosures() {
|
||||
|
||||
@@ -162,4 +162,35 @@ abstract class FeedItem_Common extends FeedItem {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Turn raw category strings from a feed item into a clean tag list.
 *
 * Every raw value is split on commas (one element may carry several tags).
 * Each piece is lowercased, trimmed, passed through clean(), stripped of
 * commas and quote characters, prefixed with "t:" when it is purely numeric,
 * and cut to at most 250 characters. When DB_TYPE is "mysql", characters
 * above U+FFFF are replaced with U+FFFD (presumably because the legacy MySQL
 * utf8 charset cannot store them -- confirm against the schema).
 *
 * Empty pieces (from "a,,b", trailing commas, whitespace-only values or
 * values consisting only of quotes) are dropped.
 *
 * @param array $cats raw category strings as read from the feed
 * @return array unique, non-empty tags sorted as strings and indexed from 0
 */
static function normalize_categories($cats) {
    $tags = [];

    foreach ($cats as $rawcat) {
        foreach (explode(",", $rawcat) as $srccat) {
            $cat = clean(trim(mb_strtolower($srccat)));

            // strip quotes *before* the numeric check, otherwise a quoted
            // number such as "123" would end up as a purely numeric tag;
            // trim again because removing quotes can expose whitespace
            $cat = trim(preg_replace('/[,\'\"]/', "", $cat));

            // we don't support numeric tags
            if (is_numeric($cat))
                $cat = 't:' . $cat;

            if (DB_TYPE == "mysql") {
                $cat = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $cat);
            }

            if (mb_strlen($cat) > 250)
                $cat = mb_substr($cat, 0, 250);

            // skip empty tags (also covers a null result from a failed preg_replace)
            if ((string) $cat !== '')
                $tags[] = $cat;
        }
    }

    $tags = array_unique($tags);

    // sort() reindexes from 0, so callers get a plain list in a stable order
    // regardless of the order the feed delivered the categories in
    sort($tags, SORT_STRING);

    return $tags;
}
|
||||
}
|
||||
|
||||
@@ -97,19 +97,19 @@ class FeedItem_RSS extends FeedItem_Common {
|
||||
|
||||
// Collects the raw category strings of an RSS item: the text of every
// <category> element plus the text of every dc:subject node.
//
// NOTE(review): this is a rendered diff hunk with the +/- markers stripped, so
// the old and the new version of several statements appear back to back. The
// variants that call trim()/clean() and the bare `return $cats;` look like the
// removed side (the commit message says sanitization moved to the common item
// class) -- confirm against the repository before relying on this.
function get_categories() {
|
||||
$categories = $this->elem->getElementsByTagName("category");
|
||||
// presumably the removed initialisation; the short-array form below replaces it
$cats = array();
|
||||
$cats = [];
|
||||
|
||||
foreach ($categories as $cat) {
|
||||
// presumably the removed variant: trimmed the node text in place
array_push($cats, trim($cat->nodeValue));
|
||||
// presumably the added variant: pushes the raw node text
array_push($cats, $cat->nodeValue);
|
||||
}
|
||||
|
||||
// second source of tags: dc:subject children of this item
$categories = $this->xpath->query("dc:subject", $this->elem);
|
||||
|
||||
foreach ($categories as $cat) {
|
||||
// presumably the removed variant: cleaned and trimmed the node text in place
array_push($cats, clean(trim($cat->nodeValue)));
|
||||
// presumably the added variant: pushes the raw node text
array_push($cats, $cat->nodeValue);
|
||||
}
|
||||
|
||||
// presumably the removed return: handed back the collected list directly
return $cats;
|
||||
// presumably the added return: passes the raw list through normalize_categories()
return $this->normalize_categories($cats);
|
||||
}
|
||||
|
||||
function get_enclosures() {
|
||||
|
||||
Reference in New Issue
Block a user