mirror of
https://git.tt-rss.org/git/tt-rss.git
synced 2026-02-10 16:01:33 +00:00
subscribe: verify XML before adding to the database; fetch: try to work around entity problems if initial parsing fails
This commit is contained in:
@@ -1558,6 +1558,7 @@
|
||||
* Here you should call extractfeedurls in rpc-backend
|
||||
* to get all possible feeds.
|
||||
* 5 - Couldn't download the URL content.
|
||||
* 6 - Content is an invalid XML.
|
||||
*/
|
||||
function subscribe_to_feed($url, $cat_id = 0,
|
||||
$auth_login = '', $auth_pass = '') {
|
||||
@@ -1588,6 +1589,18 @@
|
||||
$url = key($feedUrls);
|
||||
}
|
||||
|
||||
libxml_use_internal_errors(true);
|
||||
$doc = new DOMDocument();
|
||||
$doc->loadXML(html_entity_decode($contents));
|
||||
$error = libxml_get_last_error();
|
||||
libxml_clear_errors();
|
||||
|
||||
if ($error) {
|
||||
$error_message = format_libxml_error($error);
|
||||
|
||||
return array("code" => 6, "message" => $error_message);
|
||||
}
|
||||
|
||||
if ($cat_id == "0" || !$cat_id) {
|
||||
$cat_qpart = "NULL";
|
||||
} else {
|
||||
@@ -4203,4 +4216,10 @@
|
||||
return LABEL_BASE_INDEX - 1 + abs($feed);
|
||||
}
|
||||
|
||||
/**
 * Build a one-line, human-readable description of a libxml error.
 *
 * The text is produced by T_sprintf() from the template
 * "LibXML error <code> at line <line> (column <column>): <message>".
 *
 * @param object $error error object exposing code, line, column and message
 *                      properties (e.g. the value of libxml_get_last_error())
 * @return mixed the result of T_sprintf() for the filled-in template
 */
function format_libxml_error($error) {
	// Pull the individual fields out first so the formatting call reads
	// as a plain template + arguments list.
	$err_code = $error->code;
	$err_line = $error->line;
	$err_column = $error->column;
	$err_text = $error->message;

	return T_sprintf("LibXML error %s at line %d (column %d): %s",
		$err_code, $err_line, $err_column, $err_text);
}
|
||||
|
||||
?>
|
||||
|
||||
@@ -316,6 +316,25 @@
|
||||
_debug("update_rss_feed: fetch done.");
|
||||
}
|
||||
|
||||
$error = verify_feed_xml($feed_data);
|
||||
|
||||
if ($error) {
|
||||
if ($debug_enabled) {
|
||||
_debug("update_rss_feed: error verifying XML, code: " . $error->code);
|
||||
}
|
||||
|
||||
if ($error->code == 26) {
|
||||
if ($debug_enabled) {
|
||||
_debug("update_rss_feed: got error 26, trying to decode entities...");
|
||||
}
|
||||
|
||||
$feed_data = html_entity_decode($feed_data, ENT_COMPAT, 'UTF-8');
|
||||
|
||||
$error = verify_feed_xml($feed_data);
|
||||
|
||||
if ($error) $feed_data = '';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!$feed_data) {
|
||||
@@ -559,7 +578,7 @@
|
||||
_debug("update_rss_feed: date $entry_timestamp [$entry_timestamp_fmt]");
|
||||
}
|
||||
|
||||
$entry_title = html_entity_decode($item->get_title());
|
||||
$entry_title = html_entity_decode($item->get_title(), ENT_COMPAT, 'UTF-8');
|
||||
|
||||
$entry_link = rewrite_relative_url($site_url, $item->get_link());
|
||||
|
||||
@@ -1421,5 +1440,13 @@
|
||||
mb_strtolower(strip_tags($title), 'utf-8'));
|
||||
}
|
||||
|
||||
/**
 * Check whether a feed payload parses as XML.
 *
 * The payload is loaded into a throw-away DOMDocument with libxml's internal
 * error collection switched on, and the last recorded libxml error (if any)
 * is handed back to the caller.
 *
 * Note: libxml internal error handling is left enabled after this call.
 *
 * @param string $feed_data raw feed contents to verify
 * @return LibXMLError|false the last libxml error raised while parsing, or
 *                           false when no error was recorded (including when
 *                           there was nothing to parse)
 */
function verify_feed_xml($feed_data) {
	libxml_use_internal_errors(true);

	// Drop anything left over from an earlier parse elsewhere, otherwise a
	// stale error could be reported against a perfectly valid document.
	libxml_clear_errors();

	// DOMDocument::loadXML() refuses empty input (warning on older PHP,
	// ValueError on PHP 8). There is no libxml error to report in that
	// case, so bail out before touching the parser.
	if ($feed_data === null || $feed_data === false || $feed_data === '') {
		return false;
	}

	$doc = new DOMDocument();
	$doc->loadXML($feed_data);

	$error = libxml_get_last_error();

	// Leave the error buffer clean for the next parse.
	libxml_clear_errors();

	return $error;
}
|
||||
|
||||
?>
|
||||
|
||||
Reference in New Issue
Block a user