mirror of
https://git.tt-rss.org/git/tt-rss.git
synced 2025-12-15 21:55:56 +00:00
DOMDocument: remove the old meta-charset Unicode hacks and replace them with a shorter XML-preamble UTF-8 hack (in loadHTML() calls where it makes sense)
af_readability: better (?) charset hack for non-Unicode pages
This commit is contained in:
@@ -1200,12 +1200,8 @@ class RSSUtils {
 static function cache_media($html, $site_url) {
 libxml_use_internal_errors(true);

-$charset_hack = '<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
-</head>';
-
 $doc = new DOMDocument();
-$doc->loadHTML($charset_hack . $html);
+$doc->loadHTML('<?xml encoding="UTF-8">' . $html);
 $xpath = new DOMXPath($doc);

 $entries = $xpath->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])');
||||
Reference in New Issue
Block a user