mirror of
https://git.tt-rss.org/git/tt-rss.git
synced 2025-12-13 08:15:55 +00:00
DOMDocument: remove the old <meta> charset Unicode hacks and replace them with a shorter XML-preamble UTF-8 hack (applied to loadHTML() calls where it makes sense)
af_readability: a (possibly) better charset hack for non-Unicode pages
This commit is contained in:
@@ -19,11 +19,7 @@ class Af_Fsckportal extends Plugin {
|
||||
|
||||
$doc = new DOMDocument();
|
||||
|
||||
$charset_hack = '<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
|
||||
</head>';
|
||||
|
||||
@$doc->loadHTML($charset_hack . $article["content"]);
|
||||
@$doc->loadHTML('<?xml encoding="UTF-8">' . $article["content"]);
|
||||
|
||||
if ($doc) {
|
||||
$xpath = new DOMXPath($doc);
|
||||
|
||||
@@ -172,14 +172,10 @@ class Af_Readability extends Plugin {
|
||||
if (!$tmpdoc->loadHTML($tmp))
|
||||
return false;
|
||||
|
||||
// this is the worst hack yet :(
|
||||
if (strtolower($tmpdoc->encoding) != 'utf-8') {
|
||||
$tmpxpath = new DOMXPath($tmpdoc);
|
||||
|
||||
foreach ($tmpxpath->query("//meta") as $elem) {
|
||||
$elem->parentNode->removeChild($elem);
|
||||
}
|
||||
|
||||
$tmp = $tmpdoc->saveHTML();
|
||||
$tmp = preg_replace("/<meta.*?charset.*?\/>/i", "", $tmp);
|
||||
$tmp = mb_convert_encoding($tmp, 'utf-8', $tmpdoc->encoding);
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -210,7 +206,6 @@ class Af_Readability extends Plugin {
|
||||
} catch (Exception $e) {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
@@ -25,12 +25,8 @@ class Af_Tumblr_1280 extends Plugin {
|
||||
if (!function_exists("curl_init") || ini_get("open_basedir"))
|
||||
return $article;
|
||||
|
||||
$charset_hack = '<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
|
||||
</head>';
|
||||
|
||||
$doc = new DOMDocument();
|
||||
$doc->loadHTML($charset_hack . $article["content"]);
|
||||
$doc->loadHTML('<?xml encoding="UTF-8">' . $article["content"]);
|
||||
|
||||
$found = false;
|
||||
|
||||
@@ -92,4 +88,4 @@ class Af_Tumblr_1280 extends Plugin {
|
||||
return 2;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -155,7 +155,7 @@ class Af_Zz_ImgProxy extends Plugin {
|
||||
$proxy_all = $this->host->get($this, "proxy_all");
|
||||
|
||||
$doc = new DOMDocument();
|
||||
if (@$doc->loadHTML($article["content"])) {
|
||||
if (@$doc->loadHTML('<?xml encoding="UTF-8">' . $article["content"])) {
|
||||
$xpath = new DOMXPath($doc);
|
||||
$imgs = $xpath->query("//img[@src]");
|
||||
|
||||
|
||||
@@ -190,12 +190,8 @@ class Cache_Starred_Images extends Plugin implements IHandler {
|
||||
return;
|
||||
}
|
||||
|
||||
$charset_hack = '<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
|
||||
</head>';
|
||||
|
||||
$doc = new DOMDocument();
|
||||
$doc->loadHTML($charset_hack . $content);
|
||||
$doc->loadHTML('<?xml encoding="UTF-8">' . $content);
|
||||
$xpath = new DOMXPath($doc);
|
||||
|
||||
$entries = $xpath->query('(//img[@src])|(//video/source[@src])');
|
||||
|
||||
Reference in New Issue
Block a user