1
0
mirror of https://git.tt-rss.org/git/tt-rss.git synced 2025-12-24 15:11:45 +00:00

update HTMLPurifier; enable embedded flash video in articles

This commit is contained in:
Andrew Dolgov
2011-04-11 16:41:01 +04:00
parent ad92c6ac62
commit f4f0f80d21
341 changed files with 2014 additions and 643 deletions

View File

36
lib/htmlpurifier/library/HTMLPurifier/AttrDef.php Executable file → Normal file
View File

@@ -82,6 +82,42 @@ abstract class HTMLPurifier_AttrDef
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
}
/**
* Parses a possibly escaped CSS string and returns the "pure"
* version of it.
*/
protected function expandCSSEscape($string) {
// flexibly parse it
$ret = '';
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
if ($string[$i] === '\\') {
$i++;
if ($i >= $c) {
$ret .= '\\';
break;
}
if (ctype_xdigit($string[$i])) {
$code = $string[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($string[$i])) break;
$code .= $string[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$ret .= $char;
if ($i < $c && trim($string[$i]) !== '') $i--;
continue;
}
if ($string[$i] === "\n") continue;
}
$ret .= $string[$i];
}
return $ret;
}
}
// vim: et sw=4 sts=4

0
lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS.php Executable file → Normal file
View File

View File

View File

View File

@@ -59,7 +59,8 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
$keywords = array();
$keywords['h'] = false; // left, right
$keywords['v'] = false; // top, bottom
$keywords['c'] = false; // center
$keywords['ch'] = false; // center (first word)
$keywords['cv'] = false; // center (second word)
$measures = array();
$i = 0;
@@ -79,6 +80,13 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
$lbit = ctype_lower($bit) ? $bit : strtolower($bit);
if (isset($lookup[$lbit])) {
$status = $lookup[$lbit];
if ($status == 'c') {
if ($i == 0) {
$status = 'ch';
} else {
$status = 'cv';
}
}
$keywords[$status] = $lbit;
$i++;
}
@@ -101,20 +109,19 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
if (!$i) return false; // no valid values were caught
$ret = array();
// first keyword
if ($keywords['h']) $ret[] = $keywords['h'];
elseif (count($measures)) $ret[] = array_shift($measures);
elseif ($keywords['c']) {
$ret[] = $keywords['c'];
$keywords['c'] = false; // prevent re-use: center = center center
elseif ($keywords['ch']) {
$ret[] = $keywords['ch'];
$keywords['cv'] = false; // prevent re-use: center = center center
}
elseif (count($measures)) $ret[] = array_shift($measures);
if ($keywords['v']) $ret[] = $keywords['v'];
elseif ($keywords['cv']) $ret[] = $keywords['cv'];
elseif (count($measures)) $ret[] = array_shift($measures);
elseif ($keywords['c']) $ret[] = $keywords['c'];
if (empty($ret)) return false;
return implode(' ', $ret);

View File

View File

@@ -9,7 +9,7 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
public function validate($color, $config, $context) {
static $colors = null;
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
if ($colors === null) $colors = $config->get('Core.ColorKeywords');
$color = trim($color);
if ($color === '') return false;

View File

View File

View File

View File

@@ -2,11 +2,43 @@
/**
* Validates a font family list according to CSS spec
* @todo whitelisting allowed fonts would be nice
*/
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{
protected $mask = null;
public function __construct() {
$this->mask = '- ';
for ($c = 'a'; $c <= 'z'; $c++) $this->mask .= $c;
for ($c = 'A'; $c <= 'Z'; $c++) $this->mask .= $c;
for ($c = '0'; $c <= '9'; $c++) $this->mask .= $c; // cast-y, but should be fine
// special bytes used by UTF-8
for ($i = 0x80; $i <= 0xFF; $i++) {
// We don't bother excluding invalid bytes in this range,
// because the our restriction of well-formed UTF-8 will
// prevent these from ever occurring.
$this->mask .= chr($i);
}
/*
PHP's internal strcspn implementation is
O(length of string * length of mask), making it inefficient
for large masks. However, it's still faster than
preg_match 8)
for (p = s1;;) {
spanp = s2;
do {
if (*spanp == c || p == s1_end) {
return p - s1;
}
} while (spanp++ < (s2_end - 1));
c = *++p;
}
*/
// possible optimization: invert the mask.
}
public function validate($string, $config, $context) {
static $generic_names = array(
'serif' => true,
@@ -15,6 +47,7 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
'fantasy' => true,
'cursive' => true
);
$allowed_fonts = $config->get('CSS.AllowedFonts');
// assume that no font names contain commas in them
$fonts = explode(',', $string);
@@ -24,7 +57,9 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
if ($font === '') continue;
// match a generic name
if (isset($generic_names[$font])) {
$final .= $font . ', ';
if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
$final .= $font . ', ';
}
continue;
}
// match a quoted name
@@ -34,50 +69,122 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
$quote = $font[0];
if ($font[$length - 1] !== $quote) continue;
$font = substr($font, 1, $length - 2);
$new_font = '';
for ($i = 0, $c = strlen($font); $i < $c; $i++) {
if ($font[$i] === '\\') {
$i++;
if ($i >= $c) {
$new_font .= '\\';
break;
}
if (ctype_xdigit($font[$i])) {
$code = $font[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($font[$i])) break;
$code .= $font[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$new_font .= $char;
if ($i < $c && trim($font[$i]) !== '') $i--;
continue;
}
if ($font[$i] === "\n") continue;
}
$new_font .= $font[$i];
}
$font = $new_font;
}
$font = $this->expandCSSEscape($font);
// $font is a pure representation of the font name
if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
continue;
}
if (ctype_alnum($font) && $font !== '') {
// very simple font, allow it in unharmed
$final .= $font . ', ';
continue;
}
// complicated font, requires quoting
// bugger out on whitespace. form feed (0C) really
// shouldn't show up regardless
$font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
// armor single quotes and new lines
$font = str_replace("\\", "\\\\", $font);
$font = str_replace("'", "\\'", $font);
// Here, there are various classes of characters which need
// to be treated differently:
// - Alphanumeric characters are essentially safe. We
// handled these above.
// - Spaces require quoting, though most parsers will do
// the right thing if there aren't any characters that
// can be misinterpreted
// - Dashes rarely occur, but they fairly unproblematic
// for parsing/rendering purposes.
// The above characters cover the majority of Western font
// names.
// - Arbitrary Unicode characters not in ASCII. Because
// most parsers give little thought to Unicode, treatment
// of these codepoints is basically uniform, even for
// punctuation-like codepoints. These characters can
// show up in non-Western pages and are supported by most
// major browsers, for example: " 明朝" is a
// legitimate font-name
// <http://ja.wikipedia.org/wiki/MS_明朝>. See
// the CSS3 spec for more examples:
// <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
// You can see live samples of these on the Internet:
// <http://www.google.co.jp/search?q=font-family++明朝|ゴシック>
// However, most of these fonts have ASCII equivalents:
// for example, 'MS Mincho', and it's considered
// professional to use ASCII font names instead of
// Unicode font names. Thanks Takeshi Terada for
// providing this information.
// The following characters, to my knowledge, have not been
// used to name font names.
// - Single quote. While theoretically you might find a
// font name that has a single quote in its name (serving
// as an apostrophe, e.g. Dave's Scribble), I haven't
// been able to find any actual examples of this.
// Internet Explorer's cssText translation (which I
// believe is invoked by innerHTML) normalizes any
// quoting to single quotes, and fails to escape single
// quotes. (Note that this is not IE's behavior for all
// CSS properties, just some sort of special casing for
// font-family). So a single quote *cannot* be used
// safely in the font-family context if there will be an
// innerHTML/cssText translation. Note that Firefox 3.x
// does this too.
// - Double quote. In IE, these get normalized to
// single-quotes, no matter what the encoding. (Fun
// fact, in IE8, the 'content' CSS property gained
// support, where they special cased to preserve encoded
// double quotes, but still translate unadorned double
// quotes into single quotes.) So, because their
// fixpoint behavior is identical to single quotes, they
// cannot be allowed either. Firefox 3.x displays
// single-quote style behavior.
// - Backslashes are reduced by one (so \\ -> \) every
// iteration, so they cannot be used safely. This shows
// up in IE7, IE8 and FF3
// - Semicolons, commas and backticks are handled properly.
// - The rest of the ASCII punctuation is handled properly.
// We haven't checked what browsers do to unadorned
// versions, but this is not important as long as the
// browser doesn't /remove/ surrounding quotes (as IE does
// for HTML).
//
// With these results in hand, we conclude that there are
// various levels of safety:
// - Paranoid: alphanumeric, spaces and dashes(?)
// - International: Paranoid + non-ASCII Unicode
// - Edgy: Everything except quotes, backslashes
// - NoJS: Standards compliance, e.g. sod IE. Note that
// with some judicious character escaping (since certain
// types of escaping doesn't work) this is theoretically
// OK as long as innerHTML/cssText is not called.
// We believe that international is a reasonable default
// (that we will implement now), and once we do more
// extensive research, we may feel comfortable with dropping
// it down to edgy.
// Edgy: alphanumeric, spaces, dashes and Unicode. Use of
// str(c)spn assumes that the string was already well formed
// Unicode (which of course it is).
if (strspn($font, $this->mask) !== strlen($font)) {
continue;
}
// Historical:
// In the absence of innerHTML/cssText, these ugly
// transforms don't pose a security risk (as \\ and \"
// might--these escapes are not supported by most browsers).
// We could try to be clever and use single-quote wrapping
// when there is a double quote present, but I have choosen
// not to implement that. (NOTE: you can reduce the amount
// of escapes by one depending on what quoting style you use)
// $font = str_replace('\\', '\\5C ', $font);
// $font = str_replace('"', '\\22 ', $font);
// $font = str_replace("'", '\\27 ', $font);
// font possibly with spaces, requires quoting
$final .= "'$font', ";
}
$final = rtrim($final, ', ');

View File

View File

View File

View File

View File

View File

View File

@@ -34,20 +34,25 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
$uri = substr($uri, 1, $new_length - 1);
}
$keys = array( '(', ')', ',', ' ', '"', "'");
$values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
$uri = str_replace($values, $keys, $uri);
$uri = $this->expandCSSEscape($uri);
$result = parent::validate($uri, $config, $context);
if ($result === false) return false;
// escape necessary characters according to CSS spec
// except for the comma, none of these should appear in the
// URI at all
$result = str_replace($keys, $values, $result);
// extra sanity check; should have been done by URI
$result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
return "url($result)";
// suspicious characters are ()'; we're going to percent encode
// them for safety.
$result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result);
// there's an extra bug where ampersands lose their escaping on
// an innerHTML cycle, so a very unlucky query parameter could
// then change the meaning of the URL. Unfortunately, there's
// not much we can do about that...
return "url(\"$result\")";
}

0
lib/htmlpurifier/library/HTMLPurifier/AttrDef/Enum.php Executable file → Normal file
View File

View File

View File

@@ -0,0 +1,34 @@
<?php
/**
* Implements special behavior for class attribute (normally NMTOKENS)
*/
class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
{
protected function split($string, $config, $context) {
// really, this twiddle should be lazy loaded
$name = $config->getDefinition('HTML')->doctype->name;
if ($name == "XHTML 1.1" || $name == "XHTML 2.0") {
return parent::split($string, $config, $context);
} else {
return preg_split('/\s+/', $string);
}
}
protected function filter($tokens, $config, $context) {
$allowed = $config->get('Attr.AllowedClasses');
$forbidden = $config->get('Attr.ForbiddenClasses');
$ret = array();
foreach ($tokens as $token) {
if (
($allowed === null || isset($allowed[$token])) &&
!isset($forbidden[$token]) &&
// We need this O(n) check because of PHP's array
// implementation that casts -0 to 0.
!in_array($token, $ret, true)
) {
$ret[] = $token;
}
}
return $ret;
}
}

View File

@@ -9,7 +9,7 @@ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
public function validate($string, $config, $context) {
static $colors = null;
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
if ($colors === null) $colors = $config->get('Core.ColorKeywords');
$string = trim($string);

View File

@@ -12,7 +12,7 @@ class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
public function __construct() {}
public function validate($string, $config, $context) {
if ($this->valid_values === false) $this->valid_values = $config->get('Attr', 'AllowedFrameTargets');
if ($this->valid_values === false) $this->valid_values = $config->get('Attr.AllowedFrameTargets');
return parent::validate($string, $config, $context);
}

View File

@@ -17,18 +17,18 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
public function validate($id, $config, $context) {
if (!$config->get('Attr', 'EnableID')) return false;
if (!$config->get('Attr.EnableID')) return false;
$id = trim($id); // trim it first
if ($id === '') return false;
$prefix = $config->get('Attr', 'IDPrefix');
$prefix = $config->get('Attr.IDPrefix');
if ($prefix !== '') {
$prefix .= $config->get('Attr', 'IDPrefixLocal');
$prefix .= $config->get('Attr.IDPrefixLocal');
// prevent re-appending the prefix
if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
} elseif ($config->get('Attr', 'IDPrefixLocal') !== '') {
} elseif ($config->get('Attr.IDPrefixLocal') !== '') {
trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
'%Attr.IDPrefix is set', E_USER_WARNING);
}
@@ -51,7 +51,7 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
$result = ($trim === '');
}
$regexp = $config->get('Attr', 'IDBlacklistRegexp');
$regexp = $config->get('Attr.IDBlacklistRegexp');
if ($regexp && preg_match($regexp, $id)) {
return false;
}

View File

View File

@@ -27,7 +27,7 @@ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
public function validate($string, $config, $context) {
$allowed = $config->get('Attr', $this->name);
$allowed = $config->get('Attr.' . $this->name);
if (empty($allowed)) return false;
$string = $this->parseCDATA($string);

View File

View File

@@ -2,10 +2,6 @@
/**
* Validates contents based on NMTOKENS attribute type.
* @note The only current use for this is the class attribute in HTML
* @note Could have some functionality factored out into Nmtoken class
* @warning We cannot assume this class will be used only for 'class'
* attributes. Not sure how to hook in magic behavior, then.
*/
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{
@@ -17,6 +13,17 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
// early abort: '' and '0' (strings that convert to false) are invalid
if (!$string) return false;
$tokens = $this->split($string, $config, $context);
$tokens = $this->filter($tokens, $config, $context);
if (empty($tokens)) return false;
return implode(' ', $tokens);
}
/**
* Splits a space separated list of tokens into its constituent parts.
*/
protected function split($string, $config, $context) {
// OPTIMIZABLE!
// do the preg_match, capture all subpatterns for reformulation
@@ -24,23 +31,20 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
// escaping because I don't know how to do that with regexps
// and plus it would complicate optimization efforts (you never
// see that anyway).
$matches = array();
$pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
'((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
'(?:(?=\s)|\z)/'; // look ahead for space or string end
preg_match_all($pattern, $string, $matches);
return $matches[1];
}
if (empty($matches[1])) return false;
// reconstruct string
$new_string = '';
foreach ($matches[1] as $token) {
$new_string .= $token . ' ';
}
$new_string = rtrim($new_string);
return $new_string;
/**
* Template method for removing certain tokens based on arbitrary criteria.
* @note If we wanted to be really functional, we'd do an array_filter
* with a callback. But... we're not.
*/
protected function filter($tokens, $config, $context) {
return $tokens;
}
}

View File

View File

0
lib/htmlpurifier/library/HTMLPurifier/AttrDef/Lang.php Executable file → Normal file
View File

View File

0
lib/htmlpurifier/library/HTMLPurifier/AttrDef/Text.php Executable file → Normal file
View File

2
lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php Executable file → Normal file
View File

@@ -25,7 +25,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
public function validate($uri, $config, $context) {
if ($config->get('URI', 'Disable')) return false;
if ($config->get('URI.Disable')) return false;
$uri = $this->parseCDATA($uri);

View File

View File

@@ -23,6 +23,12 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
public function validate($string, $config, $context) {
$length = strlen($string);
// empty hostname is OK; it's usually semantically equivalent:
// the default host as defined by a URI scheme is used:
//
// If the URI scheme defines a default for host, then that
// default applies when the host subcomponent is undefined
// or when the registered name is empty (zero length).
if ($string === '') return '';
if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
//IPv6

View File

View File

View File

View File

View File

@@ -10,7 +10,7 @@ class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
public function transform($attr, $config, $context) {
if (isset($attr['dir'])) return $attr;
$attr['dir'] = $config->get('Attr', 'DefaultTextDir');
$attr['dir'] = $config->get('Attr.DefaultTextDir');
return $attr;
}

View File

View File

View File

View File

View File

@@ -15,21 +15,22 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
$src = true;
if (!isset($attr['src'])) {
if ($config->get('Core', 'RemoveInvalidImg')) return $attr;
$attr['src'] = $config->get('Attr', 'DefaultInvalidImage');
if ($config->get('Core.RemoveInvalidImg')) return $attr;
$attr['src'] = $config->get('Attr.DefaultInvalidImage');
$src = false;
}
if (!isset($attr['alt'])) {
if ($src) {
$alt = $config->get('Attr', 'DefaultImageAlt');
$alt = $config->get('Attr.DefaultImageAlt');
if ($alt === null) {
$attr['alt'] = basename($attr['src']);
// truncate if the alt is too long
$attr['alt'] = substr(basename($attr['src']),0,40);
} else {
$attr['alt'] = $alt;
}
} else {
$attr['alt'] = $config->get('Attr', 'DefaultInvalidImageAlt');
$attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt');
}
}

View File

View File

View File

View File

View File

@@ -7,6 +7,8 @@ class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
// Abort early if we're using relaxed definition of name
if ($config->get('HTML.Attr.Name.UseCDATA')) return $attr;
if (!isset($attr['name'])) return $attr;
$id = $this->confiscateAttr($attr, 'name');
if ( isset($attr['id'])) return $attr;

View File

@@ -0,0 +1,27 @@
<?php
/**
* Post-transform that performs validation to the name attribute; if
* it is present with an equivalent id attribute, it is passed through;
* otherwise validation is performed.
*/
class HTMLPurifier_AttrTransform_NameSync extends HTMLPurifier_AttrTransform
{
public function __construct() {
$this->idDef = new HTMLPurifier_AttrDef_HTML_ID();
}
public function transform($attr, $config, $context) {
if (!isset($attr['name'])) return $attr;
$name = $attr['name'];
if (isset($attr['id']) && $attr['id'] === $name) return $attr;
$result = $this->idDef->validate($name, $config, $context);
if ($result === false) unset($attr['name']);
else $attr['name'] = $result;
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@@ -0,0 +1,41 @@
<?php
// must be called POST validation
/**
* Adds rel="nofollow" to all outbound links. This transform is
* only attached if Attr.Nofollow is TRUE.
*/
class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
{
private $parser;
public function __construct() {
$this->parser = new HTMLPurifier_URIParser();
}
public function transform($attr, $config, $context) {
if (!isset($attr['href'])) {
return $attr;
}
// XXX Kind of inefficient
$url = $this->parser->parse($attr['href']);
$scheme = $url->getSchemeObj($config, $context);
if (!is_null($url->host) && $scheme !== false && $scheme->browsable) {
if (isset($attr['rel'])) {
$attr['rel'] .= ' nofollow';
} else {
$attr['rel'] = 'nofollow';
}
}
return $attr;
}
}
// vim: et sw=4 sts=4

View File

View File

View File

@@ -19,6 +19,7 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
public function __construct() {
$this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
$this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent'));
}
public function transform($attr, $config, $context) {
@@ -33,12 +34,25 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
case 'allowNetworking':
$attr['value'] = 'internal';
break;
case 'allowFullScreen':
if ($config->get('HTML.FlashAllowFullScreen')) {
$attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false';
} else {
$attr['value'] = 'false';
}
break;
case 'wmode':
$attr['value'] = 'window';
$attr['value'] = $this->wmode->validate($attr['value'], $config, $context);
break;
case 'movie':
case 'src':
$attr['name'] = "movie";
$attr['value'] = $this->uri->validate($attr['value'], $config, $context);
break;
case 'flashvars':
// we're going to allow arbitrary inputs to the SWF, on
// the reasoning that it could only hack the SWF, not us.
break;
// add other cases to support other param name/value pairs
default:
$attr['name'] = $attr['value'] = null;

View File

View File

3
lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php Executable file → Normal file
View File

@@ -36,6 +36,9 @@ class HTMLPurifier_AttrTypes
$this->info['Charsets'] = new HTMLPurifier_AttrDef_Text();
$this->info['Character'] = new HTMLPurifier_AttrDef_Text();
// "proprietary" types
$this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class();
// number is really a positive integer (one or more digits)
// FIXME: ^^ not always, see start and value of list items
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);

View File

10
lib/htmlpurifier/library/HTMLPurifier/Bootstrap.php Executable file → Normal file
View File

@@ -37,7 +37,12 @@ class HTMLPurifier_Bootstrap
public static function autoload($class) {
$file = HTMLPurifier_Bootstrap::getPath($class);
if (!$file) return false;
require HTMLPURIFIER_PREFIX . '/' . $file;
// Technically speaking, it should be ok and more efficient to
// just do 'require', but Antonio Parraga reports that with
// Zend extensions such as Zend debugger and APC, this invariant
// may be broken. Since we have efficient alternatives, pay
// the cost here and avoid the bug.
require_once HTMLPURIFIER_PREFIX . '/' . $file;
return true;
}
@@ -65,10 +70,11 @@ class HTMLPurifier_Bootstrap
if ( ($funcs = spl_autoload_functions()) === false ) {
spl_autoload_register($autoload);
} elseif (function_exists('spl_autoload_unregister')) {
$buggy = version_compare(PHP_VERSION, '5.2.11', '<');
$compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
version_compare(PHP_VERSION, '5.1.0', '>=');
foreach ($funcs as $func) {
if (is_array($func)) {
if ($buggy && is_array($func)) {
// :TRICKY: There are some compatibility issues and some
// places where we need to error out
$reflector = new ReflectionMethod($func[0], $func[1]);

48
lib/htmlpurifier/library/HTMLPurifier/CSSDefinition.php Executable file → Normal file
View File

@@ -154,7 +154,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('auto'))
));
$max = $config->get('CSS', 'MaxImgLength');
$max = $config->get('CSS.MaxImgLength');
$this->info['width'] =
$this->info['height'] =
@@ -211,15 +211,19 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
// partial support
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
if ($config->get('CSS', 'Proprietary')) {
if ($config->get('CSS.Proprietary')) {
$this->doSetupProprietary($config);
}
if ($config->get('CSS', 'AllowTricky')) {
if ($config->get('CSS.AllowTricky')) {
$this->doSetupTricky($config);
}
$allow_important = $config->get('CSS', 'AllowImportant');
if ($config->get('CSS.Trusted')) {
$this->doSetupTrusted($config);
}
$allow_important = $config->get('CSS.AllowImportant');
// wrap all attr-defs with decorator that handles !important
foreach ($this->info as $k => $v) {
$this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
@@ -260,6 +264,23 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll'));
}
protected function doSetupTrusted($config) {
$this->info['position'] = new HTMLPurifier_AttrDef_Enum(array(
'static', 'relative', 'absolute', 'fixed'
));
$this->info['top'] =
$this->info['left'] =
$this->info['right'] =
$this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_Enum(array('auto')),
));
$this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Integer(),
new HTMLPurifier_AttrDef_Enum(array('auto')),
));
}
/**
* Performs extra config-based processing. Based off of
@@ -272,20 +293,29 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
// setup allowed elements
$support = "(for information on implementing this, see the ".
"support forums) ";
$allowed_attributes = $config->get('CSS', 'AllowedProperties');
if ($allowed_attributes !== null) {
$allowed_properties = $config->get('CSS.AllowedProperties');
if ($allowed_properties !== null) {
foreach ($this->info as $name => $d) {
if(!isset($allowed_attributes[$name])) unset($this->info[$name]);
unset($allowed_attributes[$name]);
if(!isset($allowed_properties[$name])) unset($this->info[$name]);
unset($allowed_properties[$name]);
}
// emit errors
foreach ($allowed_attributes as $name => $d) {
foreach ($allowed_properties as $name => $d) {
// :TODO: Is this htmlspecialchars() call really necessary?
$name = htmlspecialchars($name);
trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
}
}
$forbidden_properties = $config->get('CSS.ForbiddenProperties');
if ($forbidden_properties !== null) {
foreach ($this->info as $name => $d) {
if (isset($forbidden_properties[$name])) {
unset($this->info[$name]);
}
}
}
}
}

0
lib/htmlpurifier/library/HTMLPurifier/ChildDef.php Executable file → Normal file
View File

View File

View File

View File

View File

View File

@@ -59,7 +59,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
$all_whitespace = true;
// some configuration
$escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
$escape_invalid_children = $config->get('Core.EscapeInvalidChildren');
// generator
$gen = new HTMLPurifier_Generator($config, $context);

View File

View File

384
lib/htmlpurifier/library/HTMLPurifier/Config.php Executable file → Normal file
View File

@@ -20,7 +20,7 @@ class HTMLPurifier_Config
/**
* HTML Purifier's version
*/
public $version = '3.3.0';
public $version = '4.3.0';
/**
* Bool indicator whether or not to automatically finalize
@@ -68,12 +68,31 @@ class HTMLPurifier_Config
*/
protected $plist;
/**
* Whether or not a set is taking place due to an
* alias lookup.
*/
private $aliasMode;
/**
* Set to false if you do not want line and file numbers in errors
* (useful when unit testing). This will also compress some errors
* and exceptions.
*/
public $chatty = true;
/**
* Current lock; only gets to this namespace are allowed.
*/
private $lock;
/**
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
* are allowed.
*/
public function __construct($definition) {
$this->plist = new HTMLPurifier_PropertyList($definition->defaultPlist);
public function __construct($definition, $parent = null) {
$parent = $parent ? $parent : $definition->defaultPlist;
$this->plist = new HTMLPurifier_PropertyList($parent);
$this->def = $definition; // keep a copy around for checking
$this->parser = new HTMLPurifier_VarParser_Flexible();
}
@@ -102,6 +121,16 @@ class HTMLPurifier_Config
return $ret;
}
/**
* Creates a new config object that inherits from a previous one.
* @param HTMLPurifier_Config $config Configuration object to inherit
* from.
* @return HTMLPurifier_Config object with $config as its parent.
*/
public static function inherit(HTMLPurifier_Config $config) {
return new HTMLPurifier_Config($config->def, $config->plist);
}
/**
* Convenience constructor that creates a default configuration object.
* @return Default HTMLPurifier_Config object.
@@ -114,24 +143,34 @@ class HTMLPurifier_Config
/**
* Retreives a value from the configuration.
* @param $namespace String namespace
* @param $key String key
*/
public function get($namespace, $key) {
if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true);
if (!isset($this->def->info[$namespace][$key])) {
public function get($key, $a = null) {
if ($a !== null) {
$this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING);
$key = "$key.$a";
}
if (!$this->finalized) $this->autoFinalize();
if (!isset($this->def->info[$key])) {
// can't add % due to SimpleTest bug
trigger_error('Cannot retrieve value of undefined directive ' . htmlspecialchars("$namespace.$key"),
$this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key),
E_USER_WARNING);
return;
}
if (isset($this->def->info[$namespace][$key]->isAlias)) {
$d = $this->def->info[$namespace][$key];
trigger_error('Cannot get value from aliased directive, use real name ' . $d->namespace . '.' . $d->name,
if (isset($this->def->info[$key]->isAlias)) {
$d = $this->def->info[$key];
$this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key,
E_USER_ERROR);
return;
}
return $this->plist->get("$namespace.$key");
if ($this->lock) {
list($ns) = explode('.', $key);
if ($ns !== $this->lock) {
$this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR);
return;
}
}
return $this->plist->get($key);
}
/**
@@ -139,13 +178,13 @@ class HTMLPurifier_Config
* @param $namespace String namespace
*/
public function getBatch($namespace) {
if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true);
if (!isset($this->def->info[$namespace])) {
trigger_error('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
if (!$this->finalized) $this->autoFinalize();
$full = $this->getAll();
if (!isset($full[$namespace])) {
$this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
E_USER_WARNING);
return;
}
$full = $this->getAll();
return $full[$namespace];
}
@@ -178,9 +217,10 @@ class HTMLPurifier_Config
/**
* Retrieves all directives, organized by namespace
* @warning This is a pretty inefficient function, avoid if you can
*/
public function getAll() {
if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true);
if (!$this->finalized) $this->autoFinalize();
$ret = array();
foreach ($this->plist->squash() as $name => $value) {
list($ns, $key) = explode('.', $name, 2);
@@ -191,29 +231,37 @@ class HTMLPurifier_Config
/**
* Sets a value to configuration.
* @param $namespace String namespace
* @param $key String key
* @param $value Mixed value
*/
public function set($namespace, $key, $value, $from_alias = false) {
public function set($key, $value, $a = null) {
if (strpos($key, '.') === false) {
$namespace = $key;
$directive = $value;
$value = $a;
$key = "$key.$directive";
$this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE);
} else {
list($namespace) = explode('.', $key);
}
if ($this->isFinalized('Cannot set directive after finalization')) return;
if (!isset($this->def->info[$namespace][$key])) {
trigger_error('Cannot set undefined directive ' . htmlspecialchars("$namespace.$key") . ' to value',
if (!isset($this->def->info[$key])) {
$this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key) . ' to value',
E_USER_WARNING);
return;
}
$def = $this->def->info[$namespace][$key];
$def = $this->def->info[$key];
if (isset($def->isAlias)) {
if ($from_alias) {
trigger_error('Double-aliases not allowed, please fix '.
'ConfigSchema bug with' . "$namespace.$key", E_USER_ERROR);
if ($this->aliasMode) {
$this->triggerError('Double-aliases not allowed, please fix '.
'ConfigSchema bug with' . $key, E_USER_ERROR);
return;
}
$this->set($new_ns = $def->namespace,
$new_dir = $def->name,
$value, true);
trigger_error("$namespace.$key is an alias, preferred directive name is $new_ns.$new_dir", E_USER_NOTICE);
$this->aliasMode = true;
$this->set($def->key, $value);
$this->aliasMode = false;
$this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE);
return;
}
@@ -231,7 +279,7 @@ class HTMLPurifier_Config
try {
$value = $this->parser->parse($value, $type, $allow_null);
} catch (HTMLPurifier_VarParserException $e) {
trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
$this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
return;
}
if (is_string($value) && is_object($def)) {
@@ -241,17 +289,17 @@ class HTMLPurifier_Config
}
// check to see if the value is allowed
if (isset($def->allowed) && !isset($def->allowed[$value])) {
trigger_error('Value not supported, valid values are: ' .
$this->triggerError('Value not supported, valid values are: ' .
$this->_listify($def->allowed), E_USER_WARNING);
return;
}
}
$this->plist->set("$namespace.$key", $value);
$this->plist->set($key, $value);
// reset definitions if the directives they depend on changed
// this is a very costly process, so it's discouraged
// with finalization
if ($namespace == 'HTML' || $namespace == 'CSS') {
if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
$this->definitions[$namespace] = null;
}
@@ -271,74 +319,203 @@ class HTMLPurifier_Config
* Retrieves object reference to the HTML definition.
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
* @param $optimized If true, this method may return null, to
* indicate that a cached version of the modified
* definition object is available and no further edits
* are necessary. Consider using
* maybeGetRawHTMLDefinition, which is more explicitly
* named, instead.
*/
public function getHTMLDefinition($raw = false) {
return $this->getDefinition('HTML', $raw);
public function getHTMLDefinition($raw = false, $optimized = false) {
return $this->getDefinition('HTML', $raw, $optimized);
}
/**
* Retrieves object reference to the CSS definition
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
* @param $optimized If true, this method may return null, to
* indicate that a cached version of the modified
* definition object is available and no further edits
* are necessary. Consider using
* maybeGetRawCSSDefinition, which is more explicitly
* named, instead.
*/
public function getCSSDefinition($raw = false) {
return $this->getDefinition('CSS', $raw);
public function getCSSDefinition($raw = false, $optimized = false) {
return $this->getDefinition('CSS', $raw, $optimized);
}
/**
* Retrieves object reference to the URI definition
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
* @param $optimized If true, this method may return null, to
* indicate that a cached version of the modified
* definition object is available and no further edits
* are necessary. Consider using
* maybeGetRawURIDefinition, which is more explicitly
* named, instead.
*/
public function getURIDefinition($raw = false, $optimized = false) {
return $this->getDefinition('URI', $raw, $optimized);
}
/**
* Retrieves a definition
* @param $type Type of definition: HTML, CSS, etc
* @param $raw Whether or not definition should be returned raw
* @param $optimized Only has an effect when $raw is true. Whether
* or not to return null if the result is already present in
* the cache. This is off by default for backwards
* compatibility reasons, but you need to do things this
* way in order to ensure that caching is done properly.
* Check out enduser-customize.html for more details.
* We probably won't ever change this default, as much as the
* maybe semantics is the "right thing to do."
*/
public function getDefinition($type, $raw = false) {
if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true);
public function getDefinition($type, $raw = false, $optimized = false) {
if ($optimized && !$raw) {
throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
}
if (!$this->finalized) $this->autoFinalize();
// temporarily suspend locks, so we can handle recursive definition calls
$lock = $this->lock;
$this->lock = null;
$factory = HTMLPurifier_DefinitionCacheFactory::instance();
$cache = $factory->create($type, $this);
$this->lock = $lock;
if (!$raw) {
// see if we can quickly supply a definition
// full definition
// ---------------
// check if definition is in memory
if (!empty($this->definitions[$type])) {
if (!$this->definitions[$type]->setup) {
$this->definitions[$type]->setup($this);
$cache->set($this->definitions[$type], $this);
$def = $this->definitions[$type];
// check if the definition is setup
if ($def->setup) {
return $def;
} else {
$def->setup($this);
if ($def->optimized) $cache->add($def, $this);
return $def;
}
return $this->definitions[$type];
}
// memory check missed, try cache
$this->definitions[$type] = $cache->get($this);
if ($this->definitions[$type]) {
// definition in cache, return it
return $this->definitions[$type];
// check if definition is in cache
$def = $cache->get($this);
if ($def) {
// definition in cache, save to memory and return it
$this->definitions[$type] = $def;
return $def;
}
} elseif (
!empty($this->definitions[$type]) &&
!$this->definitions[$type]->setup
) {
// raw requested, raw in memory, quick return
return $this->definitions[$type];
// initialize it
$def = $this->initDefinition($type);
// set it up
$this->lock = $type;
$def->setup($this);
$this->lock = null;
// save in cache
$cache->add($def, $this);
// return it
return $def;
} else {
// raw definition
// --------------
// check preconditions
$def = null;
if ($optimized) {
if (is_null($this->get($type . '.DefinitionID'))) {
// fatally error out if definition ID not set
throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
}
}
if (!empty($this->definitions[$type])) {
$def = $this->definitions[$type];
if ($def->setup && !$optimized) {
$extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : "";
throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra);
}
if ($def->optimized === null) {
$extra = $this->chatty ? " (try flushing your cache)" : "";
throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra);
}
if ($def->optimized !== $optimized) {
$msg = $optimized ? "optimized" : "unoptimized";
$extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : "";
throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra);
}
}
// check if definition was in memory
if ($def) {
if ($def->setup) {
// invariant: $optimized === true (checked above)
return null;
} else {
return $def;
}
}
// if optimized, check if definition was in cache
// (because we do the memory check first, this formulation
// is prone to cache slamming, but I think
// guaranteeing that either /all/ of the raw
// setup code or /none/ of it is run is more important.)
if ($optimized) {
// This code path only gets run once; once we put
// something in $definitions (which is guaranteed by the
// trailing code), we always short-circuit above.
$def = $cache->get($this);
if ($def) {
// save the full definition for later, but don't
// return it yet
$this->definitions[$type] = $def;
return null;
}
}
// check invariants for creation
if (!$optimized) {
if (!is_null($this->get($type . '.DefinitionID'))) {
if ($this->chatty) {
$this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached. If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary). See <a href='http://htmlpurifier.org/docs/enduser-customize.html#optimized'>Customize</a> for more details", E_USER_WARNING);
} else {
$this->triggerError("Useless DefinitionID declaration", E_USER_WARNING);
}
}
}
// initialize it
$def = $this->initDefinition($type);
$def->optimized = $optimized;
return $def;
}
throw new HTMLPurifier_Exception("The impossible happened!");
}
private function initDefinition($type) {
// quick checks failed, let's create the object
if ($type == 'HTML') {
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
$def = new HTMLPurifier_HTMLDefinition();
} elseif ($type == 'CSS') {
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
$def = new HTMLPurifier_CSSDefinition();
} elseif ($type == 'URI') {
$this->definitions[$type] = new HTMLPurifier_URIDefinition();
$def = new HTMLPurifier_URIDefinition();
} else {
throw new HTMLPurifier_Exception("Definition of $type type not supported");
}
// quick abort if raw
if ($raw) {
if (is_null($this->get($type, 'DefinitionID'))) {
// fatally error out if definition ID not set
throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
}
return $this->definitions[$type];
}
// set it up
$this->definitions[$type]->setup($this);
// save in cache
$cache->set($this->definitions[$type], $this);
return $this->definitions[$type];
$this->definitions[$type] = $def;
return $def;
}
public function maybeGetRawDefinition($name) {
return $this->getDefinition($name, true, true);
}
public function maybeGetRawHTMLDefinition() {
return $this->getDefinition('HTML', true, true);
}
public function maybeGetRawCSSDefinition() {
return $this->getDefinition('CSS', true, true);
}
public function maybeGetRawURIDefinition() {
return $this->getDefinition('URI', true, true);
}
/**
@@ -351,14 +528,12 @@ class HTMLPurifier_Config
foreach ($config_array as $key => $value) {
$key = str_replace('_', '.', $key);
if (strpos($key, '.') !== false) {
// condensed form
list($namespace, $directive) = explode('.', $key);
$this->set($namespace, $directive, $value);
$this->set($key, $value);
} else {
$namespace = $key;
$namespace_values = $value;
foreach ($namespace_values as $directive => $value) {
$this->set($namespace, $directive, $value);
$this->set($namespace .'.'. $directive, $value);
}
}
}
@@ -394,16 +569,15 @@ class HTMLPurifier_Config
}
}
$ret = array();
foreach ($schema->info as $ns => $keypairs) {
foreach ($keypairs as $directive => $def) {
if ($allowed !== true) {
if (isset($blacklisted_directives["$ns.$directive"])) continue;
if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue;
}
if (isset($def->isAlias)) continue;
if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
$ret[] = array($ns, $directive);
foreach ($schema->info as $key => $def) {
list($ns, $directive) = explode('.', $key, 2);
if ($allowed !== true) {
if (isset($blacklisted_directives["$ns.$directive"])) continue;
if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue;
}
if (isset($def->isAlias)) continue;
if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
$ret[] = array($ns, $directive);
}
return $ret;
}
@@ -472,7 +646,7 @@ class HTMLPurifier_Config
*/
public function isFinalized($error = false) {
if ($this->finalized && $error) {
trigger_error($error, E_USER_ERROR);
$this->triggerError($error, E_USER_ERROR);
}
return $this->finalized;
}
@@ -482,7 +656,11 @@ class HTMLPurifier_Config
* already finalized
*/
public function autoFinalize() {
if (!$this->finalized && $this->autoFinalize) $this->finalize();
if ($this->autoFinalize) {
$this->finalize();
} else {
$this->plist->squash(true);
}
}
/**
@@ -490,6 +668,40 @@ class HTMLPurifier_Config
*/
public function finalize() {
$this->finalized = true;
unset($this->parser);
}
/**
* Produces a nicely formatted error message by supplying the
* stack frame information OUTSIDE of HTMLPurifier_Config.
*/
protected function triggerError($msg, $no) {
// determine previous stack frame
$extra = '';
if ($this->chatty) {
$trace = debug_backtrace();
// zip(tail(trace), trace) -- but PHP is not Haskell har har
for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
continue;
}
$frame = $trace[$i];
$extra = " invoked on line {$frame['line']} in file {$frame['file']}";
break;
}
}
trigger_error($msg . $extra, $no);
}
/**
* Returns a serialized form of the configuration object that can
* be reconstituted.
*/
public function serialize() {
$this->getDefinition('HTML');
$this->getDefinition('CSS');
$this->getDefinition('URI');
return serialize($this);
}
}

117
lib/htmlpurifier/library/HTMLPurifier/ConfigSchema.php Executable file → Normal file
View File

@@ -60,7 +60,13 @@ class HTMLPurifier_ConfigSchema {
* Unserializes the default ConfigSchema.
*/
public static function makeFromSerial() {
return unserialize(file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser'));
$contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
$r = unserialize($contents);
if (!$r) {
$hash = sha1($contents);
trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
}
return $r;
}
/**
@@ -87,24 +93,13 @@ class HTMLPurifier_ConfigSchema {
* HTMLPurifier_DirectiveDef::$type for allowed values
* @param $allow_null Whether or not to allow null values
*/
public function add($namespace, $name, $default, $type, $allow_null) {
public function add($key, $default, $type, $allow_null) {
$obj = new stdclass();
$obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
if ($allow_null) $obj->allow_null = true;
$this->info[$namespace][$name] = $obj;
$this->defaults[$namespace][$name] = $default;
$this->defaultPlist->set("$namespace.$name", $default);
}
/**
* Defines a namespace for directives to be put into.
* @warning This is slightly different from the corresponding static
* method.
* @param $namespace Namespace's name
*/
public function addNamespace($namespace) {
$this->info[$namespace] = array();
$this->defaults[$namespace] = array();
$this->info[$key] = $obj;
$this->defaults[$key] = $default;
$this->defaultPlist->set($key, $default);
}
/**
@@ -116,12 +111,12 @@ class HTMLPurifier_ConfigSchema {
* @param $name Name of Directive
* @param $aliases Hash of aliased values to the real alias
*/
public function addValueAliases($namespace, $name, $aliases) {
if (!isset($this->info[$namespace][$name]->aliases)) {
$this->info[$namespace][$name]->aliases = array();
public function addValueAliases($key, $aliases) {
if (!isset($this->info[$key]->aliases)) {
$this->info[$key]->aliases = array();
}
foreach ($aliases as $alias => $real) {
$this->info[$namespace][$name]->aliases[$alias] = $real;
$this->info[$key]->aliases[$alias] = $real;
}
}
@@ -133,8 +128,8 @@ class HTMLPurifier_ConfigSchema {
* @param $name Name of directive
* @param $allowed Lookup array of allowed values
*/
public function addAllowedValues($namespace, $name, $allowed) {
$this->info[$namespace][$name]->allowed = $allowed;
public function addAllowedValues($key, $allowed) {
$this->info[$key]->allowed = $allowed;
}
/**
@@ -144,88 +139,26 @@ class HTMLPurifier_ConfigSchema {
* @param $new_namespace
* @param $new_name Directive that the alias will be to
*/
public function addAlias($namespace, $name, $new_namespace, $new_name) {
public function addAlias($key, $new_key) {
$obj = new stdclass;
$obj->namespace = $new_namespace;
$obj->name = $new_name;
$obj->key = $new_key;
$obj->isAlias = true;
$this->info[$namespace][$name] = $obj;
$this->info[$key] = $obj;
}
/**
* Replaces any stdclass that only has the type property with type integer.
*/
public function postProcess() {
foreach ($this->info as $namespace => $info) {
foreach ($info as $directive => $v) {
if (count((array) $v) == 1) {
$this->info[$namespace][$directive] = $v->type;
} elseif (count((array) $v) == 2 && isset($v->allow_null)) {
$this->info[$namespace][$directive] = -$v->type;
}
foreach ($this->info as $key => $v) {
if (count((array) $v) == 1) {
$this->info[$key] = $v->type;
} elseif (count((array) $v) == 2 && isset($v->allow_null)) {
$this->info[$key] = -$v->type;
}
}
}
// DEPRECATED METHODS
/** @see HTMLPurifier_ConfigSchema->set() */
public static function define($namespace, $name, $default, $type, $description) {
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
$type_values = explode('/', $type, 2);
$type = $type_values[0];
$modifier = isset($type_values[1]) ? $type_values[1] : false;
$allow_null = ($modifier === 'null');
$def = HTMLPurifier_ConfigSchema::instance();
$def->add($namespace, $name, $default, $type, $allow_null);
}
/** @see HTMLPurifier_ConfigSchema->addNamespace() */
public static function defineNamespace($namespace, $description) {
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
$def = HTMLPurifier_ConfigSchema::instance();
$def->addNamespace($namespace);
}
/** @see HTMLPurifier_ConfigSchema->addValueAliases() */
public static function defineValueAliases($namespace, $name, $aliases) {
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
$def = HTMLPurifier_ConfigSchema::instance();
$def->addValueAliases($namespace, $name, $aliases);
}
/** @see HTMLPurifier_ConfigSchema->addAllowedValues() */
public static function defineAllowedValues($namespace, $name, $allowed_values) {
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
$allowed = array();
foreach ($allowed_values as $value) {
$allowed[$value] = true;
}
$def = HTMLPurifier_ConfigSchema::instance();
$def->addAllowedValues($namespace, $name, $allowed);
}
/** @see HTMLPurifier_ConfigSchema->addAlias() */
public static function defineAlias($namespace, $name, $new_namespace, $new_name) {
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
$def = HTMLPurifier_ConfigSchema::instance();
$def->addAlias($namespace, $name, $new_namespace, $new_name);
}
/** @deprecated, use HTMLPurifier_VarParser->parse() */
public function validate($a, $b, $c = false) {
trigger_error("HTMLPurifier_ConfigSchema->validate deprecated, use HTMLPurifier_VarParser->parse instead", E_USER_NOTICE);
$parser = new HTMLPurifier_VarParser();
return $parser->parse($a, $b, $c);
}
/**
* Throws an E_USER_NOTICE stating that a method is deprecated.
*/
private static function deprecated($method) {
trigger_error("Static HTMLPurifier_ConfigSchema::$method deprecated, use add*() method instead", E_USER_NOTICE);
}
}
// vim: et sw=4 sts=4

View File

@@ -9,36 +9,28 @@ class HTMLPurifier_ConfigSchema_Builder_ConfigSchema
public function build($interchange) {
$schema = new HTMLPurifier_ConfigSchema();
foreach ($interchange->namespaces as $n) {
$schema->addNamespace($n->namespace);
}
foreach ($interchange->directives as $d) {
$schema->add(
$d->id->namespace,
$d->id->directive,
$d->id->key,
$d->default,
$d->type,
$d->typeAllowsNull
);
if ($d->allowed !== null) {
$schema->addAllowedValues(
$d->id->namespace,
$d->id->directive,
$d->id->key,
$d->allowed
);
}
foreach ($d->aliases as $alias) {
$schema->addAlias(
$alias->namespace,
$alias->directive,
$d->id->namespace,
$d->id->directive
$alias->key,
$d->id->key
);
}
if ($d->valueAliases !== null) {
$schema->addValueAliases(
$d->id->namespace,
$d->id->directive,
$d->id->key,
$d->valueAliases
);
}

View File

@@ -8,6 +8,7 @@ class HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter
{
protected $interchange;
private $namespace;
protected function writeHTMLDiv($html) {
$this->startElement('div');
@@ -34,36 +35,33 @@ class HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter
$this->startElement('configdoc');
$this->writeElement('title', $interchange->name);
foreach ($interchange->namespaces as $namespace) {
$this->buildNamespace($namespace);
foreach ($interchange->directives as $directive) {
$this->buildDirective($directive);
}
if ($this->namespace) $this->endElement(); // namespace
$this->endElement(); // configdoc
$this->flush();
}
public function buildNamespace($namespace) {
$this->startElement('namespace');
$this->writeAttribute('id', $namespace->namespace);
public function buildDirective($directive) {
$this->writeElement('name', $namespace->namespace);
$this->startElement('description');
$this->writeHTMLDiv($namespace->description);
$this->endElement(); // description
foreach ($this->interchange->directives as $directive) {
if ($directive->id->namespace !== $namespace->namespace) continue;
$this->buildDirective($directive);
// Kludge, although I suppose having a notion of a "root namespace"
// certainly makes things look nicer when documentation is built.
// Depends on things being sorted.
if (!$this->namespace || $this->namespace !== $directive->id->getRootNamespace()) {
if ($this->namespace) $this->endElement(); // namespace
$this->namespace = $directive->id->getRootNamespace();
$this->startElement('namespace');
$this->writeAttribute('id', $this->namespace);
$this->writeElement('name', $this->namespace);
}
$this->endElement(); // namespace
}
public function buildDirective($directive) {
$this->startElement('directive');
$this->writeAttribute('id', $directive->id->toString());
$this->writeElement('name', $directive->id->directive);
$this->writeElement('name', $directive->id->getDirective());
$this->startElement('aliases');
foreach ($directive->aliases as $alias) $this->writeElement('alias', $alias->toString());

View File

View File

@@ -13,26 +13,11 @@ class HTMLPurifier_ConfigSchema_Interchange
*/
public $name;
/**
* Array of Namespace ID => array(namespace info)
*/
public $namespaces = array();
/**
* Array of Directive ID => array(directive info)
*/
public $directives = array();
/**
* Adds a namespace array to $namespaces
*/
public function addNamespace($namespace) {
if (isset($this->namespaces[$i = $namespace->namespace])) {
throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine namespace '$i'");
}
$this->namespaces[$i] = $namespace;
}
/**
* Adds a directive array to $directives
*/

View File

@@ -6,11 +6,10 @@
class HTMLPurifier_ConfigSchema_Interchange_Id
{
public $namespace, $directive;
public $key;
public function __construct($namespace, $directive) {
$this->namespace = $namespace;
$this->directive = $directive;
public function __construct($key) {
$this->key = $key;
}
/**
@@ -18,12 +17,19 @@ class HTMLPurifier_ConfigSchema_Interchange_Id
* cause problems for PHP 5.0 support.
*/
public function toString() {
return $this->namespace . '.' . $this->directive;
return $this->key;
}
public function getRootNamespace() {
return substr($this->key, 0, strpos($this->key, "."));
}
public function getDirective() {
return substr($this->key, strpos($this->key, ".") + 1);
}
public static function make($id) {
list($namespace, $directive) = explode('.', $id);
return new HTMLPurifier_ConfigSchema_Interchange_Id($namespace, $directive);
return new HTMLPurifier_ConfigSchema_Interchange_Id($id);
}
}

View File

@@ -13,13 +13,17 @@ class HTMLPurifier_ConfigSchema_InterchangeBuilder
}
public static function buildFromDirectory($dir = null) {
$parser = new HTMLPurifier_StringHashParser();
$builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder();
$interchange = new HTMLPurifier_ConfigSchema_Interchange();
return $builder->buildDir($interchange, $dir);
}
if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema/';
$info = parse_ini_file($dir . 'info.ini');
$interchange->name = $info['name'];
public function buildDir($interchange, $dir = null) {
if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema';
if (file_exists($dir . '/info.ini')) {
$info = parse_ini_file($dir . '/info.ini');
$interchange->name = $info['name'];
}
$files = array();
$dh = opendir($dir);
@@ -33,15 +37,20 @@ class HTMLPurifier_ConfigSchema_InterchangeBuilder
sort($files);
foreach ($files as $file) {
$builder->build(
$interchange,
new HTMLPurifier_StringHash( $parser->parseFile($dir . $file) )
);
$this->buildFile($interchange, $dir . '/' . $file);
}
return $interchange;
}
public function buildFile($interchange, $file) {
$parser = new HTMLPurifier_StringHashParser();
$this->build(
$interchange,
new HTMLPurifier_StringHash( $parser->parseFile($file) )
);
}
/**
* Builds an interchange object based on a hash.
* @param $interchange HTMLPurifier_ConfigSchema_Interchange object to build
@@ -55,22 +64,17 @@ class HTMLPurifier_ConfigSchema_InterchangeBuilder
throw new HTMLPurifier_ConfigSchema_Exception('Hash does not have any ID');
}
if (strpos($hash['ID'], '.') === false) {
$this->buildNamespace($interchange, $hash);
if (count($hash) == 2 && isset($hash['DESCRIPTION'])) {
$hash->offsetGet('DESCRIPTION'); // prevent complaining
} else {
throw new HTMLPurifier_ConfigSchema_Exception('All directives must have a namespace');
}
} else {
$this->buildDirective($interchange, $hash);
}
$this->_findUnused($hash);
}
public function buildNamespace($interchange, $hash) {
$namespace = new HTMLPurifier_ConfigSchema_Interchange_Namespace();
$namespace->namespace = $hash->offsetGet('ID');
if (isset($hash['DESCRIPTION'])) {
$namespace->description = $hash->offsetGet('DESCRIPTION');
}
$interchange->addNamespace($namespace);
}
public function buildDirective($interchange, $hash) {
$directive = new HTMLPurifier_ConfigSchema_Interchange_Directive();

View File

@@ -39,10 +39,6 @@ class HTMLPurifier_ConfigSchema_Validator
$this->aliases = array();
// PHP is a bit lax with integer <=> string conversions in
// arrays, so we don't use the identical !== comparison
foreach ($interchange->namespaces as $i => $namespace) {
if ($i != $namespace->namespace) $this->error(false, "Integrity violation: key '$i' does not match internal id '{$namespace->namespace}'");
$this->validateNamespace($namespace);
}
foreach ($interchange->directives as $i => $directive) {
$id = $directive->id->toString();
if ($i != $id) $this->error(false, "Integrity violation: key '$i' does not match internal id '$id'");
@@ -51,20 +47,6 @@ class HTMLPurifier_ConfigSchema_Validator
return true;
}
/**
* Validates a HTMLPurifier_ConfigSchema_Interchange_Namespace object.
*/
public function validateNamespace($n) {
$this->context[] = "namespace '{$n->namespace}'";
$this->with($n, 'namespace')
->assertNotEmpty()
->assertAlnum(); // implicit assertIsString handled by InterchangeBuilder
$this->with($n, 'description')
->assertNotEmpty()
->assertIsString(); // handled by InterchangeBuilder
array_pop($this->context);
}
/**
* Validates a HTMLPurifier_ConfigSchema_Interchange_Id object.
*/
@@ -75,12 +57,11 @@ class HTMLPurifier_ConfigSchema_Validator
// handled by InterchangeBuilder
$this->error(false, 'is not an instance of HTMLPurifier_ConfigSchema_Interchange_Id');
}
if (!isset($this->interchange->namespaces[$id->namespace])) {
$this->error('namespace', 'does not exist'); // assumes that the namespace was validated already
}
$this->with($id, 'directive')
// keys are now unconstrained (we might want to narrow down to A-Za-z0-9.)
// we probably should check that it has at least one namespace
$this->with($id, 'key')
->assertNotEmpty()
->assertAlnum(); // implicit assertIsString handled by InterchangeBuilder
->assertIsString(); // implicit assertIsString handled by InterchangeBuilder
array_pop($this->context);
}

View File

Binary file not shown.

View File

@@ -0,0 +1,8 @@
Attr.AllowedClasses
TYPE: lookup/null
VERSION: 4.0.0
DEFAULT: null
--DESCRIPTION--
List of allowed class values in the class attribute. By default, this is null,
which means all classes are allowed.
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,19 @@
Attr.ClassUseCDATA
TYPE: bool/null
DEFAULT: null
VERSION: 4.0.0
--DESCRIPTION--
If null, class will auto-detect the doctype and, if matching XHTML 1.1 or
XHTML 2.0, will use the restrictive NMTOKENS specification of class. Otherwise,
it will use a relaxed CDATA definition. If true, the relaxed CDATA definition
is forced; if false, the NMTOKENS definition is forced. To get behavior
of HTML Purifier prior to 4.0.0, set this directive to false.
Some rational behind the auto-detection:
in previous versions of HTML Purifier, it was assumed that the form of
class was NMTOKENS, as specified by the XHTML Modularization (representing
XHTML 1.1 and XHTML 2.0). The DTDs for HTML 4.01 and XHTML 1.0, however
specify class as CDATA. HTML 5 effectively defines it as CDATA, but
with the additional constraint that each name should be unique (this is not
explicitly outlined in previous specifications).
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,8 @@
Attr.ForbiddenClasses
TYPE: lookup
VERSION: 4.0.0
DEFAULT: array()
--DESCRIPTION--
List of forbidden class values in the class attribute. By default, this is
empty, which means that no classes are forbidden. See also %Attr.AllowedClasses.
--# vim: et sw=4 sts=4

Some files were not shown because too many files have changed in this diff Show More