diff --git a/functions.php b/functions.php index 6a8e85628..8e98d4874 100644 --- a/functions.php +++ b/functions.php @@ -106,6 +106,7 @@ require_once "lib/simplepie/simplepie.inc"; require_once "lib/magpierss/rss_fetch.inc"; require_once 'lib/magpierss/rss_utils.inc'; + require_once 'lib/htmlpurifier/library/HTMLPurifier.auto.php'; /** * Print a timestamped debug message. @@ -3550,9 +3551,20 @@ } } + function strip_tags_long($string, $allowed) { + + $config = HTMLPurifier_Config::createDefault(); + + $config->set('HTML', 'Allowed', $allowed); + $purifier = new HTMLPurifier($config); + + return $purifier->purify($string); + + } + // http://ru2.php.net/strip-tags - function strip_tags_long($textstring, $allowed){ +/* function strip_tags_long($textstring, $allowed){ while($textstring != strip_tags($textstring, $allowed)) { while (strlen($textstring) != 0) @@ -3569,7 +3581,7 @@ $textstring = $safetext; } return $textstring; - } +} */ function sanitize_rss($link, $str, $force_strip_tags = false) { @@ -3577,11 +3589,12 @@ if (get_pref($link, "STRIP_UNSAFE_TAGS") || $force_strip_tags) { - $res = strip_tags_long($res, - "
to
+ foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
+ $attr = $transform->transform($o = $attr, $config, $context);
+ if ($e) {
+ if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
+ }
+ }
+
+ // create alias to this element's attribute definition array, see
+ // also $d_defs (global attribute definition array)
+ // DEFINITION CALL
+ $defs = $definition->info[$token->name]->attr;
+
+ $attr_key = false;
+ $context->register('CurrentAttr', $attr_key);
+
+ // iterate through all the attribute keypairs
+ // Watch out for name collisions: $key has previously been used
+ foreach ($attr as $attr_key => $value) {
+
+ // call the definition
+ if ( isset($defs[$attr_key]) ) {
+ // there is a local definition defined
+ if ($defs[$attr_key] === false) {
+ // We've explicitly been told not to allow this element.
+ // This is usually when there's a global definition
+ // that must be overridden.
+ // Theoretically speaking, we could have a
+ // AttrDef_DenyAll, but this is faster!
+ $result = false;
+ } else {
+ // validate according to the element's definition
+ $result = $defs[$attr_key]->validate(
+ $value, $config, $context
+ );
+ }
+ } elseif ( isset($d_defs[$attr_key]) ) {
+ // there is a global definition defined, validate according
+ // to the global definition
+ $result = $d_defs[$attr_key]->validate(
+ $value, $config, $context
+ );
+ } else {
+ // system never heard of the attribute? DELETE!
+ $result = false;
+ }
+
+ // put the results into effect
+ if ($result === false || $result === null) {
+ // this is a generic error message that should be replaced
+ // with more specific ones when possible
+ if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed');
+
+ // remove the attribute
+ unset($attr[$attr_key]);
+ } elseif (is_string($result)) {
+ // generally, if a substitution is happening, there
+ // was some sort of implicit correction going on. We'll
+ // delegate it to the attribute classes to say exactly what.
+
+ // simple substitution
+ $attr[$attr_key] = $result;
+ } else {
+ // nothing happens
+ }
+
+ // we'd also want slightly more complicated substitution
+ // involving an array as the return value,
+ // although we're not sure how colliding attributes would
+ // resolve (certain ones would be completely overridden,
+ // others would prepend themselves).
+ }
+
+ $context->destroy('CurrentAttr');
+
+ // post transforms
+
+ // global (error reporting untested)
+ foreach ($definition->info_attr_transform_post as $transform) {
+ $attr = $transform->transform($o = $attr, $config, $context);
+ if ($e) {
+ if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
+ }
+ }
+
+ // local (error reporting untested)
+ foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
+ $attr = $transform->transform($o = $attr, $config, $context);
+ if ($e) {
+ if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
+ }
+ }
+
+ $token->attr = $attr;
+
+ // destroy CurrentToken if we made it ourselves
+ if (!$current_token) $context->destroy('CurrentToken');
+
+ }
+
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Bootstrap.php b/lib/htmlpurifier/library/HTMLPurifier/Bootstrap.php
new file mode 100755
index 000000000..559f61a23
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Bootstrap.php
@@ -0,0 +1,98 @@
+
+// Fallback definition of PHP_EOL for PHP builds that do not provide it.
+if (!defined('PHP_EOL')) {
+    // Only Windows needs a special case. Darwin (Mac OS X) is a Unix and
+    // ends lines with "\n"; it must not be given the bare "\r" terminator.
+    if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
+        define('PHP_EOL', "\r\n");
+    } else {
+        define('PHP_EOL', "\n");
+    }
+}
+
+/**
+ * Bootstrap class that contains meta-functionality for HTML Purifier such as
+ * the autoload function.
+ *
+ * @note
+ * This class may be used without any other files from HTML Purifier.
+ */
+class HTMLPurifier_Bootstrap
+{
+
+    /**
+     * Autoload function for HTML Purifier
+     * @param $class Class to load
+     * @return bool true if the class file was required, false if getPath()
+     *         could not resolve the class to an existing file
+     */
+    public static function autoload($class) {
+        $file = HTMLPurifier_Bootstrap::getPath($class);
+        if (!$file) return false;
+        require HTMLPURIFIER_PREFIX . '/' . $file;
+        return true;
+    }
+
+    /**
+     * Returns the path for a specific class.
+     * @param $class Class name to resolve
+     * @return Path relative to HTMLPURIFIER_PREFIX, or false if the class
+     *         name does not start with 'HTMLPurifier' or the file is missing
+     */
+    public static function getPath($class) {
+        if (strncmp('HTMLPurifier', $class, 12) !== 0) return false;
+        // Custom implementations
+        if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) {
+            // language classes live in a flat directory and use dashes
+            // instead of underscores in their file names
+            $code = str_replace('_', '-', substr($class, 22));
+            $file = 'HTMLPurifier/Language/classes/' . $code . '.php';
+        } else {
+            $file = str_replace('_', '/', $class) . '.php';
+        }
+        if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) return false;
+        return $file;
+    }
+
+    /**
+     * "Pre-registers" our autoloader on the SPL stack, i.e. puts it in
+     * front of any autoloaders that are already registered.
+     * @throws Exception on PHP < 5.3 only, when a non-static object method
+     *         is already registered as an autoloader
+     */
+    public static function registerAutoload() {
+        $autoload = array('HTMLPurifier_Bootstrap', 'autoload');
+        if ( ($funcs = spl_autoload_functions()) === false ) {
+            spl_autoload_register($autoload);
+        } elseif (version_compare(PHP_VERSION, '5.3.0', '>=')) {
+            // The prepend flag exists from PHP 5.3 on, so there is no need to
+            // unregister and re-register the existing autoloaders, and
+            // object-method autoloaders no longer have to be rejected.
+            spl_autoload_register($autoload, true, true);
+        } elseif (function_exists('spl_autoload_unregister')) {
+            // Legacy path: empty the stack, register ourselves first, then
+            // put the previous autoloaders back in their original order.
+            $compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
+                      version_compare(PHP_VERSION, '5.1.0', '>=');
+            foreach ($funcs as $func) {
+                if (is_array($func)) {
+                    // :TRICKY: There are some compatibility issues and some
+                    // places where we need to error out
+                    $reflector = new ReflectionMethod($func[0], $func[1]);
+                    if (!$reflector->isStatic()) {
+                        throw new Exception('
+ HTML Purifier autoloader registrar is not compatible
+ with non-static object methods due to PHP Bug #44144;
+ Please do not use HTMLPurifier.autoload.php (or any
+ file that includes this file); instead, place the code:
+ spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
+ after your own autoloaders.
+ ');
+                    }
+                    // Surprisingly, spl_autoload_register supports the
+                    // Class::staticMethod callback format, although call_user_func doesn't
+                    if ($compat) $func = implode('::', $func);
+                }
+                spl_autoload_unregister($func);
+            }
+            spl_autoload_register($autoload);
+            foreach ($funcs as $func) spl_autoload_register($func);
+        }
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/CSSDefinition.php b/lib/htmlpurifier/library/HTMLPurifier/CSSDefinition.php
new file mode 100755
index 000000000..1a1805733
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/CSSDefinition.php
@@ -0,0 +1,292 @@
+info['text-align'] = new HTMLPurifier_AttrDef_Enum(
+ array('left', 'right', 'center', 'justify'), false);
+
+ $border_style =
+ $this->info['border-bottom-style'] =
+ $this->info['border-right-style'] =
+ $this->info['border-left-style'] =
+ $this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum(
+ array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double',
+ 'groove', 'ridge', 'inset', 'outset'), false);
+
+ $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
+
+ $this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
+ array('none', 'left', 'right', 'both'), false);
+ $this->info['float'] = new HTMLPurifier_AttrDef_Enum(
+ array('none', 'left', 'right'), false);
+ $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
+ array('normal', 'italic', 'oblique'), false);
+ $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
+ array('normal', 'small-caps'), false);
+
+ $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
+ array(
+ new HTMLPurifier_AttrDef_Enum(array('none')),
+ new HTMLPurifier_AttrDef_CSS_URI()
+ )
+ );
+
+ $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
+ array('inside', 'outside'), false);
+ $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
+ array('disc', 'circle', 'square', 'decimal', 'lower-roman',
+ 'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
+ $this->info['list-style-image'] = $uri_or_none;
+
+ $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
+
+ $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
+ array('capitalize', 'uppercase', 'lowercase', 'none'), false);
+ $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
+
+ $this->info['background-image'] = $uri_or_none;
+ $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
+ array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
+ );
+ $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
+ array('scroll', 'fixed')
+ );
+ $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
+
+ $border_color =
+ $this->info['border-top-color'] =
+ $this->info['border-bottom-color'] =
+ $this->info['border-left-color'] =
+ $this->info['border-right-color'] =
+ $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_Enum(array('transparent')),
+ new HTMLPurifier_AttrDef_CSS_Color()
+ ));
+
+ $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
+
+ $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
+
+ $border_width =
+ $this->info['border-top-width'] =
+ $this->info['border-bottom-width'] =
+ $this->info['border-left-width'] =
+ $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
+ new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
+ ));
+
+ $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
+
+ $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_Enum(array('normal')),
+ new HTMLPurifier_AttrDef_CSS_Length()
+ ));
+
+ $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_Enum(array('normal')),
+ new HTMLPurifier_AttrDef_CSS_Length()
+ ));
+
+ $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small',
+ 'small', 'medium', 'large', 'x-large', 'xx-large',
+ 'larger', 'smaller')),
+ new HTMLPurifier_AttrDef_CSS_Percentage(),
+ new HTMLPurifier_AttrDef_CSS_Length()
+ ));
+
+ $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_Enum(array('normal')),
+ new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
+ new HTMLPurifier_AttrDef_CSS_Length('0'),
+ new HTMLPurifier_AttrDef_CSS_Percentage(true)
+ ));
+
+ $margin =
+ $this->info['margin-top'] =
+ $this->info['margin-bottom'] =
+ $this->info['margin-left'] =
+ $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_CSS_Length(),
+ new HTMLPurifier_AttrDef_CSS_Percentage(),
+ new HTMLPurifier_AttrDef_Enum(array('auto'))
+ ));
+
+ $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
+
+ // non-negative
+ $padding =
+ $this->info['padding-top'] =
+ $this->info['padding-bottom'] =
+ $this->info['padding-left'] =
+ $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_CSS_Length('0'),
+ new HTMLPurifier_AttrDef_CSS_Percentage(true)
+ ));
+
+ $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
+
+ $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_CSS_Length(),
+ new HTMLPurifier_AttrDef_CSS_Percentage()
+ ));
+
+ $trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_CSS_Length('0'),
+ new HTMLPurifier_AttrDef_CSS_Percentage(true),
+ new HTMLPurifier_AttrDef_Enum(array('auto'))
+ ));
+ $max = $config->get('CSS', 'MaxImgLength');
+
+ $this->info['width'] =
+ $this->info['height'] =
+ $max === null ?
+ $trusted_wh :
+ new HTMLPurifier_AttrDef_Switch('img',
+ // For img tags:
+ new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_CSS_Length('0', $max),
+ new HTMLPurifier_AttrDef_Enum(array('auto'))
+ )),
+ // For everyone else:
+ $trusted_wh
+ );
+
+ $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
+
+ $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
+
+ // this could use specialized code
+ $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
+ array('normal', 'bold', 'bolder', 'lighter', '100', '200', '300',
+ '400', '500', '600', '700', '800', '900'), false);
+
+ // MUST be called after other font properties, as it references
+ // a CSSDefinition object
+ $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
+
+ // same here
+ $this->info['border'] =
+ $this->info['border-bottom'] =
+ $this->info['border-top'] =
+ $this->info['border-left'] =
+ $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
+
+ $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
+ 'collapse', 'separate'));
+
+ $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
+ 'top', 'bottom'));
+
+ $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array(
+ 'auto', 'fixed'));
+
+ $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+ new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super',
+ 'top', 'text-top', 'middle', 'bottom', 'text-bottom')),
+ new HTMLPurifier_AttrDef_CSS_Length(),
+ new HTMLPurifier_AttrDef_CSS_Percentage()
+ ));
+
+ $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
+
+ // partial support
+ $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
+
+ if ($config->get('CSS', 'Proprietary')) {
+ $this->doSetupProprietary($config);
+ }
+
+ if ($config->get('CSS', 'AllowTricky')) {
+ $this->doSetupTricky($config);
+ }
+
+ $allow_important = $config->get('CSS', 'AllowImportant');
+ // wrap all attr-defs with decorator that handles !important
+ foreach ($this->info as $k => $v) {
+ $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
+ }
+
+ $this->setupConfigStuff($config);
+ }
+
+    protected function doSetupProprietary($config) {
+        // Internet Explorer only scrollbar colors; each property gets its
+        // own validator instance
+        $scrollbar_colors = array(
+            'scrollbar-arrow-color',
+            'scrollbar-base-color',
+            'scrollbar-darkshadow-color',
+            'scrollbar-face-color',
+            'scrollbar-highlight-color',
+            'scrollbar-shadow-color',
+        );
+        foreach ($scrollbar_colors as $property) {
+            $this->info[$property] = new HTMLPurifier_AttrDef_CSS_Color();
+        }
+
+        // technically not proprietary, but CSS3, and no one supports it
+        $opacity_aliases = array('opacity', '-moz-opacity', '-khtml-opacity');
+        foreach ($opacity_aliases as $property) {
+            $this->info[$property] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
+        }
+
+        // only opacity, for now
+        $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();
+    }
+
+    protected function doSetupTricky($config) {
+        // Registers enum validators for display, visibility and overflow.
+        $display_values = array(
+            'inline', 'block', 'list-item', 'run-in', 'compact',
+            'marker', 'table', 'inline-table', 'table-row-group',
+            'table-header-group', 'table-footer-group', 'table-row',
+            'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none'
+        );
+        $this->info['display'] = new HTMLPurifier_AttrDef_Enum($display_values);
+
+        $visibility_values = array('visible', 'hidden', 'collapse');
+        $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum($visibility_values);
+
+        $overflow_values = array('visible', 'hidden', 'auto', 'scroll');
+        $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum($overflow_values);
+    }
+
+
+ /**
+ * Performs extra config-based processing. Based off of
+ * HTMLPurifier_HTMLDefinition.
+ * @todo Refactor duplicate elements into common class (probably using
+ * composition, not inheritance).
+ */
+    protected function setupConfigStuff($config) {
+
+        // a null CSS.AllowedProperties means "no restriction": nothing to do
+        $whitelist = $config->get('CSS', 'AllowedProperties');
+        if ($whitelist === null) return;
+
+        $support = "(for information on implementing this, see the ".
+                   "support forums) ";
+
+        // drop every known property that is not whitelisted, and tick each
+        // known property off the whitelist as we go
+        foreach ($this->info as $property => $unused_def) {
+            if (!isset($whitelist[$property])) {
+                unset($this->info[$property]);
+            }
+            unset($whitelist[$property]);
+        }
+
+        // whatever is still on the whitelist names a property we do not know
+        foreach ($whitelist as $name => $unused_flag) {
+            // :TODO: Is this htmlspecialchars() call really necessary?
+            $name = htmlspecialchars($name);
+            trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
+        }
+
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef.php
new file mode 100755
index 000000000..c5d5216da
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef.php
@@ -0,0 +1,48 @@
+elements;
+ }
+
+ /**
+ * Validates nodes according to definition and returns modification.
+ *
+ * Abstract: declared here without a body.
+ *
+ * @param $tokens_of_children Array of HTMLPurifier_Token
+ * @param $config HTMLPurifier_Config object
+ * @param $context HTMLPurifier_Context object
+ * @return bool|array One of:
+ *         - true to leave nodes as is
+ *         - false to remove parent node
+ *         - array of replacement child tokens
+ */
+ abstract public function validateChildren($tokens_of_children, $config, $context);
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php
new file mode 100755
index 000000000..15c364ee3
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php
@@ -0,0 +1,48 @@
+inline = new HTMLPurifier_ChildDef_Optional($inline);
+ $this->block = new HTMLPurifier_ChildDef_Optional($block);
+ $this->elements = $this->block->elements;
+ }
+
+    public function validateChildren($tokens_of_children, $config, $context) {
+        // Only an IsInline value that is strictly false selects the block
+        // definition; any other value falls through to the inline one.
+        $delegate = ($context->get('IsInline') === false)
+            ? $this->block
+            : $this->inline;
+        return $delegate->validateChildren($tokens_of_children, $config, $context);
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php
new file mode 100755
index 000000000..b68047b4b
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php
@@ -0,0 +1,90 @@
+dtd_regex = $dtd_regex;
+ $this->_compileRegex();
+ }
+ /**
+ * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
+ */
+    protected function _compileRegex() {
+        // Reads $this->dtd_regex; fills $this->elements with every element
+        // name found in it and stores the compiled pattern body (without
+        // delimiters) in $this->_pcre_regex.
+        $raw = str_replace(' ', '', $this->dtd_regex);
+        // Wrap the expression in parentheses unless it already starts with
+        // one. The [] offset syntax is required: the {} form is deprecated
+        // in PHP 7.4 and a parse error in PHP 8. The empty-string check
+        // avoids reading offset 0 of an empty string.
+        if ($raw === '' || $raw[0] != '(') {
+            $raw = "($raw)";
+        }
+        $el = '[#a-zA-Z0-9_.-]+';
+        $reg = $raw;
+
+        // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
+        // DOING! Seriously: if there's problems, please report them.
+
+        // collect all elements into the $elements array
+        preg_match_all("/$el/", $reg, $matches);
+        foreach ($matches[0] as $match) {
+            $this->elements[$match] = true;
+        }
+
+        // setup all elements as parentheticals with leading commas
+        $reg = preg_replace("/$el/", '(,\\0)', $reg);
+
+        // remove commas when they were not solicited
+        $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);
+
+        // remove all non-parenthetical commas: they are handled by first regex
+        $reg = preg_replace("/,\(/", '(', $reg);
+
+        $this->_pcre_regex = $reg;
+    }
+    public function validateChildren($tokens_of_children, $config, $context) {
+        // Build a comma-separated list of the names of the direct children,
+        // then match that list against the compiled regex.
+        $names = '';
+        $depth = 0; // how deep inside nested start tags we currently are
+        foreach ($tokens_of_children as $tok) {
+            if (!empty($tok->is_whitespace)) continue;
+
+            // a token is a direct child when the depth is zero *before*
+            // the token itself adjusts it
+            if ($depth == 0) {
+                $names .= $tok->name . ',';
+            }
+
+            if ($tok instanceof HTMLPurifier_Token_Start) {
+                $depth++;
+            } elseif ($tok instanceof HTMLPurifier_Token_End) {
+                $depth--;
+            }
+        }
+        // add leading comma to deal with stray comma declarations
+        $names = ',' . rtrim($names, ',');
+
+        return (bool) preg_match('/^,?'.$this->_pcre_regex.'$/', $names);
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php
new file mode 100755
index 000000000..13171f665
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php
@@ -0,0 +1,20 @@
+whitespace) return $tokens_of_children;
+ else return array();
+ }
+ return $result;
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php
new file mode 100755
index 000000000..c3e748b26
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php
@@ -0,0 +1,117 @@
+ $x) {
+ $elements[$i] = true;
+ if (empty($i)) unset($elements[$i]); // remove blank
+ }
+ }
+ $this->elements = $elements;
+ }
+ // NOTE(review): not read by validateChildren() in this class; presumably
+ // consulted by code outside this view - TODO confirm.
+ public $allow_empty = false;
+ // String tag set to 'required' for this child definition; its consumers
+ // are not visible here.
+ public $type = 'required';
+ /**
+ * Filters a list of child tokens against $this->elements.
+ *
+ * Whitespace tokens are always kept. A direct child whose name is not a
+ * key of $this->elements is dropped together with everything nested in
+ * it, except that when '#PCDATA' is allowed its text tokens are kept and,
+ * if Core.EscapeInvalidChildren is set, its tags are kept as escaped text.
+ *
+ * @param $tokens_of_children Array of tokens to validate
+ * @param $config Config object; Core.EscapeInvalidChildren is read from it
+ * @param $context Context object, passed on to HTMLPurifier_Generator
+ * @return false if there are no tokens, nothing survives, or only
+ *         whitespace survives (in which case $this->whitespace is set);
+ *         true if the result equals the input; otherwise the new array
+ */
+ public function validateChildren($tokens_of_children, $config, $context) {
+ // Flag for subclasses
+ $this->whitespace = false;
+
+ // if there are no tokens, delete parent node
+ if (empty($tokens_of_children)) return false;
+
+ // the new set of children
+ $result = array();
+
+ // current depth into the nest
+ $nesting = 0;
+
+ // whether or not we're deleting a node
+ $is_deleting = false;
+
+ // whether or not parsed character data is allowed
+ // this controls whether or not we silently drop a tag
+ // or generate escaped HTML from it
+ $pcdata_allowed = isset($this->elements['#PCDATA']);
+
+ // a little sanity check to make sure it's not ALL whitespace
+ $all_whitespace = true;
+
+ // some configuration
+ $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
+
+ // generator
+ $gen = new HTMLPurifier_Generator($config, $context);
+
+ foreach ($tokens_of_children as $token) {
+ // whitespace is kept unconditionally and never affects nesting
+ if (!empty($token->is_whitespace)) {
+ $result[] = $token;
+ continue;
+ }
+ $all_whitespace = false; // phew, we're not talking about whitespace
+
+ // direct child if the depth is zero before this token adjusts it
+ $is_child = ($nesting == 0);
+
+ if ($token instanceof HTMLPurifier_Token_Start) {
+ $nesting++;
+ } elseif ($token instanceof HTMLPurifier_Token_End) {
+ $nesting--;
+ }
+
+ if ($is_child) {
+ // each direct child resets the deletion state for its subtree
+ $is_deleting = false;
+ if (!isset($this->elements[$token->name])) {
+ $is_deleting = true;
+ if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) {
+ $result[] = $token;
+ } elseif ($pcdata_allowed && $escape_invalid_children) {
+ $result[] = new HTMLPurifier_Token_Text(
+ $gen->generateFromToken($token)
+ );
+ }
+ continue;
+ }
+ }
+ // reached for allowed direct children and for every nested token;
+ // $is_deleting still holds the verdict of the enclosing direct child
+ if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) {
+ $result[] = $token;
+ } elseif ($pcdata_allowed && $escape_invalid_children) {
+ $result[] =
+ new HTMLPurifier_Token_Text(
+ $gen->generateFromToken($token)
+ );
+ } else {
+ // drop silently
+ }
+ }
+ if (empty($result)) return false;
+ if ($all_whitespace) {
+ $this->whitespace = true;
+ return false;
+ }
+ // loose == comparison: true means "nothing changed, keep the input"
+ if ($tokens_of_children == $result) return true;
+ return $result;
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php
new file mode 100755
index 000000000..dfae8a6e5
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php
@@ -0,0 +1,88 @@
+init($config);
+ return $this->fake_elements;
+ }
+
+    public function validateChildren($tokens_of_children, $config, $context) {
+
+        $this->init($config);
+
+        // let the parent validate against the wider "fake" element set,
+        // then put the real set back
+        $this->elements = $this->fake_elements;
+        $validated = parent::validateChildren($tokens_of_children, $config, $context);
+        $this->elements = $this->real_elements;
+
+        if ($validated === false) return array();
+        if ($validated === true) $validated = $tokens_of_children;
+
+        $html_def   = $config->getHTMLDefinition();
+        $wrap_open  = new HTMLPurifier_Token_Start($html_def->info_block_wrapper);
+        $wrap_close = new HTMLPurifier_Token_End($html_def->info_block_wrapper);
+        $wrapping = false; // true while a wrapper element is open
+        $level    = 0;     // nesting depth inside the current top-level node
+        $out      = array();
+
+        // assuming that there are no comment tokens
+        foreach ($validated as $tok) {
+            // wrappers are only opened or closed between top-level nodes
+            if (!$level) {
+                if (!$wrapping) {
+                    // open a wrapper in front of non-whitespace text or of
+                    // any tag that is not in the real element set
+                    $needs_wrapper = ($tok instanceof HTMLPurifier_Token_Text)
+                        ? !$tok->is_whitespace
+                        : !isset($this->elements[$tok->name]);
+                    if ($needs_wrapper) {
+                        $wrapping = true;
+                        $out[] = $wrap_open;
+                    }
+                } else {
+                    // close the wrapper in front of a start or empty tag
+                    // that is in the real element set
+                    $opens_node = $tok instanceof HTMLPurifier_Token_Start
+                               || $tok instanceof HTMLPurifier_Token_Empty;
+                    if ($opens_node && isset($this->elements[$tok->name])) {
+                        $out[] = $wrap_close;
+                        $wrapping = false;
+                    }
+                }
+            }
+            $out[] = $tok;
+            if ($tok instanceof HTMLPurifier_Token_Start) $level++;
+            if ($tok instanceof HTMLPurifier_Token_End) $level--;
+        }
+        if ($wrapping) $out[] = $wrap_close;
+        return $out;
+    }
+
+    private function init($config) {
+        // one-time setup, guarded by the $this->init flag
+        if ($this->init) return;
+
+        $html_def = $config->getHTMLDefinition();
+
+        // remember the real element set, and build the wider fake one from
+        // the 'Flow' content set plus #PCDATA
+        $this->real_elements = $this->elements;
+        $flow = $html_def->info_content_sets['Flow'];
+        $flow['#PCDATA'] = true;
+        $this->fake_elements = $flow;
+
+        $this->init = true;
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php
new file mode 100755
index 000000000..34f0227dd
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php
@@ -0,0 +1,142 @@
+ true, 'tbody' => true, 'thead' => true,
+ 'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
+ // Empty constructor: takes no arguments and performs no initialisation.
+ // NOTE(review): presumably present to override a parent constructor that
+ // expects arguments - TODO confirm against the parent class.
+ public function __construct() {}
+    /**
+     * Validates the children of a table and puts them into a fixed order:
+     * caption, col/colgroup, thead, tfoot, then tr/tbody content, then any
+     * trailing whitespace.
+     *
+     * Only the first caption is kept. A second thead or tfoot is renamed
+     * to tbody and treated as content.
+     *
+     * @param $tokens_of_children Array of tokens to validate
+     * @param $config Config object (not used by this method)
+     * @param $context Context object (not used by this method)
+     * @return false if there are no tokens or no tr/tbody content was
+     *         found; true if the reordered list is identical to the input;
+     *         otherwise the reordered array of tokens
+     */
+    public function validateChildren($tokens_of_children, $config, $context) {
+        if (empty($tokens_of_children)) return false;
+
+        // this ensures that the loop gets run one last time before closing
+        // up. It's a little bit of a hack, but it works! Just make sure you
+        // get rid of the token later.
+        $tokens_of_children[] = false;
+
+        // only one of these elements is allowed in a table
+        $caption = false;
+        $thead   = false;
+        $tfoot   = false;
+
+        // as many of these as you want
+        $cols    = array();
+        $content = array();
+
+        $nesting = 0; // current depth so we can determine nodes
+        $is_collecting = false; // are we globbing together tokens to package
+                                // into one of the collectors?
+        $collection = array(); // collected nodes
+        $tag_index = 0; // the first node might be whitespace,
+                        // so this tells us where the start tag is
+
+        foreach ($tokens_of_children as $token) {
+            $is_child = ($nesting == 0);
+
+            if ($token === false) {
+                // terminating sequence started
+            } elseif ($token instanceof HTMLPurifier_Token_Start) {
+                $nesting++;
+            } elseif ($token instanceof HTMLPurifier_Token_End) {
+                $nesting--;
+            }
+
+            // handle node collection
+            if ($is_collecting) {
+                if ($is_child) {
+                    // okay, let's stash the tokens away
+                    // first token tells us the type of the collection
+                    switch ($collection[$tag_index]->name) {
+                        case 'tr':
+                        case 'tbody':
+                            $content[] = $collection;
+                            break;
+                        case 'caption':
+                            if ($caption !== false) break;
+                            $caption = $collection;
+                            break;
+                        case 'thead':
+                        case 'tfoot':
+                            // access the appropriate variable, $thead or $tfoot
+                            $var = $collection[$tag_index]->name;
+                            if ($$var === false) {
+                                $$var = $collection;
+                            } else {
+                                // transmutate the first and last entries into
+                                // tbody tags, and then put into content
+                                $collection[$tag_index]->name = 'tbody';
+                                $collection[count($collection)-1]->name = 'tbody';
+                                $content[] = $collection;
+                            }
+                            break;
+                        case 'colgroup':
+                            $cols[] = $collection;
+                            break;
+                    }
+                    $collection = array();
+                    $is_collecting = false;
+                    $tag_index = 0;
+                } else {
+                    // add the node to the collection
+                    $collection[] = $token;
+                }
+            }
+
+            // terminate
+            if ($token === false) break;
+
+            if ($is_child) {
+                // determine what we're dealing with
+                if ($token->name == 'col') {
+                    // the only empty tag in the posse, we can handle it
+                    // immediately
+                    $cols[] = array_merge($collection, array($token));
+                    $collection = array();
+                    $tag_index = 0;
+                    continue;
+                }
+                // This switch is the last statement of the loop body, so
+                // leaving it with "break" moves on to the next token. A bare
+                // "continue" here raises a warning on PHP 7.3+.
+                switch ($token->name) {
+                    case 'caption':
+                    case 'colgroup':
+                    case 'thead':
+                    case 'tfoot':
+                    case 'tbody':
+                    case 'tr':
+                        $is_collecting = true;
+                        $collection[] = $token;
+                        break;
+                    default:
+                        if (!empty($token->is_whitespace)) {
+                            // leading whitespace shifts the start tag's index
+                            $collection[] = $token;
+                            $tag_index++;
+                        }
+                        break;
+                }
+            }
+        }
+
+        if (empty($content)) return false;
+
+        $ret = array();
+        if ($caption !== false) $ret = array_merge($ret, $caption);
+        // $cols is always an array, so no false-check is needed
+        foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
+        if ($thead !== false) $ret = array_merge($ret, $thead);
+        if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
+        foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
+        if (!empty($collection) && $is_collecting == false){
+            // grab the trailing space
+            $ret = array_merge($ret, $collection);
+        }
+
+        array_pop($tokens_of_children); // remove phantom token
+
+        return ($ret === $tokens_of_children) ? true : $ret;
+
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Config.php b/lib/htmlpurifier/library/HTMLPurifier/Config.php
new file mode 100755
index 000000000..f8e1f7804
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Config.php
@@ -0,0 +1,497 @@
+plist = new HTMLPurifier_PropertyList($definition->defaultPlist);
+ $this->def = $definition; // keep a copy around for checking
+ $this->parser = new HTMLPurifier_VarParser_Flexible();
+ }
+
+    /**
+     * Convenience factory that builds a config object from a loosely typed
+     * description.
+     * @param mixed $config Either an HTMLPurifier_Config instance (returned
+     *              unchanged), an array of directives understood by
+     *              loadArray(), or a string naming an ini file for loadIni().
+     *              Any other value yields an unmodified config object.
+     * @param HTMLPurifier_ConfigSchema $schema Schema to build the config
+     *              from; when empty, createDefault() is used instead.
+     * @return HTMLPurifier_Config object
+     */
+    public static function create($config, $schema = null) {
+        // an existing config object is handed back untouched
+        if ($config instanceof HTMLPurifier_Config) return $config;
+
+        if ($schema) {
+            $instance = new HTMLPurifier_Config($schema);
+        } else {
+            $instance = HTMLPurifier_Config::createDefault();
+        }
+
+        if (is_string($config)) {
+            $instance->loadIni($config);
+        } elseif (is_array($config)) {
+            $instance->loadArray($config);
+        }
+        return $instance;
+    }
+
+    /**
+     * Builds a configuration object on top of the application-wide
+     * HTMLPurifier_ConfigSchema instance.
+     * @return Default HTMLPurifier_Config object.
+     */
+    public static function createDefault() {
+        $schema = HTMLPurifier_ConfigSchema::instance();
+        return new HTMLPurifier_Config($schema);
+    }
+
+    /**
+     * Retrieves a value from the configuration.
+     *
+     * Raises E_USER_WARNING and returns nothing when the directive is not
+     * in the schema; raises E_USER_ERROR and returns nothing when the
+     * directive is an alias.
+     * @param $namespace String namespace
+     * @param $key String key
+     * @return Value stored in the property list for "$namespace.$key"
+     */
+    public function get($namespace, $key) {
+        if (!$this->finalized) {
+            if ($this->autoFinalize) {
+                $this->finalize();
+            } else {
+                $this->plist->squash(true);
+            }
+        }
+
+        $known = isset($this->def->info[$namespace][$key]);
+        if (!$known) {
+            // can't add % due to SimpleTest bug
+            trigger_error(
+                'Cannot retrieve value of undefined directive ' . htmlspecialchars("$namespace.$key"),
+                E_USER_WARNING
+            );
+            return;
+        }
+
+        if (isset($this->def->info[$namespace][$key]->isAlias)) {
+            $target = $this->def->info[$namespace][$key];
+            trigger_error(
+                'Cannot get value from aliased directive, use real name ' . $target->namespace . '.' . $target->name,
+                E_USER_ERROR
+            );
+            return;
+        }
+
+        return $this->plist->get("$namespace.$key");
+    }
+
+    /**
+     * Retrieves every directive => value pair of a single namespace.
+     *
+     * Raises E_USER_WARNING and returns nothing when the namespace is not
+     * in the schema.
+     * @param $namespace String namespace
+     * @return Array of directive name => value
+     */
+    public function getBatch($namespace) {
+        if (!$this->finalized) {
+            if ($this->autoFinalize) {
+                $this->finalize();
+            } else {
+                $this->plist->squash(true);
+            }
+        }
+        if (isset($this->def->info[$namespace])) {
+            $everything = $this->getAll();
+            return $everything[$namespace];
+        }
+        trigger_error('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
+            E_USER_WARNING);
+        return;
+    }
+
+    /**
+     * Returns an md5 signature of one namespace of the configuration,
+     * memoised in $this->serials until set() clears it.
+     * @note The DefinitionRev entry is removed from the batch before it
+     *       is hashed, so it does not influence the signature.
+     * @param $namespace Namespace to get serial for
+     * @return md5 string
+     */
+    public function getBatchSerial($namespace) {
+        if (!empty($this->serials[$namespace])) {
+            return $this->serials[$namespace];
+        }
+        $directives = $this->getBatch($namespace);
+        unset($directives['DefinitionRev']);
+        $this->serials[$namespace] = md5(serialize($directives));
+        return $this->serials[$namespace];
+    }
+
+    /**
+     * Returns an md5 signature of the whole configuration (the serialized
+     * result of getAll()), memoised in $this->serial.
+     * @return md5 string
+     */
+    public function getSerial() {
+        if (!empty($this->serial)) {
+            return $this->serial;
+        }
+        $this->serial = md5(serialize($this->getAll()));
+        return $this->serial;
+    }
+
+    /**
+     * Retrieves all directives as a two-level array:
+     * namespace => (directive => value).
+     */
+    public function getAll() {
+        if (!$this->finalized) {
+            if ($this->autoFinalize) {
+                $this->finalize();
+            } else {
+                $this->plist->squash(true);
+            }
+        }
+        $by_namespace = array();
+        foreach ($this->plist->squash() as $name => $value) {
+            // split "Namespace.Directive" at the first dot only
+            list($ns, $directive) = explode('.', $name, 2);
+            $by_namespace[$ns][$directive] = $value;
+        }
+        return $by_namespace;
+    }
+
+    /**
+     * Sets a value to configuration.
+     *
+     * Nothing is stored (and an error of the noted level is triggered)
+     * when the config is finalized (E_USER_ERROR via isFinalized()), the
+     * directive is unknown (E_USER_WARNING), the value cannot be parsed
+     * to the directive's type (E_USER_WARNING) or a string value is not
+     * in the directive's list of allowed values (E_USER_WARNING).
+     * Setting through an alias forwards to the real directive and then
+     * raises E_USER_NOTICE.
+     * @param $namespace String namespace
+     * @param $key String key
+     * @param $value Mixed value
+     * @param $from_alias Internal flag, true when this call is the
+     *        forwarded half of an alias lookup; used to reject aliases
+     *        that point at other aliases
+     */
+    public function set($namespace, $key, $value, $from_alias = false) {
+        if ($this->isFinalized('Cannot set directive after finalization')) return;
+        if (!isset($this->def->info[$namespace][$key])) {
+            trigger_error('Cannot set undefined directive ' . htmlspecialchars("$namespace.$key") . ' to value',
+                E_USER_WARNING);
+            return;
+        }
+        $def = $this->def->info[$namespace][$key];
+
+        if (isset($def->isAlias)) {
+            if ($from_alias) {
+                trigger_error('Double-aliases not allowed, please fix '.
+                    'ConfigSchema bug with ' . "$namespace.$key", E_USER_ERROR);
+                return;
+            }
+            $this->set($new_ns = $def->namespace,
+                       $new_dir = $def->name,
+                       $value, true);
+            trigger_error("$namespace.$key is an alias, preferred directive name is $new_ns.$new_dir", E_USER_NOTICE);
+            return;
+        }
+
+        // Raw type might be negative when using the fully optimized form
+        // of stdclass, which indicates allow_null == true
+        $rtype = is_int($def) ? $def : $def->type;
+        if ($rtype < 0) {
+            $type = -$rtype;
+            $allow_null = true;
+        } else {
+            $type = $rtype;
+            $allow_null = isset($def->allow_null);
+        }
+
+        try {
+            $value = $this->parser->parse($value, $type, $allow_null);
+        } catch (HTMLPurifier_VarParserException $e) {
+            trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
+            return;
+        }
+        if (is_string($value) && is_object($def)) {
+            // resolve value alias if defined
+            if (isset($def->aliases[$value])) {
+                $value = $def->aliases[$value];
+            }
+            // check to see if the value is allowed
+            if (isset($def->allowed) && !isset($def->allowed[$value])) {
+                trigger_error('Value not supported, valid values are: ' .
+                    $this->_listify($def->allowed), E_USER_WARNING);
+                return;
+            }
+        }
+        $this->plist->set("$namespace.$key", $value);
+
+        // reset definitions if the directives they depend on changed
+        // this is a very costly process, so it's discouraged
+        // with finalization. URI is included because getDefinition()
+        // keeps URI definitions in the same in-memory cache.
+        if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
+            $this->definitions[$namespace] = null;
+        }
+
+        // force getBatchSerial() to recompute this namespace's signature
+        $this->serials[$namespace] = false;
+    }
+
+    /**
+     * Error-reporting helper: joins the keys of a lookup array into a
+     * comma separated string.
+     * @param $lookup Array whose keys are the names to list
+     */
+    private function _listify($lookup) {
+        return implode(', ', array_keys($lookup));
+    }
+
+    /**
+     * Shortcut for getDefinition('HTML', $raw).
+     * @param $raw Return a copy that has not been setup yet. Must be
+     *        called before it's been setup, otherwise won't work.
+     */
+    public function getHTMLDefinition($raw = false) {
+        $definition = $this->getDefinition('HTML', $raw);
+        return $definition;
+    }
+
+    /**
+     * Shortcut for getDefinition('CSS', $raw).
+     * @param $raw Return a copy that has not been setup yet. Must be
+     *        called before it's been setup, otherwise won't work.
+     */
+    public function getCSSDefinition($raw = false) {
+        $definition = $this->getDefinition('CSS', $raw);
+        return $definition;
+    }
+
+ /**
+ * Retrieves a definition object, creating, setting up and caching it
+ * as needed. The object is kept in $this->definitions[$type].
+ * @param $type Type of definition: HTML, CSS or URI
+ * @param $raw Whether or not definition should be returned raw, i.e.
+ * without setup() having been run on it
+ * @return Definition object for $type
+ * @throws HTMLPurifier_Exception when $type is not supported, or when a
+ * new raw definition is requested while %$type.DefinitionID is null
+ */
+ public function getDefinition($type, $raw = false) {
+ // finalize (or squash the property list) before anything is read
+ if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true);
+ $factory = HTMLPurifier_DefinitionCacheFactory::instance();
+ $cache = $factory->create($type, $this);
+ if (!$raw) {
+ // see if we can quickly supply a definition
+ if (!empty($this->definitions[$type])) {
+ // an in-memory definition that was handed out raw earlier still
+ // needs its setup() run and its cache entry written
+ if (!$this->definitions[$type]->setup) {
+ $this->definitions[$type]->setup($this);
+ $cache->set($this->definitions[$type], $this);
+ }
+ return $this->definitions[$type];
+ }
+ // memory check missed, try cache
+ $this->definitions[$type] = $cache->get($this);
+ if ($this->definitions[$type]) {
+ // definition in cache, return it
+ return $this->definitions[$type];
+ }
+ } elseif (
+ !empty($this->definitions[$type]) &&
+ !$this->definitions[$type]->setup
+ ) {
+ // raw requested, raw in memory, quick return
+ return $this->definitions[$type];
+ }
+ // quick checks failed, let's create the object
+ if ($type == 'HTML') {
+ $this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
+ } elseif ($type == 'CSS') {
+ $this->definitions[$type] = new HTMLPurifier_CSSDefinition();
+ } elseif ($type == 'URI') {
+ $this->definitions[$type] = new HTMLPurifier_URIDefinition();
+ } else {
+ throw new HTMLPurifier_Exception("Definition of $type type not supported");
+ }
+ // quick abort if raw
+ if ($raw) {
+ if (is_null($this->get($type, 'DefinitionID'))) {
+ // fatally error out if definition ID not set
+ throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
+ }
+ // note: the raw object is returned without setup() and without
+ // being written to the cache
+ return $this->definitions[$type];
+ }
+ // set it up
+ $this->definitions[$type]->setup($this);
+ // save in cache
+ $cache->set($this->definitions[$type], $this);
+ return $this->definitions[$type];
+ }
+
+    /**
+     * Loads configuration values from an array. Two layouts are accepted
+     * and may be mixed:
+     *   - condensed: 'Namespace.Directive' => value (an underscore may be
+     *     used in place of the dot)
+     *   - nested:    'Namespace' => array('Directive' => value, ...)
+     * Every pair is passed to set(), which reports unknown directives.
+     * Does nothing (beyond the error raised by isFinalized()) once the
+     * configuration is finalized.
+     * @param $config_array Configuration associative array
+     */
+    public function loadArray($config_array) {
+        if ($this->isFinalized('Cannot load directives after finalization')) return;
+        foreach ($config_array as $key => $value) {
+            $key = str_replace('_', '.', $key);
+            if (strpos($key, '.') !== false) {
+                // condensed form. Split at the first separator only, like
+                // getAll() does: a key with extra separators then reaches
+                // set() as an unknown directive and is reported, instead
+                // of silently setting the truncated "Namespace.Directive".
+                list($namespace, $directive) = explode('.', $key, 2);
+                $this->set($namespace, $directive, $value);
+            } else {
+                $namespace = $key;
+                $namespace_values = $value;
+                foreach ($namespace_values as $directive => $value) {
+                    $this->set($namespace, $directive, $value);
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns a list of array(namespace, directive) pairs for every schema
+     * directive that may appear in a web form.
+     *
+     * Alias directives and the DefinitionID / DefinitionRev directives
+     * are always left out.
+     * @param $allowed true to allow everything, otherwise a string or
+     *        list of strings: 'Ns' allows a whole namespace, 'Ns.Dir'
+     *        allows one directive, '-Ns.Dir' blacklists one directive
+     * @param $schema Schema to inspect, defaults to the global instance
+     */
+    public static function getAllowedDirectivesForForm($allowed, $schema = null) {
+        if (!$schema) {
+            $schema = HTMLPurifier_ConfigSchema::instance();
+        }
+        $unrestricted = ($allowed === true);
+        if (!$unrestricted) {
+            if (is_string($allowed)) $allowed = array($allowed);
+            $allowed_ns = array();
+            $allowed_directives = array();
+            $blacklisted_directives = array();
+            foreach ($allowed as $entry) {
+                if (strpos($entry, '.') === false) {
+                    // no dot: a whole namespace
+                    $allowed_ns[$entry] = true;
+                } elseif ($entry[0] == '-') {
+                    // leading minus: blacklisted directive
+                    $blacklisted_directives[substr($entry, 1)] = true;
+                } else {
+                    $allowed_directives[$entry] = true;
+                }
+            }
+        }
+        $pairs = array();
+        foreach ($schema->info as $ns => $keypairs) {
+            foreach ($keypairs as $directive => $def) {
+                if (!$unrestricted) {
+                    $id = "$ns.$directive";
+                    if (isset($blacklisted_directives[$id])) continue;
+                    $listed = isset($allowed_directives[$id]) || isset($allowed_ns[$ns]);
+                    if (!$listed) continue;
+                }
+                if (isset($def->isAlias)) continue;
+                if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
+                $pairs[] = array($ns, $directive);
+            }
+        }
+        return $pairs;
+    }
+
+    /**
+     * Creates a configuration object from $_GET/$_POST data that was
+     * posted via ConfigForm.
+     * @param $array $_GET or $_POST array to import
+     * @param $index Index/name that the config variables are in
+     * @param $allowed List of allowed namespaces/directives
+     * @param $mq_fix Boolean whether or not to enable magic quotes fix
+     * @param $schema Instance of HTMLPurifier_ConfigSchema to use, if not global copy
+     * @return HTMLPurifier_Config object
+     */
+    public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
+        $directives = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema);
+        return HTMLPurifier_Config::create($directives, $schema);
+    }
+
+    /**
+     * Merges configuration values from $_GET/$_POST into this object,
+     * using this object's own schema. NOT STATIC.
+     * @note Same parameters as loadArrayFromForm, minus $schema
+     */
+    public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) {
+        $this->loadArray(
+            HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def)
+        );
+    }
+
+    /**
+     * Turns a form submission into the nested
+     * namespace => (directive => value) array that the stricter parts of
+     * HTMLPurifier_Config expect.
+     *
+     * Only directives returned by getAllowedDirectivesForForm() are
+     * copied. A non-empty "Null_Ns.Dir" field sets the directive to null;
+     * otherwise the "Ns.Dir" field is copied when present.
+     * @param $array Submitted data
+     * @param $index Sub-array of $array to read from, or false for $array
+     *        itself; a missing or non-array sub-array counts as empty
+     * @param $allowed List of allowed namespaces/directives
+     * @param $mq_fix Whether to strip slashes when magic quotes are on
+     * @param $schema Schema to use, if not global copy
+     */
+    public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
+        if ($index !== false) {
+            if (isset($array[$index]) && is_array($array[$index])) {
+                $array = $array[$index];
+            } else {
+                $array = array();
+            }
+        }
+        $strip = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc();
+
+        $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema);
+        $prepared = array();
+        foreach ($allowed as $pair) {
+            list($ns, $directive) = $pair;
+            $field = "$ns.$directive";
+            if (!empty($array["Null_$field"])) {
+                $prepared[$ns][$directive] = null;
+            } elseif (isset($array[$field])) {
+                $prepared[$ns][$directive] = $strip ? stripslashes($array[$field]) : $array[$field];
+            }
+        }
+        return $prepared;
+    }
+
+    /**
+     * Loads configuration values from an ini file. The file is parsed
+     * with sections enabled and the result is handed to loadArray().
+     *
+     * Raises E_USER_WARNING and loads nothing when the file cannot be
+     * parsed. Does nothing (beyond the error raised by isFinalized())
+     * once the configuration is finalized.
+     * @param $filename Name of ini file
+     */
+    public function loadIni($filename) {
+        if ($this->isFinalized('Cannot load directives after finalization')) return;
+        $array = parse_ini_file($filename, true);
+        if (!is_array($array)) {
+            // parse_ini_file() signals failure with a non-array result;
+            // iterating over that in loadArray() would only add a second,
+            // misleading warning
+            trigger_error('Cannot load configuration from ini file ' . htmlspecialchars($filename),
+                E_USER_WARNING);
+            return;
+        }
+        $this->loadArray($array);
+    }
+
+    /**
+     * Tells whether the configuration object is finalized, optionally
+     * raising E_USER_ERROR with the given message when it is.
+     * @param $error String error message, or false for no error
+     * @return Value of the finalized flag
+     */
+    public function isFinalized($error = false) {
+        if ($error && $this->finalized) {
+            trigger_error($error, E_USER_ERROR);
+        }
+        return $this->finalized;
+    }
+
+    /**
+     * Calls finalize() when the autoFinalize flag is on and the
+     * configuration has not been finalized yet; otherwise does nothing.
+     */
+    public function autoFinalize() {
+        if ($this->finalized || !$this->autoFinalize) {
+            return;
+        }
+        $this->finalize();
+    }
+
+    /**
+     * Marks the configuration object as finalized. set(), loadArray()
+     * and loadIni() refuse to change a finalized object.
+     */
+    public function finalize() {
+        $this->finalized = true;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema.php
new file mode 100755
index 000000000..340ed7dbc
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema.php
@@ -0,0 +1,231 @@
+ array(
+ * 'Directive' => new stdclass(),
+ * )
+ * )
+ *
+ * The stdclass may have the following properties:
+ *
+ * - If isAlias isn't set:
+ * - type: Integer type of directive, see HTMLPurifier_VarParser for definitions
+ * - allow_null: If set, this directive allows null values
+ * - aliases: If set, an associative array of value aliases to real values
+ * - allowed: If set, a lookup array of allowed (string) values
+ * - If isAlias is set:
+ * - namespace: Namespace this directive aliases to
+ * - name: Directive name this directive aliases to
+ *
+ * In certain degenerate cases, stdclass will actually be an integer. In
+ * that case, the value is equivalent to an stdclass with the type
+ * property set to the integer. If the integer is negative, type is
+ * equal to the absolute value of integer, and allow_null is true.
+ *
+ * This class is friendly with HTMLPurifier_Config. If you need introspection
+ * about the schema, you're better off using the ConfigSchema_Interchange,
+ * which uses more memory but has much richer information.
+ */
+ public $info = array();
+
+ /**
+ * Application-wide singleton
+ */
+ static protected $singleton;
+
+    /**
+     * Starts the schema with an empty property list of default values.
+     */
+    public function __construct() {
+        $this->defaultPlist = new HTMLPurifier_PropertyList();
+    }
+
+    /**
+     * Unserializes the default ConfigSchema from the schema.ser file
+     * located under HTMLPURIFIER_PREFIX.
+     */
+    public static function makeFromSerial() {
+        $serialized = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
+        return unserialize($serialized);
+    }
+
+    /**
+     * Retrieves an instance of the application-wide configuration definition.
+     * @param $prototype null to fetch the current singleton (loading it
+     *        from the serialized schema on first use), true to force a
+     *        reload from the serialized schema, or a schema object to
+     *        install as the new singleton
+     * @return The application-wide schema object
+     */
+    public static function instance($prototype = null) {
+        // The reload request has to be tested before the generic
+        // "anything but null replaces the singleton" case, otherwise
+        // boolean true itself would be stored as the schema.
+        if ($prototype === true ||
+            ($prototype === null && HTMLPurifier_ConfigSchema::$singleton === null)
+        ) {
+            HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
+        } elseif ($prototype !== null) {
+            HTMLPurifier_ConfigSchema::$singleton = $prototype;
+        }
+        return HTMLPurifier_ConfigSchema::$singleton;
+    }
+
+    /**
+     * Defines a directive for configuration: records its type information
+     * in $info and its default in both $defaults and the default
+     * property list.
+     * @warning This method's signature is slightly different from the legacy
+     *          define() static method! Beware!
+     * @param $namespace Namespace the directive is in
+     * @param $name Key of directive
+     * @param $default Default value of directive
+     * @param $type Allowed type of the directive, either the integer
+     *        constant or a key of HTMLPurifier_VarParser::$types
+     * @param $allow_null Whether or not to allow null values
+     */
+    public function add($namespace, $name, $default, $type, $allow_null) {
+        $def = new stdclass();
+        if (is_int($type)) {
+            $def->type = $type;
+        } else {
+            $def->type = HTMLPurifier_VarParser::$types[$type];
+        }
+        if ($allow_null) {
+            $def->allow_null = true;
+        }
+        $this->info[$namespace][$name] = $def;
+        $this->defaults[$namespace][$name] = $default;
+        $this->defaultPlist->set("$namespace.$name", $default);
+    }
+
+    /**
+     * Defines a namespace for directives to be put into. Both the info
+     * and the defaults entry of the namespace are (re)set to an empty
+     * array.
+     * @warning This is slightly different from the corresponding static
+     *          method.
+     * @param $namespace Namespace's name
+     */
+    public function addNamespace($namespace) {
+        $this->defaults[$namespace] = array();
+        $this->info[$namespace] = array();
+    }
+
+    /**
+     * Defines directive value aliases.
+     *
+     * Value aliases let several input values map onto the same real
+     * value of a directive. New aliases are merged into any that were
+     * registered before; an alias that already exists is overwritten.
+     * @param $namespace Directive's namespace
+     * @param $name Name of Directive
+     * @param $aliases Hash of aliased values to the real alias
+     */
+    public function addValueAliases($namespace, $name, $aliases) {
+        // work on the schema entry itself, not on a copy
+        $def =& $this->info[$namespace][$name];
+        if (!isset($def->aliases)) {
+            $def->aliases = array();
+        }
+        foreach ($aliases as $alias => $real) {
+            $def->aliases[$alias] = $real;
+        }
+    }
+
+    /**
+     * Defines the set of allowed values for a directive, replacing any
+     * set stored earlier.
+     * @warning This is slightly different from the corresponding static
+     *          method definition.
+     * @param $namespace Namespace of directive
+     * @param $name Name of directive
+     * @param $allowed Lookup array of allowed values
+     */
+    public function addAllowedValues($namespace, $name, $allowed) {
+        $this->info[$namespace][$name]->allowed = $allowed;
+    }
+
+    /**
+     * Defines a directive alias for backwards compatibility. The alias
+     * is stored as an info entry carrying isAlias plus the namespace and
+     * name of the real directive.
+     * @param $namespace Namespace of the alias
+     * @param $name Directive that will be aliased
+     * @param $new_namespace Namespace of the real directive
+     * @param $new_name Directive that the alias will be to
+     */
+    public function addAlias($namespace, $name, $new_namespace, $new_name) {
+        $alias = new stdclass;
+        $alias->namespace = $new_namespace;
+        $alias->name = $new_name;
+        $alias->isAlias = true;
+        $this->info[$namespace][$name] = $alias;
+    }
+
+    /**
+     * Compacts the schema: an info entry that holds nothing but a type is
+     * replaced by that integer type, and an entry that holds only a type
+     * and allow_null is replaced by the negated type.
+     *
+     * Entries that are not objects carrying a type (for instance entries
+     * compacted by an earlier call) are left alone, so the method can be
+     * called more than once.
+     */
+    public function postProcess() {
+        foreach ($this->info as $namespace => $info) {
+            foreach ($info as $directive => $v) {
+                // casting an already-compacted integer to array also
+                // yields one element; without this guard it would be
+                // overwritten with the non-existent $v->type
+                if (!is_object($v) || !isset($v->type)) continue;
+                $properties = count((array) $v);
+                if ($properties == 1) {
+                    $this->info[$namespace][$directive] = $v->type;
+                } elseif ($properties == 2 && isset($v->allow_null)) {
+                    $this->info[$namespace][$directive] = -$v->type;
+                }
+            }
+        }
+    }
+
+ // DEPRECATED METHODS
+
+    /**
+     * Deprecated static counterpart of add(); operates on the singleton.
+     * @param $type Type name, optionally followed by '/null' to allow
+     *        null values
+     * @param $description Not used by this method
+     * @see HTMLPurifier_ConfigSchema->add()
+     */
+    public static function define($namespace, $name, $default, $type, $description) {
+        HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
+        $pieces = explode('/', $type, 2);
+        // only the exact modifier 'null' switches null values on
+        $allow_null = isset($pieces[1]) && $pieces[1] === 'null';
+        $schema = HTMLPurifier_ConfigSchema::instance();
+        $schema->add($namespace, $name, $default, $pieces[0], $allow_null);
+    }
+
+    /**
+     * Deprecated static counterpart of addNamespace(); operates on the
+     * singleton.
+     * @param $description Not used by this method
+     * @see HTMLPurifier_ConfigSchema->addNamespace()
+     */
+    public static function defineNamespace($namespace, $description) {
+        HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
+        $schema = HTMLPurifier_ConfigSchema::instance();
+        $schema->addNamespace($namespace);
+    }
+
+    /**
+     * Deprecated static counterpart of addValueAliases(); operates on the
+     * singleton.
+     * @see HTMLPurifier_ConfigSchema->addValueAliases()
+     */
+    public static function defineValueAliases($namespace, $name, $aliases) {
+        HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
+        $schema = HTMLPurifier_ConfigSchema::instance();
+        $schema->addValueAliases($namespace, $name, $aliases);
+    }
+
+    /**
+     * Deprecated static counterpart of addAllowedValues(); operates on
+     * the singleton.
+     * @param $allowed_values Plain list of allowed values; it is turned
+     *        into the value => true lookup that addAllowedValues() takes
+     * @see HTMLPurifier_ConfigSchema->addAllowedValues()
+     */
+    public static function defineAllowedValues($namespace, $name, $allowed_values) {
+        HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
+        $lookup = array();
+        foreach ($allowed_values as $permitted) {
+            $lookup[$permitted] = true;
+        }
+        $schema = HTMLPurifier_ConfigSchema::instance();
+        $schema->addAllowedValues($namespace, $name, $lookup);
+    }
+
+    /**
+     * Deprecated static counterpart of addAlias(); operates on the
+     * singleton.
+     * @see HTMLPurifier_ConfigSchema->addAlias()
+     */
+    public static function defineAlias($namespace, $name, $new_namespace, $new_name) {
+        HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
+        $schema = HTMLPurifier_ConfigSchema::instance();
+        $schema->addAlias($namespace, $name, $new_namespace, $new_name);
+    }
+
+    /**
+     * Raises a deprecation notice, then forwards the three arguments
+     * unchanged to HTMLPurifier_VarParser->parse() and returns its result.
+     * @deprecated, use HTMLPurifier_VarParser->parse()
+     */
+    public function validate($a, $b, $c = false) {
+        trigger_error("HTMLPurifier_ConfigSchema->validate deprecated, use HTMLPurifier_VarParser->parse instead", E_USER_NOTICE);
+        $var_parser = new HTMLPurifier_VarParser();
+        $parsed = $var_parser->parse($a, $b, $c);
+        return $parsed;
+    }
+
+    /**
+     * Throws an E_USER_NOTICE stating that a method is deprecated.
+     * @param $method Name of the deprecated method, either bare or in the
+     *        "Class::method" form produced by __METHOD__
+     */
+    private static function deprecated($method) {
+        // The define*() callers pass __METHOD__, which already carries
+        // the class name; prefixing it again would repeat the class name
+        // in the message.
+        if (strpos($method, '::') === false) {
+            $method = "HTMLPurifier_ConfigSchema::$method";
+        }
+        trigger_error("Static $method deprecated, use add*() method instead", E_USER_NOTICE);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php
new file mode 100755
index 000000000..987f547bc
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php
@@ -0,0 +1,52 @@
+namespaces as $n) {
+ $schema->addNamespace($n->namespace);
+ }
+ foreach ($interchange->directives as $d) {
+ $schema->add(
+ $d->id->namespace,
+ $d->id->directive,
+ $d->default,
+ $d->type,
+ $d->typeAllowsNull
+ );
+ if ($d->allowed !== null) {
+ $schema->addAllowedValues(
+ $d->id->namespace,
+ $d->id->directive,
+ $d->allowed
+ );
+ }
+ foreach ($d->aliases as $alias) {
+ $schema->addAlias(
+ $alias->namespace,
+ $alias->directive,
+ $d->id->namespace,
+ $d->id->directive
+ );
+ }
+ if ($d->valueAliases !== null) {
+ $schema->addValueAliases(
+ $d->id->namespace,
+ $d->id->directive,
+ $d->valueAliases
+ );
+ }
+ }
+ $schema->postProcess();
+ return $schema;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/Xml.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/Xml.php
new file mode 100755
index 000000000..51bcab78c
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/Xml.php
@@ -0,0 +1,108 @@
+startElement('div');
+
+ $purifier = HTMLPurifier::getInstance();
+ $html = $purifier->purify($html);
+ $this->writeAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
+ $this->writeRaw($html);
+
+ $this->endElement(); // div
+ }
+
+    /**
+     * Renders a value as PHP source text via var_export(), except that an
+     * empty array is written as the compact 'array()'.
+     */
+    protected function export($var) {
+        return ($var === array()) ? 'array()' : var_export($var, true);
+    }
+
+    /**
+     * Writes the whole interchange object as one XML document: a
+     * <configdoc> root holding a <title> and one <namespace> element per
+     * namespace, then flushes the writer.
+     * @param $interchange Interchange object to document
+     */
+    public function build($interchange) {
+        // global access, only use as last resort
+        $this->interchange = $interchange;
+
+        $this->setIndent(true);
+        $this->startDocument('1.0', 'UTF-8');
+
+        $this->startElement('configdoc');
+        $this->writeElement('title', $interchange->name);
+        foreach ($interchange->namespaces as $ns) {
+            $this->buildNamespace($ns);
+        }
+        $this->endElement(); // configdoc
+
+        $this->flush();
+    }
+
+    /**
+     * Writes one <namespace> element: its name, its description and every
+     * directive of the stored interchange that belongs to the namespace.
+     * @param $namespace Namespace object with namespace and description
+     */
+    public function buildNamespace($namespace) {
+        $ns_name = $namespace->namespace;
+
+        $this->startElement('namespace');
+        $this->writeAttribute('id', $ns_name);
+        $this->writeElement('name', $ns_name);
+
+        $this->startElement('description');
+        $this->writeHTMLDiv($namespace->description);
+        $this->endElement(); // description
+
+        foreach ($this->interchange->directives as $directive) {
+            if ($directive->id->namespace === $namespace->namespace) {
+                $this->buildDirective($directive);
+            }
+        }
+
+        $this->endElement(); // namespace
+    }
+
+    /**
+     * Writes one <directive> element: name, aliases, constraints
+     * (version, type, allowed values, default, external projects),
+     * deprecation information and description.
+     * @param $directive Directive object to document
+     */
+    public function buildDirective($directive) {
+        $this->startElement('directive');
+        $this->writeAttribute('id', $directive->id->toString());
+
+        $this->writeElement('name', $directive->id->directive);
+
+        $this->startElement('aliases');
+            foreach ($directive->aliases as $alias) $this->writeElement('alias', $alias->toString());
+        $this->endElement(); // aliases
+
+        $this->startElement('constraints');
+            if ($directive->version) $this->writeElement('version', $directive->version);
+            $this->startElement('type');
+                if ($directive->typeAllowsNull) $this->writeAttribute('allow-null', 'yes');
+                $this->text($directive->type);
+            $this->endElement(); // type
+            if ($directive->allowed) {
+                $this->startElement('allowed');
+                    foreach ($directive->allowed as $value => $x) $this->writeElement('value', $value);
+                $this->endElement(); // allowed
+            }
+            // The xml:space attribute belongs on <default> and can only
+            // be written while that element's start tag is still open.
+            // writeElement() emits the complete element in one go, so the
+            // element is assembled by hand here.
+            // NOTE(review): presumes $this is an XMLWriter -- confirm
+            // against the class declaration.
+            $this->startElement('default');
+                $this->writeAttribute('xml:space', 'preserve');
+                $this->text($this->export($directive->default));
+            $this->endElement(); // default
+            if ($directive->external) {
+                $this->startElement('external');
+                    foreach ($directive->external as $project) $this->writeElement('project', $project);
+                $this->endElement(); // external
+            }
+        $this->endElement(); // constraints
+
+        if ($directive->deprecatedVersion) {
+            $this->startElement('deprecated');
+                $this->writeElement('version', $directive->deprecatedVersion);
+                $this->writeElement('use', $directive->deprecatedUse->toString());
+            $this->endElement(); // deprecated
+        }
+
+        $this->startElement('description');
+            $this->writeHTMLDiv($directive->description);
+        $this->endElement(); // description
+
+        $this->endElement(); // directive
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Exception.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Exception.php
new file mode 100755
index 000000000..2671516c5
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Exception.php
@@ -0,0 +1,11 @@
+ array(namespace info)
+ */
+ public $namespaces = array();
+
+ /**
+ * Array of Directive ID => array(directive info)
+ */
+ public $directives = array();
+
+    /**
+     * Registers a namespace object in $namespaces, keyed by its name.
+     * @throws HTMLPurifier_ConfigSchema_Exception if a namespace of that
+     *         name is already registered
+     */
+    public function addNamespace($namespace) {
+        $i = $namespace->namespace;
+        if (isset($this->namespaces[$i])) {
+            throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine namespace '$i'");
+        }
+        $this->namespaces[$i] = $namespace;
+    }
+
+    /**
+     * Registers a directive object in $directives, keyed by the string
+     * form of its id.
+     * @throws HTMLPurifier_ConfigSchema_Exception if a directive with
+     *         that id is already registered
+     */
+    public function addDirective($directive) {
+        $i = $directive->id->toString();
+        if (isset($this->directives[$i])) {
+            throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine directive '$i'");
+        }
+        $this->directives[$i] = $directive;
+    }
+
+    /**
+     * Convenience function to perform standard validation: runs a fresh
+     * HTMLPurifier_ConfigSchema_Validator over this object and returns
+     * its result. Throws exception on failed validation.
+     */
+    public function validate() {
+        $checker = new HTMLPurifier_ConfigSchema_Validator();
+        $result = $checker->validate($this);
+        return $result;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Directive.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Directive.php
new file mode 100755
index 000000000..ac8be0d97
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Directive.php
@@ -0,0 +1,77 @@
+ true).
+ * Null if all values are allowed.
+ */
+ public $allowed;
+
+ /**
+ * List of aliases for the directive,
+ * e.g. array(new HTMLPurifier_ConfigSchema_Interchange_Id('Ns', 'Dir'))).
+ */
+ public $aliases = array();
+
+ /**
+ * Hash of value aliases, e.g. array('alt' => 'real'). Null if value
+ * aliasing is disabled (necessary for non-scalar types).
+ */
+ public $valueAliases;
+
+ /**
+ * Version of HTML Purifier the directive was introduced, e.g. '1.3.1'.
+ * Null if the directive has always existed.
+ */
+ public $version;
+
+ /**
+ * ID of directive that supersedes this old directive, is an instance
+ * of HTMLPurifier_ConfigSchema_Interchange_Id. Null if not deprecated.
+ */
+ public $deprecatedUse;
+
+ /**
+ * Version of HTML Purifier this directive was deprecated. Null if not
+ * deprecated.
+ */
+ public $deprecatedVersion;
+
+ /**
+ * List of external projects this directive depends on, e.g. array('CSSTidy').
+ */
+ public $external = array();
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Id.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Id.php
new file mode 100755
index 000000000..ec01589b6
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Id.php
@@ -0,0 +1,31 @@
+namespace = $namespace;
+ $this->directive = $directive;
+ }
+
+    /**
+     * Returns the id in "Namespace.Directive" form.
+     * @warning This is NOT magic, to ensure that people don't abuse SPL and
+     *          cause problems for PHP 5.0 support.
+     */
+    public function toString() {
+        return "{$this->namespace}.{$this->directive}";
+    }
+
+    /**
+     * Builds an id object from a "Namespace.Directive" string.
+     * @param $id String id, split at its dots; the first two pieces are used
+     */
+    public static function make($id) {
+        list($ns, $dir) = explode('.', $id);
+        $made = new HTMLPurifier_ConfigSchema_Interchange_Id($ns, $dir);
+        return $made;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Namespace.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Namespace.php
new file mode 100755
index 000000000..3ffac0a0f
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Namespace.php
@@ -0,0 +1,21 @@
+varParser = $varParser ? $varParser : new HTMLPurifier_VarParser_Native();
+ }
+
+ /**
+  * Builds an interchange object from the schema files in a directory.
+  * The schema name is read from info.ini; every *.txt file whose name does
+  * not start with a period is then parsed and passed to build(), in sorted
+  * filename order.
+  * @param $dir Directory containing info.ini and the schema files. File
+  *             names are appended to it directly, so it must end with a
+  *             directory separator. When empty, the schema directory
+  *             under HTMLPURIFIER_PREFIX is used.
+  * @return HTMLPurifier_ConfigSchema_Interchange filled from the files
+  * @throws HTMLPurifier_ConfigSchema_Exception if the directory cannot be
+  *         opened for reading
+  */
+ public static function buildFromDirectory($dir = null) {
+     $parser = new HTMLPurifier_StringHashParser();
+     $builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder();
+     $interchange = new HTMLPurifier_ConfigSchema_Interchange();
+
+     if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema/';
+     $info = parse_ini_file($dir . 'info.ini');
+     $interchange->name = $info['name'];
+
+     $files = array();
+     $dh = opendir($dir);
+     if ($dh === false) {
+         // opendir() returns false on failure. Handing that to readdir()
+         // is an error, and the loop below could not be relied upon to
+         // terminate, so fail loudly instead.
+         throw new HTMLPurifier_ConfigSchema_Exception("Could not open schema directory '$dir'");
+     }
+     while (false !== ($file = readdir($dh))) {
+         // Only visible *.txt files are schema definitions.
+         if (!$file || $file[0] == '.' || strrchr($file, '.') !== '.txt') {
+             continue;
+         }
+         $files[] = $file;
+     }
+     closedir($dh);
+
+     // readdir() order is filesystem dependent; sort for a stable build.
+     sort($files);
+     foreach ($files as $file) {
+         $builder->build(
+             $interchange,
+             new HTMLPurifier_StringHash( $parser->parseFile($dir . $file) )
+         );
+     }
+
+     return $interchange;
+ }
+
+ /**
+  * Adds the namespace or directive described by one hash to an
+  * interchange object. An ID without a period is treated as a namespace,
+  * any other ID as a directive. Keys the builder never read are reported
+  * afterwards.
+  * @param $interchange HTMLPurifier_ConfigSchema_Interchange object to build
+  * @param $hash HTMLPurifier_StringHash source data; anything else is
+  *        wrapped in a new HTMLPurifier_StringHash first
+  * @throws HTMLPurifier_ConfigSchema_Exception if the hash has no ID
+  */
+ public function build($interchange, $hash) {
+     $source = ($hash instanceof HTMLPurifier_StringHash)
+         ? $hash
+         : new HTMLPurifier_StringHash($hash);
+
+     if (!isset($source['ID'])) {
+         throw new HTMLPurifier_ConfigSchema_Exception('Hash does not have any ID');
+     }
+
+     $is_directive = strpos($source['ID'], '.') !== false;
+     if ($is_directive) {
+         $this->buildDirective($interchange, $source);
+     } else {
+         $this->buildNamespace($interchange, $source);
+     }
+
+     $this->_findUnused($source);
+ }
+
+ /**
+  * Creates a namespace entry from a hash and registers it.
+  * @param $interchange Interchange object that receives the namespace
+  * @param $hash Hash supplying ID and, optionally, DESCRIPTION
+  */
+ public function buildNamespace($interchange, $hash) {
+     $entry = new HTMLPurifier_ConfigSchema_Interchange_Namespace();
+     $entry->namespace = $hash->offsetGet('ID');
+
+     $has_description = isset($hash['DESCRIPTION']);
+     if ($has_description) {
+         $entry->description = $hash->offsetGet('DESCRIPTION');
+     }
+
+     $interchange->addNamespace($entry);
+ }
+
+ /**
+ * Creates a directive entry from a hash and adds it to the interchange
+ * object via addDirective().
+ *
+ * ID and TYPE are required. DEFAULT, DESCRIPTION, ALLOWED, VALUE-ALIASES,
+ * ALIASES, VERSION, DEPRECATED-USE, DEPRECATED-VERSION and EXTERNAL are
+ * read only when present.
+ *
+ * @param $interchange Interchange object that receives the directive
+ * @param $hash Hash supplying the keys listed above
+ * @throws HTMLPurifier_ConfigSchema_Exception if TYPE is missing, or if
+ * DEFAULT cannot be parsed as the declared type
+ */
+ public function buildDirective($interchange, $hash) {
+ $directive = new HTMLPurifier_ConfigSchema_Interchange_Directive();
+
+ // These are required elements:
+ $directive->id = $this->id($hash->offsetGet('ID'));
+ $id = $directive->id->toString(); // convenience
+
+ // TYPE is split on '/'; any part after the slash (the bundled schema
+ // files use e.g. "string/null") marks the directive as allowing null.
+ if (isset($hash['TYPE'])) {
+ $type = explode('/', $hash->offsetGet('TYPE'));
+ if (isset($type[1])) $directive->typeAllowsNull = true;
+ $directive->type = $type[0];
+ } else {
+ throw new HTMLPurifier_ConfigSchema_Exception("TYPE in directive hash '$id' not defined");
+ }
+
+ // DEFAULT is parsed against the type read above; parser failures are
+ // rethrown as schema exceptions that name the offending directive.
+ if (isset($hash['DEFAULT'])) {
+ try {
+ $directive->default = $this->varParser->parse($hash->offsetGet('DEFAULT'), $directive->type, $directive->typeAllowsNull);
+ } catch (HTMLPurifier_VarParserException $e) {
+ throw new HTMLPurifier_ConfigSchema_Exception($e->getMessage() . " in DEFAULT in directive hash '$id'");
+ }
+ }
+
+ if (isset($hash['DESCRIPTION'])) {
+ $directive->description = $hash->offsetGet('DESCRIPTION');
+ }
+
+ // ALLOWED and VALUE-ALIASES hold PHP array contents that are evaluated
+ // by evalArray(); ALLOWED is additionally turned into a lookup array.
+ if (isset($hash['ALLOWED'])) {
+ $directive->allowed = $this->lookup($this->evalArray($hash->offsetGet('ALLOWED')));
+ }
+
+ if (isset($hash['VALUE-ALIASES'])) {
+ $directive->valueAliases = $this->evalArray($hash->offsetGet('VALUE-ALIASES'));
+ }
+
+ // ALIASES is a comma-separated list of ids; each becomes an Id object.
+ if (isset($hash['ALIASES'])) {
+ $raw_aliases = trim($hash->offsetGet('ALIASES'));
+ $aliases = preg_split('/\s*,\s*/', $raw_aliases);
+ foreach ($aliases as $alias) {
+ $directive->aliases[] = $this->id($alias);
+ }
+ }
+
+ if (isset($hash['VERSION'])) {
+ $directive->version = $hash->offsetGet('VERSION');
+ }
+
+ if (isset($hash['DEPRECATED-USE'])) {
+ $directive->deprecatedUse = $this->id($hash->offsetGet('DEPRECATED-USE'));
+ }
+
+ if (isset($hash['DEPRECATED-VERSION'])) {
+ $directive->deprecatedVersion = $hash->offsetGet('DEPRECATED-VERSION');
+ }
+
+ // EXTERNAL is a comma-separated list kept as plain strings.
+ if (isset($hash['EXTERNAL'])) {
+ $directive->external = preg_split('/\s*,\s*/', trim($hash->offsetGet('EXTERNAL')));
+ }
+
+ $interchange->addDirective($directive);
+ }
+
+ /**
+ * Evaluates an array PHP code string without array() wrapper.
+ * The string is wrapped in "return array(...);" and run through eval().
+ * NOTE(review): eval() executes arbitrary PHP, so this is only safe while
+ * $contents originates from trusted schema files -- confirm that no caller
+ * passes externally supplied data.
+ * @param $contents String holding the contents that go inside array()
+ * @return Result of the eval() call
+ */
+ protected function evalArray($contents) {
+ return eval('return array('. $contents .');');
+ }
+
+ /**
+  * Turns a list of values into a lookup array, i.e. an array whose keys
+  * are the original values and whose values are all true.
+  * @param $array List of values to use as keys
+  * @return Lookup array
+  */
+ protected function lookup($array) {
+     $table = array();
+     foreach ($array as $entry) {
+         $table[$entry] = true;
+     }
+     return $table;
+ }
+
+ /**
+  * Shorthand for HTMLPurifier_ConfigSchema_Interchange_Id::make().
+  * @param $id String Id to convert
+  * @return Whatever make() returns for that string
+  */
+ protected function id($id) {
+     $object = HTMLPurifier_ConfigSchema_Interchange_Id::make($id);
+     return $object;
+ }
+
+ /**
+  * Raises an E_USER_NOTICE for every key of the hash that is absent from
+  * the hash's own record of accessed keys; such leftovers may indicate
+  * typos, missing values, etc.
+  * @param $hash Hash object to check; must provide getAccessed() and be
+  *        iterable
+  */
+ protected function _findUnused($hash) {
+     $seen = $hash->getAccessed();
+     foreach ($hash as $k => $ignored) {
+         if (isset($seen[$k])) {
+             continue;
+         }
+         trigger_error("String hash key '$k' not used by builder", E_USER_NOTICE);
+     }
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Validator.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Validator.php
new file mode 100755
index 000000000..2dfd37bae
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Validator.php
@@ -0,0 +1,225 @@
+parser = new HTMLPurifier_VarParser();
+ }
+
+ /**
+  * Checks an entire interchange object: every namespace first, then every
+  * directive. Problems are reported through error(), which throws an
+  * HTMLPurifier_ConfigSchema_Exception.
+  * @param $interchange Interchange object exposing namespaces and directives
+  * @return Boolean true when validation ran to completion
+  */
+ public function validate($interchange) {
+     $this->interchange = $interchange;
+     $this->aliases = array();
+     // PHP may juggle array keys between integer and string, hence the
+     // loose (non-identical) comparisons against the stored ids below.
+     foreach ($interchange->namespaces as $i => $namespace) {
+         if (!($i == $namespace->namespace)) {
+             $this->error(false, "Integrity violation: key '$i' does not match internal id '{$namespace->namespace}'");
+         }
+         $this->validateNamespace($namespace);
+     }
+     foreach ($interchange->directives as $i => $directive) {
+         $id = $directive->id->toString();
+         if (!($i == $id)) {
+             $this->error(false, "Integrity violation: key '$i' does not match internal id '$id'");
+         }
+         $this->validateDirective($directive);
+     }
+     return true;
+ }
+
+ /**
+  * Checks one namespace object: its name must be non-empty and
+  * alphanumeric, its description a non-empty string.
+  * @param $n HTMLPurifier_ConfigSchema_Interchange_Namespace to check
+  */
+ public function validateNamespace($n) {
+     $this->context[] = "namespace '{$n->namespace}'";
+
+     $name = $this->with($n, 'namespace');
+     $name->assertNotEmpty();
+     $name->assertAlnum(); // implicit assertIsString handled by InterchangeBuilder
+
+     $description = $this->with($n, 'description');
+     $description->assertNotEmpty();
+     $description->assertIsString(); // handled by InterchangeBuilder
+
+     array_pop($this->context);
+ }
+
+ /**
+  * Validates a HTMLPurifier_ConfigSchema_Interchange_Id object: it must
+  * be of that class, name a namespace present in the interchange being
+  * validated, and have a non-empty alphanumeric directive part.
+  * @param $id Object to check
+  * @throws HTMLPurifier_ConfigSchema_Exception on the first failed check
+  */
+ public function validateId($id) {
+     // The type check has to come before any method call on $id;
+     // otherwise a wrong argument dies in toString() and this error
+     // is never reported.
+     if (!$id instanceof HTMLPurifier_ConfigSchema_Interchange_Id) {
+         // handled by InterchangeBuilder
+         $this->context[] = 'id';
+         $this->error(false, 'is not an instance of HTMLPurifier_ConfigSchema_Interchange_Id');
+     }
+     $id_string = $id->toString();
+     $this->context[] = "id '$id_string'";
+     if (!isset($this->interchange->namespaces[$id->namespace])) {
+         $this->error('namespace', 'does not exist'); // assumes that the namespace was validated already
+     }
+     $this->with($id, 'directive')
+         ->assertNotEmpty()
+         ->assertAlnum(); // implicit assertIsString handled by InterchangeBuilder
+     array_pop($this->context);
+ }
+
+ /**
+ * Validates a HTMLPurifier_ConfigSchema_Interchange_Directive object.
+ *
+ * Checks, in order: the directive's id, its description, its type and
+ * typeAllowsNull flag, that the default value parses as the declared
+ * type, that a string type is used whenever allowed values or value
+ * aliases are given, and finally the allowed values, value aliases and
+ * aliases themselves. Failures are raised as
+ * HTMLPurifier_ConfigSchema_Exception by error() or by the validator atom.
+ *
+ * @param $d Directive object to check
+ */
+ public function validateDirective($d) {
+ $id = $d->id->toString();
+ // Pushed so that error messages name this directive; popped at the end.
+ $this->context[] = "directive '$id'";
+ $this->validateId($d->id);
+
+ $this->with($d, 'description')
+ ->assertNotEmpty();
+
+ // BEGIN - handled by InterchangeBuilder
+ $this->with($d, 'type')
+ ->assertNotEmpty();
+ $this->with($d, 'typeAllowsNull')
+ ->assertIsBool();
+ try {
+ // This also tests validity of $d->type
+ $this->parser->parse($d->default, $d->type, $d->typeAllowsNull);
+ } catch (HTMLPurifier_VarParserException $e) {
+ $this->error('default', 'had error: ' . $e->getMessage());
+ }
+ // END - handled by InterchangeBuilder
+
+ if (!is_null($d->allowed) || !empty($d->valueAliases)) {
+ // allowed and valueAliases require that we be dealing with
+ // strings, so check for that early.
+ // NOTE(review): presumably $types maps a type name to the key used
+ // by $stringTypes -- confirm against HTMLPurifier_VarParser.
+ $d_int = HTMLPurifier_VarParser::$types[$d->type];
+ if (!isset(HTMLPurifier_VarParser::$stringTypes[$d_int])) {
+ $this->error('type', 'must be a string type when used with allowed or value aliases');
+ }
+ }
+
+ $this->validateDirectiveAllowed($d);
+ $this->validateDirectiveValueAliases($d);
+ $this->validateDirectiveAliases($d);
+
+ array_pop($this->context);
+ }
+
+ /**
+  * Checks the $allowed member of a directive, if it is set: it must be a
+  * non-empty lookup array with string keys, and a string default must be
+  * one of those keys.
+  * @param $d HTMLPurifier_ConfigSchema_Interchange_Directive to check
+  */
+ public function validateDirectiveAllowed($d) {
+     if ($d->allowed === null) {
+         return;
+     }
+
+     $atom = $this->with($d, 'allowed');
+     $atom->assertNotEmpty();
+     $atom->assertIsLookup(); // handled by InterchangeBuilder
+
+     $default_is_string = is_string($d->default);
+     if ($default_is_string && !isset($d->allowed[$d->default])) {
+         $this->error('default', 'must be an allowed value');
+     }
+
+     $this->context[] = 'allowed';
+     foreach (array_keys($d->allowed) as $val) {
+         if (is_string($val)) {
+             continue;
+         }
+         $this->error("value $val", 'must be a string');
+     }
+     array_pop($this->context);
+ }
+
+ /**
+  * Checks the $valueAliases member of a directive, if it is set: aliases
+  * and their targets must be strings, no alias may point at itself, and,
+  * when allowed values are defined, an alias must not itself be an allowed
+  * value but must point at one.
+  * @param $d HTMLPurifier_ConfigSchema_Interchange_Directive to check
+  */
+ public function validateDirectiveValueAliases($d) {
+     if ($d->valueAliases === null) {
+         return;
+     }
+
+     $atom = $this->with($d, 'valueAliases');
+     $atom->assertIsArray(); // handled by InterchangeBuilder
+
+     $this->context[] = 'valueAliases';
+
+     // First pass: every pair on its own.
+     foreach ($d->valueAliases as $alias => $real) {
+         if (!is_string($alias)) {
+             $this->error("alias $alias", 'must be a string');
+         }
+         if (!is_string($real)) {
+             $this->error("alias target $real from alias '$alias'", 'must be a string');
+         }
+         if ($alias === $real) {
+             $this->error("alias '$alias'", "must not be an alias to itself");
+         }
+     }
+
+     // Second pass: every pair against the allowed values, if any.
+     if ($d->allowed !== null) {
+         foreach ($d->valueAliases as $alias => $real) {
+             if (isset($d->allowed[$alias])) {
+                 $this->error("alias '$alias'", 'must not be an allowed value');
+                 continue;
+             }
+             if (!isset($d->allowed[$real])) {
+                 $this->error("alias '$alias'", 'must be an alias to an allowed value');
+             }
+         }
+     }
+
+     array_pop($this->context);
+ }
+
+ /**
+  * Checks the $aliases member of a directive: it must be an array of
+  * valid ids, none of which collides with an existing directive or with
+  * an alias recorded earlier during this validation run. Each alias is
+  * recorded against the directive's id as it passes.
+  * @param $d HTMLPurifier_ConfigSchema_Interchange_Directive to check
+  */
+ public function validateDirectiveAliases($d) {
+     $atom = $this->with($d, 'aliases');
+     $atom->assertIsArray(); // handled by InterchangeBuilder
+
+     $this->context[] = 'aliases';
+     foreach ($d->aliases as $alias) {
+         $this->validateId($alias);
+         $s = $alias->toString();
+
+         $hits_directive = isset($this->interchange->directives[$s]);
+         if ($hits_directive) {
+             $this->error("alias '$s'", 'collides with another directive');
+         }
+
+         $hits_alias = isset($this->aliases[$s]);
+         if ($hits_alias) {
+             $other_directive = $this->aliases[$s];
+             $this->error("alias '$s'", "collides with alias for directive '$other_directive'");
+         }
+
+         $this->aliases[$s] = $d->id->toString();
+     }
+     array_pop($this->context);
+ }
+
+ // protected helper functions
+
+ /**
+  * Creates a HTMLPurifier_ConfigSchema_ValidatorAtom for one member
+  * variable of an object, labelled with the current formatted context.
+  * @param $obj Object that owns the member
+  * @param $member Name of the member variable to validate
+  * @return New HTMLPurifier_ConfigSchema_ValidatorAtom
+  */
+ protected function with($obj, $member) {
+     $where = $this->getFormattedContext();
+     return new HTMLPurifier_ConfigSchema_ValidatorAtom($where, $obj, $member);
+ }
+
+ /**
+  * Throws an HTMLPurifier_ConfigSchema_Exception whose message is
+  * prefixed with the current context.
+  * @param $target Name of the offending item, or false to have the
+  *        message start with the context itself
+  * @param $msg Description of the problem
+  */
+ protected function error($target, $msg) {
+     $where = $this->getFormattedContext();
+     $prefix = ($target === false)
+         ? ucfirst($where)
+         : (ucfirst($target) . ' in ' . $where);
+     $message = trim($prefix . ' ' . $msg);
+     throw new HTMLPurifier_ConfigSchema_Exception($message);
+ }
+
+ /**
+  * Renders the context stack as text, most recently pushed entry first,
+  * with the entries joined by " in ".
+  * @return Formatted context string
+  */
+ protected function getFormattedContext() {
+     $innermost_first = array_reverse($this->context);
+     return implode(' in ', $innermost_first);
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/ValidatorAtom.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/ValidatorAtom.php
new file mode 100755
index 000000000..b95aea18c
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/ValidatorAtom.php
@@ -0,0 +1,66 @@
+context = $context;
+ $this->obj = $obj;
+ $this->member = $member;
+ $this->contents =& $obj->$member;
+ }
+
+ /**
+  * Reports an error unless the member's value is a string.
+  * @return This atom, so that assertions can be chained
+  */
+ public function assertIsString() {
+     if (is_string($this->contents)) {
+         return $this;
+     }
+     $this->error('must be a string');
+     return $this;
+ }
+
+ /**
+  * Reports an error unless the member's value is a boolean.
+  * @return This atom, so that assertions can be chained
+  */
+ public function assertIsBool() {
+     if (is_bool($this->contents)) {
+         return $this;
+     }
+     $this->error('must be a boolean');
+     return $this;
+ }
+
+ /**
+  * Reports an error unless the member's value is an array.
+  * @return This atom, so that assertions can be chained
+  */
+ public function assertIsArray() {
+     if (is_array($this->contents)) {
+         return $this;
+     }
+     $this->error('must be an array');
+     return $this;
+ }
+
+ /**
+  * Reports an error if the member's value is null.
+  * @return This atom, so that assertions can be chained
+  */
+ public function assertNotNull() {
+     if (is_null($this->contents)) {
+         $this->error('must not be null');
+     }
+     return $this;
+ }
+
+ /**
+  * Reports an error unless the member's value is a string for which
+  * ctype_alnum() returns true.
+  * @return This atom, so that assertions can be chained
+  */
+ public function assertAlnum() {
+     $this->assertIsString();
+     $is_alnum = ctype_alnum($this->contents);
+     if (!$is_alnum) {
+         $this->error('must be alphanumeric');
+     }
+     return $this;
+ }
+
+ /**
+  * Reports an error if the member's value is empty in the sense of PHP's
+  * empty().
+  * @return This atom, so that assertions can be chained
+  */
+ public function assertNotEmpty() {
+     if (!empty($this->contents)) {
+         return $this;
+     }
+     $this->error('must not be empty');
+     return $this;
+ }
+
+ /**
+  * Reports an error unless the member's value is an array in which every
+  * value is exactly true.
+  * @return This atom, so that assertions can be chained
+  */
+ public function assertIsLookup() {
+     $this->assertIsArray();
+     foreach ($this->contents as $flag) {
+         if ($flag === true) {
+             continue;
+         }
+         $this->error('must be a lookup array');
+     }
+     return $this;
+ }
+
+ /**
+  * Throws an HTMLPurifier_ConfigSchema_Exception that names the member
+  * and the context this atom was created with.
+  * @param $msg Description of the problem
+  */
+ protected function error($msg) {
+     $subject = ucfirst($this->member);
+     $message = $subject . ' in ' . $this->context . ' ' . $msg;
+     throw new HTMLPurifier_ConfigSchema_Exception($message);
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser
new file mode 100755
index 000000000..1eaecd11f
Binary files /dev/null and b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser differ
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt
new file mode 100755
index 000000000..249edd647
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt
@@ -0,0 +1,12 @@
+Attr.AllowedFrameTargets
+TYPE: lookup
+DEFAULT: array()
+--DESCRIPTION--
+Lookup table of all allowed link frame targets. Some commonly used link
+targets include _blank, _self, _parent and _top. Values should be
+lowercase, as validation will be done in a case-sensitive manner despite
+W3C's recommendation. XHTML 1.0 Strict does not permit the target attribute
+so this directive will have no effect in that doctype. XHTML 1.1 does not
+enable the Target module by default, you will have to manually enable it
+(see the module documentation for more details.)
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt
new file mode 100755
index 000000000..9a8fa6a2e
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt
@@ -0,0 +1,9 @@
+Attr.AllowedRel
+TYPE: lookup
+VERSION: 1.6.0
+DEFAULT: array()
+--DESCRIPTION--
+List of allowed forward document relationships in the rel attribute. Common
+values may be nofollow or print. By default, this is empty, meaning that no
+document relationships are allowed.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt
new file mode 100755
index 000000000..b01788348
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt
@@ -0,0 +1,9 @@
+Attr.AllowedRev
+TYPE: lookup
+VERSION: 1.6.0
+DEFAULT: array()
+--DESCRIPTION--
+List of allowed reverse document relationships in the rev attribute. This
+attribute is a bit of an edge-case; if you don't know what it is for, stay
+away.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt
new file mode 100755
index 000000000..533165e17
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt
@@ -0,0 +1,11 @@
+Attr.DefaultImageAlt
+TYPE: string/null
+DEFAULT: null
+VERSION: 3.2.0
+--DESCRIPTION--
+This is the content of the alt tag of an image if the user had not
+previously specified an alt attribute. This applies to all images without
+a valid alt attribute, as opposed to %Attr.DefaultInvalidImageAlt, which
+only applies to invalid images, and overrides in the case of an invalid image.
+Default behavior with null is to use the basename of the src tag for the alt.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt
new file mode 100755
index 000000000..9eb7e3846
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt
@@ -0,0 +1,9 @@
+Attr.DefaultInvalidImage
+TYPE: string
+DEFAULT: ''
+--DESCRIPTION--
+This is the default image an img tag will be pointed to if it does not have
+a valid src attribute. In future versions, we may allow the image tag to
+be removed completely, but due to design issues, this is not possible right
+now.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt
new file mode 100755
index 000000000..2f17bf477
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt
@@ -0,0 +1,8 @@
+Attr.DefaultInvalidImageAlt
+TYPE: string
+DEFAULT: 'Invalid image'
+--DESCRIPTION--
+This is the content of the alt tag of an invalid image if the user had not
+previously specified an alt attribute. It has no effect when the image is
+valid but there was no alt attribute present.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt
new file mode 100755
index 000000000..52654b53a
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt
@@ -0,0 +1,10 @@
+Attr.DefaultTextDir
+TYPE: string
+DEFAULT: 'ltr'
+--DESCRIPTION--
+Defines the default text direction (ltr or rtl) of the document being
+parsed. This generally is the same as the value of the dir attribute in
+HTML, or ltr if that is not specified.
+--ALLOWED--
+'ltr', 'rtl'
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt
new file mode 100755
index 000000000..6440d2103
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt
@@ -0,0 +1,16 @@
+Attr.EnableID
+TYPE: bool
+DEFAULT: false
+VERSION: 1.2.0
+--DESCRIPTION--
+Allows the ID attribute in HTML. This is disabled by default due to the
+fact that without proper configuration user input can easily break the
+validation of a webpage by specifying an ID that is already on the
+surrounding HTML. If you don't mind throwing caution to the wind, enable
+this directive, but I strongly recommend you also consider blacklisting IDs
+you use (%Attr.IDBlacklist) or prefixing all user supplied IDs
+(%Attr.IDPrefix). When set to true HTML Purifier reverts to the behavior of
+pre-1.2.0 versions.
+--ALIASES--
+HTML.EnableAttrID
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt
new file mode 100755
index 000000000..5f2b5e3d2
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt
@@ -0,0 +1,5 @@
+Attr.IDBlacklist
+TYPE: list
+DEFAULT: array()
+DESCRIPTION: Array of IDs not allowed in the document.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt
new file mode 100755
index 000000000..6f5824586
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt
@@ -0,0 +1,9 @@
+Attr.IDBlacklistRegexp
+TYPE: string/null
+VERSION: 1.6.0
+DEFAULT: NULL
+--DESCRIPTION--
+PCRE regular expression to be matched against all IDs. If the expression
+matches, the ID is rejected. Use this with care: may cause significant
+degradation. ID matching is done after all other validation.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt
new file mode 100755
index 000000000..cc49d43fd
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt
@@ -0,0 +1,12 @@
+Attr.IDPrefix
+TYPE: string
+VERSION: 1.2.0
+DEFAULT: ''
+--DESCRIPTION--
+String to prefix to IDs. If you have no idea what IDs your pages may use,
+you may opt to simply add a prefix to all user-submitted ID attributes so
+that they are still usable, but will not conflict with core page IDs.
+Example: setting the directive to 'user_' will cause a user-submitted
+'foo' to become 'user_foo'. Be sure to set %HTML.EnableAttrID to true
+before using this.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt
new file mode 100755
index 000000000..2c5924a7a
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt
@@ -0,0 +1,14 @@
+Attr.IDPrefixLocal
+TYPE: string
+VERSION: 1.2.0
+DEFAULT: ''
+--DESCRIPTION--
+Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If you
+need to allow multiple sets of user content on a web page, you may need to
+have a separate prefix that changes with each iteration. This way,
+separately submitted user content displayed on the same page doesn't
+clobber each other. Ideal values are unique identifiers for the content it
+represents (i.e. the id of the row in the database). Be sure to add a
+separator (like an underscore) at the end. Warning: this directive will
+not work unless %Attr.IDPrefix is set to a non-empty value!
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.txt
new file mode 100755
index 000000000..2d72049d6
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.txt
@@ -0,0 +1,3 @@
+Attr
+DESCRIPTION: Features regarding attribute validation.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt
new file mode 100755
index 000000000..d5caa1bb9
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt
@@ -0,0 +1,31 @@
+AutoFormat.AutoParagraph
+TYPE: bool
+VERSION: 2.0.1
+DEFAULT: false
+--DESCRIPTION--
+
+
+ This directive turns on auto-paragraphing, where double newlines are
+ converted into paragraphs whenever possible. Auto-paragraphing:
+
+
+ To prevent auto-paragraphing as a content-producer, refrain from using
+ double-newlines except to specify a new paragraph or in contexts where
+ it has special meaning (whitespace usually has no meaning except in
+ tags like
+ This directive can be used to add custom auto-format injectors.
+ Specify an array of injector names (class name minus the prefix)
+ or concrete implementations. Injector class must exist.
+
+ This directive turns on the in-text display of URIs in <a> tags, and disables
+ those links. For example, example becomes
+ example (http://example.com).
+
+ This directive turns on linkification, auto-linking http, ftp and
+ https URLs.
+ Internal auto-formatter that converts configuration directives in
+ syntax %Namespace.Directive to links.
+ When enabled, HTML Purifier will attempt to remove empty elements that
+ contribute no semantic information to the document. The following types
+ of nodes will be removed:
+
+ Please be very careful when using this functionality; while it may not
+ seem that empty elements contain useful information, they can alter the
+ layout of a document given appropriate styling. This directive is most
+ useful when you are processing machine-generated HTML, please avoid using
+ it on regular user HTML.
+
+ Elements that contain only whitespace will be treated as empty. Non-breaking
+ spaces, however, do not count as whitespace.
+
+ This algorithm is not perfect; you may still notice some empty tags,
+ particularly if a node had elements, but those elements were later removed
+ because they were not permitted in that context, or tags that, after
+ being auto-closed by another tag, were empty. This is for safety reasons
+ to prevent clever code from breaking validation. The general rule of thumb:
+ if a tag looked empty on the way in, it will get removed; if HTML Purifier
+ made it empty, it will stay.
+
+ Location of configuration documentation to link to, let %s substitute
+ into the configuration's namespace and directive names sans the percent
+ sign.
+
+ If HTML Purifier's style attributes set is unsatisfactory for your needs,
+ you can overload it with your own list of tags to allow. Note that this
+ method is subtractive: it does its job by taking away from HTML Purifier's
+ usual feature set, so you cannot add an attribute that HTML Purifier never
+ supported in the first place.
+
+ Warning: If another directive conflicts with the
+ elements here, that directive will win and override.
+
+ Revision identifier for your custom definition. See
+ %HTML.DefinitionRev for details.
+
+ This parameter sets the maximum allowed length on
+ Whether or not to allow safe, proprietary CSS values.
+
+ Absolute path with no trailing slash to store serialized definitions in.
+ Default is within the
+ HTML Purifier library inside DefinitionCache/Serializer. This
+ path must be writable by the webserver.
+
+ This directive enables aggressive pre-filter fixes HTML Purifier can
+ perform in order to ensure that open angled-brackets do not get killed
+ during parsing stage. Enabling this will result in two preg_replace_callback
+ calls and at least two preg_replace calls for every HTML document parsed;
+ if your users make very well-formed HTML, you can set this directive false.
+ This has no effect when DirectLex is used.
+
+ Notice: This directive's default turned from false to true
+ in HTML Purifier 3.2.0.
+
+ Specifies the number of tokens the DirectLex line number tracking
+ implementations should process before attempting to resynchronize the
+ current line count by manually counting all previous new-lines. When
+ at 0, this functionality is disabled. Lower values will decrease
+ performance, and this is only strictly necessary if the counting
+ algorithm is buggy (in which case you should report it as a bug).
+ This has no effect when %Core.MaintainLineNumbers is disabled or DirectLex is
+ not being used.
+
+ This directive is a lookup array of elements which should have their
+ contents removed when they are not allowed by the HTML definition.
+ For example, the contents of a
+ This parameter determines what lexer implementation can be used. The
+ valid values are:
+
+ If true, HTML Purifier will add line number information to all tokens.
+ This is useful when error reporting is turned on, but can result in
+ significant performance degradation and should not be used when
+ unnecessary. This directive must be used with the DirectLex lexer,
+ as the DOMLex lexer does not (yet) support this functionality.
+ If the value is null, an appropriate value will be selected based
+ on other configuration.
+
+ This directive enables pre-emptive URI checking in
+ This directive enables HTML Purifier to remove not only script tags
+ but all of their contents.
+
+ This directive can be used to add custom filters; it is nearly the
+ equivalent of the now deprecated
+ This directive turns on the style block extraction filter, which removes
+
+ Sample usage:
+
+ Warning: It is possible for a user to mount an
+ imagecrash attack using this CSS. Counter-measures are difficult;
+ it is not simply enough to limit the range of CSS lengths (using
+ relative lengths with many nesting levels allows for large values
+ to be attained without actually specifying them in the stylesheet),
+ and the flexible nature of selectors makes it difficult to selectively
+ disable lengths on image tags (HTML Purifier, however, does disable
+ CSS width and height in inline styling). There are probably two effective
+ counter measures: an explicit width and height set to auto in all
+ images in your document (unlikely) or the disabling of width and
+ height (somewhat reasonable). Whether or not these measures should be
+ used is left to the reader.
+
+ This directive enables YouTube video embedding in HTML Purifier. Check
+ this document
+ on embedding videos for more information on what this filter does.
+
+ Whether or not to escape the dangerous characters <, > and &
+ as \3C, \3E and \26, respectively. This can be safely set to false
+ if the contents of StyleBlocks will be placed in an external stylesheet,
+ where there is no risk of it being interpreted as HTML.
+
+ If you would like users to be able to define external stylesheets, but
+ only allow them to specify CSS declarations for a specific node and
+ prevent them from fiddling with other elements, use this directive.
+ It accepts any valid CSS selector, and will prepend this to any
+ CSS declaration extracted from the document. For example, if this
+ directive is set to
+ The comma shorthand may be used; consider the above example, with
+
+ Warning: It is possible for users to bypass this measure
+ using a naughty + selector. This is a bug in CSS Tidy 1.3, not HTML
+ Purifier, and I am working to get it fixed. Until then, HTML Purifier
+ performs a basic check to prevent this.
+
+ If left NULL, HTML Purifier will attempt to instantiate a
+ However, for trusted user input, you can set this to
+ This is a convenience directive that rolls the functionality of
+ %HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
+ Specify elements and attributes that are allowed using:
+
+ Warning:
+ All of the constraints on the component directives are still enforced.
+ The syntax is a subset of TinyMCE's
+ If HTML Purifier's attribute set is unsatisfactory, overload it!
+ The syntax is "tag.attr" or "*.attr" for the global attributes
+ (style, id, class, dir, lang, xml:lang).
+
+ Warning: If another directive conflicts with the
+ elements here, that directive will win and override. For
+ example, %HTML.EnableAttrID will take precedence over *.id in this
+ directive. You must set that directive to true before you can use
+ IDs at all.
+
+ If HTML Purifier's tag set is unsatisfactory for your needs, you
+ can overload it with your own list of tags to allow. Note that this
+ method is subtractive: it does its job by taking away from HTML Purifier's
+ usual feature set, so you cannot add a tag that HTML Purifier never
+ supported in the first place (like embed, form or head). If you
+ change this, you probably also want to change %HTML.AllowedAttributes.
+
+ Warning: If another directive conflicts with the
+ elements here, that directive will win and override.
+
+ A doctype comes with a set of usual modules to use. Without having
+ to muck about with the doctypes, you can quickly activate or
+ disable these modules by specifying which modules you wish to allow
+ with this directive. This is most useful for unit testing specific
+ modules, although end users may find it useful for their own ends.
+
+ If you specify a module that does not exist, the manager will silently
+ fail to use it, so be careful! User-defined modules are not affected
+ by this directive. Modules defined in %HTML.CoreModules are not
+ affected by this directive.
+
+ String name of element to wrap inline elements that are inside a block
+ context. This only occurs in the children of blockquote in strict mode.
+
+ Example: by default value,
+
+ Certain modularized doctypes (XHTML, namely), have certain modules
+ that must be included for the doctype to be a conforming document
+ type: put those modules here. By default, XHTML's core modules
+ are used. You can set this to a blank array to disable core module
+ protection, but this is not recommended.
+
+ Unique identifier for a custom-built HTML definition. If you edit
+ the raw version of the HTMLDefinition, introducing changes that the
+ configuration object does not reflect, you must specify this variable.
+ If you change your custom edits, you should change this directive, or
+ clear your cache. Example:
+
+ In the above example, the configuration is still at the defaults, but
+ using the advanced API, an extra attribute has been added. The
+ configuration object normally has no way of knowing that this change
+ has taken place, so it needs an extra directive: %HTML.DefinitionID.
+ If someone else attempts to use the default configuration, these two
+ pieces of code will not clobber each other in the cache, since one has
+ an extra directive attached to it.
+
+ You must specify a value to this directive to use the
+ advanced API features.
+
+ Revision identifier for your custom definition specified in
+ %HTML.DefinitionID. This serves the same purpose: uniquely identifying
+ your custom definition, but this one does so in a chronological
+ context: revision 3 is more up-to-date than revision 2. Thus, when
+ this gets incremented, the cache handling is smart enough to clean
+ up any older revisions of your definition as well as flush the
+ cache.
+
+ While this directive is similar to %HTML.AllowedAttributes, for
+ forwards-compatibility with XML, this attribute has a different syntax. Instead of
+
+ Warning: This directive complements %HTML.ForbiddenElements,
+ accordingly, check
+ out that directive for a discussion of why you
+ should think twice before using this directive.
+
+ This was, perhaps, the most requested feature ever in HTML
+ Purifier. Please don't abuse it! This is the logical inverse of
+ %HTML.AllowedElements, and it will override that directive, or any
+ other directive.
+
+ If possible, %HTML.Allowed is recommended over this directive, because it
+ can sometimes be difficult to tell whether or not you've forbidden all of
+ the behavior you would like to disallow. If you forbid
+ This directive controls the maximum number of pixels in the width and
+ height attributes in
+ String name of element that HTML fragment passed to library will be
+ inserted in. An interesting variation would be using span as the
+ parent element, meaning that only inline tags would be allowed.
+
+ Whether or not to allow proprietary elements and attributes in your
+ documents, as per
+ Whether or not to permit embed tags in documents, with a number of extra
+ security features added to prevent script execution. This is similar to
+ what websites like MySpace do to embed tags. Embed is a proprietary
+ element and will cause your website to stop validating. You probably want
+ to enable this with %HTML.SafeObject.
+ Highly experimental.
+
+ Whether or not to permit object tags in documents, with a number of extra
+ security features added to prevent script execution. This is similar to
+ what websites like MySpace do to object tags. You may also want to
+ enable %HTML.SafeEmbed for maximum interoperability with Internet Explorer,
+ although embed tags will cause your website to stop validating.
+ Highly experimental.
+ General level of cleanliness the Tidy module should enforce.
+There are four allowed values:
+ Newline string to format final output with. If left null, HTML Purifier
+ will auto-detect the default newline type of the system and use that;
+ you can manually override it here. Remember, \r\n is Windows, \r
+ is Mac, and \n is Unix.
+
+ If true, HTML Purifier will sort attributes by name before writing them back
+ to the document, converting a tag like:
+ Determines whether or not to run Tidy on the final output for pretty
+ formatting reasons, such as indentation and wrap.
+
+ This can greatly improve readability for editors who are hand-editing
+ the HTML, but is by no means necessary as HTML Purifier has already
+ fixed all major errors the HTML may have had. Tidy is a non-default
+ extension, and this directive will silently fail if Tidy is not
+ available.
+
+ If you are looking to make the overall look of your page's source
+ better, I recommend running Tidy on the entire page rather than just
+ user-content (after all, the indentation relative to the containing
+ blocks will be incorrect).
+
+ The base URI is the URI of the document this purified HTML will be
+ inserted into. This information is important if HTML Purifier needs
+ to calculate absolute URIs from relative URIs, such as when %URI.MakeAbsolute
+ is on. You may use a non-absolute URI for this value, but behavior
+ may vary (%URI.MakeAbsolute deals nicely with both absolute and
+ relative paths, but forwards-compatibility is not guaranteed).
+ Warning: If set, the scheme on this URI
+ overrides the one specified by %URI.DefaultScheme.
+
+ Defines through what scheme the output will be served, in order to
+ select the proper object validator when no scheme information is present.
+
+ Unique identifier for a custom-built URI definition. If you want
+ to add custom URIFilters, you must specify this value.
+
+ Revision identifier for your custom definition. See
+ %HTML.DefinitionRev for details.
+
+ Disables all URIs in all forms. Not sure why you'd want to do that
+ (after all, the Internet's founded on the notion of a hyperlink).
+
+ Disables embedding resources, essentially meaning no pictures. You can
+ still link to them though. See %URI.DisableExternalResources for why
+ this might be a good idea.
+
+ Defines the domain name of the server, so we can determine whether or not
+ an absolute URI is from your website. Not strictly necessary,
+ as users should be using relative URIs to reference resources on your
+ website. It will, however, let you use absolute URIs to link to
+ subdomains of the domain you post here: i.e. example.com will allow
+ sub.example.com. However, higher up domains will still be excluded:
+ if you set %URI.Host to sub.example.com, example.com will be blocked.
+ Note: This directive overrides %URI.Base because
+ a given page may be on a sub-domain, but you wish HTML Purifier to be
+ more relaxed and allow some of the parent domains too.
+
+ Converts all URIs into absolute forms. This is useful when the HTML
+ being filtered assumes a specific base path, but will actually be
+ viewed in a different context (and setting an alternate base URI is
+ not possible). %URI.Base must be set for this directive to work.
+
+ Munges all browsable (usually http, https and ftp)
+ absolute URIs into another URI, usually a URI redirection service.
+ This directive accepts a URI, formatted with a
+ Uses for this directive:
+
+ Prior to HTML Purifier 3.1.1, this directive also enabled the munging
+ of browsable external resources, which could break things if your redirection
+ script was a splash page or used
+ You may want to also use %URI.MungeSecretKey along with this directive
+ in order to enforce what URIs your redirector script allows. Open
+ redirector scripts can be a security risk and negatively affect the
+ reputation of your domain name.
+
+ Starting with HTML Purifier 3.1.1, there are also these substitutions:
+
+ Admittedly, these letters are somewhat arbitrary; the only stipulation
+ was that they couldn't be a through f. r is for resource (I would have preferred
+ e, but you take what you can get), n is for name, m
+ was picked because it came after n (and I couldn't use a), p is for
+ property.
+
+ If true, any URI munging directives like %URI.Munge
+ will also apply to embedded resources, such as
+ Warning: It is strongly advised you use this in conjunction with
+ %URI.MungeSecretKey to mitigate the security risk of an open redirector.
+
+ This directive enables secure checksum generation along with %URI.Munge.
+ It should be set to a secure key that is not shared with anyone else.
+ The checksum can be placed in the URI using %t. Use of this checksum
+ affords an additional level of protection by allowing a redirector
+ to check if a URI has passed through HTML Purifier with this line:
+
+ If the output is TRUE, the redirector script should accept the URI.
+
+ Please note that it would still be possible for an attacker to procure
+ secure hashes en masse by abusing your website's Preview feature or the
+ like, but this service affords an additional level of protection
+ that should be combined with website blacklisting.
+
+ Remember this has no effect if %URI.Munge is not on.
+ ' . $this->locale->getMessage('ErrorCollector: No errors') . ' tags?
+ if ($this->allowsElement('p')) {
+ if (empty($this->currentNesting) || strpos($text, "\n\n") !== false) {
+ // Note that we have differing behavior when dealing with text
+ // in the anonymous root node, or a node inside the document.
+ // If the text as a double-newline, the treatment is the same;
+ // if it doesn't, see the next if-block if you're in the document.
+
+ $i = $nesting = null;
+ if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) {
+ // State 1.1: ... ^ (whitespace, then document end)
+ // ----
+ // This is a degenerate case
+ } else {
+ // State 1.2: PAR1
+ // ----
+
+ // State 1.3: PAR1\n\nPAR2
+ // ------------
+
+ // State 1.4: tag?
+ } elseif (
+ !empty($this->currentNesting) &&
+ $this->currentNesting[count($this->currentNesting)-1]->name == 'p'
+ ) {
+ // State 3.1: ... PAR1
+ // ----
+
+ // State 3.2: ... PAR1\n\nPAR2
+ // ------------
+ $token = array();
+ $this->_splitText($text, $token);
+ // Abort!
+ } else {
+ // State 4.1: ...PAR1
+ // ----
+
+ // State 4.2: ...PAR1\n\nPAR2
+ // ------------
+ }
+ }
+
+ public function handleElement(&$token) {
+ // We don't have to check if we're already in a tag for block
+ // tokens, because the tag would have been autoclosed by MakeWellFormed.
+ if ($this->allowsElement('p')) {
+ if (!empty($this->currentNesting)) {
+ if ($this->_isInline($token)) {
+ // State 1: PAR1 PAR1 is needed.
+ if ($this->_pLookAhead()) {
+ // State 1.3.1: tags.
+ }
+ }
+ }
+ } else {
+ // State 2.2:
+ // ---
+ }
+ }
+
+ /**
+ * Splits up a text in paragraph tokens and appends them
+ * to the result stream that will replace the original
+ * @param $data String text data that will be processed
+ * into paragraphs
+ * @param $result Reference to array of tokens that the
+ * tags will be appended onto
+ * @param $config Instance of HTMLPurifier_Config
+ * @param $context Instance of HTMLPurifier_Context
+ */
+ private function _splitText($data, &$result) {
+ $raw_paragraphs = explode("\n\n", $data);
+ $paragraphs = array(); // without empty paragraphs
+ $needs_start = false;
+ $needs_end = false;
+
+ $c = count($raw_paragraphs);
+ if ($c == 1) {
+ // There were no double-newlines, abort quickly. In theory this
+ // should never happen.
+ $result[] = new HTMLPurifier_Token_Text($data);
+ return;
+ }
+ for ($i = 0; $i < $c; $i++) {
+ $par = $raw_paragraphs[$i];
+ if (trim($par) !== '') {
+ $paragraphs[] = $par;
+ } else {
+ if ($i == 0) {
+ // Double newline at the front
+ if (empty($result)) {
+ // The empty result indicates that the AutoParagraph
+ // injector did not add any start paragraph tokens.
+ // This means that we have been in a paragraph for
+ // a while, and the newline means we should start a new one.
+ $result[] = new HTMLPurifier_Token_End('p');
+ $result[] = new HTMLPurifier_Token_Text("\n\n");
+ // However, the start token should only be added if
+ // there is more processing to be done (i.e. there are
+ // real paragraphs in here). If there are none, the
+ // next start paragraph tag will be handled by the
+ // next call to the injector
+ $needs_start = true;
+ } else {
+ // We just started a new paragraph!
+ // Reinstate a double-newline for presentation's sake, since
+ // it was in the source code.
+ array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
+ }
+ } elseif ($i + 1 == $c) {
+ // Double newline at the end
+ // There should be a trailing tag.
+ */
+ private function _pLookAhead() {
+     // $i / $current are first set here (presumably by reference -- confirm).
+     $this->current($i, $current);
+     // Begin at depth 1 when the current token is a Start token, else 0.
+     $nesting = ($current instanceof HTMLPurifier_Token_Start) ? 1 : 0;
+     while ($this->forwardUntilEndToken($i, $current, $nesting)) {
+         $verdict = $this->_checkNeedsP($current);
+         // A null verdict is inconclusive: keep walking forward.
+         if ($verdict !== null) {
+             return $verdict;
+         }
+     }
+     return false;
+ }
+
+ /**
+ * Determines if a particular token requires an earlier inline token
+ * to get a paragraph. This should be used with _forwardUntilEndToken
+ */
+ private function _checkNeedsP($current) {
+ if ($current instanceof HTMLPurifier_Token_Start){
+ if (!$this->_isInline($current)) {
+ //
+
+p tags must be allowed for this directive to take effect.
+ We do not use br tags for paragraphing, as that is
+ semantically incorrect.
+pre, so this should not be difficult.) To prevent
+ the paragraphing of inline text adjacent to block elements, wrap them
+ in div tags (the behavior is slightly different outside of
+ the root node.)
+a tags with the href attribute
+ must be allowed.
+a tags
+ with the href attribute must be allowed.
+
+<a></a> but not
+ <br />), and
+
+
colgroup element, orid or name attribute,
+ when those attributes are permitted on those elements.
+ Injectors)
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.PurifierLinkifyDocURL.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.PurifierLinkifyDocURL.txt
new file mode 100755
index 000000000..3e8309e39
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormatParam.PurifierLinkifyDocURL.txt
@@ -0,0 +1,12 @@
+AutoFormatParam.PurifierLinkifyDocURL
+TYPE: string
+VERSION: 2.0.1
+DEFAULT: '#%s'
+--DESCRIPTION--
+
+display:none; is considered a tricky property that
+will only be allowed if this directive is set to true.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt
new file mode 100755
index 000000000..460112ebe
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt
@@ -0,0 +1,18 @@
+CSS.AllowedProperties
+TYPE: lookup/null
+VERSION: 3.1.0
+DEFAULT: NULL
+--DESCRIPTION--
+
+img tags,
+ effectively the width and height properties.
+ Only absolute units of measurement (in, pt, pc, mm, cm) and pixels (px) are allowed. This is
+ in place to prevent imagecrash attacks, disable with null at your own risk.
+ This directive is similar to %HTML.MaxImgLength, and both should be
+ concurrently edited, although there are
+ subtle differences in the input format (the CSS max is a number with
+ a unit).
+script tag are not
+ normally shown in a document, so if script tags are to be removed,
+ their contents should be removed too. This is opposed to a b
+ tag, which defines some presentational changes but does not hide its
+ contents.
+
+
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt
new file mode 100755
index 000000000..eb841a759
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt
@@ -0,0 +1,16 @@
+Core.MaintainLineNumbers
+TYPE: bool/null
+VERSION: 2.0.0
+DEFAULT: NULL
+--DESCRIPTION--
+
+HTMLPurifier_Lexer.
+ I may remove this option simply because I don't expect anyone
+ to use it.
+ img
+ tags, as the attribute validation strategy is not authorized to
+ remove elements from the document. Revert to pre-1.3.0 behavior by setting to false.
+HTMLPurifier->addFilter()
+ method. Specify an array of concrete implementations.
+style blocks from input HTML, cleans them up with CSSTidy,
+ and places them in the StyleBlocks context variable, for further
+ use by you, usually to be placed in an external stylesheet, or a
+ style block in the head of your document.
+';
+?>
+
+
+
+
+#user-content and a user uses the
+ selector a:hover, the final selector will be
+ #user-content a:hover.
+#user-content, #user-content2, the final selector will
+ be #user-content a:hover, #user-content2 a:hover.
+csstidy
+ class to use for internal cleaning. This will usually be good enough.
+false to
+ disable cleaning. In addition, you can supply your own concrete implementation
+ of Tidy's interface to use, although I don't know why you'd want to do that.
+element1[attr1|attr2],element2.... You can also use
+ newlines instead of commas to separate elements.
+valid_elements
+ whitelist: directly copy-pasting it here will probably result in
+ broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes
+ are set, this directive has no effect.
+<blockquote>Foo</blockquote> would become
+ <blockquote><p>Foo</p></blockquote>.
+ The <p> tags can be replaced with whatever you desire,
+ as long as it is a block level element.
+
+$config = HTMLPurifier_Config::createDefault();
+$config->set('HTML', 'DefinitionID', '1');
+$def = $config->getHTMLDefinition();
+$def->addAttribute('a', 'tabindex', 'Number');
+
+tag.attr, use tag@attr. To disallow href
+ attributes in a tags, set this directive to
+ a@href. You can also disallow an attribute globally with
+ attr or *@attr (either syntax is fine; the latter
+ is provided for consistency with %HTML.AllowedAttributes).
+img
+ with the expectation of preventing images on your site, you'll be in for
+ a nasty surprise when people start using the background-image
+ CSS property.
+img tags. This is
+ in place to prevent imagecrash attacks, disable with null at your own risk.
+ This directive is similar to %CSS.MaxImgLength, and both should be
+ concurrently edited, although there are
+ subtle differences in the input format (the HTML max is an integer).
+HTMLPurifier_HTMLModule_Proprietary.
+ Warning: This can cause your documents to stop
+ validating!
+
+
+
+--ALLOWED--
+'none', 'light', 'medium', 'heavy'
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt
new file mode 100755
index 000000000..996762bd1
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt
@@ -0,0 +1,8 @@
+HTML.TidyRemove
+TYPE: lookup
+VERSION: 2.0.0
+DEFAULT: array()
+--DESCRIPTION--
+
+Fixes to remove from the default set of Tidy fixes as per your level.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt
new file mode 100755
index 000000000..89133b1a3
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt
@@ -0,0 +1,8 @@
+HTML.Trusted
+TYPE: bool
+VERSION: 2.0.0
+DEFAULT: false
+--DESCRIPTION--
+Indicates whether or not the user input is trusted. If the input is
+trusted, a more expansive set of allowed tags and attributes will be used.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt
new file mode 100755
index 000000000..2a47e384f
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt
@@ -0,0 +1,11 @@
+HTML.XHTML
+TYPE: bool
+DEFAULT: true
+VERSION: 1.1.0
+DEPRECATED-VERSION: 1.7.0
+DEPRECATED-USE: HTML.Doctype
+--DESCRIPTION--
+Determines whether or not output is XHTML 1.0 or HTML 4.01 flavor.
+--ALIASES--
+Core.XHTML
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.txt
new file mode 100755
index 000000000..f32ceb5b6
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.txt
@@ -0,0 +1,3 @@
+HTML
+DESCRIPTION: Configuration regarding allowed HTML.
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt
new file mode 100755
index 000000000..08921fde7
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt
@@ -0,0 +1,10 @@
+Output.CommentScriptContents
+TYPE: bool
+VERSION: 2.0.0
+DEFAULT: true
+--DESCRIPTION--
+Determines whether or not HTML Purifier should attempt to fix up the
+contents of script tags for legacy browsers with comments.
+--ALIASES--
+Core.CommentScriptContents
+--# vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt
new file mode 100755
index 000000000..79f8ad82c
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt
@@ -0,0 +1,13 @@
+Output.Newline
+TYPE: string/null
+VERSION: 2.0.1
+DEFAULT: NULL
+--DESCRIPTION--
+
+<el b="" a="" c="" />
+ to <el a="" b="" c="" />. This is a workaround for
+ a bug in FCKeditor which causes it to swap attributes order, adding noise
+ to text diffs. If you're not seeing this bug, chances are, you don't need
+ this directive.
+%s where
+ the url-encoded original URI should be inserted (sample:
+ http://www.google.com/url?q=%s).
+
+
+meta tags. To revert to
+ previous behavior, please use %URI.MungeResources.
+
+
+
+
+
+
+
+ Key
+ Description
+ Example
+ <a href="">
+
+ %r
+ 1 - The URI embeds a resource
+
(blank) - The URI is merely a link
+
+
+ %n
+ The name of the tag this URI came from
+ a
+
+
+ %m
+ The name of the attribute this URI came from
+ href
+
+
+
+%p
+ The name of the CSS property this URI came from, or blank if irrelevant
+
+ <img src="">.
+ Be careful enabling this directive if you have a redirector script
+ that does not use the Location HTTP header; all of your images
+ and other embedded resources will break.
+$checksum === sha1($secret_key . ':' . $url)
+
+
';
+ }
+
+ }
+
+ private function _renderStruct(&$ret, $struct, $line = null, $col = null) {
+ $stack = array($struct);
+ $context_stack = array(array());
+ while ($current = array_pop($stack)) {
+ $context = array_pop($context_stack);
+ foreach ($current->errors as $error) {
+ list($severity, $msg) = $error;
+ $string = '';
+ $string .= '';
+ //$string .= '';
+ //$string .= '
';
+ $ret[] = $string;
+ }
+ foreach ($current->children as $type => $array) {
+ $context[] = $current;
+ $stack = array_merge($stack, array_reverse($array, true));
+ for ($i = count($array); $i > 0; $i--) {
+ $context_stack[] = $context;
+ }
+ }
+ }
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ErrorStruct.php b/lib/htmlpurifier/library/HTMLPurifier/ErrorStruct.php
new file mode 100755
index 000000000..9bc8996ec
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ErrorStruct.php
@@ -0,0 +1,60 @@
+children[$type][$id])) {
+ $this->children[$type][$id] = new HTMLPurifier_ErrorStruct();
+ $this->children[$type][$id]->type = $type;
+ }
+ return $this->children[$type][$id];
+ }
+
+    /**
+     * Appends an error entry to this struct's error list.
+     * @param $severity Severity value, stored as the first element of the entry
+     * @param $message Message, stored as the second element of the entry
+     */
+    public function addError($severity, $message) {
+        $this->errors[] = array($severity, $message);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Exception.php b/lib/htmlpurifier/library/HTMLPurifier/Exception.php
new file mode 100755
index 000000000..be85b4c56
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Exception.php
@@ -0,0 +1,12 @@
+preFilter,
+ * 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter,
+ * 1->postFilter.
+ *
+ * @note Methods are not declared abstract as it is perfectly legitimate
+ * for an implementation not to want anything to happen on a step
+ */
+
+class HTMLPurifier_Filter
+{
+
+    /**
+     * Name of the filter for identification purposes
+     */
+    public $name;
+
+    /**
+     * Pre-processor function, handles HTML before HTML Purifier.
+     * This base implementation returns the input unchanged.
+     * @param $html HTML to pre-process
+     * @param $config Configuration object (unused here)
+     * @param $context Context object (unused here)
+     * @return The HTML to hand on; here, $html as given
+     */
+    public function preFilter($html, $config, $context) {
+        return $html;
+    }
+
+    /**
+     * Post-processor function, handles HTML after HTML Purifier.
+     * This base implementation returns the input unchanged.
+     * @param $html HTML to post-process
+     * @param $config Configuration object (unused here)
+     * @param $context Context object (unused here)
+     * @return The HTML to hand on; here, $html as given
+     */
+    public function postFilter($html, $config, $context) {
+        return $html;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php b/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php
new file mode 100755
index 000000000..970f9e0c9
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php
@@ -0,0 +1,135 @@
+ blocks from input HTML, cleans them up
+ * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
+ * so they can be used elsewhere in the document.
+ *
+ * @note
+ * See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php for
+ * sample usage.
+ *
+ * @note
+ * This filter can also be used on stylesheets not included in the
+ * document--something purists would probably prefer. Just directly
+ * call HTMLPurifier_Filter_ExtractStyleBlocks->cleanCSS()
+ */
+class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
+{
+
+ public $name = 'ExtractStyleBlocks';
+ private $_styleMatches = array();
+ private $_tidy;
+
+    /**
+     * Creates the csstidy instance kept in $this->_tidy.
+     * @note The csstidy class must already be loaded when this runs;
+     *       instantiating an undefined class is a fatal error in PHP.
+     */
+    public function __construct() {
+        $this->_tidy = new csstidy();
+    }
+
+    /**
+     * Save the contents of CSS blocks to style matches.
+     * Appends the first capture group to $this->_styleMatches and returns
+     * nothing.
+     * @param $matches preg_replace style $matches array
+     */
+    protected function styleCallback($matches) {
+        $this->_styleMatches[] = $matches[1];
+    }
+
+ /**
+ * Removes inline #isU', array($this, 'styleCallback'), $html);
+ $style_blocks = $this->_styleMatches;
+ $this->_styleMatches = array(); // reset
+ $context->register('StyleBlocks', $style_blocks); // $context must not be reused
+ if ($this->_tidy) {
+ foreach ($style_blocks as &$style) {
+ $style = $this->cleanCSS($style, $config, $context);
+ }
+ }
+ return $html;
+ }
+
+ /**
+ * Takes CSS (the stuff found in in a font-family prop).
+ if ($config->get('FilterParam', 'ExtractStyleBlocksEscaping')) {
+ $css = str_replace(
+ array('<', '>', '&'),
+ array('\3C ', '\3E ', '\26 '),
+ $css
+ );
+ }
+ return $css;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php b/lib/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php
new file mode 100755
index 000000000..aca972f6c
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php
@@ -0,0 +1,39 @@
+]+>.+?'.
+ 'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?#s';
+ $pre_replace = '';
+ return preg_replace($pre_regex, $pre_replace, $html);
+ }
+
+ public function postFilter($html, $config, $context) {
+ $post_regex = '##';
+ return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html);
+ }
+
+    /**
+     * Armors a string for embedding in the markup built by this filter.
+     * In every "--" pair the second hyphen is rewritten as the numeric
+     * character reference &#45;, so no literal double hyphen survives.
+     * NOTE(review): presumably this keeps the value from terminating an HTML
+     * comment in the generated markup; confirm against postFilterCallback().
+     * @param $url String to armor
+     * @return Armored string
+     */
+    protected function armorUrl($url) {
+        // replacing '--' with itself would be a no-op; the second hyphen must
+        // be emitted as an entity
+        return str_replace('--', '-&#45;', $url);
+    }
+
+ protected function postFilterCallback($matches) {
+ $url = $this->armorUrl($matches[1]);
+ return '';
+
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Generator.php b/lib/htmlpurifier/library/HTMLPurifier/Generator.php
new file mode 100755
index 000000000..a1b96b9e4
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Generator.php
@@ -0,0 +1,183 @@
+ tags
+ */
+ private $_scriptFix = false;
+
+ /**
+ * Cache of HTMLDefinition during HTML output to determine whether or
+ * not attributes should be minimized.
+ */
+ private $_def;
+
+ /**
+ * Cache of %Output.SortAttr
+ */
+ private $_sortAttr;
+
+ /**
+ * Configuration for the generator
+ */
+ protected $config;
+
+    /**
+     * Stores the configuration and caches the settings consulted while
+     * generating output.
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context (not read here)
+     */
+    public function __construct($config, $context) {
+        $this->config     = $config;
+        $this->_scriptFix = $config->get('Output', 'CommentScriptContents');
+        $this->_sortAttr  = $config->get('Output', 'SortAttr');
+
+        // the definition is cached as-is; its doctype decides XML-style output
+        $definition   = $config->getHTMLDefinition();
+        $this->_def   = $definition;
+        $this->_xhtml = $definition->doctype->xml;
+    }
+
+    /**
+     * Serializes a list of tokens into an HTML string.
+     * @param $tokens Array of HTMLPurifier_Token
+     * @return Generated HTML; an empty string when $tokens is empty
+     */
+    public function generateFromTokens($tokens) {
+        if (!$tokens) return '';
+
+        // Render every token in order and concatenate the results
+        $html  = '';
+        $total = count($tokens);
+        $pos   = 0;
+        while ($pos < $total) {
+            $is_script_block =
+                $this->_scriptFix &&
+                $tokens[$pos]->name === 'script' &&
+                $pos + 2 < $total &&
+                $tokens[$pos + 2] instanceof HTMLPurifier_Token_End;
+            if ($is_script_block) {
+                // script special case: the contents of the script block
+                // must be ONE token for this to work. The start tag and the
+                // contents are emitted here; the end tag falls through to
+                // the regular call below.
+                $html .= $this->generateFromToken($tokens[$pos]);
+                $html .= $this->generateScriptFromToken($tokens[$pos + 1]);
+                $pos += 2;
+            }
+            $html .= $this->generateFromToken($tokens[$pos]);
+            $pos++;
+        }
+
+        // Optional Tidy pass: needs the extension and %Output.TidyFormat
+        if (extension_loaded('tidy') && $this->config->get('Output', 'TidyFormat')) {
+            $tidy_options = array(
+                'indent'=> true,
+                'output-xhtml' => $this->_xhtml,
+                'show-body-only' => true,
+                'indent-spaces' => 2,
+                'wrap' => 68,
+            );
+            $tidy = new Tidy;
+            $tidy->parseString($html, $tidy_options, 'utf8');
+            $tidy->cleanRepair();
+            $html = (string) $tidy; // explicit cast necessary
+        }
+
+        // Convert "\n" to %Output.Newline, falling back to the system default
+        $newline = $this->config->get('Output', 'Newline');
+        if ($newline === null) $newline = PHP_EOL;
+        if ($newline === "\n") return $html;
+        return str_replace("\n", $newline, $html);
+    }
+
+    /**
+     * Generates HTML from a single token.
+     * @param $token HTMLPurifier_Token object.
+     * @return Generated HTML. An empty string is returned (after raising an
+     *         E_USER_WARNING) when $token is not an HTMLPurifier_Token, and
+     *         also for token classes not handled below.
+     */
+    public function generateFromToken($token) {
+        if (!$token instanceof HTMLPurifier_Token) {
+            trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
+            return '';
+
+        } elseif ($token instanceof HTMLPurifier_Token_Start) {
+            $attr = $this->generateAttributes($token->attr, $token->name);
+            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
+
+        } elseif ($token instanceof HTMLPurifier_Token_End) {
+            // closing tags need the leading "</"
+            return '</' . $token->name . '>';
+
+        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
+            $attr = $this->generateAttributes($token->attr, $token->name);
+            return '<' . $token->name . ($attr ? ' ' : '') . $attr .
+                ( $this->_xhtml ? ' /': '' ) // self-closing slash only for XML doctypes
+                . '>';
+
+        } elseif ($token instanceof HTMLPurifier_Token_Text) {
+            return $this->escape($token->data, ENT_NOQUOTES);
+
+        } elseif ($token instanceof HTMLPurifier_Token_Comment) {
+            // comment data is wrapped in comment delimiters, not discarded
+            return '<!--' . $token->data . '-->';
+        } else {
+            return '';
+
+        }
+    }
+
+ /**
+ * Special case processor for the contents of script tags
+ * @warning This runs into problems if there's already a literal
+ * --> somewhere inside the script contents.
+ */
+ public function generateScriptFromToken($token) {
+ if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
+ // Thanks
+ // ---
+
+ // State 1.1.3:
, so
+ // any later token processing scripts must convert improperly
+ // classified EmptyTags from StartTags.
+ $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
+ if ($is_self_closing) {
+ $strlen_segment--;
+ $segment = substr($segment, 0, $strlen_segment);
+ }
+
+ // Check if there are any attributes
+ $position_first_space = strcspn($segment, $this->_whitespace);
+
+ if ($position_first_space >= $strlen_segment) {
+ if ($is_self_closing) {
+ $token = new HTMLPurifier_Token_Empty($segment);
+ } else {
+ $token = new HTMLPurifier_Token_Start($segment);
+ }
+ if ($maintain_line_numbers) {
+ $token->rawPosition($current_line, $current_col);
+ $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+ }
+ $array[] = $token;
+ $inside_tag = false;
+ $cursor = $position_next_gt + 1;
+ continue;
+ }
+
+ // Grab out all the data
+ $type = substr($segment, 0, $position_first_space);
+ $attribute_string =
+ trim(
+ substr(
+ $segment, $position_first_space
+ )
+ );
+ if ($attribute_string) {
+ $attr = $this->parseAttributeString(
+ $attribute_string
+ , $config, $context
+ );
+ } else {
+ $attr = array();
+ }
+
+ if ($is_self_closing) {
+ $token = new HTMLPurifier_Token_Empty($type, $attr);
+ } else {
+ $token = new HTMLPurifier_Token_Start($type, $attr);
+ }
+ if ($maintain_line_numbers) {
+ $token->rawPosition($current_line, $current_col);
+ $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+ }
+ $array[] = $token;
+ $cursor = $position_next_gt + 1;
+ $inside_tag = false;
+ continue;
+ } else {
+ // inside tag, but there's no ending > sign
+ if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
+ $token = new
+ HTMLPurifier_Token_Text(
+ '<' .
+ $this->parseData(
+ substr($html, $cursor)
+ )
+ );
+ if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
+ // no cursor scroll? Hmm...
+ $array[] = $token;
+ break;
+ }
+ break;
+ }
+
+ $context->destroy('CurrentLine');
+ $context->destroy('CurrentCol');
+ return $array;
+ }
+
+    /**
+     * substr_count() that honours $offset and $length on every PHP version.
+     * On PHP older than 5.1 the haystack is sliced first and the count is
+     * taken on the slice; otherwise the native arguments are used.
+     * @param $haystack String to search in
+     * @param $needle Substring to count
+     * @param $offset Offset at which counting starts
+     * @param $length Length of the region to search
+     * @return Number of occurrences
+     */
+    protected function substrCount($haystack, $needle, $offset, $length) {
+        // the version check is done once and remembered across calls
+        static $needsSlice;
+        if ($needsSlice === null) {
+            $needsSlice = version_compare(PHP_VERSION, '5.1', '<');
+        }
+        if (!$needsSlice) {
+            return substr_count($haystack, $needle, $offset, $length);
+        }
+        $region = substr($haystack, $offset, $length);
+        return substr_count($region, $needle);
+    }
+
+    /**
+     * Takes the inside of an HTML tag and makes an assoc array of attributes.
+     *
+     * @param $string Inside of tag excluding name.
+     * @param $config Configuration object; %Core.CollectErrors is read from it.
+     * @param $context Context object; its 'ErrorCollector' entry is fetched
+     *        when error collection is enabled.
+     * @returns Assoc array of attributes (name => value). A bare attribute
+     *          name maps to itself.
+     */
+    public function parseAttributeString($string, $config, $context) {
+        $string = (string) $string; // quick typecast
+
+        if ($string == '') return array(); // no attributes
+
+        $e = false;
+        if ($config->get('Core', 'CollectErrors')) {
+            $e =& $context->get('ErrorCollector');
+        }
+
+        // let's see if we can abort as quickly as possible
+        // one equal sign, no spaces => one attribute
+        $num_equal = substr_count($string, '=');
+        // compare against false: a space found at offset 0 is still a space
+        $has_space = (strpos($string, ' ') !== false);
+        if ($num_equal === 0 && !$has_space) {
+            // bool attribute
+            return array($string => $string);
+        } elseif ($num_equal === 1 && !$has_space) {
+            // only one attribute
+            list($key, $quoted_value) = explode('=', $string);
+            $quoted_value = trim($quoted_value);
+            if (!$key) {
+                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
+                return array();
+            }
+            // strict check: an unquoted value of "0" is falsy but not empty
+            if ($quoted_value === '') return array($key => '');
+            $first_char = @$quoted_value[0];
+            $last_char  = @$quoted_value[strlen($quoted_value)-1];
+
+            $same_quote = ($first_char == $last_char);
+            $open_quote = ($first_char == '"' || $first_char == "'");
+
+            if ( $same_quote && $open_quote) {
+                // well behaved
+                $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
+            } else {
+                // not well behaved
+                if ($open_quote) {
+                    if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote');
+                    $value = substr($quoted_value, 1);
+                } else {
+                    $value = $quoted_value;
+                }
+            }
+            if ($value === false) $value = '';
+            return array($key => $value);
+        }
+
+        // setup loop environment
+        $array  = array(); // return assoc array of attributes
+        $cursor = 0; // current position in string (moves forward)
+        $size   = strlen($string); // size of the string (stays the same)
+
+        // if we have unquoted attributes, the parser expects a terminating
+        // space, so let's guarantee that there's always a terminating space.
+        $string .= ' ';
+
+        while(true) {
+
+            if ($cursor >= $size) {
+                break;
+            }
+
+            // skip whitespace in front of the key
+            $cursor += strspn($string, $this->_whitespace, $cursor);
+
+            // only whitespace was left: nothing more to parse
+            if ($cursor >= $size) {
+                break;
+            }
+
+            // grab the key
+
+            $key_begin = $cursor; //we're currently at the start of the key
+
+            // scroll past all characters that are the key (not whitespace or =)
+            $cursor += strcspn($string, $this->_whitespace . '=', $cursor);
+
+            $key_end = $cursor; // now at the end of the key
+
+            $key = substr($string, $key_begin, $key_end - $key_begin);
+
+            if (!$key) {
+                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
+                // Step over the offending character itself, then over the
+                // rest of the non-whitespace run. Without the "1 +" the
+                // cursor stays put when whitespace follows directly (for
+                // example "= foo") and the loop never terminates.
+                $cursor += 1 + strcspn($string, $this->_whitespace, $cursor + 1);
+                continue; // empty key
+            }
+
+            // scroll past all whitespace
+            $cursor += strspn($string, $this->_whitespace, $cursor);
+
+            if ($cursor >= $size) {
+                $array[$key] = $key;
+                break;
+            }
+
+            // if the next character is an equal sign, we've got a regular
+            // pair, otherwise, it's a bool attribute
+            $first_char = @$string[$cursor];
+
+            if ($first_char == '=') {
+                // key="value"
+
+                $cursor++;
+                $cursor += strspn($string, $this->_whitespace, $cursor);
+
+                // we might be in front of a quote right now
+
+                $char = @$string[$cursor];
+
+                if ($char == '"' || $char == "'") {
+                    // it's quoted, end bound is $char
+                    $cursor++;
+                    $value_begin = $cursor;
+                    $cursor = strpos($string, $char, $cursor);
+                    $value_end = $cursor;
+                } else {
+                    // it's not quoted, end bound is whitespace
+                    $value_begin = $cursor;
+                    $cursor += strcspn($string, $this->_whitespace, $cursor);
+                    $value_end = $cursor;
+                }
+
+                // we reached a premature end (closing quote not found)
+                if ($cursor === false) {
+                    $cursor = $size;
+                    $value_end = $cursor;
+                }
+
+                $value = substr($string, $value_begin, $value_end - $value_begin);
+                if ($value === false) $value = '';
+                $array[$key] = $this->parseData($value);
+                $cursor++;
+
+            } else {
+                // boolattr
+                if ($key !== '') {
+                    $array[$key] = $key;
+                } else {
+                    // purely theoretical
+                    if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
+                }
+
+            }
+        }
+        return $array;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php b/lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php
new file mode 100755
index 000000000..57cffa82a
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php
@@ -0,0 +1,106 @@
+tokens = array();
+
+ $string = $this->normalize($string, $config, $context);
+
+ $parser = new XML_HTMLSax3();
+ $parser->set_object($this);
+ $parser->set_element_handler('openHandler','closeHandler');
+ $parser->set_data_handler('dataHandler');
+ $parser->set_escape_handler('escapeHandler');
+
+ // doesn't seem to work correctly for attributes
+ $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);
+
+ $parser->parse($string);
+
+ return $this->tokens;
+
+ }
+
+    /**
+     * Open tag event handler, interface is defined by PEAR package.
+     * Appends an Empty token when $closed is set, a Start token otherwise.
+     * @param $parser Parser instance (unused)
+     * @param $name Tag name
+     * @param $attrs Assoc array of attributes as delivered by the parser
+     * @param $closed Whether the tag was self-closed
+     * @return Always true
+     */
+    public function openHandler(&$parser, $name, $attrs, $closed) {
+        // entities are not resolved in attrs, so run every value through
+        // parseData() first
+        $resolved = array();
+        foreach ($attrs as $key => $attr) {
+            $resolved[$key] = $this->parseData($attr);
+        }
+        $this->tokens[] = $closed
+            ? new HTMLPurifier_Token_Empty($name, $resolved)
+            : new HTMLPurifier_Token_Start($name, $resolved);
+        return true;
+    }
+
+    /**
+     * Close tag event handler, interface is defined by PEAR package.
+     * Appends an End token unless the most recent token is an Empty token.
+     * @param $parser Parser instance (unused)
+     * @param $name Tag name
+     * @return Always true
+     */
+    public function closeHandler(&$parser, $name) {
+        // HTMLSax3 seems to always send empty tags an extra close tag
+        // check and ignore if you see it:
+        // [TESTME] to make sure it doesn't overreach
+        $count = count($this->tokens);
+        // guard on $count: with no tokens yet there is no previous token to
+        // inspect, and indexing -1 would read an undefined offset
+        if ($count > 0 && $this->tokens[$count - 1] instanceof HTMLPurifier_Token_Empty) {
+            return true;
+        }
+        $this->tokens[] = new HTMLPurifier_Token_End($name);
+        return true;
+    }
+
+    /**
+     * Data event handler, interface is defined by PEAR package.
+     * Appends a Text token holding $data.
+     * @param $parser Parser instance (unused)
+     * @param $data Character data delivered by the parser
+     * @return Always true
+     */
+    public function dataHandler(&$parser, $data) {
+        $text = new HTMLPurifier_Token_Text($data);
+        $this->tokens[] = $text;
+        return true;
+    }
+
+    /**
+     * Escaped text handler, interface is defined by PEAR package.
+     * Appends a Comment token when $data begins with "--"; any other escape
+     * is ignored.
+     * @param $parser Parser instance (unused)
+     * @param $data Escaped text delivered by the parser
+     * @return Always true
+     */
+    public function escapeHandler(&$parser, $data) {
+        $is_comment = (substr($data, 0, 2) === '--');
+        if ($is_comment) {
+            $this->tokens[] = new HTMLPurifier_Token_Comment($data);
+        }
+        // CDATA is handled elsewhere, but if it was handled here:
+        //if (strpos($data, '[CDATA[') === 0) {
+        //    $this->tokens[] = new HTMLPurifier_Token_Text(
+        //        substr($data, 7, strlen($data) - 9) );
+        //}
+        return true;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Lexer/PH5P.php b/lib/htmlpurifier/library/HTMLPurifier/Lexer/PH5P.php
new file mode 100755
index 000000000..fa1bf973e
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Lexer/PH5P.php
@@ -0,0 +1,3906 @@
+normalize($html, $config, $context);
+ $new_html = $this->wrapHTML($new_html, $config, $context);
+ try {
+ $parser = new HTML5($new_html);
+ $doc = $parser->save();
+ } catch (DOMException $e) {
+ // Uh oh, it failed. Punt to DirectLex.
+ $lexer = new HTMLPurifier_Lexer_DirectLex();
+ $context->register('PH5PError', $e); // save the error, so we can detect it
+ return $lexer->tokenizeHTML($html, $config, $context); // use original HTML
+ }
+ $tokens = array();
+ $this->tokenizeDOM(
+ $doc->getElementsByTagName('html')->item(0)-> //
+ getElementsByTagName('body')->item(0)-> //
+ getElementsByTagName('div')->item(0) //
'; printTokens($tokens, $t); printTokens($this->stack);
+
+ // quick-check: if it's not a tag, no need to process
+ if (empty($token->is_tag)) {
+ if ($token instanceof HTMLPurifier_Token_Text) {
+ foreach ($this->injectors as $i => $injector) {
+ if (isset($token->skip[$i])) continue;
+ if ($token->rewind !== null && $token->rewind !== $i) continue;
+ $injector->handleText($token);
+ $this->processToken($token, $i);
+ $reprocess = true;
+ break;
+ }
+ }
+ // another possibility is a comment
+ continue;
+ }
+
+ if (isset($definition->info[$token->name])) {
+ $type = $definition->info[$token->name]->child->type;
+ } else {
+ $type = false; // Type is unknown, treat accordingly
+ }
+
+ // quick tag checks: anything that's *not* an end tag
+ $ok = false;
+ if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
+ // claims to be a start tag but is empty
+ $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
+ $ok = true;
+ } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
+ // claims to be empty but really is a start tag
+ $this->swap(new HTMLPurifier_Token_End($token->name));
+ $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
+ // punt (since we had to modify the input stream in a non-trivial way)
+ $reprocess = true;
+ continue;
+ } elseif ($token instanceof HTMLPurifier_Token_Empty) {
+ // real empty token
+ $ok = true;
+ } elseif ($token instanceof HTMLPurifier_Token_Start) {
+ // start tag
+
+ // ...unless they also have to close their parent
+ if (!empty($this->stack)) {
+
+ $parent = array_pop($this->stack);
+ $this->stack[] = $parent;
+
+ if (isset($definition->info[$parent->name])) {
+ $elements = $definition->info[$parent->name]->child->getAllowedElements($config);
+ $autoclose = !isset($elements[$token->name]);
+ } else {
+ $autoclose = false;
+ }
+
+ $carryover = false;
+ if ($autoclose && $definition->info[$parent->name]->formatting) {
+ $carryover = true;
+ }
+
+ if ($autoclose) {
+ // errors need to be updated
+ $new_token = new HTMLPurifier_Token_End($parent->name);
+ $new_token->start = $parent;
+ if ($carryover) {
+ $element = clone $parent;
+ $element->armor['MakeWellFormed_TagClosedError'] = true;
+ $element->carryover = true;
+ $this->processToken(array($new_token, $token, $element));
+ } else {
+ $this->insertBefore($new_token);
+ }
+ if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
+ if (!$carryover) {
+ $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
+ } else {
+ $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
+ }
+ }
+ $reprocess = true;
+ continue;
+ }
+
+ }
+ $ok = true;
+ }
+
+ if ($ok) {
+ foreach ($this->injectors as $i => $injector) {
+ if (isset($token->skip[$i])) continue;
+ if ($token->rewind !== null && $token->rewind !== $i) continue;
+ $injector->handleElement($token);
+ $this->processToken($token, $i);
+ $reprocess = true;
+ break;
+ }
+ if (!$reprocess) {
+ // ah, nothing interesting happened; do normal processing
+ $this->swap($token);
+ if ($token instanceof HTMLPurifier_Token_Start) {
+ $this->stack[] = $token;
+ } elseif ($token instanceof HTMLPurifier_Token_End) {
+ throw new HTMLPurifier_Exception('Improper handling of end tag in start code; possible error in MakeWellFormed');
+ }
+ }
+ continue;
+ }
+
+ // sanity check: we should be dealing with a closing tag
+ if (!$token instanceof HTMLPurifier_Token_End) {
+ throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
+ }
+
+ // make sure that we have something open
+ if (empty($this->stack)) {
+ if ($escape_invalid_tags) {
+ if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
+ $this->swap(new HTMLPurifier_Token_Text(
+ $generator->generateFromToken($token)
+ ));
+ } else {
+ $this->remove();
+ if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
+ }
+ $reprocess = true;
+ continue;
+ }
+
+ // first, check for the simplest case: everything closes neatly.
+ // Eventually, everything passes through here; if there are problems
+ // we modify the input stream accordingly and then punt, so that
+ // the tokens get processed again.
+ $current_parent = array_pop($this->stack);
+ if ($current_parent->name == $token->name) {
+ $token->start = $current_parent;
+ foreach ($this->injectors as $i => $injector) {
+ if (isset($token->skip[$i])) continue;
+ if ($token->rewind !== null && $token->rewind !== $i) continue;
+ $injector->handleEnd($token);
+ $this->processToken($token, $i);
+ $this->stack[] = $current_parent;
+ $reprocess = true;
+ break;
+ }
+ continue;
+ }
+
+ // okay, so we're trying to close the wrong tag
+
+ // undo the pop previous pop
+ $this->stack[] = $current_parent;
+
+ // scroll back the entire nest, trying to find our tag.
+ // (feature could be to specify how far you'd like to go)
+ $size = count($this->stack);
+ // -2 because -1 is the last element, but we already checked that
+ $skipped_tags = false;
+ for ($j = $size - 2; $j >= 0; $j--) {
+ if ($this->stack[$j]->name == $token->name) {
+ $skipped_tags = array_slice($this->stack, $j);
+ break;
+ }
+ }
+
+ // we didn't find the tag, so remove
+ if ($skipped_tags === false) {
+ if ($escape_invalid_tags) {
+ $this->swap(new HTMLPurifier_Token_Text(
+ $generator->generateFromToken($token)
+ ));
+ if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
+ } else {
+ $this->remove();
+ if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
+ }
+ $reprocess = true;
+ continue;
+ }
+
+ // do errors, in REVERSE $j order: a,b,c with
+ $c = count($skipped_tags);
+ if ($e) {
+ for ($j = $c - 1; $j > 0; $j--) {
+ // notice we exclude $j == 0, i.e. the current ending tag, from
+ // the errors...
+ if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
+ $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
+ }
+ }
+ }
+
+ // insert tags, in FORWARD $j order: c,b,a with
+ $replace = array($token);
+ for ($j = 1; $j < $c; $j++) {
+ // ...as well as from the insertions
+ $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
+ $new_token->start = $skipped_tags[$j];
+ array_unshift($replace, $new_token);
+ if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
+ $element = clone $skipped_tags[$j];
+ $element->carryover = true;
+ $element->armor['MakeWellFormed_TagClosedError'] = true;
+ $replace[] = $element;
+ }
+ }
+ $this->processToken($replace);
+ $reprocess = true;
+ continue;
+ }
+
+ $context->destroy('CurrentNesting');
+ $context->destroy('InputTokens');
+ $context->destroy('InputIndex');
+ $context->destroy('CurrentToken');
+
+ unset($this->injectors, $this->stack, $this->tokens, $this->t);
+ return $tokens;
+ }
+
+    /**
+     * Processes arbitrary token values for complicated substitution patterns.
+     * In general:
+     *
+     * If $token is an array, it is a list of tokens to substitute for the
+     * current token. These tokens then get individually processed. If there
+     * is a leading integer in the list, that integer determines how many
+     * tokens from the stream should be removed.
+     *
+     * If $token is a regular token, it is swapped with the current token.
+     *
+     * If $token is false, the current token is deleted.
+     *
+     * If $token is an integer, that number of tokens (with the first token
+     * being the current one) will be deleted.
+     *
+     * @param $token Token substitution value
+     * @param $injector Index of the injector that performed the substitution;
+     *        default is -1, meaning this is not an injector related operation.
+     * @throws HTMLPurifier_Exception if $token is none of the forms above, or
+     *         if the delete count is zero
+     */
+    protected function processToken($token, $injector = -1) {
+
+        // normalize forms of token
+        // (the order matters: each step feeds the array form checked below)
+        if (is_object($token)) $token = array(1, $token);
+        if (is_int($token))    $token = array($token);
+        if ($token === false)  $token = array(1);
+        if (!is_array($token)) throw new HTMLPurifier_Exception('Invalid token type from injector');
+        // no leading delete count given: replace exactly one token
+        if (!is_int($token[0])) array_unshift($token, 1);
+        if ($token[0] === 0) throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
+
+        // $token is now an array with the following form:
+        //      array(number nodes to delete, new node 1, new node 2, ...)
+
+        $delete = array_shift($token);
+        // $old receives the tokens that were removed from the stream
+        $old = array_splice($this->tokens, $this->t, $delete, $token);
+
+        if ($injector > -1) {
+            // determine appropriate skips
+            // every replacement inherits the skip list of the first removed
+            // token and is additionally marked as skipped for this injector
+            $oldskip = isset($old[0]) ? $old[0]->skip : array();
+            foreach ($token as $object) {
+                $object->skip = $oldskip;
+                $object->skip[$injector] = true;
+            }
+        }
+
+    }
+
+    /**
+     * Inserts a token before the current token. Cursor now points to this token
+     * (the cursor index itself is left unchanged).
+     * @param $token Token to insert at the cursor position
+     */
+    private function insertBefore($token) {
+        array_splice($this->tokens, $this->t, 0, array($token));
+    }
+
+    /**
+     * Removes current token. Cursor now points to new token occupying previously
+     * occupied space (the cursor index itself is left unchanged).
+     */
+    private function remove() {
+        array_splice($this->tokens, $this->t, 1);
+    }
+
+    /**
+     * Swap current token with new token. Cursor points to new token (no change).
+     * @param $token Token that overwrites the one at the cursor position
+     */
+    private function swap($token) {
+        $this->tokens[$this->t] = $token;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php b/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php
new file mode 100755
index 000000000..d30e456db
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php
@@ -0,0 +1,171 @@
+getHTMLDefinition();
+ $generator = new HTMLPurifier_Generator($config, $context);
+ $result = array();
+
+ $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
+ $remove_invalid_img = $config->get('Core', 'RemoveInvalidImg');
+
+ // currently only used to determine if comments should be kept
+ $trusted = $config->get('HTML', 'Trusted');
+
+ $remove_script_contents = $config->get('Core', 'RemoveScriptContents');
+ $hidden_elements = $config->get('Core', 'HiddenElements');
+
+ // remove script contents compatibility
+ if ($remove_script_contents === true) {
+ $hidden_elements['script'] = true;
+ } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
+ unset($hidden_elements['script']);
+ }
+
+ $attr_validator = new HTMLPurifier_AttrValidator();
+
+ // removes tokens until it reaches a closing tag with its value
+ $remove_until = false;
+
+ // converts comments into text tokens when this is equal to a tag name
+ $textify_comments = false;
+
+ $token = false;
+ $context->register('CurrentToken', $token);
+
+ $e = false;
+ if ($config->get('Core', 'CollectErrors')) {
+ $e =& $context->get('ErrorCollector');
+ }
+
+ foreach($tokens as $token) {
+ if ($remove_until) {
+ if (empty($token->is_tag) || $token->name !== $remove_until) {
+ continue;
+ }
+ }
+ if (!empty( $token->is_tag )) {
+ // DEFINITION CALL
+
+ // before any processing, try to transform the element
+ if (
+ isset($definition->info_tag_transform[$token->name])
+ ) {
+ $original_name = $token->name;
+ // there is a transformation for this tag
+ // DEFINITION CALL
+ $token = $definition->
+ info_tag_transform[$token->name]->
+ transform($token, $config, $context);
+ if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
+ }
+
+ if (isset($definition->info[$token->name])) {
+
+ // mostly everything's good, but
+ // we need to make sure required attributes are in order
+ if (
+ ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
+ $definition->info[$token->name]->required_attr &&
+ ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
+ ) {
+ $attr_validator->validateToken($token, $config, $context);
+ $ok = true;
+ foreach ($definition->info[$token->name]->required_attr as $name) {
+ if (!isset($token->attr[$name])) {
+ $ok = false;
+ break;
+ }
+ }
+ if (!$ok) {
+ if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name);
+ continue;
+ }
+ $token->armor['ValidateAttributes'] = true;
+ }
+
+ if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
+ $textify_comments = $token->name;
+ } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
+ $textify_comments = false;
+ }
+
+ } elseif ($escape_invalid_tags) {
+ // invalid tag, generate HTML representation and insert in
+ if ($e) $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
+ $token = new HTMLPurifier_Token_Text(
+ $generator->generateFromToken($token)
+ );
+ } else {
+ // check if we need to destroy all of the tag's children
+ // CAN BE GENERICIZED
+ if (isset($hidden_elements[$token->name])) {
+ if ($token instanceof HTMLPurifier_Token_Start) {
+ $remove_until = $token->name;
+ } elseif ($token instanceof HTMLPurifier_Token_Empty) {
+ // do nothing: we're still looking
+ } else {
+ $remove_until = false;
+ }
+ if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
+ } else {
+ if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
+ }
+ continue;
+ }
+ } elseif ($token instanceof HTMLPurifier_Token_Comment) {
+ // textify comments in script tags when they are allowed
+ if ($textify_comments !== false) {
+ $data = $token->data;
+ $token = new HTMLPurifier_Token_Text($data);
+ } elseif ($trusted) {
+ // keep, but perform comment cleaning
+ if ($e) {
+ // perform check whether or not there's a trailing hyphen
+ if (substr($token->data, -1) == '-') {
+ $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
+ }
+ }
+ $token->data = rtrim($token->data, '-');
+ $found_double_hyphen = false;
+ while (strpos($token->data, '--') !== false) {
+ if ($e && !$found_double_hyphen) {
+ $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
+ }
+ $found_double_hyphen = true; // prevent double-erroring
+ $token->data = str_replace('--', '-', $token->data);
+ }
+ } else {
+ // strip comments
+ if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
+ continue;
+ }
+ } elseif ($token instanceof HTMLPurifier_Token_Text) {
+ } else {
+ continue;
+ }
+ $result[] = $token;
+ }
+ if ($remove_until && $e) {
+ // we removed tokens until the end, throw error
+ $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
+ }
+
+ $context->destroy('CurrentToken');
+
+ return $result;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Strategy/ValidateAttributes.php b/lib/htmlpurifier/library/HTMLPurifier/Strategy/ValidateAttributes.php
new file mode 100755
index 000000000..c3328a9d4
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Strategy/ValidateAttributes.php
@@ -0,0 +1,39 @@
+register('CurrentToken', $token);
+
+ foreach ($tokens as $key => $token) {
+
+ // only process tokens that have attributes,
+ // namely start and empty tags
+ if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof HTMLPurifier_Token_Empty) continue;
+
+ // skip tokens that are armored
+ if (!empty($token->armor['ValidateAttributes'])) continue;
+
+ // note that we have no facilities here for removing tokens
+ $validator->validateToken($token, $config, $context);
+
+ $tokens[$key] = $token; // for PHP 4
+ }
+ $context->destroy('CurrentToken');
+
+ return $tokens;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/StringHash.php b/lib/htmlpurifier/library/HTMLPurifier/StringHash.php
new file mode 100755
index 000000000..62085c5c2
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/StringHash.php
@@ -0,0 +1,39 @@
+accessed[$index] = true;
+ return parent::offsetGet($index);
+ }
+
+ /**
+ * Returns the lookup array stored in $this->accessed, i.e. every index
+ * that has been recorded as read since the last resetAccessed() call.
+ * @note Calling this method does not clear the record.
+ * @return Array in form array($index => true).
+ */
+ public function getAccessed() {
+ return $this->accessed;
+ }
+
+ /**
+ * Resets the access array to an empty array, discarding every index
+ * recorded so far. The stored values themselves are not touched.
+ */
+ public function resetAccessed() {
+ $this->accessed = array();
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/StringHashParser.php b/lib/htmlpurifier/library/HTMLPurifier/StringHashParser.php
new file mode 100755
index 000000000..f3e70c712
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/StringHashParser.php
@@ -0,0 +1,110 @@
+ 'DefaultKeyValue',
+ * 'KEY' => 'Value',
+ * 'KEY2' => 'Value2',
+ * 'MULTILINE-KEY' => "Multiline\nvalue.\n",
+ * )
+ *
+ * We use this as an easy to use file-format for configuration schema
+ * files, but the class itself is usage agnostic.
+ *
+ * You can use ---- to forcibly terminate parsing of a single string-hash;
+ * this marker is used in multi string-hashes to delimit boundaries.
+ */
+class HTMLPurifier_StringHashParser
+{
+
+    /**
+     * Key given to a single-line declaration that has no "KEY:" prefix.
+     */
+    public $default = 'ID';
+
+    /**
+     * Parses a file that contains a single string-hash.
+     * @param $file Path of the file to read
+     * @return Parsed array, or false when the file does not exist or
+     *         cannot be opened
+     */
+    public function parseFile($file) {
+        if (!file_exists($file)) {
+            return false;
+        }
+        $handle = fopen($file, 'r');
+        if (!$handle) {
+            return false;
+        }
+        $hash = $this->parseHandle($handle);
+        fclose($handle);
+        return $hash;
+    }
+
+    /**
+     * Parses a file that contains multiple string-hashes delimited by '----'
+     * @param $file Path of the file to read
+     * @return List of parsed arrays (one per block), or false when the
+     *         file does not exist or cannot be opened
+     */
+    public function parseMultiFile($file) {
+        if (!file_exists($file)) {
+            return false;
+        }
+        $handle = fopen($file, 'r');
+        if (!$handle) {
+            return false;
+        }
+        $hashes = array();
+        // each parseHandle() call consumes one block up to its '----'
+        while (!feof($handle)) {
+            $hashes[] = $this->parseHandle($handle);
+        }
+        fclose($handle);
+        return $hashes;
+    }
+
+    /**
+     * Internal parser that accepts a file handle.
+     * @note While it's possible to simulate in-memory parsing by using
+     *       custom stream wrappers, if such a use-case arises we should
+     *       factor out the file handle into its own class.
+     * @param $fh File handle with pointer at start of valid string-hash
+     *            block.
+     * @return Array mapping keys to values; reading stops at a '----'
+     *         line or at end of file
+     */
+    protected function parseHandle($fh) {
+        $hash = array();
+        // name of the multiline key currently being filled, false if none
+        $key = false;
+        do {
+            $line = fgets($fh);
+            if ($line === false) {
+                break;
+            }
+            $line = rtrim($line, "\n\r");
+            // blank lines only matter inside a multiline value
+            if ($line === '' && !$key) {
+                continue;
+            }
+            if ($line === '----') {
+                break;
+            }
+            if (strncmp('--#', $line, 3) === 0) {
+                // Comment
+                continue;
+            }
+            if (strncmp('--', $line, 2) === 0) {
+                // Multiline declaration: --KEY-- opens a new value
+                $key = trim($line, '- ');
+                if (!isset($hash[$key])) {
+                    $hash[$key] = '';
+                }
+                continue;
+            }
+            if ($key) {
+                // continuation of the open multiline value
+                $hash[$key] .= "$line\n";
+                continue;
+            }
+            if (strpos($line, ':') !== false) {
+                // Single-line declaration
+                list($key, $line) = explode(':', $line, 2);
+                $line = trim($line);
+            } else {
+                // Use default declaration
+                $key = $this->default;
+            }
+            $hash[$key] = $line;
+            // a single-line declaration never stays open
+            $key = false;
+        } while (!feof($fh));
+        return $hash;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/TagTransform.php b/lib/htmlpurifier/library/HTMLPurifier/TagTransform.php
new file mode 100755
index 000000000..210a44721
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/TagTransform.php
@@ -0,0 +1,36 @@
+ 'xx-small',
+ '1' => 'xx-small',
+ '2' => 'small',
+ '3' => 'medium',
+ '4' => 'large',
+ '5' => 'x-large',
+ '6' => 'xx-large',
+ '7' => '300%',
+ '-1' => 'smaller',
+ '-2' => '60%',
+ '+1' => 'larger',
+ '+2' => '150%',
+ '+3' => '200%',
+ '+4' => '300%'
+ );
+
+    /**
+     * Renames the tag to $this->transform_to and converts the color, face
+     * and size attributes into CSS declarations prepended to the style
+     * attribute.
+     * @param $tag Token to transform; an End token is only renamed
+     * @param $config Not used by this transform
+     * @param $context Not used by this transform
+     * @return Clone of $tag with the new name and, for non-End tokens,
+     *         the rewritten attribute array
+     */
+    public function transform($tag, $config, $context) {
+
+        if ($tag instanceof HTMLPurifier_Token_End) {
+            $new_tag = clone $tag;
+            $new_tag->name = $this->transform_to;
+            return $new_tag;
+        }
+
+        $attr = $tag->attr;
+        $prepend_style = '';
+
+        // handle color transform
+        if (isset($attr['color'])) {
+            $prepend_style .= 'color:' . $attr['color'] . ';';
+            unset($attr['color']);
+        }
+
+        // handle face transform
+        if (isset($attr['face'])) {
+            $prepend_style .= 'font-family:' . $attr['face'] . ';';
+            unset($attr['face']);
+        }
+
+        // handle size transform
+        if (isset($attr['size'])) {
+            // normalize large numbers; an empty value has no first
+            // character to inspect and simply yields no font-size
+            if ($attr['size'] !== '') {
+                // square-bracket offset: the {0} form no longer parses on PHP 8
+                if ($attr['size'][0] == '+' || $attr['size'][0] == '-') {
+                    $size = (int) $attr['size'];
+                    if ($size < -2) $attr['size'] = '-2';
+                    if ($size > 4)  $attr['size'] = '+4';
+                } else {
+                    $size = (int) $attr['size'];
+                    if ($size > 7) $attr['size'] = '7';
+                }
+            }
+            if (isset($this->_size_lookup[$attr['size']])) {
+                $prepend_style .= 'font-size:' .
+                    $this->_size_lookup[$attr['size']] . ';';
+            }
+            unset($attr['size']);
+        }
+
+        if ($prepend_style) {
+            // generated declarations go first so existing style rules win
+            $attr['style'] = isset($attr['style']) ?
+                $prepend_style . $attr['style'] :
+                $prepend_style;
+        }
+
+        $new_tag = clone $tag;
+        $new_tag->name = $this->transform_to;
+        $new_tag->attr = $attr;
+
+        return $new_tag;
+
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/TagTransform/Simple.php b/lib/htmlpurifier/library/HTMLPurifier/TagTransform/Simple.php
new file mode 100755
index 000000000..0e36130f2
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/TagTransform/Simple.php
@@ -0,0 +1,35 @@
+transform_to = $transform_to;
+ $this->style = $style;
+ }
+
+    /**
+     * Clones the token under the name $this->transform_to; when a style is
+     * configured and the token is a Start or Empty tag, that style is
+     * passed to prependCSS() for the clone's attributes.
+     * @param $tag Token to transform
+     * @param $config Not used by this transform
+     * @param $context Not used by this transform
+     * @return Renamed clone of $tag
+     */
+    public function transform($tag, $config, $context) {
+        $renamed = clone $tag;
+        $renamed->name = $this->transform_to;
+        if (is_null($this->style)) {
+            return $renamed;
+        }
+        // only tags that carry attributes can receive the style
+        $has_attr = $renamed instanceof HTMLPurifier_Token_Start
+            || $renamed instanceof HTMLPurifier_Token_Empty;
+        if ($has_attr) {
+            $this->prependCSS($renamed->attr, $this->style);
+        }
+        return $renamed;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Token.php b/lib/htmlpurifier/library/HTMLPurifier/Token.php
new file mode 100755
index 000000000..7900e6cb1
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Token.php
@@ -0,0 +1,57 @@
+line = $l;
+ $this->col = $c;
+ }
+
+ /**
+ * Convenience function for DirectLex settings line/col position.
+ * Stores the given line and column on this token; when the column is
+ * exactly -1 the line number is incremented by one before being stored.
+ * @param $l Line number
+ * @param $c Column number
+ */
+ public function rawPosition($l, $c) {
+ if ($c === -1) $l++;
+ $this->line = $l;
+ $this->col = $c;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Token/Comment.php b/lib/htmlpurifier/library/HTMLPurifier/Token/Comment.php
new file mode 100755
index 000000000..dc6bdcabb
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Token/Comment.php
@@ -0,0 +1,22 @@
+data = $data;
+ $this->line = $line;
+ $this->col = $col;
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Token/Empty.php b/lib/htmlpurifier/library/HTMLPurifier/Token/Empty.php
new file mode 100755
index 000000000..2a82b47ad
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Token/Empty.php
@@ -0,0 +1,11 @@
+!empty($obj->is_tag)
+ * without having to use a function call is_a().
+ */
+ public $is_tag = true;
+
+ /**
+ * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
+ *
+ * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
+ * be lower-casing them, but these tokens cater to HTML tags, which are
+ * insensitive.
+ */
+ public $name;
+
+ /**
+ * Associative array of the tag's attributes.
+ */
+ public $attr = array();
+
+    /**
+     * Non-overloaded constructor; stores the tag name and attribute keys
+     * in lower case.
+     *
+     * @param $name String name.
+     * @param $attr Associative array of attributes. If two keys differ only
+     *              by case, an already-lowercase key keeps its value.
+     * @param $line Line number stored on the token
+     * @param $col  Column number stored on the token
+     */
+    public function __construct($name, $attr = array(), $line = null, $col = null) {
+        if (ctype_lower($name)) {
+            $this->name = $name;
+        } else {
+            $this->name = strtolower($name);
+        }
+        foreach ($attr as $key => $value) {
+            // normalization only necessary when key is not lowercase
+            if (ctype_lower($key)) {
+                continue;
+            }
+            $lowered = strtolower($key);
+            if (!isset($attr[$lowered])) {
+                $attr[$lowered] = $attr[$key];
+            }
+            if ($lowered !== $key) {
+                unset($attr[$key]);
+            }
+        }
+        $this->attr = $attr;
+        $this->line = $line;
+        $this->col  = $col;
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Token/Text.php b/lib/htmlpurifier/library/HTMLPurifier/Token/Text.php
new file mode 100755
index 000000000..82efd823d
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Token/Text.php
@@ -0,0 +1,33 @@
+data = $data;
+ $this->is_whitespace = ctype_space($data);
+ $this->line = $line;
+ $this->col = $col;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/TokenFactory.php b/lib/htmlpurifier/library/HTMLPurifier/TokenFactory.php
new file mode 100755
index 000000000..7cf48fb41
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/TokenFactory.php
@@ -0,0 +1,94 @@
+p_start = new HTMLPurifier_Token_Start('', array());
+ $this->p_end = new HTMLPurifier_Token_End('');
+ $this->p_empty = new HTMLPurifier_Token_Empty('', array());
+ $this->p_text = new HTMLPurifier_Token_Text('');
+ $this->p_comment= new HTMLPurifier_Token_Comment('');
+ }
+
+    /**
+     * Builds a start-tag token by cloning the $this->p_start prototype and
+     * re-running its constructor.
+     * @param $name Tag name
+     * @param $attr Associative array of attributes
+     * @return Generated HTMLPurifier_Token_Start
+     */
+    public function createStart($name, $attr = array()) {
+        $token = clone $this->p_start;
+        $token->__construct($name, $attr);
+        return $token;
+    }
+
+    /**
+     * Builds an end-tag token by cloning the $this->p_end prototype and
+     * re-running its constructor.
+     * @param $name Tag name
+     * @return Generated HTMLPurifier_Token_End
+     */
+    public function createEnd($name) {
+        $token = clone $this->p_end;
+        $token->__construct($name);
+        return $token;
+    }
+
+    /**
+     * Builds an empty-tag token by cloning the $this->p_empty prototype and
+     * re-running its constructor.
+     * @param $name Tag name
+     * @param $attr Associative array of attributes
+     * @return Generated HTMLPurifier_Token_Empty
+     */
+    public function createEmpty($name, $attr = array()) {
+        $token = clone $this->p_empty;
+        $token->__construct($name, $attr);
+        return $token;
+    }
+
+    /**
+     * Builds a text token by cloning the $this->p_text prototype and
+     * re-running its constructor.
+     * @param $data Data of text token
+     * @return Generated HTMLPurifier_Token_Text
+     */
+    public function createText($data) {
+        $token = clone $this->p_text;
+        $token->__construct($data);
+        return $token;
+    }
+
+    /**
+     * Builds a comment token by cloning the $this->p_comment prototype and
+     * re-running its constructor.
+     * @param $data Data of comment token
+     * @return Generated HTMLPurifier_Token_Comment
+     */
+    public function createComment($data) {
+        $token = clone $this->p_comment;
+        $token->__construct($data);
+        return $token;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URI.php b/lib/htmlpurifier/library/HTMLPurifier/URI.php
new file mode 100755
index 000000000..8b50d0d18
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URI.php
@@ -0,0 +1,173 @@
+scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
+ $this->userinfo = $userinfo;
+ $this->host = $host;
+ $this->port = is_null($port) ? $port : (int) $port;
+ $this->path = $path;
+ $this->query = $query;
+ $this->fragment = $fragment;
+ }
+
+    /**
+     * Looks up the scheme object for this URI's scheme, falling back to the
+     * URI definition's default scheme when the URI has none.
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return Scheme object appropriate for validating this URI, or false
+     *         when the registry yields none (a warning is raised only in
+     *         the default-scheme case)
+     */
+    public function getSchemeObj($config, $context) {
+        $registry = HTMLPurifier_URISchemeRegistry::instance();
+
+        if ($this->scheme !== null) {
+            $explicit = $registry->getScheme($this->scheme, $config, $context);
+            // invalid scheme, clean it out
+            return $explicit ? $explicit : false;
+        }
+
+        // no scheme: retrieve the default one
+        $def = $config->getDefinition('URI');
+        $fallback = $registry->getScheme($def->defaultScheme, $config, $context);
+        if ($fallback) {
+            return $fallback;
+        }
+
+        // something funky happened to the default scheme object
+        trigger_error(
+            'Default scheme object "' . $def->defaultScheme . '" was not readable',
+            E_USER_WARNING
+        );
+        return false;
+    }
+
+    /**
+     * Generic validation method applicable for all schemes. May modify
+     * this URI in order to get it into a compliant form: a scheme equal to
+     * the default is dropped when there is no host, an invalid host or an
+     * out-of-range port is nulled, and userinfo, path, query and fragment
+     * are percent-encoded.
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return True if validation/filtering succeeds, false if failure
+     *         (this implementation always returns true)
+     */
+    public function validate($config, $context) {
+
+        // ABNF character sets from RFC 3986 that are passed to the encoders
+        $chars_sub_delims = '!$&\'()*+,;=';
+        $chars_pchar = $chars_sub_delims . ':@';
+
+        // validate scheme (MUST BE FIRST!)
+        // the path branches below test whether the scheme is null
+        if (!is_null($this->scheme) && is_null($this->host)) {
+            $def = $config->getDefinition('URI');
+            if ($def->defaultScheme === $this->scheme) {
+                $this->scheme = null;
+            }
+        }
+
+        // validate host
+        if (!is_null($this->host)) {
+            $host_def = new HTMLPurifier_AttrDef_URI_Host();
+            $this->host = $host_def->validate($this->host, $config, $context);
+            if ($this->host === false) $this->host = null;
+        }
+
+        // validate username
+        if (!is_null($this->userinfo)) {
+            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
+            $this->userinfo = $encoder->encode($this->userinfo);
+        }
+
+        // validate port
+        if (!is_null($this->port)) {
+            if ($this->port < 1 || $this->port > 65535) $this->port = null;
+        }
+
+        // validate path
+        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
+        if (!is_null($this->host)) {
+            // path-abempty (hier and relative)
+            $this->path = $segments_encoder->encode($this->path);
+        } elseif ($this->path !== '' && $this->path[0] === '/') {
+            // path-absolute (hier and relative)
+            if (strlen($this->path) >= 2 && $this->path[1] === '/') {
+                // This shouldn't ever happen!
+                // a leading "//" with no host is discarded entirely
+                $this->path = '';
+            } else {
+                $this->path = $segments_encoder->encode($this->path);
+            }
+        } elseif (!is_null($this->scheme) && $this->path !== '') {
+            // path-rootless (hier)
+            // Short circuit evaluation means we don't need to check nz
+            $this->path = $segments_encoder->encode($this->path);
+        } elseif (is_null($this->scheme) && $this->path !== '') {
+            // path-noscheme (relative)
+            // (once again, not checking nz)
+            // the first segment uses a set without ':' so that a colon in
+            // it is encoded
+            $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
+            $c = strpos($this->path, '/');
+            if ($c !== false) {
+                $this->path =
+                    $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
+                    $segments_encoder->encode(substr($this->path, $c));
+            } else {
+                $this->path = $segment_nc_encoder->encode($this->path);
+            }
+        } else {
+            // path-empty (hier and relative)
+            $this->path = ''; // just to be safe
+        }
+
+        // qf = query and fragment
+        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');
+
+        if (!is_null($this->query)) {
+            $this->query = $qf_encoder->encode($this->query);
+        }
+
+        if (!is_null($this->fragment)) {
+            $this->fragment = $qf_encoder->encode($this->fragment);
+        }
+
+        return true;
+
+    }
+
+    /**
+     * Convert URI back to string
+     * @note The authority part ("//", optional userinfo, host, optional
+     *       port) is emitted only when the host is not null.
+     * @return String URI appropriate for output
+     */
+    public function toString() {
+        $out = '';
+
+        if (!is_null($this->scheme)) {
+            $out .= $this->scheme . ':';
+        }
+
+        // authority: userinfo@host:port
+        if (!is_null($this->host)) {
+            $out .= '//';
+            if (!is_null($this->userinfo)) {
+                $out .= $this->userinfo . '@';
+            }
+            $out .= $this->host;
+            if (!is_null($this->port)) {
+                $out .= ':' . $this->port;
+            }
+        }
+
+        $out .= $this->path;
+
+        if (!is_null($this->query)) {
+            $out .= '?' . $this->query;
+        }
+        if (!is_null($this->fragment)) {
+            $out .= '#' . $this->fragment;
+        }
+
+        return $out;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php b/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php
new file mode 100755
index 000000000..183fcfc7e
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php
@@ -0,0 +1,93 @@
+registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
+ $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
+ $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
+ $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
+ $this->registerFilter(new HTMLPurifier_URIFilter_Munge());
+ }
+
+    /**
+     * Stores a filter in $this->registeredFilters under its own name,
+     * replacing any filter already stored under that name.
+     * @param $filter Filter object exposing a ->name property
+     */
+    public function registerFilter($filter) {
+        $this->registeredFilters[$filter->name] = $filter;
+    }
+
+    /**
+     * Prepares a filter and adds it to the post-filter list when its
+     * ->post flag is set, otherwise to the regular filter list. A filter
+     * whose prepare() returns exactly false is not added.
+     * @param $filter Filter object to prepare and add
+     * @param $config Configuration handed to $filter->prepare()
+     */
+    public function addFilter($filter, $config) {
+        // null is ok, for backwards compat
+        if ($filter->prepare($config) === false) {
+            return;
+        }
+        if (!$filter->post) {
+            $this->filters[$filter->name] = $filter;
+            return;
+        }
+        $this->postFilters[$filter->name] = $filter;
+    }
+
+    /**
+     * Runs the two setup steps in order: member variables first, then
+     * filters.
+     * @param $config Configuration passed to both steps
+     */
+    protected function doSetup($config) {
+        $this->setupMemberVariables($config);
+        $this->setupFilters($config);
+    }
+
+    /**
+     * Adds every registered filter whose URI.<name> directive is neither
+     * false nor null, then removes the registeredFilters property.
+     * @param $config Configuration to read the directives from
+     */
+    protected function setupFilters($config) {
+        foreach ($this->registeredFilters as $name => $filter) {
+            $setting = $config->get('URI', $name);
+            if ($setting === false || $setting === null) {
+                continue;
+            }
+            $this->addFilter($filter, $config);
+        }
+        unset($this->registeredFilters);
+    }
+
+    /**
+     * Fills host, base and defaultScheme from the URI.Host, URI.Base and
+     * URI.DefaultScheme directives. When URI.Base is set, its parsed scheme
+     * becomes the default scheme and its host is used if URI.Host is null.
+     * @param $config Configuration to read the directives from
+     */
+    protected function setupMemberVariables($config) {
+        $this->host = $config->get('URI', 'Host');
+
+        $base_uri = $config->get('URI', 'Base');
+        if ($base_uri !== null) {
+            $uri_parser = new HTMLPurifier_URIParser();
+            $this->base = $uri_parser->parse($base_uri);
+            $this->defaultScheme = $this->base->scheme;
+            if ($this->host === null) {
+                $this->host = $this->base->host;
+            }
+        }
+
+        // URI.DefaultScheme is used only when no scheme came from the base
+        if ($this->defaultScheme === null) {
+            $this->defaultScheme = $config->get('URI', 'DefaultScheme');
+        }
+    }
+
+    /**
+     * Runs $uri through every filter in $this->filters in order, stopping
+     * at the first one that returns a falsy result.
+     * @param $uri URI object, passed by reference to each filter
+     * @param $config Configuration handed to each filter
+     * @param $context Context handed to each filter
+     * @return True if every filter accepted the URI, false otherwise
+     */
+    public function filter(&$uri, $config, $context) {
+        foreach ($this->filters as $f) {
+            if (!$f->filter($uri, $config, $context)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Runs $uri through every filter in $this->postFilters in order,
+     * stopping at the first one that returns a falsy result.
+     * @param $uri URI object, passed by reference to each filter
+     * @param $config Configuration handed to each filter
+     * @param $context Context handed to each filter
+     * @return True if every post-filter accepted the URI, false otherwise
+     */
+    public function postFilter(&$uri, $config, $context) {
+        foreach ($this->postFilters as $f) {
+            if (!$f->filter($uri, $config, $context)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter.php
new file mode 100755
index 000000000..c116f93df
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter.php
@@ -0,0 +1,45 @@
+getDefinition('URI')->host;
+ if ($our_host !== null) $this->ourHostParts = array_reverse(explode('.', $our_host));
+ }
+    /**
+     * Accepts a URI only if it has no host, or if its host labels, read
+     * from the right, match every label in $this->ourHostParts (extra
+     * labels on the left are allowed). If $this->ourHostParts is false,
+     * every URI that has a host is rejected.
+     * @param $uri URI object to check (not modified here)
+     * @param $config Not used by this filter
+     * @param $context Not used by this filter
+     * @return True to keep the URI, false to reject it
+     */
+    public function filter(&$uri, $config, $context) {
+        if (is_null($uri->host)) {
+            return true;
+        }
+        if ($this->ourHostParts === false) {
+            return false;
+        }
+        $candidate = array_reverse(explode('.', $uri->host));
+        foreach ($this->ourHostParts as $depth => $label) {
+            if (!isset($candidate[$depth]) || $candidate[$depth] != $label) {
+                return false;
+            }
+        }
+        return true;
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternalResources.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternalResources.php
new file mode 100755
index 000000000..881abc43c
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternalResources.php
@@ -0,0 +1,12 @@
+get('EmbeddedURI', true)) return true;
+ return parent::filter($uri, $config, $context);
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php
new file mode 100755
index 000000000..bac56e8b8
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php
@@ -0,0 +1,21 @@
+blacklist = $config->get('URI', 'HostBlacklist');
+ return true;
+ }
+    /**
+     * Rejects a URI whose host contains any fragment listed in
+     * $this->blacklist.
+     * @param $uri URI object to check (not modified here)
+     * @param $config Not used by this filter
+     * @param $context Not used by this filter
+     * @return False if the host contains a blacklisted fragment, true
+     *         otherwise (including when the URI has no host)
+     */
+    public function filter(&$uri, $config, $context) {
+        // A URI without a host has nothing to match against; bail out
+        // before strpos() is handed null.
+        if ($uri->host === null) {
+            return true;
+        }
+        foreach ($this->blacklist as $blacklisted_host_fragment) {
+            if (strpos($uri->host, $blacklisted_host_fragment) !== false) {
+                return false;
+            }
+        }
+        return true;
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/MakeAbsolute.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/MakeAbsolute.php
new file mode 100755
index 000000000..f46ab2630
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/MakeAbsolute.php
@@ -0,0 +1,114 @@
+getDefinition('URI');
+ $this->base = $def->base;
+ if (is_null($this->base)) {
+ trigger_error('URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration', E_USER_WARNING);
+ return false;
+ }
+ $this->base->fragment = null; // fragment is invalid for base URI
+ $stack = explode('/', $this->base->path);
+ array_pop($stack); // discard last segment
+ $stack = $this->_collapseStack($stack); // do pre-parsing
+ $this->basePathStack = $stack;
+ return true;
+ }
+ public function filter(&$uri, $config, $context) { // resolves $uri against $this->base in place; returns false only when no scheme object is found for an explicit scheme
+ if (is_null($this->base)) return true; // abort early: no base to resolve against
+ if (
+ $uri->path === '' && is_null($uri->scheme) &&
+ is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment)
+ ) {
+ // completely empty reference: it refers to the current document,
+ // so replace it with a copy of the base URI
+ $uri = clone $this->base;
+ return true;
+ }
+ if (!is_null($uri->scheme)) {
+ // scheme and host both present: absolute URI already, don't change
+ if (!is_null($uri->host)) return true;
+ $scheme_obj = $uri->getSchemeObj($config, $context);
+ if (!$scheme_obj) {
+ // scheme not recognized
+ return false;
+ }
+ if (!$scheme_obj->hierarchical) {
+ // non-hierarchical URI with explicit scheme, don't change
+ return true;
+ }
+ // special case: explicit hierarchical scheme but no authority;
+ // fall through and resolve the path against the base
+ }
+ if (!is_null($uri->host)) {
+ // network path (host without scheme), don't bother
+ return true;
+ }
+ if ($uri->path === '') {
+ $uri->path = $this->base->path; // empty path inherits the base path
+ } elseif ($uri->path[0] !== '/') {
+ // relative path: append its segments to the base directory
+ // stack, then collapse dot segments
+ $stack = explode('/', $uri->path);
+ $new_stack = array_merge($this->basePathStack, $stack);
+ if ($new_stack[0] !== '' && !is_null($this->base->host)) {
+ array_unshift($new_stack, ''); // leading '' yields a leading slash after implode
+ }
+ $new_stack = $this->_collapseStack($new_stack);
+ $uri->path = implode('/', $new_stack);
+ } else {
+ // absolute path, but still we should collapse
+ $uri->path = implode('/', $this->_collapseStack(explode('/', $uri->path)));
+ }
+ // re-combine: scheme always comes from the base; userinfo, host and
+ // port are filled from the base only where the URI has none
+ $uri->scheme = $this->base->scheme;
+ if (is_null($uri->userinfo)) $uri->userinfo = $this->base->userinfo;
+ if (is_null($uri->host)) $uri->host = $this->base->host;
+ if (is_null($uri->port)) $uri->port = $this->base->port;
+ return true;
+ }
+
+ /**
+ * Resolve dots and double-dots in a path stack
+ * @param $stack List of path segments with consecutive integer keys
+ * starting at 0; iteration stops at the first unset index.
+ * @return New list of segments with '.' removed and '..' applied to the
+ * preceding segment where possible. When the last processed
+ * segment was '.' or '..', an empty segment is appended.
+ */
+ private function _collapseStack($stack) {
+ $result = array();
+ $is_folder = false; // true when the most recent segment was '.' or '..'
+ for ($i = 0; isset($stack[$i]); $i++) {
+ $is_folder = false;
+ // absorb an internally duplicated slash (an empty segment that is
+ // neither the first nor the last)
+ if ($stack[$i] == '' && $i && isset($stack[$i+1])) continue;
+ if ($stack[$i] == '..') {
+ if (!empty($result)) {
+ $segment = array_pop($result);
+ if ($segment === '' && empty($result)) {
+ // error case: attempted to back out too far:
+ // restore the leading slash
+ $result[] = '';
+ } elseif ($segment === '..') {
+ $result[] = '..'; // cannot remove .. with ..
+ }
+ } else {
+ // relative path, preserve the double-dots
+ $result[] = '..';
+ }
+ $is_folder = true;
+ continue;
+ }
+ if ($stack[$i] == '.') {
+ // silently absorb
+ $is_folder = true;
+ continue;
+ }
+ $result[] = $stack[$i];
+ }
+ if ($is_folder) $result[] = ''; // trailing '' becomes a trailing slash once imploded
+ return $result;
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php
new file mode 100755
index 000000000..29ed0ed1f
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php
@@ -0,0 +1,54 @@
+target = $config->get('URI', $this->name);
+ $this->parser = new HTMLPurifier_URIParser();
+ $this->doEmbed = $config->get('URI', 'MungeResources');
+ $this->secretKey = $config->get('URI', 'MungeSecretKey');
+ return true;
+ }
+    /**
+     * Replaces $uri with the URI obtained by substituting the prepared
+     * replacement values into $this->target. The URI is left untouched when
+     * it is an embedded resource and resource munging is off, when no
+     * scheme object is available, when it has no host, when its scheme is
+     * not browsable, or when the munged URI has the same host.
+     * @param $uri URI object, replaced in place when munging applies
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return Always true
+     */
+    public function filter(&$uri, $config, $context) {
+        if ($context->get('EmbeddedURI', true) && !$this->doEmbed) {
+            return true;
+        }
+
+        $scheme_obj = $uri->getSchemeObj($config, $context);
+        // ignore unknown schemes, maybe another postfilter did it
+        if (!$scheme_obj || is_null($uri->host) || empty($scheme_obj->browsable)) {
+            return true;
+        }
+
+        $this->makeReplace($uri, $config, $context);
+        $this->replace = array_map('rawurlencode', $this->replace);
+
+        $munged_string = strtr($this->target, $this->replace);
+        $munged_uri = $this->parser->parse($munged_string);
+
+        // don't redirect if the target host is the same as the
+        // starting host
+        if ($uri->host !== $munged_uri->host) {
+            $uri = $munged_uri; // overwrite
+        }
+        return true;
+    }
+
+    /**
+     * Fills $this->replace with the placeholder => value pairs for the
+     * given URI: %s (URI string), %r (EmbeddedURI), %n (current token
+     * name), %m (current attribute), %p (current CSS property) and, when a
+     * secret key is configured, %t (sha1 of "<key>:<URI string>").
+     * @param $uri URI object being munged
+     * @param $config Not used here
+     * @param $context Context the optional values are read from
+     */
+    protected function makeReplace($uri, $config, $context) {
+        $uri_string = $uri->toString();
+        $current_token = $context->get('CurrentToken', true);
+
+        // always available
+        $this->replace['%s'] = $uri_string;
+        $this->replace['%r'] = $context->get('EmbeddedURI', true);
+        if ($current_token) {
+            $this->replace['%n'] = $current_token->name;
+        } else {
+            $this->replace['%n'] = null;
+        }
+        $this->replace['%m'] = $context->get('CurrentAttr', true);
+        $this->replace['%p'] = $context->get('CurrentCSSProperty', true);
+
+        // not always available
+        if ($this->secretKey) {
+            $this->replace['%t'] = sha1($this->secretKey . ':' . $uri_string);
+        }
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIParser.php b/lib/htmlpurifier/library/HTMLPurifier/URIParser.php
new file mode 100755
index 000000000..7179e4ab8
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIParser.php
@@ -0,0 +1,70 @@
+percentEncoder = new HTMLPurifier_PercentEncoder();
+ }
+
+    /**
+     * Parses a URI.
+     * The string is first passed through the percent encoder's normalize(),
+     * then split into scheme, authority, path, query and fragment; the
+     * authority is further split into userinfo, host and port.
+     * @param $uri string URI to parse
+     * @return HTMLPurifier_URI representation of URI. This representation has
+     *         not been validated yet and may not conform to RFC. False is
+     *         returned if the splitting pattern does not match.
+     */
+    public function parse($uri) {
+
+        $uri = $this->percentEncoder->normalize($uri);
+
+        // Regexp is as per Appendix B.
+        // Note that ["<>] are an addition to the RFC's recommended
+        // characters, because they represent external delimiters.
+        $r_URI = '!'.
+            '(([^:/?#"<>]+):)?'. // 2. Scheme
+            '(//([^/?#"<>]*))?'. // 4. Authority
+            '([^?#"<>]*)'.       // 5. Path
+            '(\?([^#"<>]*))?'.   // 7. Query
+            '(#([^"<>]*))?'.     // 8. Fragment
+            '!';
+
+        $parts = array();
+        if (!preg_match($r_URI, $uri, $parts)) {
+            return false; // *really* invalid URI
+        }
+
+        // separate out parts; a component is null when its delimiter
+        // group did not match anything
+        $scheme    = !empty($parts[1]) ? $parts[2] : null;
+        $authority = !empty($parts[3]) ? $parts[4] : null;
+        $path      = $parts[5]; // always present, can be empty
+        $query     = !empty($parts[6]) ? $parts[7] : null;
+        $fragment  = !empty($parts[8]) ? $parts[9] : null;
+
+        // further parse authority
+        $userinfo = null;
+        $host = null;
+        $port = null;
+        if ($authority !== null) {
+            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
+            $auth_parts = array();
+            preg_match($r_authority, $authority, $auth_parts);
+            if (!empty($auth_parts[1])) {
+                $userinfo = $auth_parts[2];
+            }
+            // an authority is present, so the host is at least ''
+            $host = !empty($auth_parts[3]) ? $auth_parts[3] : '';
+            if (!empty($auth_parts[4])) {
+                $port = (int) $auth_parts[5];
+            }
+        }
+
+        return new HTMLPurifier_URI(
+            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php
new file mode 100755
index 000000000..039710fd1
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php
@@ -0,0 +1,42 @@
+, resolves edge cases
+ * with making relative URIs absolute
+ */
+ public $hierarchical = false;
+
+    /**
+     * Validates the components of a URI. This base implementation only
+     * nulls the port when it loosely equals the scheme's default port.
+     * @note This implementation should be called by children if they define
+     *       a default port, as it does port processing.
+     * @param $uri Instance of HTMLPurifier_URI
+     * @param $config HTMLPurifier_Config object
+     * @param $context HTMLPurifier_Context object
+     * @return Bool success or failure (always true here)
+     */
+    public function validate(&$uri, $config, $context) {
+        if ($uri->port == $this->default_port) {
+            $uri->port = null;
+        }
+        return true;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/ftp.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/ftp.php
new file mode 100755
index 000000000..5849bf7ff
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/ftp.php
@@ -0,0 +1,43 @@
+query = null;
+
+ // typecode check
+ $semicolon_pos = strrpos($uri->path, ';'); // reverse
+ if ($semicolon_pos !== false) {
+ $type = substr($uri->path, $semicolon_pos + 1); // no semicolon
+ $uri->path = substr($uri->path, 0, $semicolon_pos);
+ $type_ret = '';
+ if (strpos($type, '=') !== false) {
+ // figure out whether or not the declaration is correct
+ list($key, $typecode) = explode('=', $type, 2);
+ if ($key !== 'type') {
+ // invalid key, tack it back on encoded
+ $uri->path .= '%3B' . $type;
+ } elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') {
+ $type_ret = ";type=$typecode";
+ }
+ } else {
+ $uri->path .= '%3B' . $type;
+ }
+ $uri->path = str_replace(';', '%3B', $uri->path);
+ $uri->path .= $type_ret;
+ }
+
+ return true;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/http.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/http.php
new file mode 100755
index 000000000..b097a31d6
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/http.php
@@ -0,0 +1,20 @@
+userinfo = null;
+ return true;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php
new file mode 100755
index 000000000..29e380919
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php
@@ -0,0 +1,12 @@
+userinfo = null;
+ $uri->host = null;
+ $uri->port = null;
+ // we need to validate path against RFC 2368's addr-spec
+ return true;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/news.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/news.php
new file mode 100755
index 000000000..f5f54f4f5
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/news.php
@@ -0,0 +1,22 @@
+userinfo = null;
+ $uri->host = null;
+ $uri->port = null;
+ $uri->query = null;
+ // typecode check needed on path
+ return true;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/nntp.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/nntp.php
new file mode 100755
index 000000000..5bf93ea78
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/nntp.php
@@ -0,0 +1,20 @@
+userinfo = null;
+ $uri->query = null;
+ return true;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URISchemeRegistry.php b/lib/htmlpurifier/library/HTMLPurifier/URISchemeRegistry.php
new file mode 100755
index 000000000..d24732c18
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URISchemeRegistry.php
@@ -0,0 +1,69 @@
+get('URI', 'AllowedSchemes');
+ if (!$config->get('URI', 'OverrideAllowedSchemes') &&
+ !isset($allowed_schemes[$scheme])
+ ) {
+ return $null;
+ }
+
+ if (isset($this->schemes[$scheme])) return $this->schemes[$scheme];
+ if (!isset($allowed_schemes[$scheme])) return $null;
+
+ $class = 'HTMLPurifier_URIScheme_' . $scheme;
+ if (!class_exists($class)) return $null;
+ $this->schemes[$scheme] = new $class();
+ return $this->schemes[$scheme];
+ }
+
+ /**
+ * Registers a custom scheme to the cache, bypassing reflection.
+ * @note Any scheme object already stored under the same name is
+ * overwritten.
+ * @param $scheme Scheme name
+ * @param $scheme_obj HTMLPurifier_URIScheme object
+ */
+ public function register($scheme, $scheme_obj) {
+ $this->schemes[$scheme] = $scheme_obj;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/UnitConverter.php b/lib/htmlpurifier/library/HTMLPurifier/UnitConverter.php
new file mode 100755
index 000000000..545d42622
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/UnitConverter.php
@@ -0,0 +1,254 @@
+ array(
+ 'px' => 3, // This is as per CSS 2.1 and Firefox. Your mileage may vary
+ 'pt' => 4,
+ 'pc' => 48,
+ 'in' => 288,
+ self::METRIC => array('pt', '0.352777778', 'mm'),
+ ),
+ self::METRIC => array(
+ 'mm' => 1,
+ 'cm' => 10,
+ self::ENGLISH => array('mm', '2.83464567', 'pt'),
+ ),
+ );
+
+ /**
+ * Minimum bcmath precision for output.
+ */
+ protected $outputPrecision;
+
+ /**
+ * Bcmath precision for internal calculations.
+ */
+ protected $internalPrecision;
+
+ /**
+ * Whether or not BCMath is available
+ */
+ private $bcmath;
+
+    /**
+     * Stores the precision settings and decides whether BCMath will be used.
+     * @param $output_precision Minimum precision for output
+     * @param $internal_precision Precision for internal calculations
+     * @param $force_no_bcmath When true, BCMath is not used even if it is
+     *        available
+     */
+    public function __construct($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false) {
+        // BCMath is used only when bcmul() exists and it was not disabled
+        $this->bcmath            = function_exists('bcmul') && !$force_no_bcmath;
+        $this->internalPrecision = $internal_precision;
+        $this->outputPrecision   = $output_precision;
+    }
+
+    /**
+     * Converts a length object of one unit into another unit.
+     * @param HTMLPurifier_Length $length
+     *      Instance of HTMLPurifier_Length to convert. You must validate()
+     *      it before passing it here!
+     * @param string $to_unit
+     *      Unit to convert to.
+     * @return HTMLPurifier_Length in the requested unit, or false when
+     *      $length is not valid, when either unit is not found in
+     *      self::$units, or when the conversion does not end in $to_unit.
+     *      A value of '0' or a unit of false yields a unit-less zero length.
+     * @note
+     *      About precision: This conversion function pays very special
+     *      attention to the incoming precision of values and attempts
+     *      to maintain the number of significant figures. Results are
+     *      fairly accurate up to nine digits. Some caveats:
+     *          - If a number is zero-padded as a result of this significant
+     *            figure tracking, the zeroes will be eliminated.
+     *          - If a number contains less than four sigfigs ($outputPrecision)
+     *            and this causes some decimals to be excluded, those
+     *            decimals will be added on.
+     */
+    public function convert($length, $to_unit) {
+
+        if (!$length->isValid()) return false;
+
+        $n    = $length->getN();
+        $unit = $length->getUnit();
+
+        if ($n === '0' || $unit === false) {
+            return new HTMLPurifier_Length('0', false);
+        }
+
+        // Find which unit system the source and destination units belong to
+        $state = $dest_state = false;
+        foreach (self::$units as $k => $x) {
+            if (isset($x[$unit])) $state = $k;
+            if (isset($x[$to_unit])) $dest_state = $k;
+        }
+        if (!$state || !$dest_state) return false;
+
+        // Some calculations about the initial precision of the number;
+        // this will be useful when we need to do final rounding.
+        $sigfigs = $this->getSigFigs($n);
+        if ($sigfigs < $this->outputPrecision) $sigfigs = $this->outputPrecision;
+
+        // BCMath's internal precision deals only with decimals. Use
+        // our default if the initial number has no decimals, or increase
+        // it by how ever many decimals, thus, the number of guard digits
+        // will always be greater than or equal to internalPrecision.
+        $log = (int) floor(log(abs($n), 10));
+        $cp = ($log < 0) ? $this->internalPrecision - $log : $this->internalPrecision; // internal precision
+
+        // At most two passes: one inside the source system and, after a
+        // system shift, one inside the destination system.
+        for ($i = 0; $i < 2; $i++) {
+
+            // Determine what unit IN THIS SYSTEM we need to convert to
+            if ($dest_state === $state) {
+                // Simple conversion
+                $dest_unit = $to_unit;
+            } else {
+                // Convert to the smallest unit, pending a system shift
+                $dest_unit = self::$units[$state][$dest_state][0];
+            }
+
+            // Do the conversion if necessary
+            if ($dest_unit !== $unit) {
+                $factor = $this->div(self::$units[$state][$unit], self::$units[$state][$dest_unit], $cp);
+                $n = $this->mul($n, $factor, $cp);
+                $unit = $dest_unit;
+            }
+
+            // Output was zero, so bail out early. Shouldn't ever happen.
+            if ($n === '') {
+                $n = '0';
+                $unit = $to_unit;
+                break;
+            }
+
+            // It was a simple conversion, so bail out
+            if ($dest_state === $state) {
+                break;
+            }
+
+            if ($i !== 0) {
+                // Conversion failed! Apparently, the system we forwarded
+                // to didn't have this unit. This should never happen!
+                return false;
+            }
+
+            // Pre-condition: $i == 0
+
+            // Perform conversion to next system of units
+            $n = $this->mul($n, self::$units[$state][$dest_state][1], $cp);
+            $unit = self::$units[$state][$dest_state][2];
+            $state = $dest_state;
+
+            // One more loop around to convert the unit in the new system.
+
+        }
+
+        // Post-condition: $unit == $to_unit
+        if ($unit !== $to_unit) return false;
+
+        // Useful for debugging:
+        //echo "<pre>n";
+        //echo "$n\nsigfigs = $sigfigs\nnew_log = $new_log\nlog = $log\nrp = $rp\n</pre>\n";
+
+        // Round to the tracked significant figures, then strip trailing
+        // zeroes after the decimal point and a dangling decimal point.
+        $n = $this->round($n, $sigfigs);
+        if (strpos($n, '.') !== false) $n = rtrim($n, '0');
+        $n = rtrim($n, '.');
+
+        return new HTMLPurifier_Length($n, $unit);
+    }
+
+    /**
+     * Counts the significant figures of a number given as a string.
+     * Leading zeroes and sign characters are ignored; for numbers without
+     * a decimal point, trailing zeroes are ignored as well.
+     * @param string $n Decimal number
+     * @return int number of sigfigs
+     */
+    public function getSigFigs($n) {
+        $digits = ltrim($n, '0+-');
+        $point  = strpos($digits, '.'); // position of the decimal point, if any
+        if ($point === false) {
+            return strlen(rtrim($digits, '0'));
+        }
+        $count = strlen(ltrim($digits, '0.'));
+        // When digits precede the decimal point, the point is still part of
+        // the string that was measured, so take it back out of the count.
+        return ($point === 0) ? $count : $count - 1;
+    }
+
+    /**
+     * Sums two numbers. Uses bcadd() when BCMath is enabled; otherwise adds
+     * natively and formats the result through scale().
+     */
+    private function add($s1, $s2, $scale) {
+        if (!$this->bcmath) return $this->scale($s1 + $s2, $scale);
+        return bcadd($s1, $s2, $scale);
+    }
+
+    /**
+     * Multiplies two numbers. Uses bcmul() when BCMath is enabled; otherwise
+     * multiplies natively and formats the result through scale().
+     */
+    private function mul($s1, $s2, $scale) {
+        if (!$this->bcmath) return $this->scale($s1 * $s2, $scale);
+        return bcmul($s1, $s2, $scale);
+    }
+
+    /**
+     * Divides $s1 by $s2. Uses bcdiv() when BCMath is enabled; otherwise
+     * divides natively and formats the result through scale().
+     */
+    private function div($s1, $s2, $scale) {
+        if (!$this->bcmath) return $this->scale($s1 / $s2, $scale);
+        return bcdiv($s1, $s2, $scale);
+    }
+
+    /**
+     * Rounds a number according to the number of sigfigs it should have,
+     * using arbitrary precision when available.
+     * @param $n Number to round
+     * @param $sigfigs Number of significant figures to keep
+     * @return Rounded number as a string
+     */
+    private function round($n, $sigfigs) {
+        $new_log = (int) floor(log(abs($n), 10)); // Number of digits left of decimal - 1
+        $rp = $sigfigs - $new_log - 1; // Number of decimal places needed
+        $neg = $n < 0 ? '-' : ''; // Negative sign
+        if ($this->bcmath) {
+            if ($rp >= 0) {
+                // Add half a unit of the last kept decimal place (signed
+                // like $n), then let bcdiv by 1 cut off to $rp decimals.
+                $n = bcadd($n, $neg . '0.' .  str_repeat('0', $rp) . '5', $rp + 1);
+                $n = bcdiv($n, '1', $rp);
+            } else {
+                // This algorithm partially depends on the standardized
+                // form of numbers that comes out of bcmath.
+                // Add half a unit of the last kept integer digit, keep the
+                // first $sigfigs digits (plus sign) and pad with zeroes.
+                $n = bcadd($n, $neg . '5' . str_repeat('0', $new_log - $sigfigs), 0);
+                $n = substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log - $sigfigs + 1);
+            }
+            return $n;
+        } else {
+            // Native fallback: the precision passed to round() equals $rp
+            return $this->scale(round($n, $sigfigs - $new_log - 1), $rp + 1);
+        }
+    }
+
+    /**
+     * Scales a float to $scale digits right of decimal point, like BCMath.
+     * @param $r Number to format
+     * @param $scale Digits to keep right of the decimal point; negative
+     *        values are handled by the string-based branch below
+     * @return Formatted number as a string
+     */
+    private function scale($r, $scale) {
+        if ($scale < 0) {
+            // The f sprintf type doesn't support negative precision, so we
+            // need to kludge things manually. First get the string.
+            $r = sprintf('%.0f', (float) $r);
+            // Due to floating point precision loss, $r will more than likely
+            // look something like 4652999999999.9234. We grab one more digit
+            // than we need from $r and then use that to round
+            // appropriately.
+            $precise = (string) round(substr($r, 0, strlen($r) + $scale), -1);
+            // Now we return it, truncating the zero that was rounded off.
+            return substr($precise, 0, -1) . str_repeat('0', -$scale + 1);
+        }
+        return sprintf('%.' . $scale . 'f', (float) $r);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/VarParser.php b/lib/htmlpurifier/library/HTMLPurifier/VarParser.php
new file mode 100755
index 000000000..68e72ae86
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/VarParser.php
@@ -0,0 +1,154 @@
+ self::STRING,
+ 'istring' => self::ISTRING,
+ 'text' => self::TEXT,
+ 'itext' => self::ITEXT,
+ 'int' => self::INT,
+ 'float' => self::FLOAT,
+ 'bool' => self::BOOL,
+ 'lookup' => self::LOOKUP,
+ 'list' => self::ALIST,
+ 'hash' => self::HASH,
+ 'mixed' => self::MIXED
+ );
+
+ /**
+ * Lookup table of types that are string, and can have aliases or
+ * allowed value lists.
+ */
+ static public $stringTypes = array(
+ self::STRING => true,
+ self::ISTRING => true,
+ self::TEXT => true,
+ self::ITEXT => true,
+ );
+
+    /**
+     * Validate a variable according to type. Throws
+     * HTMLPurifier_VarParserException if invalid.
+     * It may return NULL as a valid type if $allow_null is true.
+     *
+     * The value is first passed through parseImplementation() and the
+     * result is then checked against the requested type.
+     *
+     * @param $var Variable to validate
+     * @param $type Type of variable, see HTMLPurifier_VarParser->types;
+     *        a string name is translated through that table
+     * @param $allow_null Whether or not to permit null as a value
+     * @return Validated and type-coerced variable
+     * @throws HTMLPurifier_VarParserException on an unknown type name or
+     *         when the value does not pass the check for its type
+     * @throws HTMLPurifier_Exception when $type matches none of the
+     *         cases below (via errorInconsistent())
+     */
+    final public function parse($var, $type, $allow_null = false) {
+        if (is_string($type)) {
+            if (!isset(HTMLPurifier_VarParser::$types[$type])) {
+                throw new HTMLPurifier_VarParserException("Invalid type '$type'");
+            } else {
+                $type = HTMLPurifier_VarParser::$types[$type];
+            }
+        }
+        $var = $this->parseImplementation($var, $type, $allow_null);
+        if ($allow_null && $var === null) return null;
+        // These are basic checks, to make sure nothing horribly wrong
+        // happened in our implementations.
+        // In every case, "break" means the check failed and control falls
+        // through to errorGeneric() after the switch.
+        switch ($type) {
+            case (self::STRING):
+            case (self::ISTRING):
+            case (self::TEXT):
+            case (self::ITEXT):
+                if (!is_string($var)) break;
+                // ISTRING and ITEXT values are lowercased
+                if ($type == self::ISTRING || $type == self::ITEXT) $var = strtolower($var);
+                return $var;
+            case (self::INT):
+                if (!is_int($var)) break;
+                return $var;
+            case (self::FLOAT):
+                if (!is_float($var)) break;
+                return $var;
+            case (self::BOOL):
+                if (!is_bool($var)) break;
+                return $var;
+            case (self::LOOKUP):
+            case (self::ALIST):
+            case (self::HASH):
+                if (!is_array($var)) break;
+                if ($type === self::LOOKUP) {
+                    // Every value of a lookup table must be exactly true
+                    foreach ($var as $k) if ($k !== true) $this->error('Lookup table contains value other than true');
+                } elseif ($type === self::ALIST) {
+                    // A list must be keyed 0..n-1 in order
+                    $keys = array_keys($var);
+                    if (array_keys($keys) !== $keys) $this->error('Indices for list are not uniform');
+                }
+                return $var;
+            case (self::MIXED):
+                return $var;
+            default:
+                $this->errorInconsistent(get_class($this), $type);
+        }
+        $this->errorGeneric($var, $type);
+    }
+
+    /**
+     * Actually implements the parsing. Base implementation is to not
+     * do anything to $var. Subclasses should overload this!
+     * @param $var Variable to parse
+     * @param $type Type constant requested by parse()
+     * @param $allow_null Whether null is permitted; unused in this base
+     *        implementation
+     * @return The parsed value; here, $var unchanged
+     */
+    protected function parseImplementation($var, $type, $allow_null) {
+        return $var;
+    }
+
+    /**
+     * Throws an exception.
+     * @param $msg Message for the exception
+     * @throws HTMLPurifier_VarParserException always
+     */
+    protected function error($msg) {
+        throw new HTMLPurifier_VarParserException($msg);
+    }
+
+    /**
+     * Reports that a parser class does not implement a type by throwing
+     * an HTMLPurifier_Exception.
+     * @note This should not ever be called. It would be called if we
+     *       extend the allowed values of HTMLPurifier_VarParser without
+     *       updating subclasses.
+     * @param $class Name of the class lacking the implementation
+     * @param $type Type constant that is not implemented
+     */
+    protected function errorInconsistent($class, $type) {
+        $type_name = HTMLPurifier_VarParser::getTypeName($type);
+        throw new HTMLPurifier_Exception("Inconsistency in $class: ".$type_name." not implemented");
+    }
+
+    /**
+     * Raises the generic "wrong type" error through error(), naming the
+     * expected type and the PHP type that was actually received.
+     * @param $var Offending value
+     * @param $type Type constant that was expected
+     */
+    protected function errorGeneric($var, $type) {
+        $expected = HTMLPurifier_VarParser::getTypeName($type);
+        $actual   = gettype($var);
+        $this->error("Expected type ".$expected.", got $actual");
+    }
+
+    /**
+     * Translates a type constant back into its string name.
+     * @param $type Type constant
+     * @return Name of the type, or 'unknown' if the constant has no entry
+     *         in HTMLPurifier_VarParser::$types
+     */
+    static public function getTypeName($type) {
+        // Flipped copy of the types table, built on first use and kept
+        // for later calls
+        static $names = null;
+        if (!$names) {
+            $names = array_flip(HTMLPurifier_VarParser::$types);
+        }
+        return isset($names[$type]) ? $names[$type] : 'unknown';
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/VarParser/Flexible.php b/lib/htmlpurifier/library/HTMLPurifier/VarParser/Flexible.php
new file mode 100755
index 000000000..c954250e9
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/VarParser/Flexible.php
@@ -0,0 +1,96 @@
+ $j) $var[$i] = trim($j);
+ if ($type === self::HASH) {
+ // key:value,key2:value2
+ $nvar = array();
+ foreach ($var as $keypair) {
+ $c = explode(':', $keypair, 2);
+ if (!isset($c[1])) continue;
+ $nvar[$c[0]] = $c[1];
+ }
+ $var = $nvar;
+ }
+ }
+ if (!is_array($var)) break;
+ $keys = array_keys($var);
+ if ($keys === array_keys($keys)) {
+ if ($type == self::ALIST) return $var;
+ elseif ($type == self::LOOKUP) {
+ $new = array();
+ foreach ($var as $key) {
+ $new[$key] = true;
+ }
+ return $new;
+ } else break;
+ }
+ if ($type === self::LOOKUP) {
+ foreach ($var as $key => $value) {
+ $var[$key] = true;
+ }
+ }
+ return $var;
+ default:
+ $this->errorInconsistent(__CLASS__, $type);
+ }
+ $this->errorGeneric($var, $type);
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/VarParser/Native.php b/lib/htmlpurifier/library/HTMLPurifier/VarParser/Native.php
new file mode 100755
index 000000000..b02a6de54
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/VarParser/Native.php
@@ -0,0 +1,26 @@
+evalExpression($var);
+ }
+
+    /**
+     * Evaluates a string as a PHP expression and returns its value.
+     * @param $expr PHP expression, without a trailing semicolon
+     * @return Value the expression evaluated to
+     * @throws HTMLPurifier_VarParserException if the code cannot be parsed
+     * @warning This runs eval() on $expr.
+     *          NOTE(review): confirm that no caller passes untrusted input.
+     */
+    protected function evalExpression($expr) {
+        $var = null;
+        try {
+            $result = eval("\$var = $expr;");
+        } catch (ParseError $e) {
+            // PHP 7+ reports a syntax error in evaluated code by throwing
+            // ParseError instead of returning false; translate it so callers
+            // keep receiving the parser's own exception type. On PHP 5 this
+            // class does not exist and the clause simply never matches.
+            throw new HTMLPurifier_VarParserException("Fatal error in evaluated code");
+        }
+        if ($result === false) {
+            // Pre-PHP 7 behaviour: eval() returns false on a parse error
+            throw new HTMLPurifier_VarParserException("Fatal error in evaluated code");
+        }
+        return $var;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/VarParserException.php b/lib/htmlpurifier/library/HTMLPurifier/VarParserException.php
new file mode 100755
index 000000000..5df341495
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/VarParserException.php
@@ -0,0 +1,11 @@
+