diff --git a/browser/lib/markdown-it-sanitize-html.js b/browser/lib/markdown-it-sanitize-html.js
index 05e5e7be..ea27bfa0 100644
--- a/browser/lib/markdown-it-sanitize-html.js
+++ b/browser/lib/markdown-it-sanitize-html.js
@@ -2,6 +2,7 @@
 
 import sanitizeHtml from 'sanitize-html'
 import { escapeHtmlCharacters } from './utils'
+import url from 'url'
 
 module.exports = function sanitizePlugin (md, options) {
   options = options || {}
@@ -25,7 +26,7 @@ module.exports = function sanitizePlugin (md, options) {
         const inlineTokens = state.tokens[tokenIdx].children
         for (let childIdx = 0; childIdx < inlineTokens.length; childIdx++) {
           if (inlineTokens[childIdx].type === 'html_inline') {
-            inlineTokens[childIdx].content = sanitizeHtml(
+            inlineTokens[childIdx].content = sanitizeInline(
               inlineTokens[childIdx].content,
               options
             )
@@ -35,3 +36,128 @@ module.exports = function sanitizePlugin (md, options) {
     }
   })
 }
+
+const tagRegex = /<([A-Z][A-Z0-9]*)\s*((?:\s*[A-Z][A-Z0-9]*(?:="(?:[^\"]+)\")?)*)\s*>|<\/([A-Z][A-Z0-9]*)\s*>/i
+const attributesRegex = /([A-Z][A-Z0-9]*)(="[^\"]+\")?/ig
+
+/**
+ * Sanitize one inline HTML tag (the content of a single `html_inline` token).
+ *
+ * The tag is rebuilt from its lower-cased name and the attributes that pass the
+ * whitelist, so anything `tagRegex` does not recognise is dropped.
+ *
+ * @param {string} html raw text of the inline HTML token
+ * @param {Object} options same option object that is handed to sanitize-html
+ * @returns {string} the sanitized tag, or '' when the tag is not allowed
+ */
+function sanitizeInline (html, options) {
+  const tagMatch = tagRegex.exec(html)
+  if (!tagMatch) {
+    return ''
+  }
+
+  const {
+    allowedTags = [],
+    allowedAttributes = {},
+    selfClosing = [],
+    allowedSchemesAppliedToAttributes = []
+  } = options
+
+  // A capture group that did not participate in the match is `undefined`
+  // (never `null`), so this is how a closing tag is recognised.
+  if (tagMatch[1] === undefined) {
+    const closingTag = tagMatch[3].toLowerCase()
+    return allowedTags.indexOf(closingTag) !== -1 ? `</${closingTag}>` : ''
+  }
+
+  const tag = tagMatch[1].toLowerCase()
+  if (allowedTags.indexOf(tag) === -1) {
+    return ''
+  }
+
+  const globalAttributes = allowedAttributes['*'] || []
+  const tagAttributes = allowedAttributes[tag] || []
+  let attrs = ''
+  let attrMatch
+
+  // The regex is global and shared, so always restart from the beginning.
+  attributesRegex.lastIndex = 0
+  while ((attrMatch = attributesRegex.exec(tagMatch[2]))) {
+    const name = attrMatch[1].toLowerCase()
+    // `="value"` including the quotes, or '' for an attribute without a value
+    const rawValue = attrMatch[2] || ''
+
+    if (globalAttributes.indexOf(name) === -1 && tagAttributes.indexOf(name) === -1) {
+      continue
+    }
+
+    if (rawValue && allowedSchemesAppliedToAttributes.indexOf(name) !== -1) {
+      // Strip the leading `="` and the trailing `"` before inspecting the URL.
+      const value = rawValue.slice(2, -1)
+      if (naughtyHRef(value, options) || (tag === 'iframe' && name === 'src' && naughtyIFrame(value, options))) {
+        continue
+      }
+    }
+
+    attrs += ` ${name}${rawValue}`
+  }
+
+  return selfClosing.indexOf(tag) !== -1 ? `<${tag}${attrs} />` : `<${tag}${attrs}>`
+}
+
+/**
+ * Tell whether a URL attribute value must be rejected.
+ *
+ * @param {string} href attribute value without the surrounding quotes
+ * @param {Object} options reads `allowedSchemes` and `allowProtocolRelative`
+ * @returns {boolean} true when the value must not be kept
+ */
+function naughtyHRef (href, options) {
+  // The value is raw markup: a browser would decode `&#58;` or `&colon;` into
+  // `:` after this check, so such references are rejected outright.
+  if (/&(?:#|(?:colon|tab|newline);)/i.test(href)) {
+    return true
+  }
+
+  const cleaned = href
+    .replace(/[\x00-\x20]+/g, '')
+    .replace(/<\!\-\-.*?\-\-\>/g, '')
+
+  const matches = cleaned.match(/^([a-zA-Z]+)\:/)
+  if (!matches) {
+    if (cleaned.match(/^[\/\\]{2}/)) {
+      return !options.allowProtocolRelative
+    }
+
+    // No scheme
+    return false
+  }
+
+  const scheme = matches[1].toLowerCase()
+
+  return (options.allowedSchemes || []).indexOf(scheme) === -1
+}
+
+/**
+ * Tell whether an iframe `src` points outside `allowedIframeHostnames`.
+ *
+ * @param {string} src attribute value without the surrounding quotes
+ * @param {Object} options reads `allowedIframeHostnames`
+ * @returns {boolean} true when the iframe source must not be kept
+ */
+function naughtyIFrame (src, options) {
+  const hostnames = options.allowedIframeHostnames
+  if (!hostnames) {
+    // No restriction configured
+    return false
+  }
+
+  try {
+    const parsed = url.parse(src, false, true)
+
+    return hostnames.indexOf(parsed.hostname) === -1
+  } catch (e) {
+    // A source that cannot be parsed is rejected rather than trusted.
+    return true
+  }
+}
\ No newline at end of file
diff --git a/browser/lib/markdown.js b/browser/lib/markdown.js
index 49fd2f86..90b1ccea 100644
--- a/browser/lib/markdown.js
+++ b/browser/lib/markdown.js
@@ -105,7 +105,11 @@ class Markdown {
           'iframe': ['src', 'width', 'height', 'frameborder', 'allowfullscreen'],
           'input': ['type', 'id', 'checked']
         },
-        allowedIframeHostnames: ['www.youtube.com']
+        allowedIframeHostnames: ['www.youtube.com'],
+        selfClosing: [ 'img', 'br', 'hr', 'input' ],
+        allowedSchemes: [ 'http', 'https', 'ftp', 'mailto' ],
+        allowedSchemesAppliedToAttributes: [ 'href', 'src', 'cite' ],
+        allowProtocolRelative: true
       })
     }
 