1
0
mirror of https://github.com/BoostIo/Boostnote synced 2025-12-11 00:36:26 +00:00
Files
Boostnote/browser/lib/markdown-it-sanitize-html.js

142 lines
3.5 KiB
JavaScript

'use strict'
import sanitizeHtml from 'sanitize-html'
import { escapeHtmlCharacters } from './utils'
import url from 'url'
module.exports = function sanitizePlugin(md, options) {
options = options || {}
md.core.ruler.after('linkify', 'sanitize_inline', state => {
for (let tokenIdx = 0; tokenIdx < state.tokens.length; tokenIdx++) {
if (state.tokens[tokenIdx].type === 'html_block') {
state.tokens[tokenIdx].content = sanitizeHtml(
state.tokens[tokenIdx].content,
options
)
}
if (state.tokens[tokenIdx].type.match(/.*_fence$/)) {
// escapeHtmlCharacters has better performance
state.tokens[tokenIdx].content = escapeHtmlCharacters(
state.tokens[tokenIdx].content,
{ skipSingleQuote: true }
)
}
if (state.tokens[tokenIdx].type === 'inline') {
const inlineTokens = state.tokens[tokenIdx].children
for (let childIdx = 0; childIdx < inlineTokens.length; childIdx++) {
if (inlineTokens[childIdx].type === 'html_inline') {
inlineTokens[childIdx].content = sanitizeInline(
inlineTokens[childIdx].content,
options
)
}
}
}
}
})
}
const tagRegex = /<([A-Z][A-Z0-9]*)\s*((?:\s*[A-Z][A-Z0-9]*(?:=("|')(?:[^\3]+?)\3)?)*)\s*\/?>|<\/([A-Z][A-Z0-9]*)\s*>/i
const attributesRegex = /([A-Z][A-Z0-9]*)(?:=("|')([^\2]+?)\2)?/gi
function sanitizeInline(html, options) {
let match = tagRegex.exec(html)
if (!match) {
return ''
}
const {
allowedTags,
allowedAttributes,
selfClosing,
allowedSchemesAppliedToAttributes
} = options
if (match[1] !== undefined) {
// opening tag
const tag = match[1].toLowerCase()
if (allowedTags.indexOf(tag) === -1) {
return ''
}
const attributes = match[2]
let attrs = ''
let name
let value
while ((match = attributesRegex.exec(attributes))) {
name = match[1].toLowerCase()
value = match[3]
if (
allowedAttributes['*'].indexOf(name) !== -1 ||
(allowedAttributes[tag] && allowedAttributes[tag].indexOf(name) !== -1)
) {
if (allowedSchemesAppliedToAttributes.indexOf(name) !== -1) {
if (
naughtyHRef(value, options) ||
(tag === 'iframe' &&
name === 'src' &&
naughtyIFrame(value, options))
) {
continue
}
}
attrs += ` ${name}`
if (match[2]) {
attrs += `="${value}"`
}
}
}
if (selfClosing.indexOf(tag) === -1) {
return '<' + tag + attrs + '>'
} else {
return '<' + tag + attrs + ' />'
}
} else {
// closing tag
if (allowedTags.indexOf(match[4].toLowerCase()) !== -1) {
return html
} else {
return ''
}
}
}
function naughtyHRef(href, options) {
// href = href.replace(/[\x00-\x20]+/g, '')
if (!href) {
// No href
return false
}
href = href.replace(/<\!\-\-.*?\-\-\>/g, '')
const matches = href.match(/^([a-zA-Z]+)\:/)
if (!matches) {
if (href.match(/^[\/\\]{2}/)) {
return !options.allowProtocolRelative
}
// No scheme
return false
}
const scheme = matches[1].toLowerCase()
return options.allowedSchemes.indexOf(scheme) === -1
}
function naughtyIFrame(src, options) {
try {
const parsed = url.parse(src, false, true)
return options.allowedIframeHostnames.index(parsed.hostname) === -1
} catch (e) {
return true
}
}