diff --git a/browser/components/MarkdownPreview.js b/browser/components/MarkdownPreview.js index 1cec5cdd..af599f95 100755 --- a/browser/components/MarkdownPreview.js +++ b/browser/components/MarkdownPreview.js @@ -444,13 +444,6 @@ export default class MarkdownPreview extends React.Component { let { value, codeBlockTheme } = this.props this.refs.root.contentWindow.document.body.setAttribute('data-theme', theme) - - const codeBlocks = value.match(/(```)(.|[\n])*?(```)/g) - if (codeBlocks !== null) { - codeBlocks.forEach((codeBlock) => { - value = value.replace(codeBlock, htmlTextHelper.encodeEntities(codeBlock)) - }) - } const renderedHTML = this.markdown.render(value) attachmentManagement.migrateAttachments(value, storagePath, noteKey) this.refs.root.contentWindow.document.body.innerHTML = attachmentManagement.fixLocalURLS(renderedHTML, storagePath) diff --git a/browser/lib/markdown-it-sanitize-html.js b/browser/lib/markdown-it-sanitize-html.js index 6d1a44b6..05e5e7be 100644 --- a/browser/lib/markdown-it-sanitize-html.js +++ b/browser/lib/markdown-it-sanitize-html.js @@ -1,6 +1,7 @@ 'use strict' import sanitizeHtml from 'sanitize-html' +import { escapeHtmlCharacters } from './utils' module.exports = function sanitizePlugin (md, options) { options = options || {} @@ -8,16 +9,26 @@ module.exports = function sanitizePlugin (md, options) { md.core.ruler.after('linkify', 'sanitize_inline', state => { for (let tokenIdx = 0; tokenIdx < state.tokens.length; tokenIdx++) { if (state.tokens[tokenIdx].type === 'html_block') { - state.tokens[tokenIdx].content = sanitizeHtml(state.tokens[tokenIdx].content, options) + state.tokens[tokenIdx].content = sanitizeHtml( + state.tokens[tokenIdx].content, + options + ) } if (state.tokens[tokenIdx].type === 'fence') { - state.tokens[tokenIdx].content = state.tokens[tokenIdx].content.replace(/&/g, '&').replace(//g, '>').replace(/"/g, '"') + // escapeHtmlCharacters has better performance + state.tokens[tokenIdx].content = escapeHtmlCharacters( + state.tokens[tokenIdx].content, + { skipSingleQuote: true } + ) } if (state.tokens[tokenIdx].type === 'inline') { const inlineTokens = state.tokens[tokenIdx].children for (let childIdx = 0; childIdx < inlineTokens.length; childIdx++) { if (inlineTokens[childIdx].type === 'html_inline') { - inlineTokens[childIdx].content = sanitizeHtml(inlineTokens[childIdx].content, options) + inlineTokens[childIdx].content = sanitizeHtml( + inlineTokens[childIdx].content, + options + ) } } } diff --git a/browser/lib/utils.js b/browser/lib/utils.js index 564ed3d2..1d15b722 100644 --- a/browser/lib/utils.js +++ b/browser/lib/utils.js @@ -6,8 +6,12 @@ export function lastFindInArray (array, callback) { } } -export function escapeHtmlCharacters (html, opt = { detectCodeBlock: false }) { +export function escapeHtmlCharacters ( + html, + opt = { detectCodeBlock: false, skipSingleQuote: false } +) { const matchHtmlRegExp = /["'&<>]/g + const matchCodeBlockRegExp = /```/g const escapes = ['"', '&', ''', '<', '>'] let match = null const replaceAt = (str, index, replace) => @@ -15,11 +19,18 @@ export function escapeHtmlCharacters (html, opt = { detectCodeBlock: false }) { replace + str.substr(index + replace.length - (replace.length - 1)) - // detecting code block - while ((match = matchHtmlRegExp.exec(html)) != null) { + while ((match = matchHtmlRegExp.exec(html)) !== null) { const current = { char: match[0], index: match.index } + const codeBlockIndexs = [] + let openCodeBlock = null + // if the detectCodeBlock option is activated then this function should skip + // characters that needed to be escape but located in code block if (opt.detectCodeBlock) { - // position of the nearest line start + // The first type of code block is lines that start with 4 spaces + // Here we check for the \n character located before the character that + // needed to be escape. It means we check for the begining of the line that + // contain that character, then we check if there are 4 spaces next to the + // \n character (the line start with 4 spaces) let previousLineEnd = current.index - 1 while (html[previousLineEnd] !== '\n' && previousLineEnd !== -1) { previousLineEnd-- @@ -31,16 +42,54 @@ export function escapeHtmlCharacters (html, opt = { detectCodeBlock: false }) { html[previousLineEnd + 3] === ' ' && html[previousLineEnd + 4] === ' ' ) { - // so skip it + // skip the current character + continue + } + // The second type of code block is lines that wrapped in ``` + // We will get the position of each ``` + // then push it into an array + // then the array returned will be like this: + // [startCodeblock, endCodeBlock, startCodeBlock, endCodeBlock] + while ((openCodeBlock = matchCodeBlockRegExp.exec(html)) !== null) { + codeBlockIndexs.push(openCodeBlock.index) + } + let shouldSkipChar = false + // we loop through the array of positions + // we skip 2 element as the i index position is the position of ``` that + // open the codeblock and the i + 1 is the position of the ``` that close + // the code block + for (let i = 0; i < codeBlockIndexs.length; i += 2) { + // the i index position is the position of the ``` that open code block + // so we have to + 2 as that position is the position of the first ` in the ```` + // but we need to make sure that the position current character is larger + // that the last ` in the ``` that open the code block so we have to take + // the position of the first ` and + 2 + // the i + 1 index position is the closing ``` so the char must less than it + if ( + current.index > codeBlockIndexs[i] + 2 && + current.index < codeBlockIndexs[i + 1] + ) { + // skip it + shouldSkipChar = true + break + } + } + if (shouldSkipChar) { + // skip the current character continue } } // otherwise, escape it !!! if (current.char === '&') { + // when escaping character & we have to be becareful as the & could be a part + // of an escaped character like " will be came &quot; let nextStr = '' let nextIndex = current.index let escapedStr = false - // maximum length of an escape string is 5. For example ('"') + // maximum length of an escaped string is 5. For example ('"') + // we take the next 5 character of the next string if it is one of the string: + // ['"', '&', ''', '<', '>'] then we will not escape the & character + // as it is a part of the escaped string and should not be escaped while (nextStr.length <= 5) { nextStr += html[nextIndex] nextIndex++ @@ -55,7 +104,7 @@ export function escapeHtmlCharacters (html, opt = { detectCodeBlock: false }) { } } else if (current.char === '"') { html = replaceAt(html, current.index, '"') - } else if (current.char === "'") { + } else if (current.char === "'" && !opt.skipSingleQuote) { html = replaceAt(html, current.index, ''') } else if (current.char === '<') { html = replaceAt(html, current.index, '<') diff --git a/package-lock.json b/package-lock.json deleted file mode 100644 index 8150228b..00000000 --- a/package-lock.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "name": "boost", - "version": "0.10.0", - "lockfileVersion": 1, - "requires": true, - "dependencies": { - "debug": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz", - "integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==", - "requires": { - "ms": "2.0.0" - } - }, - "i18n-2": { - "version": "0.7.2", - "resolved": "https://registry.npmjs.org/i18n-2/-/i18n-2-0.7.2.tgz", - "integrity": "sha512-Rdh6vfpNhL7q61cNf27x7QGULTi1TcGLVdFb5OJ6dOiJo+EkOTqEg0+3xgyeEMgYhopUBsh2IiSkFkjM+EhEmA==", - "requires": { - "debug": "3.1.0", - "sprintf": "0.1.5" - } - }, - "ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" - }, - "sprintf": { - "version": "0.1.5", - "resolved": "https://registry.npmjs.org/sprintf/-/sprintf-0.1.5.tgz", - "integrity": "sha1-j4PjmpMXwaUCy324BQ5Rxnn27c8=" - } - } -} diff --git a/tests/lib/escapeHtmlCharacters-test.js b/tests/lib/escapeHtmlCharacters-test.js index f13ab297..672ef917 100644 --- a/tests/lib/escapeHtmlCharacters-test.js +++ b/tests/lib/escapeHtmlCharacters-test.js @@ -33,13 +33,38 @@ test('escapeHtmlCharacters should NOT skip code block if that option is NOT enab t.is(actual, expected) }) -test('escapeHtmlCharacters should NOT escape & character if it\'s a part of an escaped character', t => { +test("escapeHtmlCharacters should NOT escape & character if it's a part of an escaped character", t => { const input = 'Do not escape & or " but do escape &' const expected = 'Do not escape & or " but do escape &' const actual = escapeHtmlCharacters(input) t.is(actual, expected) }) +test('escapeHtmlCharacters should skip char if in code block', t => { + const input = ` +\`\`\` + +\`\`\` +dasdasd +dasdasdasd +\`\`\` + +\`\`\` +` + const expected = ` +\`\`\` + +\`\`\` +das<das>dasd +dasdasdasd +\`\`\` + +\`\`\` +` + const actual = escapeHtmlCharacters(input, { detectCodeBlock: true }) + t.is(actual, expected) +}) + test('escapeHtmlCharacters should return the correct result', t => { const input = '& < > " \'' const expected = '& < > " ''