Fix escape codeblock (#2230)

* updated package-lock * added fix and test for escape html in code block * fixed markdown preview render bug * updated comment in escape function * improved escape function * Delete package-lock.json
2025-12-12 17:26:17 +00:00 · 2018-08-09 15:08:52 +07:00
parent 7330cdaf1c
commit f0df787bbe
5 changed files with 96 additions and 53 deletions
--- a/browser/components/MarkdownPreview.js
+++ b/browser/components/MarkdownPreview.js
@@ -444,13 +444,6 @@ export default class MarkdownPreview extends React.Component {
    let { value, codeBlockTheme } = this.props

    this.refs.root.contentWindow.document.body.setAttribute('data-theme', theme)
-
-    const codeBlocks = value.match(/(```)(.|[\n])*?(```)/g)
-    if (codeBlocks !== null) {
-      codeBlocks.forEach((codeBlock) => {
-        value = value.replace(codeBlock, htmlTextHelper.encodeEntities(codeBlock))
-      })
-    }
    const renderedHTML = this.markdown.render(value)
    attachmentManagement.migrateAttachments(value, storagePath, noteKey)
    this.refs.root.contentWindow.document.body.innerHTML = attachmentManagement.fixLocalURLS(renderedHTML, storagePath)
--- a/browser/lib/markdown-it-sanitize-html.js
+++ b/browser/lib/markdown-it-sanitize-html.js
@@ -1,6 +1,7 @@
 'use strict'

 import sanitizeHtml from 'sanitize-html'
+import { escapeHtmlCharacters } from './utils'

 module.exports = function sanitizePlugin (md, options) {
  options = options || {}
@@ -8,16 +9,26 @@ module.exports = function sanitizePlugin (md, options) {
  md.core.ruler.after('linkify', 'sanitize_inline', state => {
    for (let tokenIdx = 0; tokenIdx < state.tokens.length; tokenIdx++) {
      if (state.tokens[tokenIdx].type === 'html_block') {
-        state.tokens[tokenIdx].content = sanitizeHtml(state.tokens[tokenIdx].content, options)
+        state.tokens[tokenIdx].content = sanitizeHtml(
+          state.tokens[tokenIdx].content,
+          options
+        )
      }
      if (state.tokens[tokenIdx].type === 'fence') {
-        state.tokens[tokenIdx].content = state.tokens[tokenIdx].content.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;')
+        // escapeHtmlCharacters has better performance
+        state.tokens[tokenIdx].content = escapeHtmlCharacters(
+          state.tokens[tokenIdx].content,
+          { skipSingleQuote: true }
+        )
      }
      if (state.tokens[tokenIdx].type === 'inline') {
        const inlineTokens = state.tokens[tokenIdx].children
        for (let childIdx = 0; childIdx < inlineTokens.length; childIdx++) {
          if (inlineTokens[childIdx].type === 'html_inline') {
-            inlineTokens[childIdx].content = sanitizeHtml(inlineTokens[childIdx].content, options)
+            inlineTokens[childIdx].content = sanitizeHtml(
+              inlineTokens[childIdx].content,
+              options
+            )
          }
        }
      }
--- a/browser/lib/utils.js
+++ b/browser/lib/utils.js
@@ -6,8 +6,12 @@ export function lastFindInArray (array, callback) {
  }
 }

-export function escapeHtmlCharacters (html, opt = { detectCodeBlock: false }) {
+export function escapeHtmlCharacters (
+  html,
+  opt = { detectCodeBlock: false, skipSingleQuote: false }
+) {
  const matchHtmlRegExp = /["'&<>]/g
+  const matchCodeBlockRegExp = /```/g
  const escapes = ['&quot;', '&amp;', '&#39;', '&lt;', '&gt;']
  let match = null
  const replaceAt = (str, index, replace) =>
@@ -15,11 +19,18 @@ export function escapeHtmlCharacters (html, opt = { detectCodeBlock: false }) {
    replace +
    str.substr(index + replace.length - (replace.length - 1))

-  // detecting code block
-  while ((match = matchHtmlRegExp.exec(html)) != null) {
+  while ((match = matchHtmlRegExp.exec(html)) !== null) {
    const current = { char: match[0], index: match.index }
+    const codeBlockIndexs = []
+    let openCodeBlock = null
+    // if the detectCodeBlock option is activated then this function should skip
+    // characters that need to be escaped but are located in a code block
    if (opt.detectCodeBlock) {
-      // position of the nearest line start
+      // The first type of code block is a line that starts with 4 spaces
+      // Here we look for the \n character located before the character that
+      // needs to be escaped. It means we look for the beginning of the line that
+      // contains that character, then we check if there are 4 spaces next to the
+      // \n character (the line starts with 4 spaces)
      let previousLineEnd = current.index - 1
      while (html[previousLineEnd] !== '\n' && previousLineEnd !== -1) {
        previousLineEnd--
@@ -31,16 +42,54 @@ export function escapeHtmlCharacters (html, opt = { detectCodeBlock: false }) {
        html[previousLineEnd + 3] === ' ' &&
        html[previousLineEnd + 4] === ' '
      ) {
-        // so skip it
+        // skip the current character
+        continue
+      }
+      // The second type of code block is lines that are wrapped in ```
+      // We get the position of each ```
+      // then push it into an array,
+      // so the resulting array will look like this:
+      // [startCodeBlock, endCodeBlock, startCodeBlock, endCodeBlock]
+      while ((openCodeBlock = matchCodeBlockRegExp.exec(html)) !== null) {
+        codeBlockIndexs.push(openCodeBlock.index)
+      }
+      let shouldSkipChar = false
+      // we loop through the array of positions
+      // we step by 2 elements, as index i is the position of the ``` that
+      // opens the code block and index i + 1 is the position of the ``` that
+      // closes the code block
+      for (let i = 0; i < codeBlockIndexs.length; i += 2) {
+        // index i holds the position of the ``` that opens the code block,
+        // which is the position of the first ` in that ```
+        // the current character must be located after the last ` of the
+        // opening ```, so we take the position of the first ` and + 2
+        // index i + 1 holds the position of the closing ```, so the position
+        // of the current character must be less than it
+        if (
+          current.index > codeBlockIndexs[i] + 2 &&
+          current.index < codeBlockIndexs[i + 1]
+        ) {
+          // skip it
+          shouldSkipChar = true
+          break
+        }
+      }
+      if (shouldSkipChar) {
+        // skip the current character
        continue
      }
    }
    // otherwise, escape it !!!
    if (current.char === '&') {
+      // when escaping the & character we have to be careful, as the & could be
+      // part of an already escaped character: e.g. &quot; would become &amp;quot;
      let nextStr = ''
      let nextIndex = current.index
      let escapedStr = false
-      // maximum length of an escape string is 5. For example ('&quot;')
+      // maximum length of an escaped string is 6. For example ('&quot;')
+      // we take up to the next 6 characters starting at the &; if they form one of the strings
+      // ['&quot;', '&amp;', '&#39;', '&lt;', '&gt;'] then we will not escape the & character
+      // as it is a part of the escaped string and should not be escaped
      while (nextStr.length <= 5) {
        nextStr += html[nextIndex]
        nextIndex++
@@ -55,7 +104,7 @@ export function escapeHtmlCharacters (html, opt = { detectCodeBlock: false }) {
      }
    } else if (current.char === '"') {
      html = replaceAt(html, current.index, '&quot;')
-    } else if (current.char === "'") {
+    } else if (current.char === "'" && !opt.skipSingleQuote) {
      html = replaceAt(html, current.index, '&#39;')
    } else if (current.char === '<') {
      html = replaceAt(html, current.index, '&lt;')
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,35 +0,0 @@
-{
-  "name": "boost",
-  "version": "0.10.0",
-  "lockfileVersion": 1,
-  "requires": true,
-  "dependencies": {
-    "debug": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz",
-      "integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==",
-      "requires": {
-        "ms": "2.0.0"
-      }
-    },
-    "i18n-2": {
-      "version": "0.7.2",
-      "resolved": "https://registry.npmjs.org/i18n-2/-/i18n-2-0.7.2.tgz",
-      "integrity": "sha512-Rdh6vfpNhL7q61cNf27x7QGULTi1TcGLVdFb5OJ6dOiJo+EkOTqEg0+3xgyeEMgYhopUBsh2IiSkFkjM+EhEmA==",
-      "requires": {
-        "debug": "3.1.0",
-        "sprintf": "0.1.5"
-      }
-    },
-    "ms": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
-      "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g="
-    },
-    "sprintf": {
-      "version": "0.1.5",
-      "resolved": "https://registry.npmjs.org/sprintf/-/sprintf-0.1.5.tgz",
-      "integrity": "sha1-j4PjmpMXwaUCy324BQ5Rxnn27c8="
-    }
-  }
-}
--- a/tests/lib/escapeHtmlCharacters-test.js
+++ b/tests/lib/escapeHtmlCharacters-test.js
@@ -33,13 +33,38 @@ test('escapeHtmlCharacters should NOT skip code block if that option is NOT enab
  t.is(actual, expected)
 })

-test('escapeHtmlCharacters should NOT escape & character if it\'s a part of an escaped character', t => {
+test("escapeHtmlCharacters should NOT escape & character if it's a part of an escaped character", t => {
  const input = 'Do not escape &amp; or &quot; but do escape &'
  const expected = 'Do not escape &amp; or &quot; but do escape &amp;'
  const actual = escapeHtmlCharacters(input)
  t.is(actual, expected)
 })

+test('escapeHtmlCharacters should skip char if in code block', t => {
+  const input = `
+\`\`\`
+<dontescapeme>
+\`\`\`
+das<das>dasd
+dasdasdasd
+\`\`\`
+<dontescapeme>
+\`\`\`
+`
+  const expected = `
+\`\`\`
+<dontescapeme>
+\`\`\`
+das&lt;das&gt;dasd
+dasdasdasd
+\`\`\`
+<dontescapeme>
+\`\`\`
+`
+  const actual = escapeHtmlCharacters(input, { detectCodeBlock: true })
+  t.is(actual, expected)
+})
+
 test('escapeHtmlCharacters should return the correct result', t => {
  const input = '& < > " \''
  const expected = '&amp; &lt; &gt; &quot; &#39;'