From 7110549e74f6c137b460e3a1a13659911e661761 Mon Sep 17 00:00:00 2001
From: wimrijnders <wrijnders@gmail.com>
Date: Fri, 20 Oct 2017 13:00:08 +0200
Subject: [PATCH] Network: DRY code in Label for parsing markup (#3565)

* Network: DRY code in Label for parsing markup

This gets rid of a major eyesore for me. The accumulator object was identical for HTML and Markdown.

In addition, the parsing has been refactored. Common elements have been DRY'd and the logic of the parsing has been made more comprehensible.

* Added suggestion @mbroad wrt regexp precompile

* Fixed linting
---
 .../components/shared/LabelSplitter.js        | 536 +++++++++++-------
 test/Label.test.js                            |   1 +
 2 files changed, 323 insertions(+), 214 deletions(-)
diff --git a/lib/network/modules/components/shared/LabelSplitter.js b/lib/network/modules/components/shared/LabelSplitter.js
index 168d9966..75ad77b6 100644
--- a/lib/network/modules/components/shared/LabelSplitter.js
+++ b/lib/network/modules/components/shared/LabelSplitter.js
@@ -1,6 +1,274 @@
 let LabelAccumulator = require('./LabelAccumulator').default;
 let ComponentUtil = require('./ComponentUtil').default;
 
+// Precompiled regexps for the markup tags, keyed by the string handed to prepareRegExp()
+var tagPattern = {
+  // HTML
+  '<b>': /<b>/,
+  '<i>': /<i>/,
+  '<code>': /<code>/,
+  '</b>': /<\/b>/,
+  '</i>': /<\/i>/,
+  '</code>': /<\/code>/,
+  // Markdown
+  '*': /\*/,  // bold delimiter
+  '_': /\_/,   // italic delimiter
+  '`': /`/,   // monospace delimiter
+  'afterBold': /[^\*]/,  // any character other than '*'
+  'afterItal': /[^_]/,  // any character other than '_'
+  'afterMono': /[^`]/,  // any character other than '`'
+};
+
+
+/**
+ * Internal helper class for parsing the markup tags for HTML and Markdown.
+ *
+ * NOTE: Outside of 'mono' blocks, sequences of tabs and spaces are reduced to
+ *       a single space; see the usage of `this.spacing` in the methods below.
+ */
+class MarkupAccumulator {
+
+  /**
+   * Create an instance
+   *
+   * @param {string} text  text to parse for markup
+   */
+  constructor(text) {
+    this.text = text;
+    this.bold = false;
+    this.ital = false;
+    this.mono = false;
+    this.spacing = false;
+    this.position = 0;
+    this.buffer = "";
+    this.modStack = [];
+
+    this.blocks = [];
+  }
+
+
+  /**
+   * Return the mod label currently on the top of the stack
+   *
+   * @returns {string}  label of topmost mod, 'normal' if the stack is empty
+   * @private
+   */
+  mod() {
+    return (this.modStack.length === 0) ? 'normal' : this.modStack[0];
+  }
+
+
+  /**
+   * Return the mod label currently active
+   *
+   * @returns {string}  label of active mod
+   * @private
+   */
+  modName() {
+    if (this.modStack.length === 0)
+      return 'normal';
+    else if (this.modStack[0] === 'mono')
+      return 'mono';
+    else {
+      if (this.bold && this.ital) {
+        return 'boldital';
+      } else if (this.bold) {
+        return 'bold';
+      } else if (this.ital) {
+        return 'ital';
+      }
+    }
+  }
+
+
+  /** Flush pending spacing and any buffered text into `this.blocks`, labelled with modName().
+   * @private
+   */
+  emitBlock() {
+    if (this.spacing) {
+      this.add(" ");
+      this.spacing = false;
+    }
+    if (this.buffer.length > 0) {
+      this.blocks.push({ text: this.buffer, mod: this.modName() });
+      this.buffer = "";
+    }
+  }
+
+
+  /**
+   * Output text to buffer; a pending or passed single space is written exactly once
+   *
+   * @param {string} text  text to add
+   * @private
+   */
+  add(text) {
+    if (text === " ") {
+      this.spacing = true;
+    }
+    if (this.spacing) {
+      this.buffer += " ";
+      this.spacing = false;
+    }
+    if (text !== " ") {
+      this.buffer += text;
+    }
+  }
+
+
+  /**
+   * Handle parsing of whitespace; only recorded as pending spacing unless within 'mono'
+   *
+   * @param {string} ch  the character to check
+   * @returns {boolean} true if the character was processed as whitespace, false otherwise
+   */
+  parseWS(ch) {
+    if (/[ \t]/.test(ch)) {
+      if (!this.mono) {
+        this.spacing = true;
+      } else {
+        this.add(ch);
+      }
+      return true;
+    }
+
+    return false;
+  }
+
+
+  /** Emit the current buffer, then activate the given mod and push it on the stack.
+   * @param {string} tagName  label for block type to set
+   * @private
+   */
+  setTag(tagName) {
+    this.emitBlock();
+    this[tagName] = true;
+    this.modStack.unshift(tagName);
+  }
+
+
+  /** Emit the current buffer, then deactivate the given mod and pop the stack.
+   * @param {string} tagName  label for block type to unset
+   * @private
+   */
+  unsetTag(tagName) {
+    this.emitBlock();
+    this[tagName] = false;
+    this.modStack.shift();
+  }
+
+
+  /** Try to open a block at the current position; never succeeds inside 'mono'.
+   * @param {string} tagName label for block type we are currently processing
+   * @param {string|RegExp} tag string to match in text
+   * @returns {boolean} true if the tag was processed, false otherwise
+   */
+  parseStartTag(tagName, tag) {
+    // Note: if 'mono' passed as tagName, there is a double check here. This is OK
+    if (!this.mono && !this[tagName] && this.match(tag)) {
+      this.setTag(tagName);
+      return true;
+    }
+
+    return false;
+  }
+
+
+  /** Test the text at the current position against a tag.
+   * @param {string|RegExp} tag
+   * @param {boolean} [advance=true] if set, move position to the last character of a matched tag
+   * @returns {boolean} true if match at given position, false otherwise
+   * @private
+   */
+  match(tag, advance = true) {
+    let [regExp, length] = this.prepareRegExp(tag);
+    let matched = regExp.test(this.text.substr(this.position, length));
+
+    if (matched && advance) {
+      this.position += length - 1;
+    }
+
+    return matched;
+  }
+
+
+  /** Try to close the topmost block at the current position.
+   * @param {string} tagName label for block type we are currently processing
+   * @param {string|RegExp} tag string to match in text
+   * @param {string|RegExp} [nextTag] tag (or tagPattern key) to match for characters *following* the current tag
+   * @returns {boolean} true if the tag was processed, false otherwise
+   */
+  parseEndTag(tagName, tag, nextTag) {
+    let checkTag = (this.mod() === tagName);
+    if (tagName === 'mono') {  // special handling for 'mono'
+      checkTag = checkTag && this.mono;
+    } else {
+      checkTag = checkTag && !this.mono;
+    }
+
+    if (checkTag && this.match(tag)) {
+      if (nextTag !== undefined) {
+        // Purpose of the following match is to prevent a direct unset/set of a given tag
+        // E.g. '*bold **still bold*' => '*bold still bold*'
+        if ((this.position === this.text.length-1) || this.match(nextTag, false)) {
+          this.unsetTag(tagName);
+        }
+      } else {
+        this.unsetTag(tagName);
+      }
+
+      return true;
+    }
+
+    return false;
+  }
+
+
+  /** Substitute a tag found at the current position by a fixed string.
+   * @param {string|RegExp} tag  string to match in text
+   * @param {string} value  string to replace tag with, if found at current position
+   * @returns {boolean} true if the tag was processed, false otherwise
+   */
+  replace(tag, value) {
+    if (this.match(tag)) {
+      // match() has already moved position to the last character of the tag
+      this.add(value);
+      return true;
+    }
+
+    return false;
+  }
+
+
+  /**
+   * Create a regular expression for the tag if it isn't already one.
+   *
+   * @param {string|RegExp} tag  string to match in text
+   * @returns {[RegExp, number]}  regular expression to use and length of input string to match
+   * @private
+   */
+  prepareRegExp(tag) {
+    let length;
+    let regExp;
+    if (tag instanceof RegExp) {
+      regExp = tag;
+      length = 1;   // ASSUMPTION: regexp only tests one character
+    } else {
+      // use prepared regexp if present
+      const prepared = tagPattern[tag];
+      if (prepared !== undefined) {
+        regExp = prepared;
+      } else {
+        regExp = new RegExp(tag);
+      }
+
+      length = tag.length;
+    }
+
+    return [regExp, length];
+  }
+}
+
 
 /**
  * Helper class for Label which explodes the label text into lines and blocks within lines
@@ -159,123 +427,43 @@ class LabelSplitter {
    * @returns {Array}
    */
   splitHtmlBlocks(text) {
-    let blocks = [];
-
-    // TODO: consolidate following + methods/closures with splitMarkdownBlocks()
-    // NOTE: sequences of tabs and spaces are reduced to single space; scan usage of `this.spacing` within method
-    let s = {
-      bold: false,
-      ital: false,
-      mono: false,
-      spacing: false,
-      position: 0,
-      buffer: "",
-      modStack: []
-    };
+    let s = new MarkupAccumulator(text);
 
-    s.mod = function() {
-      return (this.modStack.length === 0) ? 'normal' : this.modStack[0];
-    };
+    let parseEntities = (ch) => {  // decode '&lt;' and '&amp;'; any other '&' is kept literally
+      if (/&/.test(ch)) {
+        let parsed = s.replace('&lt;', '<')
+          || s.replace('&amp;', '&');
 
-    s.modName = function() {
-      if (this.modStack.length === 0)
-        return 'normal';
-      else if (this.modStack[0] === 'mono')
-        return 'mono';
-      else {
-        if (s.bold && s.ital) {
-          return 'boldital';
-        } else if (s.bold) {
-          return 'bold';
-        } else if (s.ital) {
-          return 'ital';
+        if (!parsed) {
+          s.add("&");
         }
-      }
-    };
 
-    s.emitBlock = function(override=false) {  // eslint-disable-line no-unused-vars
-      if (this.spacing) {
-        this.add(" ");
-        this.spacing = false;
-      }
-      if (this.buffer.length > 0) {
-        blocks.push({ text: this.buffer, mod: this.modName() });
-        this.buffer = "";
+        return true;
       }
-    };
 
-    s.add = function(text) {
-      if (text === " ") {
-        s.spacing = true;
-      }
-      if (s.spacing) {
-        this.buffer += " ";
-        this.spacing = false;
-      }
-      if (text != " ") {
-        this.buffer += text;
-      }
+      return false;
     };
 
-    while (s.position < text.length) {
-      let ch = text.charAt(s.position);
-      if (/[ \t]/.test(ch)) {
-        if (!s.mono) {
-          s.spacing = true;
-        } else {
-          s.add(ch);
-        }
-      } else if (/</.test(ch)) {
-        if (!s.mono && !s.bold && /<b>/.test(text.substr(s.position,3))) {
-          s.emitBlock();
-          s.bold = true;
-          s.modStack.unshift("bold");
-          s.position += 2;
-        } else if (!s.mono && !s.ital && /<i>/.test(text.substr(s.position,3))) {
-          s.emitBlock();
-          s.ital = true;
-          s.modStack.unshift("ital");
-          s.position += 2;
-        } else if (!s.mono && /<code>/.test(text.substr(s.position,6))) {
-          s.emitBlock();
-          s.mono = true;
-          s.modStack.unshift("mono");
-          s.position += 5;
-        } else if (!s.mono && (s.mod() === 'bold') && /<\/b>/.test(text.substr(s.position,4))) {
-          s.emitBlock();
-          s.bold = false;
-          s.modStack.shift();
-          s.position += 3;
-        } else if (!s.mono && (s.mod() === 'ital') && /<\/i>/.test(text.substr(s.position,4))) {
-          s.emitBlock();
-          s.ital = false;
-          s.modStack.shift();
-          s.position += 3;
-        } else if ((s.mod() === 'mono') && /<\/code>/.test(text.substr(s.position,7))) {
-          s.emitBlock();
-          s.mono = false;
-          s.modStack.shift();
-          s.position += 6;
-        } else {
-          s.add(ch);
-        }
-      } else if (/&/.test(ch)) {
-        if (/&lt;/.test(text.substr(s.position,4))) {
-          s.add("<");
-          s.position += 3;
-        } else if (/&amp;/.test(text.substr(s.position,5))) {
-          s.add("&");
-          s.position += 4;
-        } else {
-          s.add("&");
-        }
-      } else {
+    while (s.position < s.text.length) {
+      let ch = s.text.charAt(s.position);
+
+      let parsed = s.parseWS(ch)
+        || (/</.test(ch) && (
+             s.parseStartTag('bold', '<b>')
+          || s.parseStartTag('ital', '<i>')
+          || s.parseStartTag('mono', '<code>')
+          || s.parseEndTag('bold', '</b>')
+          || s.parseEndTag('ital', '</i>')
+          || s.parseEndTag('mono', '</code>')))
+        || parseEntities(ch);
+
+      if (!parsed) {
         s.add(ch);
       }
       s.position++
     }
     s.emitBlock();
-    return blocks;
+    return s.blocks;
   }
 
 
@@ -285,129 +473,49 @@ class LabelSplitter {
    * @returns {Array}
    */
   splitMarkdownBlocks(text) {
-    let blocks = [];
-
-    // TODO: consolidate following + methods/closures with splitHtmlBlocks()
-    // NOTE: sequences of tabs and spaces are reduced to single space; scan usage of `this.spacing` within method
-    let s = {
-      bold: false,
-      ital: false,
-      mono: false,
-      beginable: true,
-      spacing: false,
-      position: 0,
-      buffer: "",
-      modStack: []
-    };
-
-    s.mod = function() {
-      return (this.modStack.length === 0) ? 'normal' : this.modStack[0];
-    };
-
-    s.modName = function() {
-      if (this.modStack.length === 0)
-        return 'normal';
-      else if (this.modStack[0] === 'mono')
-        return 'mono';
-      else {
-        if (s.bold && s.ital) {
-          return 'boldital';
-        } else if (s.bold) {
-          return 'bold';
-        } else if (s.ital) {
-          return 'ital';
-        }
-      }
-    };
-
-    s.emitBlock = function(override=false) {  // eslint-disable-line no-unused-vars
-      if (this.spacing) {
-        this.add(" ");
-        this.spacing = false;
-      }
-      if (this.buffer.length > 0) {
-        blocks.push({ text: this.buffer, mod: this.modName() });
-        this.buffer = "";
-      }
-    };
+    let s = new MarkupAccumulator(text);
+    let beginable = true;  // false once plain text has been added; start tags then need pending spacing
 
-    s.add = function(text) {
-      if (text === " ") {
-        s.spacing = true;
-      }
-      if (s.spacing) {
-        this.buffer += " ";
-        this.spacing = false;
-      }
-      if (text != " ") {
-        this.buffer += text;
-      }
-    };
-
-    while (s.position < text.length) {
-      let ch = text.charAt(s.position);
-      if (/[ \t]/.test(ch)) {
-        if (!s.mono) {
-          s.spacing = true;
-        } else {
-          s.add(ch);
-        }
-        s.beginable = true
-      } else if (/\\/.test(ch)) {
-        if (s.position < text.length+1) {
+    let parseOverride = (ch) => {  // backslash escape: output the following character literally
+      if (/\\/.test(ch)) {
+        if (s.position + 1 < s.text.length) {  // only if a character follows the backslash
           s.position++;
-          ch = text.charAt(s.position);
+          ch = s.text.charAt(s.position);
           if (/ \t/.test(ch)) {
             s.spacing = true;
           } else {
             s.add(ch);
-            s.beginable = false;
+            beginable = false;
           }
         }
-      } else if (!s.mono && !s.bold && (s.beginable || s.spacing) && /\*/.test(ch)) {
-        s.emitBlock();
-        s.bold = true;
-        s.modStack.unshift("bold");
-      } else if (!s.mono && !s.ital && (s.beginable || s.spacing) && /\_/.test(ch)) {
-        s.emitBlock();
-        s.ital = true;
-        s.modStack.unshift("ital");
-      } else if (!s.mono && (s.beginable || s.spacing) && /`/.test(ch)) {
-        s.emitBlock();
-        s.mono = true;
-        s.modStack.unshift("mono");
-      } else if (!s.mono && (s.mod() === "bold") && /\*/.test(ch)) {
-        if ((s.position === text.length-1) || /[.,_` \t\n]/.test(text.charAt(s.position+1))) {
-          s.emitBlock();
-          s.bold = false;
-          s.modStack.shift();
-        } else {
-          s.add(ch);
-        }
-      } else if (!s.mono && (s.mod() === "ital") && /\_/.test(ch)) {
-        if ((s.position === text.length-1) || /[.,*` \t\n]/.test(text.charAt(s.position+1))) {
-          s.emitBlock();
-          s.ital = false;
-          s.modStack.shift();
-        } else {
-          s.add(ch);
-        }
-      } else if (s.mono && (s.mod() === "mono") && /`/.test(ch)) {
-        if ((s.position === text.length-1) || (/[.,*_ \t\n]/.test(text.charAt(s.position+1)))) {
-          s.emitBlock();
-          s.mono = false;
-          s.modStack.shift();
-        } else {
-          s.add(ch);
-        }
-      } else {
+
+        return true;
+      }
+
+      return false;
+    };
+
+    while (s.position < s.text.length) {
+      let ch = s.text.charAt(s.position);
+
+      let parsed = s.parseWS(ch)
+        || parseOverride(ch)
+        || ((beginable || s.spacing) && (
+             s.parseStartTag('bold', '*')
+          || s.parseStartTag('ital', '_')
+          || s.parseStartTag('mono', '`')))
+        || s.parseEndTag('bold', '*', 'afterBold')
+        || s.parseEndTag('ital', '_', 'afterItal')
+        || s.parseEndTag('mono', '`', 'afterMono');
+
+      if (!parsed) {
         s.add(ch);
-        s.beginable = false;
+        beginable = false;
       }
       s.position++
     }
     s.emitBlock();
-    return blocks;
+    return s.blocks;
   }
 
 
diff --git a/test/Label.test.js b/test/Label.test.js
index f2dbe90a..8f2cf52a 100644
--- a/test/Label.test.js
+++ b/test/Label.test.js
@@ -2,6 +2,7 @@
  * TODO - add tests for:
  * ====
  *
+ * - html entities
  * - html unclosed or unopened tags
  * - html tag combinations with no font defined (e.g. bold within mono) 
  * - Unit tests for bad font shorthands.