Network: DRY code in Label for parsing markup (#3565)

* Network: DRY code in Label for parsing markup. This gets rid of a major eyesore for me. The accumulator object was identical for HTML and Markdown. In addition, the parsing has been refactored. Common elements have been DRY'd and the logic of the parsing has been made more comprehensible. * Added suggestion from @mbroad w.r.t. regexp precompilation. * Fixed linting.
7 years ago · 7110549e74
--- a/lib/network/modules/components/shared/LabelSplitter.js
+++ b/lib/network/modules/components/shared/LabelSplitter.js
@ -1,6 +1,274 @@
 let LabelAccumulator = require('./LabelAccumulator').default;
 let ComponentUtil = require('./ComponentUtil').default;

 // Hash of precompiled regexps, keyed by the tag string handed to the parser.
 // The `after*` entries are single-character classes used as a look-ahead
 // when a Markdown tag is closed.
 const tagPattern = {
  // HTML
  '<b>': /<b>/,
  '<i>': /<i>/,
  '<code>': /<code>/,
  '</b>': /<\/b>/,
  '</i>': /<\/i>/,
  '</code>': /<\/code>/,
  // Markdown
  '*': /\*/,  // bold
  '_': /_/,   // ital
  '`': /`/,   // mono
  'afterBold': /[^*]/,
  'afterItal': /[^_]/,
  'afterMono': /[^`]/,
 };


 /**
 * Internal helper class for parsing the markup tags for HTML and Markdown.
 *
 * The text is walked one position at a time. The instance tracks which
 * styles are currently active and collects the styled runs in `blocks` as
 * `{text, mod}` objects.
 *
 * NOTE: Outside of mono blocks, sequences of tabs and spaces are reduced to
 *       a single space. Scan usage of `this.spacing` within the methods.
 */
 class MarkupAccumulator {

  /**
   * Create an instance
   *
   * @param {string} text  text to parse for markup
   */
  constructor(text) {
    this.text = text;
    this.bold = false;
    this.ital = false;
    this.mono = false;
    this.spacing = false;   // true while collapsed whitespace is pending
    this.position = 0;      // index into `text` of the character being parsed
    this.buffer = "";       // text collected for the block under construction
    this.modStack = [];     // open tags, innermost first

    this.blocks = [];       // finished blocks: {text, mod}
  }


  /**
   * Return the mod label currently on the top of the stack
   *
   * @returns {string}  label of topmost mod, 'normal' if no tag is open
   * @private
   */
  mod() {
    return (this.modStack.length === 0) ? 'normal' : this.modStack[0];
  }


  /**
   * Return the mod label currently active
   *
   * Unlike `mod()`, this combines the bold and ital flags, so nested bold and
   * italic yields 'boldital'.
   *
   * @returns {string|undefined}  label of active mod; undefined if a tag is
   *                              open but neither bold, ital nor mono applies
   * @private
   */
  modName() {
    if (this.modStack.length === 0) {
      return 'normal';
    }
    if (this.modStack[0] === 'mono') {
      return 'mono';
    }
    if (this.bold && this.ital) {
      return 'boldital';
    }
    if (this.bold) {
      return 'bold';
    }
    if (this.ital) {
      return 'ital';
    }
    return undefined;
  }


  /**
   * Close the block under construction and append it to `blocks`.
   *
   * Pending whitespace is flushed into the buffer as a single space first.
   * Nothing is appended if the buffer is empty.
   *
   * @private
   */
  emitBlock() {
    if (this.spacing) {
      this.add(" ");
      this.spacing = false;
    }
    if (this.buffer.length > 0) {
      this.blocks.push({ text: this.buffer, mod: this.modName() });
      this.buffer = "";
    }
  }


  /**
   * Output text to buffer
   *
   * If whitespace is pending, or `text` is itself a single space, one space
   * is written before any other text.
   *
   * @param {string} text  text to add
   * @private
   */
  add(text) {
    if (text === " ") {
      this.spacing = true;
    }
    if (this.spacing) {
      this.buffer += " ";
      this.spacing = false;
    }
    if (text !== " ") {
      this.buffer += text;
    }
  }


  /**
   * Handle parsing of whitespace
   *
   * Outside of a mono block the whitespace is only remembered (and thereby
   * collapsed); inside a mono block it is added to the buffer as is.
   *
   * @param {string} ch  the character to check
   * @returns {boolean} true if the character was processed as whitespace, false otherwise
   */
  parseWS(ch) {
    if (/[ \t]/.test(ch)) {
      if (!this.mono) {
        this.spacing = true;
      } else {
        this.add(ch);
      }
      return true;
    }

    return false;
  }


  /**
   * Open a tag: emit the text collected so far, then mark the tag as active.
   *
   * @param {string} tagName  label for block type to set
   * @private
   */
  setTag(tagName) {
    this.emitBlock();
    this[tagName] = true;
    this.modStack.unshift(tagName);
  }


  /**
   * Close a tag: emit the text collected so far, then drop the innermost tag.
   *
   * @param {string} tagName  label for block type to unset
   * @private
   */
  unsetTag(tagName) {
    this.emitBlock();
    this[tagName] = false;
    this.modStack.shift();
  }


  /**
   * Try to open the given tag at the current position.
   *
   * No tag is opened inside a mono block, and a tag that is already open is
   * not opened again.
   *
   * @param {string} tagName label for block type we are currently processing
   * @param {string|RegExp} tag string to match in text
   * @returns {boolean} true if the tag was processed, false otherwise
   */
  parseStartTag(tagName, tag) {
    // Note: if 'mono' passed as tagName, there is a double check here. This is OK
    if (!this.mono && !this[tagName] && this.match(tag)) {
      this.setTag(tagName);
      return true;
    }

    return false;
  }


  /**
   * Test whether the tag occurs at the current position.
   *
   * On a successful match with `advance` set, the position is moved to the
   * last character of the tag, i.e. one step short of the text following it.
   *
   * @param {string|RegExp} tag
   * @param {boolean} [advance=true] if set, advance current position in text
   * @returns {boolean} true if match at given position, false otherwise
   * @private
   */
  match(tag, advance = true) {
    const [regExp, length] = this.prepareRegExp(tag);
    const matched = regExp.test(this.text.substr(this.position, length));

    if (matched && advance) {
      this.position += length - 1;
    }

    return matched;
  }


  /**
   * Try to close the given tag at the current position.
   *
   * A tag can only be closed when it is the innermost open tag. 'mono' is
   * closed from within a mono block, all other tags only outside of one.
   *
   * @param {string} tagName label for block type we are currently processing
   * @param {string|RegExp} tag string to match in text
   * @param {string|RegExp} [nextTag] pattern to match for characters *following* the current tag
   * @returns {boolean} true if the tag was processed, false otherwise
   */
  parseEndTag(tagName, tag, nextTag) {
    const isInnermost = (this.mod() === tagName);
    const monoStateOk = (tagName === 'mono') ? this.mono : !this.mono;  // special handling for 'mono'

    if (!(isInnermost && monoStateOk && this.match(tag))) {
      return false;
    }

    // Purpose of the look-ahead is to prevent a direct unset/set of a given tag
    // E.g. '*bold **still bold*' => '*bold still bold*'
    // The tag is consumed either way; it is only unset if the look-ahead agrees.
    //
    // NOTE(review): the look-ahead is tested from the current position, which
    // is still on the tag itself, and a named pattern gets the length of its
    // name as window (see prepareRegExp) - confirm this is what is intended.
    const atEndOfText = (this.position === this.text.length - 1);
    if (nextTag === undefined || atEndOfText || this.match(nextTag, false)) {
      this.unsetTag(tagName);
    }

    return true;
  }


  /**
   * Replace the tag at the current position with the given value.
   *
   * @param {string|RegExp} tag  string to match in text
   * @param {string} value  string to replace tag with, if found at current position
   * @returns {boolean} true if the tag was processed, false otherwise
   */
  replace(tag, value) {
    // match() has already moved the position onto the last character of the
    // tag, so the position must not be advanced a second time here.
    if (this.match(tag)) {
      this.add(value);
      return true;
    }

    return false;
  }


  /**
   * Create a regular expression for the tag if it isn't already one.
   *
   * @param {string|RegExp} tag  string to match in text
   * @returns {[RegExp, number]}  regular expression to use and length of input string to match
   * @private
   */
  prepareRegExp(tag) {
    if (tag instanceof RegExp) {
      return [tag, 1];   // ASSUMPTION: regexp only tests one character
    }

    // use prepared regexp if present
    // NOTE(review): for the named entries of tagPattern (e.g. 'afterBold')
    // the length returned below is the length of the name, not 1.
    const prepared = tagPattern[tag];
    const regExp = (prepared !== undefined) ? prepared : new RegExp(tag);

    return [regExp, tag.length];
  }
 }


 /**
 * Helper class for Label which explodes the label text into lines and blocks within lines
@ -159,123 +427,43 @@ class LabelSplitter {
   * @returns {Array}
   */
  splitHtmlBlocks(text) {
    let blocks = [];

    // TODO: consolidate following + methods/closures with splitMarkdownBlocks()
    // NOTE: sequences of tabs and spaces are reduced to single space; scan usage of `this.spacing` within method
    let s = {
      bold: false,
      ital: false,
      mono: false,
      spacing: false,
      position: 0,
      buffer: "",
      modStack: []
    };
    let s = new MarkupAccumulator(text);

    s.mod = function() {
      return (this.modStack.length === 0) ? 'normal' : this.modStack[0];
    };
    let parseEntities = (ch) => {
      if (/&/.test(ch)) {
        let parsed = s.replace(s.text, '&lt;', '<')
          || s.replace(s.text, '&amp;', '&');

    s.modName = function() {
      if (this.modStack.length === 0)
        return 'normal';
      else if (this.modStack[0] === 'mono')
        return 'mono';
      else {
        if (s.bold && s.ital) {
          return 'boldital';
        } else if (s.bold) {
          return 'bold';
        } else if (s.ital) {
          return 'ital';
        if (!parsed) {
          s.add("&");
        }
      }
    };

    s.emitBlock = function(override=false) {  // eslint-disable-line no-unused-vars
      if (this.spacing) {
        this.add(" ");
        this.spacing = false;
      }
      if (this.buffer.length > 0) {
        blocks.push({ text: this.buffer, mod: this.modName() });
        this.buffer = "";
        return true;
      }
    };

    s.add = function(text) {
      if (text === " ") {
        s.spacing = true;
      }
      if (s.spacing) {
        this.buffer += " ";
        this.spacing = false;
      }
      if (text != " ") {
        this.buffer += text;
      }
      return false;
    };

    while (s.position < text.length) {
      let ch = text.charAt(s.position);
      if (/[ \t]/.test(ch)) {
        if (!s.mono) {
          s.spacing = true;
        } else {
          s.add(ch);
        }
      } else if (/</.test(ch)) {
        if (!s.mono && !s.bold && /<b>/.test(text.substr(s.position,3))) {
          s.emitBlock();
          s.bold = true;
          s.modStack.unshift("bold");
          s.position += 2;
        } else if (!s.mono && !s.ital && /<i>/.test(text.substr(s.position,3))) {
          s.emitBlock();
          s.ital = true;
          s.modStack.unshift("ital");
          s.position += 2;
        } else if (!s.mono && /<code>/.test(text.substr(s.position,6))) {
          s.emitBlock();
          s.mono = true;
          s.modStack.unshift("mono");
          s.position += 5;
        } else if (!s.mono && (s.mod() === 'bold') && /<\/b>/.test(text.substr(s.position,4))) {
          s.emitBlock();
          s.bold = false;
          s.modStack.shift();
          s.position += 3;
        } else if (!s.mono && (s.mod() === 'ital') && /<\/i>/.test(text.substr(s.position,4))) {
          s.emitBlock();
          s.ital = false;
          s.modStack.shift();
          s.position += 3;
        } else if ((s.mod() === 'mono') && /<\/code>/.test(text.substr(s.position,7))) {
          s.emitBlock();
          s.mono = false;
          s.modStack.shift();
          s.position += 6;
        } else {
          s.add(ch);
        }
      } else if (/&/.test(ch)) {
        if (/&lt;/.test(text.substr(s.position,4))) {
          s.add("<");
          s.position += 3;
        } else if (/&amp;/.test(text.substr(s.position,5))) {
          s.add("&");
          s.position += 4;
        } else {
          s.add("&");
        }
      } else {
    while (s.position < s.text.length) {
      let ch = s.text.charAt(s.position);

      let parsed = s.parseWS(ch)
        || (/</.test(ch) && ( 
             s.parseStartTag('bold', '<b>')
          || s.parseStartTag('ital', '<i>')
          || s.parseStartTag('mono', '<code>')
          || s.parseEndTag('bold', '</b>')
          || s.parseEndTag('ital', '</i>')
          || s.parseEndTag('mono', '</code>')))
        || parseEntities(ch);

      if (!parsed) {
        s.add(ch);
      }
      s.position++
    }
    s.emitBlock();
    return blocks;
    return s.blocks;
  }


@ -285,129 +473,49 @@ class LabelSplitter {
   * @returns {Array}
   */
  splitMarkdownBlocks(text) {
    let blocks = [];

    // TODO: consolidate following + methods/closures with splitHtmlBlocks()
    // NOTE: sequences of tabs and spaces are reduced to single space; scan usage of `this.spacing` within method
    let s = {
      bold: false,
      ital: false,
      mono: false,
      beginable: true,
      spacing: false,
      position: 0,
      buffer: "",
      modStack: []
    };

    s.mod = function() {
      return (this.modStack.length === 0) ? 'normal' : this.modStack[0];
    };

    s.modName = function() {
      if (this.modStack.length === 0)
        return 'normal';
      else if (this.modStack[0] === 'mono')
        return 'mono';
      else {
        if (s.bold && s.ital) {
          return 'boldital';
        } else if (s.bold) {
          return 'bold';
        } else if (s.ital) {
          return 'ital';
        }
      }
    };

    s.emitBlock = function(override=false) {  // eslint-disable-line no-unused-vars
      if (this.spacing) {
        this.add(" ");
        this.spacing = false;
      }
      if (this.buffer.length > 0) {
        blocks.push({ text: this.buffer, mod: this.modName() });
        this.buffer = "";
      }
    };
    let s = new MarkupAccumulator(text); 
    let beginable = true;

    s.add = function(text) {
      if (text === " ") {
        s.spacing = true;
      }
      if (s.spacing) {
        this.buffer += " ";
        this.spacing = false;
      }
      if (text != " ") {
        this.buffer += text;
      }
    };

    while (s.position < text.length) {
      let ch = text.charAt(s.position);
      if (/[ \t]/.test(ch)) {
        if (!s.mono) {
          s.spacing = true;
        } else {
          s.add(ch);
        }
        s.beginable = true
      } else if (/\\/.test(ch)) {
        if (s.position < text.length+1) {
    let parseOverride = (ch) => {
      if (/\\/.test(ch)) {
        if (s.position < this.text.length + 1) {
          s.position++;
          ch = text.charAt(s.position);
          ch = this.text.charAt(s.position);
          if (/ \t/.test(ch)) {
            s.spacing = true;
          } else {
            s.add(ch);
            s.beginable = false;
            beginable = false;
          }
        }
      } else if (!s.mono && !s.bold && (s.beginable || s.spacing) && /\*/.test(ch)) {
        s.emitBlock();
        s.bold = true;
        s.modStack.unshift("bold");
      } else if (!s.mono && !s.ital && (s.beginable || s.spacing) && /\_/.test(ch)) {
        s.emitBlock();
        s.ital = true;
        s.modStack.unshift("ital");
      } else if (!s.mono && (s.beginable || s.spacing) && /`/.test(ch)) {
        s.emitBlock();
        s.mono = true;
        s.modStack.unshift("mono");
      } else if (!s.mono && (s.mod() === "bold") && /\*/.test(ch)) {
        if ((s.position === text.length-1) || /[.,_` \t\n]/.test(text.charAt(s.position+1))) {
          s.emitBlock();
          s.bold = false;
          s.modStack.shift();
        } else {
          s.add(ch);
        }
      } else if (!s.mono && (s.mod() === "ital") && /\_/.test(ch)) {
        if ((s.position === text.length-1) || /[.,*` \t\n]/.test(text.charAt(s.position+1))) {
          s.emitBlock();
          s.ital = false;
          s.modStack.shift();
        } else {
          s.add(ch);
        }
      } else if (s.mono && (s.mod() === "mono") && /`/.test(ch)) {
        if ((s.position === text.length-1) || (/[.,*_ \t\n]/.test(text.charAt(s.position+1)))) {
          s.emitBlock();
          s.mono = false;
          s.modStack.shift();
        } else {
          s.add(ch);
        }
      } else {

        return true
      }

      return false;
    }

    while (s.position < s.text.length) {
      let ch = s.text.charAt(s.position);

      let parsed = s.parseWS(ch)
        || parseOverride(ch)
        || ((beginable || s.spacing) && (
             s.parseStartTag('bold', '*')
          || s.parseStartTag('ital', '_')
          || s.parseStartTag('mono', '`')))
        || s.parseEndTag('bold', '*', 'afterBold')
        || s.parseEndTag('ital', '_', 'afterItal')
        || s.parseEndTag('mono', '`', 'afterMono');

      if (!parsed) {
        s.add(ch);
        s.beginable = false;
        beginable = false;
      }
      s.position++
    }
    s.emitBlock();
    return blocks;
    return s.blocks;
  }


--- a/test/Label.test.js
+++ b/test/Label.test.js
@ -2,6 +2,7 @@
 * TODO - add tests for:
 * ====
 *
 * - html entities
 * - html unclosed or unopened tags
 * - html tag combinations with no font defined (e.g. bold within mono) 
 * - Unit tests for bad font shorthands.