Browse Source

Network: DRY code in Label for parsing markup (#3565)

* Network: DRY code in Label for parsing markup

This gets rid of a major eyesore for me: the accumulator object was identical for HTML and Markdown.

In addition, the parsing has been refactored. Common elements have been DRY'd and the logic of the parsing has been made more comprehensible.

* Added the suggestion from @mbroad to precompile the regexps

* Fixed linting
mbroad/code-climate-coverage-develop
wimrijnders 7 years ago
committed by Yotam Berkowitz
parent
commit
7110549e74
2 changed files with 323 additions and 214 deletions
  1. +322
    -214
      lib/network/modules/components/shared/LabelSplitter.js
  2. +1
    -0
      test/Label.test.js

+ 322
- 214
lib/network/modules/components/shared/LabelSplitter.js View File

@ -1,6 +1,274 @@
let LabelAccumulator = require('./LabelAccumulator').default;
let ComponentUtil = require('./ComponentUtil').default;
// Hash of prepared (precompiled) regexps, keyed by the tag string that is
// passed to MarkupAccumulator.match(). Frozen because it is shared,
// read-only lookup data for every parser instance.
const tagPattern = Object.freeze({
  // HTML
  '<b>': /<b>/,
  '<i>': /<i>/,
  '<code>': /<code>/,
  '</b>': /<\/b>/,
  '</i>': /<\/i>/,
  '</code>': /<\/code>/,
  // Markdown
  '*': /\*/, // bold
  '_': /\_/, // ital
  '`': /`/, // mono
  // Markdown look-ahead patterns: any character that is NOT the given tag character
  'afterBold': /[^\*]/,
  'afterItal': /[^_]/,
  'afterMono': /[^`]/,
});
/**
 * Internal helper class for parsing the markup tags for HTML and Markdown.
 *
 * Characters are collected in `buffer`; every time a tag switches the active
 * style, the buffered text is pushed onto `blocks` as `{ text, mod }`.
 * The instance does not loop over the text itself: the caller inspects the
 * character at `position`, calls the parse methods and increments `position`.
 *
 * NOTE: Sequences of tabs and spaces are reduced to a single space while not
 *       inside a 'mono' block. Scan usage of `this.spacing` within the methods.
 */
class MarkupAccumulator {
  /**
   * Create an instance
   *
   * @param {string} text text to parse for markup
   */
  constructor(text) {
    this.text = text;
    this.bold = false;
    this.ital = false;
    this.mono = false;
    this.spacing = false; // true while a collapsed space is pending output
    this.position = 0;
    this.buffer = "";
    this.modStack = []; // most recently opened tag is at index 0
    this.blocks = [];
  }

  /**
   * Return the mod label currently on the top of the stack
   *
   * @returns {string} label of topmost mod, 'normal' if no tag is open
   * @private
   */
  mod() {
    return (this.modStack.length === 0) ? 'normal' : this.modStack[0];
  }

  /**
   * Return the mod label currently active
   *
   * Unlike `mod()`, this combines the `bold` and `ital` flags, so nested
   * bold + italic text is reported as 'boldital'.
   *
   * @returns {string|undefined} label of active mod; undefined when a
   *          non-mono tag is on the stack but neither `bold` nor `ital` is set
   * @private
   */
  modName() {
    if (this.modStack.length === 0) {
      return 'normal';
    }
    if (this.modStack[0] === 'mono') {
      return 'mono';
    }
    if (this.bold && this.ital) {
      return 'boldital';
    }
    if (this.bold) {
      return 'bold';
    }
    if (this.ital) {
      return 'ital';
    }
    return undefined;
  }

  /**
   * Flush the buffer (including a pending space) into `blocks`, labelled
   * with the currently active mod. Does nothing if the buffer is empty.
   *
   * @private
   */
  emitBlock() {
    if (this.spacing) {
      this.add(" ");
      this.spacing = false;
    }
    if (this.buffer.length > 0) {
      this.blocks.push({ text: this.buffer, mod: this.modName() });
      this.buffer = "";
    }
  }

  /**
   * Output text to buffer
   *
   * A pending space (`this.spacing`) is written before the text itself.
   *
   * @param {string} text text to add
   * @private
   */
  add(text) {
    if (text === " ") {
      this.spacing = true;
    }
    if (this.spacing) {
      this.buffer += " ";
      this.spacing = false;
    }
    if (text !== " ") {
      this.buffer += text;
    }
  }

  /**
   * Handle parsing of whitespace
   *
   * Outside of 'mono' the whitespace is only remembered as a pending space;
   * inside 'mono' the character is added to the buffer as is.
   *
   * @param {string} ch the character to check
   * @returns {boolean} true if the character was processed as whitespace, false otherwise
   */
  parseWS(ch) {
    if (/[ \t]/.test(ch)) {
      if (!this.mono) {
        this.spacing = true;
      } else {
        this.add(ch);
      }
      return true;
    }
    return false;
  }

  /**
   * Emit the text collected so far and open a new block type.
   *
   * @param {string} tagName label for block type to set
   * @private
   */
  setTag(tagName) {
    this.emitBlock();
    this[tagName] = true;
    this.modStack.unshift(tagName);
  }

  /**
   * Emit the text collected so far and close the topmost block type.
   *
   * @param {string} tagName label for block type to unset
   * @private
   */
  unsetTag(tagName) {
    this.emitBlock();
    this[tagName] = false;
    this.modStack.shift();
  }

  /**
   * Try to open the given block type at the current position.
   *
   * @param {string} tagName label for block type we are currently processing
   * @param {string|RegExp} tag string to match in text
   * @returns {boolean} true if the tag was processed, false otherwise
   */
  parseStartTag(tagName, tag) {
    // Note: if 'mono' passed as tagName, there is a double check here. This is OK
    if (!this.mono && !this[tagName] && this.match(tag)) {
      this.setTag(tagName);
      return true;
    }
    return false;
  }

  /**
   * Test whether the tag is present at the current position.
   *
   * On a successful match with `advance` set, the position is moved to the
   * LAST character of the tag (not past it).
   *
   * @param {string|RegExp} tag string (or key of a prepared regexp) to match in text
   * @param {boolean} [advance=true] if set, advance current position in text
   * @returns {boolean} true if match at given position, false otherwise
   * @private
   */
  match(tag, advance = true) {
    const [regExp, length] = this.prepareRegExp(tag);
    const matched = regExp.test(this.text.substr(this.position, length));
    if (matched && advance) {
      this.position += length - 1;
    }
    return matched;
  }

  /**
   * Try to close the given block type at the current position.
   *
   * @param {string} tagName label for block type we are currently processing
   * @param {string|RegExp} tag string to match in text
   * @param {string|RegExp} [nextTag] look-ahead pattern (regular expression, or key of a
   *        prepared regexp); if given, the block is only closed when the tag
   *        is the last thing in the text or this pattern matches
   * @returns {boolean} true if the tag was processed (consumed), false otherwise
   */
  parseEndTag(tagName, tag, nextTag) {
    let checkTag = (this.mod() === tagName);
    if (tagName === 'mono') { // special handling for 'mono'
      checkTag = checkTag && this.mono;
    } else {
      checkTag = checkTag && !this.mono;
    }

    if (checkTag && this.match(tag)) {
      if (nextTag !== undefined) {
        // Purpose of the following match is to prevent a direct unset/set of a given tag
        // E.g. '*bold **still bold*' => '*bold still bold*'
        // NOTE(review): the look-ahead is evaluated from the current position
        // (still on the closing tag) and, for a string key, over a window of
        // `nextTag.length` characters - confirm this is the intended window.
        if ((this.position === this.text.length - 1) || this.match(nextTag, false)) {
          this.unsetTag(tagName);
        }
      } else {
        this.unsetTag(tagName);
      }
      return true;
    }
    return false;
  }

  /**
   * Replace the tag at the current position with the given value.
   *
   * `match()` already moves the position to the last character of the tag,
   * so no further position adjustment is done here.
   *
   * NOTE(review): this method takes exactly `(tag, value)`. A call of the
   * form `replace(text, tag, value)` would treat the whole text as the tag;
   * verify the call sites.
   *
   * @param {string|RegExp} tag string to match in text
   * @param {string} value string to replace tag with, if found at current position
   * @returns {boolean} true if the tag was processed, false otherwise
   */
  replace(tag, value) {
    if (this.match(tag)) {
      this.add(value);
      return true;
    }
    return false;
  }

  /**
   * Create a regular expression for the tag if it isn't already one.
   *
   * String tags without a prepared regexp are matched literally: regexp
   * metacharacters in them are escaped before compiling.
   *
   * @param {string|RegExp} tag string to match in text
   * @returns {[RegExp, number]} regular expression to use and length of input string to match
   * @private
   */
  prepareRegExp(tag) {
    if (tag instanceof RegExp) {
      return [tag, 1]; // ASSUMPTION: regexp only tests one character
    }

    // use prepared regexp if present (own keys only, never inherited members)
    if (Object.prototype.hasOwnProperty.call(tagPattern, tag)) {
      return [tagPattern[tag], tag.length];
    }

    // Escape metacharacters so that e.g. '(' or '[' in a tag cannot produce
    // an invalid or unintended pattern.
    const literal = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return [new RegExp(literal), tag.length];
  }
}
/**
* Helper class for Label which explodes the label text into lines and blocks within lines
@ -159,123 +427,43 @@ class LabelSplitter {
* @returns {Array}
*/
splitHtmlBlocks(text) {
let blocks = [];
// TODO: consolidate following + methods/closures with splitMarkdownBlocks()
// NOTE: sequences of tabs and spaces are reduced to single space; scan usage of `this.spacing` within method
let s = {
bold: false,
ital: false,
mono: false,
spacing: false,
position: 0,
buffer: "",
modStack: []
};
let s = new MarkupAccumulator(text);
s.mod = function() {
return (this.modStack.length === 0) ? 'normal' : this.modStack[0];
};
let parseEntities = (ch) => {
if (/&/.test(ch)) {
let parsed = s.replace(s.text, '&lt;', '<')
|| s.replace(s.text, '&amp;', '&');
s.modName = function() {
if (this.modStack.length === 0)
return 'normal';
else if (this.modStack[0] === 'mono')
return 'mono';
else {
if (s.bold && s.ital) {
return 'boldital';
} else if (s.bold) {
return 'bold';
} else if (s.ital) {
return 'ital';
if (!parsed) {
s.add("&");
}
}
};
s.emitBlock = function(override=false) { // eslint-disable-line no-unused-vars
if (this.spacing) {
this.add(" ");
this.spacing = false;
}
if (this.buffer.length > 0) {
blocks.push({ text: this.buffer, mod: this.modName() });
this.buffer = "";
return true;
}
};
s.add = function(text) {
if (text === " ") {
s.spacing = true;
}
if (s.spacing) {
this.buffer += " ";
this.spacing = false;
}
if (text != " ") {
this.buffer += text;
}
return false;
};
while (s.position < text.length) {
let ch = text.charAt(s.position);
if (/[ \t]/.test(ch)) {
if (!s.mono) {
s.spacing = true;
} else {
s.add(ch);
}
} else if (/</.test(ch)) {
if (!s.mono && !s.bold && /<b>/.test(text.substr(s.position,3))) {
s.emitBlock();
s.bold = true;
s.modStack.unshift("bold");
s.position += 2;
} else if (!s.mono && !s.ital && /<i>/.test(text.substr(s.position,3))) {
s.emitBlock();
s.ital = true;
s.modStack.unshift("ital");
s.position += 2;
} else if (!s.mono && /<code>/.test(text.substr(s.position,6))) {
s.emitBlock();
s.mono = true;
s.modStack.unshift("mono");
s.position += 5;
} else if (!s.mono && (s.mod() === 'bold') && /<\/b>/.test(text.substr(s.position,4))) {
s.emitBlock();
s.bold = false;
s.modStack.shift();
s.position += 3;
} else if (!s.mono && (s.mod() === 'ital') && /<\/i>/.test(text.substr(s.position,4))) {
s.emitBlock();
s.ital = false;
s.modStack.shift();
s.position += 3;
} else if ((s.mod() === 'mono') && /<\/code>/.test(text.substr(s.position,7))) {
s.emitBlock();
s.mono = false;
s.modStack.shift();
s.position += 6;
} else {
s.add(ch);
}
} else if (/&/.test(ch)) {
if (/&lt;/.test(text.substr(s.position,4))) {
s.add("<");
s.position += 3;
} else if (/&amp;/.test(text.substr(s.position,5))) {
s.add("&");
s.position += 4;
} else {
s.add("&");
}
} else {
while (s.position < s.text.length) {
let ch = s.text.charAt(s.position);
let parsed = s.parseWS(ch)
|| (/</.test(ch) && (
s.parseStartTag('bold', '<b>')
|| s.parseStartTag('ital', '<i>')
|| s.parseStartTag('mono', '<code>')
|| s.parseEndTag('bold', '</b>')
|| s.parseEndTag('ital', '</i>')
|| s.parseEndTag('mono', '</code>')))
|| parseEntities(ch);
if (!parsed) {
s.add(ch);
}
s.position++
}
s.emitBlock();
return blocks;
return s.blocks;
}
@ -285,129 +473,49 @@ class LabelSplitter {
* @returns {Array}
*/
splitMarkdownBlocks(text) {
let blocks = [];
// TODO: consolidate following + methods/closures with splitHtmlBlocks()
// NOTE: sequences of tabs and spaces are reduced to single space; scan usage of `this.spacing` within method
let s = {
bold: false,
ital: false,
mono: false,
beginable: true,
spacing: false,
position: 0,
buffer: "",
modStack: []
};
s.mod = function() {
return (this.modStack.length === 0) ? 'normal' : this.modStack[0];
};
s.modName = function() {
if (this.modStack.length === 0)
return 'normal';
else if (this.modStack[0] === 'mono')
return 'mono';
else {
if (s.bold && s.ital) {
return 'boldital';
} else if (s.bold) {
return 'bold';
} else if (s.ital) {
return 'ital';
}
}
};
s.emitBlock = function(override=false) { // eslint-disable-line no-unused-vars
if (this.spacing) {
this.add(" ");
this.spacing = false;
}
if (this.buffer.length > 0) {
blocks.push({ text: this.buffer, mod: this.modName() });
this.buffer = "";
}
};
let s = new MarkupAccumulator(text);
let beginable = true;
s.add = function(text) {
if (text === " ") {
s.spacing = true;
}
if (s.spacing) {
this.buffer += " ";
this.spacing = false;
}
if (text != " ") {
this.buffer += text;
}
};
while (s.position < text.length) {
let ch = text.charAt(s.position);
if (/[ \t]/.test(ch)) {
if (!s.mono) {
s.spacing = true;
} else {
s.add(ch);
}
s.beginable = true
} else if (/\\/.test(ch)) {
if (s.position < text.length+1) {
let parseOverride = (ch) => {
if (/\\/.test(ch)) {
if (s.position < this.text.length + 1) {
s.position++;
ch = text.charAt(s.position);
ch = this.text.charAt(s.position);
if (/ \t/.test(ch)) {
s.spacing = true;
} else {
s.add(ch);
s.beginable = false;
beginable = false;
}
}
} else if (!s.mono && !s.bold && (s.beginable || s.spacing) && /\*/.test(ch)) {
s.emitBlock();
s.bold = true;
s.modStack.unshift("bold");
} else if (!s.mono && !s.ital && (s.beginable || s.spacing) && /\_/.test(ch)) {
s.emitBlock();
s.ital = true;
s.modStack.unshift("ital");
} else if (!s.mono && (s.beginable || s.spacing) && /`/.test(ch)) {
s.emitBlock();
s.mono = true;
s.modStack.unshift("mono");
} else if (!s.mono && (s.mod() === "bold") && /\*/.test(ch)) {
if ((s.position === text.length-1) || /[.,_` \t\n]/.test(text.charAt(s.position+1))) {
s.emitBlock();
s.bold = false;
s.modStack.shift();
} else {
s.add(ch);
}
} else if (!s.mono && (s.mod() === "ital") && /\_/.test(ch)) {
if ((s.position === text.length-1) || /[.,*` \t\n]/.test(text.charAt(s.position+1))) {
s.emitBlock();
s.ital = false;
s.modStack.shift();
} else {
s.add(ch);
}
} else if (s.mono && (s.mod() === "mono") && /`/.test(ch)) {
if ((s.position === text.length-1) || (/[.,*_ \t\n]/.test(text.charAt(s.position+1)))) {
s.emitBlock();
s.mono = false;
s.modStack.shift();
} else {
s.add(ch);
}
} else {
return true
}
return false;
}
while (s.position < s.text.length) {
let ch = s.text.charAt(s.position);
let parsed = s.parseWS(ch)
|| parseOverride(ch)
|| ((beginable || s.spacing) && (
s.parseStartTag('bold', '*')
|| s.parseStartTag('ital', '_')
|| s.parseStartTag('mono', '`')))
|| s.parseEndTag('bold', '*', 'afterBold')
|| s.parseEndTag('ital', '_', 'afterItal')
|| s.parseEndTag('mono', '`', 'afterMono');
if (!parsed) {
s.add(ch);
s.beginable = false;
beginable = false;
}
s.position++
}
s.emitBlock();
return blocks;
return s.blocks;
}

+ 1
- 0
test/Label.test.js View File

@ -2,6 +2,7 @@
* TODO - add tests for:
* ====
*
* - html entities
* - html unclosed or unopened tags
* - html tag combinations with no font defined (e.g. bold within mono)
* - Unit tests for bad font shorthands.

Loading…
Cancel
Save