|
|
- (function () {
// Work out where to publish the API and where the Converter constructor
// comes from: a CommonJS module (e.g. Node.js) exposes `exports` and
// `require`; otherwise fall back to the browser's global Markdown namespace.
var isCommonJsModule = typeof exports === "object" && typeof require === "function";
var output = isCommonJsModule ? exports : window.Markdown;
var Converter = isCommonJsModule
    ? require("./Markdown.Converter").Converter
    : Markdown.Converter;
-
/// <summary>
/// Build a Converter whose "postConversion" hook chain first strips
/// every non-whitelisted tag and then removes unbalanced tags.
/// </summary>
/// <returns>the newly created, hooked-up Converter instance</returns>
output.getSanitizingConverter = function () {
    var sanitizingConverter = new Converter();
    var hooks = sanitizingConverter.hooks;

    // Order matters: balanceTags assumes its input was already sanitized.
    hooks.chain("postConversion", sanitizeHtml);
    hooks.chain("postConversion", balanceTags);

    return sanitizingConverter;
};
-
/// <summary>
/// Run every tag-like span of the given HTML (anything starting with "<",
/// closed by ">" or not) through sanitizeTag and substitute the result.
/// </summary>
/// <param name="html">HTML string to sanitize</param>
/// <returns>the HTML with each tag replaced by sanitizeTag's verdict</returns>
function sanitizeHtml(html) {
    var anyTagPattern = /<[^>]*>?/gi;
    return html.replace(anyTagPattern, sanitizeTag);
}
-
// Attribute-free tags: (paired tags, opening or closing form) | (stand-alone <br> / <hr>)
var basic_tag_whitelist = /^(<\/?(b|blockquote|code|del|dd|dl|dt|em|h1|h2|h3|i|kbd|li|ol|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?\/?>)$/i;

// Anchors: <a href="http(s)/ftp or site-relative url" [title="..."]> | </a>
var a_white = /^(<a\shref="((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\stitle="[^"<>]+")?\s?>|<\/a>)$/i;

// Images: <img src="http(s) or site-relative url" [width] [height] [alt] [title]>,
// with the optional attributes accepted only in exactly that order
var img_white = /^(<img\ssrc="(https?:\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$/i;

/// <summary>
/// Decide whether a single tag may stay in the output.
/// </summary>
/// <param name="tag">one complete tag-like span, e.g. "<b>" or "<a href=...>"</param>
/// <returns>the tag unchanged when a whitelist accepts it, otherwise ""</returns>
function sanitizeTag(tag) {
    var isWhitelisted =
        basic_tag_whitelist.test(tag) ||
        a_white.test(tag) ||
        img_white.test(tag);

    return isWhitelisted ? tag : "";
}
-
/// <summary>
/// attempt to balance HTML tags in the html string
/// by removing any unmatched opening or closing tags
/// IMPORTANT: we *assume* HTML has *already* been
/// sanitized and is safe/sane before balancing!
///
/// adapted from CODESNIPPET: A8591DBA-D1D3-11DE-947C-BA5556D89593
/// </summary>
/// <param name="html">sanitized HTML string to balance</param>
/// <returns>
/// the html with every orphaned tag deleted; the input is returned
/// unchanged when it contains no tags or nothing needs removing
/// </returns>
function balanceTags(html) {

    if (html === "")
        return "";

    var re = /<\/?\w+[^>]*(\s|$|>)/g;
    // anchored capture of just the tag name; unlike a ".*"-based rewrite
    // it is not confused by tags that contain line breaks
    var nameRe = /^<\/?(\w+)/;

    // Collect the tags from the ORIGINAL string so that their indices line
    // up with the removal pass below, which also scans the original string.
    // (Lower-casing the whole string first can turn non-ASCII letters into
    // ASCII ones and thereby change how many tags the pattern finds.)
    var tags = html.match(re);

    // no HTML tags present? nothing to do; exit now
    if (!tags)
        return html;

    var tagcount = tags.length;
    var ignoredtags = "<p><img><br><li><hr>";
    var tagpaired = [];
    var tagremove = [];
    var needsRemoval = false;
    var tagname, tag, match, ctag, ntag;

    // lower-case each tag on its own; this makes
    // our case insensitive comparisons easier
    for (ctag = 0; ctag < tagcount; ctag++)
        tags[ctag] = tags[ctag].toLowerCase();

    // loop through matched tags in forward order
    for (ctag = 0; ctag < tagcount; ctag++) {
        tag = tags[ctag];
        // every entry was produced by `re`, so the name pattern always matches
        tagname = nameRe.exec(tag)[1];

        // skip any already paired tags
        // and skip tags in our ignore list; assume they're self-closed
        // (plain substring lookup: the name must not be treated as a regex)
        if (tagpaired[ctag] || ignoredtags.indexOf("<" + tagname + ">") > -1)
            continue;

        match = -1;

        if (!/^<\//.test(tag)) {
            // this is an opening tag
            // search forwards (next tags), look for closing tags
            for (ntag = ctag + 1; ntag < tagcount; ntag++) {
                if (!tagpaired[ntag] && tags[ntag] === "</" + tagname + ">") {
                    match = ntag;
                    break;
                }
            }
        }

        if (match === -1) {
            // unmatched opener, or a closer nobody claimed: mark for removal
            tagremove[ctag] = true;
            needsRemoval = true;
        } else {
            tagpaired[match] = true; // mark paired
        }
    }

    if (!needsRemoval)
        return html;

    // delete all orphaned tags from the string; the callback sees the tags
    // in the same order in which they were collected above
    var position = 0;
    return html.replace(re, function (found) {
        var kept = tagremove[position] ? "" : found;
        position++;
        return kept;
    });
}
- })();
|