not really known
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

108 lines
3.8 KiB

  1. (function () {
  // Resolve where the public API is attached (`output`) and where the
  // Converter constructor comes from, depending on the host environment.
  var output;
  var Converter;
  if (typeof require !== "function" || typeof exports !== "object") {
    // no CommonJS module system detected: use the Markdown global
    output = window.Markdown;
    Converter = Markdown.Converter;
  } else {
    // we're in a CommonJS (e.g. Node.js) module
    output = exports;
    Converter = require("./Markdown.Converter").Converter;
  }
  // Create a Converter with two "postConversion" hooks chained in order:
  // first sanitizeHtml, then balanceTags. Returns the configured converter.
  output.getSanitizingConverter = function () {
    var sanitizer = new Converter();
    var postProcessors = [sanitizeHtml, balanceTags];
    for (var i = 0; i < postProcessors.length; i++) {
      sanitizer.hooks.chain("postConversion", postProcessors[i]);
    }
    return sanitizer;
  };
  // Run every tag-like span of `html` (a "<", any run of non-">" characters,
  // and an optional closing ">") through sanitizeTag, substituting whatever
  // sanitizeTag returns for that span.
  function sanitizeHtml(html) {
    var tagLike = /<[^>]*>?/gi;
    var cleaned = html.replace(tagLike, sanitizeTag);
    return cleaned;
  }
  // (tags that can be opened/closed) | (tags that stand alone)
  var basic_tag_whitelist = /^(<\/?(b|blockquote|code|del|dd|dl|dt|em|h1|h2|h3|i|kbd|li|ol|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?\/?>)$/i;
  // <a href="url..." optional title>|</a>
  // the href must start with http://, https://, ftp:// or "/"
  var a_white = /^(<a\shref="((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\stitle="[^"<>]+")?\s?>|<\/a>)$/i;
  // <img src="url..." optional width optional height optional alt optional title
  // the src must start with http://, https:// or "/"; attributes must appear in this order
  var img_white = /^(<img\ssrc="(https?:\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$/i;

  // Whitelist filter for a single tag.
  //   tag: the complete text of one tag, e.g. "<b>" or "<a href=\"/x\">"
  //   returns: `tag` unchanged when it matches one of the whitelists above,
  //            otherwise the empty string (i.e. the tag is dropped)
  function sanitizeTag(tag) {
    // the whitelist regexes are not global, so test() keeps no state between calls
    var allowed = basic_tag_whitelist.test(tag) ||
                  a_white.test(tag) ||
                  img_white.test(tag);
    return allowed ? tag : "";
  }
  /// <summary>
  /// attempt to balance HTML tags in the html string
  /// by removing any unmatched opening or closing tags
  /// IMPORTANT: we *assume* HTML has *already* been
  /// sanitized and is safe/sane before balancing!
  ///
  /// Pairing only checks that every opening tag has a later, not yet
  /// paired, closing tag of the same name; nesting order is not verified.
  /// Tags named p, img, br, li and hr are never touched.
  ///
  /// html: string to balance
  /// returns: html with every orphaned tag deleted (unchanged if none)
  ///
  /// adapted from CODESNIPPET: A8591DBA-D1D3-11DE-947C-BA5556D89593
  /// </summary>
  function balanceTags(html) {
    if (html === "")
      return "";

    var re = /<\/?\w+[^>]*(\s|$|>)/g;

    // Match against the ORIGINAL string so that tag indexes line up with
    // the replace() pass at the bottom. Lower-casing the whole string first
    // can change which spans match, because some characters lower-case
    // into ASCII letters.
    var tags = html.match(re) || [];
    var tagcount = tags.length;

    // no HTML tags present? nothing to do; exit now
    if (tagcount === 0)
      return html;

    var ignoredtags = "<p><img><br><li><hr>";
    var tagNameRe = /^<\/?(\w+)/;
    var tagpaired = [];
    var tagremove = [];
    var needsRemoval = false;
    var tag, tagname, match, ctag, ntag;

    // lower-case each tag individually; this makes
    // our case insensitive comparisons easier
    for (ctag = 0; ctag < tagcount; ctag++)
      tags[ctag] = tags[ctag].toLowerCase();

    // loop through matched tags in forward order
    for (ctag = 0; ctag < tagcount; ctag++) {
      tag = tags[ctag];

      // Capture only the leading name. The rest of the tag is ignored,
      // so a newline between the name and the first attribute cannot
      // leak into tagname.
      tagname = tagNameRe.exec(tag)[1];

      // skip any already paired tags
      // and skip tags in our ignore list; assume they're self-closed
      // (plain substring lookup: tagname must not be interpreted as a regex)
      if (tagpaired[ctag] || ignoredtags.indexOf("<" + tagname + ">") > -1)
        continue;

      match = -1;

      if (tag.charAt(1) !== "/") {
        // this is an opening tag
        // search forwards (next tags), look for closing tags
        for (ntag = ctag + 1; ntag < tagcount; ntag++) {
          if (!tagpaired[ntag] && tags[ntag] === "</" + tagname + ">") {
            match = ntag;
            break;
          }
        }
      }

      // a closing tag that reaches this point was never claimed by an
      // opening tag, so match is still -1 and it is removed as well
      if (match === -1)
        needsRemoval = tagremove[ctag] = true; // mark for removal
      else
        tagpaired[match] = true; // mark paired
    }

    if (!needsRemoval)
      return html;

    // delete all orphaned tags from the string
    var index = 0;
    return html.replace(re, function (found) {
      var res = tagremove[index] ? "" : found;
      index++;
      return res;
    });
  }
  91. })();