From af6c64207f0a1d77cddf301329084a9a60263bfe Mon Sep 17 00:00:00 2001
From: juanf <juanf>
Date: Fri, 5 Feb 2016 10:36:59 +0000
Subject: [PATCH] SSDM-3146 : HTML Sanitizer before showing properties on forms
 + drag and drop images disabled from rich text editor.

SVN: 35639
---
 .../1/as/webapps/eln-lims/html/index.html     |    3 +
 .../webapps/eln-lims/html/js/util/FormUtil.js |   13 +-
 .../lib/caja-HTML-sanitizer/js/lib/html4.js   |  371 ++++++
 .../lib/caja-HTML-sanitizer/js/lib/uri.js     |  752 +++++++++++
 .../lib/caja-HTML-sanitizer/js/sanitizer.js   | 1120 +++++++++++++++++
 5 files changed, 2255 insertions(+), 4 deletions(-)
 create mode 100644 plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/lib/caja-HTML-sanitizer/js/lib/html4.js
 create mode 100644 plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/lib/caja-HTML-sanitizer/js/lib/uri.js
 create mode 100644 plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/lib/caja-HTML-sanitizer/js/sanitizer.js

diff --git a/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/index.html b/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/index.html
index 9d343e4dac9..a7ea0d01646 100644
--- a/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/index.html
+++ b/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/index.html
@@ -62,6 +62,9 @@
 	<script type="text/javascript" src="./lib/filesaver/js/FileSaver.js"></script>
 	<script type="text/javascript" src="./lib/drawingboard/js/drawingboard.min.js"></script>
 	<script type="text/javascript" src="./lib/summernote/js/summernote.min.js"></script>
+	<script type="text/javascript" src="./lib/caja-HTML-sanitizer/js/lib/html4.js"></script>
+	<script type="text/javascript" src="./lib/caja-HTML-sanitizer/js/lib/uri.js"></script>
+	<script type="text/javascript" src="./lib/caja-HTML-sanitizer/js/sanitizer.js"></script>
 	
 	<!-- First party libraries -->
 	<script type="text/javascript" src="./lib/openbis/js/v1/openbis.js"></script>
diff --git a/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/js/util/FormUtil.js b/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/js/util/FormUtil.js
index d75e2615b2b..7f9f7c5ce78 100644
--- a/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/js/util/FormUtil.js
+++ b/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/js/util/FormUtil.js
@@ -614,12 +614,13 @@ var FormUtil = new function() {
 	// Rich Text Editor Support - (Summernote)
 	//
 	this.activateRichTextProperties = function() {
-		$('textarea').summernote({toolbar: [
+		$('textarea').summernote({
+			toolbar: [
 		['Font Style', ['fontname', 'fontsize', 'color', 'bold', 'italic', 'underline', 'strikethrough', 'superscript', 'subscript', 'clear']],
 		['Paragraph style ', ['style', 'ol', 'ul', 'paragraph', 'height']],
 		['Insert', ['link', 'table', 'hr']],
-		['Misc', ['fullscreen', 'undo', 'redo', 'help']],
-		]});
+		['Misc', ['fullscreen', 'undo', 'redo', 'help']],],
+		disableDragAndDrop: true});
 	}
 	
 	this.updateModelRichTextProperties = function(properties) {
@@ -638,11 +639,15 @@ var FormUtil = new function() {
 		if(propertyType.dataType === "MULTILINE_VARCHAR") {
 			var originalValue = entity.properties[propertyType.code];
 			if(originalValue) {
+				//Take envelope out if present
 				var bodyStart = originalValue.indexOf("<body>");
 				var bodyEnd = originalValue.indexOf("</body>");
 				if(bodyStart !== -1 && bodyEnd !== -1) {
-					entity.properties[propertyType.code] = originalValue.substring(bodyStart + 6, bodyEnd);
+					originalValue = originalValue.substring(bodyStart + 6, bodyEnd);
 				}
+				//Clean the contents
+				originalValue = html.sanitize(originalValue);
+				entity.properties[propertyType.code] = originalValue;
 			}
 		}
 	}
diff --git a/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/lib/caja-HTML-sanitizer/js/lib/html4.js b/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/lib/caja-HTML-sanitizer/js/lib/html4.js
new file mode 100644
index 00000000000..f5ec982bad1
--- /dev/null
+++ b/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/lib/caja-HTML-sanitizer/js/lib/html4.js
@@ -0,0 +1,371 @@
+/* Copyright Google Inc.
+ * Licensed under the Apache Licence Version 2.0
+ * Autogenerated at Fri Aug 13 11:26:55 PDT 2010
+ * @provides html4
+ */
+var html4 = {};
+html4 .atype = {
+    'NONE': 0,
+    'URI': 1,
+    'URI_FRAGMENT': 11,
+    'SCRIPT': 2,
+    'STYLE': 3,
+    'ID': 4,
+    'IDREF': 5,
+    'IDREFS': 6,
+    'GLOBAL_NAME': 7,
+    'LOCAL_NAME': 8,
+    'CLASSES': 9,
+    'FRAME_TARGET': 10
+};
+html4 .ATTRIBS = {
+    '*::class': 9,
+    '*::dir': 0,
+    '*::id': 4,
+    '*::lang': 0,
+    '*::onclick': 2,
+    '*::ondblclick': 2,
+    '*::onkeydown': 2,
+    '*::onkeypress': 2,
+    '*::onkeyup': 2,
+    '*::onload': 2,
+    '*::onmousedown': 2,
+    '*::onmousemove': 2,
+    '*::onmouseout': 2,
+    '*::onmouseover': 2,
+    '*::onmouseup': 2,
+    '*::style': 3,
+    '*::title': 0,
+    'a::accesskey': 0,
+    'a::coords': 0,
+    'a::href': 1,
+    'a::hreflang': 0,
+    'a::name': 7,
+    'a::onblur': 2,
+    'a::onfocus': 2,
+    'a::rel': 0,
+    'a::rev': 0,
+    'a::shape': 0,
+    'a::tabindex': 0,
+    'a::target': 10,
+    'a::type': 0,
+    'area::accesskey': 0,
+    'area::alt': 0,
+    'area::coords': 0,
+    'area::href': 1,
+    'area::nohref': 0,
+    'area::onblur': 2,
+    'area::onfocus': 2,
+    'area::shape': 0,
+    'area::tabindex': 0,
+    'area::target': 10,
+    'bdo::dir': 0,
+    'blockquote::cite': 1,
+    'br::clear': 0,
+    'button::accesskey': 0,
+    'button::disabled': 0,
+    'button::name': 8,
+    'button::onblur': 2,
+    'button::onfocus': 2,
+    'button::tabindex': 0,
+    'button::type': 0,
+    'button::value': 0,
+    'caption::align': 0,
+    'col::align': 0,
+    'col::char': 0,
+    'col::charoff': 0,
+    'col::span': 0,
+    'col::valign': 0,
+    'col::width': 0,
+    'colgroup::align': 0,
+    'colgroup::char': 0,
+    'colgroup::charoff': 0,
+    'colgroup::span': 0,
+    'colgroup::valign': 0,
+    'colgroup::width': 0,
+    'del::cite': 1,
+    'del::datetime': 0,
+    'dir::compact': 0,
+    'div::align': 0,
+    'dl::compact': 0,
+    'font::color': 0,
+    'font::face': 0,
+    'font::size': 0,
+    'form::accept': 0,
+    'form::action': 1,
+    'form::autocomplete': 0,
+    'form::enctype': 0,
+    'form::method': 0,
+    'form::name': 7,
+    'form::onreset': 2,
+    'form::onsubmit': 2,
+    'form::target': 10,
+    'h1::align': 0,
+    'h2::align': 0,
+    'h3::align': 0,
+    'h4::align': 0,
+    'h5::align': 0,
+    'h6::align': 0,
+    'hr::align': 0,
+    'hr::noshade': 0,
+    'hr::size': 0,
+    'hr::width': 0,
+    'iframe::align': 0,
+    'iframe::frameborder': 0,
+    'iframe::height': 0,
+    'iframe::marginheight': 0,
+    'iframe::marginwidth': 0,
+    'iframe::width': 0,
+    'img::align': 0,
+    'img::alt': 0,
+    'img::border': 0,
+    'img::height': 0,
+    'img::hspace': 0,
+    'img::ismap': 0,
+    'img::name': 7,
+    'img::src': 1,
+    'img::usemap': 11,
+    'img::vspace': 0,
+    'img::width': 0,
+    'input::accept': 0,
+    'input::accesskey': 0,
+    'input::align': 0,
+    'input::alt': 0,
+    'input::autocomplete': 0,
+    'input::checked': 0,
+    'input::disabled': 0,
+    'input::ismap': 0,
+    'input::maxlength': 0,
+    'input::name': 8,
+    'input::onblur': 2,
+    'input::onchange': 2,
+    'input::onfocus': 2,
+    'input::onselect': 2,
+    'input::readonly': 0,
+    'input::size': 0,
+    'input::src': 1,
+    'input::tabindex': 0,
+    'input::type': 0,
+    'input::usemap': 11,
+    'input::value': 0,
+    'ins::cite': 1,
+    'ins::datetime': 0,
+    'label::accesskey': 0,
+    'label::for': 5,
+    'label::onblur': 2,
+    'label::onfocus': 2,
+    'legend::accesskey': 0,
+    'legend::align': 0,
+    'li::type': 0,
+    'li::value': 0,
+    'map::name': 7,
+    'menu::compact': 0,
+    'ol::compact': 0,
+    'ol::start': 0,
+    'ol::type': 0,
+    'optgroup::disabled': 0,
+    'optgroup::label': 0,
+    'option::disabled': 0,
+    'option::label': 0,
+    'option::selected': 0,
+    'option::value': 0,
+    'p::align': 0,
+    'pre::width': 0,
+    'q::cite': 1,
+    'select::disabled': 0,
+    'select::multiple': 0,
+    'select::name': 8,
+    'select::onblur': 2,
+    'select::onchange': 2,
+    'select::onfocus': 2,
+    'select::size': 0,
+    'select::tabindex': 0,
+    'table::align': 0,
+    'table::bgcolor': 0,
+    'table::border': 0,
+    'table::cellpadding': 0,
+    'table::cellspacing': 0,
+    'table::frame': 0,
+    'table::rules': 0,
+    'table::summary': 0,
+    'table::width': 0,
+    'tbody::align': 0,
+    'tbody::char': 0,
+    'tbody::charoff': 0,
+    'tbody::valign': 0,
+    'td::abbr': 0,
+    'td::align': 0,
+    'td::axis': 0,
+    'td::bgcolor': 0,
+    'td::char': 0,
+    'td::charoff': 0,
+    'td::colspan': 0,
+    'td::headers': 6,
+    'td::height': 0,
+    'td::nowrap': 0,
+    'td::rowspan': 0,
+    'td::scope': 0,
+    'td::valign': 0,
+    'td::width': 0,
+    'textarea::accesskey': 0,
+    'textarea::cols': 0,
+    'textarea::disabled': 0,
+    'textarea::name': 8,
+    'textarea::onblur': 2,
+    'textarea::onchange': 2,
+    'textarea::onfocus': 2,
+    'textarea::onselect': 2,
+    'textarea::readonly': 0,
+    'textarea::rows': 0,
+    'textarea::tabindex': 0,
+    'tfoot::align': 0,
+    'tfoot::char': 0,
+    'tfoot::charoff': 0,
+    'tfoot::valign': 0,
+    'th::abbr': 0,
+    'th::align': 0,
+    'th::axis': 0,
+    'th::bgcolor': 0,
+    'th::char': 0,
+    'th::charoff': 0,
+    'th::colspan': 0,
+    'th::headers': 6,
+    'th::height': 0,
+    'th::nowrap': 0,
+    'th::rowspan': 0,
+    'th::scope': 0,
+    'th::valign': 0,
+    'th::width': 0,
+    'thead::align': 0,
+    'thead::char': 0,
+    'thead::charoff': 0,
+    'thead::valign': 0,
+    'tr::align': 0,
+    'tr::bgcolor': 0,
+    'tr::char': 0,
+    'tr::charoff': 0,
+    'tr::valign': 0,
+    'ul::compact': 0,
+    'ul::type': 0
+};
+html4 .eflags = {
+    'OPTIONAL_ENDTAG': 1,
+    'EMPTY': 2,
+    'CDATA': 4,
+    'RCDATA': 8,
+    'UNSAFE': 16,
+    'FOLDABLE': 32,
+    'SCRIPT': 64,
+    'STYLE': 128
+};
+html4 .ELEMENTS = {
+    'a': 0,
+    'abbr': 0,
+    'acronym': 0,
+    'address': 0,
+    'applet': 16,
+    'area': 2,
+    'b': 0,
+    'base': 18,
+    'basefont': 18,
+    'bdo': 0,
+    'big': 0,
+    'blockquote': 0,
+    'body': 49,
+    'br': 2,
+    'button': 0,
+    'caption': 0,
+    'center': 0,
+    'cite': 0,
+    'code': 0,
+    'col': 2,
+    'colgroup': 1,
+    'dd': 1,
+    'del': 0,
+    'dfn': 0,
+    'dir': 0,
+    'div': 0,
+    'dl': 0,
+    'dt': 1,
+    'em': 0,
+    'fieldset': 0,
+    'font': 0,
+    'form': 0,
+    'frame': 18,
+    'frameset': 16,
+    'h1': 0,
+    'h2': 0,
+    'h3': 0,
+    'h4': 0,
+    'h5': 0,
+    'h6': 0,
+    'head': 49,
+    'hr': 2,
+    'html': 49,
+    'i': 0,
+    'iframe': 4,
+    'img': 2,
+    'input': 2,
+    'ins': 0,
+    'isindex': 18,
+    'kbd': 0,
+    'label': 0,
+    'legend': 0,
+    'li': 1,
+    'link': 18,
+    'map': 0,
+    'menu': 0,
+    'meta': 18,
+    'noframes': 20,
+    'noscript': 20,
+    'object': 16,
+    'ol': 0,
+    'optgroup': 0,
+    'option': 1,
+    'p': 1,
+    'param': 18,
+    'pre': 0,
+    'q': 0,
+    's': 0,
+    'samp': 0,
+    'script': 84,
+    'select': 0,
+    'small': 0,
+    'span': 0,
+    'strike': 0,
+    'strong': 0,
+    'style': 148,
+    'sub': 0,
+    'sup': 0,
+    'table': 0,
+    'tbody': 1,
+    'td': 1,
+    'textarea': 8,
+    'tfoot': 1,
+    'th': 1,
+    'thead': 1,
+    'title': 24,
+    'tr': 1,
+    'tt': 0,
+    'u': 0,
+    'ul': 0,
+    'var': 0
+};
+
+html4 .URIEFFECTS = {
+
+};
+html4 .LOADERTYPES = {}
+
+if (typeof exports !== 'undefined') {
+    if (typeof module !== 'undefined' && module.exports) {
+        exports = module.exports = html4;
+    }
+    exports.URI = html4;
+} else {
+
+    // Exports for closure compiler.
+    if (typeof window !== 'undefined') {
+        window['html4'] = html4;
+    }
+}
diff --git a/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/lib/caja-HTML-sanitizer/js/lib/uri.js b/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/lib/caja-HTML-sanitizer/js/lib/uri.js
new file mode 100644
index 00000000000..8b64dc6458a
--- /dev/null
+++ b/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/lib/caja-HTML-sanitizer/js/lib/uri.js
@@ -0,0 +1,752 @@
+// Copyright (C) 2010 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * @fileoverview
+ * Implements RFC 3986 for parsing/formatting URIs.
+ *
+ * @author mikesamuel@gmail.com
+ * \@provides URI
+ * \@overrides window
+ */
+
+var URI = (function () {
+
+    /**
+     * creates a uri from the string form.  The parser is relaxed, so special
+     * characters that aren't escaped but don't cause ambiguities will not cause
+     * parse failures.
+     *
+     * @return {URI|null}
+     */
+    function parse(uriStr) {
+        var m = ('' + uriStr).match(URI_RE_);
+        if (!m) { return null; }
+        return new URI(
+            nullIfAbsent(m[1]),
+            nullIfAbsent(m[2]),
+            nullIfAbsent(m[3]),
+            nullIfAbsent(m[4]),
+            nullIfAbsent(m[5]),
+            nullIfAbsent(m[6]),
+            nullIfAbsent(m[7]));
+    }
+
+
+    /**
+     * creates a uri from the given parts.
+     *
+     * @param scheme {string} an unencoded scheme such as "http" or null
+     * @param credentials {string} unencoded user credentials or null
+     * @param domain {string} an unencoded domain name or null
+     * @param port {number} a port number in [1, 65535].
+     *    -1 indicates no port, as does null.
+     * @param path {string} an unencoded path
+     * @param query {Array.<string>|string|null} a list of unencoded cgi
+     *   parameters where even values are keys and odds the corresponding values
+     *   or an unencoded query.
+     * @param fragment {string} an unencoded fragment without the "#" or null.
+     * @return {URI}
+     */
+    function create(scheme, credentials, domain, port, path, query, fragment) {
+        var uri = new URI(
+            encodeIfExists2(scheme, URI_DISALLOWED_IN_SCHEME_OR_CREDENTIALS_),
+            encodeIfExists2(
+                credentials, URI_DISALLOWED_IN_SCHEME_OR_CREDENTIALS_),
+            encodeIfExists(domain),
+            port > 0 ? port.toString() : null,
+            encodeIfExists2(path, URI_DISALLOWED_IN_PATH_),
+            null,
+            encodeIfExists(fragment));
+        if (query) {
+            if ('string' === typeof query) {
+                uri.setRawQuery(query.replace(/[^?&=0-9A-Za-z_\-~.%]/g, encodeOne));
+            } else {
+                uri.setAllParameters(query);
+            }
+        }
+        return uri;
+    }
+    function encodeIfExists(unescapedPart) {
+        if ('string' == typeof unescapedPart) {
+            return encodeURIComponent(unescapedPart);
+        }
+        return null;
+    };
+    /**
+     * if unescapedPart is non null, then escapes any characters in it that aren't
+     * valid characters in a url and also escapes any special characters that
+     * appear in extra.
+     *
+     * @param unescapedPart {string}
+     * @param extra {RegExp} a character set of characters in [\01-\177].
+     * @return {string|null} null iff unescapedPart == null.
+     */
+    function encodeIfExists2(unescapedPart, extra) {
+        if ('string' == typeof unescapedPart) {
+            return encodeURI(unescapedPart).replace(extra, encodeOne);
+        }
+        return null;
+    };
+    /** converts a character in [\01-\177] to its url encoded equivalent. */
+    function encodeOne(ch) {
+        var n = ch.charCodeAt(0);
+        return '%' + '0123456789ABCDEF'.charAt((n >> 4) & 0xf) +
+            '0123456789ABCDEF'.charAt(n & 0xf);
+    }
+
+    /**
+     * {@updoc
+     *  $ normPath('foo/./bar')
+     *  # 'foo/bar'
+     *  $ normPath('./foo')
+     *  # 'foo'
+     *  $ normPath('foo/.')
+     *  # 'foo'
+     *  $ normPath('foo//bar')
+     *  # 'foo/bar'
+     * }
+     */
+    function normPath(path) {
+        return path.replace(/(^|\/)\.(?:\/|$)/g, '$1').replace(/\/{2,}/g, '/');
+    }
+
+    var PARENT_DIRECTORY_HANDLER = new RegExp(
+        ''
+            // A path break
+            + '(/|^)'
+            // followed by a non .. path element
+            // (cannot be . because normPath is used prior to this RegExp)
+            + '(?:[^./][^/]*|\\.{2,}(?:[^./][^/]*)|\\.{3,}[^/]*)'
+            // followed by .. followed by a path break.
+            + '/\\.\\.(?:/|$)');
+
+    var PARENT_DIRECTORY_HANDLER_RE = new RegExp(PARENT_DIRECTORY_HANDLER);
+
+    var EXTRA_PARENT_PATHS_RE = /^(?:\.\.\/)*(?:\.\.$)?/;
+
+    /**
+     * Normalizes its input path and collapses all . and .. sequences except for
+     * .. sequences that would take it above the root of the current parent
+     * directory.
+     * {@updoc
+     *  $ collapse_dots('foo/../bar')
+     *  # 'bar'
+     *  $ collapse_dots('foo/./bar')
+     *  # 'foo/bar'
+     *  $ collapse_dots('foo/../bar/./../../baz')
+     *  # 'baz'
+     *  $ collapse_dots('../foo')
+     *  # '../foo'
+     *  $ collapse_dots('../foo').replace(EXTRA_PARENT_PATHS_RE, '')
+     *  # 'foo'
+     * }
+     */
+    function collapse_dots(path) {
+        if (path === null) { return null; }
+        var p = normPath(path);
+        // Only /../ left to flatten
+        var r = PARENT_DIRECTORY_HANDLER_RE;
+        // We replace with $1 which matches a / before the .. because this
+        // guarantees that:
+        // (1) we have at most one / where the adjacent path sections join,
+        // (2) always have a slash if there is a preceding path section, and
+        // (3) we never turn a relative path into an absolute path.
+        for (var q; (q = p.replace(r, '$1')) != p; p = q) {};
+        return p;
+    }
+
+    /**
+     * resolves a relative url string to a base uri.
+     * @return {URI}
+     */
+    function resolve(baseUri, relativeUri) {
+        // there are several kinds of relative urls:
+        // 1. //foo - replaces everything from the domain on.  foo is a domain name
+        // 2. foo - replaces the last part of the path, the whole query and fragment
+        // 3. /foo - replaces the path, the query and fragment
+        // 4. ?foo - replace the query and fragment
+        // 5. #foo - replace the fragment only
+
+        var absoluteUri = baseUri.clone();
+        // we satisfy these conditions by looking for the first part of relativeUri
+        // that is not blank and applying defaults to the rest
+
+        var overridden = relativeUri.hasScheme();
+
+        if (overridden) {
+            absoluteUri.setRawScheme(relativeUri.getRawScheme());
+        } else {
+            overridden = relativeUri.hasCredentials();
+        }
+
+        if (overridden) {
+            absoluteUri.setRawCredentials(relativeUri.getRawCredentials());
+        } else {
+            overridden = relativeUri.hasDomain();
+        }
+
+        if (overridden) {
+            absoluteUri.setRawDomain(relativeUri.getRawDomain());
+        } else {
+            overridden = relativeUri.hasPort();
+        }
+
+        var rawPath = relativeUri.getRawPath();
+        var simplifiedPath = collapse_dots(rawPath);
+        if (overridden) {
+            absoluteUri.setPort(relativeUri.getPort());
+            simplifiedPath = simplifiedPath
+                && simplifiedPath.replace(EXTRA_PARENT_PATHS_RE, '');
+        } else {
+            overridden = !!rawPath;
+            if (overridden) {
+                // resolve path properly
+                if (simplifiedPath.charCodeAt(0) !== 0x2f /* / */) {  // path is relative
+                    var absRawPath = collapse_dots(absoluteUri.getRawPath() || '')
+                        .replace(EXTRA_PARENT_PATHS_RE, '');
+                    var slash = absRawPath.lastIndexOf('/') + 1;
+                    simplifiedPath = collapse_dots(
+                        (slash ? absRawPath.substring(0, slash) : '')
+                            + collapse_dots(rawPath))
+                        .replace(EXTRA_PARENT_PATHS_RE, '');
+                }
+            } else {
+                simplifiedPath = simplifiedPath
+                    && simplifiedPath.replace(EXTRA_PARENT_PATHS_RE, '');
+                if (simplifiedPath !== rawPath) {
+                    absoluteUri.setRawPath(simplifiedPath);
+                }
+            }
+        }
+
+        if (overridden) {
+            absoluteUri.setRawPath(simplifiedPath);
+        } else {
+            overridden = relativeUri.hasQuery();
+        }
+
+        if (overridden) {
+            absoluteUri.setRawQuery(relativeUri.getRawQuery());
+        } else {
+            overridden = relativeUri.hasFragment();
+        }
+
+        if (overridden) {
+            absoluteUri.setRawFragment(relativeUri.getRawFragment());
+        }
+
+        return absoluteUri;
+    }
+
+    /**
+     * a mutable URI.
+     *
+     * This class contains setters and getters for the parts of the URI.
+     * The <tt>getXYZ</tt>/<tt>setXYZ</tt> methods return the decoded part -- so
+     * <code>uri.parse('/foo%20bar').getPath()</code> will return the decoded path,
+     * <tt>/foo bar</tt>.
+     *
+     * <p>The raw versions of fields are available too.
+     * <code>uri.parse('/foo%20bar').getRawPath()</code> will return the raw path,
+     * <tt>/foo%20bar</tt>.  Use the raw setters with care, since
+     * <code>URI::toString</code> is not guaranteed to return a valid url if a
+     * raw setter was used.
+     *
+     * <p>All setters return <tt>this</tt> and so may be chained, a la
+     * <code>uri.parse('/foo').setFragment('part').toString()</code>.
+     *
+     * <p>You should not use this constructor directly -- please prefer the factory
+     * functions {@link uri.parse}, {@link uri.create}, {@link uri.resolve}
+     * instead.</p>
+     *
+     * <p>The parameters are all raw (assumed to be properly escaped) parts, and
+     * any (but not all) may be null.  Undefined is not allowed.</p>
+     *
+     * @constructor
+     */
+    function URI(
+        rawScheme,
+        rawCredentials, rawDomain, port,
+        rawPath, rawQuery, rawFragment) {
+        this.scheme_ = rawScheme;
+        this.credentials_ = rawCredentials;
+        this.domain_ = rawDomain;
+        this.port_ = port;
+        this.path_ = rawPath;
+        this.query_ = rawQuery;
+        this.fragment_ = rawFragment;
+        /**
+         * @type {Array|null}
+         */
+        this.paramCache_ = null;
+    }
+
+    /** returns the string form of the url. */
+    URI.prototype.toString = function () {
+        var out = [];
+        if (null !== this.scheme_) { out.push(this.scheme_, ':'); }
+        if (null !== this.domain_) {
+            out.push('//');
+            if (null !== this.credentials_) { out.push(this.credentials_, '@'); }
+            out.push(this.domain_);
+            if (null !== this.port_) { out.push(':', this.port_.toString()); }
+        }
+        if (null !== this.path_) { out.push(this.path_); }
+        if (null !== this.query_) { out.push('?', this.query_); }
+        if (null !== this.fragment_) { out.push('#', this.fragment_); }
+        return out.join('');
+    };
+
+    URI.prototype.clone = function () {
+        return new URI(this.scheme_, this.credentials_, this.domain_, this.port_,
+            this.path_, this.query_, this.fragment_);
+    };
+
+    URI.prototype.getScheme = function () {
+        // HTML5 spec does not require the scheme to be lowercased but
+        // all common browsers except Safari lowercase the scheme.
+        return this.scheme_ && decodeURIComponent(this.scheme_).toLowerCase();
+    };
+    URI.prototype.getRawScheme = function () {
+        return this.scheme_;
+    };
+    URI.prototype.setScheme = function (newScheme) {
+        this.scheme_ = encodeIfExists2(
+            newScheme, URI_DISALLOWED_IN_SCHEME_OR_CREDENTIALS_);
+        return this;
+    };
+    URI.prototype.setRawScheme = function (newScheme) {
+        this.scheme_ = newScheme ? newScheme : null;
+        return this;
+    };
+    URI.prototype.hasScheme = function () {
+        return null !== this.scheme_;
+    };
+
+
+    URI.prototype.getCredentials = function () {
+        return this.credentials_ && decodeURIComponent(this.credentials_);
+    };
+    URI.prototype.getRawCredentials = function () {
+        return this.credentials_;
+    };
+    URI.prototype.setCredentials = function (newCredentials) {
+        this.credentials_ = encodeIfExists2(
+            newCredentials, URI_DISALLOWED_IN_SCHEME_OR_CREDENTIALS_);
+
+        return this;
+    };
+    URI.prototype.setRawCredentials = function (newCredentials) {
+        this.credentials_ = newCredentials ? newCredentials : null;
+        return this;
+    };
+    URI.prototype.hasCredentials = function () {
+        return null !== this.credentials_;
+    };
+
+
+    URI.prototype.getDomain = function () {
+        return this.domain_ && decodeURIComponent(this.domain_);
+    };
+    URI.prototype.getRawDomain = function () {
+        return this.domain_;
+    };
+    URI.prototype.setDomain = function (newDomain) {
+        return this.setRawDomain(newDomain && encodeURIComponent(newDomain));
+    };
+    URI.prototype.setRawDomain = function (newDomain) {
+        this.domain_ = newDomain ? newDomain : null;
+        // Maintain the invariant that paths must start with a slash when the URI
+        // is not path-relative.
+        return this.setRawPath(this.path_);
+    };
+    URI.prototype.hasDomain = function () {
+        return null !== this.domain_;
+    };
+
+
+    URI.prototype.getPort = function () {
+        return this.port_ && decodeURIComponent(this.port_);
+    };
+    URI.prototype.setPort = function (newPort) {
+        if (newPort) {
+            newPort = Number(newPort);
+            if (newPort !== (newPort & 0xffff)) {
+                throw new Error('Bad port number ' + newPort);
+            }
+            this.port_ = '' + newPort;
+        } else {
+            this.port_ = null;
+        }
+        return this;
+    };
+    URI.prototype.hasPort = function () {
+        return null !== this.port_;
+    };
+
+
+    URI.prototype.getPath = function () {
+        return this.path_ && decodeURIComponent(this.path_);
+    };
+    URI.prototype.getRawPath = function () {
+        return this.path_;
+    };
+    URI.prototype.setPath = function (newPath) {
+        return this.setRawPath(encodeIfExists2(newPath, URI_DISALLOWED_IN_PATH_));
+    };
+    URI.prototype.setRawPath = function (newPath) {
+        if (newPath) {
+            newPath = String(newPath);
+            this.path_ =
+                // Paths must start with '/' unless this is a path-relative URL.
+                (!this.domain_ || /^\//.test(newPath)) ? newPath : '/' + newPath;
+        } else {
+            this.path_ = null;
+        }
+        return this;
+    };
+    URI.prototype.hasPath = function () {
+        return null !== this.path_;
+    };
+
+
+    URI.prototype.getQuery = function () {
+        // From http://www.w3.org/Addressing/URL/4_URI_Recommentations.html
+        // Within the query string, the plus sign is reserved as shorthand notation
+        // for a space.
+        return this.query_ && decodeURIComponent(this.query_).replace(/\+/g, ' ');
+    };
+    URI.prototype.getRawQuery = function () {
+        return this.query_;
+    };
+    URI.prototype.setQuery = function (newQuery) {
+        this.paramCache_ = null;
+        this.query_ = encodeIfExists(newQuery);
+        return this;
+    };
+    URI.prototype.setRawQuery = function (newQuery) {
+        this.paramCache_ = null;
+        this.query_ = newQuery ? newQuery : null;
+        return this;
+    };
+    URI.prototype.hasQuery = function () {
+        return null !== this.query_;
+    };
+
+    /**
+     * sets the query given a list of strings of the form
+     * [ key0, value0, key1, value1, ... ].
+     *
+     * <p><code>uri.setAllParameters(['a', 'b', 'c', 'd']).getQuery()</code>
+     * will yield <code>'a=b&c=d'</code>.
+     */
+    URI.prototype.setAllParameters = function (params) {
+        if (typeof params === 'object') {
+            if (!(params instanceof Array)
+                && (params instanceof Object
+                || Object.prototype.toString.call(params) !== '[object Array]')) {
+                var newParams = [];
+                var i = -1;
+                for (var k in params) {
+                    var v = params[k];
+                    if ('string' === typeof v) {
+                        newParams[++i] = k;
+                        newParams[++i] = v;
+                    }
+                }
+                params = newParams;
+            }
+        }
+        this.paramCache_ = null;
+        var queryBuf = [];
+        var separator = '';
+        for (var j = 0; j < params.length;) {
+            var k = params[j++];
+            var v = params[j++];
+            queryBuf.push(separator, encodeURIComponent(k.toString()));
+            separator = '&';
+            if (v) {
+                queryBuf.push('=', encodeURIComponent(v.toString()));
+            }
+        }
+        this.query_ = queryBuf.join('');
+        return this;
+    };
+    URI.prototype.checkParameterCache_ = function () {
+        if (!this.paramCache_) {
+            var q = this.query_;
+            if (!q) {
+                this.paramCache_ = [];
+            } else {
+                var cgiParams = q.split(/[&\?]/);
+                var out = [];
+                var k = -1;
+                for (var i = 0; i < cgiParams.length; ++i) {
+                    var m = cgiParams[i].match(/^([^=]*)(?:=(.*))?$/);
+                    // From http://www.w3.org/Addressing/URL/4_URI_Recommentations.html
+                    // Within the query string, the plus sign is reserved as shorthand
+                    // notation for a space.
+                    out[++k] = decodeURIComponent(m[1]).replace(/\+/g, ' ');
+                    out[++k] = decodeURIComponent(m[2] || '').replace(/\+/g, ' ');
+                }
+                this.paramCache_ = out;
+            }
+        }
+    };
+    /**
+     * Replaces the values of the named cgi parameter.  Existing occurrences are
+     * overwritten in place, surplus occurrences are dropped, and surplus new
+     * values are appended after all other parameters.
+     *
+     * <p>So, <code>uri.parse('foo?a=b&c=d&e=f').setParameterValues('c', ['new'])
+     * </code> yields <tt>foo?a=b&c=new&e=f</tt>.</p>
+     *
+     * @param key {string}
+     * @param values {Array.<string>} the new values.  If values is a single string
+     *   then it will be treated as the sole value.
+     * @return this, to allow chaining.
+     */
+    URI.prototype.setParameterValues = function (key, values) {
+        // A bare string is wrapped so that indexing below never addresses its
+        // individual characters (which also crashes on IE).
+        var replacements = (typeof values === 'string') ? [ values ] : values;
+
+        this.checkParameterCache_();
+        var cached = this.paramCache_;
+        var rebuilt = [];
+        var used = 0;
+        for (var at = 0; at < cached.length; at += 2) {
+            if (key !== cached[at]) {
+                rebuilt.push(cached[at], cached[at + 1]);
+            } else if (used < replacements.length) {
+                rebuilt.push(key, replacements[used++]);
+            }
+        }
+        // Append whatever new values were not consumed by existing occurrences.
+        for (; used < replacements.length; ++used) {
+            rebuilt.push(key, replacements[used]);
+        }
+        this.setAllParameters(rebuilt);
+        return this;
+    };
+    URI.prototype.removeParameter = function (key) {
+        return this.setParameterValues(key, []);  // no replacement values: every occurrence of key is dropped
+    };
+    /**
+     * returns a fresh copy of the query parameters as a flat list of decoded
+     * keys and values like [ key0, value0, key1, value1, ... ].
+     *
+     * @return {Array.<string>}
+     */
+    URI.prototype.getAllParameters = function () {
+        this.checkParameterCache_();
+        return this.paramCache_.slice();
+    };
+    /**
+     * returns every decoded value recorded for the given cgi parameter, in query
+     * order; the list is empty when the parameter does not appear.
+     * @return {Array.<string>}
+     */
+    URI.prototype.getParameterValues = function (paramNameUnescaped) {
+        this.checkParameterCache_();
+        var cache = this.paramCache_;
+        var matches = [];
+        for (var at = 0; at < cache.length; at += 2) {
+            if (cache[at] !== paramNameUnescaped) { continue; }
+            matches.push(cache[at + 1]);
+        }
+        return matches;
+    };
+    /**
+     * returns a map of cgi parameter names to (non-empty) lists of values.
+     * @return {Object.<string,Array.<string>>}
+     */
+    URI.prototype.getParameterMap = function (paramNameUnescaped) {
+        this.checkParameterCache_();
+        var paramMap = {};
+        // paramCache_ is a flat [key0, value0, key1, ...] list: advance one pair per pass.
+        for (var i = 0; i < this.paramCache_.length; i += 2) {
+            var key = this.paramCache_[i], value = this.paramCache_[i + 1];
+            if (!Object.prototype.hasOwnProperty.call(paramMap, key)) {  // own keys only: 'toString' must not resolve to Object.prototype
+                paramMap[key] = [value];
+            } else {
+                paramMap[key].push(value);
+            }
+        }
+        return paramMap;
+    };
+    /**
+     * returns the first decoded value of the given cgi parameter, or null when
+     * the parameter name does not appear in the query string.
+     * A parameter that appears without a following '<tt>=</tt>' yields the
+     * empty string.
+     * @return {string|null}
+     */
+    URI.prototype.getParameterValue = function (paramNameUnescaped) {
+        this.checkParameterCache_();
+        var cache = this.paramCache_;
+        var at = 0;
+        while (at < cache.length && cache[at] !== paramNameUnescaped) {
+            at += 2;
+        }
+        return at < cache.length ? cache[at + 1] : null;
+    };
+
+    URI.prototype.getFragment = function () {
+        return this.fragment_ && decodeURIComponent(this.fragment_);  // percent-decoded; a falsy fragment_ is returned as is
+    };
+    URI.prototype.getRawFragment = function () {
+        return this.fragment_;  // stored form, no decoding
+    };
+    URI.prototype.setFragment = function (newFragment) {
+        this.fragment_ = newFragment ? encodeURIComponent(newFragment) : null;  // stored percent-encoded; falsy input clears to null
+        return this;
+    };
+    URI.prototype.setRawFragment = function (newFragment) {
+        this.fragment_ = newFragment ? newFragment : null;  // stored verbatim; falsy input clears to null
+        return this;
+    };
+    URI.prototype.hasFragment = function () {
+        return null !== this.fragment_;  // true unless fragment_ is exactly null
+    };
+
+    function nullIfAbsent(matchPart) {
+        // Only a non-empty string counts as present; anything else becomes null.
+        var present = (typeof matchPart === 'string') && (matchPart.length !== 0);
+        return present ? matchPart : null;
+    }
+
+
+
+
+    /**
+     * a regular expression for breaking a URI into its component parts.
+     *
+     * <p>http://www.gbiv.com/protocols/uri/rfc/rfc3986.html#RFC2234 says
+     * As the "first-match-wins" algorithm is identical to the "greedy"
+     * disambiguation method used by POSIX regular expressions, it is natural and
+     * commonplace to use a regular expression for parsing the potential five
+     * components of a URI reference.
+     *
+     * <p>The following line is the regular expression for breaking-down a
+     * well-formed URI reference into its components.
+     *
+     * <pre>
+     * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+     *  12            3  4          5       6  7        8 9
+     * </pre>
+     *
+     * <p>The numbers in the second line above are only to assist readability; they
+     * indicate the reference points for each subexpression (i.e., each paired
+     * parenthesis). We refer to the value matched for subexpression <n> as $<n>.
+     * For example, matching the above expression to
+     * <pre>
+     *     http://www.ics.uci.edu/pub/ietf/uri/#Related
+     * </pre>
+     * results in the following subexpression matches:
+     * <pre>
+     *    $1 = http:
+     *    $2 = http
+     *    $3 = //www.ics.uci.edu
+     *    $4 = www.ics.uci.edu
+     *    $5 = /pub/ietf/uri/
+     *    $6 = <undefined>
+     *    $7 = <undefined>
+     *    $8 = #Related
+     *    $9 = Related
+     * </pre>
+     * where <undefined> indicates that the component is not present, as is the
+     * case for the query component in the above example. Therefore, we can
+     * determine the value of the five components as
+     * <pre>
+     *    scheme    = $2
+     *    authority = $4
+     *    path      = $5
+     *    query     = $7
+     *    fragment  = $9
+     * </pre>
+     *
+     * <p>msamuel: I have modified the regular expression slightly to expose the
+     * credentials, domain, and port separately from the authority.
+     * The modified version yields
+     * <pre>
+     *    $1 = http              scheme
+     *    $2 = <undefined>       credentials -\
+     *    $3 = www.ics.uci.edu   domain       | authority
+     *    $4 = <undefined>       port        -/
+     *    $5 = /pub/ietf/uri/    path
+     *    $6 = <undefined>       query without ?
+     *    $7 = Related           fragment without #
+     * </pre>
+     */
+    var URI_RE_ = new RegExp(
+        "^" +
+            "(?:" +
+            "([^:/?#]+)" +         // $1 scheme
+            ":)?" +
+            "(?://" +              // optional authority, introduced by '//'
+            "(?:([^/?#]*)@)?" +    // $2 credentials
+            "([^/?#:@]*)" +        // $3 domain
+            "(?::([0-9]+))?" +     // $4 port
+            ")?" +
+            "([^?#]+)?" +            // $5 path
+            "(?:\\?([^#]*))?" +      // $6 query, without the '?'
+            "(?:#(.*))?" +           // $7 fragment, without the '#'
+            "$"
+    );
+
+    var URI_DISALLOWED_IN_SCHEME_OR_CREDENTIALS_ = /[#\/\?@]/g;  // every '#', '/', '?' or '@'
+    var URI_DISALLOWED_IN_PATH_ = /[\#\?]/g;  // every '#' or '?'
+
+    URI.parse = parse;  // expose the local functions as properties of URI
+    URI.create = create;
+    URI.resolve = resolve;
+    URI.collapse_dots = collapse_dots;  // Visible for testing.
+
+// lightweight string-based api for loadModuleMaker
+    URI.utils = {
+        // Guesses a mime type from the parsed path: a '.html' suffix means HTML,
+        // anything else is treated as script.
+        mimeTypeOf: function (uri) {
+            var path = parse(uri).getPath();
+            return /\.html$/.test(path) ? 'text/html' : 'application/javascript';
+        },
+        // Resolves uri against base; with a falsy base, uri is merely stringified.
+        resolve: function (base, uri) {
+            if (!base) {
+                return '' + uri;
+            }
+            var baseUri = parse(base);
+            var relativeUri = parse(uri);
+            return resolve(baseUri, relativeUri).toString();
+        }
+    };
+
+
+    return URI;
+})();
+
+if (typeof exports !== 'undefined') {  // an 'exports' object is in scope
+    if (typeof module !== 'undefined' && module.exports) {
+        exports = module.exports = URI;  // URI itself becomes the module value
+    }
+    exports.URI = URI;  // and is also reachable as the named property 'URI'
+} else {
+
+    // Exports for closure compiler (quoted key on window).
+    if (typeof window !== 'undefined') {
+        window['URI'] = URI;
+    }
+}
diff --git a/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/lib/caja-HTML-sanitizer/js/sanitizer.js b/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/lib/caja-HTML-sanitizer/js/sanitizer.js
new file mode 100644
index 00000000000..18739c1e4b3
--- /dev/null
+++ b/plasmid/source/core-plugins/eln-lims/1/as/webapps/eln-lims/html/lib/caja-HTML-sanitizer/js/sanitizer.js
@@ -0,0 +1,1120 @@
+// Copyright (C) 2006 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * @fileoverview
+ * An HTML sanitizer that can satisfy a variety of security policies.
+ *
+ * <p>
+ * The HTML sanitizer is built around a SAX parser and HTML element and
+ * attributes schemas.
+ *
+ * If the cssparser is loaded, inline styles are sanitized using the
+ * css property and value schemas.  Otherwise they are removed during
+ * sanitization.
+ *
+ * If it exists, uses parseCssDeclarations, sanitizeCssProperty,  cssSchema
+ *
+ * @author mikesamuel@gmail.com
+ * @author jasvir@gmail.com
+ * \@requires html4, URI
+ * \@overrides window
+ * \@provides html, html_sanitize
+ */
+
+// The Turkish i seems to be a non-issue, but abort if 'I' does not lowercase to 'i'.
+if ('I'.toLowerCase() !== 'i') { throw 'I/i problem'; }
+
+/**
+ * \@namespace
+ */
+var html = (function(html4) {
+
+    // Optional CSS hooks, read from window by quoted name (for closure compiler).
+    var parseCssDeclarations, sanitizeCssProperty, cssSchema;
+    if ('undefined' !== typeof window) {  // otherwise all three stay undefined
+        parseCssDeclarations = window['parseCssDeclarations'];
+        sanitizeCssProperty = window['sanitizeCssProperty'];
+        cssSchema = window['cssSchema'];
+    }
+
+    // The keys of this object must be 'quoted' or JSCompiler will mangle them!
+    // This is a partial list -- lookupEntity() uses the host browser's parser
+    // (when available) to implement full entity lookup.
+    // Note that entities are in general case-sensitive; the uppercase ones are
+    // explicitly defined by HTML5 (presumably as compatibility).
+    var ENTITIES = {
+        'lt': '<',
+        'LT': '<',
+        'gt': '>',
+        'GT': '>',
+        'amp': '&',
+        'AMP': '&',
+        'quot': '"',
+        'apos': '\'',
+        'nbsp': '\u00a0'
+    };
+
+    // Patterns for types of entity/character reference names.
+    var decimalEscapeRe = /^#(\d+)$/;
+    var hexEscapeRe = /^#[xX]([0-9A-Fa-f]+)$/;  // 'X' too, since ENTITY_RE_1 lets it through
+    // contains every entity per http://www.w3.org/TR/2011/WD-html5-20110113/named-character-references.html
+    var safeEntityNameRe = /^[A-Za-z][A-Za-z0-9]+$/;  // ASCII letters and digits only
+    // Used as a hook to invoke the browser's entity parsing. <textarea> is used
+    // because its content is parsed for entities but not tags.
+    // TODO(kpreid): This retrieval is a kludge and leads to silent loss of
+    // functionality if the document isn't available.
+    var entityLookupElement =
+        ('undefined' !== typeof window && window['document'])
+            ? window['document'].createElement('textarea') : null;
+    /**
+     * Decodes an HTML entity.  Numeric references above U+FFFF become a UTF-16
+     * surrogate pair, and those beyond U+10FFFF become U+FFFD.
+     * {\@updoc
+     * $ lookupEntity('lt')
+     * # '<'
+     * $ lookupEntity('GT')
+     * # '>'
+     * $ lookupEntity('amp')
+     * # '&'
+     * $ lookupEntity('nbsp')
+     * # '\xA0'
+     * $ lookupEntity('apos')
+     * # "'"
+     * $ lookupEntity('quot')
+     * # '"'
+     * $ lookupEntity('#xa')
+     * # '\n'
+     * $ lookupEntity('#10')
+     * # '\n'
+     * $ lookupEntity('#x0a')
+     * # '\n'
+     * $ lookupEntity('#010')
+     * # '\n'
+     * $ lookupEntity('#x1F600')
+     * # '\uD83D\uDE00'
+     * $ lookupEntity('Pi')      // Known failure
+     * # '\u03A0'
+     * $ lookupEntity('pi')      // Known failure
+     * # '\u03C0'
+     * }
+     *
+     * @param {string} name the content between the '&' and the ';'.
+     * @return {string} the decoded text, or '&' + name + ';' when name is unknown.
+     */
+    function lookupEntity(name) {
+        // TODO: entity lookup as specified by HTML5 actually depends on the
+        // presence of the ";".
+        if (ENTITIES.hasOwnProperty(name)) { return ENTITIES[name]; }
+        var m = name.match(decimalEscapeRe), radix = 10;
+        if (!m) { m = name.match(hexEscapeRe); radix = 16; }
+        if (!m) {
+            if (!(entityLookupElement && safeEntityNameRe.test(name))) { return '&' + name + ';'; }
+            entityLookupElement.innerHTML = '&' + name + ';';
+            return (ENTITIES[name] = entityLookupElement.textContent);
+        }
+        var code = parseInt(m[1], radix);
+        if (code <= 0xFFFF) { return String.fromCharCode(code); }
+        // fromCharCode() keeps only 16 bits per argument, so build the surrogate
+        // pair by hand; 0xD7C0 is 0xD800 - (0x10000 >> 10).
+        return code > 0x10FFFF ? '\uFFFD' : String.fromCharCode(
+            0xD7C0 + (code >> 10), 0xDC00 + (code & 0x3FF));
+    }
+
+    function decodeOneEntity(_, name) {  // String.replace callback: _ is the whole match, name is capture group 1
+        return lookupEntity(name);
+    }
+
+    var nulRe = /\0/g;  // every NUL (U+0000) character
+    function stripNULs(s) {
+        return s.replace(nulRe, '');  // s with all NULs removed
+    }
+
+    var ENTITY_RE_1 = /&(#[0-9]+|#[xX][0-9A-Fa-f]+|\w+);/g;
+    var ENTITY_RE_2 = /^(#[0-9]+|#[xX][0-9A-Fa-f]+|\w+);/;
+    /**
+     * The plain text of a chunk of HTML CDATA which possibly contains entities.
+     * Every complete entity ('&' name ';') is replaced by lookupEntity(name);
+     * a falsy argument is returned unchanged.
+     *
+     * {\@updoc
+     * $ unescapeEntities('')
+     * # ''
+     * $ unescapeEntities('hello World!')
+     * # 'hello World!'
+     * $ unescapeEntities('1 &lt; 2 &amp;&AMP; 4 &gt; 3&#10;')
+     * # '1 < 2 && 4 > 3\n'
+     * $ unescapeEntities('&lt;&lt <- unfinished entity&gt;')
+     * # '<&lt <- unfinished entity>'
+     * $ unescapeEntities('/foo?bar=baz&copy=true')  // & often unescaped in URLS
+     * # '/foo?bar=baz&copy=true'
+     * $ unescapeEntities('pi=&pi;&#x3c0;, Pi=&Pi;\u03A0') // FIXME: known failure
+     * # 'pi=\u03C0\u03c0, Pi=\u03A0\u03A0'
+     * }
+     *
+     * @param {string} s a chunk of HTML CDATA.  It must not start or end inside
+     *     an HTML entity.
+     * @return {string} s with its entities decoded.
+     */
+    function unescapeEntities(s) {
+        // Nothing to decode in a falsy s (e.g. '', null, undefined): hand it back.
+        if (!s) { return s; }
+        return s.replace(ENTITY_RE_1, decodeOneEntity);
+    }
+
+    var ampRe = /&/g;
+    var looseAmpRe = /&([^a-z#]|#(?:[^0-9x]|x(?:[^0-9a-f]|$)|$)|$)/gi;
+    var ltRe = /[<]/g;
+    var gtRe = />/g;
+    var quotRe = /\"/g;
+
+    /**
+     * Escapes HTML special characters in attribute values: '&', '<', '>' and
+     * '"'.  A falsy argument is returned unchanged; any other value is
+     * stringified first.
+     *
+     * {\@updoc
+     * $ escapeAttrib('')
+     * # ''
+     * $ escapeAttrib('"<<&==&>>"')  // Do not just escape the first occurrence.
+     * # '&#34;&lt;&lt;&amp;==&amp;&gt;&gt;&#34;'
+     * $ escapeAttrib('Hello <World>!')
+     * # 'Hello &lt;World&gt;!'
+     * }
+     */
+    function escapeAttrib(s) {
+        if (!s) { return s; }
+        var text = '' + s;
+        // '&' goes first so the entities produced below are not escaped again.
+        text = text.replace(ampRe, '&amp;');
+        text = text.replace(ltRe, '&lt;').replace(gtRe, '&gt;');
+        return text.replace(quotRe, '&#34;');
+    }
+
+    /**
+     * Escape entities in RCDATA that can be escaped without changing the meaning:
+     * '<', '>' and every '&' that looseAmpRe finds not to start an entity.
+     * A falsy argument is returned unchanged.
+     *
+     * {\@updoc
+     * $ normalizeRCData('1 < 2 &&amp; 3 > 4 &amp;& 5 &lt; 7&8')
+     * # '1 &lt; 2 &amp;&amp; 3 &gt; 4 &amp;&amp; 5 &lt; 7&amp;8'
+     * }
+     * @return {string} the normalized text.
+     */
+    function normalizeRCData(rcdata) {
+        if (!rcdata) { return rcdata; }
+        // $1 puts back the character that showed the '&' to be a loose one.
+        var ampsEscaped = rcdata.replace(looseAmpRe, '&amp;$1');
+        var ltEscaped = ampsEscaped.replace(ltRe, '&lt;');
+        return ltEscaped.replace(gtRe, '&gt;');
+    }
+
+    // TODO(felix8a): validate sanitizer regexs against the HTML5 grammar at
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html
+
+    // We initially split input so that potentially meaningful characters
+    // like '<' and '>' are separate tokens, using a fast dumb process that
+    // ignores quoting.  Then we walk that token stream, and when we see a
+    // '<' that's the start of a tag, we use ATTR_RE to extract tag
+    // attributes from the next token.  That token will never have a '>'
+    // character.  However, it might have an unbalanced quote character, and
+    // when we see that, we combine additional tokens to balance the quote.
+
+    var ATTR_RE = new RegExp(
+        '^\\s*' +                       // optional whitespace before the name
+            '([-.:\\w]+)' +             // 1 = Attribute name
+            '(?:' + (
+            '\\s*(=)\\s*' +           // 2 = Is there a value?
+                '(' + (                   // 3 = Attribute value
+                // TODO(felix8a): maybe use backref to match quotes
+                '(\")[^\"]*(\"|$)' +    // 4, 5 = Double-quoted string (5 is empty if unterminated)
+                    '|' +
+                    '(\')[^\']*(\'|$)' +    // 6, 7 = Single-quoted string (7 is empty if unterminated)
+                    '|' +
+                    // Positive lookahead to prevent interpretation of
+                    // <foo a= b=c> as <foo a='b=c'>
+                    // TODO(felix8a): might be able to drop this case
+                    '(?=[a-z][-\\w]*\\s*=)' +
+                    '|' +
+                    // Unquoted value that isn't an attribute name
+                    // (since we didn't match the positive lookahead above)
+                    '[^\"\'\\s]*' ) +
+                ')' ) +
+            ')?',
+        'i');                           // case-insensitive
+
+    // Whether split() keeps captured separators: false on IE<=8, true on most other browsers
+    var splitWillCapture = ('a,b'.split(/(,)/).length === 3);
+
+    // bitmask for tags with special parsing (CDATA or RCDATA), like <script> and <textarea>
+    var EFLAGS_TEXT = html4.eflags['CDATA'] | html4.eflags['RCDATA'];
+
+    /**
+     * Given a SAX-like event handler, produce a function that feeds those
+     * events and a parameter to the event handler.
+     *
+     * The event handler has the form:{@code
+     * {
+     *   // Name is a lower-case HTML tag name.  Attribs is an array of alternating
+     *   // attribute names and values.  Param is the value passed to the saxParser.
+     *   startTag: function (name, attribs, param) { ... },
+     *   endTag:   function (name, param) { ... },
+     *   pcdata:   function (text, param) { ... },
+     *   rcdata:   function (text, param) { ... },
+     *   cdata:    function (text, param) { ... },
+     *   comment:  function (text, param) { ... },
+     *   startDoc: function (param) { ... },
+     *   endDoc:   function (param) { ... }
+     * }}
+     * Each handler except startDoc/endDoc also receives two trailing arguments:
+     * a continuation marker and a function that re-enters the parser.
+     * @param {Object} handler a record containing event handlers.
+     * @return {function(string, Object)} A function that takes a chunk of HTML
+     *     and a parameter.  The parameter is passed on to the handler methods.
+     */
+    function makeSaxParser(handler) {
+        // Accept quoted or unquoted keys (Closure compat)
+        var hcopy = {
+            cdata: handler.cdata || handler['cdata'],
+            comment: handler.comment || handler['comment'],
+            endDoc: handler.endDoc || handler['endDoc'],
+            endTag: handler.endTag || handler['endTag'],
+            pcdata: handler.pcdata || handler['pcdata'],
+            rcdata: handler.rcdata || handler['rcdata'],
+            startDoc: handler.startDoc || handler['startDoc'],
+            startTag: handler.startTag || handler['startTag']
+        };
+        return function(htmlText, param) {
+            return parse(htmlText, hcopy, param);
+        };
+    }
+
+    // Parsing strategy is to split input into parts that might be lexically
+    // meaningful (every ">" becomes a separate part), and then recombine
+    // parts if we discover they're in a different context.
+
+    // TODO(felix8a): Significant performance regressions from -legacy,
+    // tested on
+    //    Chrome 18.0
+    //    Firefox 11.0
+    //    IE 6, 7, 8, 9
+    //    Opera 11.61
+    //    Safari 5.1.3
+    // Many of these are unusual patterns that are linearly slower and still
+    // pretty fast (eg 1ms to 5ms), so not necessarily worth fixing.
+
+    // TODO(felix8a): "<script> && && && ... <\/script>" is slower on all
+    // browsers.  The hotspot is htmlSplit.
+
+    // TODO(felix8a): "<p title='>>>>...'><\/p>" is slower on all browsers.
+    // This is partly htmlSplit, but the hotspot is parseTagAndAttrs.
+
+    // TODO(felix8a): "<a><\/a><a><\/a>..." is slower on IE9.
+    // "<a>1<\/a><a>1<\/a>..." is faster, "<a><\/a>2<a><\/a>2..." is faster.
+
+    // TODO(felix8a): "<p<p<p..." is slower on IE[6-8]
+
+    var continuationMarker = {};
+    // Tokenizes htmlText and runs the handler-driven parse from the first token.
+    function parse(htmlText, handler, param) {
+        var tokens = htmlSplit(htmlText);
+        var searchFlags = {
+            noMoreGT: false,
+            noMoreEndComments: false
+        };
+        parseCPS(handler, tokens, 0, searchFlags, param);
+    }
+
+    function continuationMaker(h, parts, initial, state, param) {
+        return function () {  // calling the result re-enters parseCPS at token index `initial`
+            parseCPS(h, parts, initial, state, param);
+        };
+    }
+
+    function parseCPS(h, parts, initial, state, param) {  // walks parts from index `initial`, firing the handlers in h
+        try {
+            if (h.startDoc && initial == 0) { h.startDoc(param); }  // startDoc fires only for a parse that begins at token 0
+            var m, p, tagName;
+            for (var pos = initial, end = parts.length; pos < end;) {
+                var current = parts[pos++];  // pos now indexes the token after current
+                var next = parts[pos];
+                switch (current) {
+                    case '&':
+                        if (ENTITY_RE_2.test(next)) {
+                            if (h.pcdata) {
+                                h.pcdata('&' + next, param, continuationMarker,
+                                    continuationMaker(h, parts, pos, state, param));
+                            }
+                            pos++;  // the entity text in `next` has been emitted too
+                        } else {
+                            if (h.pcdata) { h.pcdata("&amp;", param, continuationMarker,
+                                continuationMaker(h, parts, pos, state, param));
+                            }
+                        }
+                        break;
+                    case '<\/':
+                        if ((m = /^([-\w:]+)[^\'\"]*/.exec(next))) {
+                            if (m[0].length === next.length && parts[pos + 1] === '>') {
+                                // fast case, no attribute parsing needed
+                                pos += 2;
+                                tagName = m[1].toLowerCase();
+                                if (h.endTag) {
+                                    h.endTag(tagName, param, continuationMarker,
+                                        continuationMaker(h, parts, pos, state, param));
+                                }
+                            } else {
+                                // slow case, need to parse attributes
+                                // TODO(felix8a): do we really care about misparsing this?
+                                pos = parseEndTag(
+                                    parts, pos, h, param, continuationMarker, state);
+                            }
+                        } else {
+                            if (h.pcdata) {
+                                h.pcdata('&lt;/', param, continuationMarker,
+                                    continuationMaker(h, parts, pos, state, param));
+                            }
+                        }
+                        break;
+                    case '<':
+                        if (m = /^([-\w:]+)\s*\/?/.exec(next)) {
+                            if (m[0].length === next.length && parts[pos + 1] === '>') {
+                                // fast case, no attribute parsing needed
+                                pos += 2;
+                                tagName = m[1].toLowerCase();
+                                if (h.startTag) {
+                                    h.startTag(tagName, [], param, continuationMarker,
+                                        continuationMaker(h, parts, pos, state, param));
+                                }
+                                // tags like <script> and <textarea> have special parsing
+                                var eflags = html4.ELEMENTS[tagName];
+                                if (eflags & EFLAGS_TEXT) {
+                                    var tag = { name: tagName, next: pos, eflags: eflags };
+                                    pos = parseText(
+                                        parts, tag, h, param, continuationMarker, state);
+                                }
+                            } else {
+                                // slow case, need to parse attributes
+                                pos = parseStartTag(
+                                    parts, pos, h, param, continuationMarker, state);
+                            }
+                        } else {
+                            if (h.pcdata) {
+                                h.pcdata('&lt;', param, continuationMarker,
+                                    continuationMaker(h, parts, pos, state, param));
+                            }
+                        }
+                        break;
+                    case '<\!--':
+                        // The pathological case is n copies of '<\!--' without '-->', and
+                        // repeated failure to find '-->' is quadratic.  We avoid that by
+                        // remembering when search for '-->' fails.
+                        if (!state.noMoreEndComments) {
+                            // A comment <\!--x--> is split into three tokens:
+                            //   '<\!--', 'x--', '>'
+                            // We want to find the next '>' token that has a preceding '--'.
+                            // pos is at the 'x--'.
+                            for (p = pos + 1; p < end; p++) {
+                                if (parts[p] === '>' && /--$/.test(parts[p - 1])) { break; }
+                            }
+                            if (p < end) {
+                                if (h.comment) {
+                                    var comment = parts.slice(pos, p).join('');
+                                    h.comment(
+                                        comment.substr(0, comment.length - 2), param,
+                                        continuationMarker,
+                                        continuationMaker(h, parts, p + 1, state, param));
+                                }
+                                pos = p + 1;
+                            } else {
+                                state.noMoreEndComments = true;
+                            }
+                        }
+                        if (state.noMoreEndComments) {
+                            if (h.pcdata) {
+                                h.pcdata('&lt;!--', param, continuationMarker,
+                                    continuationMaker(h, parts, pos, state, param));
+                            }
+                        }
+                        break;
+                    case '<\!':
+                        if (!/^\w/.test(next)) {
+                            if (h.pcdata) {
+                                h.pcdata('&lt;!', param, continuationMarker,
+                                    continuationMaker(h, parts, pos, state, param));
+                            }
+                        } else {
+                            // similar to noMoreEndComment logic
+                            if (!state.noMoreGT) {
+                                for (p = pos + 1; p < end; p++) {
+                                    if (parts[p] === '>') { break; }
+                                }
+                                if (p < end) {
+                                    pos = p + 1;
+                                } else {
+                                    state.noMoreGT = true;
+                                }
+                            }
+                            if (state.noMoreGT) {
+                                if (h.pcdata) {
+                                    h.pcdata('&lt;!', param, continuationMarker,
+                                        continuationMaker(h, parts, pos, state, param));
+                                }
+                            }
+                        }
+                        break;
+                    case '<?':
+                        // similar to noMoreEndComment logic
+                        if (!state.noMoreGT) {
+                            for (p = pos + 1; p < end; p++) {
+                                if (parts[p] === '>') { break; }
+                            }
+                            if (p < end) {
+                                pos = p + 1;
+                            } else {
+                                state.noMoreGT = true;
+                            }
+                        }
+                        if (state.noMoreGT) {
+                            if (h.pcdata) {
+                                h.pcdata('&lt;?', param, continuationMarker,
+                                    continuationMaker(h, parts, pos, state, param));
+                            }
+                        }
+                        break;
+                    case '>':
+                        if (h.pcdata) {
+                            h.pcdata("&gt;", param, continuationMarker,
+                                continuationMaker(h, parts, pos, state, param));
+                        }
+                        break;
+                    case '':
+                        break;
+                    default:
+                        if (h.pcdata) {
+                            h.pcdata(current, param, continuationMarker,
+                                continuationMaker(h, parts, pos, state, param));
+                        }
+                        break;
+                }
+            }
+            if (h.endDoc) { h.endDoc(param); }
+        } catch (e) {
+            if (e !== continuationMarker) { throw e; }  // a thrown continuationMarker just ends this pass; anything else propagates
+        }
+    }
+
+    // Split str into parts for the html parser: text runs alternate with the
+    // delimiter tokens '<\/', '<\!--', '<\!', '<?', '&', '<' and '>'.
+    function htmlSplit(str) {
+        // A fresh /g regex per call: exec() below keeps its position in lastIndex.
+        var delimiterRe = /(<\/|<\!--|<[!?]|[&<>])/g;
+        var text = str + '';
+        if (splitWillCapture) {
+            return text.split(delimiterRe);
+        }
+        // Emulate a capturing split where split() drops captured separators.
+        var pieces = [];
+        var cursor = 0;
+        for (var hit = delimiterRe.exec(text); hit !== null;
+                hit = delimiterRe.exec(text)) {
+            pieces.push(text.substring(cursor, hit.index), hit[0]);
+            cursor = hit.index + hit[0].length;
+        }
+        pieces.push(text.substring(cursor));
+        return pieces;
+    }
+
+    // Slow-path end tag: fires h.endTag and returns tag.next (parts.length if the tag never closes).
+    function parseEndTag(parts, pos, h, param, continuationMarker, state) {
+        var tag = parseTagAndAttrs(parts, pos);
+        if (!tag) { return parts.length; }  // drop unclosed tags
+        if (h.endTag) {
+            h.endTag(tag.name, param, continuationMarker,
+                continuationMaker(h, parts, tag.next, state, param));  // resume past the tag, as parseStartTag does
+        }
+        return tag.next;
+    }
+
+    // Slow-path start tag: fires h.startTag, then lets parseText consume the
+    // body of CDATA/RCDATA elements.  Returns the index to continue from
+    // (parts.length when the tag never closes).
+    function parseStartTag(parts, pos, h, param, continuationMarker, state) {
+        var parsed = parseTagAndAttrs(parts, pos);
+        if (!parsed) { return parts.length; }  // drop unclosed tags
+        if (h.startTag) {
+            var resume = continuationMaker(h, parts, parsed.next, state, param);
+            h.startTag(parsed.name, parsed.attrs, param, continuationMarker, resume);
+        }
+        // tags like <script> and <textarea> have special parsing
+        return (parsed.eflags & EFLAGS_TEXT)
+            ? parseText(parts, parsed, h, param, continuationMarker, state)
+            : parsed.next;
+    }
+
+    var endTagRe = {};
+
+    // Tags like <script> and <textarea> are flagged as CDATA or RCDATA,
+    // which means everything is text until we see the correct closing tag.
+    // Returns the index of that tag's '<\/' token, or one at/past the end of parts.
+    function parseText(parts, tag, h, param, continuationMarker, state) {
+        var name = tag.name;
+        if (!endTagRe.hasOwnProperty(name)) {
+            endTagRe[name] = new RegExp('^' + name + '(?:[\\s\\/]|$)', 'i');
+        }
+        var closer = endTagRe[name];
+        var limit = parts.length;
+        var stop = tag.next + 1;
+        while (stop < limit &&
+               !(parts[stop - 1] === '<\/' && closer.test(parts[stop]))) {
+            stop++;
+        }
+        if (stop < limit) { stop -= 1; }
+        var text = parts.slice(tag.next, stop).join('');
+        var asCdata = tag.eflags & html4.eflags['CDATA'];
+        if (!asCdata && !(tag.eflags & html4.eflags['RCDATA'])) {
+            throw new Error('bug');
+        }
+        if (asCdata && h.cdata) {
+            h.cdata(text, param, continuationMarker,
+                continuationMaker(h, parts, stop, state, param));
+        } else if (!asCdata && h.rcdata) {
+            h.rcdata(normalizeRCData(text), param, continuationMarker,
+                continuationMaker(h, parts, stop, state, param));
+        }
+        return stop;
+    }
+
+    // Parses the tag whose name starts parts[pos] into {name, eflags, attrs, next}.  At this point, parts[pos-1] is either "<" or "<\/".
+    function parseTagAndAttrs(parts, pos) {
+        var m = /^([-\w:]+)/.exec(parts[pos]);  // assumes parts[pos] starts with a name character, else m is null — TODO confirm callers guarantee this
+        var tag = {};
+        tag.name = m[1].toLowerCase();
+        tag.eflags = html4.ELEMENTS[tag.name];  // undefined when the name is not a key of html4.ELEMENTS
+        var buf = parts[pos].substr(m[0].length);  // text after the name: raw attribute source
+        // Find the next '>'.  We optimistically assume this '>' is not in a
+        // quoted context, and further down we fix things up if it turns out to
+        // be quoted.
+        var p = pos + 1;
+        var end = parts.length;
+        for (; p < end; p++) {
+            if (parts[p] === '>') { break; }
+            buf += parts[p];
+        }
+        if (end <= p) { return void 0; }  // no '>' anywhere: the tag is unclosed, signalled by undefined
+        var attrs = [];
+        while (buf !== '') {
+            m = ATTR_RE.exec(buf);
+            if (!m) {
+                // No attribute found: skip garbage (one character plus any
+                // following run of characters that are neither a-z nor whitespace)
+                buf = buf.replace(/^[\s\S][^a-z\s]*/, '');
+            } else if ((m[4] && !m[5]) || (m[6] && !m[7])) {
+                // Unterminated quote: slurp to the next unquoted '>'
+                var quote = m[4] || m[6];  // the quote text that was opened but not closed
+                var sawQuote = false;
+                var abuf = [buf, parts[p++]];  // parts[p] is the '>' that turned out to be inside the quote
+                for (; p < end; p++) {
+                    if (sawQuote) {
+                        if (parts[p] === '>') { break; }
+                    } else if (0 <= parts[p].indexOf(quote)) {
+                        sawQuote = true;
+                    }
+                    abuf.push(parts[p]);
+                }
+                // Slurp failed: lose the garbage
+                if (end <= p) { break; }
+                // Otherwise retry attribute parsing
+                buf = abuf.join('');
+                continue;
+
+            } else {
+                // We have an attribute
+                var aName = m[1].toLowerCase();
+                var aValue = m[2] ? decodeValue(m[3]) : '';  // group 2 empty: the value defaults to ''
+                attrs.push(aName, aValue);
+                buf = buf.substr(m[0].length);
+            }
+        }
+        tag.attrs = attrs;  // flat list: name, value, name, value, ...
+        tag.next = p + 1;  // index of the part after the '>' that ended the tag
+        return tag;
+    }
+
+    // Decodes a raw attribute value.  When it starts with a quote character its
+    // first and last characters are dropped before NUL stripping and unescaping.
+    function decodeValue(v) {
+        var first = v.charAt(0);
+        var body = (first === '"' || first === "'") ? v.slice(1, -1) : v;
+        return unescapeEntities(stripNULs(body));
+    }
+
+    /**
+     * Returns a function that strips unsafe tags and attributes from html.
+     * @param {function(string, Array.<string>): ?Object} tagPolicy
+     *     A function that takes (tagName, attribs[]), where tagName is a key in
+     *     html4.ELEMENTS and attribs is an array of alternating attribute names
+     *     and values.  It should return a record (as follows), or a falsy value to
+     *     delete the element; any other non-object result, or a record without
+     *     'attribs', makes the sanitizer throw an Error.  tagPolicy may modify the
+     *     attribs array, but should not hold on to it between calls.  Record keys:
+     *        attribs: (required) Sanitized attributes array.
+     *        tagName: Replacement tag name.
+     * @return {function(string, Array)} A function that sanitizes a string of
+     *     HTML and appends result strings to the second argument, an array.
+     */
+    function makeHtmlSanitizer(tagPolicy) {
+        var stack;  // open elements as {orig, rep} records, innermost last
+        var ignoring;  // true after the policy rejected a non-EMPTY element, until an end tag is seen
+        var emit = function (text, out) {
+            if (!ignoring) { out.push(text); }
+        };
+        return makeSaxParser({
+            'startDoc': function(_) {
+                stack = [];
+                ignoring = false;
+            },
+            'startTag': function(tagNameOrig, attribs, out) {
+                if (ignoring) { return; }
+                if (!html4.ELEMENTS.hasOwnProperty(tagNameOrig)) { return; }  // unknown element: the tag itself is dropped
+                var eflagsOrig = html4.ELEMENTS[tagNameOrig];
+                if (eflagsOrig & html4.eflags['FOLDABLE']) {
+                    return;  // FOLDABLE element: the tag itself is dropped
+                }
+
+                var decision = tagPolicy(tagNameOrig, attribs);
+                if (!decision) {
+                    ignoring = !(eflagsOrig & html4.eflags['EMPTY']);  // rejected: start ignoring unless the element is flagged EMPTY
+                    return;
+                } else if (typeof decision !== 'object') {
+                    throw new Error('tagPolicy did not return object (old API?)');
+                }
+                if ('attribs' in decision) {
+                    attribs = decision['attribs'];
+                } else {
+                    throw new Error('tagPolicy gave no attribs');
+                }
+                var eflagsRep;
+                var tagNameRep;
+                if ('tagName' in decision) {
+                    tagNameRep = decision['tagName'];
+                    eflagsRep = html4.ELEMENTS[tagNameRep];
+                } else {
+                    tagNameRep = tagNameOrig;
+                    eflagsRep = eflagsOrig;
+                }
+                // TODO(mikesamuel): relying on tagPolicy not to insert unsafe
+                // attribute names.
+
+                // If this is an optional-end-tag element and either this element or its
+                // previous like sibling was rewritten, then insert a close tag to
+                // preserve structure.
+                if (eflagsOrig & html4.eflags['OPTIONAL_ENDTAG']) {
+                    var onStack = stack[stack.length - 1];
+                    if (onStack && onStack.orig === tagNameOrig &&
+                        (onStack.rep !== tagNameRep || tagNameOrig !== tagNameRep)) {
+                        out.push('<\/', onStack.rep, '>');
+                    }
+                }
+
+                if (!(eflagsOrig & html4.eflags['EMPTY'])) {
+                    stack.push({orig: tagNameOrig, rep: tagNameRep});
+                }
+
+                out.push('<', tagNameRep);
+                for (var i = 0, n = attribs.length; i < n; i += 2) {
+                    var attribName = attribs[i],
+                        value = attribs[i + 1];
+                    if (value !== null && value !== void 0) {  // null/undefined values mean "omit this attribute"
+                        out.push(' ', attribName, '="', escapeAttrib(value), '"');
+                    }
+                }
+                out.push('>');
+
+                if ((eflagsOrig & html4.eflags['EMPTY'])
+                    && !(eflagsRep & html4.eflags['EMPTY'])) {
+                    // replacement is non-empty, synthesize end tag
+                    out.push('<\/', tagNameRep, '>');
+                }
+            },
+            'endTag': function(tagName, out) {
+                if (ignoring) {
+                    // NOTE(review): any end tag, whatever its name, ends the ignored region.
+                    ignoring = false;
+                    return;
+                }
+                if (!html4.ELEMENTS.hasOwnProperty(tagName)) { return; }
+                var eflags = html4.ELEMENTS[tagName];
+                if (!(eflags & (html4.eflags['EMPTY'] | html4.eflags['FOLDABLE']))) {
+                    var index;  // stack position of the element this end tag closes, or -1
+                    if (eflags & html4.eflags['OPTIONAL_ENDTAG']) {
+                        for (index = stack.length; --index >= 0;) {
+                            var stackElOrigTag = stack[index].orig;
+                            if (stackElOrigTag === tagName) { break; }
+                            if (!(html4.ELEMENTS[stackElOrigTag] &
+                                html4.eflags['OPTIONAL_ENDTAG'])) {
+                                // Don't pop non optional end tags looking for a match.
+                                return;
+                            }
+                        }
+                    } else {
+                        for (index = stack.length; --index >= 0;) {
+                            if (stack[index].orig === tagName) { break; }
+                        }
+                    }
+                    if (index < 0) { return; }  // Not opened.
+                    for (var i = stack.length; --i > index;) {  // close elements opened after the match, innermost first
+                        var stackElRepTag = stack[i].rep;
+                        if (!(html4.ELEMENTS[stackElRepTag] &
+                            html4.eflags['OPTIONAL_ENDTAG'])) {
+                            out.push('<\/', stackElRepTag, '>');
+                        }
+                    }
+                    if (index < stack.length) {
+                        tagName = stack[index].rep;  // close with the replacement name recorded when the element was opened
+                    }
+                    stack.length = index;
+                    out.push('<\/', tagName, '>');
+                }
+            },
+            'pcdata': emit,
+            'rcdata': emit,
+            'cdata': emit,
+            'endDoc': function(out) {
+                for (; stack.length; stack.length--) {  // close every element still open, innermost first
+                    out.push('<\/', stack[stack.length - 1].rep, '>');
+                }
+            }
+        });
+    }
+
+    var ALLOWED_URI_SCHEMES = /^(?:https?|mailto)$/i;  // scheme-less URIs are accepted too
+
+    // Hands a parsed, scheme-checked URI to naiveUriRewriter and returns its result as a string.
+    // Returns null without a rewriter, for rejected or unparsable URIs, and whenever anything throws.
+    function safeUri(uri, effect, ltype, hints, naiveUriRewriter) {
+        if (!naiveUriRewriter) { return null; }
+        var result = null;
+        try {
+            var parsedUri = URI.parse('' + uri);
+            var acceptable = parsedUri &&
+                (!parsedUri.hasScheme() || ALLOWED_URI_SCHEMES.test(parsedUri.getScheme()));
+            var rewritten = acceptable ? naiveUriRewriter(parsedUri, effect, ltype, hints) : null;
+            result = rewritten ? rewritten.toString() : null;
+        } catch (e) {
+            result = null;  // any failure while parsing or rewriting counts as unsafe
+        }
+        return result;
+    }
+
+    // Reports to logger(message, details): a tag removal when attribName is falsy, an attribute change when the values differ.
+    function log(logger, tagName, attribName, oldValue, newValue) {
+        if (!attribName) {
+            logger(tagName + " removed", { change: "removed", tagName: tagName });
+        }
+        if (oldValue === newValue) { return; }
+        var change;
+        if (oldValue && !newValue) {
+            change = "removed";
+        } else if (!oldValue && newValue) {
+            change = "added";
+        } else {
+            change = "changed";
+        }
+        var details = {
+            change: change,
+            tagName: tagName,
+            attribName: attribName,
+            oldValue: oldValue,
+            newValue: newValue
+        };
+        logger(tagName + "." + attribName + " " + change, details);
+    }
+
+    // Resolves attribName in map: own key "tag::attr" first, then "*::attr"; undefined otherwise.
+    function lookupAttribute(map, tagName, attribName) {
+        var candidates = [tagName + '::' + attribName, '*::' + attribName];
+        for (var k = 0; k < candidates.length; k++) {
+            if (map.hasOwnProperty(candidates[k])) {
+                return map[candidates[k]];
+            }
+        }
+    }
+    // Entry of html4.ATTRIBS for this tag/attribute pair.
+    function getAttributeType(tagName, attribName) {
+        return lookupAttribute(html4.ATTRIBS, tagName, attribName);
+    }
+    // Entry of html4.LOADERTYPES for this tag/attribute pair.
+    function getLoaderType(tagName, attribName) {
+        return lookupAttribute(html4.LOADERTYPES, tagName, attribName);
+    }
+    // Entry of html4.URIEFFECTS for this tag/attribute pair.
+    function getUriEffect(tagName, attribName) {
+        return lookupAttribute(html4.URIEFFECTS, tagName, attribName);
+    }
+
+    /**
+     * Sanitizes attributes on an HTML tag.  The attribs array is modified in place and returned.
+     * @param {string} tagName An HTML tag name in lowercase.
+     * @param {Array.<?string>} attribs An array of alternating names and values.
+     * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to
+     *     apply to URI attributes; it can return a new string value, or null to
+     *     delete the attribute.  If unspecified, URI attributes are deleted.
+     * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply
+     *     to attributes containing HTML names, element IDs, and space-separated
+     *     lists of classes; it can return a new string value, or null to delete
+     *     the attribute.  If unspecified, these attributes are kept unchanged.
+     * @param {function(string, Object)} opt_logger Optional; when given, log() is called with the old and new value of every attribute except those of type NONE.
+     * @return {Array.<?string>} The same attribs array, where a null value means to omit the attribute.
+     */
+    function sanitizeAttribs(tagName, attribs,
+        opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
+        // TODO(felix8a): it's obnoxious that domado duplicates much of this
+        // TODO(felix8a): maybe consistently enforce constraints like target=
+        for (var i = 0; i < attribs.length; i += 2) {
+            var attribName = attribs[i];
+            var value = attribs[i + 1];
+            var oldValue = value;
+            var atype = null, attribKey;
+            if ((attribKey = tagName + '::' + attribName,  // tag-specific entry first, then the '*' wildcard entry
+                html4.ATTRIBS.hasOwnProperty(attribKey)) ||
+                (attribKey = '*::' + attribName,
+                    html4.ATTRIBS.hasOwnProperty(attribKey))) {
+                atype = html4.ATTRIBS[attribKey];
+            }
+            if (atype !== null) {
+                switch (atype) {
+                    case html4.atype['NONE']: break;  // value kept as is, nothing logged
+                    case html4.atype['SCRIPT']:
+                        value = null;
+                        if (opt_logger) {
+                            log(opt_logger, tagName, attribName, oldValue, value);
+                        }
+                        break;
+                    case html4.atype['STYLE']:
+                        if ('undefined' === typeof parseCssDeclarations) {  // no CSS parser available: the attribute is deleted
+                            value = null;
+                            if (opt_logger) {
+                                log(opt_logger, tagName, attribName, oldValue, value);
+                            }
+                            break;
+                        }
+                        var sanitizedDeclarations = [];
+                        parseCssDeclarations(
+                            value,
+                            {
+                                'declaration': function (property, tokens) {
+                                    var normProp = property.toLowerCase();
+                                    sanitizeCssProperty(
+                                        normProp, tokens,
+                                        opt_naiveUriRewriter
+                                            ? function (url) {
+                                            return safeUri(
+                                                url, html4.ueffects.SAME_DOCUMENT,
+                                                html4.ltypes.SANDBOXED,
+                                                {
+                                                    "TYPE": "CSS",
+                                                    "CSS_PROP": normProp
+                                                }, opt_naiveUriRewriter);
+                                        }
+                                            : null);
+                                    if (tokens.length) {  // only declarations that still have tokens are kept
+                                        sanitizedDeclarations.push(
+                                            normProp + ': ' + tokens.join(' '));
+                                    }
+                                }
+                            });
+                        value = sanitizedDeclarations.length > 0 ?
+                            sanitizedDeclarations.join(' ; ') : null;
+                        if (opt_logger) {
+                            log(opt_logger, tagName, attribName, oldValue, value);
+                        }
+                        break;
+                    case html4.atype['ID']:
+                    case html4.atype['IDREF']:
+                    case html4.atype['IDREFS']:
+                    case html4.atype['GLOBAL_NAME']:
+                    case html4.atype['LOCAL_NAME']:
+                    case html4.atype['CLASSES']:
+                        value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
+                        if (opt_logger) {
+                            log(opt_logger, tagName, attribName, oldValue, value);
+                        }
+                        break;
+                    case html4.atype['URI']:
+                        value = safeUri(value,
+                            getUriEffect(tagName, attribName),
+                            getLoaderType(tagName, attribName),
+                            {
+                                "TYPE": "MARKUP",
+                                "XML_ATTR": attribName,
+                                "XML_TAG": tagName
+                            }, opt_naiveUriRewriter);
+                        if (opt_logger) {
+                            log(opt_logger, tagName, attribName, oldValue, value);
+                        }
+                        break;
+                    case html4.atype['URI_FRAGMENT']:
+                        if (value && '#' === value.charAt(0)) {
+                            value = value.substring(1);  // remove the leading '#'
+                            value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
+                            if (value !== null && value !== void 0) {
+                                value = '#' + value;  // restore the leading '#'
+                            }
+                        } else {
+                            value = null;
+                        }
+                        if (opt_logger) {
+                            log(opt_logger, tagName, attribName, oldValue, value);
+                        }
+                        break;
+                    default:  // any attribute type not handled above is deleted
+                        value = null;
+                        if (opt_logger) {
+                            log(opt_logger, tagName, attribName, oldValue, value);
+                        }
+                        break;
+                }
+            } else {  // attribute has no entry in html4.ATTRIBS: deleted
+                value = null;
+                if (opt_logger) {
+                    log(opt_logger, tagName, attribName, oldValue, value);
+                }
+            }
+            attribs[i + 1] = value;
+        }
+        return attribs;
+    }
+
+    /**
+     * Builds a tagPolicy function that rejects every element flagged UNSAFE in
+     * html4.ELEMENTS and runs sanitizeAttribs over the attributes of all others.
+     * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to
+     *     apply to URI attributes.  If not given, URI attributes are deleted.
+     * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply
+     *     to attributes containing HTML names, element IDs, and space-separated
+     *     lists of classes.  If not given, such attributes are left unchanged.
+     * @param {function(string, Object)} opt_logger Optional; handed to log() for
+     *     every rejected tag and passed on to sanitizeAttribs.
+     * @return {function(string, Array.<?string>)} A tagPolicy returning either
+     *     {'attribs': sanitized array} or undefined for a rejected tag.
+     */
+    function makeTagPolicy(
+        opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
+        return function(tagName, attribs) {
+            var unsafe = html4.ELEMENTS[tagName] & html4.eflags['UNSAFE'];
+            if (unsafe) {
+                if (opt_logger) { log(opt_logger, tagName, undefined, undefined, undefined); }
+                return void 0;
+            }
+            return {
+                'attribs': sanitizeAttribs(tagName, attribs,
+                    opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger)
+            };
+        };
+    }
+
+    /**
+     * Sanitizes HTML with a sanitizer built from the given tag policy.
+     * @param {string} inputHtml The HTML to sanitize.
+     * @param {function(string, Array.<?string>)} tagPolicy Decides which tags to
+     *     accept and sanitizes their attributes (passed to makeHtmlSanitizer).
+     * @return {string} The sanitized HTML.
+     */
+    function sanitizeWithPolicy(inputHtml, tagPolicy) {
+        var sanitizeInto = makeHtmlSanitizer(tagPolicy);
+        var chunks = [];
+        sanitizeInto(inputHtml, chunks);
+        return chunks.join('');
+    }
+
+    /**
+     * Strips unsafe tags and attributes from HTML.
+     * @param {string} inputHtml The HTML to sanitize.
+     * @param {?function(?string): ?string} opt_naiveUriRewriter A transform for URI attributes.  If not given, they are deleted.
+     * @param {function(?string): ?string} opt_nmTokenPolicy A transform for HTML names, element IDs and
+     *     space-separated lists of classes.  If not given, such attributes are left unchanged.
+     * @param {function(string, Object)} opt_logger Optional logger, forwarded to makeTagPolicy.
+     * @return {string} The result of sanitizeWithPolicy for the policy built by makeTagPolicy.
+     */
+    function sanitize(inputHtml,
+        opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
+        return sanitizeWithPolicy(
+            inputHtml,
+            makeTagPolicy(opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger));
+    }
+
+    // Public API object returned to the caller.  Export both quoted and unquoted names for Closure linkage.
+    var html = {};
+    html.escapeAttrib = html['escapeAttrib'] = escapeAttrib;
+    html.makeHtmlSanitizer = html['makeHtmlSanitizer'] = makeHtmlSanitizer;
+    html.makeSaxParser = html['makeSaxParser'] = makeSaxParser;
+    html.makeTagPolicy = html['makeTagPolicy'] = makeTagPolicy;
+    html.normalizeRCData = html['normalizeRCData'] = normalizeRCData;
+    html.sanitize = html['sanitize'] = sanitize;
+    html.sanitizeAttribs = html['sanitizeAttribs'] = sanitizeAttribs;
+    html.sanitizeWithPolicy = html['sanitizeWithPolicy'] = sanitizeWithPolicy;
+    html.unescapeEntities = html['unescapeEntities'] = unescapeEntities;
+    return html;
+})(html4);
+
+var html_sanitize = html['sanitize'];  // alias for html.sanitize, read through the quoted name
+
+// Exports for Closure compiler.  Note this file is also cajoled
+// for domado and run in an environment without 'window', hence the typeof guard.
+if (typeof window !== 'undefined') {
+    window['html'] = html;
+    window['html_sanitize'] = html_sanitize;
+}
+
+// Backwards-compatible facade: the same functions as `html`, plus `escape` as an alias of escapeAttrib.
+var Sanitizer = {
+    escapeAttrib: html.escapeAttrib,
+    makeHtmlSanitizer: html.makeHtmlSanitizer,
+    makeSaxParser: html.makeSaxParser,
+    makeTagPolicy: html.makeTagPolicy,
+    normalizeRCData: html.normalizeRCData,
+    sanitizeAttribs: html.sanitizeAttribs,
+    sanitizeWithPolicy: html.sanitizeWithPolicy,
+    unescapeEntities: html.unescapeEntities,
+    escape: html.escapeAttrib
+};
+
+// https://github.com/theSmaw/Caja-HTML-Sanitizer/issues/8
+// Expands self-closing tags ("<x .../>") into "<x ...></x>", then delegates to html.sanitize.
+Sanitizer.sanitize = function(inputHtml, opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
+    if (typeof inputHtml === "string") {
+        // The tag name may contain digits (h1..h6); matching letters only turned
+        // "<h1/>" into "<h1></h>", which left the heading unclosed.
+        inputHtml = inputHtml.replace(/<([a-zA-Z][a-zA-Z0-9]*)([^>]*)\/>/g, '<$1$2></$1>');
+    }
+    // Falsy input (null, undefined, "", 0, ...) is handed back untouched.
+    if (!inputHtml) {
+        return inputHtml;
+    }
+    return html.sanitize(inputHtml, opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger);
+};
+
+// When a CommonJS-style `exports` exists, publish Sanitizer through it (also as module.exports when `module` is available).
+// Otherwise (e.g. a classic browser script) add 'Sanitizer' to `this` via a plain property assignment.
+if (typeof exports !== 'undefined') {
+    if (typeof module !== 'undefined' && module.exports) {
+        exports = module.exports = Sanitizer;
+    }
+    exports.Sanitizer = Sanitizer;
+} else {
+    this.Sanitizer = Sanitizer;
+}
-- 
GitLab