aboutsummaryrefslogtreecommitdiff
path: root/core/src/main/java/org/owasp/encoder/JavaScriptEncoder.java
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/main/java/org/owasp/encoder/JavaScriptEncoder.java')
-rw-r--r--core/src/main/java/org/owasp/encoder/JavaScriptEncoder.java298
1 files changed, 298 insertions, 0 deletions
diff --git a/core/src/main/java/org/owasp/encoder/JavaScriptEncoder.java b/core/src/main/java/org/owasp/encoder/JavaScriptEncoder.java
new file mode 100644
index 0000000..5ff0335
--- /dev/null
+++ b/core/src/main/java/org/owasp/encoder/JavaScriptEncoder.java
@@ -0,0 +1,298 @@
+// Copyright (c) 2012 Jeff Ichnowski
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// * Redistributions of source code must retain the above
+// copyright notice, this list of conditions and the following
+// disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials
+// provided with the distribution.
+//
+// * Neither the name of the OWASP nor the names of its
+// contributors may be used to endorse or promote products
+// derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+// OF THE POSSIBILITY OF SUCH DAMAGE.
+package org.owasp.encoder;
+
+import java.nio.CharBuffer;
+import java.nio.charset.CoderResult;
+
+/**
+ * JavaScriptEncoder -- An encoder for JavaScript string contexts.
+ *
+ * @author jeffi
+ */
+class JavaScriptEncoder extends Encoder {
+
+ /**
+ * Mode of operation constants for the JavaScriptEncoder.
+ */
+ enum Mode {
+ /**
+ * Standard encoding of JavaScript Strings. Escape sequences are chosen
+ * according to what is the shortest sequence possible for the
+ * character.
+ */
+ SOURCE,
+ /**
+ * Encoding for use in HTML attributes. Quote characters are escaped
+ * using hex encodes instead of backslashes. The alternate would be to
+ * use a sequence of encodes that would actually be longer. In this mode
+ * double-quote is "\x22" and single-quote is "\x27". (In HTML
+ * attributes the alternate would be encoding "\"" and "\'" with entity
+ * escapes to "\"" and "\&39;").
+ */
+ ATTRIBUTE,
+ /**
+ * Encoding for use in HTML script blocks. The main concern here is
+ * permaturely terminating a script block with a closing "</" inside
+ * the string. This encoding escapes "/" as "\/" to prevent such
+ * termination.
+ */
+ BLOCK,
+ /**
+ * Encodes for use in either HTML script attributes or blocks.
+ * Essentially this is both special escapes from HTML_ATTRIBUTE and
+ * HTML_CONTENT combined.
+ */
+ HTML,;
+ }
+
+ /**
+ * The mode of operations--used for toString implementation.
+ */
+ private final Mode _mode;
+ /**
+ * True if quotation characters should be hex encoded. Hex encoding quotes
+ * allows JavaScript to be included in XML attributes without additional
+ * XML-based encoding.
+ */
+ private final boolean _hexEncodeQuotes;
+ /**
+ * An array of 4 32-bit integers used as bitmasks to check if a character
+ * needs encoding or not. If the bit is set, the character is valid and does
+ * not need encoding.
+ */
+ private final int[] _validMasks;
+ /**
+ * True if the output should only include ASCII characters. Valid non-ASCII
+ * characters that would normally not be encoded, will be encoded.
+ */
+ private final boolean _asciiOnly;
+
+ /**
+ * Constructs a new JavaScriptEncoder for the specified contextual mode.
+ *
+ * @param mode the mode of operation
+ * @param asciiOnly true if only ASCII characters should be included in the
+ * output (all code-points outside the ASCII range will be encoded).
+ */
+ JavaScriptEncoder(Mode mode, boolean asciiOnly) {
+ // TODO: after some testing it appears that an array of int masks
+ // is faster than two longs, or an array of longs or an array of bytes
+ // the other encoders based upon masks should be switched to ints.
+ // (to be clear, it's much faster on 32-bit VMS, and just slightly
+ // faster on 64-bit VMS)
+ _mode = mode;
+
+ // Note: this probably needs to be repeated everywhere this trick is
+ // used, but here seems like as good a place as any. According to
+ // the Java spec (x << y) where x and y are integers, is evaluated
+ // as (x << (y & 31)). Or put another way, only the lower 5 bits
+ // of the shift amount are considered.
+ _validMasks = new int[]{
+ 0,
+ -1 & ~((1 << '\'') | (1 << '\"')),
+ -1 & ~((1 << '\\')),
+ asciiOnly ? ~(1 << Unicode.DEL) : -1,};
+
+ if (mode == Mode.BLOCK || mode == Mode.HTML) {
+ // in <script> blocks, we need to prevent the browser from seeing
+ // "</anything>" and "<!--". To do so we escape "/" as "\/" and
+ // escape "-" as "\-". Both could be solved with a hex encoding
+ // on "<" but we figure "<" appears often in script strings and
+ // the backslash encoding is more readable than a hex encoding.
+ // (And note, a backslash encoding would not prevent the exploits
+ // on "</...>" and "<!--".
+ // In short "</script>" is escaped as "<\/script>" and "<!--" is
+ // escaped as "<!\-\-".
+ _validMasks[1] &= ~((1 << '/') | (1 << '-'));
+ }
+ if (mode != Mode.SOURCE) {
+ _validMasks[1] &= ~(1 << '&');
+ }
+
+ _asciiOnly = asciiOnly;
+ _hexEncodeQuotes = (mode == Mode.ATTRIBUTE || mode == Mode.HTML);
+ }
+
+ @Override
+ int maxEncodedLength(int n) {
+ return n * 6;
+ }
+
+ @Override
+ int firstEncodedOffset(String input, int off, int len) {
+ final int n = off + len;
+ final int[] validMasks = this._validMasks;
+ for (int i = off; i < n; ++i) {
+ char ch = input.charAt(i);
+ if (ch < 128) {
+ if ((validMasks[ch >>> 5] & (1 << ch)) == 0) {
+ return i;
+ }
+ } else if (_asciiOnly || ch == Unicode.LINE_SEPARATOR || ch == Unicode.PARAGRAPH_SEPARATOR) {
+ return i;
+ }
+ }
+ return n;
+ }
+
+ @Override
+ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput) {
+ final char[] in = input.array();
+ final char[] out = output.array();
+ int i = input.arrayOffset() + input.position();
+ final int n = input.arrayOffset() + input.limit();
+ int j = output.arrayOffset() + output.position();
+ final int m = output.arrayOffset() + output.limit();
+
+ final int[] validMasks = this._validMasks;
+
+ for (; i < n; ++i) {
+ char ch = in[i];
+
+ hexEncoded:
+ {
+ encoded:
+ {
+ if (ch < 128) {
+ if ((validMasks[ch >>> 5] & (1 << ch)) == 0) {
+ break encoded;
+ }
+ } else if (_asciiOnly || ch == Unicode.LINE_SEPARATOR || ch == Unicode.PARAGRAPH_SEPARATOR) {
+ if (ch <= 0xff) {
+ break hexEncoded;
+ }
+ if (j + 6 > m) {
+ return overflow(input, i, output, j);
+ }
+ out[j++] = '\\';
+ out[j++] = 'u';
+ out[j++] = HEX[ch >>> 3 * HEX_SHIFT];
+ out[j++] = HEX[ch >>> 2 * HEX_SHIFT & HEX_MASK];
+ out[j++] = HEX[ch >>> HEX_SHIFT & HEX_MASK];
+ out[j++] = HEX[ch & HEX_MASK];
+ continue;
+ }
+ if (j >= m) {
+ return overflow(input, i, output, j);
+ }
+ out[j++] = ch;
+ continue;
+ }
+
+ switch (ch) {
+ case '\b':
+ if (j + 2 > m) {
+ return overflow(input, i, output, j);
+ }
+ out[j++] = '\\';
+ out[j++] = 'b';
+ continue;
+ case '\t':
+ if (j + 2 > m) {
+ return overflow(input, i, output, j);
+ }
+ out[j++] = '\\';
+ out[j++] = 't';
+ continue;
+ case '\n':
+ if (j + 2 > m) {
+ return overflow(input, i, output, j);
+ }
+ out[j++] = '\\';
+ out[j++] = 'n';
+ continue;
+ // Per Mike Samuel "\v should not be used since some
+ // versions of IE treat it as a literal letter 'v'"
+// case 0x0b: // '\v'
+// if (j+1 >= m) {
+// return overflow(input, i, output, j);
+// }
+// out[j++] = '\\';
+// out[j++] = 'v';
+// break;
+ case '\f':
+ if (j + 2 > m) {
+ return overflow(input, i, output, j);
+ }
+ out[j++] = '\\';
+ out[j++] = 'f';
+ continue;
+ case '\r':
+ if (j + 2 > m) {
+ return overflow(input, i, output, j);
+ }
+ out[j++] = '\\';
+ out[j++] = 'r';
+ continue;
+ case '\'':
+ case '\"':
+ if (_hexEncodeQuotes) {
+ break hexEncoded;
+ }
+ // fall through
+ case '\\':
+ case '/':
+ case '-':
+ // We'll only see '/' and '-' here in the BLOCK and HTML
+ // modes otherwise it will be accepted as valid by the
+ // bitmasks.
+ if (j + 2 > m) {
+ return overflow(input, i, output, j);
+ }
+ out[j++] = '\\';
+ out[j++] = ch;
+ continue;
+ default:
+ break;
+ }
+ }
+
+ if (j + 4 > m) {
+ return overflow(input, i, output, j);
+ }
+ out[j++] = '\\';
+ out[j++] = 'x';
+ out[j++] = HEX[ch >>> HEX_SHIFT];
+ out[j++] = HEX[ch & HEX_MASK];
+ }
+
+ return underflow(input, i, output, j);
+ }
+
+ @Override
+ public String toString() {
+ return "JavaScriptEncoder(mode=" + _mode + "," + (_asciiOnly ? "ASCII" : "UNICODE") + ")";
+ }
+}