1 files changed, 285 insertions, 0 deletions
diff --git a/src/proguard/classfile/constant/Utf8Constant.java b/src/proguard/classfile/constant/Utf8Constant.java
new file mode 100644
index 0000000..ae419c9
--- /dev/null
+++ b/src/proguard/classfile/constant/Utf8Constant.java
@@ -0,0 +1,285 @@
+/*
+ * ProGuard -- shrinking, optimization, obfuscation, and preverification
+ *             of Java bytecode.
+ *
+ * Copyright (c) 2002-2009 Eric Lafortune (eric@graphics.cornell.edu)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+package proguard.classfile.constant;
+
+import proguard.classfile.*;
+import proguard.classfile.constant.visitor.ConstantVisitor;
+
+import java.io.UnsupportedEncodingException;
+
+/**
+ * This Constant represents a UTF-8 constant in the constant pool.
+ *
+ * @author Eric Lafortune
+ */
+public class Utf8Constant extends Constant
+{
+    // Parameters of the two-byte encoding (110xxxxx 10xxxxxx).
+    private static final char TWO_BYTE_LIMIT     = 0x80;
+    private static final int  TWO_BYTE_CONSTANT1 = 0xc0;
+    private static final int  TWO_BYTE_CONSTANT2 = 0x80;
+    private static final int  TWO_BYTE_SHIFT1    = 6;
+    private static final int  TWO_BYTE_MASK1     = 0x1f;
+    private static final int  TWO_BYTE_MASK2     = 0x3f;
+
+    // Parameters of the three-byte encoding (1110xxxx 10xxxxxx 10xxxxxx).
+    private static final char THREE_BYTE_LIMIT     = 0x800;
+    private static final int  THREE_BYTE_CONSTANT1 = 0xe0;
+    private static final int  THREE_BYTE_CONSTANT2 = 0x80;
+    private static final int  THREE_BYTE_CONSTANT3 = 0x80;
+    private static final int  THREE_BYTE_SHIFT1    = 12;
+    private static final int  THREE_BYTE_SHIFT2    = 6;
+    private static final int  THREE_BYTE_MASK1     = 0x0f;
+    private static final int  THREE_BYTE_MASK2     = 0x3f;
+    private static final int  THREE_BYTE_MASK3     = 0x3f;
+
+
+    // There are a lot of Utf8Constant objects, so we're optimising their storage.
+    // Initially, we're storing the UTF-8 bytes in a byte array.
+    // When the corresponding String is requested, we ditch the array and just
+    // store the String. At most one of the two fields below is non-null.
+
+    private byte[] bytes;
+    private String string;
+
+
+    /**
+     * Creates an uninitialized Utf8Constant. Its data must be set with
+     * setBytes or setString before they are read.
+     */
+    public Utf8Constant()
+    {
+    }
+
+
+    /**
+     * Creates a Utf8Constant containing the given string.
+     */
+    public Utf8Constant(String string)
+    {
+        this.bytes  = null;
+        this.string = string;
+    }
+
+
+    /**
+     * Initializes the UTF-8 data with an array of modified UTF-8 bytes.
+     */
+    public void setBytes(byte[] bytes)
+    {
+        this.bytes  = bytes;
+        this.string = null;
+    }
+
+
+    /**
+     * Returns the UTF-8 data as an array of modified UTF-8 bytes, converting
+     * the internal String representation if necessary.
+     */
+    public byte[] getBytes()
+    {
+        switchToByteArrayRepresentation();
+
+        return bytes;
+    }
+
+
+    /**
+     * Initializes the UTF-8 data with a String.
+     */
+    public void setString(String utf8String)
+    {
+        this.bytes  = null;
+        this.string = utf8String;
+    }
+
+
+    /**
+     * Returns the UTF-8 data as a String.
+     *
+     * @throws RuntimeException if the stored bytes are truncated.
+     */
+    public String getString()
+    {
+        try
+        {
+            switchToStringRepresentation();
+        }
+        catch (UnsupportedEncodingException ex)
+        {
+            // Preserve the original exception as the cause.
+            throw new RuntimeException(ex.getMessage(), ex);
+        }
+
+        return string;
+    }
+
+
+    // Implementations for Constant.
+
+    public int getTag()
+    {
+        return ClassConstants.CONSTANT_Utf8;
+    }
+
+    public void accept(Clazz clazz, ConstantVisitor constantVisitor)
+    {
+        constantVisitor.visitUtf8Constant(clazz, this);
+    }
+
+
+    // Small utility methods.
+
+    /**
+     * Switches to a byte array representation of the UTF-8 data.
+     */
+    private void switchToByteArrayRepresentation()
+    {
+        if (bytes == null)
+        {
+            bytes  = getByteArrayRepresentation(string);
+            string = null;
+        }
+    }
+
+
+    /**
+     * Switches to a String representation of the UTF-8 data.
+     */
+    private void switchToStringRepresentation() throws UnsupportedEncodingException
+    {
+        if (string == null)
+        {
+            string = getStringRepresentation(bytes);
+            bytes  = null;
+        }
+    }
+
+
+    /**
+     * Returns the modified UTF-8 byte array representation of the given string.
+     */
+    private byte[] getByteArrayRepresentation(String string)
+    {
+        // We're computing the byte array ourselves, because the implementation
+        // of String.getBytes("UTF-8") has a bug, at least up to JRE 1.4.2.
+        // Also note the special treatment of the 0 character.
+
+        // Compute the byte array length.
+        int byteLength   = 0;
+        int stringLength = string.length();
+        for (int stringIndex = 0; stringIndex < stringLength; stringIndex++)
+        {
+            char c = string.charAt(stringIndex);
+
+            // The character is represented by one, two, or three bytes.
+            byteLength += c == 0                ? 2 :
+                          c <  TWO_BYTE_LIMIT   ? 1 :
+                          c <  THREE_BYTE_LIMIT ? 2 :
+                                                  3;
+        }
+
+        // Allocate the byte array with the computed length.
+        byte[] bytes  = new byte[byteLength];
+
+        // Fill out the array.
+        int byteIndex = 0;
+        for (int stringIndex = 0; stringIndex < stringLength; stringIndex++)
+        {
+            char c = string.charAt(stringIndex);
+            if (c == 0)
+            {
+                // The 0 character gets a two-byte representation in classes.
+                bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT1;
+                bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT2;
+            }
+            else if (c < TWO_BYTE_LIMIT)
+            {
+                // The character is represented by a single byte.
+                bytes[byteIndex++] = (byte)c;
+            }
+            else if (c < THREE_BYTE_LIMIT)
+            {
+                // The character is represented by two bytes.
+                bytes[byteIndex++] = (byte)(TWO_BYTE_CONSTANT1 | ((c >>> TWO_BYTE_SHIFT1) & TWO_BYTE_MASK1));
+                bytes[byteIndex++] = (byte)(TWO_BYTE_CONSTANT2 | ( c                      & TWO_BYTE_MASK2));
+            }
+            else
+            {
+                // The character is represented by three bytes.
+                bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT1 | ((c >>> THREE_BYTE_SHIFT1) & THREE_BYTE_MASK1));
+                bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT2 | ((c >>> THREE_BYTE_SHIFT2) & THREE_BYTE_MASK2));
+                bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT3 | ( c                        & THREE_BYTE_MASK3));
+            }
+        }
+
+        return bytes;
+    }
+
+
+    /**
+     * Returns the String representation of the given modified UTF-8 byte array.
+     *
+     * @throws UnsupportedEncodingException if the array ends inside a character.
+     */
+    private String getStringRepresentation(byte[] bytes) throws UnsupportedEncodingException
+    {
+        // We're computing the string ourselves, because the implementation
+        // of "new String(bytes)" doesn't honor the special treatment of
+        // the 0 character in JRE 1.6_u11.
+
+        // Allocate a char array; there are never more chars than bytes.
+        char[] chars  = new char[bytes.length];
+
+        // Fill out the array.
+        int charIndex = 0;
+        int byteIndex = 0;
+        while (byteIndex < bytes.length)
+        {
+            int b = bytes[byteIndex++] & 0xff;
+
+            // Depending on the flag bits in the first byte, the character
+            // is represented by a single byte, by two bytes, or by three
+            // bytes. We're not checking the redundant flag bits in the
+            // second byte and the third byte.
+            int extraByteCount = b < TWO_BYTE_CONSTANT1   ? 0 :
+                                 b < THREE_BYTE_CONSTANT1 ? 1 :
+                                                            2;
+
+            // Check for truncated data up front, so the message only
+            // contains the characters that have been decoded so far.
+            if (byteIndex + extraByteCount > bytes.length)
+            {
+                throw new UnsupportedEncodingException("Missing UTF-8 bytes after initial byte [0x"+Integer.toHexString(b)+"] in string ["+new String(chars, 0, charIndex)+"]");
+            }
+
+            chars[charIndex++] =
+                (char)(extraByteCount == 0 ? b                                                              :
+                       extraByteCount == 1 ? ((b                  & TWO_BYTE_MASK1)   << TWO_BYTE_SHIFT1)   |
+                                             ((bytes[byteIndex++] & TWO_BYTE_MASK2)                     )   :
+                                             ((b                  & THREE_BYTE_MASK1) << THREE_BYTE_SHIFT1) |
+                                             ((bytes[byteIndex++] & THREE_BYTE_MASK2) << THREE_BYTE_SHIFT2) |
+                                             ((bytes[byteIndex++] & THREE_BYTE_MASK3)                     ));
+        }
+
+        return new String(chars, 0, charIndex);
+    }
+}