diff options
Diffstat (limited to 'src/proguard/classfile/constant/Utf8Constant.java')
-rw-r--r-- | src/proguard/classfile/constant/Utf8Constant.java | 285 |
1 files changed, 285 insertions, 0 deletions
/*
 * ProGuard -- shrinking, optimization, obfuscation, and preverification
 *             of Java bytecode.
 *
 * Copyright (c) 2002-2009 Eric Lafortune (eric@graphics.cornell.edu)
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
package proguard.classfile.constant;

import proguard.classfile.*;
import proguard.classfile.constant.visitor.ConstantVisitor;

import java.io.UnsupportedEncodingException;

/**
 * This Constant represents a UTF-8 constant in the constant pool.
 * <p>
 * The data is held in exactly one of two forms at a time: the modified UTF-8
 * byte array, or the decoded String. Asking for the other form converts the
 * data and drops the form that was held before.
 *
 * @author Eric Lafortune
 */
public class Utf8Constant extends Constant
{
    // Layout of the two-byte encoding: 110xxxxx 10xxxxxx.
    private static final char TWO_BYTE_LIMIT     = 0x80;
    private static final int  TWO_BYTE_CONSTANT1 = 0xc0;
    private static final int  TWO_BYTE_CONSTANT2 = 0x80;
    private static final int  TWO_BYTE_SHIFT1    = 6;
    private static final int  TWO_BYTE_MASK1     = 0x1f;
    private static final int  TWO_BYTE_MASK2     = 0x3f;

    // Layout of the three-byte encoding: 1110xxxx 10xxxxxx 10xxxxxx.
    private static final char THREE_BYTE_LIMIT     = 0x800;
    private static final int  THREE_BYTE_CONSTANT1 = 0xe0;
    private static final int  THREE_BYTE_CONSTANT2 = 0x80;
    private static final int  THREE_BYTE_CONSTANT3 = 0x80;
    private static final int  THREE_BYTE_SHIFT1    = 12;
    private static final int  THREE_BYTE_SHIFT2    = 6;
    private static final int  THREE_BYTE_MASK1     = 0x0f;
    private static final int  THREE_BYTE_MASK2     = 0x3f;
    private static final int  THREE_BYTE_MASK3     = 0x3f;


    // There are a lot of Utf8Constant objects, so we're optimising their storage.
    // Initially, we're storing the UTF-8 bytes in a byte array.
    // When the corresponding String is requested, we ditch the array and just
    // store the String.

    private byte[] bytes;

    private String string;


    /**
     * Creates an uninitialized Utf8Constant. Neither representation is set
     * until {@link #setBytes(byte[])} or {@link #setString(String)} is called.
     */
    public Utf8Constant()
    {
    }


    /**
     * Creates a Utf8Constant containing the given string.
     *
     * @param string the string value of the constant.
     */
    public Utf8Constant(String string)
    {
        this.bytes  = null;
        this.string = string;
    }


    /**
     * Initializes the UTF-8 data with an array of bytes. The array is stored
     * as is (it is not copied), and any String representation is discarded.
     *
     * @param bytes the modified UTF-8 bytes of the constant.
     */
    public void setBytes(byte[] bytes)
    {
        this.bytes  = bytes;
        this.string = null;
    }


    /**
     * Returns the UTF-8 data as an array of bytes. If the data is currently
     * held as a String, it is encoded first and the String is discarded.
     * The returned array is the internal array, not a copy.
     *
     * @return the modified UTF-8 bytes of the constant.
     */
    public byte[] getBytes()
    {
        switchToByteArrayRepresentation();

        return bytes;
    }


    /**
     * Initializes the UTF-8 data with a String. Any byte array representation
     * is discarded.
     *
     * @param utf8String the string value of the constant.
     */
    public void setString(String utf8String)
    {
        this.bytes  = null;
        this.string = utf8String;
    }


    /**
     * Returns the UTF-8 data as a String. If the data is currently held as a
     * byte array, it is decoded first and the byte array is discarded.
     *
     * @return the string value of the constant.
     * @throws RuntimeException if the byte array ends in the middle of a
     *                          multi-byte character. The original
     *                          UnsupportedEncodingException is attached as
     *                          the cause.
     */
    public String getString()
    {
        try
        {
            switchToStringRepresentation();
        }
        catch (UnsupportedEncodingException ex)
        {
            // Keep the original exception as the cause, so its stack trace
            // isn't lost.
            throw new RuntimeException(ex.getMessage(), ex);
        }

        return string;
    }


    // Implementations for Constant.

    public int getTag()
    {
        return ClassConstants.CONSTANT_Utf8;
    }

    public void accept(Clazz clazz, ConstantVisitor constantVisitor)
    {
        constantVisitor.visitUtf8Constant(clazz, this);
    }


    // Small utility methods.

    /**
     * Switches to a byte array representation of the UTF-8 data.
     * Does nothing if the byte array is already present.
     */
    private void switchToByteArrayRepresentation()
    {
        if (bytes == null)
        {
            bytes  = getByteArrayRepresentation(string);
            string = null;
        }
    }


    /**
     * Switches to a String representation of the UTF-8 data.
     * Does nothing if the String is already present.
     *
     * @throws UnsupportedEncodingException if the byte array is truncated in
     *                                      the middle of a character.
     */
    private void switchToStringRepresentation() throws UnsupportedEncodingException
    {
        if (string == null)
        {
            string = getStringRepresentation(bytes);
            bytes  = null;
        }
    }


    /**
     * Returns the modified UTF-8 byte array representation of the given string.
     * Every char of the string is encoded on its own, as one, two, or three
     * bytes.
     *
     * @param string the string to be encoded.
     * @return a newly allocated array containing the encoded bytes.
     */
    private static byte[] getByteArrayRepresentation(String string)
    {
        // We're computing the byte array ourselves, because the implementation
        // of String.getBytes("UTF-8") has a bug, at least up to JRE 1.4.2.
        // Also note the special treatment of the 0 character.

        // Compute the byte array length.
        int byteLength   = 0;
        int stringLength = string.length();
        for (int stringIndex = 0; stringIndex < stringLength; stringIndex++)
        {
            char c = string.charAt(stringIndex);

            // The character is represented by one, two, or three bytes.
            byteLength += c == 0                ? 2 :
                          c <  TWO_BYTE_LIMIT   ? 1 :
                          c <  THREE_BYTE_LIMIT ? 2 :
                                                  3;
        }

        // Allocate the byte array with the computed length.
        byte[] bytes = new byte[byteLength];

        // Fill out the array.
        int byteIndex = 0;
        for (int stringIndex = 0; stringIndex < stringLength; stringIndex++)
        {
            char c = string.charAt(stringIndex);
            if (c == 0)
            {
                // The 0 character gets a two-byte representation in classes.
                bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT1;
                bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT2;
            }
            else if (c < TWO_BYTE_LIMIT)
            {
                // The character is represented by a single byte.
                bytes[byteIndex++] = (byte)c;
            }
            else if (c < THREE_BYTE_LIMIT)
            {
                // The character is represented by two bytes.
                bytes[byteIndex++] = (byte)(TWO_BYTE_CONSTANT1 | ((c >>> TWO_BYTE_SHIFT1) & TWO_BYTE_MASK1));
                bytes[byteIndex++] = (byte)(TWO_BYTE_CONSTANT2 | ( c                      & TWO_BYTE_MASK2));
            }
            else
            {
                // The character is represented by three bytes.
                bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT1 | ((c >>> THREE_BYTE_SHIFT1) & THREE_BYTE_MASK1));
                bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT2 | ((c >>> THREE_BYTE_SHIFT2) & THREE_BYTE_MASK2));
                bytes[byteIndex++] = (byte)(THREE_BYTE_CONSTANT3 | ( c                        & THREE_BYTE_MASK3));
            }
        }

        return bytes;
    }


    /**
     * Returns the String representation of the given modified UTF-8 byte array.
     *
     * @param bytes the modified UTF-8 bytes to be decoded.
     * @return the decoded string.
     * @throws UnsupportedEncodingException if the array ends before all bytes
     *                                      of a multi-byte character have been
     *                                      read.
     */
    private static String getStringRepresentation(byte[] bytes) throws UnsupportedEncodingException
    {
        // We're computing the string ourselves, because the implementation
        // of "new String(bytes)" doesn't honor the special treatment of
        // the 0 character in JRE 1.6_u11.

        // Allocate a char array that is certainly large enough: every
        // character takes up at least one byte.
        char[] chars = new char[bytes.length];

        // Fill out the array.
        int charIndex = 0;
        int byteIndex = 0;
        while (byteIndex < bytes.length)
        {
            int b = bytes[byteIndex++] & 0xff;

            // Depending on the flag bits in the first byte, the character
            // is represented by a single byte, by two bytes, or by three
            // bytes. We're not checking the redundant flag bits in the
            // second byte and the third byte.
            int trailingByteCount = b < TWO_BYTE_CONSTANT1   ? 0 :
                                    b < THREE_BYTE_CONSTANT1 ? 1 :
                                                               2;

            // Check up front that the trailing bytes are present, instead of
            // relying on an ArrayIndexOutOfBoundsException. The message only
            // contains the characters that have actually been decoded.
            if (bytes.length - byteIndex < trailingByteCount)
            {
                throw new UnsupportedEncodingException("Missing UTF-8 bytes after initial byte [0x"+Integer.toHexString(b)+"] in string ["+new String(chars, 0, charIndex)+"]");
            }

            int c;
            if (trailingByteCount == 0)
            {
                c = b;
            }
            else if (trailingByteCount == 1)
            {
                c = ((b                  & TWO_BYTE_MASK1) << TWO_BYTE_SHIFT1) |
                    ((bytes[byteIndex++] & TWO_BYTE_MASK2)                   );
            }
            else
            {
                c = ((b                  & THREE_BYTE_MASK1) << THREE_BYTE_SHIFT1) |
                    ((bytes[byteIndex++] & THREE_BYTE_MASK2) << THREE_BYTE_SHIFT2) |
                    ((bytes[byteIndex++] & THREE_BYTE_MASK3)                     );
            }

            chars[charIndex++] = (char)c;
        }

        return new String(chars, 0, charIndex);
    }
}