diff options
Diffstat (limited to 'android_icu4j/src/main/java')
5 files changed, 626 insertions, 0 deletions
diff --git a/android_icu4j/src/main/java/com/android/icu/charset/CharsetDecoderICU.java b/android_icu4j/src/main/java/com/android/icu/charset/CharsetDecoderICU.java new file mode 100644 index 000000000..6e6aeee76 --- /dev/null +++ b/android_icu4j/src/main/java/com/android/icu/charset/CharsetDecoderICU.java @@ -0,0 +1,217 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2006, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +******************************************************************************* +*/ + /** + * A JNI interface for ICU converters. + * + * + * @author Ram Viswanadha, IBM + */ +package com.android.icu.charset; + +import dalvik.annotation.optimization.ReachabilitySensitive; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; +import libcore.util.EmptyArray; + +final class CharsetDecoderICU extends CharsetDecoder { + private static final int MAX_CHARS_PER_BYTE = 2; + + private static final int INPUT_OFFSET = 0; + private static final int OUTPUT_OFFSET = 1; + private static final int INVALID_BYTE_COUNT = 2; + /* + * data[INPUT_OFFSET] = on input contains the start of input and on output the number of input bytes consumed + * data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output chars written + * data[INVALID_BYTE_COUNT] = number of invalid bytes + */ + private final int[] data = new int[3]; + + /* Handle to the ICU converter that is opened, cleaned up via NativeAllocationRegistry. 
*/ + @ReachabilitySensitive + private long converterHandle = 0; + + private byte[] input = null; + private char[] output= null; + + private byte[] allocatedInput = null; + private char[] allocatedOutput = null; + + // These instance variables are always assigned in the methods before being used. This class + // is inherently thread-unsafe so we don't have to worry about synchronization. + private int inEnd; + private int outEnd; + + public static CharsetDecoderICU newInstance(Charset cs, String icuCanonicalName) { + // This complexity is necessary to ensure that even if the constructor, superclass + // constructor, or call to updateCallback throw, we still free the native peer. + long address = 0; + CharsetDecoderICU result; + try { + address = NativeConverter.openConverter(icuCanonicalName); + float averageCharsPerByte = NativeConverter.getAveCharsPerByte(address); + result = new CharsetDecoderICU(cs, averageCharsPerByte, address); + } catch (Throwable t) { + if (address != 0) { + NativeConverter.closeConverter(address); + } + throw t; + } + // An exception in registerConverter() will deallocate address: + NativeConverter.registerConverter(result, address); + result.updateCallback(); + return result; + } + + private CharsetDecoderICU(Charset cs, float averageCharsPerByte, long address) { + super(cs, averageCharsPerByte, MAX_CHARS_PER_BYTE); + this.converterHandle = address; + } + + @Override protected void implReplaceWith(String newReplacement) { + updateCallback(); + } + + @Override protected final void implOnMalformedInput(CodingErrorAction newAction) { + updateCallback(); + } + + @Override protected final void implOnUnmappableCharacter(CodingErrorAction newAction) { + updateCallback(); + } + + private void updateCallback() { + NativeConverter.setCallbackDecode(converterHandle, this); + } + + @Override protected void implReset() { + NativeConverter.resetByteToChar(converterHandle); + data[INPUT_OFFSET] = 0; + data[OUTPUT_OFFSET] = 0; + data[INVALID_BYTE_COUNT] = 
0; + output = null; + input = null; + allocatedInput = null; + allocatedOutput = null; + inEnd = 0; + outEnd = 0; + } + + @Override protected final CoderResult implFlush(CharBuffer out) { + try { + // ICU needs to see an empty input. + input = EmptyArray.BYTE; + inEnd = 0; + data[INPUT_OFFSET] = 0; + + data[OUTPUT_OFFSET] = getArray(out); + data[INVALID_BYTE_COUNT] = 0; // Make sure we don't see earlier errors. + + int error = NativeConverter.decode(converterHandle, input, inEnd, output, outEnd, data, true); + if (NativeConverter.U_FAILURE(error)) { + if (error == NativeConverter.U_BUFFER_OVERFLOW_ERROR) { + return CoderResult.OVERFLOW; + } else if (error == NativeConverter.U_TRUNCATED_CHAR_FOUND) { + if (data[INVALID_BYTE_COUNT] > 0) { + return CoderResult.malformedForLength(data[INVALID_BYTE_COUNT]); + } + } + } + return CoderResult.UNDERFLOW; + } finally { + setPosition(out); + implReset(); + } + } + + @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + + data[INPUT_OFFSET] = getArray(in); + data[OUTPUT_OFFSET]= getArray(out); + + try { + int error = NativeConverter.decode(converterHandle, input, inEnd, output, outEnd, data, false); + if (NativeConverter.U_FAILURE(error)) { + if (error == NativeConverter.U_BUFFER_OVERFLOW_ERROR) { + return CoderResult.OVERFLOW; + } else if (error == NativeConverter.U_INVALID_CHAR_FOUND) { + return CoderResult.unmappableForLength(data[INVALID_BYTE_COUNT]); + } else if (error == NativeConverter.U_ILLEGAL_CHAR_FOUND) { + return CoderResult.malformedForLength(data[INVALID_BYTE_COUNT]); + } else { + throw new AssertionError(error); + } + } + // Decoding succeeded: give us more data. 
+ return CoderResult.UNDERFLOW; + } finally { + setPosition(in); + setPosition(out); + } + } + + + private int getArray(CharBuffer out) { + if (out.hasArray()) { + output = out.array(); + outEnd = out.arrayOffset() + out.limit(); + return out.arrayOffset() + out.position(); + } else { + outEnd = out.remaining(); + if (allocatedOutput == null || outEnd > allocatedOutput.length) { + allocatedOutput = new char[outEnd]; + } + // The array's start position is 0. + output = allocatedOutput; + return 0; + } + } + + private int getArray(ByteBuffer in) { + if (in.hasArray()) { + input = in.array(); + inEnd = in.arrayOffset() + in.limit(); + return in.arrayOffset() + in.position(); + } else { + inEnd = in.remaining(); + if (allocatedInput == null || inEnd > allocatedInput.length) { + allocatedInput = new byte[inEnd]; + } + // Copy the input buffer into the allocated array. + int pos = in.position(); + in.get(allocatedInput, 0, inEnd); + in.position(pos); + // The array's start position is 0. + input = allocatedInput; + return 0; + } + } + + private void setPosition(CharBuffer out) { + if (out.hasArray()) { + out.position(out.position() + data[OUTPUT_OFFSET]); + } else { + out.put(output, 0, data[OUTPUT_OFFSET]); + } + // release reference to output array, which may not be ours + output = null; + } + + private void setPosition(ByteBuffer in) { + in.position(in.position() + data[INPUT_OFFSET]); + // release reference to input array, which may not be ours + input = null; + } +} diff --git a/android_icu4j/src/main/java/com/android/icu/charset/CharsetEncoderICU.java b/android_icu4j/src/main/java/com/android/icu/charset/CharsetEncoderICU.java new file mode 100644 index 000000000..23a339fc0 --- /dev/null +++ b/android_icu4j/src/main/java/com/android/icu/charset/CharsetEncoderICU.java @@ -0,0 +1,255 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2006, International Business Machines Corporation and * +* others. 
All Rights Reserved. * +******************************************************************************* +* +******************************************************************************* +*/ +/** + * A JNI interface for ICU converters. + * + * + * @author Ram Viswanadha, IBM + */ +package com.android.icu.charset; + +import dalvik.annotation.optimization.ReachabilitySensitive; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; +import java.util.HashMap; +import java.util.Map; +import libcore.util.EmptyArray; + +final class CharsetEncoderICU extends CharsetEncoder { + private static final Map<String, byte[]> DEFAULT_REPLACEMENTS = new HashMap<String, byte[]>(); + static { + // ICU has different default replacements to the RI in some cases. There are many + // additional cases, but this covers all the charsets that Java guarantees will be + // available, which is where compatibility seems most important. (The RI even uses + // the byte corresponding to '?' in ASCII as the replacement byte for charsets where that + // byte corresponds to an entirely different character.) + // It's odd that UTF-8 doesn't use U+FFFD, given that (unlike ISO-8859-1 and US-ASCII) it + // can represent it, but this is what the RI does... + byte[] questionMark = new byte[] { (byte) '?' 
}; + DEFAULT_REPLACEMENTS.put("UTF-8", questionMark); + DEFAULT_REPLACEMENTS.put("ISO-8859-1", questionMark); + DEFAULT_REPLACEMENTS.put("US-ASCII", questionMark); + } + + private static final int INPUT_OFFSET = 0; + private static final int OUTPUT_OFFSET = 1; + private static final int INVALID_CHAR_COUNT = 2; + /* + * data[INPUT_OFFSET] = on input contains the start of input and on output the number of input chars consumed + * data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output bytes written + * data[INVALID_CHAR_COUNT] = number of invalid chars + */ + private int[] data = new int[3]; + + /* handle to the ICU converter that is opened */ + @ReachabilitySensitive + private final long converterHandle; + + private char[] input = null; + private byte[] output = null; + + private char[] allocatedInput = null; + private byte[] allocatedOutput = null; + + // These instance variables are always assigned in the methods before being used. This class + // is inherently thread-unsafe so we don't have to worry about synchronization. + private int inEnd; + private int outEnd; + + public static CharsetEncoderICU newInstance(Charset cs, String icuCanonicalName) { + // This complexity is necessary to ensure that even if the constructor, superclass + // constructor, or call to updateCallback throw, we still free the native peer. 
+ long address = 0; + CharsetEncoderICU result; + try { + address = NativeConverter.openConverter(icuCanonicalName); + float averageBytesPerChar = NativeConverter.getAveBytesPerChar(address); + float maxBytesPerChar = NativeConverter.getMaxBytesPerChar(address); + byte[] replacement = makeReplacement(icuCanonicalName, address); + result = new CharsetEncoderICU(cs, averageBytesPerChar, maxBytesPerChar, replacement, address); + } catch (Throwable t) { + if (address != 0) { + NativeConverter.closeConverter(address); + } + throw t; + } + // An exception in registerConverter() will deallocate address: + NativeConverter.registerConverter(result, address); + result.updateCallback(); + return result; + } + + private static byte[] makeReplacement(String icuCanonicalName, long address) { + // We have our own map of RI-compatible default replacements (where ICU disagrees)... + byte[] replacement = DEFAULT_REPLACEMENTS.get(icuCanonicalName); + if (replacement != null) { + return replacement.clone(); + } + // ...but fall back to asking ICU. + return NativeConverter.getSubstitutionBytes(address); + } + + private CharsetEncoderICU(Charset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement, long address) { + super(cs, averageBytesPerChar, maxBytesPerChar, replacement, true); + // Our native peer needs to know what just happened... 
+ this.converterHandle = address; + } + + @Override protected void implReplaceWith(byte[] newReplacement) { + updateCallback(); + } + + @Override protected void implOnMalformedInput(CodingErrorAction newAction) { + updateCallback(); + } + + @Override protected void implOnUnmappableCharacter(CodingErrorAction newAction) { + updateCallback(); + } + + private void updateCallback() { + NativeConverter.setCallbackEncode(converterHandle, this); + } + + @Override protected void implReset() { + NativeConverter.resetCharToByte(converterHandle); + data[INPUT_OFFSET] = 0; + data[OUTPUT_OFFSET] = 0; + data[INVALID_CHAR_COUNT] = 0; + output = null; + input = null; + allocatedInput = null; + allocatedOutput = null; + inEnd = 0; + outEnd = 0; + } + + @Override protected CoderResult implFlush(ByteBuffer out) { + try { + // ICU needs to see an empty input. + input = EmptyArray.CHAR; + inEnd = 0; + data[INPUT_OFFSET] = 0; + + data[OUTPUT_OFFSET] = getArray(out); + data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors. + + int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, true); + if (NativeConverter.U_FAILURE(error)) { + if (error == NativeConverter.U_BUFFER_OVERFLOW_ERROR) { + return CoderResult.OVERFLOW; + } else if (error == NativeConverter.U_TRUNCATED_CHAR_FOUND) { + if (data[INVALID_CHAR_COUNT] > 0) { + return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]); + } + } + } + return CoderResult.UNDERFLOW; + } finally { + setPosition(out); + implReset(); + } + } + + @Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + + data[INPUT_OFFSET] = getArray(in); + data[OUTPUT_OFFSET]= getArray(out); + data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors. 
+ + try { + int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, false); + if (NativeConverter.U_FAILURE(error)) { + if (error == NativeConverter.U_BUFFER_OVERFLOW_ERROR) { + return CoderResult.OVERFLOW; + } else if (error == NativeConverter.U_INVALID_CHAR_FOUND) { + return CoderResult.unmappableForLength(data[INVALID_CHAR_COUNT]); + } else if (error == NativeConverter.U_ILLEGAL_CHAR_FOUND) { + return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]); + } else { + throw new AssertionError(error); + } + } + // Encoding succeeded: give us more data. + return CoderResult.UNDERFLOW; + } finally { + setPosition(in); + setPosition(out); + } + } + + private int getArray(ByteBuffer out) { + if (out.hasArray()) { + output = out.array(); + outEnd = out.arrayOffset() + out.limit(); + return out.arrayOffset() + out.position(); + } else { + outEnd = out.remaining(); + if (allocatedOutput == null || outEnd > allocatedOutput.length) { + allocatedOutput = new byte[outEnd]; + } + // The array's start position is 0 + output = allocatedOutput; + return 0; + } + } + + private int getArray(CharBuffer in) { + if (in.hasArray()) { + input = in.array(); + inEnd = in.arrayOffset() + in.limit(); + return in.arrayOffset() + in.position(); + } else { + inEnd = in.remaining(); + if (allocatedInput == null || inEnd > allocatedInput.length) { + allocatedInput = new char[inEnd]; + } + // Copy the input buffer into the allocated array. 
+ int pos = in.position(); + in.get(allocatedInput, 0, inEnd); + in.position(pos); + // The array's start position is 0 + input = allocatedInput; + return 0; + } + } + + private void setPosition(ByteBuffer out) { + if (out.hasArray()) { + out.position(data[OUTPUT_OFFSET] - out.arrayOffset()); + } else { + out.put(output, 0, data[OUTPUT_OFFSET]); + } + // release reference to output array, which may not be ours + output = null; + } + + private void setPosition(CharBuffer in) { + int position = in.position() + data[INPUT_OFFSET] - data[INVALID_CHAR_COUNT]; + if (position < 0) { + // The calculated position might be negative if we encountered an + // invalid char that spanned input buffers. We adjust it to 0 in this case. + // + // NOTE: The API doesn't allow us to adjust the position of the previous + // input buffer. (Doing that wouldn't serve any useful purpose anyway.) + position = 0; + } + + in.position(position); + // release reference to input array, which may not be ours + input = null; + } +} diff --git a/android_icu4j/src/main/java/com/android/icu/charset/CharsetICU.java b/android_icu4j/src/main/java/com/android/icu/charset/CharsetICU.java new file mode 100644 index 000000000..5ca6dcf8e --- /dev/null +++ b/android_icu4j/src/main/java/com/android/icu/charset/CharsetICU.java @@ -0,0 +1,43 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2005, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +******************************************************************************* +*/ + +package com.android.icu.charset; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +/** + * This class is used from native code associated with {@link NativeConverter}. 
+ */ +final class CharsetICU extends Charset { + private final String icuCanonicalName; + + protected CharsetICU(String canonicalName, String icuCanonName, String[] aliases) { + super(canonicalName, aliases); + icuCanonicalName = icuCanonName; + } + + public CharsetDecoder newDecoder() { + return CharsetDecoderICU.newInstance(this, icuCanonicalName); + } + + public CharsetEncoder newEncoder() { + return CharsetEncoderICU.newInstance(this, icuCanonicalName); + } + + public boolean contains(Charset cs) { + if (cs == null) { + return false; + } else if (this.equals(cs)) { + return true; + } + return NativeConverter.contains(this.name(), cs.name()); + } +} diff --git a/android_icu4j/src/main/java/com/android/icu/charset/NativeConverter.java b/android_icu4j/src/main/java/com/android/icu/charset/NativeConverter.java new file mode 100644 index 000000000..f6d1e6440 --- /dev/null +++ b/android_icu4j/src/main/java/com/android/icu/charset/NativeConverter.java @@ -0,0 +1,96 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2006, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +******************************************************************************* +*/ + +package com.android.icu.charset; + +import libcore.util.NativeAllocationRegistry; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CodingErrorAction; + +public final class NativeConverter { + + private static final NativeAllocationRegistry registry = new NativeAllocationRegistry( + NativeConverter.class.getClassLoader(), getNativeFinalizer(), getNativeSize()); + + public static native int decode(long converterHandle, byte[] input, int inEnd, + char[] output, int outEnd, int[] data, boolean flush); + + public static native int encode(long converterHandle, char[] input, int inEnd, + byte[] output, int outEnd, int[] data, boolean flush); + + public static native long openConverter(String charsetName); + public static native void closeConverter(long converterHandle); + + public static void registerConverter(Object referrent, long converterHandle) { + registry.registerNativeAllocation(referrent, converterHandle); + } + + public static native void resetByteToChar(long converterHandle); + public static native void resetCharToByte(long converterHandle); + + public static native byte[] getSubstitutionBytes(long converterHandle); + + public static native int getMaxBytesPerChar(long converterHandle); + public static native float getAveBytesPerChar(long converterHandle); + public static native float getAveCharsPerByte(long converterHandle); + + public static native boolean contains(String converterName1, String converterName2); + + + @libcore.api.IntraCoreApi + public static native String[] getAvailableCharsetNames(); + + @libcore.api.IntraCoreApi + public static native Charset charsetForName(String charsetName); + + // Translates from Java's enum to the magic numbers #defined in "NativeConverter.cpp". 
+ private static int translateCodingErrorAction(CodingErrorAction action) { + if (action == CodingErrorAction.REPORT) { + return 0; + } else if (action == CodingErrorAction.IGNORE) { + return 1; + } else if (action == CodingErrorAction.REPLACE) { + return 2; + } else { + throw new AssertionError(); // Someone changed the enum. + } + } + + public static void setCallbackDecode(long converterHandle, CharsetDecoder decoder) { + setCallbackDecode(converterHandle, + translateCodingErrorAction(decoder.malformedInputAction()), + translateCodingErrorAction(decoder.unmappableCharacterAction()), + decoder.replacement()); + } + private static native void setCallbackDecode(long converterHandle, int onMalformedInput, int onUnmappableInput, String subChars); + + public static void setCallbackEncode(long converterHandle, CharsetEncoder encoder) { + setCallbackEncode(converterHandle, + translateCodingErrorAction(encoder.malformedInputAction()), + translateCodingErrorAction(encoder.unmappableCharacterAction()), + encoder.replacement()); + } + private static native void setCallbackEncode(long converterHandle, int onMalformedInput, int onUnmappableInput, byte[] subBytes); + + public static native long getNativeFinalizer(); + public static native long getNativeSize(); + + // Just the subset of error codes needed by CharsetDecoderICU/CharsetEncoderICU. 
+ public static final int U_ZERO_ERROR = 0; + public static final int U_INVALID_CHAR_FOUND = 10; + public static final int U_TRUNCATED_CHAR_FOUND = 11; + public static final int U_ILLEGAL_CHAR_FOUND = 12; + public static final int U_BUFFER_OVERFLOW_ERROR = 15; + public static boolean U_FAILURE(int error) { + return error > U_ZERO_ERROR; + } +} diff --git a/android_icu4j/src/main/java/com/android/icu/charset/TEST_MAPPING b/android_icu4j/src/main/java/com/android/icu/charset/TEST_MAPPING new file mode 100644 index 000000000..ec926f990 --- /dev/null +++ b/android_icu4j/src/main/java/com/android/icu/charset/TEST_MAPPING @@ -0,0 +1,15 @@ +{ + "presubmit": [ + { + "name": "CtsLibcoreTestCases", + "options": [ + { + "include-filter": "libcore.java.nio.charset" + }, + { + "include-filter": "org.apache.harmony.tests.java.nio.charset" + } + ] + } + ] +}
\ No newline at end of file |