diff options
Diffstat (limited to 'icu/icu4j/main/classes/core')
-rw-r--r-- | icu/icu4j/main/classes/core/src/com/ibm/icu/text/SCSU.java | 405 | ||||
-rw-r--r-- | icu/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeDecompressor.java | 795 |
2 files changed, 1200 insertions, 0 deletions
// ---------------------------------------------------------------------------
// icu/icu4j/main/classes/core/src/com/ibm/icu/text/SCSU.java
// ---------------------------------------------------------------------------
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2007, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

package com.ibm.icu.text;

/**
 * An interface defining constants for the Standard Compression Scheme for Unicode (SCSU) as
 * outlined in <A HREF="http://www.unicode.org/unicode/reports/tr6">Unicode Technical Report #6</A>.
 *
 * @author Stephen F. Booth
 * @version 1.1 05 Aug 99
 * @version 1.0 26 Jul 99
 */
interface SCSU {
    // ==========================
    // Generic window shift
    // ==========================
    static final int COMPRESSIONOFFSET = 0x80;

    // ==========================
    // Number of windows
    // ==========================
    static final int NUMWINDOWS = 8;
    static final int NUMSTATICWINDOWS = 8;

    // ==========================
    // Indicates a window index is invalid
    // ==========================
    static final int INVALIDWINDOW = -1;

    // ==========================
    // Indicates a character doesn't exist in input (past end of buffer)
    // ==========================
    static final int INVALIDCHAR = -1;

    // ==========================
    // Compression modes
    // ==========================
    static final int SINGLEBYTEMODE = 0;
    static final int UNICODEMODE = 1;

    // ==========================
    // Maximum value for a window's index
    // ==========================
    static final int MAXINDEX = 0xFF;

    // ==========================
    // Reserved index value (character belongs to the first block)
    // ==========================
    static final int RESERVEDINDEX = 0x00;

    // ==========================
    // Indices for scripts which cross a half-block boundary
    // ==========================
    static final int LATININDEX = 0xF9;
    static final int IPAEXTENSIONINDEX = 0xFA;
    static final int GREEKINDEX = 0xFB;
    static final int ARMENIANINDEX = 0xFC;
    static final int HIRAGANAINDEX = 0xFD;
    static final int KATAKANAINDEX = 0xFE;
    static final int HALFWIDTHKATAKANAINDEX = 0xFF;

    // ==========================
    // Single-byte mode tags
    // ==========================
    static final int SDEFINEX = 0x0B;
    static final int SRESERVED = 0x0C; // reserved value
    static final int SQUOTEU = 0x0E;
    static final int SCHANGEU = 0x0F;

    static final int SQUOTE0 = 0x01;
    static final int SQUOTE1 = 0x02;
    static final int SQUOTE2 = 0x03;
    static final int SQUOTE3 = 0x04;
    static final int SQUOTE4 = 0x05;
    static final int SQUOTE5 = 0x06;
    static final int SQUOTE6 = 0x07;
    static final int SQUOTE7 = 0x08;

    static final int SCHANGE0 = 0x10;
    static final int SCHANGE1 = 0x11;
    static final int SCHANGE2 = 0x12;
    static final int SCHANGE3 = 0x13;
    static final int SCHANGE4 = 0x14;
    static final int SCHANGE5 = 0x15;
    static final int SCHANGE6 = 0x16;
    static final int SCHANGE7 = 0x17;

    static final int SDEFINE0 = 0x18;
    static final int SDEFINE1 = 0x19;
    static final int SDEFINE2 = 0x1A;
    static final int SDEFINE3 = 0x1B;
    static final int SDEFINE4 = 0x1C;
    static final int SDEFINE5 = 0x1D;
    static final int SDEFINE6 = 0x1E;
    static final int SDEFINE7 = 0x1F;

    // ==========================
    // Unicode mode tags
    // ==========================
    static final int UCHANGE0 = 0xE0;
    static final int UCHANGE1 = 0xE1;
    static final int UCHANGE2 = 0xE2;
    static final int UCHANGE3 = 0xE3;
    static final int UCHANGE4 = 0xE4;
    static final int UCHANGE5 = 0xE5;
    static final int UCHANGE6 = 0xE6;
    static final int UCHANGE7 = 0xE7;

    static final int UDEFINE0 = 0xE8;
    static final int UDEFINE1 = 0xE9;
    static final int UDEFINE2 = 0xEA;
    static final int UDEFINE3 = 0xEB;
    static final int UDEFINE4 = 0xEC;
    static final int UDEFINE5 = 0xED;
    static final int UDEFINE6 = 0xEE;
    static final int UDEFINE7 = 0xEF;

    static final int UQUOTEU = 0xF0;
    static final int UDEFINEX = 0xF1;
    static final int URESERVED = 0xF2; // reserved value

    // ==========================
    // Class variables
    // ==========================

    /**
     * For window offset mapping: entry {@code i} is the window start offset selected by the
     * one-byte index {@code i} of a define-window tag. Entries that are 0x0 (other than index 0)
     * are written as zero in the generated table.
     */
    static final int[] sOffsetTable = {
        // table generated by CompressionTableGenerator
        // 0x00 - 0x67: index * 0x80
        0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380, // 0x00
        0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780, // 0x08
        0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, // 0x10
        0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80, // 0x18
        0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380, // 0x20
        0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780, // 0x28
        0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80, // 0x30
        0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80, // 0x38
        0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380, // 0x40
        0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780, // 0x48
        0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80, // 0x50
        0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80, // 0x58
        0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3380, // 0x60
        // 0x68 - 0xA7: index * 0x80 + 0xAC00
        0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380, // 0x68
        0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780, // 0x70
        0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80, // 0x78
        0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80, // 0x80
        0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380, // 0x88
        0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780, // 0x90
        0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80, // 0x98
        0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80, // 0xA0
        // 0xA8 - 0xF8: zero entries
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // 0xA8
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // 0xB0
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // 0xB8
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // 0xC0
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // 0xC8
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // 0xD0
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // 0xD8
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // 0xE0
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // 0xE8
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // 0xF0
        // 0xF8 (zero), then 0xF9 - 0xFF: the *INDEX constants above (LATININDEX ...)
        0x0, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60 // 0xF8
    };

    /** Static compression window offsets */
    static final int[] sOffsets = {
        0x0000, // for quoting single-byte mode tags
        0x0080, // Latin-1 Supplement
        0x0100, // Latin Extended-A
        0x0300, // Combining Diacritical Marks
        0x2000, // General Punctuation
        0x2080, // Currency Symbols
        0x2100, // Letterlike Symbols and Number Forms
        0x3000 // CJK Symbols and Punctuation
    };
}

// ---------------------------------------------------------------------------
// icu/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeDecompressor.java
// ---------------------------------------------------------------------------
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2016, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

/**
 * A decompression engine implementing the Standard Compression Scheme for Unicode (SCSU) as
 * outlined in <A HREF="http://www.unicode.org/unicode/reports/tr6">Unicode Technical Report #6</A>.
 *
 * <p><STRONG>USAGE</STRONG>
 *
 * <p>The static methods on <TT>UnicodeDecompressor</TT> may be used in a straightforward manner to
 * decompress simple strings:
 *
 * <PRE>
 *  byte [] compressed = ... ; // get compressed bytes from somewhere
 *  String result = UnicodeDecompressor.decompress(compressed);
 * </PRE>
 *
 * <p>The static methods have a fairly large memory footprint. For finer-grained control over memory
 * usage, <TT>UnicodeDecompressor</TT> offers more powerful APIs allowing iterative decompression:
 *
 * <PRE>
 *  // Decompress an array "bytes" of length "len" using a buffer of 512 chars
 *  // to the Writer "out"
 *
 *  UnicodeDecompressor myDecompressor         = new UnicodeDecompressor();
 *  final static int    BUFSIZE                = 512;
 *  char []             charBuffer             = new char [ BUFSIZE ];
 *  int                 charsWritten           = 0;
 *  int []              bytesRead              = new int [1];
 *  int                 totalBytesDecompressed = 0;
 *  int                 totalCharsWritten      = 0;
 *
 *  do {
 *    // do the decompression
 *    charsWritten = myDecompressor.decompress(bytes, totalBytesDecompressed,
 *                                             len, bytesRead,
 *                                             charBuffer, 0, BUFSIZE);
 *
 *    // do something with the current set of chars
 *    out.write(charBuffer, 0, charsWritten);
 *
 *    // update the no. of bytes decompressed
 *    totalBytesDecompressed += bytesRead[0];
 *
 *    // update the no. of chars written
 *    totalCharsWritten += charsWritten;
 *
 *  } while(totalBytesDecompressed &lt; len);
 *
 *  myDecompressor.reset(); // reuse decompressor
 * </PRE>
 *
 * <p>Decompression is performed according to the standard set forth in <A
 * HREF="http://www.unicode.org/unicode/reports/tr6">Unicode Technical Report #6</A>
 *
 * @see UnicodeCompressor
 * @author Stephen F. Booth
 * @stable ICU 2.4
 */
public final class UnicodeDecompressor implements SCSU {
    // ==========================
    // Instance variables
    // ==========================

    /** Alias to current dynamic window */
    private int fCurrentWindow = 0;

    /** Dynamic compression window offsets */
    private final int[] fOffsets = new int[NUMWINDOWS];

    /** Current compression mode */
    private int fMode = SINGLEBYTEMODE;

    /** Size of our internal buffer: the longest SCSU sequence is a tag plus two argument bytes */
    private static final int BUFSIZE = 3;

    /** Internal buffer holding the bytes of a sequence that was cut off at the end of the input */
    private final byte[] fBuffer = new byte[BUFSIZE];

    /** Number of bytes currently held in our internal buffer */
    private int fBufferLength = 0;

    /** Returned by the decode helpers when the output has no room for a surrogate pair. */
    private static final int NO_ROOM = -1;

    /**
     * Create a UnicodeDecompressor. Sets all windows to their default values.
     *
     * @see #reset
     * @stable ICU 2.4
     */
    public UnicodeDecompressor() {
        reset(); // initialize to defaults
    }

    /**
     * Decompress a byte array into a String.
     *
     * @param buffer The byte array to decompress.
     * @return A String containing the decompressed characters.
     * @see #decompress(byte [], int, int)
     * @stable ICU 2.4
     */
    public static String decompress(byte[] buffer) {
        char[] buf = decompress(buffer, 0, buffer.length);
        return new String(buf);
    }

    /**
     * Decompress a byte array into a Unicode character array.
     *
     * @param buffer The byte array to decompress.
     * @param start The start of the byte run to decompress.
     * @param limit The limit of the byte run to decompress.
     * @return A character array containing the decompressed bytes.
     * @see #decompress(byte [])
     * @stable ICU 2.4
     */
    public static char[] decompress(byte[] buffer, int start, int limit) {
        UnicodeDecompressor comp = new UnicodeDecompressor();

        // use a buffer we know will never overflow
        // in the worst case, each byte will decompress
        // to a surrogate pair (buffer must be at least 2 chars)
        int len = Math.max(2, 2 * (limit - start));
        char[] temp = new char[len];

        int charCount = comp.decompress(buffer, start, limit, null, temp, 0, len);

        char[] result = new char[charCount];
        System.arraycopy(temp, 0, result, 0, charCount);
        return result;
    }

    /**
     * Decompress a byte array into a Unicode character array.
     *
     * <p>This function will either fill the output buffer or consume the entire input. Two
     * refinements apply:
     *
     * <ul>
     *   <li>If the input ends in the middle of a multi-byte sequence, the partial sequence is
     *       saved internally (and counted as read); it is completed with the first bytes of the
     *       next call.
     *   <li>If the next sequence decodes to a surrogate pair and only one output slot is left, the
     *       sequence is left unread (it is not counted in <TT>bytesRead</TT>) so that the caller
     *       can pass it again with a fresh output buffer.
     * </ul>
     *
     * @param byteBuffer The byte buffer to decompress.
     * @param byteBufferStart The start of the byte run to decompress.
     * @param byteBufferLimit The limit of the byte run to decompress.
     * @param bytesRead A one-element array. If not null, on return the number of bytes read from
     *     byteBuffer.
     * @param charBuffer A buffer to receive the decompressed data. This buffer must be at minimum
     *     two characters in size.
     * @param charBufferStart The starting offset to which to write decompressed data.
     * @param charBufferLimit The limiting offset for writing decompressed data.
     * @return The number of Unicode characters written to charBuffer.
     * @throws IllegalArgumentException if charBuffer, or the range given by charBufferStart and
     *     charBufferLimit, is shorter than two characters.
     * @stable ICU 2.4
     */
    public int decompress(
            byte[] byteBuffer,
            int byteBufferStart,
            int byteBufferLimit,
            int[] bytesRead,
            char[] charBuffer,
            int charBufferStart,
            int charBufferLimit) {
        // charBuffer must be at least 2 chars in size
        if (charBuffer.length < 2 || (charBufferLimit - charBufferStart) < 2) {
            throw new IllegalArgumentException("charBuffer.length < 2");
        }

        // the current position in the source byte buffer
        int bytePos = byteBufferStart;

        // the current position in the target char buffer
        int ucPos = charBufferStart;

        // A previous call ended in the middle of a sequence: top the saved bytes up to exactly
        // one complete sequence and decode it before touching the rest of the input. Only the
        // bytes that sequence needs are taken, so nothing stale is decoded and nothing is
        // consumed out of order.
        if (fBufferLength > 0) {
            int needed = sequenceLength(fBuffer[0] & 0xFF);
            int take = Math.min(needed - fBufferLength, byteBufferLimit - bytePos);
            if (take > 0) {
                System.arraycopy(byteBuffer, bytePos, fBuffer, fBufferLength, take);
                fBufferLength += take;
                bytePos += take;
            }
            if (fBufferLength >= needed) {
                fBufferLength = 0;
                // cannot report NO_ROOM: at least two output slots were verified above
                ucPos += decodeSequence(fBuffer, 0, charBuffer, ucPos, charBufferLimit);
            }
            // otherwise the input is exhausted and the loop below does not run
        }

        // the main decompression loop: one complete sequence per iteration
        while (bytePos < byteBufferLimit && ucPos < charBufferLimit) {
            // must be computed before decoding, which may switch modes
            int needed = sequenceLength(byteBuffer[bytePos] & 0xFF);
            int available = byteBufferLimit - bytePos;

            if (available < needed) {
                // sequence is cut off by the end of the input (at most two bytes):
                // save it and report the bytes as read
                System.arraycopy(byteBuffer, bytePos, fBuffer, 0, available);
                fBufferLength = available;
                bytePos = byteBufferLimit;
                break;
            }

            int written = decodeSequence(byteBuffer, bytePos, charBuffer, ucPos, charBufferLimit);
            if (written == NO_ROOM) {
                // a surrogate pair does not fit; leave the sequence unread for the next call
                break;
            }
            bytePos += needed;
            ucPos += written;
        }

        // fill in output parameter
        if (bytesRead != null) {
            bytesRead[0] = bytePos - byteBufferStart;
        }

        // return # of chars written
        return ucPos - charBufferStart;
    }

    /**
     * Returns the total number of bytes (tag included) in the sequence introduced by {@code lead}
     * in the current mode.
     */
    private int sequenceLength(int lead) {
        if (fMode == SINGLEBYTEMODE) {
            if (lead == SQUOTEU || lead == SDEFINEX) {
                return 3; // tag + two argument bytes
            }
            boolean quote = lead >= SQUOTE0 && lead <= SQUOTE7;
            boolean define = lead >= SDEFINE0 && lead <= SDEFINE7;
            return (quote || define) ? 2 : 1;
        }
        if (lead >= UCHANGE0 && lead <= UCHANGE7) {
            return 1;
        }
        if (lead == UQUOTEU || lead == UDEFINEX) {
            return 3; // tag + two argument bytes
        }
        // UDEFINEn + table index, or a plain big-endian UTF-16 code unit
        return 2;
    }

    /**
     * Decodes the complete sequence starting at {@code src[pos]} in the current mode, updating
     * mode/window state and writing any output at {@code dst[dstPos]}.
     *
     * @return the number of chars written (0, 1 or 2), or {@link #NO_ROOM} if the sequence yields
     *     a surrogate pair that does not fit below {@code dstLimit}; in that case no state has
     *     been changed.
     */
    private int decodeSequence(byte[] src, int pos, char[] dst, int dstPos, int dstLimit) {
        int lead = src[pos] & 0xFF;
        return fMode == SINGLEBYTEMODE
                ? decodeSingleByteSequence(lead, src, pos, dst, dstPos, dstLimit)
                : decodeUnicodeSequence(lead, src, pos, dst, dstPos);
    }

    /** Single-byte mode half of {@link #decodeSequence}. */
    private int decodeSingleByteSequence(
            int lead, byte[] src, int pos, char[] dst, int dstPos, int dstLimit) {
        // All bytes from 0x80 through 0xFF are remapped to chars or surrogate pairs
        // according to the currently active window
        if (lead >= COMPRESSIONOFFSET) {
            int codePoint = fOffsets[fCurrentWindow] + (lead - COMPRESSIONOFFSET);
            return putCodePoint(codePoint, dst, dstPos, dstLimit);
        }

        // bytes from 0x20 through 0x7F are treated as ASCII and are remapped to chars by
        // padding the high byte (this is the same as quoting from static window 0);
        // NUL (0x00), HT (0x09), LF (0x0A), CR (0x0D) are treated as ASCII as well
        if (lead >= 0x20 || lead == 0x00 || lead == 0x09 || lead == 0x0A || lead == 0x0D) {
            dst[dstPos] = (char) lead;
            return 1;
        }

        // quote tags: if the argument is in the range 0x00 - 0x7F use static window n,
        // otherwise use dynamic window n (which may lie in the supplementary planes)
        if (lead >= SQUOTE0 && lead <= SQUOTE7) {
            int window = lead - SQUOTE0;
            int arg = src[pos + 1] & 0xFF;
            int codePoint =
                    arg < COMPRESSIONOFFSET
                            ? sOffsets[window] + arg
                            : fOffsets[window] + (arg - COMPRESSIONOFFSET);
            return putCodePoint(codePoint, dst, dstPos, dstLimit);
        }

        // change tags
        if (lead >= SCHANGE0 && lead <= SCHANGE7) {
            fCurrentWindow = lead - SCHANGE0;
            return 0;
        }

        // define tags
        if (lead >= SDEFINE0 && lead <= SDEFINE7) {
            fCurrentWindow = lead - SDEFINE0;
            fOffsets[fCurrentWindow] = sOffsetTable[src[pos + 1] & 0xFF];
            return 0;
        }

        switch (lead) {
            case SQUOTEU: // quote unicode
                dst[dstPos] = codeUnit(src, pos + 1);
                return 1;
            case SCHANGEU: // switch to Unicode mode
                fMode = UNICODEMODE;
                return 0;
            case SDEFINEX: // define extended
                defineExtendedWindow(src, pos + 1);
                return 0;
            default: // SRESERVED: reserved, shouldn't happen; ignored
                return 0;
        }
    }

    /** Unicode mode half of {@link #decodeSequence}; never needs more than one output slot. */
    private int decodeUnicodeSequence(int lead, byte[] src, int pos, char[] dst, int dstPos) {
        // change tags: select window and return to single-byte mode
        if (lead >= UCHANGE0 && lead <= UCHANGE7) {
            fCurrentWindow = lead - UCHANGE0;
            fMode = SINGLEBYTEMODE;
            return 0;
        }

        // define tags: define window and return to single-byte mode
        if (lead >= UDEFINE0 && lead <= UDEFINE7) {
            fCurrentWindow = lead - UDEFINE0;
            fOffsets[fCurrentWindow] = sOffsetTable[src[pos + 1] & 0xFF];
            fMode = SINGLEBYTEMODE;
            return 0;
        }

        // define extended: define window and return to single-byte mode
        if (lead == UDEFINEX) {
            defineExtendedWindow(src, pos + 1);
            fMode = SINGLEBYTEMODE;
            return 0;
        }

        // quote unicode: the code unit follows the tag; otherwise the lead byte itself is
        // the high byte of the code unit
        int first = (lead == UQUOTEU) ? pos + 1 : pos;
        dst[dstPos] = codeUnit(src, first);
        return 1;
    }

    /** Reads the big-endian UTF-16 code unit stored at {@code src[at]}, {@code src[at + 1]}. */
    private static char codeUnit(byte[] src, int at) {
        return (char) (((src[at] & 0xFF) << 8) | (src[at + 1] & 0xFF));
    }

    /**
     * Applies the two argument bytes of a define-extended tag found at {@code src[at]}: the top
     * three bits select the window (which becomes current), the remaining 13 bits times 0x80,
     * plus 0x10000, give its offset.
     */
    private void defineExtendedWindow(byte[] src, int at) {
        int high = src[at] & 0xFF;
        int low = src[at + 1] & 0xFF;
        fCurrentWindow = (high & 0xE0) >> 5;
        fOffsets[fCurrentWindow] = 0x10000 + (0x80 * (((high & 0x1F) << 8) | low));
    }

    /**
     * Writes {@code codePoint} at {@code dst[dstPos]} as one char, or as a surrogate pair when it
     * is above 0xFFFF.
     *
     * @return the number of chars written, or {@link #NO_ROOM} if a pair is required but fewer
     *     than two slots remain below {@code dstLimit}.
     */
    private static int putCodePoint(int codePoint, char[] dst, int dstPos, int dstLimit) {
        if (codePoint <= 0xFFFF) {
            dst[dstPos] = (char) codePoint;
            return 1;
        }
        if ((dstPos + 1) >= dstLimit) {
            return NO_ROOM;
        }
        // normBase   = code point - 0x10000
        // high surr. = 0xD800 + (normBase >> 10)
        // low surr.  = 0xDC00 + (normBase & 0x3FF)
        int normalized = codePoint - 0x10000;
        dst[dstPos] = (char) (0xD800 + (normalized >> 10));
        dst[dstPos + 1] = (char) (0xDC00 + (normalized & 0x3FF));
        return 2;
    }

    /**
     * Reset the decompressor to its initial state.
     *
     * @stable ICU 2.4
     */
    public void reset() {
        // reset dynamic windows
        fOffsets[0] = 0x0080; // Latin-1
        fOffsets[1] = 0x00C0; // Latin-1 Supplement + Latin Extended-A
        fOffsets[2] = 0x0400; // Cyrillic
        fOffsets[3] = 0x0600; // Arabic
        fOffsets[4] = 0x0900; // Devanagari
        fOffsets[5] = 0x3040; // Hiragana
        fOffsets[6] = 0x30A0; // Katakana
        fOffsets[7] = 0xFF00; // Fullwidth ASCII

        fCurrentWindow = 0; // Make current window Latin-1
        fMode = SINGLEBYTEMODE; // Always start in single-byte mode
        fBufferLength = 0; // Empty buffer
    }
}