aboutsummaryrefslogtreecommitdiff
path: root/icu/icu4j/main/classes/core
diff options
context:
space:
mode:
Diffstat (limited to 'icu/icu4j/main/classes/core')
-rw-r--r-- icu/icu4j/main/classes/core/src/com/ibm/icu/text/SCSU.java | 405
-rw-r--r-- icu/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeDecompressor.java | 795
2 files changed, 1200 insertions, 0 deletions
diff --git a/icu/icu4j/main/classes/core/src/com/ibm/icu/text/SCSU.java b/icu/icu4j/main/classes/core/src/com/ibm/icu/text/SCSU.java
new file mode 100644
index 00000000..a257c1c6
--- /dev/null
+++ b/icu/icu4j/main/classes/core/src/com/ibm/icu/text/SCSU.java
@@ -0,0 +1,405 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2007, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.text;
+
+/**
+ * An interface defining constants for the Standard Compression Scheme for Unicode (SCSU) as
+ * outlined in <A HREF="http://www.unicode.org/unicode/reports/tr6">Unicode Technical Report #6</A>.
+ *
+ * @author Stephen F. Booth
+ * @version 1.1 05 Aug 99
+ * @version 1.0 26 Jul 99
+ */
+interface SCSU {
+ // ==========================
+ // Generic window shift
+ // ==========================
+ static final int COMPRESSIONOFFSET = 0x80;
+
+ // ==========================
+ // Number of windows
+ // ==========================
+ static final int NUMWINDOWS = 8;
+ static final int NUMSTATICWINDOWS = 8;
+
+ // ==========================
+ // Indicates a window index is invalid
+ // ==========================
+ static final int INVALIDWINDOW = -1;
+
+ // ==========================
+ // Indicates a character doesn't exist in input (past end of buffer)
+ // ==========================
+ static final int INVALIDCHAR = -1;
+
+ // ==========================
+ // Compression modes
+ // ==========================
+ static final int SINGLEBYTEMODE = 0;
+ static final int UNICODEMODE = 1;
+
+ // ==========================
+ // Maximum value for a window's index
+ // ==========================
+ static final int MAXINDEX = 0xFF;
+
+ // ==========================
+ // Reserved index value (character belongs to the first block)
+ // ==========================
+ static final int RESERVEDINDEX = 0x00;
+
+ // ==========================
+ // Indices for scripts which cross a half-block boundary
+ // ==========================
+ static final int LATININDEX = 0xF9;
+ static final int IPAEXTENSIONINDEX = 0xFA;
+ static final int GREEKINDEX = 0xFB;
+ static final int ARMENIANINDEX = 0xFC;
+ static final int HIRAGANAINDEX = 0xFD;
+ static final int KATAKANAINDEX = 0xFE;
+ static final int HALFWIDTHKATAKANAINDEX = 0xFF;
+
+ // ==========================
+ // Single-byte mode tags
+ // ==========================
+ static final int SDEFINEX = 0x0B;
+ static final int SRESERVED = 0x0C; // reserved value
+ static final int SQUOTEU = 0x0E;
+ static final int SCHANGEU = 0x0F;
+
+ static final int SQUOTE0 = 0x01;
+ static final int SQUOTE1 = 0x02;
+ static final int SQUOTE2 = 0x03;
+ static final int SQUOTE3 = 0x04;
+ static final int SQUOTE4 = 0x05;
+ static final int SQUOTE5 = 0x06;
+ static final int SQUOTE6 = 0x07;
+ static final int SQUOTE7 = 0x08;
+
+ static final int SCHANGE0 = 0x10;
+ static final int SCHANGE1 = 0x11;
+ static final int SCHANGE2 = 0x12;
+ static final int SCHANGE3 = 0x13;
+ static final int SCHANGE4 = 0x14;
+ static final int SCHANGE5 = 0x15;
+ static final int SCHANGE6 = 0x16;
+ static final int SCHANGE7 = 0x17;
+
+ static final int SDEFINE0 = 0x18;
+ static final int SDEFINE1 = 0x19;
+ static final int SDEFINE2 = 0x1A;
+ static final int SDEFINE3 = 0x1B;
+ static final int SDEFINE4 = 0x1C;
+ static final int SDEFINE5 = 0x1D;
+ static final int SDEFINE6 = 0x1E;
+ static final int SDEFINE7 = 0x1F;
+
+ // ==========================
+ // Unicode mode tags
+ // ==========================
+ static final int UCHANGE0 = 0xE0;
+ static final int UCHANGE1 = 0xE1;
+ static final int UCHANGE2 = 0xE2;
+ static final int UCHANGE3 = 0xE3;
+ static final int UCHANGE4 = 0xE4;
+ static final int UCHANGE5 = 0xE5;
+ static final int UCHANGE6 = 0xE6;
+ static final int UCHANGE7 = 0xE7;
+
+ static final int UDEFINE0 = 0xE8;
+ static final int UDEFINE1 = 0xE9;
+ static final int UDEFINE2 = 0xEA;
+ static final int UDEFINE3 = 0xEB;
+ static final int UDEFINE4 = 0xEC;
+ static final int UDEFINE5 = 0xED;
+ static final int UDEFINE6 = 0xEE;
+ static final int UDEFINE7 = 0xEF;
+
+ static final int UQUOTEU = 0xF0;
+ static final int UDEFINEX = 0xF1;
+ static final int URESERVED = 0xF2; // reserved value
+
+ // ==========================
+ // Class variables
+ // ==========================
+
+ /** For window offset mapping */
+ static final int[] sOffsetTable = {
+ // table generated by CompressionTableGenerator
+ 0x0,
+ 0x80,
+ 0x100,
+ 0x180,
+ 0x200,
+ 0x280,
+ 0x300,
+ 0x380,
+ 0x400,
+ 0x480,
+ 0x500,
+ 0x580,
+ 0x600,
+ 0x680,
+ 0x700,
+ 0x780,
+ 0x800,
+ 0x880,
+ 0x900,
+ 0x980,
+ 0xa00,
+ 0xa80,
+ 0xb00,
+ 0xb80,
+ 0xc00,
+ 0xc80,
+ 0xd00,
+ 0xd80,
+ 0xe00,
+ 0xe80,
+ 0xf00,
+ 0xf80,
+ 0x1000,
+ 0x1080,
+ 0x1100,
+ 0x1180,
+ 0x1200,
+ 0x1280,
+ 0x1300,
+ 0x1380,
+ 0x1400,
+ 0x1480,
+ 0x1500,
+ 0x1580,
+ 0x1600,
+ 0x1680,
+ 0x1700,
+ 0x1780,
+ 0x1800,
+ 0x1880,
+ 0x1900,
+ 0x1980,
+ 0x1a00,
+ 0x1a80,
+ 0x1b00,
+ 0x1b80,
+ 0x1c00,
+ 0x1c80,
+ 0x1d00,
+ 0x1d80,
+ 0x1e00,
+ 0x1e80,
+ 0x1f00,
+ 0x1f80,
+ 0x2000,
+ 0x2080,
+ 0x2100,
+ 0x2180,
+ 0x2200,
+ 0x2280,
+ 0x2300,
+ 0x2380,
+ 0x2400,
+ 0x2480,
+ 0x2500,
+ 0x2580,
+ 0x2600,
+ 0x2680,
+ 0x2700,
+ 0x2780,
+ 0x2800,
+ 0x2880,
+ 0x2900,
+ 0x2980,
+ 0x2a00,
+ 0x2a80,
+ 0x2b00,
+ 0x2b80,
+ 0x2c00,
+ 0x2c80,
+ 0x2d00,
+ 0x2d80,
+ 0x2e00,
+ 0x2e80,
+ 0x2f00,
+ 0x2f80,
+ 0x3000,
+ 0x3080,
+ 0x3100,
+ 0x3180,
+ 0x3200,
+ 0x3280,
+ 0x3300,
+ 0x3380,
+ 0xe000,
+ 0xe080,
+ 0xe100,
+ 0xe180,
+ 0xe200,
+ 0xe280,
+ 0xe300,
+ 0xe380,
+ 0xe400,
+ 0xe480,
+ 0xe500,
+ 0xe580,
+ 0xe600,
+ 0xe680,
+ 0xe700,
+ 0xe780,
+ 0xe800,
+ 0xe880,
+ 0xe900,
+ 0xe980,
+ 0xea00,
+ 0xea80,
+ 0xeb00,
+ 0xeb80,
+ 0xec00,
+ 0xec80,
+ 0xed00,
+ 0xed80,
+ 0xee00,
+ 0xee80,
+ 0xef00,
+ 0xef80,
+ 0xf000,
+ 0xf080,
+ 0xf100,
+ 0xf180,
+ 0xf200,
+ 0xf280,
+ 0xf300,
+ 0xf380,
+ 0xf400,
+ 0xf480,
+ 0xf500,
+ 0xf580,
+ 0xf600,
+ 0xf680,
+ 0xf700,
+ 0xf780,
+ 0xf800,
+ 0xf880,
+ 0xf900,
+ 0xf980,
+ 0xfa00,
+ 0xfa80,
+ 0xfb00,
+ 0xfb80,
+ 0xfc00,
+ 0xfc80,
+ 0xfd00,
+ 0xfd80,
+ 0xfe00,
+ 0xfe80,
+ 0xff00,
+ 0xff80,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0xc0,
+ 0x250,
+ 0x370,
+ 0x530,
+ 0x3040,
+ 0x30a0,
+ 0xff60
+ };
+
+ /** Static compression window offsets */
+ static final int[] sOffsets = {
+ 0x0000, // for quoting single-byte mode tags
+ 0x0080, // Latin-1 Supplement
+ 0x0100, // Latin Extended-A
+ 0x0300, // Combining Diacritical Marks
+ 0x2000, // General Punctuation
+ 0x2080, // Currency Symbols
+ 0x2100, // Letterlike Symbols and Number Forms
+ 0x3000 // CJK Symbols and Punctuation
+ };
+}
diff --git a/icu/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeDecompressor.java b/icu/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeDecompressor.java
new file mode 100644
index 00000000..6e2a2d71
--- /dev/null
+++ b/icu/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeDecompressor.java
@@ -0,0 +1,795 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2016, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.text;
+
+/**
+ * A decompression engine implementing the Standard Compression Scheme for Unicode (SCSU) as
+ * outlined in <A HREF="http://www.unicode.org/unicode/reports/tr6">Unicode Technical Report #6</A>.
+ *
+ * <p><STRONG>USAGE</STRONG>
+ *
+ * <p>The static methods on <TT>UnicodeDecompressor</TT> may be used in a straightforward manner to
+ * decompress simple strings:
+ *
+ * <PRE>
+ * byte [] compressed = ... ; // get compressed bytes from somewhere
+ * String result = UnicodeDecompressor.decompress(compressed);
+ * </PRE>
+ *
+ * <p>The static methods have a fairly large memory footprint. For finer-grained control over memory
+ * usage, <TT>UnicodeDecompressor</TT> offers more powerful APIs allowing iterative decompression:
+ *
+ * <PRE>
+ * // Decompress an array "bytes" of length "len" using a buffer of 512 chars
+ * // to the Writer "out"
+ *
+ * UnicodeDecompressor myDecompressor = new UnicodeDecompressor();
+ * final static int BUFSIZE = 512;
+ * char [] charBuffer = new char [ BUFSIZE ];
+ * int charsWritten = 0;
+ * int [] bytesRead = new int [1];
+ * int totalBytesDecompressed = 0;
+ * int totalCharsWritten = 0;
+ *
+ * do {
+ * // do the decompression
+ * charsWritten = myDecompressor.decompress(bytes, totalBytesDecompressed,
+ * len, bytesRead,
+ * charBuffer, 0, BUFSIZE);
+ *
+ * // do something with the current set of chars
+ * out.write(charBuffer, 0, charsWritten);
+ *
+ * // update the no. of bytes decompressed
+ * totalBytesDecompressed += bytesRead[0];
+ *
+ * // update the no. of chars written
+ * totalCharsWritten += charsWritten;
+ *
+ * } while(totalBytesDecompressed &lt; len);
+ *
+ * myDecompressor.reset(); // reuse decompressor
+ * </PRE>
+ *
+ * <p>Decompression is performed according to the standard set forth in <A
+ * HREF="http://www.unicode.org/unicode/reports/tr6">Unicode Technical Report #6</A>
+ *
+ * @see UnicodeCompressor
+ * @author Stephen F. Booth
+ * @stable ICU 2.4
+ */
+public final class UnicodeDecompressor implements SCSU {
+ // ==========================
+ // Instance variables
+ // ==========================
+
+ /** Alias to current dynamic window */
+ private int fCurrentWindow = 0;
+
+ /** Dynamic compression window offsets */
+ private int[] fOffsets = new int[NUMWINDOWS];
+
+ /** Current compression mode */
+ private int fMode = SINGLEBYTEMODE;
+
+ /** Size of our internal buffer */
+ private static final int BUFSIZE = 3;
+
+ /** Internal buffer for saving state */
+ private byte[] fBuffer = new byte[BUFSIZE];
+
+ /** Number of bytes in our internal buffer */
+ private int fBufferLength = 0;
+
+ /**
+ * Create a UnicodeDecompressor. Sets all windows to their default values.
+ *
+ * @see #reset
+ * @stable ICU 2.4
+ */
+ public UnicodeDecompressor() {
+ reset(); // initialize to defaults
+ }
+
+ /**
+ * Decompress a byte array into a String.
+ *
+ * @param buffer The byte array to decompress.
+ * @return A String containing the decompressed characters.
+ * @see #decompress(byte [], int, int)
+ * @stable ICU 2.4
+ */
+ public static String decompress(byte[] buffer) {
+ char[] buf = decompress(buffer, 0, buffer.length);
+ return new String(buf);
+ }
+
+ /**
+ * Decompress a byte array into a Unicode character array.
+ *
+ * @param buffer The byte array to decompress.
+ * @param start The start of the byte run to decompress.
+ * @param limit The limit of the byte run to decompress.
+ * @return A character array containing the decompressed characters.
+ * @see #decompress(byte [])
+ * @stable ICU 2.4
+ */
+ public static char[] decompress(byte[] buffer, int start, int limit) {
+ UnicodeDecompressor comp = new UnicodeDecompressor();
+
+ // use a buffer we know will never overflow
+ // in the worst case, each byte will decompress
+ // to a surrogate pair (buffer must be at least 2 chars)
+ int len = Math.max(2, 2 * (limit - start));
+ char[] temp = new char[len];
+
+ int charCount = comp.decompress(buffer, start, limit, null, temp, 0, len);
+
+ char[] result = new char[charCount];
+ System.arraycopy(temp, 0, result, 0, charCount);
+ return result;
+ }
+
+ /**
+ * Decompress a byte array into a Unicode character array.
+ *
+ * <p>This function will either completely fill the output buffer, or consume the entire input.
+ *
+ * @param byteBuffer The byte buffer to decompress.
+ * @param byteBufferStart The start of the byte run to decompress.
+ * @param byteBufferLimit The limit of the byte run to decompress.
+ * @param bytesRead A one-element array. If not null, on return element 0 holds the number of
+ * bytes read from byteBuffer.
+ * @param charBuffer A buffer to receive the decompressed data. This buffer must be at minimum
+ * two characters in size.
+ * @param charBufferStart The starting offset to which to write decompressed data.
+ * @param charBufferLimit The limiting offset for writing decompressed data.
+ * @return The number of Unicode characters written to charBuffer.
+ * @stable ICU 2.4
+ */
+ public int decompress(
+ byte[] byteBuffer,
+ int byteBufferStart,
+ int byteBufferLimit,
+ int[] bytesRead,
+ char[] charBuffer,
+ int charBufferStart,
+ int charBufferLimit) {
+ // the current position in the source byte buffer
+ int bytePos = byteBufferStart;
+
+ // the current position in the target char buffer
+ int ucPos = charBufferStart;
+
+ // the current byte from the source buffer
+ int aByte = 0x00;
+
+ // charBuffer must be at least 2 chars in size
+ if (charBuffer.length < 2 || (charBufferLimit - charBufferStart) < 2)
+ throw new IllegalArgumentException("charBuffer.length < 2");
+
+ // if our internal buffer isn't empty, flush its contents
+ // to the output buffer before doing any more decompression
+ if (fBufferLength > 0) {
+
+ int newBytes = 0;
+
+ // fill the buffer completely, to guarantee one full character
+ if (fBufferLength != BUFSIZE) {
+ newBytes = fBuffer.length - fBufferLength;
+
+ // verify there are newBytes bytes in byteBuffer
+ if (byteBufferLimit - byteBufferStart < newBytes)
+ newBytes = byteBufferLimit - byteBufferStart;
+
+ System.arraycopy(byteBuffer, byteBufferStart, fBuffer, fBufferLength, newBytes);
+ }
+
+ // reset buffer length to 0 before recursive call
+ fBufferLength = 0;
+
+ // call self recursively to decompress the buffer
+ int count =
+ decompress(
+ fBuffer,
+ 0,
+ fBuffer.length,
+ null,
+ charBuffer,
+ charBufferStart,
+ charBufferLimit);
+
+ // update the positions into the arrays
+ ucPos += count;
+ bytePos += newBytes;
+ }
+
+ // the main decompression loop
+ mainLoop:
+ while (bytePos < byteBufferLimit && ucPos < charBufferLimit) {
+ switch (fMode) {
+ case SINGLEBYTEMODE:
+ // single-byte mode decompression loop
+ singleByteModeLoop:
+ while (bytePos < byteBufferLimit && ucPos < charBufferLimit) {
+ aByte = byteBuffer[bytePos++] & 0xFF;
+ switch (aByte) {
+ // All bytes from 0x80 through 0xFF are remapped
+ // to chars or surrogate pairs according to the
+ // currently active window
+ case 0x80:
+ case 0x81:
+ case 0x82:
+ case 0x83:
+ case 0x84:
+ case 0x85:
+ case 0x86:
+ case 0x87:
+ case 0x88:
+ case 0x89:
+ case 0x8A:
+ case 0x8B:
+ case 0x8C:
+ case 0x8D:
+ case 0x8E:
+ case 0x8F:
+ case 0x90:
+ case 0x91:
+ case 0x92:
+ case 0x93:
+ case 0x94:
+ case 0x95:
+ case 0x96:
+ case 0x97:
+ case 0x98:
+ case 0x99:
+ case 0x9A:
+ case 0x9B:
+ case 0x9C:
+ case 0x9D:
+ case 0x9E:
+ case 0x9F:
+ case 0xA0:
+ case 0xA1:
+ case 0xA2:
+ case 0xA3:
+ case 0xA4:
+ case 0xA5:
+ case 0xA6:
+ case 0xA7:
+ case 0xA8:
+ case 0xA9:
+ case 0xAA:
+ case 0xAB:
+ case 0xAC:
+ case 0xAD:
+ case 0xAE:
+ case 0xAF:
+ case 0xB0:
+ case 0xB1:
+ case 0xB2:
+ case 0xB3:
+ case 0xB4:
+ case 0xB5:
+ case 0xB6:
+ case 0xB7:
+ case 0xB8:
+ case 0xB9:
+ case 0xBA:
+ case 0xBB:
+ case 0xBC:
+ case 0xBD:
+ case 0xBE:
+ case 0xBF:
+ case 0xC0:
+ case 0xC1:
+ case 0xC2:
+ case 0xC3:
+ case 0xC4:
+ case 0xC5:
+ case 0xC6:
+ case 0xC7:
+ case 0xC8:
+ case 0xC9:
+ case 0xCA:
+ case 0xCB:
+ case 0xCC:
+ case 0xCD:
+ case 0xCE:
+ case 0xCF:
+ case 0xD0:
+ case 0xD1:
+ case 0xD2:
+ case 0xD3:
+ case 0xD4:
+ case 0xD5:
+ case 0xD6:
+ case 0xD7:
+ case 0xD8:
+ case 0xD9:
+ case 0xDA:
+ case 0xDB:
+ case 0xDC:
+ case 0xDD:
+ case 0xDE:
+ case 0xDF:
+ case 0xE0:
+ case 0xE1:
+ case 0xE2:
+ case 0xE3:
+ case 0xE4:
+ case 0xE5:
+ case 0xE6:
+ case 0xE7:
+ case 0xE8:
+ case 0xE9:
+ case 0xEA:
+ case 0xEB:
+ case 0xEC:
+ case 0xED:
+ case 0xEE:
+ case 0xEF:
+ case 0xF0:
+ case 0xF1:
+ case 0xF2:
+ case 0xF3:
+ case 0xF4:
+ case 0xF5:
+ case 0xF6:
+ case 0xF7:
+ case 0xF8:
+ case 0xF9:
+ case 0xFA:
+ case 0xFB:
+ case 0xFC:
+ case 0xFD:
+ case 0xFE:
+ case 0xFF:
+ // For offsets <= 0xFFFF, convert to a single char
+ // by adding the window's offset and subtracting
+ // the generic compression offset
+ if (fOffsets[fCurrentWindow] <= 0xFFFF) {
+ charBuffer[ucPos++] =
+ (char)
+ (aByte
+ + fOffsets[fCurrentWindow]
+ - COMPRESSIONOFFSET);
+ }
+ // For offsets >= 0x10000, convert to a surrogate pair by
+ // normBase = window's offset - 0x10000
+ // high surr. = 0xD800 + (normBase >> 10)
+ // low surr. = 0xDC00 + (normBase & 0x3FF) + (byte & 0x7F)
+ else {
+ // make sure there is enough room to write both characters;
+ // if not, save state and break out
+ // NOTE(review): more than BUFSIZE input bytes may remain here, overflowing fBuffer -- confirm
+ if ((ucPos + 1) >= charBufferLimit) {
+ --bytePos;
+ System.arraycopy(
+ byteBuffer,
+ bytePos,
+ fBuffer,
+ 0,
+ byteBufferLimit - bytePos);
+ fBufferLength = byteBufferLimit - bytePos;
+ bytePos += fBufferLength;
+ break mainLoop;
+ }
+
+ int normalizedBase = fOffsets[fCurrentWindow] - 0x10000;
+ charBuffer[ucPos++] = (char) (0xD800 + (normalizedBase >> 10));
+ charBuffer[ucPos++] =
+ (char)
+ (0xDC00
+ + (normalizedBase & 0x3FF)
+ + (aByte & 0x7F));
+ }
+ break;
+
+ // bytes from 0x20 through 0x7F are treated as ASCII and
+ // are remapped to chars by padding the high byte
+ // (this is the same as quoting from static window 0)
+ // NUL (0x00), HT (0x09), LF (0x0A), CR (0x0D)
+ // are treated as ASCII as well
+ case 0x00:
+ case 0x09:
+ case 0x0A:
+ case 0x0D:
+ case 0x20:
+ case 0x21:
+ case 0x22:
+ case 0x23:
+ case 0x24:
+ case 0x25:
+ case 0x26:
+ case 0x27:
+ case 0x28:
+ case 0x29:
+ case 0x2A:
+ case 0x2B:
+ case 0x2C:
+ case 0x2D:
+ case 0x2E:
+ case 0x2F:
+ case 0x30:
+ case 0x31:
+ case 0x32:
+ case 0x33:
+ case 0x34:
+ case 0x35:
+ case 0x36:
+ case 0x37:
+ case 0x38:
+ case 0x39:
+ case 0x3A:
+ case 0x3B:
+ case 0x3C:
+ case 0x3D:
+ case 0x3E:
+ case 0x3F:
+ case 0x40:
+ case 0x41:
+ case 0x42:
+ case 0x43:
+ case 0x44:
+ case 0x45:
+ case 0x46:
+ case 0x47:
+ case 0x48:
+ case 0x49:
+ case 0x4A:
+ case 0x4B:
+ case 0x4C:
+ case 0x4D:
+ case 0x4E:
+ case 0x4F:
+ case 0x50:
+ case 0x51:
+ case 0x52:
+ case 0x53:
+ case 0x54:
+ case 0x55:
+ case 0x56:
+ case 0x57:
+ case 0x58:
+ case 0x59:
+ case 0x5A:
+ case 0x5B:
+ case 0x5C:
+ case 0x5D:
+ case 0x5E:
+ case 0x5F:
+ case 0x60:
+ case 0x61:
+ case 0x62:
+ case 0x63:
+ case 0x64:
+ case 0x65:
+ case 0x66:
+ case 0x67:
+ case 0x68:
+ case 0x69:
+ case 0x6A:
+ case 0x6B:
+ case 0x6C:
+ case 0x6D:
+ case 0x6E:
+ case 0x6F:
+ case 0x70:
+ case 0x71:
+ case 0x72:
+ case 0x73:
+ case 0x74:
+ case 0x75:
+ case 0x76:
+ case 0x77:
+ case 0x78:
+ case 0x79:
+ case 0x7A:
+ case 0x7B:
+ case 0x7C:
+ case 0x7D:
+ case 0x7E:
+ case 0x7F:
+ charBuffer[ucPos++] = (char) aByte;
+ break;
+
+ // quote unicode
+ case SQUOTEU:
+ // verify we have two bytes following tag
+ // if not, save state and break out
+ if ((bytePos + 1) >= byteBufferLimit) {
+ --bytePos;
+ System.arraycopy(
+ byteBuffer,
+ bytePos,
+ fBuffer,
+ 0,
+ byteBufferLimit - bytePos);
+ fBufferLength = byteBufferLimit - bytePos;
+ bytePos += fBufferLength;
+ break mainLoop;
+ }
+
+ aByte = byteBuffer[bytePos++];
+ charBuffer[ucPos++] =
+ (char) (aByte << 8 | (byteBuffer[bytePos++] & 0xFF));
+ break;
+
+ // switch to Unicode mode
+ case SCHANGEU:
+ fMode = UNICODEMODE;
+ break singleByteModeLoop;
+ // break;
+
+ // handle all quote tags
+ case SQUOTE0:
+ case SQUOTE1:
+ case SQUOTE2:
+ case SQUOTE3:
+ case SQUOTE4:
+ case SQUOTE5:
+ case SQUOTE6:
+ case SQUOTE7:
+ // verify there is a byte following the tag
+ // if not, save state and break out
+ if (bytePos >= byteBufferLimit) {
+ --bytePos;
+ System.arraycopy(
+ byteBuffer,
+ bytePos,
+ fBuffer,
+ 0,
+ byteBufferLimit - bytePos);
+ fBufferLength = byteBufferLimit - bytePos;
+ bytePos += fBufferLength;
+ break mainLoop;
+ }
+
+ // if the byte is in the range 0x00 - 0x7F, use
+ // static window n; otherwise, use dynamic window n
+ int dByte = byteBuffer[bytePos++] & 0xFF;
+ charBuffer[ucPos++] =
+ (char)
+ (dByte
+ + (dByte >= 0x00 && dByte < 0x80
+ ? sOffsets[aByte - SQUOTE0]
+ : (fOffsets[aByte - SQUOTE0]
+ - COMPRESSIONOFFSET)));
+ break;
+
+ // handle all change tags
+ case SCHANGE0:
+ case SCHANGE1:
+ case SCHANGE2:
+ case SCHANGE3:
+ case SCHANGE4:
+ case SCHANGE5:
+ case SCHANGE6:
+ case SCHANGE7:
+ fCurrentWindow = aByte - SCHANGE0;
+ break;
+
+ // handle all define tags
+ case SDEFINE0:
+ case SDEFINE1:
+ case SDEFINE2:
+ case SDEFINE3:
+ case SDEFINE4:
+ case SDEFINE5:
+ case SDEFINE6:
+ case SDEFINE7:
+ // verify there is a byte following the tag
+ // if not, save state and break out
+ if (bytePos >= byteBufferLimit) {
+ --bytePos;
+ System.arraycopy(
+ byteBuffer,
+ bytePos,
+ fBuffer,
+ 0,
+ byteBufferLimit - bytePos);
+ fBufferLength = byteBufferLimit - bytePos;
+ bytePos += fBufferLength;
+ break mainLoop;
+ }
+
+ fCurrentWindow = aByte - SDEFINE0;
+ fOffsets[fCurrentWindow] =
+ sOffsetTable[byteBuffer[bytePos++] & 0xFF];
+ break;
+
+ // handle define extended tag
+ case SDEFINEX:
+ // verify we have two bytes following tag
+ // if not, save state and break out
+ if ((bytePos + 1) >= byteBufferLimit) {
+ --bytePos;
+ System.arraycopy(
+ byteBuffer,
+ bytePos,
+ fBuffer,
+ 0,
+ byteBufferLimit - bytePos);
+ fBufferLength = byteBufferLimit - bytePos;
+ bytePos += fBufferLength;
+ break mainLoop;
+ }
+
+ aByte = byteBuffer[bytePos++] & 0xFF;
+ fCurrentWindow = (aByte & 0xE0) >> 5;
+ fOffsets[fCurrentWindow] =
+ 0x10000
+ + (0x80
+ * (((aByte & 0x1F) << 8)
+ | (byteBuffer[bytePos++] & 0xFF)));
+ break;
+
+ // reserved, shouldn't happen
+ case SRESERVED:
+ break;
+ } // end switch
+ } // end while
+ break;
+
+ case UNICODEMODE:
+ // unicode mode decompression loop
+ unicodeModeLoop:
+ while (bytePos < byteBufferLimit && ucPos < charBufferLimit) {
+ aByte = byteBuffer[bytePos++] & 0xFF;
+ switch (aByte) {
+ // handle all define tags
+ case UDEFINE0:
+ case UDEFINE1:
+ case UDEFINE2:
+ case UDEFINE3:
+ case UDEFINE4:
+ case UDEFINE5:
+ case UDEFINE6:
+ case UDEFINE7:
+ // verify there is a byte following tag
+ // if not, save state and break out
+ if (bytePos >= byteBufferLimit) {
+ --bytePos;
+ System.arraycopy(
+ byteBuffer,
+ bytePos,
+ fBuffer,
+ 0,
+ byteBufferLimit - bytePos);
+ fBufferLength = byteBufferLimit - bytePos;
+ bytePos += fBufferLength;
+ break mainLoop;
+ }
+
+ fCurrentWindow = aByte - UDEFINE0;
+ fOffsets[fCurrentWindow] =
+ sOffsetTable[byteBuffer[bytePos++] & 0xFF];
+ fMode = SINGLEBYTEMODE;
+ break unicodeModeLoop;
+ // break;
+
+ // handle define extended tag
+ case UDEFINEX:
+ // verify we have two bytes following tag
+ // if not, save state and break out
+ if ((bytePos + 1) >= byteBufferLimit) {
+ --bytePos;
+ System.arraycopy(
+ byteBuffer,
+ bytePos,
+ fBuffer,
+ 0,
+ byteBufferLimit - bytePos);
+ fBufferLength = byteBufferLimit - bytePos;
+ bytePos += fBufferLength;
+ break mainLoop;
+ }
+
+ aByte = byteBuffer[bytePos++] & 0xFF;
+ fCurrentWindow = (aByte & 0xE0) >> 5;
+ fOffsets[fCurrentWindow] =
+ 0x10000
+ + (0x80
+ * (((aByte & 0x1F) << 8)
+ | (byteBuffer[bytePos++] & 0xFF)));
+ fMode = SINGLEBYTEMODE;
+ break unicodeModeLoop;
+ // break;
+
+ // handle all change tags
+ case UCHANGE0:
+ case UCHANGE1:
+ case UCHANGE2:
+ case UCHANGE3:
+ case UCHANGE4:
+ case UCHANGE5:
+ case UCHANGE6:
+ case UCHANGE7:
+ fCurrentWindow = aByte - UCHANGE0;
+ fMode = SINGLEBYTEMODE;
+ break unicodeModeLoop;
+ // break;
+
+ // quote unicode
+ case UQUOTEU:
+ // verify we have two bytes following tag
+ // if not, save state and break out
+ if (bytePos >= byteBufferLimit - 1) {
+ --bytePos;
+ System.arraycopy(
+ byteBuffer,
+ bytePos,
+ fBuffer,
+ 0,
+ byteBufferLimit - bytePos);
+ fBufferLength = byteBufferLimit - bytePos;
+ bytePos += fBufferLength;
+ break mainLoop;
+ }
+
+ aByte = byteBuffer[bytePos++];
+ charBuffer[ucPos++] =
+ (char) (aByte << 8 | (byteBuffer[bytePos++] & 0xFF));
+ break;
+
+ default:
+ // verify there is a byte following tag
+ // if not, save state and break out
+ if (bytePos >= byteBufferLimit) {
+ --bytePos;
+ System.arraycopy(
+ byteBuffer,
+ bytePos,
+ fBuffer,
+ 0,
+ byteBufferLimit - bytePos);
+ fBufferLength = byteBufferLimit - bytePos;
+ bytePos += fBufferLength;
+ break mainLoop;
+ }
+
+ charBuffer[ucPos++] =
+ (char) (aByte << 8 | (byteBuffer[bytePos++] & 0xFF));
+ break;
+ } // end switch
+ } // end while
+ break;
+ } // end switch( fMode )
+ } // end while
+
+ // fill in output parameter
+ if (bytesRead != null) bytesRead[0] = (bytePos - byteBufferStart);
+
+ // return # of chars written
+ return (ucPos - charBufferStart);
+ }
+
+ /**
+ * Reset the decompressor to its initial state.
+ *
+ * @stable ICU 2.4
+ */
+ public void reset() {
+ // reset dynamic windows
+ fOffsets[0] = 0x0080; // Latin-1 Supplement
+ fOffsets[1] = 0x00C0; // Latin-1 Supplement + Latin Extended-A
+ fOffsets[2] = 0x0400; // Cyrillic
+ fOffsets[3] = 0x0600; // Arabic
+ fOffsets[4] = 0x0900; // Devanagari
+ fOffsets[5] = 0x3040; // Hiragana
+ fOffsets[6] = 0x30A0; // Katakana
+ fOffsets[7] = 0xFF00; // Fullwidth ASCII
+
+ fCurrentWindow = 0; // Make current window Latin-1
+ fMode = SINGLEBYTEMODE; // Always start in single-byte mode
+ fBufferLength = 0; // Empty buffer
+ }
+}