aboutsummaryrefslogtreecommitdiff
path: root/icu/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeDecompressor.java
diff options
context:
space:
mode:
Diffstat (limited to 'icu/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeDecompressor.java')
-rw-r--r--icu/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeDecompressor.java559
1 files changed, 0 insertions, 559 deletions
diff --git a/icu/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeDecompressor.java b/icu/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeDecompressor.java
deleted file mode 100644
index 6789469f..00000000
--- a/icu/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeDecompressor.java
+++ /dev/null
@@ -1,559 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-/*
- *******************************************************************************
- * Copyright (C) 1996-2016, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- */
-
-package com.ibm.icu.text;
-
-/**
-* A decompression engine implementing the Standard Compression Scheme
-* for Unicode (SCSU) as outlined in <A
-* HREF="http://www.unicode.org/unicode/reports/tr6">Unicode Technical
-* Report #6</A>.
-*
-* <P><STRONG>USAGE</STRONG></P>
-*
-* <P>The static methods on <TT>UnicodeDecompressor</TT> may be used in a
-* straightforward manner to decompress simple strings:</P>
-*
-* <PRE>
-* byte [] compressed = ... ; // get compressed bytes from somewhere
-* String result = UnicodeDecompressor.decompress(compressed);
-* </PRE>
-*
-* <P>The static methods have a fairly large memory footprint.
-* For finer-grained control over memory usage,
-* <TT>UnicodeDecompressor</TT> offers more powerful APIs allowing
-* iterative decompression:</P>
-*
-* <PRE>
-* // Decompress an array "bytes" of length "len" using a buffer of 512 chars
-* // to the Writer "out"
-*
-* UnicodeDecompressor myDecompressor = new UnicodeDecompressor();
-* final static int BUFSIZE = 512;
-* char [] charBuffer = new char [ BUFSIZE ];
-* int charsWritten = 0;
-* int [] bytesRead = new int [1];
-* int totalBytesDecompressed = 0;
-* int totalCharsWritten = 0;
-*
-* do {
-* // do the decompression
-* charsWritten = myDecompressor.decompress(bytes, totalBytesDecompressed,
-* len, bytesRead,
-* charBuffer, 0, BUFSIZE);
-*
-* // do something with the current set of chars
-* out.write(charBuffer, 0, charsWritten);
-*
-* // update the no. of bytes decompressed
-* totalBytesDecompressed += bytesRead[0];
-*
-* // update the no. of chars written
-* totalCharsWritten += charsWritten;
-*
-* } while(totalBytesDecompressed &lt; len);
-*
-* myDecompressor.reset(); // reuse decompressor
-* </PRE>
-*
-* <P>Decompression is performed according to the standard set forth in
-* <A HREF="http://www.unicode.org/unicode/reports/tr6">Unicode Technical
-* Report #6</A></P>
-*
-* @see UnicodeCompressor
-*
-* @author Stephen F. Booth
-* @stable ICU 2.4
-*/
-public final class UnicodeDecompressor implements SCSU
-{
- //==========================
- // Instance variables
- //==========================
-
- /** Alias to current dynamic window */
- private int fCurrentWindow = 0;
-
- /** Dynamic compression window offsets */
- private int [] fOffsets = new int [ NUMWINDOWS ];
-
- /** Current compression mode */
- private int fMode = SINGLEBYTEMODE;
-
- /** Size of our internal buffer */
- private final static int BUFSIZE = 3;
-
- /** Internal buffer for saving state */
- private byte [] fBuffer = new byte [BUFSIZE];
-
- /** Number of characters in our internal buffer */
- private int fBufferLength = 0;
-
-
- /**
- * Create a UnicodeDecompressor.
- * Sets all windows to their default values.
- * @see #reset
- * @stable ICU 2.4
- */
- public UnicodeDecompressor(){
- reset(); // initialize to defaults
- }
-
- /**
- * Decompress a byte array into a String.
- * @param buffer The byte array to decompress.
- * @return A String containing the decompressed characters.
- * @see #decompress(byte [], int, int)
- * @stable ICU 2.4
- */
- public static String decompress(byte [] buffer){
- char [] buf = decompress(buffer, 0, buffer.length);
- return new String(buf);
- }
-
- /**
- * Decompress a byte array into a Unicode character array.
- * @param buffer The byte array to decompress.
- * @param start The start of the byte run to decompress.
- * @param limit The limit of the byte run to decompress.
- * @return A character array containing the decompressed bytes.
- * @see #decompress(byte [])
- * @stable ICU 2.4
- */
- public static char [] decompress(byte [] buffer, int start, int limit) {
- UnicodeDecompressor comp = new UnicodeDecompressor();
-
- // use a buffer we know will never overflow
- // in the worst case, each byte will decompress
- // to a surrogate pair (buffer must be at least 2 chars)
- int len = Math.max(2, 2 * (limit - start));
- char [] temp = new char [len];
-
- int charCount = comp.decompress(buffer, start, limit, null,
- temp, 0, len);
-
- char [] result = new char [charCount];
- System.arraycopy(temp, 0, result, 0, charCount);
- return result;
- }
-
- /**
- * Decompress a byte array into a Unicode character array.
- *
- * This function will either completely fill the output buffer,
- * or consume the entire input.
- *
- * @param byteBuffer The byte buffer to decompress.
- * @param byteBufferStart The start of the byte run to decompress.
- * @param byteBufferLimit The limit of the byte run to decompress.
- * @param bytesRead A one-element array. If not null, on return
- * the number of bytes read from byteBuffer.
- * @param charBuffer A buffer to receive the decompressed data.
- * This buffer must be at minimum two characters in size.
- * @param charBufferStart The starting offset to which to write
- * decompressed data.
- * @param charBufferLimit The limiting offset for writing
- * decompressed data.
- * @return The number of Unicode characters written to charBuffer.
- * @stable ICU 2.4
- */
- public int decompress(byte [] byteBuffer,
- int byteBufferStart,
- int byteBufferLimit,
- int [] bytesRead,
- char [] charBuffer,
- int charBufferStart,
- int charBufferLimit)
- {
- // the current position in the source byte buffer
- int bytePos = byteBufferStart;
-
- // the current position in the target char buffer
- int ucPos = charBufferStart;
-
- // the current byte from the source buffer
- int aByte = 0x00;
-
-
- // charBuffer must be at least 2 chars in size
- if(charBuffer.length < 2 || (charBufferLimit - charBufferStart) < 2)
- throw new IllegalArgumentException("charBuffer.length < 2");
-
- // if our internal buffer isn't empty, flush its contents
- // to the output buffer before doing any more decompression
- if(fBufferLength > 0) {
-
- int newBytes = 0;
-
- // fill the buffer completely, to guarantee one full character
- if(fBufferLength != BUFSIZE) {
- newBytes = fBuffer.length - fBufferLength;
-
- // verify there are newBytes bytes in byteBuffer
- if(byteBufferLimit - byteBufferStart < newBytes)
- newBytes = byteBufferLimit - byteBufferStart;
-
- System.arraycopy(byteBuffer, byteBufferStart,
- fBuffer, fBufferLength, newBytes);
- }
-
- // reset buffer length to 0 before recursive call
- fBufferLength = 0;
-
- // call self recursively to decompress the buffer
- int count = decompress(fBuffer, 0, fBuffer.length, null,
- charBuffer, charBufferStart,
- charBufferLimit);
-
- // update the positions into the arrays
- ucPos += count;
- bytePos += newBytes;
- }
-
- // the main decompression loop
- mainLoop:
- while(bytePos < byteBufferLimit && ucPos < charBufferLimit) {
- switch(fMode) {
- case SINGLEBYTEMODE:
- // single-byte mode decompression loop
- singleByteModeLoop:
- while(bytePos < byteBufferLimit && ucPos < charBufferLimit) {
- aByte = byteBuffer[bytePos++] & 0xFF;
- switch(aByte) {
- // All bytes from 0x80 through 0xFF are remapped
- // to chars or surrogate pairs according to the
- // currently active window
- case 0x80: case 0x81: case 0x82: case 0x83: case 0x84:
- case 0x85: case 0x86: case 0x87: case 0x88: case 0x89:
- case 0x8A: case 0x8B: case 0x8C: case 0x8D: case 0x8E:
- case 0x8F: case 0x90: case 0x91: case 0x92: case 0x93:
- case 0x94: case 0x95: case 0x96: case 0x97: case 0x98:
- case 0x99: case 0x9A: case 0x9B: case 0x9C: case 0x9D:
- case 0x9E: case 0x9F: case 0xA0: case 0xA1: case 0xA2:
- case 0xA3: case 0xA4: case 0xA5: case 0xA6: case 0xA7:
- case 0xA8: case 0xA9: case 0xAA: case 0xAB: case 0xAC:
- case 0xAD: case 0xAE: case 0xAF: case 0xB0: case 0xB1:
- case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6:
- case 0xB7: case 0xB8: case 0xB9: case 0xBA: case 0xBB:
- case 0xBC: case 0xBD: case 0xBE: case 0xBF: case 0xC0:
- case 0xC1: case 0xC2: case 0xC3: case 0xC4: case 0xC5:
- case 0xC6: case 0xC7: case 0xC8: case 0xC9: case 0xCA:
- case 0xCB: case 0xCC: case 0xCD: case 0xCE: case 0xCF:
- case 0xD0: case 0xD1: case 0xD2: case 0xD3: case 0xD4:
- case 0xD5: case 0xD6: case 0xD7: case 0xD8: case 0xD9:
- case 0xDA: case 0xDB: case 0xDC: case 0xDD: case 0xDE:
- case 0xDF: case 0xE0: case 0xE1: case 0xE2: case 0xE3:
- case 0xE4: case 0xE5: case 0xE6: case 0xE7: case 0xE8:
- case 0xE9: case 0xEA: case 0xEB: case 0xEC: case 0xED:
- case 0xEE: case 0xEF: case 0xF0: case 0xF1: case 0xF2:
- case 0xF3: case 0xF4: case 0xF5: case 0xF6: case 0xF7:
- case 0xF8: case 0xF9: case 0xFA: case 0xFB: case 0xFC:
- case 0xFD: case 0xFE: case 0xFF:
- // For offsets <= 0xFFFF, convert to a single char
- // by adding the window's offset and subtracting
- // the generic compression offset
- if(fOffsets[ fCurrentWindow ] <= 0xFFFF) {
- charBuffer[ucPos++] = (char)
- (aByte + fOffsets[ fCurrentWindow ]
- - COMPRESSIONOFFSET);
- }
- // For offsets > 0x10000, convert to a surrogate pair by
- // normBase = window's offset - 0x10000
- // high surr. = 0xD800 + (normBase >> 10)
- // low surr. = 0xDC00 + (normBase & 0x3FF) + (byte & 0x7F)
- else {
- // make sure there is enough room to write
- // both characters
- // if not, save state and break out
- if((ucPos + 1) >= charBufferLimit) {
- --bytePos;
- System.arraycopy(byteBuffer, bytePos,
- fBuffer, 0,
- byteBufferLimit - bytePos);
- fBufferLength = byteBufferLimit - bytePos;
- bytePos += fBufferLength;
- break mainLoop;
- }
-
- int normalizedBase = fOffsets[ fCurrentWindow ]
- - 0x10000;
- charBuffer[ucPos++] = (char)
- (0xD800 + (normalizedBase >> 10));
- charBuffer[ucPos++] = (char)
- (0xDC00 + (normalizedBase & 0x3FF)+(aByte & 0x7F));
- }
- break;
-
- // bytes from 0x20 through 0x7F are treated as ASCII and
- // are remapped to chars by padding the high byte
- // (this is the same as quoting from static window 0)
- // NUL (0x00), HT (0x09), CR (0x0A), LF (0x0D)
- // are treated as ASCII as well
- case 0x00: case 0x09: case 0x0A: case 0x0D:
- case 0x20: case 0x21: case 0x22: case 0x23: case 0x24:
- case 0x25: case 0x26: case 0x27: case 0x28: case 0x29:
- case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E:
- case 0x2F: case 0x30: case 0x31: case 0x32: case 0x33:
- case 0x34: case 0x35: case 0x36: case 0x37: case 0x38:
- case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D:
- case 0x3E: case 0x3F: case 0x40: case 0x41: case 0x42:
- case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
- case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C:
- case 0x4D: case 0x4E: case 0x4F: case 0x50: case 0x51:
- case 0x52: case 0x53: case 0x54: case 0x55: case 0x56:
- case 0x57: case 0x58: case 0x59: case 0x5A: case 0x5B:
- case 0x5C: case 0x5D: case 0x5E: case 0x5F: case 0x60:
- case 0x61: case 0x62: case 0x63: case 0x64: case 0x65:
- case 0x66: case 0x67: case 0x68: case 0x69: case 0x6A:
- case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
- case 0x70: case 0x71: case 0x72: case 0x73: case 0x74:
- case 0x75: case 0x76: case 0x77: case 0x78: case 0x79:
- case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E:
- case 0x7F:
- charBuffer[ucPos++] = (char) aByte;
- break;
-
- // quote unicode
- case SQUOTEU:
- // verify we have two bytes following tag
- // if not, save state and break out
- if( (bytePos + 1) >= byteBufferLimit ) {
- --bytePos;
- System.arraycopy(byteBuffer, bytePos,
- fBuffer, 0,
- byteBufferLimit - bytePos);
- fBufferLength = byteBufferLimit - bytePos;
- bytePos += fBufferLength;
- break mainLoop;
- }
-
- aByte = byteBuffer[bytePos++];
- charBuffer[ucPos++] = (char)
- (aByte << 8 | (byteBuffer[bytePos++] & 0xFF));
- break;
-
- // switch to Unicode mode
- case SCHANGEU:
- fMode = UNICODEMODE;
- break singleByteModeLoop;
- //break;
-
- // handle all quote tags
- case SQUOTE0: case SQUOTE1: case SQUOTE2: case SQUOTE3:
- case SQUOTE4: case SQUOTE5: case SQUOTE6: case SQUOTE7:
- // verify there is a byte following the tag
- // if not, save state and break out
- if(bytePos >= byteBufferLimit) {
- --bytePos;
- System.arraycopy(byteBuffer, bytePos,
- fBuffer, 0,
- byteBufferLimit - bytePos);
- fBufferLength = byteBufferLimit - bytePos;
- bytePos += fBufferLength;
- break mainLoop;
- }
-
- // if the byte is in the range 0x00 - 0x7F, use
- // static window n otherwise, use dynamic window n
- int dByte = byteBuffer[bytePos++] & 0xFF;
- charBuffer[ucPos++] = (char)
- (dByte+ (dByte >= 0x00 && dByte < 0x80
- ? sOffsets[aByte - SQUOTE0]
- : (fOffsets[aByte - SQUOTE0]
- - COMPRESSIONOFFSET)));
- break;
-
- // handle all change tags
- case SCHANGE0: case SCHANGE1: case SCHANGE2: case SCHANGE3:
- case SCHANGE4: case SCHANGE5: case SCHANGE6: case SCHANGE7:
- fCurrentWindow = aByte - SCHANGE0;
- break;
-
- // handle all define tags
- case SDEFINE0: case SDEFINE1: case SDEFINE2: case SDEFINE3:
- case SDEFINE4: case SDEFINE5: case SDEFINE6: case SDEFINE7:
- // verify there is a byte following the tag
- // if not, save state and break out
- if(bytePos >= byteBufferLimit) {
- --bytePos;
- System.arraycopy(byteBuffer, bytePos,
- fBuffer, 0,
- byteBufferLimit - bytePos);
- fBufferLength = byteBufferLimit - bytePos;
- bytePos += fBufferLength;
- break mainLoop;
- }
-
- fCurrentWindow = aByte - SDEFINE0;
- fOffsets[fCurrentWindow] =
- sOffsetTable[byteBuffer[bytePos++] & 0xFF];
- break;
-
- // handle define extended tag
- case SDEFINEX:
- // verify we have two bytes following tag
- // if not, save state and break out
- if((bytePos + 1) >= byteBufferLimit ) {
- --bytePos;
- System.arraycopy(byteBuffer, bytePos,
- fBuffer, 0,
- byteBufferLimit - bytePos);
- fBufferLength = byteBufferLimit - bytePos;
- bytePos += fBufferLength;
- break mainLoop;
- }
-
- aByte = byteBuffer[bytePos++] & 0xFF;
- fCurrentWindow = (aByte & 0xE0) >> 5;
- fOffsets[fCurrentWindow] = 0x10000 +
- (0x80 * (((aByte & 0x1F) << 8)
- | (byteBuffer[bytePos++] & 0xFF)));
- break;
-
- // reserved, shouldn't happen
- case SRESERVED:
- break;
-
- } // end switch
- } // end while
- break;
-
- case UNICODEMODE:
- // unicode mode decompression loop
- unicodeModeLoop:
- while(bytePos < byteBufferLimit && ucPos < charBufferLimit) {
- aByte = byteBuffer[bytePos++] & 0xFF;
- switch(aByte) {
- // handle all define tags
- case UDEFINE0: case UDEFINE1: case UDEFINE2: case UDEFINE3:
- case UDEFINE4: case UDEFINE5: case UDEFINE6: case UDEFINE7:
- // verify there is a byte following tag
- // if not, save state and break out
- if(bytePos >= byteBufferLimit ) {
- --bytePos;
- System.arraycopy(byteBuffer, bytePos,
- fBuffer, 0,
- byteBufferLimit - bytePos);
- fBufferLength = byteBufferLimit - bytePos;
- bytePos += fBufferLength;
- break mainLoop;
- }
-
- fCurrentWindow = aByte - UDEFINE0;
- fOffsets[fCurrentWindow] =
- sOffsetTable[byteBuffer[bytePos++] & 0xFF];
- fMode = SINGLEBYTEMODE;
- break unicodeModeLoop;
- //break;
-
- // handle define extended tag
- case UDEFINEX:
- // verify we have two bytes following tag
- // if not, save state and break out
- if((bytePos + 1) >= byteBufferLimit ) {
- --bytePos;
- System.arraycopy(byteBuffer, bytePos,
- fBuffer, 0,
- byteBufferLimit - bytePos);
- fBufferLength = byteBufferLimit - bytePos;
- bytePos += fBufferLength;
- break mainLoop;
- }
-
- aByte = byteBuffer[bytePos++] & 0xFF;
- fCurrentWindow = (aByte & 0xE0) >> 5;
- fOffsets[fCurrentWindow] = 0x10000 +
- (0x80 * (((aByte & 0x1F) << 8)
- | (byteBuffer[bytePos++] & 0xFF)));
- fMode = SINGLEBYTEMODE;
- break unicodeModeLoop;
- //break;
-
- // handle all change tags
- case UCHANGE0: case UCHANGE1: case UCHANGE2: case UCHANGE3:
- case UCHANGE4: case UCHANGE5: case UCHANGE6: case UCHANGE7:
- fCurrentWindow = aByte - UCHANGE0;
- fMode = SINGLEBYTEMODE;
- break unicodeModeLoop;
- //break;
-
- // quote unicode
- case UQUOTEU:
- // verify we have two bytes following tag
- // if not, save state and break out
- if(bytePos >= byteBufferLimit - 1) {
- --bytePos;
- System.arraycopy(byteBuffer, bytePos,
- fBuffer, 0,
- byteBufferLimit - bytePos);
- fBufferLength = byteBufferLimit - bytePos;
- bytePos += fBufferLength;
- break mainLoop;
- }
-
- aByte = byteBuffer[bytePos++];
- charBuffer[ucPos++] = (char)
- (aByte << 8 | (byteBuffer[bytePos++] & 0xFF));
- break;
-
- default:
- // verify there is a byte following tag
- // if not, save state and break out
- if(bytePos >= byteBufferLimit ) {
- --bytePos;
- System.arraycopy(byteBuffer, bytePos,
- fBuffer, 0,
- byteBufferLimit - bytePos);
- fBufferLength = byteBufferLimit - bytePos;
- bytePos += fBufferLength;
- break mainLoop;
- }
-
- charBuffer[ucPos++] = (char)
- (aByte << 8 | (byteBuffer[bytePos++] & 0xFF));
- break;
-
- } // end switch
- } // end while
- break;
-
- } // end switch( fMode )
- } // end while
-
- // fill in output parameter
- if(bytesRead != null)
- bytesRead [0] = (bytePos - byteBufferStart);
-
- // return # of chars written
- return (ucPos - charBufferStart);
- }
-
- /**
- * Reset the decompressor to its initial state.
- * @stable ICU 2.4
- */
- public void reset()
- {
- // reset dynamic windows
- fOffsets[0] = 0x0080; // Latin-1
- fOffsets[1] = 0x00C0; // Latin-1 Supplement + Latin Extended-A
- fOffsets[2] = 0x0400; // Cyrillic
- fOffsets[3] = 0x0600; // Arabic
- fOffsets[4] = 0x0900; // Devanagari
- fOffsets[5] = 0x3040; // Hiragana
- fOffsets[6] = 0x30A0; // Katakana
- fOffsets[7] = 0xFF00; // Fullwidth ASCII
-
-
- fCurrentWindow = 0; // Make current window Latin-1
- fMode = SINGLEBYTEMODE; // Always start in single-byte mode
- fBufferLength = 0; // Empty buffer
- }
-}