diff options
Diffstat (limited to 'src/org/tukaani/xz')
-rw-r--r-- | src/org/tukaani/xz/BlockInputStream.java | 11 | ||||
-rw-r--r-- | src/org/tukaani/xz/CloseIgnoringInputStream.java | 2 | ||||
-rw-r--r-- | src/org/tukaani/xz/LZMA2InputStream.java | 1 | ||||
-rw-r--r-- | src/org/tukaani/xz/LZMA2OutputStream.java | 28 | ||||
-rw-r--r-- | src/org/tukaani/xz/LZMAInputStream.java | 34 | ||||
-rw-r--r-- | src/org/tukaani/xz/SeekableXZInputStream.java | 19 | ||||
-rw-r--r-- | src/org/tukaani/xz/SingleXZInputStream.java | 2 | ||||
-rw-r--r-- | src/org/tukaani/xz/XZInputStream.java | 8 | ||||
-rw-r--r-- | src/org/tukaani/xz/XZOutputStream.java | 8 | ||||
-rw-r--r-- | src/org/tukaani/xz/check/CRC64.java | 49 | ||||
-rw-r--r-- | src/org/tukaani/xz/lz/Hash234.java | 8 | ||||
-rw-r--r-- | src/org/tukaani/xz/lz/LZDecoder.java | 39 | ||||
-rw-r--r-- | src/org/tukaani/xz/package-info.java | 10 | ||||
-rw-r--r-- | src/org/tukaani/xz/rangecoder/RangeDecoder.java | 1 |
14 files changed, 159 insertions, 61 deletions
diff --git a/src/org/tukaani/xz/BlockInputStream.java b/src/org/tukaani/xz/BlockInputStream.java index 1931bd6..a9fff5f 100644 --- a/src/org/tukaani/xz/BlockInputStream.java +++ b/src/org/tukaani/xz/BlockInputStream.java @@ -44,17 +44,18 @@ class BlockInputStream extends InputStream { this.verifyCheck = verifyCheck; inData = new DataInputStream(in); - byte[] buf = new byte[DecoderUtil.BLOCK_HEADER_SIZE_MAX]; - // Block Header Size or Index Indicator - inData.readFully(buf, 0, 1); + int b = inData.readUnsignedByte(); // See if this begins the Index field. - if (buf[0] == 0x00) + if (b == 0x00) throw new IndexIndicatorException(); // Read the rest of the Block Header. - headerSize = 4 * ((buf[0] & 0xFF) + 1); + headerSize = 4 * (b + 1); + + final byte[] buf = new byte[headerSize]; + buf[0] = (byte)b; inData.readFully(buf, 1, headerSize - 1); // Validate the CRC32. diff --git a/src/org/tukaani/xz/CloseIgnoringInputStream.java b/src/org/tukaani/xz/CloseIgnoringInputStream.java index db68ddb..c29f268 100644 --- a/src/org/tukaani/xz/CloseIgnoringInputStream.java +++ b/src/org/tukaani/xz/CloseIgnoringInputStream.java @@ -18,7 +18,7 @@ import java.io.FilterInputStream; * {@code close()} to release memory allocated from an {@link ArrayCache} * but don't want to close the underlying {@code InputStream}. * For example: - * <p><blockquote><pre> + * <blockquote><pre> * InputStream rawdec = new LZMA2InputStream( * new CloseIgnoringInputStream(myInputStream), * myDictSize, null, myArrayCache); diff --git a/src/org/tukaani/xz/LZMA2InputStream.java b/src/org/tukaani/xz/LZMA2InputStream.java index 9708052..c494a07 100644 --- a/src/org/tukaani/xz/LZMA2InputStream.java +++ b/src/org/tukaani/xz/LZMA2InputStream.java @@ -13,6 +13,7 @@ package org.tukaani.xz; import java.io.InputStream; import java.io.DataInputStream; import java.io.IOException; +import java.io.EOFException; import org.tukaani.xz.lz.LZDecoder; import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer; import org.tukaani.xz.lzma.LZMADecoder; diff --git a/src/org/tukaani/xz/LZMA2OutputStream.java b/src/org/tukaani/xz/LZMA2OutputStream.java index a82a1a5..1bb2d85 100644 --- a/src/org/tukaani/xz/LZMA2OutputStream.java +++ b/src/org/tukaani/xz/LZMA2OutputStream.java @@ -10,7 +10,6 @@ package org.tukaani.xz; -import java.io.DataOutputStream; import java.io.IOException; import org.tukaani.xz.lz.LZEncoder; import org.tukaani.xz.rangecoder.RangeEncoderToBuffer; @@ -22,7 +21,6 @@ class LZMA2OutputStream extends FinishableOutputStream { private final ArrayCache arrayCache; private FinishableOutputStream out; - private final DataOutputStream outData; private LZEncoder lz; private RangeEncoderToBuffer rc; @@ -37,6 +35,8 @@ class LZMA2OutputStream extends FinishableOutputStream { private boolean finished = false; private IOException exception = null; + private final byte[] chunkHeader = new byte[6]; + private final byte[] tempBuf = new byte[1]; private static int getExtraSizeBefore(int dictSize) { @@ -60,7 +60,6 @@ class LZMA2OutputStream extends FinishableOutputStream { this.arrayCache = arrayCache; this.out = out; - outData = new DataOutputStream(out); rc = new RangeEncoderToBuffer(COMPRESSED_SIZE_MAX, arrayCache); int dictSize = options.getDictSize(); @@ -154,13 +153,18 @@ class LZMA2OutputStream extends FinishableOutputStream { } control |= (uncompressedSize - 1) >>> 16; - outData.writeByte(control); - - outData.writeShort(uncompressedSize - 1); - outData.writeShort(compressedSize - 1); + chunkHeader[0] = (byte)control; + chunkHeader[1] = (byte)((uncompressedSize - 1) >>> 8); + chunkHeader[2] = (byte)(uncompressedSize - 1); + chunkHeader[3] = (byte)((compressedSize - 1) >>> 8); + chunkHeader[4] = (byte)(compressedSize - 1); - if (propsNeeded) - outData.writeByte(props); + if (propsNeeded) { + chunkHeader[5] = (byte)props; + out.write(chunkHeader, 0, 6); + } else { + out.write(chunkHeader, 0, 5); + } rc.write(out); @@ -172,8 +176,10 @@ class LZMA2OutputStream extends FinishableOutputStream { private void writeUncompressed(int uncompressedSize) throws IOException { while (uncompressedSize > 0) { int chunkSize = Math.min(uncompressedSize, COMPRESSED_SIZE_MAX); - outData.writeByte(dictResetNeeded ? 0x01 : 0x02); - outData.writeShort(chunkSize - 1); + chunkHeader[0] = (byte)(dictResetNeeded ? 0x01 : 0x02); + chunkHeader[1] = (byte)((chunkSize - 1) >>> 8); + chunkHeader[2] = (byte)(chunkSize - 1); + out.write(chunkHeader, 0, 3); lz.copyUncompressed(out, uncompressedSize, chunkSize); uncompressedSize -= chunkSize; dictResetNeeded = false; diff --git a/src/org/tukaani/xz/LZMAInputStream.java b/src/org/tukaani/xz/LZMAInputStream.java index e46d5bb..1432eb1 100644 --- a/src/org/tukaani/xz/LZMAInputStream.java +++ b/src/org/tukaani/xz/LZMAInputStream.java @@ -13,6 +13,7 @@ package org.tukaani.xz; import java.io.InputStream; import java.io.DataInputStream; import java.io.IOException; +import java.io.EOFException; import org.tukaani.xz.lz.LZDecoder; import org.tukaani.xz.rangecoder.RangeDecoderFromStream; import org.tukaani.xz.lzma.LZMADecoder; @@ -53,6 +54,7 @@ public class LZMAInputStream extends InputStream { private LZMADecoder lzma; private boolean endReached = false; + private boolean relaxedEndCondition = false; private final byte[] tempBuf = new byte[1]; @@ -606,6 +608,33 @@ public class LZMAInputStream extends InputStream { } /** + * Enables relaxed end-of-stream condition when uncompressed size is known. + * This is useful if uncompressed size is known but it is unknown if + * the end of stream (EOS) marker is present. After calling this function, + * both are allowed. + * <p> + * Note that this doesn't actually check if the EOS marker is present. + * This introduces a few minor downsides: + * <ul> + * <li>Some (not all!) streams that would have more data than + * the specified uncompressed size, for example due to data corruption, + * will be accepted as valid.</li> + * <li>After <code>read</code> has returned <code>-1</code> the + * input position might not be at the end of the stream (too little + * input may have been read).</li> + * </ul> + * <p> + * This should be called after the constructor before reading any data + * from the stream. This is a separate function because adding even more + * constructors to this class didn't look like a good alternative. + * + * @since 1.9 + */ + public void enableRelaxedEndCondition() { + relaxedEndCondition = true; + } + + /** * Decompresses the next byte from this input stream. * <p> * Reading lots of data with <code>read()</code> from this input stream @@ -718,9 +747,10 @@ public class LZMAInputStream extends InputStream { if (endReached) { // Checking these helps a lot when catching corrupt // or truncated .lzma files. LZMA Utils doesn't do - // the first check and thus it accepts many invalid + // the second check and thus it accepts many invalid // files that this implementation and XZ Utils don't. - if (!rc.isFinished() || lz.hasPending()) + if (lz.hasPending() || (!relaxedEndCondition + && !rc.isFinished())) throw new CorruptedInputException(); putArraysToCache(); diff --git a/src/org/tukaani/xz/SeekableXZInputStream.java b/src/org/tukaani/xz/SeekableXZInputStream.java index 74f130e..74da2e1 100644 --- a/src/org/tukaani/xz/SeekableXZInputStream.java +++ b/src/org/tukaani/xz/SeekableXZInputStream.java @@ -45,7 +45,7 @@ import org.tukaani.xz.index.BlockInfo; * Block inside a Stream is located using binary search and thus is fast * even with a huge number of Blocks. * - * <h4>Memory usage</h4> + * <h2>Memory usage</h2> * <p> * The amount of memory needed for the Indexes is taken into account when * checking the memory usage limit. Each Stream is calculated to need at @@ -53,7 +53,7 @@ import org.tukaani.xz.index.BlockInfo; * to the next kibibyte. So unless the file has a huge number of Streams or * Blocks, these don't take significant amount of memory. * - * <h4>Creating random-accessible .xz files</h4> + * <h2>Creating random-accessible .xz files</h2> * <p> * When using {@link XZOutputStream}, a new Block can be started by calling * its {@link XZOutputStream#endBlock() endBlock} method. If you know @@ -69,6 +69,21 @@ import org.tukaani.xz.index.BlockInfo; * <code>--block-list=SIZES</code> which allows specifying sizes of * individual Blocks. * + * <h2>Example: getting the uncompressed size of a .xz file</h2> + * <blockquote><pre> + * String filename = "foo.xz"; + * SeekableFileInputStream seekableFile + * = new SeekableFileInputStream(filename); + * + * try { + * SeekableXZInputStream seekableXZ + * = new SeekableXZInputStream(seekableFile); + * System.out.println("Uncompressed size: " + seekableXZ.length()); + * } finally { + * seekableFile.close(); + * } + * </pre></blockquote> + * * @see SeekableFileInputStream * @see XZInputStream * @see XZOutputStream diff --git a/src/org/tukaani/xz/SingleXZInputStream.java b/src/org/tukaani/xz/SingleXZInputStream.java index 8da2be0..e106771 100644 --- a/src/org/tukaani/xz/SingleXZInputStream.java +++ b/src/org/tukaani/xz/SingleXZInputStream.java @@ -28,7 +28,7 @@ import org.tukaani.xz.check.Check; * Unless you know what you are doing, don't use this class to decompress * standalone .xz files. For that purpose, use <code>XZInputStream</code>. * - * <h4>When uncompressed size is known beforehand</h4> + * <h2>When uncompressed size is known beforehand</h2> * <p> * If you are decompressing complete XZ streams and your application knows * exactly how much uncompressed data there should be, it is good to try diff --git a/src/org/tukaani/xz/XZInputStream.java b/src/org/tukaani/xz/XZInputStream.java index 680f647..30374eb 100644 --- a/src/org/tukaani/xz/XZInputStream.java +++ b/src/org/tukaani/xz/XZInputStream.java @@ -22,10 +22,10 @@ import org.tukaani.xz.common.DecoderUtil; * its input stream until the end of the input or until an error occurs. * This supports decompressing concatenated .xz files. * - * <h4>Typical use cases</h4> + * <h2>Typical use cases</h2> * <p> * Getting an input stream to decompress a .xz file: - * <p><blockquote><pre> + * <blockquote><pre> * InputStream infile = new FileInputStream("foo.xz"); * XZInputStream inxz = new XZInputStream(infile); * </pre></blockquote> @@ -42,12 +42,12 @@ import org.tukaani.xz.common.DecoderUtil; * the specified limit, MemoryLimitException will be thrown when reading * from the stream. For example, the following sets the memory usage limit * to 100 MiB: - * <p><blockquote><pre> + * <blockquote><pre> * InputStream infile = new FileInputStream("foo.xz"); * XZInputStream inxz = new XZInputStream(infile, 100 * 1024); * </pre></blockquote> * - * <h4>When uncompressed size is known beforehand</h4> + * <h2>When uncompressed size is known beforehand</h2> * <p> * If you are decompressing complete files and your application knows * exactly how much uncompressed data there should be, it is good to try diff --git a/src/org/tukaani/xz/XZOutputStream.java b/src/org/tukaani/xz/XZOutputStream.java index 107ef7f..63cf5cf 100644 --- a/src/org/tukaani/xz/XZOutputStream.java +++ b/src/org/tukaani/xz/XZOutputStream.java @@ -19,18 +19,18 @@ import org.tukaani.xz.index.IndexEncoder; /** * Compresses into the .xz file format. * - * <h4>Examples</h4> + * <h2>Examples</h2> * <p> * Getting an output stream to compress with LZMA2 using the default * settings and the default integrity check type (CRC64): - * <p><blockquote><pre> + * <blockquote><pre> * FileOutputStream outfile = new FileOutputStream("foo.xz"); * XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options()); * </pre></blockquote> * <p> * Using the preset level <code>8</code> for LZMA2 (the default * is <code>6</code>) and SHA-256 instead of CRC64 for integrity checking: - * <p><blockquote><pre> + * <blockquote><pre> * XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options(8), * XZ.CHECK_SHA256); * </pre></blockquote> @@ -38,7 +38,7 @@ import org.tukaani.xz.index.IndexEncoder; * Using the x86 BCJ filter together with LZMA2 to compress x86 executables * and printing the memory usage information before creating the * XZOutputStream: - * <p><blockquote><pre> + * <blockquote><pre> * X86Options x86 = new X86Options(); * LZMA2Options lzma2 = new LZMA2Options(); * FilterOptions[] options = { x86, lzma2 }; diff --git a/src/org/tukaani/xz/check/CRC64.java b/src/org/tukaani/xz/check/CRC64.java index 02b15b7..a590a25 100644 --- a/src/org/tukaani/xz/check/CRC64.java +++ b/src/org/tukaani/xz/check/CRC64.java @@ -1,7 +1,8 @@ /* * CRC64 * - * Author: Lasse Collin <lasse.collin@tukaani.org> + * Authors: Brett Okken <brett.okken.os@gmail.com> + * Lasse Collin <lasse.collin@tukaani.org> * * This file has been put into the public domain. * You can do whatever you want with this file. @@ -10,37 +11,53 @@ package org.tukaani.xz.check; public class CRC64 extends Check { - private static final long poly = 0xC96C5795D7870F42L; - private static final long[] crcTable = new long[256]; - - private long crc = -1; + private static final long[][] TABLE = new long[4][256]; static { - for (int b = 0; b < crcTable.length; ++b) { - long r = b; + final long poly64 = 0xC96C5795D7870F42L; + + for (int s = 0; s < 4; ++s) { + for (int b = 0; b < 256; ++b) { + long r = s == 0 ? b : TABLE[s - 1][b]; for (int i = 0; i < 8; ++i) { - if ((r & 1) == 1) - r = (r >>> 1) ^ poly; - else - r >>>= 1; + if ((r & 1) == 1) { + r = (r >>> 1) ^ poly64; + } else { + r >>>= 1; + } } - - crcTable[b] = r; + TABLE[s][b] = r; + } } } + private long crc = -1; + public CRC64() { size = 8; name = "CRC64"; } + @Override public void update(byte[] buf, int off, int len) { - int end = off + len; + final int end = off + len; + int i = off; + + for (int end4 = end - 3; i < end4; i += 4) { + final int tmp = (int)crc; + crc = TABLE[3][(tmp & 0xFF) ^ (buf[i] & 0xFF)] ^ + TABLE[2][((tmp >>> 8) & 0xFF) ^ (buf[i + 1] & 0xFF)] ^ + (crc >>> 32) ^ + TABLE[1][((tmp >>> 16) & 0xFF) ^ (buf[i + 2] & 0xFF)] ^ + TABLE[0][((tmp >>> 24) & 0xFF) ^ (buf[i + 3] & 0xFF)]; + } - while (off < end) - crc = crcTable[(buf[off++] ^ (int)crc) & 0xFF] ^ (crc >>> 8); + while (i < end) + crc = TABLE[0][(buf[i++] & 0xFF) ^ ((int)crc & 0xFF)] ^ + (crc >>> 8); } + @Override public byte[] finish() { long value = ~crc; crc = -1; diff --git a/src/org/tukaani/xz/lz/Hash234.java b/src/org/tukaani/xz/lz/Hash234.java index 299ec44..bfa51b0 100644 --- a/src/org/tukaani/xz/lz/Hash234.java +++ b/src/org/tukaani/xz/lz/Hash234.java @@ -94,9 +94,9 @@ final class Hash234 extends CRC32Hash { hash4Table[hash4Value] = pos; } - void normalize(int normalizeOffset) { - LZEncoder.normalize(hash2Table, HASH_2_SIZE, normalizeOffset); - LZEncoder.normalize(hash3Table, HASH_3_SIZE, normalizeOffset); - LZEncoder.normalize(hash4Table, hash4Size, normalizeOffset); + void normalize(int normalizationOffset) { + LZEncoder.normalize(hash2Table, HASH_2_SIZE, normalizationOffset); + LZEncoder.normalize(hash3Table, HASH_3_SIZE, normalizationOffset); + LZEncoder.normalize(hash4Table, hash4Size, normalizationOffset); } } diff --git a/src/org/tukaani/xz/lz/LZDecoder.java b/src/org/tukaani/xz/lz/LZDecoder.java index 85b2ca1..6115e54 100644 --- a/src/org/tukaani/xz/lz/LZDecoder.java +++ b/src/org/tukaani/xz/lz/LZDecoder.java @@ -92,14 +92,43 @@ public final class LZDecoder { pendingDist = dist; int back = pos - dist - 1; - if (dist >= pos) + if (back < 0) { + // The distance wraps around to the end of the cyclic dictionary + // buffer. We cannot get here if the dictionary isn't full. + assert full == bufSize; back += bufSize; + // Here we will never copy more than dist + 1 bytes and + // so the copying won't repeat from its own output. + // Thus, we can always use arraycopy safely. + int copySize = Math.min(bufSize - back, left); + assert copySize <= dist + 1; + + System.arraycopy(buf, back, buf, pos, copySize); + pos += copySize; + back = 0; + left -= copySize; + + if (left == 0) + return; + } + + assert back < pos; + assert left > 0; + do { - buf[pos++] = buf[back++]; - if (back == bufSize) - back = 0; - } while (--left > 0); + // Determine the number of bytes to copy on this loop iteration: + // copySize is set so that the source and destination ranges + // don't overlap. If "left" is large enough, the destination + // range will start right after the last byte of the source + // range. This way we don't need to advance "back" which + // allows the next iteration of this loop to copy (up to) + // twice the number of bytes. + int copySize = Math.min(left, pos - back); + System.arraycopy(buf, back, buf, pos, copySize); + pos += copySize; + left -= copySize; + } while (left > 0); if (full < pos) full = pos; diff --git a/src/org/tukaani/xz/package-info.java b/src/org/tukaani/xz/package-info.java index 4e961df..ad23233 100644 --- a/src/org/tukaani/xz/package-info.java +++ b/src/org/tukaani/xz/package-info.java @@ -10,7 +10,7 @@ /** * XZ data compression support. * - * <h4>Introduction</h4> + * <h2>Introduction</h2> * <p> * This aims to be a complete implementation of XZ data compression * in pure Java. Features: @@ -25,20 +25,20 @@ * Threading is planned but it is unknown when it will be implemented. * <p> * For the latest source code, see the - * <a href="http://tukaani.org/xz/java.html">home page of XZ for Java</a>. + * <a href="https://tukaani.org/xz/java.html">home page of XZ for Java</a>. * - * <h4>Getting started</h4> + * <h2>Getting started</h2> * <p> * Start by reading the documentation of {@link org.tukaani.xz.XZOutputStream} * and {@link org.tukaani.xz.XZInputStream}. * If you use XZ inside another file format or protocol, * see also {@link org.tukaani.xz.SingleXZInputStream}. * - * <h4>Licensing</h4> + * <h2>Licensing</h2> * <p> * XZ for Java has been put into the public domain, thus you can do * whatever you want with it. All the files in the package have been - * written by Lasse Collin and/or Igor Pavlov. + * written by Lasse Collin, Igor Pavlov, and/or Brett Okken. * <p> * This software is provided "as is", without any warranty. */ diff --git a/src/org/tukaani/xz/rangecoder/RangeDecoder.java b/src/org/tukaani/xz/rangecoder/RangeDecoder.java index e63532e..7bcf718 100644 --- a/src/org/tukaani/xz/rangecoder/RangeDecoder.java +++ b/src/org/tukaani/xz/rangecoder/RangeDecoder.java @@ -10,7 +10,6 @@ package org.tukaani.xz.rangecoder; -import java.io.DataInputStream; import java.io.IOException; public abstract class RangeDecoder extends RangeCoder { |