14 files changed, 159 insertions, 61 deletions
diff --git a/src/org/tukaani/xz/BlockInputStream.java b/src/org/tukaani/xz/BlockInputStream.java
index 1931bd6..a9fff5f 100644
--- a/src/org/tukaani/xz/BlockInputStream.java
+++ b/src/org/tukaani/xz/BlockInputStream.java
@@ -44,17 +44,18 @@ class BlockInputStream extends InputStream {
         this.verifyCheck = verifyCheck;
         inData = new DataInputStream(in);
 
-        byte[] buf = new byte[DecoderUtil.BLOCK_HEADER_SIZE_MAX];
-
         // Block Header Size or Index Indicator
-        inData.readFully(buf, 0, 1);
+        int b = inData.readUnsignedByte();
 
         // See if this begins the Index field.
-        if (buf[0] == 0x00)
+        if (b == 0x00)
             throw new IndexIndicatorException();
 
         // Read the rest of the Block Header.
-        headerSize = 4 * ((buf[0] & 0xFF) + 1);
+        headerSize = 4 * (b + 1);
+
+        final byte[] buf = new byte[headerSize];
+        buf[0] = (byte)b;
         inData.readFully(buf, 1, headerSize - 1);
 
         // Validate the CRC32.
diff --git a/src/org/tukaani/xz/CloseIgnoringInputStream.java b/src/org/tukaani/xz/CloseIgnoringInputStream.java
index db68ddb..c29f268 100644
--- a/src/org/tukaani/xz/CloseIgnoringInputStream.java
+++ b/src/org/tukaani/xz/CloseIgnoringInputStream.java
@@ -18,7 +18,7 @@ import java.io.FilterInputStream;
  * {@code close()} to release memory allocated from an {@link ArrayCache}
  * but don't want to close the underlying {@code InputStream}.
  * For example:
- * <p><blockquote><pre>
+ * <blockquote><pre>
  * InputStream rawdec = new LZMA2InputStream(
  *         new CloseIgnoringInputStream(myInputStream),
  *         myDictSize, null, myArrayCache);
diff --git a/src/org/tukaani/xz/LZMA2InputStream.java b/src/org/tukaani/xz/LZMA2InputStream.java
index 9708052..c494a07 100644
--- a/src/org/tukaani/xz/LZMA2InputStream.java
+++ b/src/org/tukaani/xz/LZMA2InputStream.java
@@ -13,6 +13,7 @@ package org.tukaani.xz;
 import java.io.InputStream;
 import java.io.DataInputStream;
 import java.io.IOException;
+import java.io.EOFException;
 import org.tukaani.xz.lz.LZDecoder;
 import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer;
 import org.tukaani.xz.lzma.LZMADecoder;
diff --git a/src/org/tukaani/xz/LZMA2OutputStream.java b/src/org/tukaani/xz/LZMA2OutputStream.java
index a82a1a5..1bb2d85 100644
--- a/src/org/tukaani/xz/LZMA2OutputStream.java
+++ b/src/org/tukaani/xz/LZMA2OutputStream.java
@@ -10,7 +10,6 @@
 
 package org.tukaani.xz;
 
-import java.io.DataOutputStream;
 import java.io.IOException;
 import org.tukaani.xz.lz.LZEncoder;
 import org.tukaani.xz.rangecoder.RangeEncoderToBuffer;
@@ -22,7 +21,6 @@ class LZMA2OutputStream extends FinishableOutputStream {
     private final ArrayCache arrayCache;
 
     private FinishableOutputStream out;
-    private final DataOutputStream outData;
 
     private LZEncoder lz;
     private RangeEncoderToBuffer rc;
@@ -37,6 +35,8 @@ class LZMA2OutputStream extends FinishableOutputStream {
     private boolean finished = false;
     private IOException exception = null;
 
+    private final byte[] chunkHeader = new byte[6];
+
     private final byte[] tempBuf = new byte[1];
 
     private static int getExtraSizeBefore(int dictSize) {
@@ -60,7 +60,6 @@ class LZMA2OutputStream extends FinishableOutputStream {
 
         this.arrayCache = arrayCache;
         this.out = out;
-        outData = new DataOutputStream(out);
         rc = new RangeEncoderToBuffer(COMPRESSED_SIZE_MAX, arrayCache);
 
         int dictSize = options.getDictSize();
@@ -154,13 +153,18 @@ class LZMA2OutputStream extends FinishableOutputStream {
         }
 
         control |= (uncompressedSize - 1) >>> 16;
-        outData.writeByte(control);
-
-        outData.writeShort(uncompressedSize - 1);
-        outData.writeShort(compressedSize - 1);
+        chunkHeader[0] = (byte)control;
+        chunkHeader[1] = (byte)((uncompressedSize - 1) >>> 8);
+        chunkHeader[2] = (byte)(uncompressedSize - 1);
+        chunkHeader[3] = (byte)((compressedSize - 1) >>> 8);
+        chunkHeader[4] = (byte)(compressedSize - 1);
 
-        if (propsNeeded)
-            outData.writeByte(props);
+        if (propsNeeded) {
+            chunkHeader[5] = (byte)props;
+            out.write(chunkHeader, 0, 6);
+        } else {
+            out.write(chunkHeader, 0, 5);
+        }
 
         rc.write(out);
 
@@ -172,8 +176,10 @@ class LZMA2OutputStream extends FinishableOutputStream {
     private void writeUncompressed(int uncompressedSize) throws IOException {
         while (uncompressedSize > 0) {
             int chunkSize = Math.min(uncompressedSize, COMPRESSED_SIZE_MAX);
-            outData.writeByte(dictResetNeeded ? 0x01 : 0x02);
-            outData.writeShort(chunkSize - 1);
+            chunkHeader[0] = (byte)(dictResetNeeded ? 0x01 : 0x02);
+            chunkHeader[1] = (byte)((chunkSize - 1) >>> 8);
+            chunkHeader[2] = (byte)(chunkSize - 1);
+            out.write(chunkHeader, 0, 3);
             lz.copyUncompressed(out, uncompressedSize, chunkSize);
             uncompressedSize -= chunkSize;
             dictResetNeeded = false;
diff --git a/src/org/tukaani/xz/LZMAInputStream.java b/src/org/tukaani/xz/LZMAInputStream.java
index e46d5bb..1432eb1 100644
--- a/src/org/tukaani/xz/LZMAInputStream.java
+++ b/src/org/tukaani/xz/LZMAInputStream.java
@@ -13,6 +13,7 @@ package org.tukaani.xz;
 import java.io.InputStream;
 import java.io.DataInputStream;
 import java.io.IOException;
+import java.io.EOFException;
 import org.tukaani.xz.lz.LZDecoder;
 import org.tukaani.xz.rangecoder.RangeDecoderFromStream;
 import org.tukaani.xz.lzma.LZMADecoder;
@@ -53,6 +54,7 @@ public class LZMAInputStream extends InputStream {
     private LZMADecoder lzma;
 
     private boolean endReached = false;
+    private boolean relaxedEndCondition = false;
 
     private final byte[] tempBuf = new byte[1];
 
@@ -606,6 +608,33 @@ public class LZMAInputStream extends InputStream {
     }
 
     /**
+     * Enables relaxed end-of-stream condition when uncompressed size is known.
+     * This is useful if the uncompressed size is known but it is unknown
+     * whether the end of stream (EOS) marker is present. After calling this
+     * method, streams both with and without the EOS marker are accepted.
+     * <p>
+     * Note that this doesn't actually check if the EOS marker is present.
+     * This introduces a few minor downsides:
+     * <ul>
+     *   <li>Some (not all!) streams that would have more data than
+     *   the specified uncompressed size, for example due to data corruption,
+     *   will be accepted as valid.</li>
+     *   <li>After <code>read</code> has returned <code>-1</code> the
+     *   input position might not be at the end of the stream (too little
+     *   input may have been read).</li>
+     * </ul>
+     * <p>
+     * This should be called after the constructor but before reading any
+     * data from the stream. This is a separate method because adding even
+     * more constructors to this class didn't look like a good alternative.
+     *
+     * @since 1.9
+     */
+    public void enableRelaxedEndCondition() {
+        relaxedEndCondition = true;
+    }
+
+    /**
      * Decompresses the next byte from this input stream.
      * <p>
      * Reading lots of data with <code>read()</code> from this input stream
@@ -718,9 +747,10 @@ public class LZMAInputStream extends InputStream {
                 if (endReached) {
                     // Checking these helps a lot when catching corrupt
                     // or truncated .lzma files. LZMA Utils doesn't do
-                    // the first check and thus it accepts many invalid
+                    // the second check and thus it accepts many invalid
                     // files that this implementation and XZ Utils don't.
-                    if (!rc.isFinished() || lz.hasPending())
+                    if (lz.hasPending() || (!relaxedEndCondition
+                                            && !rc.isFinished()))
                         throw new CorruptedInputException();
 
                     putArraysToCache();
diff --git a/src/org/tukaani/xz/SeekableXZInputStream.java b/src/org/tukaani/xz/SeekableXZInputStream.java
index 74f130e..74da2e1 100644
--- a/src/org/tukaani/xz/SeekableXZInputStream.java
+++ b/src/org/tukaani/xz/SeekableXZInputStream.java
@@ -45,7 +45,7 @@ import org.tukaani.xz.index.BlockInfo;
  * Block inside a Stream is located using binary search and thus is fast
  * even with a huge number of Blocks.
  *
- * <h4>Memory usage</h4>
+ * <h2>Memory usage</h2>
  * <p>
  * The amount of memory needed for the Indexes is taken into account when
  * checking the memory usage limit. Each Stream is calculated to need at
@@ -53,7 +53,7 @@ import org.tukaani.xz.index.BlockInfo;
  * to the next kibibyte. So unless the file has a huge number of Streams or
  * Blocks, these don't take significant amount of memory.
  *
- * <h4>Creating random-accessible .xz files</h4>
+ * <h2>Creating random-accessible .xz files</h2>
  * <p>
  * When using {@link XZOutputStream}, a new Block can be started by calling
  * its {@link XZOutputStream#endBlock() endBlock} method. If you know
@@ -69,6 +69,21 @@ import org.tukaani.xz.index.BlockInfo;
  * <code>--block-list=SIZES</code> which allows specifying sizes of
  * individual Blocks.
  *
+ * <h2>Example: getting the uncompressed size of a .xz file</h2>
+ * <blockquote><pre>
+ * String filename = "foo.xz";
+ * SeekableFileInputStream seekableFile
+ *         = new SeekableFileInputStream(filename);
+ *
+ * try {
+ *     SeekableXZInputStream seekableXZ
+ *             = new SeekableXZInputStream(seekableFile);
+ *     System.out.println("Uncompressed size: " + seekableXZ.length());
+ * } finally {
+ *     seekableFile.close();
+ * }
+ * </pre></blockquote>
+ *
  * @see SeekableFileInputStream
  * @see XZInputStream
  * @see XZOutputStream
diff --git a/src/org/tukaani/xz/SingleXZInputStream.java b/src/org/tukaani/xz/SingleXZInputStream.java
index 8da2be0..e106771 100644
--- a/src/org/tukaani/xz/SingleXZInputStream.java
+++ b/src/org/tukaani/xz/SingleXZInputStream.java
@@ -28,7 +28,7 @@ import org.tukaani.xz.check.Check;
  * Unless you know what you are doing, don't use this class to decompress
  * standalone .xz files. For that purpose, use <code>XZInputStream</code>.
  *
- * <h4>When uncompressed size is known beforehand</h4>
+ * <h2>When uncompressed size is known beforehand</h2>
  * <p>
  * If you are decompressing complete XZ streams and your application knows
  * exactly how much uncompressed data there should be, it is good to try
diff --git a/src/org/tukaani/xz/XZInputStream.java b/src/org/tukaani/xz/XZInputStream.java
index 680f647..30374eb 100644
--- a/src/org/tukaani/xz/XZInputStream.java
+++ b/src/org/tukaani/xz/XZInputStream.java
@@ -22,10 +22,10 @@ import org.tukaani.xz.common.DecoderUtil;
  * its input stream until the end of the input or until an error occurs.
  * This supports decompressing concatenated .xz files.
  *
- * <h4>Typical use cases</h4>
+ * <h2>Typical use cases</h2>
  * <p>
  * Getting an input stream to decompress a .xz file:
- * <p><blockquote><pre>
+ * <blockquote><pre>
  * InputStream infile = new FileInputStream("foo.xz");
  * XZInputStream inxz = new XZInputStream(infile);
  * </pre></blockquote>
@@ -42,12 +42,12 @@ import org.tukaani.xz.common.DecoderUtil;
  * the specified limit, MemoryLimitException will be thrown when reading
  * from the stream. For example, the following sets the memory usage limit
  * to 100&nbsp;MiB:
- * <p><blockquote><pre>
+ * <blockquote><pre>
  * InputStream infile = new FileInputStream("foo.xz");
  * XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
  * </pre></blockquote>
  *
- * <h4>When uncompressed size is known beforehand</h4>
+ * <h2>When uncompressed size is known beforehand</h2>
  * <p>
  * If you are decompressing complete files and your application knows
  * exactly how much uncompressed data there should be, it is good to try
diff --git a/src/org/tukaani/xz/XZOutputStream.java b/src/org/tukaani/xz/XZOutputStream.java
index 107ef7f..63cf5cf 100644
--- a/src/org/tukaani/xz/XZOutputStream.java
+++ b/src/org/tukaani/xz/XZOutputStream.java
@@ -19,18 +19,18 @@ import org.tukaani.xz.index.IndexEncoder;
 /**
  * Compresses into the .xz file format.
  *
- * <h4>Examples</h4>
+ * <h2>Examples</h2>
  * <p>
  * Getting an output stream to compress with LZMA2 using the default
  * settings and the default integrity check type (CRC64):
- * <p><blockquote><pre>
+ * <blockquote><pre>
  * FileOutputStream outfile = new FileOutputStream("foo.xz");
  * XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options());
  * </pre></blockquote>
  * <p>
  * Using the preset level <code>8</code> for LZMA2 (the default
  * is <code>6</code>) and SHA-256 instead of CRC64 for integrity checking:
- * <p><blockquote><pre>
+ * <blockquote><pre>
  * XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options(8),
  *                                           XZ.CHECK_SHA256);
  * </pre></blockquote>
@@ -38,7 +38,7 @@ import org.tukaani.xz.index.IndexEncoder;
  * Using the x86 BCJ filter together with LZMA2 to compress x86 executables
  * and printing the memory usage information before creating the
  * XZOutputStream:
- * <p><blockquote><pre>
+ * <blockquote><pre>
  * X86Options x86 = new X86Options();
  * LZMA2Options lzma2 = new LZMA2Options();
  * FilterOptions[] options = { x86, lzma2 };
diff --git a/src/org/tukaani/xz/check/CRC64.java b/src/org/tukaani/xz/check/CRC64.java
index 02b15b7..a590a25 100644
--- a/src/org/tukaani/xz/check/CRC64.java
+++ b/src/org/tukaani/xz/check/CRC64.java
@@ -1,7 +1,8 @@
 /*
  * CRC64
  *
- * Author: Lasse Collin <lasse.collin@tukaani.org>
+ * Authors: Brett Okken <brett.okken.os@gmail.com>
+ *          Lasse Collin <lasse.collin@tukaani.org>
  *
  * This file has been put into the public domain.
  * You can do whatever you want with this file.
@@ -10,37 +11,53 @@
 package org.tukaani.xz.check;
 
 public class CRC64 extends Check {
-    private static final long poly = 0xC96C5795D7870F42L;
-    private static final long[] crcTable = new long[256];
-
-    private long crc = -1;
+    private static final long[][] TABLE = new long[4][256];
 
     static {
-        for (int b = 0; b < crcTable.length; ++b) {
-                long r = b;
+        final long poly64 = 0xC96C5795D7870F42L;
+
+        for (int s = 0; s < 4; ++s) {
+            for (int b = 0; b < 256; ++b) {
+                long r = s == 0 ? b : TABLE[s - 1][b];
                 for (int i = 0; i < 8; ++i) {
-                        if ((r & 1) == 1)
-                                r = (r >>> 1) ^ poly;
-                        else
-                                r >>>= 1;
+                    if ((r & 1) == 1) {
+                        r = (r >>> 1) ^ poly64;
+                    } else {
+                        r >>>= 1;
+                    }
                 }
-
-                crcTable[b] = r;
+                TABLE[s][b] = r;
+            }
         }
     }
 
+    private long crc = -1;
+
     public CRC64() {
         size = 8;
         name = "CRC64";
     }
 
+    @Override
     public void update(byte[] buf, int off, int len) {
-        int end = off + len;
+        final int end = off + len;
+        int i = off;
+
+        for (int end4 = end - 3; i < end4; i += 4) {
+            final int tmp = (int)crc;
+            crc = TABLE[3][(tmp & 0xFF) ^ (buf[i] & 0xFF)] ^
+                  TABLE[2][((tmp >>> 8) & 0xFF) ^ (buf[i + 1] & 0xFF)] ^
+                  (crc >>> 32) ^
+                  TABLE[1][((tmp >>> 16) & 0xFF) ^ (buf[i + 2] & 0xFF)] ^
+                  TABLE[0][((tmp >>> 24) & 0xFF) ^ (buf[i + 3] & 0xFF)];
+        }
 
-        while (off < end)
-            crc = crcTable[(buf[off++] ^ (int)crc) & 0xFF] ^ (crc >>> 8);
+        while (i < end)
+            crc = TABLE[0][(buf[i++] & 0xFF) ^ ((int)crc & 0xFF)] ^
+                  (crc >>> 8);
     }
 
+    @Override
     public byte[] finish() {
         long value = ~crc;
         crc = -1;
diff --git a/src/org/tukaani/xz/lz/Hash234.java b/src/org/tukaani/xz/lz/Hash234.java
index 299ec44..bfa51b0 100644
--- a/src/org/tukaani/xz/lz/Hash234.java
+++ b/src/org/tukaani/xz/lz/Hash234.java
@@ -94,9 +94,9 @@ final class Hash234 extends CRC32Hash {
         hash4Table[hash4Value] = pos;
     }
 
-    void normalize(int normalizeOffset) {
-        LZEncoder.normalize(hash2Table, HASH_2_SIZE, normalizeOffset);
-        LZEncoder.normalize(hash3Table, HASH_3_SIZE, normalizeOffset);
-        LZEncoder.normalize(hash4Table, hash4Size, normalizeOffset);
+    void normalize(int normalizationOffset) {
+        LZEncoder.normalize(hash2Table, HASH_2_SIZE, normalizationOffset);
+        LZEncoder.normalize(hash3Table, HASH_3_SIZE, normalizationOffset);
+        LZEncoder.normalize(hash4Table, hash4Size, normalizationOffset);
     }
 }
diff --git a/src/org/tukaani/xz/lz/LZDecoder.java b/src/org/tukaani/xz/lz/LZDecoder.java
index 85b2ca1..6115e54 100644
--- a/src/org/tukaani/xz/lz/LZDecoder.java
+++ b/src/org/tukaani/xz/lz/LZDecoder.java
@@ -92,14 +92,43 @@ public final class LZDecoder {
         pendingDist = dist;
 
         int back = pos - dist - 1;
-        if (dist >= pos)
+        if (back < 0) {
+            // The distance wraps around to the end of the cyclic dictionary
+            // buffer. We cannot get here if the dictionary isn't full.
+            assert full == bufSize;
             back += bufSize;
 
+            // Here we will never copy more than dist + 1 bytes and
+            // so the copying won't repeat from its own output.
+            // Thus, we can always use arraycopy safely.
+            int copySize = Math.min(bufSize - back, left);
+            assert copySize <= dist + 1;
+
+            System.arraycopy(buf, back, buf, pos, copySize);
+            pos += copySize;
+            back = 0;
+            left -= copySize;
+
+            if (left == 0)
+                return;
+        }
+
+        assert back < pos;
+        assert left > 0;
+
         do {
-            buf[pos++] = buf[back++];
-            if (back == bufSize)
-                back = 0;
-        } while (--left > 0);
+            // Determine the number of bytes to copy on this loop iteration:
+            // copySize is set so that the source and destination ranges
+            // don't overlap. If "left" is large enough, the destination
+            // range will start right after the last byte of the source
+            // range. This way we don't need to advance "back" which
+            // allows the next iteration of this loop to copy (up to)
+            // twice the number of bytes.
+            int copySize = Math.min(left, pos - back);
+            System.arraycopy(buf, back, buf, pos, copySize);
+            pos += copySize;
+            left -= copySize;
+        } while (left > 0);
 
         if (full < pos)
             full = pos;
diff --git a/src/org/tukaani/xz/package-info.java b/src/org/tukaani/xz/package-info.java
index 4e961df..ad23233 100644
--- a/src/org/tukaani/xz/package-info.java
+++ b/src/org/tukaani/xz/package-info.java
@@ -10,7 +10,7 @@
 /**
  * XZ data compression support.
  *
- * <h4>Introduction</h4>
+ * <h2>Introduction</h2>
  * <p>
  * This aims to be a complete implementation of XZ data compression
  * in pure Java. Features:
@@ -25,20 +25,20 @@
  * Threading is planned but it is unknown when it will be implemented.
  * <p>
  * For the latest source code, see the
- * <a href="http://tukaani.org/xz/java.html">home page of XZ for Java</a>.
+ * <a href="https://tukaani.org/xz/java.html">home page of XZ for Java</a>.
  *
- * <h4>Getting started</h4>
+ * <h2>Getting started</h2>
  * <p>
  * Start by reading the documentation of {@link org.tukaani.xz.XZOutputStream}
  * and {@link org.tukaani.xz.XZInputStream}.
  * If you use XZ inside another file format or protocol,
  * see also {@link org.tukaani.xz.SingleXZInputStream}.
  *
- * <h4>Licensing</h4>
+ * <h2>Licensing</h2>
  * <p>
  * XZ for Java has been put into the public domain, thus you can do
  * whatever you want with it. All the files in the package have been
- * written by Lasse Collin and/or Igor Pavlov.
+ * written by Lasse Collin, Igor Pavlov, and/or Brett Okken.
  * <p>
  * This software is provided "as is", without any warranty.
  */
diff --git a/src/org/tukaani/xz/rangecoder/RangeDecoder.java b/src/org/tukaani/xz/rangecoder/RangeDecoder.java
index e63532e..7bcf718 100644
--- a/src/org/tukaani/xz/rangecoder/RangeDecoder.java
+++ b/src/org/tukaani/xz/rangecoder/RangeDecoder.java
@@ -10,7 +10,6 @@
 
 package org.tukaani.xz.rangecoder;
 
-import java.io.DataInputStream;
 import java.io.IOException;
 
 public abstract class RangeDecoder extends RangeCoder {