Support known uncompressed size in LZMAOutputStream.

This allows storing the uncompressed size in the .lzma header and omitting the end of stream marker. Thanks to Matthias Stevens for the initial patch and testing.
author: Lasse Collin <lasse.collin@tukaani.org> 2016-10-26 18:51:42 +0300
committer: Lasse Collin <lasse.collin@tukaani.org> 2016-10-26 18:51:42 +0300
commit: c7eb0e1e4b3f447788a322f760e1dcaa6781520d (patch)
tree: 52900a938b6f26b1b02572d3d90a3c16f55ab8b3 /src
parent: 9dcae4d105d88e5b6db868a5af9ae99ba6aa3930 (diff)
download: xz-java-c7eb0e1e4b3f447788a322f760e1dcaa6781520d.tar.gz
2 files changed, 57 insertions, 16 deletions
diff --git a/src/LZMAEncDemo.java b/src/LZMAEncDemo.java
index 17b0cef..ba3f213 100644
--- a/src/LZMAEncDemo.java
+++ b/src/LZMAEncDemo.java
@@ -17,16 +17,24 @@ import org.tukaani.xz.*;
  * NOTE: For most purposes, .lzma is a legacy format and usually you should
  * use .xz instead.
  * <p>
- * One optional argument is supported: LZMA preset level which is an integer
- * in the range [0, 9]. The default is 6.
+ * Two optional arguments are supported:
+ * <ol>
+ *   <li>LZMA preset level which is an integer in the range [0, 9].
+ *       The default is 6.</li>
+ *   <li>Uncompressed size of the input as bytes.</li>
+ * </ol>
  */
 class LZMAEncDemo {
     public static void main(String[] args) throws Exception {
         LZMA2Options options = new LZMA2Options();
+        long inputSize = -1;
 
         if (args.length >= 1)
             options.setPreset(Integer.parseInt(args[0]));
 
+        if (args.length >= 2)
+            inputSize = Long.parseLong(args[1]);
+
         System.err.println("Encoder memory usage: "
                            + options.getEncoderMemoryUsage() + " KiB");
         System.err.println("Decoder memory usage: "
@@ -35,7 +43,8 @@ class LZMAEncDemo {
         // LZMAOutputStream writes one byte at a time. It helps a little,
         // especially in the fastest presets, to use BufferedOutputStream.
         OutputStream out = new BufferedOutputStream(System.out);
-        LZMAOutputStream encoder = new LZMAOutputStream(out, options);
+        LZMAOutputStream encoder = new LZMAOutputStream(out, options,
+                                                        inputSize);
 
         byte[] buf = new byte[8192];
         int size;
diff --git a/src/org/tukaani/xz/LZMAOutputStream.java b/src/org/tukaani/xz/LZMAOutputStream.java
index 298e1ed..a9f1918 100644
--- a/src/org/tukaani/xz/LZMAOutputStream.java
+++ b/src/org/tukaani/xz/LZMAOutputStream.java
@@ -30,7 +30,8 @@ public class LZMAOutputStream extends FinishableOutputStream {
 
     private final int props;
     private final boolean useEndMarker;
-    private long uncompressedSize = 0;
+    private final long expectedUncompressedSize;
+    private long currentUncompressedSize = 0;
 
     private boolean finished = false;
     private IOException exception = null;
@@ -38,12 +39,19 @@ public class LZMAOutputStream extends FinishableOutputStream {
     private final byte[] tempBuf = new byte[1];
 
     private LZMAOutputStream(OutputStream out, LZMA2Options options,
-                             boolean useHeader, boolean useEndMarker)
+                             boolean useHeader, boolean useEndMarker,
+                             long expectedUncompressedSize)
             throws IOException {
         if (out == null)
             throw new NullPointerException();
 
+        // -1 indicates unknown and >= 0 are for known sizes.
+        if (expectedUncompressedSize < -1)
+            throw new IllegalArgumentException(
+                    "Invalid expected input size (less than -1)");
+
         this.useEndMarker = useEndMarker;
+        this.expectedUncompressedSize = expectedUncompressedSize;
 
         this.out = out;
         rc = new RangeEncoderToStream(out);
@@ -70,22 +78,31 @@ public class LZMAOutputStream extends FinishableOutputStream {
         props = (options.getPb() * 5 + options.getLp()) * 9 + options.getLc();
 
         if (useHeader) {
+            // Props byte stores lc, lp, and pb.
             out.write(props);
 
+            // Dictionary size is stored as a 32-bit unsigned little endian
+            // integer.
             for (int i = 0; i < 4; ++i) {
                 out.write(dictSize & 0xFF);
                 dictSize >>>= 8;
             }
 
+            // Uncompressed size is stored as a 64-bit unsigned little endian
+            // integer. The max value (-1 in two's complement) indicates
+            // unknown size.
             for (int i = 0; i < 8; ++i)
-                out.write(0xFF);
+                out.write((int)(expectedUncompressedSize >>> (8 * i)) & 0xFF);
         }
     }
 
     /**
      * Creates a new compressor for the legacy .lzma file format.
-     * The files will always use the end of stream marker and thus
-     * will not have the uncompressed size stored in the header.
+     * <p>
+     * If the uncompressed size of the input data is known, it will be stored
+     * in the .lzma header and no end of stream marker will be used. Otherwise
+     * the header will indicate unknown uncompressed size and the end of stream
+     * marker will be used.
      * <p>
      * Note that a preset dictionary cannot be used in .lzma files but
      * it can be used for raw LZMA streams.
@@ -96,18 +113,22 @@ public class LZMAOutputStream extends FinishableOutputStream {
      * @param       options     LZMA compression options; the same class
      *                          is used here as is for LZMA2
      *
+     * @param       inputSize   uncompressed size of the data to be compressed;
+     *                          use <code>-1</code> when unknown
+     *
      * @throws      IOException may be thrown from <code>out</code>
      */
-    public LZMAOutputStream(OutputStream out, LZMA2Options options)
+    public LZMAOutputStream(OutputStream out, LZMA2Options options,
+                            long inputSize)
             throws IOException {
-        this(out, options, true, true);
+        this(out, options, true, inputSize == -1, inputSize);
     }
 
     /**
      * Creates a new compressor for raw LZMA (also known as LZMA1) stream.
      * <p>
      * Raw LZMA streams can be encoded with or without end of stream marker.
-     * When decompressing the stream, one must if the end marker was used
+     * When decompressing the stream, one must know if the end marker was used
      * and tell it to the decompressor. If the end marker wasn't used, the
      * decompressor will also need to know the uncompressed size.
      *
@@ -124,7 +145,7 @@ public class LZMAOutputStream extends FinishableOutputStream {
      */
     public LZMAOutputStream(OutputStream out, LZMA2Options options,
                             boolean useEndMarker) throws IOException {
-        this(out, options, false, useEndMarker);
+        this(out, options, false, useEndMarker, -1);
     }
 
     /**
@@ -142,7 +163,7 @@ public class LZMAOutputStream extends FinishableOutputStream {
      * the end of stream marker.
      */
     public long getUncompressedSize() {
-        return uncompressedSize;
+        return currentUncompressedSize;
     }
 
     public void write(int b) throws IOException {
@@ -160,7 +181,12 @@ public class LZMAOutputStream extends FinishableOutputStream {
         if (finished)
             throw new XZIOException("Stream finished or closed");
 
-        uncompressedSize += len;
+        if (expectedUncompressedSize != -1
+                && expectedUncompressedSize - currentUncompressedSize < len)
+            throw new XZIOException("Expected uncompressed input size ("
+                    + expectedUncompressedSize + " bytes) was exceeded");
+
+        currentUncompressedSize += len;
 
         try {
             while (len > 0) {
@@ -190,9 +216,15 @@ public class LZMAOutputStream extends FinishableOutputStream {
             if (exception != null)
                 throw exception;
 
-            lz.setFinishing();
-
             try {
+                if (expectedUncompressedSize != -1
+                        && expectedUncompressedSize != currentUncompressedSize)
+                    throw new XZIOException("Expected uncompressed size ("
+                            + expectedUncompressedSize + ") doesn't equal "
+                            + "the number of bytes written to the stream ("
+                            + currentUncompressedSize + ")");
+
+                lz.setFinishing();
                 lzma.encodeForLZMA1();
 
                 if (useEndMarker)
author	Lasse Collin <lasse.collin@tukaani.org>	2016-10-26 18:51:42 +0300
committer	Lasse Collin <lasse.collin@tukaani.org>	2016-10-26 18:51:42 +0300
commit	c7eb0e1e4b3f447788a322f760e1dcaa6781520d (patch)
tree	52900a938b6f26b1b02572d3d90a3c16f55ab8b3 /src
parent	9dcae4d105d88e5b6db868a5af9ae99ba6aa3930 (diff)
download	xz-java-c7eb0e1e4b3f447788a322f760e1dcaa6781520d.tar.gz