Restructure SeekableXZInputStream and IndexDecoder.

The BlockInfo describing the current Block is now held in SeekableXZInputStream, rather than every IndexDecoder of a multi-Stream .xz file holding its own BlockInfo. This will be useful in the next commits, which make the Block boundaries visible outside SeekableXZInputStream.
author: Lasse Collin <lasse.collin@tukaani.org> 2013-04-14 13:20:39 +0300
committer: Lasse Collin <lasse.collin@tukaani.org> 2013-04-14 13:20:39 +0300
commit: 813cabda644da74b3af9d7995dadadad77978738 (patch)
tree: 6c92331f04a072fb0f43d18e2772af3cee64fa8f /src
parent: bee9e2a4a946d6c41f064e8942d747d86ab2cd91 (diff)
download: xz-java-813cabda644da74b3af9d7995dadadad77978738.tar.gz
3 files changed, 157 insertions, 90 deletions
diff --git a/src/org/tukaani/xz/SeekableXZInputStream.java b/src/org/tukaani/xz/SeekableXZInputStream.java
index d6f1d8a..8a8e1e4 100644
--- a/src/org/tukaani/xz/SeekableXZInputStream.java
+++ b/src/org/tukaani/xz/SeekableXZInputStream.java
@@ -98,37 +98,41 @@ public class SeekableXZInputStream extends SeekableInputStream {
     private final ArrayList streams = new ArrayList();
 
     /**
-     * IndexDecoder from which the current Block is being decoded.
-     * The constructor leaves this to point the IndexDecoder of
-     * the first Stream.
+     * Bitmask of all Check IDs seen.
      */
-    private IndexDecoder index;
+    private int checkTypes = 0;
 
     /**
-     * Bitmask of all Check IDs seen.
+     * Uncompressed size of the file (all Streams).
      */
-    private int checkTypes = 0;
+    private long uncompressedSize = 0;
 
     /**
-     * Integrity Check in the current XZ Stream. The constructor leaves
-     * this to point to the Check of the first Stream.
+     * Uncompressed size of the largest XZ Block in the file.
      */
-    private Check check;
+    private long largestBlockSize = 0;
 
     /**
-     * Decoder of the current XZ Block, if any.
+     * Number of XZ Blocks in the file.
      */
-    private BlockInputStream blockDecoder = null;
+    private int blockCount = 0;
 
     /**
-     * Uncompressed size of the file (all Streams).
+     * Size and position information about the current Block.
+     * If there are no Blocks, all values will be <code>-1</code>.
      */
-    private long uncompressedSize = 0;
+    private final BlockInfo curBlockInfo;
 
     /**
-     * Uncompressed size of the largest XZ Block in the file.
+     * Integrity Check in the current XZ Stream. The constructor leaves
+     * this to point to the Check of the first Stream.
      */
-    private long largestBlockSize = 0;
+    private Check check;
+
+    /**
+     * Decoder of the current XZ Block, if any.
+     */
+    private BlockInputStream blockDecoder = null;
 
     /**
      * Current uncompressed position.
@@ -282,6 +286,7 @@ public class SeekableXZInputStream extends SeekableInputStream {
             in.seek(pos - streamFooter.backwardSize);
 
             // Decode the Index field.
+            IndexDecoder index;
             try {
                 index = new IndexDecoder(in, streamFooter, streamPadding,
                                          memoryLimit);
@@ -331,6 +336,12 @@ public class SeekableXZInputStream extends SeekableInputStream {
             if (uncompressedSize < 0)
                 throw new UnsupportedOptionsException("XZ file is too big");
 
+            // Update the Block count and check that it fits into an int.
+            blockCount += index.getRecordCount();
+            if (blockCount < 0)
+                throw new UnsupportedOptionsException(
+                        "XZ file has over " + Integer.MAX_VALUE + " Blocks");
+
             // Add this Stream to the list of Streams.
             streams.add(index);
 
@@ -342,6 +353,23 @@ public class SeekableXZInputStream extends SeekableInputStream {
 
         // Save it now that indexMemoryUsage has been substracted from it.
         this.memoryLimit = memoryLimit;
+
+        // Store the relative offsets of the Streams. This way we don't
+        // need to recalculate them in this class when seeking; the
+        // IndexDecoder instances will handle them.
+        IndexDecoder prev = (IndexDecoder)streams.get(streams.size() - 1);
+        for (int i = streams.size() - 2; i >= 0; --i) {
+            IndexDecoder cur = (IndexDecoder)streams.get(i);
+            cur.setOffsets(prev);
+            prev = cur;
+        }
+
+        // Initialize curBlockInfo to point to the first Stream.
+        // The blockNumber will be left to -1 so that .hasNext()
+        // and .setNext() work to get the first Block when starting
+        // to decompress from the beginning of the file.
+        IndexDecoder first = (IndexDecoder)streams.get(streams.size() - 1);
+        curBlockInfo = new BlockInfo(first);
     }
 
     /**
@@ -581,9 +609,9 @@ public class SeekableXZInputStream extends SeekableInputStream {
         // from the same Stream. If there are no more Blocks in this Stream,
         // then we behave as if seek(long) had been called.
         if (!seekNeeded) {
-            if (index.hasNext()) {
-                BlockInfo info = index.getNext();
-                initBlockDecoder(info);
+            if (curBlockInfo.hasNext()) {
+                curBlockInfo.setNext();
+                initBlockDecoder();
                 return;
             }
 
@@ -603,32 +631,21 @@ public class SeekableXZInputStream extends SeekableInputStream {
         endReached = false;
 
         // Locate the Stream that contains the uncompressed target position.
-        int i = streams.size();
-        assert i >= 1;
-
-        long uncompressedSum = 0;
-        long compressedSum = 0;
-
-        while (true) {
-            index = (IndexDecoder)streams.get(--i);
-            if (uncompressedSum + index.getUncompressedSize() > seekPos)
+        IndexDecoder index;
+        for (int i = 0; ; ++i) {
+            index = (IndexDecoder)streams.get(i);
+            if (index.hasUncompressedOffset(seekPos))
                 break;
-
-            uncompressedSum += index.getUncompressedSize();
-            compressedSum += index.getStreamAndPaddingSize();
-            assert (compressedSum & 3) == 0;
         }
 
-        // Locate the Block from the Stream that contains
-        // the uncompressed target position.
-        BlockInfo info = index.locate(seekPos - uncompressedSum);
-        assert (info.compressedOffset & 3) == 0 : info.compressedOffset;
+        // Locate the Block that contains the uncompressed target position.
+        index.locateBlock(curBlockInfo, seekPos);
 
-        // Adjust the Stream-specific offsets to file offsets.
-        info.compressedOffset += compressedSum;
-        info.uncompressedOffset += uncompressedSum;
-        assert seekPos >= info.uncompressedOffset;
-        assert seekPos < info.uncompressedOffset + info.uncompressedSize;
+        assert (curBlockInfo.compressedOffset & 3) == 0;
+        assert curBlockInfo.uncompressedSize > 0;
+        assert seekPos >= curBlockInfo.uncompressedOffset;
+        assert seekPos < curBlockInfo.uncompressedOffset
+                         + curBlockInfo.uncompressedSize;
 
         // Seek in the underlying stream and create a new Block decoder
         // only if really needed. We can skip it if the current position
@@ -640,17 +657,17 @@ public class SeekableXZInputStream extends SeekableInputStream {
         // In that case, decoding of the current Block hasn't been started
         // yet. (Decoding of a Block won't be started until at least one
         // byte will also be read from it.)
-        if (!(curPos > info.uncompressedOffset && curPos <= seekPos)) {
+        if (!(curPos > curBlockInfo.uncompressedOffset && curPos <= seekPos)) {
             // Seek to the beginning of the Block.
-            in.seek(info.compressedOffset);
+            in.seek(curBlockInfo.compressedOffset);
 
             // Since it is possible that this Block is from a different
             // Stream than the previous Block, initialize a new Check.
-            check = Check.getInstance(info.streamFlags.checkType);
+            check = Check.getInstance(curBlockInfo.getCheckType());
 
             // Create a new Block decoder.
-            initBlockDecoder(info);
-            curPos = info.uncompressedOffset;
+            initBlockDecoder();
+            curPos = curBlockInfo.uncompressedOffset;
         }
 
         // If the target wasn't at a Block boundary, decompress and throw
@@ -662,23 +679,22 @@ public class SeekableXZInputStream extends SeekableInputStream {
             long skipAmount = seekPos - curPos;
             if (blockDecoder.skip(skipAmount) != skipAmount)
                 throw new CorruptedInputException();
-        }
 
-        curPos = seekPos;
+            curPos = seekPos;
+        }
     }
 
     /**
      * Initializes a new BlockInputStream. This is a helper function for
      * <code>seek()</code>.
      */
-    private void initBlockDecoder(BlockInfo info) throws IOException {
+    private void initBlockDecoder() throws IOException {
         try {
             // Set it to null first so that GC can collect it if memory
             // runs tight when initializing a new BlockInputStream.
             blockDecoder = null;
             blockDecoder = new BlockInputStream(in, check, memoryLimit,
-                                                info.unpaddedSize,
-                                                info.uncompressedSize);
+                    curBlockInfo.unpaddedSize, curBlockInfo.uncompressedSize);
         } catch (MemoryLimitException e) {
             // BlockInputStream doesn't know how much memory we had
             // already needed so we need to recreate the exception.
diff --git a/src/org/tukaani/xz/index/BlockInfo.java b/src/org/tukaani/xz/index/BlockInfo.java
index d45fb86..babae7f 100644
--- a/src/org/tukaani/xz/index/BlockInfo.java
+++ b/src/org/tukaani/xz/index/BlockInfo.java
@@ -12,9 +12,27 @@ package org.tukaani.xz.index;
 import org.tukaani.xz.common.StreamFlags;
 
 public class BlockInfo {
-    public StreamFlags streamFlags;
-    public long compressedOffset;
-    public long uncompressedOffset;
-    public long unpaddedSize;
-    public long uncompressedSize;
+    public int blockNumber = -1;
+    public long compressedOffset = -1;
+    public long uncompressedOffset = -1;
+    public long unpaddedSize = -1;
+    public long uncompressedSize = -1;
+
+    IndexDecoder index;
+
+    public BlockInfo(IndexDecoder indexOfFirstStream) {
+        index = indexOfFirstStream;
+    }
+
+    public int getCheckType() {
+        return index.getStreamFlags().checkType;
+    }
+
+    public boolean hasNext() {
+        return index.hasRecord(blockNumber + 1);
+    }
+
+    public void setNext() {
+        index.setBlockInfo(this, blockNumber + 1);
+    }
 }
diff --git a/src/org/tukaani/xz/index/IndexDecoder.java b/src/org/tukaani/xz/index/IndexDecoder.java
index 85c1848..a3ae986 100644
--- a/src/org/tukaani/xz/index/IndexDecoder.java
+++ b/src/org/tukaani/xz/index/IndexDecoder.java
@@ -20,25 +20,29 @@ import org.tukaani.xz.MemoryLimitException;
 import org.tukaani.xz.UnsupportedOptionsException;
 
 public class IndexDecoder extends IndexBase {
-    private final BlockInfo info = new BlockInfo();
+    private final StreamFlags streamFlags;
     private final long streamPadding;
     private final int memoryUsage;
+
+    // Unpadded Size and Uncompressed Size fields
     private final long[] unpadded;
     private final long[] uncompressed;
+
+    // Uncompressed size of the largest Block. It is used by
+    // SeekableXZInputStream to find out the largest Block of the .xz file.
     private long largestBlockSize = 0;
 
-    /**
-     * Current position in the arrays. This is initialized to <code>-1</code>
-     * because then it is possible to use <code>hasNext()</code> and
-     * <code>getNext()</code> to get BlockInfo of the first Block.
-     */
-    private int pos = -1;
+    // Offsets relative to the beginning of the .xz file. These are all zero
+    // for the first Stream in the file.
+    private int recordOffset = 0;
+    private long compressedOffset = 0;
+    private long uncompressedOffset = 0;
 
     public IndexDecoder(SeekableInputStream in, StreamFlags streamFooterFlags,
                         long streamPadding, int memoryLimit)
             throws IOException {
         super(new CorruptedInputException("XZ Index is corrupt"));
-        info.streamFlags = streamFooterFlags;
+        this.streamFlags = streamFooterFlags;
         this.streamPadding = streamPadding;
 
         // If endPos is exceeded before the CRC32 field has been decoded,
@@ -128,31 +132,28 @@ public class IndexDecoder extends IndexBase {
                 throw new CorruptedInputException("XZ Index is corrupt");
     }
 
-    public BlockInfo locate(long target) {
-        assert target < uncompressedSum;
-
-        int left = 0;
-        int right = unpadded.length - 1;
-
-        while (left < right) {
-            int i = left + (right - left) / 2;
-
-            if (uncompressed[i] <= target)
-                left = i + 1;
-            else
-                right = i;
-        }
-
-        pos = left;
-        return getInfo();
+    public void setOffsets(IndexDecoder prev) {
+        // NOTE: SeekableXZInputStream checks that the total number of Blocks
+        // in concatenated Streams fits into an int.
+        recordOffset = prev.recordOffset + (int)prev.recordCount;
+        compressedOffset = prev.compressedOffset
+                           + prev.getStreamSize() + prev.streamPadding;
+        assert (compressedOffset & 3) == 0;
+        uncompressedOffset = prev.uncompressedOffset + prev.uncompressedSum;
     }
 
     public int getMemoryUsage() {
         return memoryUsage;
     }
 
-    public long getStreamAndPaddingSize() {
-        return getStreamSize() + streamPadding;
+    public StreamFlags getStreamFlags() {
+        return streamFlags;
+    }
+
+    public int getRecordCount() {
+        // It was already checked in the constructor that it fits into an int.
+        // Otherwise we couldn't have allocated the arrays.
+        return (int)recordCount;
     }
 
     public long getUncompressedSize() {
@@ -163,16 +164,47 @@ public class IndexDecoder extends IndexBase {
         return largestBlockSize;
     }
 
-    public boolean hasNext() {
-        return pos + 1 < recordCount;
+    public boolean hasUncompressedOffset(long pos) {
+        return pos >= uncompressedOffset
+               && pos < uncompressedOffset + uncompressedSum;
     }
 
-    public BlockInfo getNext() {
-        ++pos;
-        return getInfo();
+    public boolean hasRecord(int blockNumber) {
+        return blockNumber >= recordOffset
+               && blockNumber < recordOffset + recordCount;
     }
 
-    private BlockInfo getInfo() {
+    public void locateBlock(BlockInfo info, long target) {
+        assert target >= uncompressedOffset;
+        target -= uncompressedOffset;
+        assert target < uncompressedSum;
+
+        int left = 0;
+        int right = unpadded.length - 1;
+
+        while (left < right) {
+            int i = left + (right - left) / 2;
+
+            if (uncompressed[i] <= target)
+                left = i + 1;
+            else
+                right = i;
+        }
+
+        setBlockInfo(info, recordOffset + left);
+    }
+
+    public void setBlockInfo(BlockInfo info, int blockNumber) {
+        // The caller has checked that the given Block number is inside
+        // this Index.
+        assert blockNumber >= recordOffset;
+        assert blockNumber - recordOffset < recordCount;
+
+        info.index = this;
+        info.blockNumber = blockNumber;
+
+        int pos = blockNumber - recordOffset;
+
         if (pos == 0) {
             info.compressedOffset = 0;
             info.uncompressedOffset = 0;
@@ -184,7 +216,8 @@ public class IndexDecoder extends IndexBase {
         info.unpaddedSize = unpadded[pos] - info.compressedOffset;
         info.uncompressedSize = uncompressed[pos] - info.uncompressedOffset;
 
-        info.compressedOffset += DecoderUtil.STREAM_HEADER_SIZE;
-        return info;
+        info.compressedOffset += compressedOffset
+                                 + DecoderUtil.STREAM_HEADER_SIZE;
+        info.uncompressedOffset += uncompressedOffset;
     }
 }
author	Lasse Collin <lasse.collin@tukaani.org>	2013-04-14 13:20:39 +0300
committer	Lasse Collin <lasse.collin@tukaani.org>	2013-04-14 13:20:39 +0300
commit	813cabda644da74b3af9d7995dadadad77978738 (patch)
tree	6c92331f04a072fb0f43d18e2772af3cee64fa8f /src
parent	bee9e2a4a946d6c41f064e8942d747d86ab2cd91 (diff)
download	xz-java-813cabda644da74b3af9d7995dadadad77978738.tar.gz