diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2013-04-14 13:20:39 +0300 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2013-04-14 13:20:39 +0300 |
commit | 813cabda644da74b3af9d7995dadadad77978738 (patch) | |
tree | 6c92331f04a072fb0f43d18e2772af3cee64fa8f /src | |
parent | bee9e2a4a946d6c41f064e8942d747d86ab2cd91 (diff) | |
download | xz-java-813cabda644da74b3af9d7995dadadad77978738.tar.gz |
Restructure SeekableXZInputStream and IndexDecoder.
The BlockInfo describing the current Block is now held in
SeekableXZInputStream, instead of every IndexDecoder in a
multi-Stream .xz file keeping its own BlockInfo.
This will be useful in the next commits that make the Block
boundaries visible outside the SeekableXZInputStream.
Diffstat (limited to 'src')
-rw-r--r-- | src/org/tukaani/xz/SeekableXZInputStream.java | 116 | ||||
-rw-r--r-- | src/org/tukaani/xz/index/BlockInfo.java | 28 | ||||
-rw-r--r-- | src/org/tukaani/xz/index/IndexDecoder.java | 103 |
3 files changed, 157 insertions, 90 deletions
diff --git a/src/org/tukaani/xz/SeekableXZInputStream.java b/src/org/tukaani/xz/SeekableXZInputStream.java index d6f1d8a..8a8e1e4 100644 --- a/src/org/tukaani/xz/SeekableXZInputStream.java +++ b/src/org/tukaani/xz/SeekableXZInputStream.java @@ -98,37 +98,41 @@ public class SeekableXZInputStream extends SeekableInputStream { private final ArrayList streams = new ArrayList(); /** - * IndexDecoder from which the current Block is being decoded. - * The constructor leaves this to point the IndexDecoder of - * the first Stream. + * Bitmask of all Check IDs seen. */ - private IndexDecoder index; + private int checkTypes = 0; /** - * Bitmask of all Check IDs seen. + * Uncompressed size of the file (all Streams). */ - private int checkTypes = 0; + private long uncompressedSize = 0; /** - * Integrity Check in the current XZ Stream. The constructor leaves - * this to point to the Check of the first Stream. + * Uncompressed size of the largest XZ Block in the file. */ - private Check check; + private long largestBlockSize = 0; /** - * Decoder of the current XZ Block, if any. + * Number of XZ Blocks in the file. */ - private BlockInputStream blockDecoder = null; + private int blockCount = 0; /** - * Uncompressed size of the file (all Streams). + * Size and position information about the current Block. + * If there are no Blocks, all values will be <code>-1</code>. */ - private long uncompressedSize = 0; + private final BlockInfo curBlockInfo; /** - * Uncompressed size of the largest XZ Block in the file. + * Integrity Check in the current XZ Stream. The constructor leaves + * this to point to the Check of the first Stream. */ - private long largestBlockSize = 0; + private Check check; + + /** + * Decoder of the current XZ Block, if any. + */ + private BlockInputStream blockDecoder = null; /** * Current uncompressed position. @@ -282,6 +286,7 @@ public class SeekableXZInputStream extends SeekableInputStream { in.seek(pos - streamFooter.backwardSize); // Decode the Index field. 
+ IndexDecoder index; try { index = new IndexDecoder(in, streamFooter, streamPadding, memoryLimit); @@ -331,6 +336,12 @@ public class SeekableXZInputStream extends SeekableInputStream { if (uncompressedSize < 0) throw new UnsupportedOptionsException("XZ file is too big"); + // Update the Block count and check that it fits into an int. + blockCount += index.getRecordCount(); + if (blockCount < 0) + throw new UnsupportedOptionsException( + "XZ file has over " + Integer.MAX_VALUE + " Blocks"); + // Add this Stream to the list of Streams. streams.add(index); @@ -342,6 +353,23 @@ public class SeekableXZInputStream extends SeekableInputStream { // Save it now that indexMemoryUsage has been substracted from it. this.memoryLimit = memoryLimit; + + // Store the relative offsets of the Streams. This way we don't + // need to recalculate them in this class when seeking; the + // IndexDecoder instances will handle them. + IndexDecoder prev = (IndexDecoder)streams.get(streams.size() - 1); + for (int i = streams.size() - 2; i >= 0; --i) { + IndexDecoder cur = (IndexDecoder)streams.get(i); + cur.setOffsets(prev); + prev = cur; + } + + // Initialize curBlockInfo to point to the first Stream. + // The blockNumber will be left to -1 so that .hasNext() + // and .setNext() work to get the first Block when starting + // to decompress from the beginning of the file. + IndexDecoder first = (IndexDecoder)streams.get(streams.size() - 1); + curBlockInfo = new BlockInfo(first); } /** @@ -581,9 +609,9 @@ public class SeekableXZInputStream extends SeekableInputStream { // from the same Stream. If there are no more Blocks in this Stream, // then we behave as if seek(long) had been called. 
if (!seekNeeded) { - if (index.hasNext()) { - BlockInfo info = index.getNext(); - initBlockDecoder(info); + if (curBlockInfo.hasNext()) { + curBlockInfo.setNext(); + initBlockDecoder(); return; } @@ -603,32 +631,21 @@ public class SeekableXZInputStream extends SeekableInputStream { endReached = false; // Locate the Stream that contains the uncompressed target position. - int i = streams.size(); - assert i >= 1; - - long uncompressedSum = 0; - long compressedSum = 0; - - while (true) { - index = (IndexDecoder)streams.get(--i); - if (uncompressedSum + index.getUncompressedSize() > seekPos) + IndexDecoder index; + for (int i = 0; ; ++i) { + index = (IndexDecoder)streams.get(i); + if (index.hasUncompressedOffset(seekPos)) break; - - uncompressedSum += index.getUncompressedSize(); - compressedSum += index.getStreamAndPaddingSize(); - assert (compressedSum & 3) == 0; } - // Locate the Block from the Stream that contains - // the uncompressed target position. - BlockInfo info = index.locate(seekPos - uncompressedSum); - assert (info.compressedOffset & 3) == 0 : info.compressedOffset; + // Locate the Block that contains the uncompressed target position. + index.locateBlock(curBlockInfo, seekPos); - // Adjust the Stream-specific offsets to file offsets. - info.compressedOffset += compressedSum; - info.uncompressedOffset += uncompressedSum; - assert seekPos >= info.uncompressedOffset; - assert seekPos < info.uncompressedOffset + info.uncompressedSize; + assert (curBlockInfo.compressedOffset & 3) == 0; + assert curBlockInfo.uncompressedSize > 0; + assert seekPos >= curBlockInfo.uncompressedOffset; + assert seekPos < curBlockInfo.uncompressedOffset + + curBlockInfo.uncompressedSize; // Seek in the underlying stream and create a new Block decoder // only if really needed. We can skip it if the current position @@ -640,17 +657,17 @@ public class SeekableXZInputStream extends SeekableInputStream { // In that case, decoding of the current Block hasn't been started // yet. 
(Decoding of a Block won't be started until at least one // byte will also be read from it.) - if (!(curPos > info.uncompressedOffset && curPos <= seekPos)) { + if (!(curPos > curBlockInfo.uncompressedOffset && curPos <= seekPos)) { // Seek to the beginning of the Block. - in.seek(info.compressedOffset); + in.seek(curBlockInfo.compressedOffset); // Since it is possible that this Block is from a different // Stream than the previous Block, initialize a new Check. - check = Check.getInstance(info.streamFlags.checkType); + check = Check.getInstance(curBlockInfo.getCheckType()); // Create a new Block decoder. - initBlockDecoder(info); - curPos = info.uncompressedOffset; + initBlockDecoder(); + curPos = curBlockInfo.uncompressedOffset; } // If the target wasn't at a Block boundary, decompress and throw @@ -662,23 +679,22 @@ public class SeekableXZInputStream extends SeekableInputStream { long skipAmount = seekPos - curPos; if (blockDecoder.skip(skipAmount) != skipAmount) throw new CorruptedInputException(); - } - curPos = seekPos; + curPos = seekPos; + } } /** * Initializes a new BlockInputStream. This is a helper function for * <code>seek()</code>. */ - private void initBlockDecoder(BlockInfo info) throws IOException { + private void initBlockDecoder() throws IOException { try { // Set it to null first so that GC can collect it if memory // runs tight when initializing a new BlockInputStream. blockDecoder = null; blockDecoder = new BlockInputStream(in, check, memoryLimit, - info.unpaddedSize, - info.uncompressedSize); + curBlockInfo.unpaddedSize, curBlockInfo.uncompressedSize); } catch (MemoryLimitException e) { // BlockInputStream doesn't know how much memory we had // already needed so we need to recreate the exception. 
diff --git a/src/org/tukaani/xz/index/BlockInfo.java b/src/org/tukaani/xz/index/BlockInfo.java index d45fb86..babae7f 100644 --- a/src/org/tukaani/xz/index/BlockInfo.java +++ b/src/org/tukaani/xz/index/BlockInfo.java @@ -12,9 +12,27 @@ package org.tukaani.xz.index; import org.tukaani.xz.common.StreamFlags; public class BlockInfo { - public StreamFlags streamFlags; - public long compressedOffset; - public long uncompressedOffset; - public long unpaddedSize; - public long uncompressedSize; + public int blockNumber = -1; + public long compressedOffset = -1; + public long uncompressedOffset = -1; + public long unpaddedSize = -1; + public long uncompressedSize = -1; + + IndexDecoder index; + + public BlockInfo(IndexDecoder indexOfFirstStream) { + index = indexOfFirstStream; + } + + public int getCheckType() { + return index.getStreamFlags().checkType; + } + + public boolean hasNext() { + return index.hasRecord(blockNumber + 1); + } + + public void setNext() { + index.setBlockInfo(this, blockNumber + 1); + } } diff --git a/src/org/tukaani/xz/index/IndexDecoder.java b/src/org/tukaani/xz/index/IndexDecoder.java index 85c1848..a3ae986 100644 --- a/src/org/tukaani/xz/index/IndexDecoder.java +++ b/src/org/tukaani/xz/index/IndexDecoder.java @@ -20,25 +20,29 @@ import org.tukaani.xz.MemoryLimitException; import org.tukaani.xz.UnsupportedOptionsException; public class IndexDecoder extends IndexBase { - private final BlockInfo info = new BlockInfo(); + private final StreamFlags streamFlags; private final long streamPadding; private final int memoryUsage; + + // Unpadded Size and Uncompressed Size fields private final long[] unpadded; private final long[] uncompressed; + + // Uncompressed size of the largest Block. It is used by + // SeekableXZInputStream to find out the largest Block of the .xz file. private long largestBlockSize = 0; - /** - * Current position in the arrays. 
This is initialized to <code>-1</code> - * because then it is possible to use <code>hasNext()</code> and - * <code>getNext()</code> to get BlockInfo of the first Block. - */ - private int pos = -1; + // Offsets relative to the beginning of the .xz file. These are all zero + // for the first Stream in the file. + private int recordOffset = 0; + private long compressedOffset = 0; + private long uncompressedOffset = 0; public IndexDecoder(SeekableInputStream in, StreamFlags streamFooterFlags, long streamPadding, int memoryLimit) throws IOException { super(new CorruptedInputException("XZ Index is corrupt")); - info.streamFlags = streamFooterFlags; + this.streamFlags = streamFooterFlags; this.streamPadding = streamPadding; // If endPos is exceeded before the CRC32 field has been decoded, @@ -128,31 +132,28 @@ public class IndexDecoder extends IndexBase { throw new CorruptedInputException("XZ Index is corrupt"); } - public BlockInfo locate(long target) { - assert target < uncompressedSum; - - int left = 0; - int right = unpadded.length - 1; - - while (left < right) { - int i = left + (right - left) / 2; - - if (uncompressed[i] <= target) - left = i + 1; - else - right = i; - } - - pos = left; - return getInfo(); + public void setOffsets(IndexDecoder prev) { + // NOTE: SeekableXZInputStream checks that the total number of Blocks + // in concatenated Streams fits into an int. + recordOffset = prev.recordOffset + (int)prev.recordCount; + compressedOffset = prev.compressedOffset + + prev.getStreamSize() + prev.streamPadding; + assert (compressedOffset & 3) == 0; + uncompressedOffset = prev.uncompressedOffset + prev.uncompressedSum; } public int getMemoryUsage() { return memoryUsage; } - public long getStreamAndPaddingSize() { - return getStreamSize() + streamPadding; + public StreamFlags getStreamFlags() { + return streamFlags; + } + + public int getRecordCount() { + // It was already checked in the constructor that it fits into an int. 
+ // Otherwise we couldn't have allocated the arrays. + return (int)recordCount; } public long getUncompressedSize() { @@ -163,16 +164,47 @@ public class IndexDecoder extends IndexBase { return largestBlockSize; } - public boolean hasNext() { - return pos + 1 < recordCount; + public boolean hasUncompressedOffset(long pos) { + return pos >= uncompressedOffset + && pos < uncompressedOffset + uncompressedSum; } - public BlockInfo getNext() { - ++pos; - return getInfo(); + public boolean hasRecord(int blockNumber) { + return blockNumber >= recordOffset + && blockNumber < recordOffset + recordCount; } - private BlockInfo getInfo() { + public void locateBlock(BlockInfo info, long target) { + assert target >= uncompressedOffset; + target -= uncompressedOffset; + assert target < uncompressedSum; + + int left = 0; + int right = unpadded.length - 1; + + while (left < right) { + int i = left + (right - left) / 2; + + if (uncompressed[i] <= target) + left = i + 1; + else + right = i; + } + + setBlockInfo(info, recordOffset + left); + } + + public void setBlockInfo(BlockInfo info, int blockNumber) { + // The caller has checked that the given Block number is inside + // this Index. + assert blockNumber >= recordOffset; + assert blockNumber - recordOffset < recordCount; + + info.index = this; + info.blockNumber = blockNumber; + + int pos = blockNumber - recordOffset; + if (pos == 0) { info.compressedOffset = 0; info.uncompressedOffset = 0; @@ -184,7 +216,8 @@ public class IndexDecoder extends IndexBase { info.unpaddedSize = unpadded[pos] - info.compressedOffset; info.uncompressedSize = uncompressed[pos] - info.uncompressedOffset; - info.compressedOffset += DecoderUtil.STREAM_HEADER_SIZE; - return info; + info.compressedOffset += compressedOffset + + DecoderUtil.STREAM_HEADER_SIZE; + info.uncompressedOffset += uncompressedOffset; } } |