aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHaibo Huang <hhb@google.com>2019-01-18 17:14:56 -0800
committerandroid-build-merger <android-build-merger@google.com>2019-01-18 17:14:56 -0800
commitadb638a42ccea02bcbe3f29cbc82dde61b8d203d (patch)
tree9b56375c8a5b7aa62f7caec1d9b2662f06eb0615
parent6b37cc4aad0109f9bc0da689647bd739520352f4 (diff)
parentff7995651f8ea17b4f7001b8147fffc87ca620f2 (diff)
downloadxz-java-adb638a42ccea02bcbe3f29cbc82dde61b8d203d.tar.gz
Merge tag 'v1.8' into xz-java am: 4ad7198ff9
am: ff7995651f Change-Id: Ia5c4227911477bab50900d0f2b7da76f11fff040
-rw-r--r--.gitignore3
-rw-r--r--COPYING10
-rw-r--r--METADATA19
-rw-r--r--MODULE_LICENSE_PUBLIC_DOMAIN0
-rw-r--r--NEWS95
l---------NOTICE1
-rw-r--r--README55
-rw-r--r--THANKS20
-rw-r--r--build.properties34
-rw-r--r--build.xml162
-rw-r--r--fileset-misc.txt11
-rw-r--r--fileset-src.txt109
-rw-r--r--maven/README2
-rw-r--r--maven/pom_template.xml58
-rw-r--r--src/LZMADecDemo.java80
-rw-r--r--src/LZMAEncDemo.java57
-rw-r--r--src/TestAllocSpeed.java106
-rw-r--r--src/XZDecDemo.java71
-rw-r--r--src/XZEncDemo.java41
-rw-r--r--src/XZSeekDecDemo.java75
-rw-r--r--src/XZSeekEncDemo.java68
-rw-r--r--src/org/tukaani/xz/ARMOptions.java37
-rw-r--r--src/org/tukaani/xz/ARMThumbOptions.java37
-rw-r--r--src/org/tukaani/xz/ArrayCache.java172
-rw-r--r--src/org/tukaani/xz/BCJCoder.java35
-rw-r--r--src/org/tukaani/xz/BCJDecoder.java62
-rw-r--r--src/org/tukaani/xz/BCJEncoder.java49
-rw-r--r--src/org/tukaani/xz/BCJOptions.java57
-rw-r--r--src/org/tukaani/xz/BasicArrayCache.java281
-rw-r--r--src/org/tukaani/xz/BlockInputStream.java305
-rw-r--r--src/org/tukaani/xz/BlockOutputStream.java135
-rw-r--r--src/org/tukaani/xz/CloseIgnoringInputStream.java48
-rw-r--r--src/org/tukaani/xz/CorruptedInputException.java37
-rw-r--r--src/org/tukaani/xz/CountingInputStream.java47
-rw-r--r--src/org/tukaani/xz/CountingOutputStream.java54
-rw-r--r--src/org/tukaani/xz/DeltaCoder.java26
-rw-r--r--src/org/tukaani/xz/DeltaDecoder.java32
-rw-r--r--src/org/tukaani/xz/DeltaEncoder.java37
-rw-r--r--src/org/tukaani/xz/DeltaInputStream.java146
-rw-r--r--src/org/tukaani/xz/DeltaOptions.java103
-rw-r--r--src/org/tukaani/xz/DeltaOutputStream.java113
-rw-r--r--src/org/tukaani/xz/FilterCoder.java16
-rw-r--r--src/org/tukaani/xz/FilterDecoder.java17
-rw-r--r--src/org/tukaani/xz/FilterEncoder.java18
-rw-r--r--src/org/tukaani/xz/FilterOptions.java104
-rw-r--r--src/org/tukaani/xz/FinishableOutputStream.java31
-rw-r--r--src/org/tukaani/xz/FinishableWrapperOutputStream.java70
-rw-r--r--src/org/tukaani/xz/IA64Options.java37
-rw-r--r--src/org/tukaani/xz/IndexIndicatorException.java14
-rw-r--r--src/org/tukaani/xz/LZMA2Coder.java26
-rw-r--r--src/org/tukaani/xz/LZMA2Decoder.java35
-rw-r--r--src/org/tukaani/xz/LZMA2Encoder.java51
-rw-r--r--src/org/tukaani/xz/LZMA2InputStream.java400
-rw-r--r--src/org/tukaani/xz/LZMA2Options.java583
-rw-r--r--src/org/tukaani/xz/LZMA2OutputStream.java270
-rw-r--r--src/org/tukaani/xz/LZMAInputStream.java763
-rw-r--r--src/org/tukaani/xz/LZMAOutputStream.java331
-rw-r--r--src/org/tukaani/xz/MemoryLimitException.java60
-rw-r--r--src/org/tukaani/xz/PowerPCOptions.java37
-rw-r--r--src/org/tukaani/xz/RawCoder.java33
-rw-r--r--src/org/tukaani/xz/ResettableArrayCache.java120
-rw-r--r--src/org/tukaani/xz/SPARCOptions.java37
-rw-r--r--src/org/tukaani/xz/SeekableFileInputStream.java102
-rw-r--r--src/org/tukaani/xz/SeekableInputStream.java81
-rw-r--r--src/org/tukaani/xz/SeekableXZInputStream.java1152
-rw-r--r--src/org/tukaani/xz/SimpleInputStream.java138
-rw-r--r--src/org/tukaani/xz/SimpleOutputStream.java151
-rw-r--r--src/org/tukaani/xz/SingleXZInputStream.java535
-rw-r--r--src/org/tukaani/xz/UncompressedLZMA2OutputStream.java164
-rw-r--r--src/org/tukaani/xz/UnsupportedOptionsException.java34
-rw-r--r--src/org/tukaani/xz/X86Options.java37
-rw-r--r--src/org/tukaani/xz/XZ.java53
-rw-r--r--src/org/tukaani/xz/XZFormatException.java24
-rw-r--r--src/org/tukaani/xz/XZIOException.java27
-rw-r--r--src/org/tukaani/xz/XZInputStream.java527
-rw-r--r--src/org/tukaani/xz/XZOutputStream.java606
-rw-r--r--src/org/tukaani/xz/check/CRC32.java33
-rw-r--r--src/org/tukaani/xz/check/CRC64.java54
-rw-r--r--src/org/tukaani/xz/check/Check.java57
-rw-r--r--src/org/tukaani/xz/check/None.java24
-rw-r--r--src/org/tukaani/xz/check/SHA256.java30
-rw-r--r--src/org/tukaani/xz/common/DecoderUtil.java121
-rw-r--r--src/org/tukaani/xz/common/EncoderUtil.java36
-rw-r--r--src/org/tukaani/xz/common/StreamFlags.java15
-rw-r--r--src/org/tukaani/xz/common/Util.java28
-rw-r--r--src/org/tukaani/xz/delta/DeltaCoder.java27
-rw-r--r--src/org/tukaani/xz/delta/DeltaDecoder.java24
-rw-r--r--src/org/tukaani/xz/delta/DeltaEncoder.java24
-rw-r--r--src/org/tukaani/xz/index/BlockInfo.java38
-rw-r--r--src/org/tukaani/xz/index/IndexBase.java56
-rw-r--r--src/org/tukaani/xz/index/IndexDecoder.java223
-rw-r--r--src/org/tukaani/xz/index/IndexEncoder.java59
-rw-r--r--src/org/tukaani/xz/index/IndexHash.java98
-rw-r--r--src/org/tukaani/xz/index/IndexRecord.java20
-rw-r--r--src/org/tukaani/xz/lz/BT4.java265
-rw-r--r--src/org/tukaani/xz/lz/CRC32Hash.java35
-rw-r--r--src/org/tukaani/xz/lz/HC4.java210
-rw-r--r--src/org/tukaani/xz/lz/Hash234.java102
-rw-r--r--src/org/tukaani/xz/lz/LZDecoder.java133
-rw-r--r--src/org/tukaani/xz/lz/LZEncoder.java428
-rw-r--r--src/org/tukaani/xz/lz/Matches.java22
-rw-r--r--src/org/tukaani/xz/lzma/LZMACoder.java140
-rw-r--r--src/org/tukaani/xz/lzma/LZMADecoder.java199
-rw-r--r--src/org/tukaani/xz/lzma/LZMAEncoder.java750
-rw-r--r--src/org/tukaani/xz/lzma/LZMAEncoderFast.java153
-rw-r--r--src/org/tukaani/xz/lzma/LZMAEncoderNormal.java568
-rw-r--r--src/org/tukaani/xz/lzma/Optimum.java73
-rw-r--r--src/org/tukaani/xz/lzma/State.java75
-rw-r--r--src/org/tukaani/xz/package-info.java45
-rw-r--r--src/org/tukaani/xz/rangecoder/RangeCoder.java26
-rw-r--r--src/org/tukaani/xz/rangecoder/RangeDecoder.java83
-rw-r--r--src/org/tukaani/xz/rangecoder/RangeDecoderFromBuffer.java71
-rw-r--r--src/org/tukaani/xz/rangecoder/RangeDecoderFromStream.java41
-rw-r--r--src/org/tukaani/xz/rangecoder/RangeEncoder.java200
-rw-r--r--src/org/tukaani/xz/rangecoder/RangeEncoderToBuffer.java59
-rw-r--r--src/org/tukaani/xz/rangecoder/RangeEncoderToStream.java27
-rw-r--r--src/org/tukaani/xz/simple/ARM.java50
-rw-r--r--src/org/tukaani/xz/simple/ARMThumb.java53
-rw-r--r--src/org/tukaani/xz/simple/IA64.java81
-rw-r--r--src/org/tukaani/xz/simple/PowerPC.java50
-rw-r--r--src/org/tukaani/xz/simple/SPARC.java56
-rw-r--r--src/org/tukaani/xz/simple/SimpleFilter.java14
-rw-r--r--src/org/tukaani/xz/simple/X86.java98
123 files changed, 14648 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7866063
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+build
+extdoc
+*.class
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..c1d404d
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,10 @@
+
+Licensing of XZ for Java
+========================
+
+ All the files in this package have been written by Lasse Collin
+ and/or Igor Pavlov. All these files have been put into the
+ public domain. You can do whatever you want with these files.
+
+ This software is provided "as is", without any warranty.
+
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..942198e
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,19 @@
+name: "XZ for Java"
+description: "This aims to be a complete implementation of XZ data compression in pure Java. Single-threaded streamed compression and decompression and random access decompression have been fully implemented. Threading is planned but it is unknown when it will be implemented."
+third_party {
+ url {
+ type: HOMEPAGE
+ value: "https://tukaani.org/xz/java.html"
+ }
+ url {
+ type: GIT
+ value: "https://git.tukaani.org/xz-java.git"
+ }
+ version: "v1.8"
+ license_type: UNENCUMBERED
+ last_upgrade_date {
+ year: 2019
+ month: 1
+ day: 18
+ }
+}
diff --git a/MODULE_LICENSE_PUBLIC_DOMAIN b/MODULE_LICENSE_PUBLIC_DOMAIN
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_PUBLIC_DOMAIN
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..3183f2e
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,95 @@
+
+XZ for Java release notes
+=========================
+
+1.8 (2018-01-04)
+
+ * Fix a binary compatibility regression: XZ for Java 1.7 binaries
+ in the Maven Central require Java 9 which is too new. XZ for
+ Java 1.8 binaries require Java 5. (XZ for Java 1.6 and older
+ binaries require Java 1.4.)
+
+ If you are using OpenJDK 9 or later, you will need to edit the
+ "sourcever = 1.5" line in the file "build.properties" before
+ running "ant". Set it to 1.6 or higher. The default value 1.5
+ isn't supported by OpenJDK 9 or later.
+
+ * Add "Automatic-Module-Name" = "org.tukaani.xz".
+
+1.7 (2017-12-29)
+
+ * Fix LZMA2InputStream.available() which could return a too high
+ value in case of uncompressed LZMA2 chunks. This incorrect
+ value was visible via other available() methods too, for example,
+ XZInputStream.available().
+
+ * Add the ArrayCache API. It's a pool-like API to reuse large byte
+ and int arrays between compressor and decompressor instances.
+ If you are (de)compressing many tiny files in a row, taking
+ advantage of this API can improve performance significantly.
+
+1.6 (2016-11-27)
+
+ * Fix LZMA2Options.getInputStream to work with a preset dictionary.
+
+ * Make it possible to disable verification of integrity checks in
+ XZ decompression. It should almost never be used but may be useful
+ in some rare situations. This feature is available via new
+ constructors in XZInputStream, SingleXZInputStream, and
+ SeekableXZInputStream.
+
+ * Add LZMAOutputStream for encoding to raw LZMA (i.e. LZMA1) streams
+ and to the legacy .lzma format.
+
+1.5 (2014-03-08)
+
+ * Fix a wrong assertion in BCJ decoders.
+
+ * Use a field instead of reallocating a temporary one-byte buffer
+ in read() and write() implementations in several classes.
+
+1.4 (2013-09-22)
+
+ * Add LZMAInputStream for decoding .lzma files and raw LZMA streams.
+
+1.3 (2013-05-12)
+
+ * Fix a data corruption bug when flushing the LZMA2 encoder or
+ when using a preset dictionary.
+
+ * Make information about the XZ Block positions and sizes available
+ in SeekableXZInputStream by adding the following public functions:
+ - int getStreamCount()
+ - int getBlockCount()
+ - long getBlockPos(int blockNumber)
+ - long getBlockSize(int blockNumber)
+ - long getBlockCompPos(int blockNumber)
+ - long getBlockCompSize(int blockNumber)
+ - int getBlockCheckType(int blockNumber)
+ - int getBlockNumber(long pos)
+ - void seekToBlock(int blockNumber)
+
+ * Minor improvements to javadoc comments were made.
+
+1.2 (2013-01-29)
+
+ * Use fields instead of reallocating frequently-needed temporary
+ objects in the LZMA encoder.
+
+ * Fix the contents of xz-${version}-sources.jar.
+
+ * Add OSGi attributes to xz.jar.
+
+1.1 (2012-07-05)
+
+ * The depthLimit argument in the LZMA2Options constructor is
+ no longer ignored.
+
+ * LZMA2Options() can no longer throw UnsupportedOptionsException.
+
+ * Fix bugs in the preset dictionary support in the LZMA2 encoder.
+
+1.0 (2011-10-22)
+
+ * The first stable release
+
diff --git a/NOTICE b/NOTICE
new file mode 120000
index 0000000..d24842f
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1 @@
+COPYING \ No newline at end of file
diff --git a/README b/README
new file mode 100644
index 0000000..8869996
--- /dev/null
+++ b/README
@@ -0,0 +1,55 @@
+
+XZ for Java
+===========
+
+Introduction
+
+ This aims to be a complete implementation of XZ data compression
+ in pure Java. Features:
+ - Full support for the .xz file format specification version 1.0.4
+ - Single-threaded streamed compression and decompression
+ - Single-threaded decompression with limited random access support
+ - Raw streams (no .xz headers) for advanced users, including LZMA2
+ with preset dictionary
+
+ Threading is planned but it is unknown when it will be implemented.
+
+ For the latest source code, see the project home page:
+
+ https://tukaani.org/xz/java.html
+
+ The source code is compatible with Java 5 and later.
+
+Building
+
+ It is recommended to use Apache Ant. Type "ant" to compile the
+ classes and create the .jar files. Type "ant doc" to build the
+ javadoc HTML documentation. Note that building the documentation
+ will download a small file named "package-list" from Oracle to
+ enable linking to the documentation of the standard Java classes.
+
+ If you are using OpenJDK 9 or later, you will need to edit the
+ "sourcever = 1.5" line in the file "build.properties" before
+ running "ant". Set it to 1.6 or higher. The default value 1.5
+ isn't supported by OpenJDK 9 or later.
+
+ If you cannot or don't want to use Ant, just compile all .java
+ files under the "src" directory.
+
+Demo programs
+
+ You can test compression with XZEncDemo, which compresses from
+ standard input to standard output:
+
+ java -jar build/jar/XZEncDemo.jar < foo.txt > foo.txt.xz
+
+ You can test decompression with XZDecDemo, which decompresses to
+ standard output:
+
+ java -jar build/jar/XZDecDemo.jar foo.txt.xz
+
+Reporting bugs
+
+ Report bugs to <lasse.collin@tukaani.org> or visit the IRC channel
+ #tukaani on Freenode and talk to Larhzu.
+
diff --git a/THANKS b/THANKS
new file mode 100644
index 0000000..c0a14ed
--- /dev/null
+++ b/THANKS
@@ -0,0 +1,20 @@
+
+Thanks
+======
+
+People (in alphabetical order):
+ - Stefan Bodewig
+ - Carl Hasselskog
+ - Arunesh Mathur
+ - Jim Meyering
+ - Benoit Nadeau
+ - Brett Okken
+ - Igor Sakovich
+ - Christian Schlichtherle
+ - Matthias Stevens
+ - Alyosha Vasilieva
+
+Companies (in alphabetical order):
+ - Cerner
+ - Red Hat
+
diff --git a/build.properties b/build.properties
new file mode 100644
index 0000000..fd5c373
--- /dev/null
+++ b/build.properties
@@ -0,0 +1,34 @@
+#
+# build.properties
+#
+# Author: Lasse Collin <lasse.collin@tukaani.org>
+#
+# This file has been put into the public domain.
+# You can do whatever you want with this file.
+#
+
+title = XZ data compression
+homepage = https://tukaani.org/xz/java.html
+version = 1.8
+debug = false
+
+# sourcever sets -source and -target options for javac.
+#
+# The source code is Java 5 compatible but the oldest -source/-target pair
+# that OpenJDK 9 supports is 1.6 (Java 6). Edit this if you are using
+# OpenJDK 9 or later.
+sourcever = 1.5
+
+src_dir = src
+build_dir = build
+dist_dir = ${build_dir}/dist
+dist_file = ${dist_dir}/xz-java-${version}.zip
+classes_dir = ${build_dir}/classes
+jar_dir = ${build_dir}/jar
+doc_dir = ${build_dir}/doc
+
+extdoc_url = https://docs.oracle.com/javase/9/docs/api
+extdoc_dir = extdoc
+
+pom_template = maven/pom_template.xml
+maven_dir = ${build_dir}/maven
diff --git a/build.xml b/build.xml
new file mode 100644
index 0000000..4abaec6
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,162 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ build.xml
+
+ Author: Lasse Collin <lasse.collin@tukaani.org>
+
+ This file has been put into the public domain.
+ You can do whatever you want with this file.
+-->
+
+<project name="XZ" default="jar">
+
+ <property file="build.properties"/>
+
+ <target name="clean"
+ description="Deletes generated files except 'extdoc/package-list'">
+ <delete dir="${build_dir}"/>
+ </target>
+
+ <target name="distclean" depends="clean"
+ description="Deletes all generated files">
+ <delete dir="extdoc"/>
+ </target>
+
+ <target name="dist" description="Creates a source package (.zip)">
+ <mkdir dir="${dist_dir}"/>
+ <zip destfile="${dist_file}">
+ <fileset dir="." includesfile="fileset-misc.txt"/>
+ <fileset dir="." includesfile="fileset-src.txt"/>
+ </zip>
+ </target>
+
+ <target name="doc"
+ description="Generates HTML documentation with javadoc">
+ <mkdir dir="${doc_dir}"/>
+ <mkdir dir="${extdoc_dir}"/>
+ <get src="${extdoc_url}/package-list"
+ dest="${extdoc_dir}/package-list" skipexisting="true"/>
+ <javadoc sourcepath="${src_dir}" destdir="${doc_dir}"
+ source="${sourcever}" packagenames="org.tukaani.xz"
+ windowtitle="XZ data compression"
+ linkoffline="${extdoc_url} ${extdoc_dir}"/>
+ </target>
+
+ <target name="compile" description="Compiles the classes">
+ <mkdir dir="${classes_dir}"/>
+ <javac srcdir="." sourcepath="${src_dir}" destdir="${classes_dir}"
+ includeAntRuntime="false" debug="${debug}"
+ source="${sourcever}" target="${sourcever}"
+ includesfile="fileset-src.txt"
+ excludes="**/package-info.java">
+ <compilerarg compiler="modern" value="-Xlint"/>
+ </javac>
+ </target>
+
+ <target name="jar" depends="compile"
+ description="Creates JAR packages">
+ <mkdir dir="${jar_dir}"/>
+
+ <jar destfile="${jar_dir}/xz.jar" basedir="${classes_dir}"
+ includes="org/tukaani/xz/**">
+ <manifest>
+ <attribute name="Implementation-Title" value="${title}"/>
+ <attribute name="Implementation-Version" value="${version}"/>
+ <attribute name="Implementation-URL" value="${homepage}"/>
+ <attribute name="Sealed" value="true"/>
+ <attribute name="Automatic-Module-Name"
+ value="org.tukaani.xz"/>
+ <!-- Attributes required for OSGi bundles. -->
+ <attribute name="Bundle-ManifestVersion" value="2"/>
+ <attribute name="Bundle-SymbolicName" value="org.tukaani.xz"/>
+ <attribute name="Bundle-Version" value="${version}"/>
+ <attribute name="Export-Package" value="org.tukaani.xz"/>
+ <attribute name="Bundle-Name" value="${title}"/>
+ <attribute name="Bundle-DocURL" value="${homepage}"/>
+ </manifest>
+ </jar>
+
+ <jar destfile="${jar_dir}/TestAllocSpeed.jar" basedir="${classes_dir}"
+ includes="TestAllocSpeed.class">
+ <manifest>
+ <attribute name="Main-Class" value="TestAllocSpeed"/>
+ <attribute name="Class-Path" value="xz.jar"/>
+ </manifest>
+ </jar>
+
+ <jar destfile="${jar_dir}/XZEncDemo.jar" basedir="${classes_dir}"
+ includes="XZEncDemo.class">
+ <manifest>
+ <attribute name="Main-Class" value="XZEncDemo"/>
+ <attribute name="Class-Path" value="xz.jar"/>
+ </manifest>
+ </jar>
+
+ <jar destfile="${jar_dir}/XZDecDemo.jar" basedir="${classes_dir}"
+ includes="XZDecDemo.class">
+ <manifest>
+ <attribute name="Main-Class" value="XZDecDemo"/>
+ <attribute name="Class-Path" value="xz.jar"/>
+ </manifest>
+ </jar>
+
+ <jar destfile="${jar_dir}/XZSeekEncDemo.jar" basedir="${classes_dir}"
+ includes="XZSeekEncDemo.class">
+ <manifest>
+ <attribute name="Main-Class" value="XZSeekEncDemo"/>
+ <attribute name="Class-Path" value="xz.jar"/>
+ </manifest>
+ </jar>
+
+ <jar destfile="${jar_dir}/XZSeekDecDemo.jar" basedir="${classes_dir}"
+ includes="XZSeekDecDemo.class">
+ <manifest>
+ <attribute name="Main-Class" value="XZSeekDecDemo"/>
+ <attribute name="Class-Path" value="xz.jar"/>
+ </manifest>
+ </jar>
+
+ <jar destfile="${jar_dir}/LZMAEncDemo.jar" basedir="${classes_dir}"
+ includes="LZMAEncDemo.class">
+ <manifest>
+ <attribute name="Main-Class" value="LZMAEncDemo"/>
+ <attribute name="Class-Path" value="xz.jar"/>
+ </manifest>
+ </jar>
+
+ <jar destfile="${jar_dir}/LZMADecDemo.jar" basedir="${classes_dir}"
+ includes="LZMADecDemo.class">
+ <manifest>
+ <attribute name="Main-Class" value="LZMADecDemo"/>
+ <attribute name="Class-Path" value="xz.jar"/>
+ </manifest>
+ </jar>
+ </target>
+
+ <!-- It's an ugly quick hack. Maybe some day there will be a cleaner
+ version (e.g. by using Maven). -->
+ <target name="maven" depends="dist, doc, jar"
+ description="Creates the files for a Maven repository">
+ <mkdir dir="${maven_dir}"/>
+
+ <copy file="${pom_template}" tofile="${maven_dir}/xz-${version}.pom"
+ overwrite="true">
+ <filterset>
+ <filter token="VERSION" value="${version}"/>
+ <filter token="TITLE" value="${title}"/>
+ <filter token="HOMEPAGE" value="${homepage}"/>
+ </filterset>
+ </copy>
+
+ <copy file="${jar_dir}/xz.jar" tofile="${maven_dir}/xz-${version}.jar"
+ preservelastmodified="true" overwrite="true"/>
+
+ <jar destfile="${maven_dir}/xz-${version}-javadoc.jar"
+ basedir="${doc_dir}"/>
+
+ <jar destfile="${maven_dir}/xz-${version}-sources.jar"
+ basedir="${src_dir}" includes="org/tukaani/xz/**"/>
+ </target>
+
+</project>
diff --git a/fileset-misc.txt b/fileset-misc.txt
new file mode 100644
index 0000000..9d16359
--- /dev/null
+++ b/fileset-misc.txt
@@ -0,0 +1,11 @@
+README
+NEWS
+COPYING
+THANKS
+build.xml
+build.properties
+fileset-src.txt
+fileset-misc.txt
+.gitignore
+maven/README
+maven/pom_template.xml
diff --git a/fileset-src.txt b/fileset-src.txt
new file mode 100644
index 0000000..8c539a6
--- /dev/null
+++ b/fileset-src.txt
@@ -0,0 +1,109 @@
+src/TestAllocSpeed.java
+src/LZMADecDemo.java
+src/LZMAEncDemo.java
+src/XZDecDemo.java
+src/XZEncDemo.java
+src/XZSeekDecDemo.java
+src/XZSeekEncDemo.java
+src/org/tukaani/xz/ARMOptions.java
+src/org/tukaani/xz/ARMThumbOptions.java
+src/org/tukaani/xz/ArrayCache.java
+src/org/tukaani/xz/BasicArrayCache.java
+src/org/tukaani/xz/BCJCoder.java
+src/org/tukaani/xz/BCJDecoder.java
+src/org/tukaani/xz/BCJEncoder.java
+src/org/tukaani/xz/BCJOptions.java
+src/org/tukaani/xz/BlockInputStream.java
+src/org/tukaani/xz/BlockOutputStream.java
+src/org/tukaani/xz/CloseIgnoringInputStream.java
+src/org/tukaani/xz/CorruptedInputException.java
+src/org/tukaani/xz/CountingInputStream.java
+src/org/tukaani/xz/CountingOutputStream.java
+src/org/tukaani/xz/DeltaCoder.java
+src/org/tukaani/xz/DeltaDecoder.java
+src/org/tukaani/xz/DeltaEncoder.java
+src/org/tukaani/xz/DeltaInputStream.java
+src/org/tukaani/xz/DeltaOptions.java
+src/org/tukaani/xz/DeltaOutputStream.java
+src/org/tukaani/xz/FilterCoder.java
+src/org/tukaani/xz/FilterDecoder.java
+src/org/tukaani/xz/FilterEncoder.java
+src/org/tukaani/xz/FilterOptions.java
+src/org/tukaani/xz/FinishableOutputStream.java
+src/org/tukaani/xz/FinishableWrapperOutputStream.java
+src/org/tukaani/xz/IA64Options.java
+src/org/tukaani/xz/IndexIndicatorException.java
+src/org/tukaani/xz/LZMA2Coder.java
+src/org/tukaani/xz/LZMA2Decoder.java
+src/org/tukaani/xz/LZMA2Encoder.java
+src/org/tukaani/xz/LZMA2InputStream.java
+src/org/tukaani/xz/LZMA2Options.java
+src/org/tukaani/xz/LZMA2OutputStream.java
+src/org/tukaani/xz/LZMAInputStream.java
+src/org/tukaani/xz/LZMAOutputStream.java
+src/org/tukaani/xz/MemoryLimitException.java
+src/org/tukaani/xz/PowerPCOptions.java
+src/org/tukaani/xz/RawCoder.java
+src/org/tukaani/xz/ResettableArrayCache.java
+src/org/tukaani/xz/SPARCOptions.java
+src/org/tukaani/xz/SeekableFileInputStream.java
+src/org/tukaani/xz/SeekableInputStream.java
+src/org/tukaani/xz/SeekableXZInputStream.java
+src/org/tukaani/xz/SimpleInputStream.java
+src/org/tukaani/xz/SimpleOutputStream.java
+src/org/tukaani/xz/SingleXZInputStream.java
+src/org/tukaani/xz/UncompressedLZMA2OutputStream.java
+src/org/tukaani/xz/UnsupportedOptionsException.java
+src/org/tukaani/xz/X86Options.java
+src/org/tukaani/xz/XZ.java
+src/org/tukaani/xz/XZFormatException.java
+src/org/tukaani/xz/XZIOException.java
+src/org/tukaani/xz/XZInputStream.java
+src/org/tukaani/xz/XZOutputStream.java
+src/org/tukaani/xz/check/CRC32.java
+src/org/tukaani/xz/check/CRC64.java
+src/org/tukaani/xz/check/Check.java
+src/org/tukaani/xz/check/None.java
+src/org/tukaani/xz/check/SHA256.java
+src/org/tukaani/xz/common/DecoderUtil.java
+src/org/tukaani/xz/common/EncoderUtil.java
+src/org/tukaani/xz/common/StreamFlags.java
+src/org/tukaani/xz/common/Util.java
+src/org/tukaani/xz/delta/DeltaCoder.java
+src/org/tukaani/xz/delta/DeltaDecoder.java
+src/org/tukaani/xz/delta/DeltaEncoder.java
+src/org/tukaani/xz/index/BlockInfo.java
+src/org/tukaani/xz/index/IndexBase.java
+src/org/tukaani/xz/index/IndexDecoder.java
+src/org/tukaani/xz/index/IndexEncoder.java
+src/org/tukaani/xz/index/IndexHash.java
+src/org/tukaani/xz/index/IndexRecord.java
+src/org/tukaani/xz/lz/BT4.java
+src/org/tukaani/xz/lz/CRC32Hash.java
+src/org/tukaani/xz/lz/HC4.java
+src/org/tukaani/xz/lz/Hash234.java
+src/org/tukaani/xz/lz/LZDecoder.java
+src/org/tukaani/xz/lz/LZEncoder.java
+src/org/tukaani/xz/lz/Matches.java
+src/org/tukaani/xz/lzma/LZMACoder.java
+src/org/tukaani/xz/lzma/LZMADecoder.java
+src/org/tukaani/xz/lzma/LZMAEncoder.java
+src/org/tukaani/xz/lzma/LZMAEncoderFast.java
+src/org/tukaani/xz/lzma/LZMAEncoderNormal.java
+src/org/tukaani/xz/lzma/Optimum.java
+src/org/tukaani/xz/lzma/State.java
+src/org/tukaani/xz/package-info.java
+src/org/tukaani/xz/rangecoder/RangeCoder.java
+src/org/tukaani/xz/rangecoder/RangeDecoder.java
+src/org/tukaani/xz/rangecoder/RangeDecoderFromBuffer.java
+src/org/tukaani/xz/rangecoder/RangeDecoderFromStream.java
+src/org/tukaani/xz/rangecoder/RangeEncoder.java
+src/org/tukaani/xz/rangecoder/RangeEncoderToBuffer.java
+src/org/tukaani/xz/rangecoder/RangeEncoderToStream.java
+src/org/tukaani/xz/simple/ARM.java
+src/org/tukaani/xz/simple/ARMThumb.java
+src/org/tukaani/xz/simple/IA64.java
+src/org/tukaani/xz/simple/PowerPC.java
+src/org/tukaani/xz/simple/SPARC.java
+src/org/tukaani/xz/simple/SimpleFilter.java
+src/org/tukaani/xz/simple/X86.java
diff --git a/maven/README b/maven/README
new file mode 100644
index 0000000..2692a97
--- /dev/null
+++ b/maven/README
@@ -0,0 +1,2 @@
+The pom_template.xml is for a Maven repository but it's not meant
+for building the project. Note that build.xml will replace @foo@ tags.
diff --git a/maven/pom_template.xml b/maven/pom_template.xml
new file mode 100644
index 0000000..235844c
--- /dev/null
+++ b/maven/pom_template.xml
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Author: Lasse Collin <lasse.collin@tukaani.org>
+
+ This file has been put into the public domain.
+ You can do whatever you want with this file.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
+ http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <groupId>org.tukaani</groupId>
+ <artifactId>xz</artifactId>
+ <version>@VERSION@</version>
+ <packaging>jar</packaging>
+
+ <name>XZ for Java</name>
+ <description>@TITLE@</description>
+ <url>@HOMEPAGE@</url>
+
+ <licenses>
+ <license>
+ <name>Public Domain</name>
+ <comments>You can do whatever you want with this package.</comments>
+ <distribution>repo</distribution>
+ </license>
+ </licenses>
+
+ <scm>
+ <url>https://git.tukaani.org/?p=xz-java.git</url>
+ <connection>scm:git:https://git.tukaani.org/xz-java.git</connection>
+ </scm>
+
+ <developers>
+ <developer>
+ <name>Lasse Collin</name>
+ <email>lasse.collin@tukaani.org</email>
+ </developer>
+ </developers>
+
+ <contributors>
+ <contributor>
+ <!-- According to Maven docs, it's good to only list those people
+ as <developers> that should be contacted if someone wants
+ to talk with an upstream developer. Thus, Igor Pavlov is
+ marked as a <contributor> even though XZ for Java simply
+ couldn't exist without Igor Pavlov's code. -->
+ <name>Igor Pavlov</name>
+ <url>http://7-zip.org/</url>
+ </contributor>
+ </contributors>
+
+</project>
diff --git a/src/LZMADecDemo.java b/src/LZMADecDemo.java
new file mode 100644
index 0000000..1098725
--- /dev/null
+++ b/src/LZMADecDemo.java
@@ -0,0 +1,80 @@
+/*
+ * LZMADecDemo
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+import java.io.*;
+import org.tukaani.xz.*;
+
+/**
+ * Decompresses .lzma files to standard output. If no arguments are given,
+ * reads from standard input.
+ *
+ * NOTE: For most purposes, .lzma is a legacy format and usually you should
+ * use .xz instead.
+ */
+class LZMADecDemo {
+ public static void main(String[] args) {
+ byte[] buf = new byte[8192];
+ String name = null;
+
+ try {
+ if (args.length == 0) {
+ name = "standard input";
+
+ // No need to use BufferedInputStream with System.in which
+ // seems to be fast with one-byte reads.
+ InputStream in = new LZMAInputStream(System.in);
+
+ int size;
+ while ((size = in.read(buf)) != -1)
+ System.out.write(buf, 0, size);
+
+ } else {
+ // Read from files given on the command line.
+ for (int i = 0; i < args.length; ++i) {
+ name = args[i];
+ InputStream in = new FileInputStream(name);
+
+ try {
+ // In contrast to other classes in org.tukaani.xz,
+ // LZMAInputStream doesn't do buffering internally
+ // and reads one byte at a time. BufferedInputStream
+ // gives a huge performance improvement here but even
+ // then it's slower than the other input streams from
+ // org.tukaani.xz.
+ in = new BufferedInputStream(in);
+ in = new LZMAInputStream(in);
+
+ int size;
+ while ((size = in.read(buf)) != -1)
+ System.out.write(buf, 0, size);
+
+ } finally {
+ // Close FileInputStream (directly or indirectly
+ // via LZMAInputStream, it doesn't matter).
+ in.close();
+ }
+ }
+ }
+ } catch (FileNotFoundException e) {
+ System.err.println("LZMADecDemo: Cannot open " + name + ": "
+ + e.getMessage());
+ System.exit(1);
+
+ } catch (EOFException e) {
+ System.err.println("LZMADecDemo: Unexpected end of input on "
+ + name);
+ System.exit(1);
+
+ } catch (IOException e) {
+ System.err.println("LZMADecDemo: Error decompressing from "
+ + name + ": " + e.getMessage());
+ System.exit(1);
+ }
+ }
+}
diff --git a/src/LZMAEncDemo.java b/src/LZMAEncDemo.java
new file mode 100644
index 0000000..ba3f213
--- /dev/null
+++ b/src/LZMAEncDemo.java
@@ -0,0 +1,57 @@
+/*
+ * LZMAEncDemo
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+import java.io.*;
+import org.tukaani.xz.*;
+
+/**
+ * Compresses a single file from standard input to standard ouput into
+ * the .lzma file format.
+ * <p>
+ * NOTE: For most purposes, .lzma is a legacy format and usually you should
+ * use .xz instead.
+ * <p>
+ * Two optional arguments are supported:
+ * <ol>
+ * <li>LZMA preset level which is an integer in the range [0, 9].
+ * The default is 6.</li>
+ * <li>Uncompressed size of the input as bytes.<li>
+ * </ol>
+ */
+class LZMAEncDemo {
+ public static void main(String[] args) throws Exception {
+ LZMA2Options options = new LZMA2Options();
+ long inputSize = -1;
+
+ if (args.length >= 1)
+ options.setPreset(Integer.parseInt(args[0]));
+
+ if (args.length >= 2)
+ inputSize = Long.parseLong(args[1]);
+
+ System.err.println("Encoder memory usage: "
+ + options.getEncoderMemoryUsage() + " KiB");
+ System.err.println("Decoder memory usage: "
+ + options.getDecoderMemoryUsage() + " KiB");
+
+ // LZMAOutputStream writes one byte at a time. It helps a little,
+ // especially in the fastest presets, to use BufferedOutputStream.
+ OutputStream out = new BufferedOutputStream(System.out);
+ LZMAOutputStream encoder = new LZMAOutputStream(out, options,
+ inputSize);
+
+ byte[] buf = new byte[8192];
+ int size;
+ while ((size = System.in.read(buf)) != -1)
+ encoder.write(buf, 0, size);
+
+ encoder.finish();
+ out.flush();
+ }
+}
diff --git a/src/TestAllocSpeed.java b/src/TestAllocSpeed.java
new file mode 100644
index 0000000..1a14563
--- /dev/null
+++ b/src/TestAllocSpeed.java
@@ -0,0 +1,106 @@
+/*
+ * TestAllocSpeed
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+/*
+ * Usage:
+ * time java -jar build/jar/TestAllocSpeed.jar MODE ITERS THREADS < FILE
+ * where
+ * MODE is "true" for compression or "false" for decompression,
+ *     ITERS is the number of iterations to be done by each thread,
+ * THREADS is the number of threads, and
+ * FILE is the input file (preferably tiny, but at most 1 MiB).
+ *
+ * Each thread has a different random seed so in compression mode each
+ * thread will use different options in different order. This way the
+ * ArrayCache gets more diverse load.
+ *
+ * Examples:
+ * time java -jar build/jar/TestAllocSpeed.jar true 1000 4 < README
+ * time java -jar build/jar/TestAllocSpeed.jar false 10000 4 < foo.xz
+ */
+
+import java.io.*;
+import java.util.Random;
+import org.tukaani.xz.*;
+
+class TestAllocSpeed implements Runnable {
+ private static boolean compressing;
+ private static int repeats;
+ private static final byte[] testdata = new byte[1 << 20];
+ private static int testdataSize;
+ private static volatile IOException exception = null;
+
+ private final Random rng;
+
+ public TestAllocSpeed(long seed) {
+ rng = new Random(seed);
+ }
+
+ private void compress() throws IOException {
+ ByteArrayOutputStream byteStream = new ByteArrayOutputStream(
+ testdataSize + 1024);
+ LZMA2Options options = new LZMA2Options();
+ options.setDictSize(1 << (16 + rng.nextInt(6)));
+
+ for (int i = 0; i < repeats; ++i) {
+ XZOutputStream out = new XZOutputStream(byteStream, options);
+ out.write(testdata, 0, testdataSize);
+ out.finish();
+ }
+ }
+
+ private void decompress() throws IOException {
+ ByteArrayInputStream byteStream = new ByteArrayInputStream(
+ testdata, 0, testdataSize);
+ byte[] outbuf = new byte[8192];
+
+ for (int i = 0; i < repeats; ++i) {
+ byteStream.reset();
+ XZInputStream in = new XZInputStream(byteStream);
+ while (in.read(outbuf) > 0) {}
+ }
+ }
+
+ public void run() {
+ try {
+ if (compressing) {
+ compress();
+ } else {
+ decompress();
+ }
+ } catch (IOException e) {
+ exception = e;
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ compressing = Boolean.parseBoolean(args[0]);
+ repeats = Integer.parseInt(args[1]);
+ final int threadCount = Integer.parseInt(args[2]);
+
+ if (threadCount < 1 || threadCount > 64)
+ throw new Exception("Thread count must be 1-64");
+
+ testdataSize = System.in.read(testdata);
+
+ ArrayCache.setDefaultCache(BasicArrayCache.getInstance());
+
+ Thread[] threads = new Thread[threadCount];
+ for (int i = 0; i < threadCount; ++i) {
+ threads[i] = new Thread(new TestAllocSpeed(i));
+ threads[i].start();
+ }
+
+ for (int i = 0; i < threadCount; ++i)
+ threads[i].join();
+
+ if (exception != null)
+ throw exception;
+ }
+}
diff --git a/src/XZDecDemo.java b/src/XZDecDemo.java
new file mode 100644
index 0000000..6876eea
--- /dev/null
+++ b/src/XZDecDemo.java
@@ -0,0 +1,71 @@
+/*
+ * XZDecDemo
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+import java.io.*;
+import org.tukaani.xz.*;
+
+/**
+ * Decompresses .xz files to standard output. If no arguments are given,
+ * reads from standard input.
+ */
+class XZDecDemo {
+ public static void main(String[] args) {
+ byte[] buf = new byte[8192];
+ String name = null;
+
+ try {
+ if (args.length == 0) {
+ name = "standard input";
+ InputStream in = new XZInputStream(System.in);
+
+ int size;
+ while ((size = in.read(buf)) != -1)
+ System.out.write(buf, 0, size);
+
+ } else {
+ // Read from files given on the command line.
+ for (int i = 0; i < args.length; ++i) {
+ name = args[i];
+ InputStream in = new FileInputStream(name);
+
+ try {
+ // Since XZInputStream does some buffering internally
+ // anyway, BufferedInputStream doesn't seem to be
+ // needed here to improve performance.
+ // in = new BufferedInputStream(in);
+ in = new XZInputStream(in);
+
+ int size;
+ while ((size = in.read(buf)) != -1)
+ System.out.write(buf, 0, size);
+
+ } finally {
+ // Close FileInputStream (directly or indirectly
+ // via XZInputStream, it doesn't matter).
+ in.close();
+ }
+ }
+ }
+ } catch (FileNotFoundException e) {
+ System.err.println("XZDecDemo: Cannot open " + name + ": "
+ + e.getMessage());
+ System.exit(1);
+
+ } catch (EOFException e) {
+ System.err.println("XZDecDemo: Unexpected end of input on "
+ + name);
+ System.exit(1);
+
+ } catch (IOException e) {
+ System.err.println("XZDecDemo: Error decompressing from "
+ + name + ": " + e.getMessage());
+ System.exit(1);
+ }
+ }
+}
diff --git a/src/XZEncDemo.java b/src/XZEncDemo.java
new file mode 100644
index 0000000..e9ae38a
--- /dev/null
+++ b/src/XZEncDemo.java
@@ -0,0 +1,41 @@
+/*
+ * XZEncDemo
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+import java.io.*;
+import org.tukaani.xz.*;
+
+/**
+ * Compresses a single file from standard input to standard output into
+ * the .xz file format.
+ * <p>
+ * One optional argument is supported: LZMA2 preset level which is an integer
+ * in the range [0, 9]. The default is 6.
+ */
+class XZEncDemo {
+ public static void main(String[] args) throws Exception {
+ LZMA2Options options = new LZMA2Options();
+
+ if (args.length >= 1)
+ options.setPreset(Integer.parseInt(args[0]));
+
+ System.err.println("Encoder memory usage: "
+ + options.getEncoderMemoryUsage() + " KiB");
+ System.err.println("Decoder memory usage: "
+ + options.getDecoderMemoryUsage() + " KiB");
+
+ XZOutputStream out = new XZOutputStream(System.out, options);
+
+ byte[] buf = new byte[8192];
+ int size;
+ while ((size = System.in.read(buf)) != -1)
+ out.write(buf, 0, size);
+
+ out.finish();
+ }
+}
diff --git a/src/XZSeekDecDemo.java b/src/XZSeekDecDemo.java
new file mode 100644
index 0000000..5c54a87
--- /dev/null
+++ b/src/XZSeekDecDemo.java
@@ -0,0 +1,75 @@
+/*
+ * XZSeekDecDemo
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+import java.io.*;
+import org.tukaani.xz.*;
+
+/**
+ * Decompresses a .xz file in random access mode to standard output.
+ * <p>
+ * Arguments: filename [offset1 length1] [offset2 length2] ...
+ * <p>
+ * If only the filename is given, the whole file is decompressed. The only
+ * difference to XZDecDemo is that this will still use the random access code.
+ * <p>
+ * If one or more of the offset-length pairs are given,
+ * for each pair, <code>length</code> number of bytes are
+ * decompressed from <code>offset</code>.
+ */
+class XZSeekDecDemo {
+ public static void main(String[] args) throws Exception {
+ SeekableFileInputStream file = new SeekableFileInputStream(args[0]);
+ SeekableXZInputStream in = new SeekableXZInputStream(file);
+
+ System.err.println("Number of XZ Streams: " + in.getStreamCount());
+ System.err.println("Number of XZ Blocks: " + in.getBlockCount());
+
+ System.err.println("Uncompressed size: " + in.length() + " B");
+
+ System.err.println("Largest XZ Block size: "
+ + in.getLargestBlockSize() + " B");
+
+ System.err.print("List of Check IDs:");
+ int checkTypes = in.getCheckTypes();
+ for (int i = 0; i < 16; ++i)
+ if ((checkTypes & (1 << i)) != 0)
+ System.err.print(" " + i);
+ System.err.println();
+
+ System.err.println("Index memory usage: "
+ + in.getIndexMemoryUsage() + " KiB");
+
+ byte[] buf = new byte[8192];
+ if (args.length == 1) {
+ int size;
+ while ((size = in.read(buf)) != -1)
+ System.out.write(buf, 0, size);
+ } else {
+ for (int i = 1; i < args.length; i += 2) {
+ int pos = Integer.parseInt(args[i]);
+ int len = Integer.parseInt(args[i + 1]);
+
+ in.seek(pos);
+
+ while (len > 0) {
+ int size = Math.min(len, buf.length);
+ size = in.read(buf, 0, size);
+
+ if (size == -1) {
+ System.err.println("Error: End of file reached");
+ System.exit(1);
+ }
+
+ System.out.write(buf, 0, size);
+ len -= size;
+ }
+ }
+ }
+ }
+}
diff --git a/src/XZSeekEncDemo.java b/src/XZSeekEncDemo.java
new file mode 100644
index 0000000..157e788
--- /dev/null
+++ b/src/XZSeekEncDemo.java
@@ -0,0 +1,68 @@
+/*
+ * XZSeekEncDemo
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+import java.io.*;
+import org.tukaani.xz.*;
+
+/**
+ * Compresses a single file from standard input to standard output into
+ * a random-accessible .xz file.
+ * <p>
+ * Arguments: [preset [block size]]
+ * <p>
+ * Preset is an LZMA2 preset level which is an integer in the range [0, 9].
+ * The default is 6.
+ * <p>
+ * Block size specifies the amount of uncompressed data to store per
+ * XZ Block. The default is 1 MiB (1048576 bytes). Bigger means better
+ * compression ratio. Smaller means faster random access.
+ */
+class XZSeekEncDemo {
+ public static void main(String[] args) throws Exception {
+ LZMA2Options options = new LZMA2Options();
+
+ if (args.length >= 1)
+ options.setPreset(Integer.parseInt(args[0]));
+
+ int blockSize = 1024 * 1024;
+ if (args.length >= 2)
+ blockSize = Integer.parseInt(args[1]);
+
+ options.setDictSize(Math.min(options.getDictSize(),
+ Math.max(LZMA2Options.DICT_SIZE_MIN,
+ blockSize)));
+
+ System.err.println("Encoder memory usage: "
+ + options.getEncoderMemoryUsage() + " KiB");
+ System.err.println("Decoder memory usage: "
+ + options.getDecoderMemoryUsage() + " KiB");
+ System.err.println("Block size: " + blockSize + " B");
+
+ XZOutputStream out = new XZOutputStream(System.out, options);
+
+ byte[] buf = new byte[8192];
+ int left = blockSize;
+
+ while (true) {
+ int size = System.in.read(buf, 0, Math.min(buf.length, left));
+ if (size == -1)
+ break;
+
+ out.write(buf, 0, size);
+ left -= size;
+
+ if (left == 0) {
+ out.endBlock();
+ left = blockSize;
+ }
+ }
+
+ out.finish();
+ }
+}
diff --git a/src/org/tukaani/xz/ARMOptions.java b/src/org/tukaani/xz/ARMOptions.java
new file mode 100644
index 0000000..a3c6426
--- /dev/null
+++ b/src/org/tukaani/xz/ARMOptions.java
@@ -0,0 +1,37 @@
+/*
+ * ARMOptions
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import org.tukaani.xz.simple.ARM;
+
+/**
+ * BCJ filter for little endian ARM instructions.
+ */
+public class ARMOptions extends BCJOptions {
+ private static final int ALIGNMENT = 4;
+
+ public ARMOptions() {
+ super(ALIGNMENT);
+ }
+
+ public FinishableOutputStream getOutputStream(FinishableOutputStream out,
+ ArrayCache arrayCache) {
+ return new SimpleOutputStream(out, new ARM(true, startOffset));
+ }
+
+ public InputStream getInputStream(InputStream in, ArrayCache arrayCache) {
+ return new SimpleInputStream(in, new ARM(false, startOffset));
+ }
+
+ FilterEncoder getFilterEncoder() {
+ return new BCJEncoder(this, BCJCoder.ARM_FILTER_ID);
+ }
+}
diff --git a/src/org/tukaani/xz/ARMThumbOptions.java b/src/org/tukaani/xz/ARMThumbOptions.java
new file mode 100644
index 0000000..4dcfbe4
--- /dev/null
+++ b/src/org/tukaani/xz/ARMThumbOptions.java
@@ -0,0 +1,37 @@
+/*
+ * ARMThumbOptions
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import org.tukaani.xz.simple.ARMThumb;
+
+/**
+ * BCJ filter for little endian ARM-Thumb instructions.
+ */
+public class ARMThumbOptions extends BCJOptions {
+ private static final int ALIGNMENT = 2;
+
+ public ARMThumbOptions() {
+ super(ALIGNMENT);
+ }
+
+ public FinishableOutputStream getOutputStream(FinishableOutputStream out,
+ ArrayCache arrayCache) {
+ return new SimpleOutputStream(out, new ARMThumb(true, startOffset));
+ }
+
+ public InputStream getInputStream(InputStream in, ArrayCache arrayCache) {
+ return new SimpleInputStream(in, new ARMThumb(false, startOffset));
+ }
+
+ FilterEncoder getFilterEncoder() {
+ return new BCJEncoder(this, BCJCoder.ARMTHUMB_FILTER_ID);
+ }
+}
diff --git a/src/org/tukaani/xz/ArrayCache.java b/src/org/tukaani/xz/ArrayCache.java
new file mode 100644
index 0000000..c940d77
--- /dev/null
+++ b/src/org/tukaani/xz/ArrayCache.java
@@ -0,0 +1,172 @@
+/*
+ * ArrayCache
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+/**
+ * Caches large arrays for reuse (base class and a dummy cache implementation).
+ * <p>
+ * When compressing or decompressing many (very) small files in a row, the
+ * time spent in construction of new compressor or decompressor objects
+ * can be longer than the time spent in actual compression or decompression.
+ * A large part of this initialization overhead comes from allocation and
+ * garbage collection of large arrays.
+ * <p>
+ * The {@code ArrayCache} API provides a way to cache large array allocations
+ * for reuse. It can give a major performance improvement when compressing or
+ * decompressing many tiny files. If you are only (de)compressing one or two
+ * files or the files are very big, array caching won't improve anything,
+ * although it won't make anything slower either.
+ * <p>
+ * <b>Important: The users of ArrayCache don't return the allocated arrays
+ * back to the cache in all situations.</b>
+ * This is a reason why it's called a cache instead of a pool.
+ * If it is important to be able to return every array back to a cache,
+ * {@link ResettableArrayCache} can be useful.
+ * <p>
+ * In compressors (OutputStreams) the arrays are returned to the cache
+ * when a call to {@code finish()} or {@code close()} returns
+ * successfully (no exceptions are thrown).
+ * <p>
+ * In decompressors (InputStreams) the arrays are returned to the cache when
+ * the decompression is successfully finished ({@code read} returns {@code -1})
+ * or {@code close()} or {@code close(boolean)} is called. This is true even
+ * if closing throws an exception.
+ * <p>
+ * Raw decompressors don't support {@code close(boolean)}. With raw
+ * decompressors, if one wants to put the arrays back to the cache without
+ * closing the underlying {@code InputStream}, one can wrap the
+ * {@code InputStream} into {@link CloseIgnoringInputStream} when creating
+ * the decompressor instance. Then one can use {@code close()}.
+ * <p>
+ * Different cache implementations can be extended from this base class.
+ * All cache implementations must be thread safe.
+ * <p>
+ * This class also works as a dummy cache that simply calls {@code new}
+ * to allocate new arrays and doesn't try to cache anything. A statically
+ * allocated dummy cache is available via {@link #getDummyCache()}.
+ * <p>
+ * If no {@code ArrayCache} is specified when constructing a compressor or
+ * decompressor, the default {@code ArrayCache} implementation is used.
+ * See {@link #getDefaultCache()} and {@link #setDefaultCache(ArrayCache)}.
+ * <p>
+ * This is a class instead of an interface because it's possible that in the
+ * future we may want to cache other array types too. New methods can be
+ * added to this class without breaking existing cache implementations.
+ *
+ * @since 1.7
+ *
+ * @see BasicArrayCache
+ */
+public class ArrayCache {
+ /**
+ * Global dummy cache instance that is returned by {@code getDummyCache()}.
+ */
+ private static final ArrayCache dummyCache = new ArrayCache();
+
+ /**
+ * Global default {@code ArrayCache} that is used when no other cache has
+ * been specified.
+ */
+ private static volatile ArrayCache defaultCache = dummyCache;
+
+ /**
+ * Returns a statically-allocated {@code ArrayCache} instance.
+ * It can be shared by all code that needs a dummy cache.
+ */
+ public static ArrayCache getDummyCache() {
+ return dummyCache;
+ }
+
+ /**
+ * Gets the default {@code ArrayCache} instance.
+ * This is a global cache that is used when the application
+ * specifies nothing else. The default is a dummy cache
+ * (see {@link #getDummyCache()}).
+ */
+ public static ArrayCache getDefaultCache() {
+ // It's volatile so no need for synchronization.
+ return defaultCache;
+ }
+
+ /**
+ * Sets the default {@code ArrayCache} instance.
+ * Use with care. Other libraries using this package probably shouldn't
+ * call this function as libraries cannot know if there are other users
+ * of the xz package in the same application.
+ */
+ public static void setDefaultCache(ArrayCache arrayCache) {
+ if (arrayCache == null)
+ throw new NullPointerException();
+
+ // It's volatile so no need for synchronization.
+ defaultCache = arrayCache;
+ }
+
+ /**
+ * Creates a new {@code ArrayCache} that does no caching
+ * (a dummy cache). If you need a dummy cache, you may want to call
+ * {@link #getDummyCache()} instead.
+ */
+ public ArrayCache() {}
+
+ /**
+ * Allocates a new byte array.
+ * <p>
+ * This implementation simply returns {@code new byte[size]}.
+ *
+ * @param size the minimum size of the array to allocate;
+ * an implementation may return an array that
+ * is larger than the given {@code size}
+ *
+ * @param fillWithZeros if true, the caller expects that the first
+ * {@code size} elements in the array are zero;
+ * if false, the array contents can be anything,
+ * which speeds things up when reusing a cached
+ * array
+ */
+ public byte[] getByteArray(int size, boolean fillWithZeros) {
+ return new byte[size];
+ }
+
+ /**
+ * Puts the given byte array to the cache. The caller must no longer
+ * use the array.
+ * <p>
+ * This implementation does nothing.
+ */
+ public void putArray(byte[] array) {}
+
+ /**
+ * Allocates a new int array.
+ * <p>
+ * This implementation simply returns {@code new int[size]}.
+ *
+ * @param size the minimum size of the array to allocate;
+ * an implementation may return an array that
+ * is larger than the given {@code size}
+ *
+ * @param fillWithZeros if true, the caller expects that the first
+ * {@code size} elements in the array are zero;
+ * if false, the array contents can be anything,
+ * which speeds things up when reusing a cached
+ * array
+ */
+ public int[] getIntArray(int size, boolean fillWithZeros) {
+ return new int[size];
+ }
+
+ /**
+ * Puts the given int array to the cache. The caller must no longer
+ * use the array.
+ * <p>
+ * This implementation does nothing.
+ */
+ public void putArray(int[] array) {}
+}
diff --git a/src/org/tukaani/xz/BCJCoder.java b/src/org/tukaani/xz/BCJCoder.java
new file mode 100644
index 0000000..81862f7
--- /dev/null
+++ b/src/org/tukaani/xz/BCJCoder.java
@@ -0,0 +1,35 @@
+/*
+ * BCJCoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+abstract class BCJCoder implements FilterCoder {
+ public static final long X86_FILTER_ID = 0x04;
+ public static final long POWERPC_FILTER_ID = 0x05;
+ public static final long IA64_FILTER_ID = 0x06;
+ public static final long ARM_FILTER_ID = 0x07;
+ public static final long ARMTHUMB_FILTER_ID = 0x08;
+ public static final long SPARC_FILTER_ID = 0x09;
+
+ public static boolean isBCJFilterID(long filterID) {
+ return filterID >= 0x04 && filterID <= 0x09;
+ }
+
+ public boolean changesSize() {
+ return false;
+ }
+
+ public boolean nonLastOK() {
+ return true;
+ }
+
+ public boolean lastOK() {
+ return false;
+ }
+}
diff --git a/src/org/tukaani/xz/BCJDecoder.java b/src/org/tukaani/xz/BCJDecoder.java
new file mode 100644
index 0000000..31251f2
--- /dev/null
+++ b/src/org/tukaani/xz/BCJDecoder.java
@@ -0,0 +1,62 @@
+/*
+ * BCJDecoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import org.tukaani.xz.simple.*;
+
+class BCJDecoder extends BCJCoder implements FilterDecoder {
+ private final long filterID;
+ private final int startOffset;
+
+ BCJDecoder(long filterID, byte[] props)
+ throws UnsupportedOptionsException {
+ assert isBCJFilterID(filterID);
+ this.filterID = filterID;
+
+ if (props.length == 0) {
+ startOffset = 0;
+ } else if (props.length == 4) {
+ int n = 0;
+ for (int i = 0; i < 4; ++i)
+ n |= (props[i] & 0xFF) << (i * 8);
+
+ startOffset = n;
+ } else {
+ throw new UnsupportedOptionsException(
+ "Unsupported BCJ filter properties");
+ }
+ }
+
+ public int getMemoryUsage() {
+ return SimpleInputStream.getMemoryUsage();
+ }
+
+ public InputStream getInputStream(InputStream in, ArrayCache arrayCache) {
+ SimpleFilter simpleFilter = null;
+
+ if (filterID == X86_FILTER_ID)
+ simpleFilter = new X86(false, startOffset);
+ else if (filterID == POWERPC_FILTER_ID)
+ simpleFilter = new PowerPC(false, startOffset);
+ else if (filterID == IA64_FILTER_ID)
+ simpleFilter = new IA64(false, startOffset);
+ else if (filterID == ARM_FILTER_ID)
+ simpleFilter = new ARM(false, startOffset);
+ else if (filterID == ARMTHUMB_FILTER_ID)
+ simpleFilter = new ARMThumb(false, startOffset);
+ else if (filterID == SPARC_FILTER_ID)
+ simpleFilter = new SPARC(false, startOffset);
+ else
+ assert false;
+
+ return new SimpleInputStream(in, simpleFilter);
+ }
+}
diff --git a/src/org/tukaani/xz/BCJEncoder.java b/src/org/tukaani/xz/BCJEncoder.java
new file mode 100644
index 0000000..90cde79
--- /dev/null
+++ b/src/org/tukaani/xz/BCJEncoder.java
@@ -0,0 +1,49 @@
+/*
+ * BCJEncoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+class BCJEncoder extends BCJCoder implements FilterEncoder {
+ private final BCJOptions options;
+ private final long filterID;
+ private final byte[] props;
+
+ BCJEncoder(BCJOptions options, long filterID) {
+ assert isBCJFilterID(filterID);
+ int startOffset = options.getStartOffset();
+
+ if (startOffset == 0) {
+ props = new byte[0];
+ } else {
+ props = new byte[4];
+ for (int i = 0; i < 4; ++i)
+ props[i] = (byte)(startOffset >>> (i * 8));
+ }
+
+ this.filterID = filterID;
+ this.options = (BCJOptions)options.clone();
+ }
+
+ public long getFilterID() {
+ return filterID;
+ }
+
+ public byte[] getFilterProps() {
+ return props;
+ }
+
+ public boolean supportsFlushing() {
+ return false;
+ }
+
+ public FinishableOutputStream getOutputStream(FinishableOutputStream out,
+ ArrayCache arrayCache) {
+ return options.getOutputStream(out, arrayCache);
+ }
+}
diff --git a/src/org/tukaani/xz/BCJOptions.java b/src/org/tukaani/xz/BCJOptions.java
new file mode 100644
index 0000000..705a2c0
--- /dev/null
+++ b/src/org/tukaani/xz/BCJOptions.java
@@ -0,0 +1,57 @@
+/*
+ * BCJOptions
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+abstract class BCJOptions extends FilterOptions {
+ private final int alignment;
+ int startOffset = 0;
+
+ BCJOptions(int alignment) {
+ this.alignment = alignment;
+ }
+
+ /**
+ * Sets the start offset for the address conversions.
+ * Normally this is useless so you shouldn't use this function.
+ * The default value is <code>0</code>.
+ */
+ public void setStartOffset(int startOffset)
+ throws UnsupportedOptionsException {
+ if ((startOffset & (alignment - 1)) != 0)
+ throw new UnsupportedOptionsException(
+ "Start offset must be a multiple of " + alignment);
+
+ this.startOffset = startOffset;
+ }
+
+ /**
+ * Gets the start offset.
+ */
+ public int getStartOffset() {
+ return startOffset;
+ }
+
+ public int getEncoderMemoryUsage() {
+ return SimpleOutputStream.getMemoryUsage();
+ }
+
+ public int getDecoderMemoryUsage() {
+ return SimpleInputStream.getMemoryUsage();
+ }
+
+ public Object clone() {
+ try {
+ return super.clone();
+ } catch (CloneNotSupportedException e) {
+ assert false;
+ throw new RuntimeException();
+ }
+ }
+}
diff --git a/src/org/tukaani/xz/BasicArrayCache.java b/src/org/tukaani/xz/BasicArrayCache.java
new file mode 100644
index 0000000..90ebe1f
--- /dev/null
+++ b/src/org/tukaani/xz/BasicArrayCache.java
@@ -0,0 +1,281 @@
+/*
+ * BasicArrayCache
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.lang.ref.Reference;
+import java.lang.ref.SoftReference;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/**
+ * A basic {@link ArrayCache} implementation.
+ * <p>
+ * This caches exact array sizes, that is, {@code getByteArray} will return
+ * an array whose size is exactly the requested size. A limited number
+ * of different array sizes are cached at the same time; least recently used
+ * sizes will be dropped from the cache if needed (can happen if several
+ * different (de)compression options are used with the same cache).
+ * <p>
+ * The current implementation uses
+ * {@link java.util.LinkedHashMap LinkedHashMap} to map different array sizes
+ * to separate array-based data structures which hold
+ * {@link java.lang.ref.SoftReference SoftReferences} to the cached arrays.
+ * In the common case this should give good performance and fairly low
+ * memory usage overhead.
+ * <p>
+ * A statically allocated global {@code BasicArrayCache} instance is
+ * available via {@link #getInstance()} which is a good choice in most
+ * situations where caching is wanted.
+ *
+ * @since 1.7
+ */
+public class BasicArrayCache extends ArrayCache {
+ /**
+ * Arrays smaller than this many elements will not be cached.
+ */
+ private static final int CACHEABLE_SIZE_MIN = 32 << 10;
+
+ /**
+ * Number of stacks i.e. how many different array sizes to cache.
+ */
+ private static final int STACKS_MAX = 32;
+
+ /**
+ * Number of arrays of the same type and size to keep in the cache.
+ * (ELEMENTS_PER_STACK - 1) is used as a bit mask so ELEMENTS_PER_STACK
+ * must be a power of two!
+ */
+ private static final int ELEMENTS_PER_STACK = 512;
+
+ /**
+ * A thread-safe stack-like data structure whose {@code push} method
+ * overwrites the oldest element in the stack if the stack is full.
+ */
+ private static class CyclicStack<T> {
+ /**
+ * Array that holds the elements in the cyclic stack.
+ */
+ @SuppressWarnings("unchecked")
+ private final T[] elements = (T[])new Object[ELEMENTS_PER_STACK];
+
+ /**
+         * Read-write position in the {@code elements} array.
+         * The most recently added element is in {@code elements[pos]}.
+         * If it is {@code null}, then the stack is empty and all
+         * elements in {@code elements} are {@code null}.
+         * <p>
+         * Note that {@code pop()} always modifies {@code pos}, even if
+         * the stack is empty. This means that when the first element is
+         * added by {@code push(T)}, it can get added in any position in
+         * {@code elements} and the stack will start growing from there.
+ */
+ private int pos = 0;
+
+ /**
+ * Gets the most recently added element from the stack.
+ * If the stack is empty, {@code null} is returned.
+ */
+ public synchronized T pop() {
+ T e = elements[pos];
+ elements[pos] = null;
+ pos = (pos - 1) & (ELEMENTS_PER_STACK - 1);
+ return e;
+ }
+
+ /**
+ * Adds a new element to the stack. If the stack is full, the oldest
+ * element is overwritten.
+ */
+ public synchronized void push(T e) {
+ pos = (pos + 1) & (ELEMENTS_PER_STACK - 1);
+ elements[pos] = e;
+ }
+ }
+
+ /**
+ * Maps Integer (array size) to stacks of references to arrays. At most
+ * STACKS_MAX number of stacks are kept in the map (LRU cache).
+ */
+ private static class CacheMap<T>
+ extends LinkedHashMap<Integer, CyclicStack<Reference<T>>> {
+ /**
+ * This class won't be serialized but this is needed
+ * to silence a compiler warning.
+ */
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Creates a new CacheMap.
+ */
+ public CacheMap() {
+ // The map may momentarily have at most STACKS_MAX + 1 entries
+ // when put(K,V) has inserted a new entry but hasn't called
+ // removeEldestEntry yet. Using 2 * STACKS_MAX as the initial
+ // (and the final) capacity should give good performance. 0.75 is
+ // the default load factor and in this case it guarantees that
+ // the map will never need to be rehashed because
+ // (STACKS_MAX + 1) / 0.75 < 2 * STACKS_MAX.
+ //
+ // That last argument is true to get LRU cache behavior together
+            // with the overridden removeEldestEntry method.
+ super(2 * STACKS_MAX, 0.75f, true);
+ }
+
+ /**
+ * Returns true if the map is full and the least recently used stack
+ * should be removed.
+ */
+ protected boolean removeEldestEntry(
+ Map.Entry<Integer, CyclicStack<Reference<T>>> eldest) {
+ return size() > STACKS_MAX;
+ }
+ }
+
+ /**
+ * Helper class for the singleton instance.
+ * This is allocated only if {@code getInstance()} is called.
+ */
+ private static final class LazyHolder {
+ static final BasicArrayCache INSTANCE = new BasicArrayCache();
+ }
+
+ /**
+ * Returns a statically-allocated {@code BasicArrayCache} instance.
+ * This is often a good choice when a cache is needed.
+ */
+ public static BasicArrayCache getInstance() {
+ return LazyHolder.INSTANCE;
+ }
+
+ /**
+ * Stacks for cached byte arrays.
+ */
+ private final CacheMap<byte[]> byteArrayCache = new CacheMap<byte[]>();
+
+ /**
+ * Stacks for cached int arrays.
+ */
+ private final CacheMap<int[]> intArrayCache = new CacheMap<int[]>();
+
+ /**
+ * Gets {@code T[size]} from the given {@code cache}.
+ * If no such array is found, {@code null} is returned.
+ */
+ private static <T> T getArray(CacheMap<T> cache, int size) {
+ // putArray doesn't add small arrays to the cache and so it's
+ // pointless to look for small arrays here.
+ if (size < CACHEABLE_SIZE_MIN)
+ return null;
+
+ // Try to find a stack that holds arrays of T[size].
+ CyclicStack<Reference<T>> stack;
+ synchronized(cache) {
+ stack = cache.get(size);
+ }
+
+ if (stack == null)
+ return null;
+
+ // Try to find a non-cleared Reference from the stack.
+ T array;
+ do {
+ Reference<T> r = stack.pop();
+ if (r == null)
+ return null;
+
+ array = r.get();
+ } while (array == null);
+
+ return array;
+ }
+
+ /**
+     * Puts the {@code array} of {@code size} elements into
+ * the {@code cache}.
+ */
+ private static <T> void putArray(CacheMap<T> cache, T array, int size) {
+ // Small arrays aren't cached.
+ if (size < CACHEABLE_SIZE_MIN)
+ return;
+
+ CyclicStack<Reference<T>> stack;
+
+ synchronized(cache) {
+ // Get a stack that holds arrays of T[size]. If no such stack
+ // exists, allocate a new one. If the cache already had STACKS_MAX
+ // number of stacks, the least recently used stack is removed by
+ // cache.put (it calls removeEldestEntry).
+ stack = cache.get(size);
+ if (stack == null) {
+ stack = new CyclicStack<Reference<T>>();
+ cache.put(size, stack);
+ }
+ }
+
+ stack.push(new SoftReference<T>(array));
+ }
+
+ /**
+ * Allocates a new byte array, hopefully reusing an existing
+ * array from the cache.
+ *
+ * @param size size of the array to allocate
+ *
+ * @param fillWithZeros
+ * if true, all the elements of the returned
+ * array will be zero; if false, the contents
+ * of the returned array is undefined
+ */
+ public byte[] getByteArray(int size, boolean fillWithZeros) {
+ byte[] array = getArray(byteArrayCache, size);
+
+ if (array == null)
+ array = new byte[size];
+ else if (fillWithZeros)
+ Arrays.fill(array, (byte)0x00);
+
+ return array;
+ }
+
+ /**
+ * Puts the given byte array to the cache. The caller must no longer
+ * use the array.
+ * <p>
+ * Small arrays aren't cached and will be ignored by this method.
+ */
+ public void putArray(byte[] array) {
+ putArray(byteArrayCache, array, array.length);
+ }
+
+ /**
+ * This is like getByteArray but for int arrays.
+ */
+ public int[] getIntArray(int size, boolean fillWithZeros) {
+ int[] array = getArray(intArrayCache, size);
+
+ if (array == null)
+ array = new int[size];
+ else if (fillWithZeros)
+ Arrays.fill(array, 0);
+
+ return array;
+ }
+
+ /**
+ * Puts the given int array to the cache. The caller must no longer
+ * use the array.
+ * <p>
+ * Small arrays aren't cached and will be ignored by this method.
+ */
+ public void putArray(int[] array) {
+ putArray(intArrayCache, array, array.length);
+ }
+}
diff --git a/src/org/tukaani/xz/BlockInputStream.java b/src/org/tukaani/xz/BlockInputStream.java
new file mode 100644
index 0000000..1931bd6
--- /dev/null
+++ b/src/org/tukaani/xz/BlockInputStream.java
@@ -0,0 +1,305 @@
+/*
+ * BlockInputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import org.tukaani.xz.common.DecoderUtil;
+import org.tukaani.xz.check.Check;
+
+class BlockInputStream extends InputStream {
+ private final DataInputStream inData;
+ private final CountingInputStream inCounted;
+ private InputStream filterChain;
+ private final Check check;
+ private final boolean verifyCheck;
+
+ private long uncompressedSizeInHeader = -1;
+ private long compressedSizeInHeader = -1;
+ private long compressedSizeLimit;
+ private final int headerSize;
+ private long uncompressedSize = 0;
+ private boolean endReached = false;
+
+ private final byte[] tempBuf = new byte[1];
+
+ public BlockInputStream(InputStream in,
+ Check check, boolean verifyCheck,
+ int memoryLimit,
+ long unpaddedSizeInIndex,
+ long uncompressedSizeInIndex,
+ ArrayCache arrayCache)
+ throws IOException, IndexIndicatorException {
+ this.check = check;
+ this.verifyCheck = verifyCheck;
+ inData = new DataInputStream(in);
+
+ byte[] buf = new byte[DecoderUtil.BLOCK_HEADER_SIZE_MAX];
+
+ // Block Header Size or Index Indicator
+ inData.readFully(buf, 0, 1);
+
+ // See if this begins the Index field.
+ if (buf[0] == 0x00)
+ throw new IndexIndicatorException();
+
+ // Read the rest of the Block Header.
+ headerSize = 4 * ((buf[0] & 0xFF) + 1);
+ inData.readFully(buf, 1, headerSize - 1);
+
+ // Validate the CRC32.
+ if (!DecoderUtil.isCRC32Valid(buf, 0, headerSize - 4, headerSize - 4))
+ throw new CorruptedInputException("XZ Block Header is corrupt");
+
+ // Check for reserved bits in Block Flags.
+ if ((buf[1] & 0x3C) != 0)
+ throw new UnsupportedOptionsException(
+ "Unsupported options in XZ Block Header");
+
+ // Memory for the Filter Flags field
+ int filterCount = (buf[1] & 0x03) + 1;
+ long[] filterIDs = new long[filterCount];
+ byte[][] filterProps = new byte[filterCount][];
+
+ // Use a stream to parse the fields after the Block Flags field.
+ // Exclude the CRC32 field at the end.
+ ByteArrayInputStream bufStream = new ByteArrayInputStream(
+ buf, 2, headerSize - 6);
+
+ try {
+ // Set the maximum valid compressed size. This is overridden
+ // by the value from the Compressed Size field if it is present.
+ compressedSizeLimit = (DecoderUtil.VLI_MAX & ~3)
+ - headerSize - check.getSize();
+
+ // Decode and validate Compressed Size if the relevant flag
+ // is set in Block Flags.
+ if ((buf[1] & 0x40) != 0x00) {
+ compressedSizeInHeader = DecoderUtil.decodeVLI(bufStream);
+
+ if (compressedSizeInHeader == 0
+ || compressedSizeInHeader > compressedSizeLimit)
+ throw new CorruptedInputException();
+
+ compressedSizeLimit = compressedSizeInHeader;
+ }
+
+ // Decode Uncompressed Size if the relevant flag is set
+ // in Block Flags.
+ if ((buf[1] & 0x80) != 0x00)
+ uncompressedSizeInHeader = DecoderUtil.decodeVLI(bufStream);
+
+ // Decode Filter Flags.
+ for (int i = 0; i < filterCount; ++i) {
+ filterIDs[i] = DecoderUtil.decodeVLI(bufStream);
+
+ long filterPropsSize = DecoderUtil.decodeVLI(bufStream);
+ if (filterPropsSize > bufStream.available())
+ throw new CorruptedInputException();
+
+ filterProps[i] = new byte[(int)filterPropsSize];
+ bufStream.read(filterProps[i]);
+ }
+
+ } catch (IOException e) {
+ throw new CorruptedInputException("XZ Block Header is corrupt");
+ }
+
+ // Check that the remaining bytes are zero.
+ for (int i = bufStream.available(); i > 0; --i)
+ if (bufStream.read() != 0x00)
+ throw new UnsupportedOptionsException(
+ "Unsupported options in XZ Block Header");
+
+ // Validate the Block Header against the Index when doing
+ // random access reading.
+ if (unpaddedSizeInIndex != -1) {
+ // Compressed Data must be at least one byte, so if Block Header
+ // and Check alone take as much or more space than the size
+ // stored in the Index, the file is corrupt.
+ int headerAndCheckSize = headerSize + check.getSize();
+ if (headerAndCheckSize >= unpaddedSizeInIndex)
+ throw new CorruptedInputException(
+ "XZ Index does not match a Block Header");
+
+ // The compressed size calculated from Unpadded Size must
+ // match the value stored in the Compressed Size field in
+ // the Block Header.
+ long compressedSizeFromIndex
+ = unpaddedSizeInIndex - headerAndCheckSize;
+ if (compressedSizeFromIndex > compressedSizeLimit
+ || (compressedSizeInHeader != -1
+ && compressedSizeInHeader != compressedSizeFromIndex))
+ throw new CorruptedInputException(
+ "XZ Index does not match a Block Header");
+
+ // The uncompressed size stored in the Index must match
+ // the value stored in the Uncompressed Size field in
+ // the Block Header.
+ if (uncompressedSizeInHeader != -1
+ && uncompressedSizeInHeader != uncompressedSizeInIndex)
+ throw new CorruptedInputException(
+ "XZ Index does not match a Block Header");
+
+ // For further validation, pretend that the values from the Index
+ // were stored in the Block Header.
+ compressedSizeLimit = compressedSizeFromIndex;
+ compressedSizeInHeader = compressedSizeFromIndex;
+ uncompressedSizeInHeader = uncompressedSizeInIndex;
+ }
+
+ // Check if the Filter IDs are supported, decode
+ // the Filter Properties, and check that they are
+ // supported by this decoder implementation.
+ FilterDecoder[] filters = new FilterDecoder[filterIDs.length];
+
+ for (int i = 0; i < filters.length; ++i) {
+ if (filterIDs[i] == LZMA2Coder.FILTER_ID)
+ filters[i] = new LZMA2Decoder(filterProps[i]);
+
+ else if (filterIDs[i] == DeltaCoder.FILTER_ID)
+ filters[i] = new DeltaDecoder(filterProps[i]);
+
+ else if (BCJDecoder.isBCJFilterID(filterIDs[i]))
+ filters[i] = new BCJDecoder(filterIDs[i], filterProps[i]);
+
+ else
+ throw new UnsupportedOptionsException(
+ "Unknown Filter ID " + filterIDs[i]);
+ }
+
+ RawCoder.validate(filters);
+
+ // Check the memory usage limit.
+ if (memoryLimit >= 0) {
+ int memoryNeeded = 0;
+ for (int i = 0; i < filters.length; ++i)
+ memoryNeeded += filters[i].getMemoryUsage();
+
+ if (memoryNeeded > memoryLimit)
+ throw new MemoryLimitException(memoryNeeded, memoryLimit);
+ }
+
+ // Use an input size counter to calculate
+ // the size of the Compressed Data field.
+ inCounted = new CountingInputStream(in);
+
+ // Initialize the filter chain.
+ filterChain = inCounted;
+ for (int i = filters.length - 1; i >= 0; --i)
+ filterChain = filters[i].getInputStream(filterChain, arrayCache);
+ }
+
+ public int read() throws IOException {
+ return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
+ }
+
+ public int read(byte[] buf, int off, int len) throws IOException {
+ if (endReached)
+ return -1;
+
+ int ret = filterChain.read(buf, off, len);
+
+ if (ret > 0) {
+ if (verifyCheck)
+ check.update(buf, off, ret);
+
+ uncompressedSize += ret;
+
+ // Catch invalid values.
+ long compressedSize = inCounted.getSize();
+ if (compressedSize < 0
+ || compressedSize > compressedSizeLimit
+ || uncompressedSize < 0
+ || (uncompressedSizeInHeader != -1
+ && uncompressedSize > uncompressedSizeInHeader))
+ throw new CorruptedInputException();
+
+ // Check the Block integrity as soon as possible:
+ // - The filter chain shouldn't return less than requested
+ // unless it hit the end of the input.
+ // - If the uncompressed size is known, we know when there
+ // shouldn't be more data coming. We still need to read
+ // one byte to let the filter chain catch errors and to
+ // let it read end of payload marker(s).
+ if (ret < len || uncompressedSize == uncompressedSizeInHeader) {
+ if (filterChain.read() != -1)
+ throw new CorruptedInputException();
+
+ validate();
+ endReached = true;
+ }
+ } else if (ret == -1) {
+ validate();
+ endReached = true;
+ }
+
+ return ret;
+ }
+
+ private void validate() throws IOException {
+ long compressedSize = inCounted.getSize();
+
+ // Validate Compressed Size and Uncompressed Size if they were
+ // present in Block Header.
+ if ((compressedSizeInHeader != -1
+ && compressedSizeInHeader != compressedSize)
+ || (uncompressedSizeInHeader != -1
+ && uncompressedSizeInHeader != uncompressedSize))
+ throw new CorruptedInputException();
+
+ // Block Padding bytes must be zeros.
+ while ((compressedSize++ & 3) != 0)
+ if (inData.readUnsignedByte() != 0x00)
+ throw new CorruptedInputException();
+
+ // Validate the integrity check if verifyCheck is true.
+ byte[] storedCheck = new byte[check.getSize()];
+ inData.readFully(storedCheck);
+ if (verifyCheck && !Arrays.equals(check.finish(), storedCheck))
+ throw new CorruptedInputException("Integrity check ("
+ + check.getName() + ") does not match");
+ }
+
+ public int available() throws IOException {
+ return filterChain.available();
+ }
+
+ public void close() {
+ // This puts all arrays, that were allocated from ArrayCache,
+ // back to the ArrayCache. The last filter in the chain will
+ // call inCounted.close() which, being an instance of
+ // CloseIgnoringInputStream, won't close() the InputStream that
+ // was provided by the application.
+ try {
+ filterChain.close();
+ } catch (IOException e) {
+ // It's a bug if we get here. The InputStreams that we are closing
+ // are all from this package and they are known to not throw
+ // IOException. (They could throw an IOException if we were
+ // closing the application-supplied InputStream, but
+ // inCounted.close() doesn't do that.)
+ assert false;
+ }
+
+ filterChain = null;
+ }
+
+ public long getUnpaddedSize() {
+ return headerSize + inCounted.getSize() + check.getSize();
+ }
+
+ public long getUncompressedSize() {
+ return uncompressedSize;
+ }
+}
diff --git a/src/org/tukaani/xz/BlockOutputStream.java b/src/org/tukaani/xz/BlockOutputStream.java
new file mode 100644
index 0000000..8ac4407
--- /dev/null
+++ b/src/org/tukaani/xz/BlockOutputStream.java
@@ -0,0 +1,135 @@
+/*
+ * BlockOutputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.OutputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import org.tukaani.xz.common.EncoderUtil;
+import org.tukaani.xz.check.Check;
+
+class BlockOutputStream extends FinishableOutputStream {
+ private final OutputStream out;
+ private final CountingOutputStream outCounted;
+ private FinishableOutputStream filterChain;
+ private final Check check;
+
+ private final int headerSize;
+ private final long compressedSizeLimit;
+ private long uncompressedSize = 0;
+
+ private final byte[] tempBuf = new byte[1];
+
+ public BlockOutputStream(OutputStream out, FilterEncoder[] filters,
+ Check check, ArrayCache arrayCache)
+ throws IOException {
+ this.out = out;
+ this.check = check;
+
+ // Initialize the filter chain.
+ outCounted = new CountingOutputStream(out);
+ filterChain = outCounted;
+ for (int i = filters.length - 1; i >= 0; --i)
+ filterChain = filters[i].getOutputStream(filterChain, arrayCache);
+
+ // Prepare to encode the Block Header field.
+ ByteArrayOutputStream bufStream = new ByteArrayOutputStream();
+
+ // Write a dummy Block Header Size field. The real value is written
+ // once everything else except CRC32 has been written.
+ bufStream.write(0x00);
+
+ // Write Block Flags. Storing Compressed Size or Uncompressed Size
+ // isn't supported for now.
+ bufStream.write(filters.length - 1);
+
+ // List of Filter Flags
+ for (int i = 0; i < filters.length; ++i) {
+ EncoderUtil.encodeVLI(bufStream, filters[i].getFilterID());
+ byte[] filterProps = filters[i].getFilterProps();
+ EncoderUtil.encodeVLI(bufStream, filterProps.length);
+ bufStream.write(filterProps);
+ }
+
+ // Header Padding
+ while ((bufStream.size() & 3) != 0)
+ bufStream.write(0x00);
+
+ byte[] buf = bufStream.toByteArray();
+
+ // Total size of the Block Header: Take the size of the CRC32 field
+ // into account.
+ headerSize = buf.length + 4;
+
+ // This is just a sanity check.
+ if (headerSize > EncoderUtil.BLOCK_HEADER_SIZE_MAX)
+ throw new UnsupportedOptionsException();
+
+ // Block Header Size
+ buf[0] = (byte)(buf.length / 4);
+
+ // Write the Block Header field to the output stream.
+ out.write(buf);
+ EncoderUtil.writeCRC32(out, buf);
+
+ // Calculate the maximum allowed size of the Compressed Data field.
+ // It is hard to exceed it so this is mostly to be pedantic.
+ compressedSizeLimit = (EncoderUtil.VLI_MAX & ~3)
+ - headerSize - check.getSize();
+ }
+
+ public void write(int b) throws IOException {
+ tempBuf[0] = (byte)b;
+ write(tempBuf, 0, 1);
+ }
+
+ public void write(byte[] buf, int off, int len) throws IOException {
+ filterChain.write(buf, off, len);
+ check.update(buf, off, len);
+ uncompressedSize += len;
+ validate();
+ }
+
+ public void flush() throws IOException {
+ filterChain.flush();
+ validate();
+ }
+
+ public void finish() throws IOException {
+ // Finish the Compressed Data field.
+ filterChain.finish();
+ validate();
+
+ // Block Padding
+ for (long i = outCounted.getSize(); (i & 3) != 0; ++i)
+ out.write(0x00);
+
+ // Check
+ out.write(check.finish());
+ }
+
+ private void validate() throws IOException {
+ long compressedSize = outCounted.getSize();
+
+ // It is very hard to trigger this exception.
+ // This is just to be pedantic.
+ if (compressedSize < 0 || compressedSize > compressedSizeLimit
+ || uncompressedSize < 0)
+ throw new XZIOException("XZ Stream has grown too big");
+ }
+
+ public long getUnpaddedSize() {
+ return headerSize + outCounted.getSize() + check.getSize();
+ }
+
+ public long getUncompressedSize() {
+ return uncompressedSize;
+ }
+}
diff --git a/src/org/tukaani/xz/CloseIgnoringInputStream.java b/src/org/tukaani/xz/CloseIgnoringInputStream.java
new file mode 100644
index 0000000..db68ddb
--- /dev/null
+++ b/src/org/tukaani/xz/CloseIgnoringInputStream.java
@@ -0,0 +1,48 @@
+/*
+ * CloseIgnoringInputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import java.io.FilterInputStream;
+
+/**
+ * An {@code InputStream} wrapper whose {@code close()} does nothing.
+ * This is useful with raw decompressors if you want to call
+ * {@code close()} to release memory allocated from an {@link ArrayCache}
+ * but don't want to close the underlying {@code InputStream}.
+ * For example:
+ * <p><blockquote><pre>
+ * InputStream rawdec = new LZMA2InputStream(
+ * new CloseIgnoringInputStream(myInputStream),
+ * myDictSize, null, myArrayCache);
+ * doSomething(rawdec);
+ * rawdec.close(); // This doesn't close myInputStream.
+ * </pre></blockquote>
+ * <p>
+ * With {@link XZInputStream}, {@link SingleXZInputStream}, and
+ * {@link SeekableXZInputStream} you can use their {@code close(boolean)}
+ * method to avoid closing the underlying {@code InputStream}; with
+ * those classes {@code CloseIgnoringInputStream} isn't needed.
+ *
+ * @since 1.7
+ */
+public class CloseIgnoringInputStream extends FilterInputStream {
+ /**
+ * Creates a new {@code CloseIgnoringInputStream}.
+ */
+ public CloseIgnoringInputStream(InputStream in) {
+ super(in);
+ }
+
+ /**
+ * This does nothing (doesn't call {@code in.close()}).
+ */
+ public void close() {}
+}
diff --git a/src/org/tukaani/xz/CorruptedInputException.java b/src/org/tukaani/xz/CorruptedInputException.java
new file mode 100644
index 0000000..d7d9520
--- /dev/null
+++ b/src/org/tukaani/xz/CorruptedInputException.java
@@ -0,0 +1,37 @@
+/*
+ * CorruptedInputException
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+/**
+ * Thrown when the compressed input data is corrupt.
+ * However, it is possible that some or all of the data
+ * already read from the input stream was corrupt too.
+ */
+public class CorruptedInputException extends XZIOException {
+ private static final long serialVersionUID = 3L;
+
+ /**
+ * Creates a new CorruptedInputException with
+ * the default error detail message.
+ */
+ public CorruptedInputException() {
+ super("Compressed data is corrupt");
+ }
+
+ /**
+ * Creates a new CorruptedInputException with
+ * the specified error detail message.
+ *
+ * @param s error detail message
+ */
+ public CorruptedInputException(String s) {
+ super(s);
+ }
+}
diff --git a/src/org/tukaani/xz/CountingInputStream.java b/src/org/tukaani/xz/CountingInputStream.java
new file mode 100644
index 0000000..8599f97
--- /dev/null
+++ b/src/org/tukaani/xz/CountingInputStream.java
@@ -0,0 +1,47 @@
+/*
+ * CountingInputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.FilterInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+
+/**
+ * Counts the number of bytes read from an input stream.
+ * The <code>close()</code> method does nothing, that is, the underlying
+ * <code>InputStream</code> isn't closed.
+ */
+class CountingInputStream extends CloseIgnoringInputStream {
+ private long size = 0;
+
+ public CountingInputStream(InputStream in) {
+ super(in);
+ }
+
+ public int read() throws IOException {
+ int ret = in.read();
+ if (ret != -1 && size >= 0)
+ ++size;
+
+ return ret;
+ }
+
+ public int read(byte[] b, int off, int len) throws IOException {
+ int ret = in.read(b, off, len);
+ if (ret > 0 && size >= 0)
+ size += ret;
+
+ return ret;
+ }
+
+ public long getSize() {
+ return size;
+ }
+}
diff --git a/src/org/tukaani/xz/CountingOutputStream.java b/src/org/tukaani/xz/CountingOutputStream.java
new file mode 100644
index 0000000..9b3eef3
--- /dev/null
+++ b/src/org/tukaani/xz/CountingOutputStream.java
@@ -0,0 +1,54 @@
+/*
+ * CountingOutputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.OutputStream;
+import java.io.IOException;
+
+/**
+ * Counts the number of bytes written to an output stream.
+ * <p>
+ * The <code>finish</code> method does nothing.
+ * This is <code>FinishableOutputStream</code> instead
+ * of <code>OutputStream</code> solely because it allows
+ * using this as the output stream for a chain of raw filters.
+ */
+class CountingOutputStream extends FinishableOutputStream {
+ private final OutputStream out;
+ private long size = 0;
+
+ public CountingOutputStream(OutputStream out) {
+ this.out = out;
+ }
+
+ public void write(int b) throws IOException {
+ out.write(b);
+ if (size >= 0)
+ ++size;
+ }
+
+ public void write(byte[] b, int off, int len) throws IOException {
+ out.write(b, off, len);
+ if (size >= 0)
+ size += len;
+ }
+
+ public void flush() throws IOException {
+ out.flush();
+ }
+
+ public void close() throws IOException {
+ out.close();
+ }
+
+ public long getSize() {
+ return size;
+ }
+}
diff --git a/src/org/tukaani/xz/DeltaCoder.java b/src/org/tukaani/xz/DeltaCoder.java
new file mode 100644
index 0000000..808834c
--- /dev/null
+++ b/src/org/tukaani/xz/DeltaCoder.java
@@ -0,0 +1,26 @@
+/*
+ * DeltaCoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+abstract class DeltaCoder implements FilterCoder {
+ public static final long FILTER_ID = 0x03;
+
+ public boolean changesSize() {
+ return false;
+ }
+
+ public boolean nonLastOK() {
+ return true;
+ }
+
+ public boolean lastOK() {
+ return false;
+ }
+}
diff --git a/src/org/tukaani/xz/DeltaDecoder.java b/src/org/tukaani/xz/DeltaDecoder.java
new file mode 100644
index 0000000..4d21ca2
--- /dev/null
+++ b/src/org/tukaani/xz/DeltaDecoder.java
@@ -0,0 +1,32 @@
+/*
+ * DeltaDecoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+
+class DeltaDecoder extends DeltaCoder implements FilterDecoder {
+ private final int distance;
+
+ DeltaDecoder(byte[] props) throws UnsupportedOptionsException {
+ if (props.length != 1)
+ throw new UnsupportedOptionsException(
+ "Unsupported Delta filter properties");
+
+ distance = (props[0] & 0xFF) + 1;
+ }
+
+ public int getMemoryUsage() {
+ return 1;
+ }
+
+ public InputStream getInputStream(InputStream in, ArrayCache arrayCache) {
+ return new DeltaInputStream(in, distance);
+ }
+}
diff --git a/src/org/tukaani/xz/DeltaEncoder.java b/src/org/tukaani/xz/DeltaEncoder.java
new file mode 100644
index 0000000..86ba9ea
--- /dev/null
+++ b/src/org/tukaani/xz/DeltaEncoder.java
@@ -0,0 +1,37 @@
+/*
+ * DeltaEncoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+class DeltaEncoder extends DeltaCoder implements FilterEncoder {
+ private final DeltaOptions options;
+ private final byte[] props = new byte[1];
+
+ DeltaEncoder(DeltaOptions options) {
+ props[0] = (byte)(options.getDistance() - 1);
+ this.options = (DeltaOptions)options.clone();
+ }
+
+ public long getFilterID() {
+ return FILTER_ID;
+ }
+
+ public byte[] getFilterProps() {
+ return props;
+ }
+
+ public boolean supportsFlushing() {
+ return true;
+ }
+
+ public FinishableOutputStream getOutputStream(FinishableOutputStream out,
+ ArrayCache arrayCache) {
+ return options.getOutputStream(out, arrayCache);
+ }
+}
diff --git a/src/org/tukaani/xz/DeltaInputStream.java b/src/org/tukaani/xz/DeltaInputStream.java
new file mode 100644
index 0000000..56478f5
--- /dev/null
+++ b/src/org/tukaani/xz/DeltaInputStream.java
@@ -0,0 +1,146 @@
+/*
+ * DeltaInputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import java.io.IOException;
+import org.tukaani.xz.delta.DeltaDecoder;
+
+/**
+ * Decodes raw Delta-filtered data (no XZ headers).
+ * <p>
+ * The delta filter doesn't change the size of the data and thus it
+ * cannot have an end-of-payload marker. It will simply decode until
+ * its input stream indicates end of input.
+ */
+public class DeltaInputStream extends InputStream {
+ /**
+ * Smallest supported delta calculation distance.
+ */
+ public static final int DISTANCE_MIN = 1;
+
+ /**
+ * Largest supported delta calculation distance.
+ */
+ public static final int DISTANCE_MAX = 256;
+
+ private InputStream in;
+ private final DeltaDecoder delta;
+
+ private IOException exception = null;
+
+ private final byte[] tempBuf = new byte[1];
+
+ /**
+ * Creates a new Delta decoder with the given delta calculation distance.
+ *
+ * @param in input stream from which Delta filtered data
+ * is read
+ *
+ * @param distance delta calculation distance, must be in the
+ * range [<code>DISTANCE_MIN</code>,
+ * <code>DISTANCE_MAX</code>]
+ */
+ public DeltaInputStream(InputStream in, int distance) {
+ // Check for null because otherwise null isn't detected
+ // in this constructor.
+ if (in == null)
+ throw new NullPointerException();
+
+ this.in = in;
+ this.delta = new DeltaDecoder(distance);
+ }
+
+ /**
+ * Decode the next byte from this input stream.
+ *
+ * @return the next decoded byte, or <code>-1</code> to indicate
+ * the end of input on the input stream <code>in</code>
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ */
+ public int read() throws IOException {
+ return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
+ }
+
+ /**
+ * Decode into an array of bytes.
+ * <p>
+ * This calls <code>in.read(buf, off, len)</code> and defilters the
+ * returned data.
+ *
+ * @param buf target buffer for decoded data
+ * @param off start offset in <code>buf</code>
+ * @param len maximum number of bytes to read
+ *
+ * @return number of bytes read, or <code>-1</code> to indicate
+ * the end of the input stream <code>in</code>
+ *
+ * @throws XZIOException if the stream has been closed
+ *
+ * @throws IOException may be thrown by underlying input
+ * stream <code>in</code>
+ */
+ public int read(byte[] buf, int off, int len) throws IOException {
+ if (len == 0)
+ return 0;
+
+ if (in == null)
+ throw new XZIOException("Stream closed");
+
+ if (exception != null)
+ throw exception;
+
+ int size;
+ try {
+ size = in.read(buf, off, len);
+ } catch (IOException e) {
+ exception = e;
+ throw e;
+ }
+
+ if (size == -1)
+ return -1;
+
+ delta.decode(buf, off, size);
+ return size;
+ }
+
+ /**
+ * Calls <code>in.available()</code>.
+ *
+ * @return the value returned by <code>in.available()</code>
+ */
+ public int available() throws IOException {
+ if (in == null)
+ throw new XZIOException("Stream closed");
+
+ if (exception != null)
+ throw exception;
+
+ return in.available();
+ }
+
+ /**
+ * Closes the stream and calls <code>in.close()</code>.
+ * If the stream was already closed, this does nothing.
+ *
+ * @throws IOException if thrown by <code>in.close()</code>
+ */
+ public void close() throws IOException {
+ if (in != null) {
+ try {
+ in.close();
+ } finally {
+ in = null;
+ }
+ }
+ }
+}
diff --git a/src/org/tukaani/xz/DeltaOptions.java b/src/org/tukaani/xz/DeltaOptions.java
new file mode 100644
index 0000000..fac74d9
--- /dev/null
+++ b/src/org/tukaani/xz/DeltaOptions.java
@@ -0,0 +1,103 @@
+/*
+ * DeltaOptions
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+
+/**
+ * Delta filter options. The Delta filter can be used only as a non-last
+ * filter in the chain, for example Delta + LZMA2.
+ * <p>
+ * Currently only simple byte-wise delta is supported. The only option
+ * is the delta distance, which you should set to match your data.
+ * It's not possible to provide a generic default value for it.
+ * <p>
+ * For example, with distance = 2 and eight-byte input
+ * A1 B1 A2 B3 A3 B5 A4 B7, the output will be A1 B1 01 02 01 02 01 02.
+ * <p>
+ * The Delta filter can be good with uncompressed bitmap images. It can
+ * also help with PCM audio, although special-purpose compressors like
+ * FLAC will give much smaller result at much better compression speed.
+ */
+public class DeltaOptions extends FilterOptions {
+ /**
+ * Smallest supported delta calculation distance.
+ */
+ public static final int DISTANCE_MIN = 1;
+
+ /**
+ * Largest supported delta calculation distance.
+ */
+ public static final int DISTANCE_MAX = 256;
+
+ private int distance = DISTANCE_MIN;
+
+ /**
+ * Creates new Delta options and sets the delta distance to 1 byte.
+ */
+ public DeltaOptions() {}
+
+ /**
+ * Creates new Delta options and sets the distance to the given value.
+ */
+ public DeltaOptions(int distance) throws UnsupportedOptionsException {
+ setDistance(distance);
+ }
+
+ /**
+ * Sets the delta distance in bytes. The new distance must be in
+ * the range [DISTANCE_MIN, DISTANCE_MAX].
+ */
+ public void setDistance(int distance) throws UnsupportedOptionsException {
+ if (distance < DISTANCE_MIN || distance > DISTANCE_MAX)
+ throw new UnsupportedOptionsException(
+ "Delta distance must be in the range [" + DISTANCE_MIN
+ + ", " + DISTANCE_MAX + "]: " + distance);
+
+ this.distance = distance;
+ }
+
+ /**
+ * Gets the delta distance.
+ */
+ public int getDistance() {
+ return distance;
+ }
+
+ public int getEncoderMemoryUsage() {
+ return DeltaOutputStream.getMemoryUsage();
+ }
+
+ public FinishableOutputStream getOutputStream(FinishableOutputStream out,
+ ArrayCache arrayCache) {
+ return new DeltaOutputStream(out, this);
+ }
+
+ public int getDecoderMemoryUsage() {
+ return 1;
+ }
+
+ public InputStream getInputStream(InputStream in, ArrayCache arrayCache) {
+ return new DeltaInputStream(in, distance);
+ }
+
+ FilterEncoder getFilterEncoder() {
+ return new DeltaEncoder(this);
+ }
+
+ public Object clone() {
+ try {
+ return super.clone();
+ } catch (CloneNotSupportedException e) {
+ assert false;
+ throw new RuntimeException();
+ }
+ }
+}
diff --git a/src/org/tukaani/xz/DeltaOutputStream.java b/src/org/tukaani/xz/DeltaOutputStream.java
new file mode 100644
index 0000000..bd880db
--- /dev/null
+++ b/src/org/tukaani/xz/DeltaOutputStream.java
@@ -0,0 +1,113 @@
+/*
+ * DeltaOutputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.IOException;
+import org.tukaani.xz.delta.DeltaEncoder;
+
+class DeltaOutputStream extends FinishableOutputStream {
+ private static final int FILTER_BUF_SIZE = 4096;
+
+ private FinishableOutputStream out;
+ private final DeltaEncoder delta;
+ private final byte[] filterBuf = new byte[FILTER_BUF_SIZE];
+
+ private boolean finished = false;
+ private IOException exception = null;
+
+ private final byte[] tempBuf = new byte[1];
+
+ static int getMemoryUsage() {
+ return 1 + FILTER_BUF_SIZE / 1024;
+ }
+
+ DeltaOutputStream(FinishableOutputStream out, DeltaOptions options) {
+ this.out = out;
+ delta = new DeltaEncoder(options.getDistance());
+ }
+
+ public void write(int b) throws IOException {
+ tempBuf[0] = (byte)b;
+ write(tempBuf, 0, 1);
+ }
+
+ public void write(byte[] buf, int off, int len) throws IOException {
+ if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
+ throw new IndexOutOfBoundsException();
+
+ if (exception != null)
+ throw exception;
+
+ if (finished)
+ throw new XZIOException("Stream finished");
+
+ try {
+ while (len > FILTER_BUF_SIZE) {
+ delta.encode(buf, off, FILTER_BUF_SIZE, filterBuf);
+ out.write(filterBuf);
+ off += FILTER_BUF_SIZE;
+ len -= FILTER_BUF_SIZE;
+ }
+
+ delta.encode(buf, off, len, filterBuf);
+ out.write(filterBuf, 0, len);
+ } catch (IOException e) {
+ exception = e;
+ throw e;
+ }
+ }
+
+ public void flush() throws IOException {
+ if (exception != null)
+ throw exception;
+
+ if (finished)
+ throw new XZIOException("Stream finished or closed");
+
+ try {
+ out.flush();
+ } catch (IOException e) {
+ exception = e;
+ throw e;
+ }
+ }
+
+ public void finish() throws IOException {
+ if (!finished) {
+ if (exception != null)
+ throw exception;
+
+ try {
+ out.finish();
+ } catch (IOException e) {
+ exception = e;
+ throw e;
+ }
+
+ finished = true;
+ }
+ }
+
+ public void close() throws IOException {
+ if (out != null) {
+ try {
+ out.close();
+ } catch (IOException e) {
+ if (exception == null)
+ exception = e;
+ }
+
+ out = null;
+ }
+
+ if (exception != null)
+ throw exception;
+ }
+}
diff --git a/src/org/tukaani/xz/FilterCoder.java b/src/org/tukaani/xz/FilterCoder.java
new file mode 100644
index 0000000..1e95e37
--- /dev/null
+++ b/src/org/tukaani/xz/FilterCoder.java
@@ -0,0 +1,16 @@
+/*
+ * FilterCoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+interface FilterCoder {
+ boolean changesSize();
+ boolean nonLastOK();
+ boolean lastOK();
+}
diff --git a/src/org/tukaani/xz/FilterDecoder.java b/src/org/tukaani/xz/FilterDecoder.java
new file mode 100644
index 0000000..6ec2f83
--- /dev/null
+++ b/src/org/tukaani/xz/FilterDecoder.java
@@ -0,0 +1,17 @@
+/*
+ * FilterDecoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+
/**
 * Decoder side of a filter: reports its memory cost and wraps an
 * input stream with the filter's decompression/decoding stream.
 */
interface FilterDecoder extends FilterCoder {
    // Decoder memory usage; by convention elsewhere in this package
    // the unit is kibibytes (KiB) — see LZMA2InputStream.getMemoryUsage.
    int getMemoryUsage();

    // Wraps in with this filter's decoding stream, allocating large
    // buffers via the given ArrayCache.
    InputStream getInputStream(InputStream in, ArrayCache arrayCache);
}
diff --git a/src/org/tukaani/xz/FilterEncoder.java b/src/org/tukaani/xz/FilterEncoder.java
new file mode 100644
index 0000000..b40575e
--- /dev/null
+++ b/src/org/tukaani/xz/FilterEncoder.java
@@ -0,0 +1,18 @@
+/*
+ * FilterEncoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
/**
 * Encoder side of a filter: exposes the filter's .xz header fields
 * (ID and properties) and wraps an output stream with the filter's
 * encoding stream.
 */
interface FilterEncoder extends FilterCoder {
    // Filter ID as stored in the .xz Block header.
    long getFilterID();

    // Encoded filter properties as stored in the .xz Block header.
    byte[] getFilterProps();

    // Whether the encoder supports flushing mid-stream.
    boolean supportsFlushing();

    // Wraps out with this filter's encoding stream, allocating large
    // buffers via the given ArrayCache.
    FinishableOutputStream getOutputStream(FinishableOutputStream out,
                                           ArrayCache arrayCache);
}
diff --git a/src/org/tukaani/xz/FilterOptions.java b/src/org/tukaani/xz/FilterOptions.java
new file mode 100644
index 0000000..34f4450
--- /dev/null
+++ b/src/org/tukaani/xz/FilterOptions.java
@@ -0,0 +1,104 @@
+/*
+ * FilterOptions
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import java.io.IOException;
+
+/**
+ * Base class for filter-specific options classes.
+ */
+public abstract class FilterOptions implements Cloneable {
+ /**
+ * Gets how much memory the encoder will need with
+ * the given filter chain. This function simply calls
+ * <code>getEncoderMemoryUsage()</code> for every filter
+ * in the array and returns the sum of the returned values.
+ */
+ public static int getEncoderMemoryUsage(FilterOptions[] options) {
+ int m = 0;
+
+ for (int i = 0; i < options.length; ++i)
+ m += options[i].getEncoderMemoryUsage();
+
+ return m;
+ }
+
+ /**
+ * Gets how much memory the decoder will need with
+ * the given filter chain. This function simply calls
+ * <code>getDecoderMemoryUsage()</code> for every filter
+ * in the array and returns the sum of the returned values.
+ */
+ public static int getDecoderMemoryUsage(FilterOptions[] options) {
+ int m = 0;
+
+ for (int i = 0; i < options.length; ++i)
+ m += options[i].getDecoderMemoryUsage();
+
+ return m;
+ }
+
+ /**
+ * Gets how much memory the encoder will need with these options.
+ */
+ public abstract int getEncoderMemoryUsage();
+
+ /**
+ * Gets a raw (no XZ headers) encoder output stream using these options.
+ * Raw streams are an advanced feature. In most cases you want to store
+ * the compressed data in the .xz container format instead of using
+ * a raw stream. To use this filter in a .xz file, pass this object
+ * to XZOutputStream.
+ * <p>
+ * This is uses ArrayCache.getDefaultCache() as the ArrayCache.
+ */
+ public FinishableOutputStream getOutputStream(FinishableOutputStream out) {
+ return getOutputStream(out, ArrayCache.getDefaultCache());
+ }
+
+ /**
+ * Gets a raw (no XZ headers) encoder output stream using these options
+ * and the given ArrayCache.
+ * Raw streams are an advanced feature. In most cases you want to store
+ * the compressed data in the .xz container format instead of using
+ * a raw stream. To use this filter in a .xz file, pass this object
+ * to XZOutputStream.
+ */
+ public abstract FinishableOutputStream getOutputStream(
+ FinishableOutputStream out, ArrayCache arrayCache);
+
+ /**
+ * Gets how much memory the decoder will need to decompress the data
+ * that was encoded with these options.
+ */
+ public abstract int getDecoderMemoryUsage();
+
+ /**
+ * Gets a raw (no XZ headers) decoder input stream using these options.
+ * <p>
+ * This is uses ArrayCache.getDefaultCache() as the ArrayCache.
+ */
+ public InputStream getInputStream(InputStream in) throws IOException {
+ return getInputStream(in, ArrayCache.getDefaultCache());
+ }
+
+ /**
+ * Gets a raw (no XZ headers) decoder input stream using these options
+ * and the given ArrayCache.
+ */
+ public abstract InputStream getInputStream(
+ InputStream in, ArrayCache arrayCache) throws IOException;
+
+ abstract FilterEncoder getFilterEncoder();
+
+ FilterOptions() {}
+}
diff --git a/src/org/tukaani/xz/FinishableOutputStream.java b/src/org/tukaani/xz/FinishableOutputStream.java
new file mode 100644
index 0000000..64d4ca5
--- /dev/null
+++ b/src/org/tukaani/xz/FinishableOutputStream.java
@@ -0,0 +1,31 @@
+/*
+ * FinishableOutputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.OutputStream;
+import java.io.IOException;
+
/**
 * Output stream that supports finishing without closing
 * the underlying stream.
 */
public abstract class FinishableOutputStream extends OutputStream {
    /**
     * Finish the stream without closing the underlying stream.
     * No more data may be written to the stream after finishing.
     * <p>
     * The <code>finish</code> method of <code>FinishableOutputStream</code>
     * does nothing. Subclasses should override it if they need finishing
     * support, which is the case, for example, with compressors.
     *
     * @throws IOException may be thrown by subclasses that override
     *                     this method, for example, if flushing
     *                     buffered data to the underlying stream fails
     */
    public void finish() throws IOException {}
}
diff --git a/src/org/tukaani/xz/FinishableWrapperOutputStream.java b/src/org/tukaani/xz/FinishableWrapperOutputStream.java
new file mode 100644
index 0000000..2e0ac99
--- /dev/null
+++ b/src/org/tukaani/xz/FinishableWrapperOutputStream.java
@@ -0,0 +1,70 @@
+/*
+ * FinishableWrapperOutputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.OutputStream;
+import java.io.IOException;
+
+/**
+ * Wraps an output stream to a finishable output stream for use with
+ * raw encoders. This is not needed for XZ compression and thus most
+ * people will never need this.
+ */
+public class FinishableWrapperOutputStream extends FinishableOutputStream {
+ /**
+ * The {@link java.io.OutputStream OutputStream} that has been
+ * wrapped into a FinishableWrapperOutputStream.
+ */
+ protected OutputStream out;
+
+ /**
+ * Creates a new output stream which support finishing.
+ * The <code>finish()</code> method will do nothing.
+ */
+ public FinishableWrapperOutputStream(OutputStream out) {
+ this.out = out;
+ }
+
+ /**
+ * Calls {@link java.io.OutputStream#write(int) out.write(b)}.
+ */
+ public void write(int b) throws IOException {
+ out.write(b);
+ }
+
+ /**
+ * Calls {@link java.io.OutputStream#write(byte[]) out.write(buf)}.
+ */
+ public void write(byte[] buf) throws IOException {
+ out.write(buf);
+ }
+
+ /**
+ * Calls {@link java.io.OutputStream#write(byte[],int,int)
+ out.write(buf, off, len)}.
+ */
+ public void write(byte[] buf, int off, int len) throws IOException {
+ out.write(buf, off, len);
+ }
+
+ /**
+ * Calls {@link java.io.OutputStream#flush() out.flush()}.
+ */
+ public void flush() throws IOException {
+ out.flush();
+ }
+
+ /**
+ * Calls {@link java.io.OutputStream#close() out.close()}.
+ */
+ public void close() throws IOException {
+ out.close();
+ }
+}
diff --git a/src/org/tukaani/xz/IA64Options.java b/src/org/tukaani/xz/IA64Options.java
new file mode 100644
index 0000000..491edcf
--- /dev/null
+++ b/src/org/tukaani/xz/IA64Options.java
@@ -0,0 +1,37 @@
+/*
+ * IA64Options
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import org.tukaani.xz.simple.IA64;
+
+/**
+ * BCJ filter for Itanium (IA-64) instructions.
+ */
+public class IA64Options extends BCJOptions {
+ private static final int ALIGNMENT = 16;
+
+ public IA64Options() {
+ super(ALIGNMENT);
+ }
+
+ public FinishableOutputStream getOutputStream(FinishableOutputStream out,
+ ArrayCache arrayCache) {
+ return new SimpleOutputStream(out, new IA64(true, startOffset));
+ }
+
+ public InputStream getInputStream(InputStream in, ArrayCache arrayCache) {
+ return new SimpleInputStream(in, new IA64(false, startOffset));
+ }
+
+ FilterEncoder getFilterEncoder() {
+ return new BCJEncoder(this, BCJCoder.IA64_FILTER_ID);
+ }
+}
diff --git a/src/org/tukaani/xz/IndexIndicatorException.java b/src/org/tukaani/xz/IndexIndicatorException.java
new file mode 100644
index 0000000..fc6bc03
--- /dev/null
+++ b/src/org/tukaani/xz/IndexIndicatorException.java
@@ -0,0 +1,14 @@
+/*
+ * IndexIndicatorException
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
// Internal control-flow exception with no message or state.
// NOTE(review): presumably thrown when an Index indicator is found
// where a Block header was expected — confirm against the code that
// parses Block headers (e.g. BlockInputStream).
class IndexIndicatorException extends Exception {
    private static final long serialVersionUID = 1L;
}
diff --git a/src/org/tukaani/xz/LZMA2Coder.java b/src/org/tukaani/xz/LZMA2Coder.java
new file mode 100644
index 0000000..b0963b7
--- /dev/null
+++ b/src/org/tukaani/xz/LZMA2Coder.java
@@ -0,0 +1,26 @@
+/*
+ * LZMA2Coder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
/**
 * FilterCoder properties shared by the LZMA2 encoder and decoder.
 */
abstract class LZMA2Coder implements FilterCoder {
    /** Filter ID of LZMA2 as stored in .xz headers. */
    public static final long FILTER_ID = 0x21;

    /** LZMA2 changes the size of the data (it compresses). */
    public boolean changesSize() {
        return true;
    }

    /** LZMA2 is not allowed as a non-last filter in a chain. */
    public boolean nonLastOK() {
        return false;
    }

    /** LZMA2 is allowed as the last filter in a chain. */
    public boolean lastOK() {
        return true;
    }
}
diff --git a/src/org/tukaani/xz/LZMA2Decoder.java b/src/org/tukaani/xz/LZMA2Decoder.java
new file mode 100644
index 0000000..71c1c90
--- /dev/null
+++ b/src/org/tukaani/xz/LZMA2Decoder.java
@@ -0,0 +1,35 @@
+/*
+ * LZMA2Decoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+
+class LZMA2Decoder extends LZMA2Coder implements FilterDecoder {
+ private int dictSize;
+
+ LZMA2Decoder(byte[] props) throws UnsupportedOptionsException {
+ // Up to 1.5 GiB dictionary is supported. The bigger ones
+ // are too big for int.
+ if (props.length != 1 || (props[0] & 0xFF) > 37)
+ throw new UnsupportedOptionsException(
+ "Unsupported LZMA2 properties");
+
+ dictSize = 2 | (props[0] & 1);
+ dictSize <<= (props[0] >>> 1) + 11;
+ }
+
+ public int getMemoryUsage() {
+ return LZMA2InputStream.getMemoryUsage(dictSize);
+ }
+
+ public InputStream getInputStream(InputStream in, ArrayCache arrayCache) {
+ return new LZMA2InputStream(in, dictSize, null, arrayCache);
+ }
+}
diff --git a/src/org/tukaani/xz/LZMA2Encoder.java b/src/org/tukaani/xz/LZMA2Encoder.java
new file mode 100644
index 0000000..6f3cab4
--- /dev/null
+++ b/src/org/tukaani/xz/LZMA2Encoder.java
@@ -0,0 +1,51 @@
+/*
+ * LZMA2Encoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import org.tukaani.xz.lzma.LZMAEncoder;
+
+class LZMA2Encoder extends LZMA2Coder implements FilterEncoder {
+ private final LZMA2Options options;
+ private final byte[] props = new byte[1];
+
+ LZMA2Encoder(LZMA2Options options) {
+ if (options.getPresetDict() != null)
+ throw new IllegalArgumentException(
+ "XZ doesn't support a preset dictionary for now");
+
+ if (options.getMode() == LZMA2Options.MODE_UNCOMPRESSED) {
+ props[0] = (byte)0;
+ } else {
+ int d = Math.max(options.getDictSize(),
+ LZMA2Options.DICT_SIZE_MIN);
+ props[0] = (byte)(LZMAEncoder.getDistSlot(d - 1) - 23);
+ }
+
+ // Make a private copy so that the caller is free to change its copy.
+ this.options = (LZMA2Options)options.clone();
+ }
+
+ public long getFilterID() {
+ return FILTER_ID;
+ }
+
+ public byte[] getFilterProps() {
+ return props;
+ }
+
+ public boolean supportsFlushing() {
+ return true;
+ }
+
+ public FinishableOutputStream getOutputStream(FinishableOutputStream out,
+ ArrayCache arrayCache) {
+ return options.getOutputStream(out, arrayCache);
+ }
+}
diff --git a/src/org/tukaani/xz/LZMA2InputStream.java b/src/org/tukaani/xz/LZMA2InputStream.java
new file mode 100644
index 0000000..9708052
--- /dev/null
+++ b/src/org/tukaani/xz/LZMA2InputStream.java
@@ -0,0 +1,400 @@
+/*
+ * LZMA2InputStream
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import org.tukaani.xz.lz.LZDecoder;
+import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer;
+import org.tukaani.xz.lzma.LZMADecoder;
+
/**
 * Decompresses a raw LZMA2 stream (no XZ headers).
 */
public class LZMA2InputStream extends InputStream {
    /**
     * Smallest valid LZMA2 dictionary size.
     * <p>
     * Very tiny dictionaries would be a performance problem, so
     * the minimum is 4 KiB.
     */
    public static final int DICT_SIZE_MIN = 4096;

    /**
     * Largest dictionary size supported by this implementation.
     * <p>
     * The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB.
     * This implementation supports only 16 bytes less than 2 GiB for raw
     * LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This
     * limitation is due to Java using signed 32-bit integers for array
     * indexing. The limitation shouldn't matter much in practice since so
     * huge dictionaries are not normally used.
     */
    public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;

    // A chunk's compressed size is stored as an unsigned 16-bit value
    // plus one, so a single chunk holds at most 64 KiB of compressed data.
    private static final int COMPRESSED_SIZE_MAX = 1 << 16;

    private final ArrayCache arrayCache;

    // Set to null when the stream has been closed.
    private DataInputStream in;

    // lz and rc are returned to arrayCache and set to null at end of
    // stream or on close() (see putArraysToCache()).
    private LZDecoder lz;
    private RangeDecoderFromBuffer rc;
    private LZMADecoder lzma;

    // Number of uncompressed bytes left in the current chunk.
    private int uncompressedSize = 0;
    private boolean isLZMAChunk = false;

    private boolean needDictReset = true;
    private boolean needProps = true;
    private boolean endReached = false;

    // First IOException seen; rethrown on all later calls once set.
    private IOException exception = null;

    // One-byte scratch buffer so read() avoids a per-call allocation.
    private final byte[] tempBuf = new byte[1];

    /**
     * Gets approximate decompressor memory requirements as kibibytes for
     * the given dictionary size.
     *
     * @param       dictSize    LZMA2 dictionary size as bytes, must be
     *                          in the range [<code>DICT_SIZE_MIN</code>,
     *                          <code>DICT_SIZE_MAX</code>]
     *
     * @return      approximate memory requirements as kibibytes (KiB)
     */
    public static int getMemoryUsage(int dictSize) {
        // The base state is around 30-40 KiB (probabilities etc.),
        // range decoder needs COMPRESSED_SIZE_MAX bytes for buffering,
        // and LZ decoder needs a dictionary buffer.
        return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024;
    }

    // Validates dictSize and rounds it up to a multiple of 16.
    private static int getDictSize(int dictSize) {
        if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX)
            throw new IllegalArgumentException(
                    "Unsupported dictionary size " + dictSize);

        // Round dictionary size upward to a multiple of 16. This way LZMA
        // can use LZDecoder.getPos() for calculating LZMA's posMask.
        // Note that this check is needed only for raw LZMA2 streams; it is
        // redundant with .xz.
        return (dictSize + 15) & ~15;
    }

    /**
     * Creates a new input stream that decompresses raw LZMA2 data
     * from <code>in</code>.
     * <p>
     * The caller needs to know the dictionary size used when compressing;
     * the dictionary size isn't stored as part of a raw LZMA2 stream.
     * <p>
     * Specifying a too small dictionary size will prevent decompressing
     * the stream. Specifying a too big dictionary is waste of memory but
     * decompression will work.
     * <p>
     * There is no need to specify a dictionary bigger than
     * the uncompressed size of the data even if a bigger dictionary
     * was used when compressing. If you know the uncompressed size
     * of the data, this might allow saving some memory.
     *
     * @param       in          input stream from which LZMA2-compressed
     *                          data is read
     *
     * @param       dictSize    LZMA2 dictionary size as bytes, must be
     *                          in the range [<code>DICT_SIZE_MIN</code>,
     *                          <code>DICT_SIZE_MAX</code>]
     */
    public LZMA2InputStream(InputStream in, int dictSize) {
        this(in, dictSize, null);
    }

    /**
     * Creates a new LZMA2 decompressor using a preset dictionary.
     * <p>
     * This is like <code>LZMA2InputStream(InputStream, int)</code> except
     * that the dictionary may be initialized using a preset dictionary.
     * If a preset dictionary was used when compressing the data, the
     * same preset dictionary must be provided when decompressing.
     *
     * @param       in          input stream from which LZMA2-compressed
     *                          data is read
     *
     * @param       dictSize    LZMA2 dictionary size as bytes, must be
     *                          in the range [<code>DICT_SIZE_MIN</code>,
     *                          <code>DICT_SIZE_MAX</code>]
     *
     * @param       presetDict  preset dictionary or <code>null</code>
     *                          to use no preset dictionary
     */
    public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) {
        this(in, dictSize, presetDict, ArrayCache.getDefaultCache());
    }

    /**
     * Creates a new LZMA2 decompressor using a preset dictionary
     * and array cache.
     * <p>
     * This is like <code>LZMA2InputStream(InputStream, int, byte[])</code>
     * except that this also takes the <code>arrayCache</code> argument.
     *
     * @param       in          input stream from which LZMA2-compressed
     *                          data is read
     *
     * @param       dictSize    LZMA2 dictionary size as bytes, must be
     *                          in the range [<code>DICT_SIZE_MIN</code>,
     *                          <code>DICT_SIZE_MAX</code>]
     *
     * @param       presetDict  preset dictionary or <code>null</code>
     *                          to use no preset dictionary
     *
     * @param       arrayCache  cache to be used for allocating large arrays
     *
     * @since 1.7
     */
    // NOTE(review): documented like a public API (including @since) but
    // declared package-private — confirm whether narrowing the visibility
    // was intentional in this import of upstream.
    LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict,
                     ArrayCache arrayCache) {
        // Check for null because otherwise null isn't detected
        // in this constructor.
        if (in == null)
            throw new NullPointerException();

        this.arrayCache = arrayCache;
        this.in = new DataInputStream(in);
        this.rc = new RangeDecoderFromBuffer(COMPRESSED_SIZE_MAX, arrayCache);
        this.lz = new LZDecoder(getDictSize(dictSize), presetDict, arrayCache);

        // A non-empty preset dictionary counts as the initial dictionary
        // contents, so the first chunk doesn't need to reset it.
        if (presetDict != null && presetDict.length > 0)
            needDictReset = false;
    }

    /**
     * Decompresses the next byte from this input stream.
     * <p>
     * Reading lots of data with <code>read()</code> from this input stream
     * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code>
     * if you need to read lots of data one byte at a time.
     *
     * @return      the next decompressed byte, or <code>-1</code>
     *              to indicate the end of the compressed stream
     *
     * @throws      CorruptedInputException
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      EOFException
     *                          compressed input is truncated or corrupt
     *
     * @throws      IOException may be thrown by <code>in</code>
     */
    public int read() throws IOException {
        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    }

    /**
     * Decompresses into an array of bytes.
     * <p>
     * If <code>len</code> is zero, no bytes are read and <code>0</code>
     * is returned. Otherwise this will block until <code>len</code>
     * bytes have been decompressed, the end of the LZMA2 stream is reached,
     * or an exception is thrown.
     *
     * @param       buf         target buffer for uncompressed data
     * @param       off         start offset in <code>buf</code>
     * @param       len         maximum number of uncompressed bytes to read
     *
     * @return      number of bytes read, or <code>-1</code> to indicate
     *              the end of the compressed stream
     *
     * @throws      CorruptedInputException
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      EOFException
     *                          compressed input is truncated or corrupt
     *
     * @throws      IOException may be thrown by <code>in</code>
     */
    public int read(byte[] buf, int off, int len) throws IOException {
        // Standard range check; "off + len < 0" catches int overflow.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();

        if (len == 0)
            return 0;

        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        if (endReached)
            return -1;

        try {
            int size = 0;

            while (len > 0) {
                // At a chunk boundary, parse the next chunk header.
                if (uncompressedSize == 0) {
                    decodeChunkHeader();
                    if (endReached)
                        return size == 0 ? -1 : size;
                }

                int copySizeMax = Math.min(uncompressedSize, len);

                if (!isLZMAChunk) {
                    // Uncompressed chunk: copy bytes straight into
                    // the dictionary.
                    lz.copyUncompressed(in, copySizeMax);
                } else {
                    // LZMA chunk: decode up to copySizeMax bytes into
                    // the dictionary.
                    lz.setLimit(copySizeMax);
                    lzma.decode();
                }

                // Move the newly decoded bytes out of the dictionary
                // into the caller's buffer.
                int copiedSize = lz.flush(buf, off);
                off += copiedSize;
                len -= copiedSize;
                size += copiedSize;
                uncompressedSize -= copiedSize;

                // At the end of an LZMA chunk the range coder must have
                // consumed its whole input and the dictionary must have
                // no pending output; otherwise the chunk was corrupt.
                if (uncompressedSize == 0)
                    if (!rc.isFinished() || lz.hasPending())
                        throw new CorruptedInputException();
            }

            return size;

        } catch (IOException e) {
            // Latch the error so later calls fail fast.
            exception = e;
            throw e;
        }
    }

    // Parses the next chunk header and prepares the decoder state for
    // that chunk. Sets endReached when the end marker is found.
    private void decodeChunkHeader() throws IOException {
        int control = in.readUnsignedByte();

        // 0x00 is the end-of-stream marker.
        if (control == 0x00) {
            endReached = true;
            putArraysToCache();
            return;
        }

        // 0x01 (uncompressed chunk) and 0xE0-0xFF (LZMA chunk) reset
        // the dictionary; fresh properties must follow before any chunk
        // that doesn't carry its own.
        if (control >= 0xE0 || control == 0x01) {
            needProps = true;
            needDictReset = false;
            lz.reset();
        } else if (needDictReset) {
            // The first chunk of a stream must reset the dictionary.
            throw new CorruptedInputException();
        }

        if (control >= 0x80) {
            // LZMA chunk: bits 0-4 of the control byte hold the high
            // bits of the uncompressed size; both sizes are stored
            // minus one.
            isLZMAChunk = true;

            uncompressedSize = (control & 0x1F) << 16;
            uncompressedSize += in.readUnsignedShort() + 1;

            int compressedSize = in.readUnsignedShort() + 1;

            if (control >= 0xC0) {
                // 0xC0-0xFF: a new properties byte follows.
                needProps = false;
                decodeProps();

            } else if (needProps) {
                throw new CorruptedInputException();

            } else if (control >= 0xA0) {
                // 0xA0-0xBF: keep old properties but reset LZMA state.
                lzma.reset();
            }

            rc.prepareInputBuffer(in, compressedSize);

        } else if (control > 0x02) {
            // 0x03-0x7F are invalid control bytes.
            throw new CorruptedInputException();

        } else {
            // 0x01 or 0x02: uncompressed chunk; size stored minus one.
            isLZMAChunk = false;
            uncompressedSize = in.readUnsignedShort() + 1;
        }
    }

    // Decodes the one-byte LZMA properties (packed as
    // (pb * 5 + lp) * 9 + lc) and creates a new LZMADecoder.
    private void decodeProps() throws IOException {
        int props = in.readUnsignedByte();

        // Maximum valid packed value is (4 * 5 + 4) * 9 + 8 = 224.
        if (props > (4 * 5 + 4) * 9 + 8)
            throw new CorruptedInputException();

        int pb = props / (9 * 5);
        props -= pb * 9 * 5;
        int lp = props / 9;
        int lc = props - lp * 9;

        // lc + lp > 4 is not supported by this implementation.
        if (lc + lp > 4)
            throw new CorruptedInputException();

        lzma = new LZMADecoder(lz, rc, lc, lp, pb);
    }

    /**
     * Returns the number of uncompressed bytes that can be read
     * without blocking. The value is returned with an assumption
     * that the compressed input data will be valid. If the compressed
     * data is corrupt, <code>CorruptedInputException</code> may get
     * thrown before the number of bytes claimed to be available have
     * been read from this input stream.
     * <p>
     * In LZMA2InputStream, the return value will be non-zero when the
     * decompressor is in the middle of an LZMA2 chunk. The return value
     * will then be the number of uncompressed bytes remaining from that
     * chunk. The return value can also be non-zero in the middle of
     * an uncompressed chunk, but then the return value depends also on
     * the <code>available()</code> method of the underlying InputStream.
     *
     * @return      the number of uncompressed bytes that can be read
     *              without blocking
     */
    public int available() throws IOException {
        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        return isLZMAChunk ? uncompressedSize
                           : Math.min(uncompressedSize, in.available());
    }

    // Returns the large buffers to the ArrayCache. Called at end of
    // stream and from close(); guarded by lz so it runs at most once.
    private void putArraysToCache() {
        if (lz != null) {
            lz.putArraysToCache(arrayCache);
            lz = null;

            rc.putArraysToCache(arrayCache);
            rc = null;
        }
    }

    /**
     * Closes the stream and calls <code>in.close()</code>.
     * If the stream was already closed, this does nothing.
     *
     * @throws  IOException if thrown by <code>in.close()</code>
     */
    public void close() throws IOException {
        if (in != null) {
            putArraysToCache();

            try {
                in.close();
            } finally {
                // Mark closed even if in.close() threw.
                in = null;
            }
        }
    }
}
diff --git a/src/org/tukaani/xz/LZMA2Options.java b/src/org/tukaani/xz/LZMA2Options.java
new file mode 100644
index 0000000..21e186e
--- /dev/null
+++ b/src/org/tukaani/xz/LZMA2Options.java
@@ -0,0 +1,583 @@
+/*
+ * LZMA2Options
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import java.io.IOException;
+import org.tukaani.xz.lz.LZEncoder;
+import org.tukaani.xz.lzma.LZMAEncoder;
+
+/**
+ * LZMA2 compression options.
+ * <p>
+ * While this allows setting the LZMA2 compression options in detail,
+ * often you only need <code>LZMA2Options()</code> or
+ * <code>LZMA2Options(int)</code>.
+ */
+public class LZMA2Options extends FilterOptions {
+ /**
+ * Minimum valid compression preset level is 0.
+ */
+ public static final int PRESET_MIN = 0;
+
+ /**
+ * Maximum valid compression preset level is 9.
+ */
+ public static final int PRESET_MAX = 9;
+
+ /**
+ * Default compression preset level is 6.
+ */
+ public static final int PRESET_DEFAULT = 6;
+
+ /**
+ * Minimum dictionary size is 4 KiB.
+ */
+ public static final int DICT_SIZE_MIN = 4096;
+
+ /**
+ * Maximum dictionary size for compression is 768 MiB.
+ * <p>
+ * The decompressor supports bigger dictionaries, up to almost 2 GiB.
+ * With HC4 the encoder would support dictionaries bigger than 768 MiB.
+ * The 768 MiB limit comes from the current implementation of BT4 where
+ * we would otherwise hit the limits of signed ints in array indexing.
+ * <p>
+ * If you really need bigger dictionary for decompression,
+ * use {@link LZMA2InputStream} directly.
+ */
+ public static final int DICT_SIZE_MAX = 768 << 20;
+
+ /**
+ * The default dictionary size is 8 MiB.
+ */
+ public static final int DICT_SIZE_DEFAULT = 8 << 20;
+
+ /**
+ * Maximum value for lc + lp is 4.
+ */
+ public static final int LC_LP_MAX = 4;
+
+ /**
+ * The default number of literal context bits is 3.
+ */
+ public static final int LC_DEFAULT = 3;
+
+ /**
+ * The default number of literal position bits is 0.
+ */
+ public static final int LP_DEFAULT = 0;
+
+ /**
+ * Maximum value for pb is 4.
+ */
+ public static final int PB_MAX = 4;
+
+ /**
+ * The default number of position bits is 2.
+ */
+ public static final int PB_DEFAULT = 2;
+
+ /**
+ * Compression mode: uncompressed.
+ * The data is wrapped into a LZMA2 stream without compression.
+ */
+ public static final int MODE_UNCOMPRESSED = 0;
+
+ /**
+ * Compression mode: fast.
+ * This is usually combined with a hash chain match finder.
+ */
+ public static final int MODE_FAST = LZMAEncoder.MODE_FAST;
+
+ /**
+ * Compression mode: normal.
+ * This is usually combined with a binary tree match finder.
+ */
+ public static final int MODE_NORMAL = LZMAEncoder.MODE_NORMAL;
+
+ /**
+ * Minimum value for <code>niceLen</code> is 8.
+ */
+ public static final int NICE_LEN_MIN = 8;
+
+ /**
+ * Maximum value for <code>niceLen</code> is 273.
+ */
+ public static final int NICE_LEN_MAX = 273;
+
+ /**
+ * Match finder: Hash Chain 2-3-4
+ */
+ public static final int MF_HC4 = LZEncoder.MF_HC4;
+
+ /**
+ * Match finder: Binary tree 2-3-4
+ */
+ public static final int MF_BT4 = LZEncoder.MF_BT4;
+
+ private static final int[] presetToDictSize = {
+ 1 << 18, 1 << 20, 1 << 21, 1 << 22, 1 << 22,
+ 1 << 23, 1 << 23, 1 << 24, 1 << 25, 1 << 26 };
+
+ private static final int[] presetToDepthLimit = { 4, 8, 24, 48 };
+
+ private int dictSize;
+ private byte[] presetDict = null;
+ private int lc;
+ private int lp;
+ private int pb;
+ private int mode;
+ private int niceLen;
+ private int mf;
+ private int depthLimit;
+
+ /**
+ * Creates new LZMA2 options and sets them to the default values.
+ * This is equivalent to <code>LZMA2Options(PRESET_DEFAULT)</code>.
+ */
+ public LZMA2Options() {
+ try {
+ setPreset(PRESET_DEFAULT);
+ } catch (UnsupportedOptionsException e) {
+ assert false;
+ throw new RuntimeException();
+ }
+ }
+
+ /**
+ * Creates new LZMA2 options and sets them to the given preset.
+ *
+ * @throws UnsupportedOptionsException
+ * <code>preset</code> is not supported
+ */
+ public LZMA2Options(int preset) throws UnsupportedOptionsException {
+ setPreset(preset);
+ }
+
+ /**
+ * Creates new LZMA2 options and sets them to the given custom values.
+ *
+ * @throws UnsupportedOptionsException
+ * unsupported options were specified
+ */
+ public LZMA2Options(int dictSize, int lc, int lp, int pb, int mode,
+ int niceLen, int mf, int depthLimit)
+ throws UnsupportedOptionsException {
+ setDictSize(dictSize);
+ setLcLp(lc, lp);
+ setPb(pb);
+ setMode(mode);
+ setNiceLen(niceLen);
+ setMatchFinder(mf);
+ setDepthLimit(depthLimit);
+ }
+
+ /**
+ * Sets the compression options to the given preset.
+ * <p>
+ * The presets 0-3 are fast presets with medium compression.
+ * The presets 4-6 are fairly slow presets with high compression.
+ * The default preset (<code>PRESET_DEFAULT</code>) is 6.
+ * <p>
+ * The presets 7-9 are like the preset 6 but use bigger dictionaries
+ * and have higher compressor and decompressor memory requirements.
+ * Unless the uncompressed size of the file exceeds 8&nbsp;MiB,
+ * 16&nbsp;MiB, or 32&nbsp;MiB, it is waste of memory to use the
+ * presets 7, 8, or 9, respectively.
+ *
+ * @throws UnsupportedOptionsException
+ * <code>preset</code> is not supported
+ */
+ public void setPreset(int preset) throws UnsupportedOptionsException {
+ if (preset < 0 || preset > 9)
+ throw new UnsupportedOptionsException(
+ "Unsupported preset: " + preset);
+
+ lc = LC_DEFAULT;
+ lp = LP_DEFAULT;
+ pb = PB_DEFAULT;
+ dictSize = presetToDictSize[preset];
+
+ if (preset <= 3) {
+ mode = MODE_FAST;
+ mf = MF_HC4;
+ niceLen = preset <= 1 ? 128 : NICE_LEN_MAX;
+ depthLimit = presetToDepthLimit[preset];
+ } else {
+ mode = MODE_NORMAL;
+ mf = MF_BT4;
+ niceLen = (preset == 4) ? 16 : (preset == 5) ? 32 : 64;
+ depthLimit = 0;
+ }
+ }
+
+ /**
+ * Sets the dictionary size in bytes.
+ * <p>
+ * The dictionary (or history buffer) holds the most recently seen
+ * uncompressed data. Bigger dictionary usually means better compression.
+     * However, using a dictionary bigger than the size of the uncompressed
+ * data is waste of memory.
+ * <p>
+ * Any value in the range [DICT_SIZE_MIN, DICT_SIZE_MAX] is valid,
+ * but sizes of 2^n and 2^n&nbsp;+&nbsp;2^(n-1) bytes are somewhat
+ * recommended.
+ *
+ * @throws UnsupportedOptionsException
+ * <code>dictSize</code> is not supported
+ */
+ public void setDictSize(int dictSize) throws UnsupportedOptionsException {
+ if (dictSize < DICT_SIZE_MIN)
+ throw new UnsupportedOptionsException(
+ "LZMA2 dictionary size must be at least 4 KiB: "
+ + dictSize + " B");
+
+ if (dictSize > DICT_SIZE_MAX)
+ throw new UnsupportedOptionsException(
+ "LZMA2 dictionary size must not exceed "
+ + (DICT_SIZE_MAX >> 20) + " MiB: " + dictSize + " B");
+
+ this.dictSize = dictSize;
+ }
+
+ /**
+ * Gets the dictionary size in bytes.
+ */
+ public int getDictSize() {
+ return dictSize;
+ }
+
+ /**
+ * Sets a preset dictionary. Use null to disable the use of
+ * a preset dictionary. By default there is no preset dictionary.
+ * <p>
+ * <b>The .xz format doesn't support a preset dictionary for now.
+ * Do not set a preset dictionary unless you use raw LZMA2.</b>
+ * <p>
+ * Preset dictionary can be useful when compressing many similar,
+ * relatively small chunks of data independently from each other.
+ * A preset dictionary should contain typical strings that occur in
+ * the files being compressed. The most probable strings should be
+ * near the end of the preset dictionary. The preset dictionary used
+ * for compression is also needed for decompression.
+ */
+ public void setPresetDict(byte[] presetDict) {
+ this.presetDict = presetDict;
+ }
+
+ /**
+ * Gets the preset dictionary.
+ */
+ public byte[] getPresetDict() {
+ return presetDict;
+ }
+
+ /**
+ * Sets the number of literal context bits and literal position bits.
+ * <p>
+ * The sum of <code>lc</code> and <code>lp</code> is limited to 4.
+ * Trying to exceed it will throw an exception. This function lets
+ * you change both at the same time.
+ *
+ * @throws UnsupportedOptionsException
+ * <code>lc</code> and <code>lp</code>
+ * are invalid
+ */
+ public void setLcLp(int lc, int lp) throws UnsupportedOptionsException {
+ if (lc < 0 || lp < 0 || lc > LC_LP_MAX || lp > LC_LP_MAX
+ || lc + lp > LC_LP_MAX)
+ throw new UnsupportedOptionsException(
+ "lc + lp must not exceed " + LC_LP_MAX + ": "
+ + lc + " + " + lp);
+
+ this.lc = lc;
+ this.lp = lp;
+ }
+
+ /**
+ * Sets the number of literal context bits.
+ * <p>
+ * All bytes that cannot be encoded as matches are encoded as literals.
+ * That is, literals are simply 8-bit bytes that are encoded one at
+ * a time.
+ * <p>
+ * The literal coding makes an assumption that the highest <code>lc</code>
+ * bits of the previous uncompressed byte correlate with the next byte.
+ * For example, in typical English text, an upper-case letter is often
+ * followed by a lower-case letter, and a lower-case letter is usually
+ * followed by another lower-case letter. In the US-ASCII character set,
+ * the highest three bits are 010 for upper-case letters and 011 for
+ * lower-case letters. When <code>lc</code> is at least 3, the literal
+ * coding can take advantage of this property in the uncompressed data.
+ * <p>
+ * The default value (3) is usually good. If you want maximum compression,
+ * try <code>setLc(4)</code>. Sometimes it helps a little, and sometimes it
+ * makes compression worse. If it makes it worse, test for example
+ * <code>setLc(2)</code> too.
+ *
+ * @throws UnsupportedOptionsException
+ * <code>lc</code> is invalid, or the sum
+ * of <code>lc</code> and <code>lp</code>
+ * exceeds LC_LP_MAX
+ */
+ public void setLc(int lc) throws UnsupportedOptionsException {
+ setLcLp(lc, lp);
+ }
+
+ /**
+ * Sets the number of literal position bits.
+ * <p>
+ * This affects what kind of alignment in the uncompressed data is
+ * assumed when encoding literals. See {@link #setPb(int) setPb} for
+ * more information about alignment.
+ *
+ * @throws UnsupportedOptionsException
+ * <code>lp</code> is invalid, or the sum
+ * of <code>lc</code> and <code>lp</code>
+ * exceeds LC_LP_MAX
+ */
+ public void setLp(int lp) throws UnsupportedOptionsException {
+ setLcLp(lc, lp);
+ }
+
+ /**
+ * Gets the number of literal context bits.
+ */
+ public int getLc() {
+ return lc;
+ }
+
+ /**
+ * Gets the number of literal position bits.
+ */
+ public int getLp() {
+ return lp;
+ }
+
+ /**
+ * Sets the number of position bits.
+ * <p>
+ * This affects what kind of alignment in the uncompressed data is
+ * assumed in general. The default (2) means four-byte alignment
+ * (2^<code>pb</code> = 2^2 = 4), which is often a good choice when
+ * there's no better guess.
+ * <p>
+ * When the alignment is known, setting the number of position bits
+ * accordingly may reduce the file size a little. For example with text
+ * files having one-byte alignment (US-ASCII, ISO-8859-*, UTF-8), using
+ * <code>setPb(0)</code> can improve compression slightly. For UTF-16
+ * text, <code>setPb(1)</code> is a good choice. If the alignment is
+ * an odd number like 3 bytes, <code>setPb(0)</code> might be the best
+ * choice.
+ * <p>
+ * Even though the assumed alignment can be adjusted with
+ * <code>setPb</code> and <code>setLp</code>, LZMA2 still slightly favors
+ * 16-byte alignment. It might be worth taking into account when designing
+ * file formats that are likely to be often compressed with LZMA2.
+ *
+ * @throws UnsupportedOptionsException
+ * <code>pb</code> is invalid
+ */
+ public void setPb(int pb) throws UnsupportedOptionsException {
+ if (pb < 0 || pb > PB_MAX)
+ throw new UnsupportedOptionsException(
+ "pb must not exceed " + PB_MAX + ": " + pb);
+
+ this.pb = pb;
+ }
+
+ /**
+ * Gets the number of position bits.
+ */
+ public int getPb() {
+ return pb;
+ }
+
+ /**
+ * Sets the compression mode.
+ * <p>
+ * This specifies the method to analyze the data produced by
+ * a match finder. The default is <code>MODE_FAST</code> for presets
+ * 0-3 and <code>MODE_NORMAL</code> for presets 4-9.
+ * <p>
+ * Usually <code>MODE_FAST</code> is used with Hash Chain match finders
+ * and <code>MODE_NORMAL</code> with Binary Tree match finders. This is
+ * also what the presets do.
+ * <p>
+ * The special mode <code>MODE_UNCOMPRESSED</code> doesn't try to
+ * compress the data at all (and doesn't use a match finder) and will
+ * simply wrap it in uncompressed LZMA2 chunks.
+ *
+ * @throws UnsupportedOptionsException
+ * <code>mode</code> is not supported
+ */
+ public void setMode(int mode) throws UnsupportedOptionsException {
+ if (mode < MODE_UNCOMPRESSED || mode > MODE_NORMAL)
+ throw new UnsupportedOptionsException(
+ "Unsupported compression mode: " + mode);
+
+ this.mode = mode;
+ }
+
+ /**
+ * Gets the compression mode.
+ */
+ public int getMode() {
+ return mode;
+ }
+
+ /**
+ * Sets the nice length of matches.
+ * Once a match of at least <code>niceLen</code> bytes is found,
+ * the algorithm stops looking for better matches. Higher values tend
+ * to give better compression at the expense of speed. The default
+ * depends on the preset.
+ *
+ * @throws UnsupportedOptionsException
+ * <code>niceLen</code> is invalid
+ */
+ public void setNiceLen(int niceLen) throws UnsupportedOptionsException {
+ if (niceLen < NICE_LEN_MIN)
+ throw new UnsupportedOptionsException(
+ "Minimum nice length of matches is "
+ + NICE_LEN_MIN + " bytes: " + niceLen);
+
+ if (niceLen > NICE_LEN_MAX)
+ throw new UnsupportedOptionsException(
+ "Maximum nice length of matches is " + NICE_LEN_MAX
+ + ": " + niceLen);
+
+ this.niceLen = niceLen;
+ }
+
+ /**
+ * Gets the nice length of matches.
+ */
+ public int getNiceLen() {
+ return niceLen;
+ }
+
+ /**
+ * Sets the match finder type.
+ * <p>
+ * Match finder has a major effect on compression speed, memory usage,
+ * and compression ratio. Usually Hash Chain match finders are faster
+ * than Binary Tree match finders. The default depends on the preset:
+ * 0-3 use <code>MF_HC4</code> and 4-9 use <code>MF_BT4</code>.
+ *
+ * @throws UnsupportedOptionsException
+ * <code>mf</code> is not supported
+ */
+ public void setMatchFinder(int mf) throws UnsupportedOptionsException {
+ if (mf != MF_HC4 && mf != MF_BT4)
+ throw new UnsupportedOptionsException(
+ "Unsupported match finder: " + mf);
+
+ this.mf = mf;
+ }
+
+ /**
+ * Gets the match finder type.
+ */
+ public int getMatchFinder() {
+ return mf;
+ }
+
+ /**
+ * Sets the match finder search depth limit.
+ * <p>
+ * The default is a special value of <code>0</code> which indicates that
+ * the depth limit should be automatically calculated by the selected
+ * match finder from the nice length of matches.
+ * <p>
+ * Reasonable depth limit for Hash Chain match finders is 4-100 and
+ * 16-1000 for Binary Tree match finders. Using very high values can
+ * make the compressor extremely slow with some files. Avoid settings
+ * higher than 1000 unless you are prepared to interrupt the compression
+ * in case it is taking far too long.
+ *
+ * @throws UnsupportedOptionsException
+ * <code>depthLimit</code> is invalid
+ */
+ public void setDepthLimit(int depthLimit)
+ throws UnsupportedOptionsException {
+ if (depthLimit < 0)
+ throw new UnsupportedOptionsException(
+ "Depth limit cannot be negative: " + depthLimit);
+
+ this.depthLimit = depthLimit;
+ }
+
+ /**
+ * Gets the match finder search depth limit.
+ */
+ public int getDepthLimit() {
+ return depthLimit;
+ }
+
+ public int getEncoderMemoryUsage() {
+ return (mode == MODE_UNCOMPRESSED)
+ ? UncompressedLZMA2OutputStream.getMemoryUsage()
+ : LZMA2OutputStream.getMemoryUsage(this);
+ }
+
+ public FinishableOutputStream getOutputStream(FinishableOutputStream out,
+ ArrayCache arrayCache) {
+ if (mode == MODE_UNCOMPRESSED)
+ return new UncompressedLZMA2OutputStream(out, arrayCache);
+
+ return new LZMA2OutputStream(out, this, arrayCache);
+ }
+
+ /**
+ * Gets how much memory the LZMA2 decoder will need to decompress the data
+ * that was encoded with these options and stored in a .xz file.
+ * <p>
+ * The returned value may be bigger than the value returned by a direct call
+ * to {@link LZMA2InputStream#getMemoryUsage(int)} if the dictionary size
+ * is not 2^n or 2^n&nbsp;+&nbsp;2^(n-1) bytes. This is because the .xz
+ * headers store the dictionary size in such a format and other values
+ * are rounded up to the next such value. Such rounding is harmless except
+ * it might waste some memory if an unusual dictionary size is used.
+ * <p>
+ * If you use raw LZMA2 streams and an unusual dictionary size, call
+ * {@link LZMA2InputStream#getMemoryUsage} directly to get raw decoder
+ * memory requirements.
+ */
+ public int getDecoderMemoryUsage() {
+ // Round the dictionary size up to the next 2^n or 2^n + 2^(n-1).
+ int d = dictSize - 1;
+ d |= d >>> 2;
+ d |= d >>> 3;
+ d |= d >>> 4;
+ d |= d >>> 8;
+ d |= d >>> 16;
+ return LZMA2InputStream.getMemoryUsage(d + 1);
+ }
+
+ public InputStream getInputStream(InputStream in, ArrayCache arrayCache)
+ throws IOException {
+ return new LZMA2InputStream(in, dictSize, presetDict, arrayCache);
+ }
+
+ FilterEncoder getFilterEncoder() {
+ return new LZMA2Encoder(this);
+ }
+
+ public Object clone() {
+ try {
+ return super.clone();
+ } catch (CloneNotSupportedException e) {
+ assert false;
+ throw new RuntimeException();
+ }
+ }
+}
diff --git a/src/org/tukaani/xz/LZMA2OutputStream.java b/src/org/tukaani/xz/LZMA2OutputStream.java
new file mode 100644
index 0000000..a82a1a5
--- /dev/null
+++ b/src/org/tukaani/xz/LZMA2OutputStream.java
@@ -0,0 +1,270 @@
+/*
+ * LZMA2OutputStream
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import org.tukaani.xz.lz.LZEncoder;
+import org.tukaani.xz.rangecoder.RangeEncoderToBuffer;
+import org.tukaani.xz.lzma.LZMAEncoder;
+
+class LZMA2OutputStream extends FinishableOutputStream {
+ static final int COMPRESSED_SIZE_MAX = 64 << 10;
+
+ private final ArrayCache arrayCache;
+
+ private FinishableOutputStream out;
+ private final DataOutputStream outData;
+
+ private LZEncoder lz;
+ private RangeEncoderToBuffer rc;
+ private LZMAEncoder lzma;
+
+ private final int props; // Cannot change props on the fly for now.
+ private boolean dictResetNeeded = true;
+ private boolean stateResetNeeded = true;
+ private boolean propsNeeded = true;
+
+ private int pendingSize = 0;
+ private boolean finished = false;
+ private IOException exception = null;
+
+ private final byte[] tempBuf = new byte[1];
+
+ private static int getExtraSizeBefore(int dictSize) {
+ return COMPRESSED_SIZE_MAX > dictSize
+ ? COMPRESSED_SIZE_MAX - dictSize : 0;
+ }
+
+ static int getMemoryUsage(LZMA2Options options) {
+ // 64 KiB buffer for the range encoder + a little extra + LZMAEncoder
+ int dictSize = options.getDictSize();
+ int extraSizeBefore = getExtraSizeBefore(dictSize);
+ return 70 + LZMAEncoder.getMemoryUsage(options.getMode(),
+ dictSize, extraSizeBefore,
+ options.getMatchFinder());
+ }
+
+ LZMA2OutputStream(FinishableOutputStream out, LZMA2Options options,
+ ArrayCache arrayCache) {
+ if (out == null)
+ throw new NullPointerException();
+
+ this.arrayCache = arrayCache;
+ this.out = out;
+ outData = new DataOutputStream(out);
+ rc = new RangeEncoderToBuffer(COMPRESSED_SIZE_MAX, arrayCache);
+
+ int dictSize = options.getDictSize();
+ int extraSizeBefore = getExtraSizeBefore(dictSize);
+ lzma = LZMAEncoder.getInstance(rc,
+ options.getLc(), options.getLp(), options.getPb(),
+ options.getMode(),
+ dictSize, extraSizeBefore, options.getNiceLen(),
+ options.getMatchFinder(), options.getDepthLimit(),
+ this.arrayCache);
+
+ lz = lzma.getLZEncoder();
+
+ byte[] presetDict = options.getPresetDict();
+ if (presetDict != null && presetDict.length > 0) {
+ lz.setPresetDict(dictSize, presetDict);
+ dictResetNeeded = false;
+ }
+
+ props = (options.getPb() * 5 + options.getLp()) * 9 + options.getLc();
+ }
+
+ public void write(int b) throws IOException {
+ tempBuf[0] = (byte)b;
+ write(tempBuf, 0, 1);
+ }
+
+ public void write(byte[] buf, int off, int len) throws IOException {
+ if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
+ throw new IndexOutOfBoundsException();
+
+ if (exception != null)
+ throw exception;
+
+ if (finished)
+ throw new XZIOException("Stream finished or closed");
+
+ try {
+ while (len > 0) {
+ int used = lz.fillWindow(buf, off, len);
+ off += used;
+ len -= used;
+ pendingSize += used;
+
+ if (lzma.encodeForLZMA2())
+ writeChunk();
+ }
+ } catch (IOException e) {
+ exception = e;
+ throw e;
+ }
+ }
+
+ private void writeChunk() throws IOException {
+ int compressedSize = rc.finish();
+ int uncompressedSize = lzma.getUncompressedSize();
+
+ assert compressedSize > 0 : compressedSize;
+ assert uncompressedSize > 0 : uncompressedSize;
+
+ // +2 because the header of a compressed chunk is 2 bytes
+ // bigger than the header of an uncompressed chunk.
+ if (compressedSize + 2 < uncompressedSize) {
+ writeLZMA(uncompressedSize, compressedSize);
+ } else {
+ lzma.reset();
+ uncompressedSize = lzma.getUncompressedSize();
+ assert uncompressedSize > 0 : uncompressedSize;
+ writeUncompressed(uncompressedSize);
+ }
+
+ pendingSize -= uncompressedSize;
+ lzma.resetUncompressedSize();
+ rc.reset();
+ }
+
+ private void writeLZMA(int uncompressedSize, int compressedSize)
+ throws IOException {
+ int control;
+
+ if (propsNeeded) {
+ if (dictResetNeeded)
+ control = 0x80 + (3 << 5);
+ else
+ control = 0x80 + (2 << 5);
+ } else {
+ if (stateResetNeeded)
+ control = 0x80 + (1 << 5);
+ else
+ control = 0x80;
+ }
+
+ control |= (uncompressedSize - 1) >>> 16;
+ outData.writeByte(control);
+
+ outData.writeShort(uncompressedSize - 1);
+ outData.writeShort(compressedSize - 1);
+
+ if (propsNeeded)
+ outData.writeByte(props);
+
+ rc.write(out);
+
+ propsNeeded = false;
+ stateResetNeeded = false;
+ dictResetNeeded = false;
+ }
+
+ private void writeUncompressed(int uncompressedSize) throws IOException {
+ while (uncompressedSize > 0) {
+ int chunkSize = Math.min(uncompressedSize, COMPRESSED_SIZE_MAX);
+ outData.writeByte(dictResetNeeded ? 0x01 : 0x02);
+ outData.writeShort(chunkSize - 1);
+ lz.copyUncompressed(out, uncompressedSize, chunkSize);
+ uncompressedSize -= chunkSize;
+ dictResetNeeded = false;
+ }
+
+ stateResetNeeded = true;
+ }
+
+ private void writeEndMarker() throws IOException {
+ assert !finished;
+
+ if (exception != null)
+ throw exception;
+
+ lz.setFinishing();
+
+ try {
+ while (pendingSize > 0) {
+ lzma.encodeForLZMA2();
+ writeChunk();
+ }
+
+ out.write(0x00);
+ } catch (IOException e) {
+ exception = e;
+ throw e;
+ }
+
+ finished = true;
+
+ lzma.putArraysToCache(arrayCache);
+ lzma = null;
+ lz = null;
+ rc.putArraysToCache(arrayCache);
+ rc = null;
+ }
+
+ public void flush() throws IOException {
+ if (exception != null)
+ throw exception;
+
+ if (finished)
+ throw new XZIOException("Stream finished or closed");
+
+ try {
+ lz.setFlushing();
+
+ while (pendingSize > 0) {
+ lzma.encodeForLZMA2();
+ writeChunk();
+ }
+
+ out.flush();
+ } catch (IOException e) {
+ exception = e;
+ throw e;
+ }
+ }
+
+ public void finish() throws IOException {
+ if (!finished) {
+ writeEndMarker();
+
+ try {
+ out.finish();
+ } catch (IOException e) {
+ exception = e;
+ throw e;
+ }
+ }
+ }
+
+ public void close() throws IOException {
+ if (out != null) {
+ if (!finished) {
+ try {
+ writeEndMarker();
+ } catch (IOException e) {}
+ }
+
+ try {
+ out.close();
+ } catch (IOException e) {
+ if (exception == null)
+ exception = e;
+ }
+
+ out = null;
+ }
+
+ if (exception != null)
+ throw exception;
+ }
+}
diff --git a/src/org/tukaani/xz/LZMAInputStream.java b/src/org/tukaani/xz/LZMAInputStream.java
new file mode 100644
index 0000000..e46d5bb
--- /dev/null
+++ b/src/org/tukaani/xz/LZMAInputStream.java
@@ -0,0 +1,763 @@
+/*
+ * LZMAInputStream
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import org.tukaani.xz.lz.LZDecoder;
+import org.tukaani.xz.rangecoder.RangeDecoderFromStream;
+import org.tukaani.xz.lzma.LZMADecoder;
+
+/**
+ * Decompresses legacy .lzma files and raw LZMA streams (no .lzma header).
+ * <p>
+ * <b>IMPORTANT:</b> In contrast to other classes in this package, this class
+ * reads data from its input stream one byte at a time. If the input stream
+ * is for example {@link java.io.FileInputStream}, wrapping it into
+ * {@link java.io.BufferedInputStream} tends to improve performance a lot.
+ * This is not automatically done by this class because there may be use
+ * cases where it is desired that this class won't read any bytes past
+ * the end of the LZMA stream.
+ * <p>
+ * Even when using <code>BufferedInputStream</code>, the performance tends
+ * to be worse (maybe 10-20&nbsp;% slower) than with {@link LZMA2InputStream}
+ * or {@link XZInputStream} (when the .xz file contains LZMA2-compressed data).
+ *
+ * @since 1.4
+ */
+public class LZMAInputStream extends InputStream {
+ /**
+ * Largest dictionary size supported by this implementation.
+ * <p>
+ * LZMA allows dictionaries up to one byte less than 4 GiB. This
+ * implementation supports only 16 bytes less than 2 GiB. This
+ * limitation is due to Java using signed 32-bit integers for array
+ * indexing. The limitation shouldn't matter much in practice since so
+ * huge dictionaries are not normally used.
+ */
+ public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;
+
+ private InputStream in;
+ private ArrayCache arrayCache;
+ private LZDecoder lz;
+ private RangeDecoderFromStream rc;
+ private LZMADecoder lzma;
+
+ private boolean endReached = false;
+
+ private final byte[] tempBuf = new byte[1];
+
+ /**
+ * Number of uncompressed bytes left to be decompressed, or -1 if
+ * the end marker is used.
+ */
+ private long remainingSize;
+
+ private IOException exception = null;
+
+ /**
+ * Gets approximate decompressor memory requirements as kibibytes for
+ * the given dictionary size and LZMA properties byte (lc, lp, and pb).
+ *
+ * @param dictSize LZMA dictionary size as bytes, should be
+ * in the range [<code>0</code>,
+ * <code>DICT_SIZE_MAX</code>]
+ *
+ * @param propsByte LZMA properties byte that encodes the values
+ * of lc, lp, and pb
+ *
+ * @return approximate memory requirements as kibibytes (KiB)
+ *
+ * @throws UnsupportedOptionsException
+ * if <code>dictSize</code> is outside
+ * the range [<code>0</code>,
+ * <code>DICT_SIZE_MAX</code>]
+ *
+ * @throws CorruptedInputException
+ * if <code>propsByte</code> is invalid
+ */
+ public static int getMemoryUsage(int dictSize, byte propsByte)
+ throws UnsupportedOptionsException, CorruptedInputException {
+ if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
+ throw new UnsupportedOptionsException(
+ "LZMA dictionary is too big for this implementation");
+
+ int props = propsByte & 0xFF;
+ if (props > (4 * 5 + 4) * 9 + 8)
+ throw new CorruptedInputException("Invalid LZMA properties byte");
+
+ props %= 9 * 5;
+ int lp = props / 9;
+ int lc = props - lp * 9;
+
+ return getMemoryUsage(dictSize, lc, lp);
+ }
+
+ /**
+ * Gets approximate decompressor memory requirements as kibibytes for
+ * the given dictionary size, lc, and lp. Note that pb isn't needed.
+ *
+ * @param dictSize LZMA dictionary size as bytes, must be
+ * in the range [<code>0</code>,
+ * <code>DICT_SIZE_MAX</code>]
+ *
+ * @param lc number of literal context bits, must be
+ * in the range [0, 8]
+ *
+ * @param lp number of literal position bits, must be
+ * in the range [0, 4]
+ *
+ * @return approximate memory requirements as kibibytes (KiB)
+ */
+ public static int getMemoryUsage(int dictSize, int lc, int lp) {
+ if (lc < 0 || lc > 8 || lp < 0 || lp > 4)
+ throw new IllegalArgumentException("Invalid lc or lp");
+
+ // Probability variables have the type "short". There are
+ // 0x300 (768) probability variables in each literal subcoder.
+ // The number of literal subcoders is 2^(lc + lp).
+ //
+ // Roughly 10 KiB for the base state + LZ decoder's dictionary buffer
+ // + sizeof(short) * number probability variables per literal subcoder
+ // * number of literal subcoders
+ return 10 + getDictSize(dictSize) / 1024
+ + ((2 * 0x300) << (lc + lp)) / 1024;
+ }
+
+ private static int getDictSize(int dictSize) {
+ if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
+ throw new IllegalArgumentException(
+ "LZMA dictionary is too big for this implementation");
+
+ // For performance reasons, use a 4 KiB dictionary if something
+ // smaller was requested. It's a rare situation and the performance
+ // difference isn't huge, and it starts to matter mostly when the
+ // dictionary is just a few bytes. But we need to handle the special
+ // case of dictSize == 0 anyway, which is an allowed value but in
+ // practice means one-byte dictionary.
+ //
+ // Note that using a dictionary bigger than specified in the headers
+ // can hide errors if there is a reference to data beyond the original
+ // dictionary size but is still within 4 KiB.
+ if (dictSize < 4096)
+ dictSize = 4096;
+
+ // Round dictionary size upward to a multiple of 16. This way LZMA
+ // can use LZDecoder.getPos() for calculating LZMA's posMask.
+ return (dictSize + 15) & ~15;
+ }
+
+ /**
+ * Creates a new .lzma file format decompressor without
+ * a memory usage limit.
+ *
+ * @param in input stream from which .lzma data is read;
+ * it might be a good idea to wrap it in
+ * <code>BufferedInputStream</code>, see the
+ * note at the top of this page
+ *
+ * @throws CorruptedInputException
+ * file is corrupt or perhaps not in
+ * the .lzma format at all
+ *
+ * @throws UnsupportedOptionsException
+ * dictionary size or uncompressed size is too
+ * big for this implementation
+ *
+ * @throws EOFException
+ * file is truncated or perhaps not in
+ * the .lzma format at all
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ */
+ public LZMAInputStream(InputStream in) throws IOException {
+ this(in, -1);
+ }
+
+ /**
+ * Creates a new .lzma file format decompressor without
+ * a memory usage limit.
+ * <p>
+ * This is identical to <code>LZMAInputStream(InputStream)</code>
+ * except that this also takes the <code>arrayCache</code> argument.
+ *
+ * @param in input stream from which .lzma data is read;
+ * it might be a good idea to wrap it in
+ * <code>BufferedInputStream</code>, see the
+ * note at the top of this page
+ *
+ *
+ * @param arrayCache cache to be used for allocating large arrays
+ *
+ * @throws CorruptedInputException
+ * file is corrupt or perhaps not in
+ * the .lzma format at all
+ *
+ * @throws UnsupportedOptionsException
+ * dictionary size or uncompressed size is too
+ * big for this implementation
+ *
+ * @throws EOFException
+ * file is truncated or perhaps not in
+ * the .lzma format at all
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ *
+ * @since 1.7
+ */
+ public LZMAInputStream(InputStream in, ArrayCache arrayCache)
+ throws IOException {
+ this(in, -1, arrayCache);
+ }
+
+ /**
+ * Creates a new .lzma file format decompressor with an optional
+ * memory usage limit.
+ *
+ * @param in input stream from which .lzma data is read;
+ * it might be a good idea to wrap it in
+ * <code>BufferedInputStream</code>, see the
+ * note at the top of this page
+ *
+ * @param memoryLimit memory usage limit in kibibytes (KiB)
+ * or <code>-1</code> to impose no
+ * memory usage limit
+ *
+ * @throws CorruptedInputException
+ * file is corrupt or perhaps not in
+ * the .lzma format at all
+ *
+ * @throws UnsupportedOptionsException
+ * dictionary size or uncompressed size is too
+ * big for this implementation
+ *
+ * @throws MemoryLimitException
+ * memory usage limit was exceeded
+ *
+ * @throws EOFException
+ * file is truncated or perhaps not in
+ * the .lzma format at all
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ */
+ public LZMAInputStream(InputStream in, int memoryLimit)
+ throws IOException {
+ this(in, memoryLimit, ArrayCache.getDefaultCache());
+ }
+
+ /**
+ * Creates a new .lzma file format decompressor with an optional
+ * memory usage limit.
+ * <p>
+ * This is identical to <code>LZMAInputStream(InputStream, int)</code>
+ * except that this also takes the <code>arrayCache</code> argument.
+ *
+ * @param in input stream from which .lzma data is read;
+ * it might be a good idea to wrap it in
+ * <code>BufferedInputStream</code>, see the
+ * note at the top of this page
+ *
+ * @param memoryLimit memory usage limit in kibibytes (KiB)
+ * or <code>-1</code> to impose no
+ * memory usage limit
+ *
+ * @param arrayCache cache to be used for allocating large arrays
+ *
+ * @throws CorruptedInputException
+ * file is corrupt or perhaps not in
+ * the .lzma format at all
+ *
+ * @throws UnsupportedOptionsException
+ * dictionary size or uncompressed size is too
+ * big for this implementation
+ *
+ * @throws MemoryLimitException
+ * memory usage limit was exceeded
+ *
+ * @throws EOFException
+ * file is truncated or perhaps not in
+ * the .lzma format at all
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ *
+ * @since 1.7
+ */
+ public LZMAInputStream(InputStream in, int memoryLimit,
+ ArrayCache arrayCache) throws IOException {
+ DataInputStream inData = new DataInputStream(in);
+
+ // Properties byte (lc, lp, and pb)
+ byte propsByte = inData.readByte();
+
+ // Dictionary size is an unsigned 32-bit little endian integer.
+ int dictSize = 0;
+ for (int i = 0; i < 4; ++i)
+ dictSize |= inData.readUnsignedByte() << (8 * i);
+
+ // Uncompressed size is an unsigned 64-bit little endian integer.
+ // The maximum 64-bit value is a special case (becomes -1 here)
+ // which indicates that the end marker is used instead of knowing
+ // the uncompressed size beforehand.
+ long uncompSize = 0;
+ for (int i = 0; i < 8; ++i)
+ uncompSize |= (long)inData.readUnsignedByte() << (8 * i);
+
+ // Check the memory usage limit.
+ int memoryNeeded = getMemoryUsage(dictSize, propsByte);
+ if (memoryLimit != -1 && memoryNeeded > memoryLimit)
+ throw new MemoryLimitException(memoryNeeded, memoryLimit);
+
+ initialize(in, uncompSize, propsByte, dictSize, null, arrayCache);
+ }
+
+ /**
+ * Creates a new input stream that decompresses raw LZMA data (no .lzma
+ * header) from <code>in</code>.
+ * <p>
+ * The caller needs to know if the "end of payload marker (EOPM)" alias
+ * "end of stream marker (EOS marker)" alias "end marker" is present.
+ * If the end marker isn't used, the caller must know the exact
+ * uncompressed size of the stream.
+ * <p>
+ * The caller also needs to provide the LZMA properties byte that encodes
+ * the number of literal context bits (lc), literal position bits (lp),
+ * and position bits (pb).
+ * <p>
+ * The dictionary size used when compressing is also needed. Specifying
+ * a too small dictionary size will prevent decompressing the stream.
+ * Specifying a too big dictionary is waste of memory but decompression
+ * will work.
+ * <p>
+ * There is no need to specify a dictionary bigger than
+ * the uncompressed size of the data even if a bigger dictionary
+ * was used when compressing. If you know the uncompressed size
+ * of the data, this might allow saving some memory.
+ *
+ * @param in input stream from which compressed
+ * data is read
+ *
+ * @param uncompSize uncompressed size of the LZMA stream or -1
+ * if the end marker is used in the LZMA stream
+ *
+ * @param propsByte LZMA properties byte that has the encoded
+ * values for literal context bits (lc), literal
+ * position bits (lp), and position bits (pb)
+ *
+ * @param dictSize dictionary size as bytes, must be in the range
+ * [<code>0</code>, <code>DICT_SIZE_MAX</code>]
+ *
+ * @throws CorruptedInputException
+ * if <code>propsByte</code> is invalid or
+ * the first input byte is not 0x00
+ *
+ * @throws UnsupportedOptionsException
+ * dictionary size or uncompressed size is too
+ * big for this implementation
+ *
+ *
+ */
+ public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
+ int dictSize) throws IOException {
+ initialize(in, uncompSize, propsByte, dictSize, null,
+ ArrayCache.getDefaultCache());
+ }
+
+ /**
+ * Creates a new input stream that decompresses raw LZMA data (no .lzma
+ * header) from <code>in</code> optionally with a preset dictionary.
+ *
+ * @param in input stream from which LZMA-compressed
+ * data is read
+ *
+ * @param uncompSize uncompressed size of the LZMA stream or -1
+ * if the end marker is used in the LZMA stream
+ *
+ * @param propsByte LZMA properties byte that has the encoded
+ * values for literal context bits (lc), literal
+ * position bits (lp), and position bits (pb)
+ *
+ * @param dictSize dictionary size as bytes, must be in the range
+ * [<code>0</code>, <code>DICT_SIZE_MAX</code>]
+ *
+ * @param presetDict preset dictionary or <code>null</code>
+ * to use no preset dictionary
+ *
+ * @throws CorruptedInputException
+ * if <code>propsByte</code> is invalid or
+ * the first input byte is not 0x00
+ *
+ * @throws UnsupportedOptionsException
+ * dictionary size or uncompressed size is too
+ * big for this implementation
+ *
+ * @throws EOFException file is truncated or corrupt
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ */
+ public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
+ int dictSize, byte[] presetDict)
+ throws IOException {
+ initialize(in, uncompSize, propsByte, dictSize, presetDict,
+ ArrayCache.getDefaultCache());
+ }
+
+ /**
+ * Creates a new input stream that decompresses raw LZMA data (no .lzma
+ * header) from <code>in</code> optionally with a preset dictionary.
+ * <p>
+ * This is identical to <code>LZMAInputStream(InputStream, long, byte, int,
+ * byte[])</code> except that this also takes the <code>arrayCache</code>
+ * argument.
+ *
+ * @param in input stream from which LZMA-compressed
+ * data is read
+ *
+ * @param uncompSize uncompressed size of the LZMA stream or -1
+ * if the end marker is used in the LZMA stream
+ *
+ * @param propsByte LZMA properties byte that has the encoded
+ * values for literal context bits (lc), literal
+ * position bits (lp), and position bits (pb)
+ *
+ * @param dictSize dictionary size as bytes, must be in the range
+ * [<code>0</code>, <code>DICT_SIZE_MAX</code>]
+ *
+ * @param presetDict preset dictionary or <code>null</code>
+ * to use no preset dictionary
+ *
+ * @param arrayCache cache to be used for allocating large arrays
+ *
+ * @throws CorruptedInputException
+ * if <code>propsByte</code> is invalid or
+ * the first input byte is not 0x00
+ *
+ * @throws UnsupportedOptionsException
+ * dictionary size or uncompressed size is too
+ * big for this implementation
+ *
+ * @throws EOFException file is truncated or corrupt
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ *
+ * @since 1.7
+ */
+ public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
+ int dictSize, byte[] presetDict,
+ ArrayCache arrayCache)
+ throws IOException {
+ initialize(in, uncompSize, propsByte, dictSize, presetDict,
+ arrayCache);
+ }
+
    /**
     * Creates a new input stream that decompresses raw LZMA data (no .lzma
     * header) from <code>in</code> optionally with a preset dictionary.
     *
     * @param       in          input stream from which LZMA-compressed
     *                          data is read
     *
     * @param       uncompSize  uncompressed size of the LZMA stream or -1
     *                          if the end marker is used in the LZMA stream
     *
     * @param       lc          number of literal context bits, must be
     *                          in the range [0, 8]
     *
     * @param       lp          number of literal position bits, must be
     *                          in the range [0, 4]
     *
     * @param       pb          number of position bits, must be
     *                          in the range [0, 4]
     *
     * @param       dictSize    dictionary size as bytes, must be in the range
     *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
     *
     * @param       presetDict  preset dictionary or <code>null</code>
     *                          to use no preset dictionary
     *
     * @throws      CorruptedInputException
     *                          if the first input byte is not 0x00
     *
     * @throws      EOFException file is truncated or corrupt
     *
     * @throws      IOException may be thrown by <code>in</code>
     */
    public LZMAInputStream(InputStream in, long uncompSize,
                           int lc, int lp, int pb,
                           int dictSize, byte[] presetDict)
            throws IOException {
        initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict,
                   ArrayCache.getDefaultCache());
    }
+
    /**
     * Creates a new input stream that decompresses raw LZMA data (no .lzma
     * header) from <code>in</code> optionally with a preset dictionary.
     * <p>
     * This is identical to <code>LZMAInputStream(InputStream, long, int, int,
     * int, int, byte[])</code> except that this also takes the
     * <code>arrayCache</code> argument.
     *
     * @param       in          input stream from which LZMA-compressed
     *                          data is read
     *
     * @param       uncompSize  uncompressed size of the LZMA stream or -1
     *                          if the end marker is used in the LZMA stream
     *
     * @param       lc          number of literal context bits, must be
     *                          in the range [0, 8]
     *
     * @param       lp          number of literal position bits, must be
     *                          in the range [0, 4]
     *
     * @param       pb          number of position bits, must be
     *                          in the range [0, 4]
     *
     * @param       dictSize    dictionary size as bytes, must be in the range
     *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
     *
     * @param       presetDict  preset dictionary or <code>null</code>
     *                          to use no preset dictionary
     *
     * @param       arrayCache  cache to be used for allocating large arrays
     *
     * @throws      CorruptedInputException
     *                          if the first input byte is not 0x00
     *
     * @throws      EOFException file is truncated or corrupt
     *
     * @throws      IOException may be thrown by <code>in</code>
     *
     * @since 1.7
     */
    public LZMAInputStream(InputStream in, long uncompSize,
                           int lc, int lp, int pb,
                           int dictSize, byte[] presetDict,
                           ArrayCache arrayCache)
            throws IOException {
        initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict,
                   arrayCache);
    }
+
+ private void initialize(InputStream in, long uncompSize, byte propsByte,
+ int dictSize, byte[] presetDict,
+ ArrayCache arrayCache)
+ throws IOException {
+ // Validate the uncompressed size since the other "initialize" throws
+ // IllegalArgumentException if uncompSize < -1.
+ if (uncompSize < -1)
+ throw new UnsupportedOptionsException(
+ "Uncompressed size is too big");
+
+ // Decode the properties byte. In contrast to LZMA2, there is no
+ // limit of lc + lp <= 4.
+ int props = propsByte & 0xFF;
+ if (props > (4 * 5 + 4) * 9 + 8)
+ throw new CorruptedInputException("Invalid LZMA properties byte");
+
+ int pb = props / (9 * 5);
+ props -= pb * 9 * 5;
+ int lp = props / 9;
+ int lc = props - lp * 9;
+
+ // Validate the dictionary size since the other "initialize" throws
+ // IllegalArgumentException if dictSize is not supported.
+ if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
+ throw new UnsupportedOptionsException(
+ "LZMA dictionary is too big for this implementation");
+
+ initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict,
+ arrayCache);
+ }
+
    // Common setup used by all the constructors: validates the LZMA
    // parameters, allocates the dictionary and decoders, and remembers
    // how much uncompressed data is expected.
    private void initialize(InputStream in, long uncompSize,
                            int lc, int lp, int pb,
                            int dictSize, byte[] presetDict,
                            ArrayCache arrayCache)
            throws IOException {
        // getDictSize validates dictSize and gives a message in
        // the exception too, so skip validating dictSize here.
        if (uncompSize < -1 || lc < 0 || lc > 8 || lp < 0 || lp > 4
                || pb < 0 || pb > 4)
            throw new IllegalArgumentException();

        this.in = in;
        this.arrayCache = arrayCache;

        // If uncompressed size is known, use it to avoid wasting memory for
        // a uselessly large dictionary buffer.
        dictSize = getDictSize(dictSize);
        if (uncompSize >= 0 && dictSize > uncompSize)
            dictSize = getDictSize((int)uncompSize);

        lz = new LZDecoder(getDictSize(dictSize), presetDict, arrayCache);
        // NOTE(review): constructing RangeDecoderFromStream presumably
        // consumes the initial range coder bytes from "in", which is why
        // the constructors document EOFException/CorruptedInputException
        // for truncated input -- confirm against RangeDecoderFromStream.
        rc = new RangeDecoderFromStream(in);
        lzma = new LZMADecoder(lz, rc, lc, lp, pb);

        // -1 means that the end marker terminates the stream (see read()).
        remainingSize = uncompSize;
    }
+
+ /**
+ * Decompresses the next byte from this input stream.
+ * <p>
+ * Reading lots of data with <code>read()</code> from this input stream
+ * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code>
+ * if you need to read lots of data one byte at a time.
+ *
+ * @return the next decompressed byte, or <code>-1</code>
+ * to indicate the end of the compressed stream
+ *
+ * @throws CorruptedInputException
+ *
+ * @throws XZIOException if the stream has been closed
+ *
+ * @throws EOFException
+ * compressed input is truncated or corrupt
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ */
+ public int read() throws IOException {
+ return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
+ }
+
    /**
     * Decompresses into an array of bytes.
     * <p>
     * If <code>len</code> is zero, no bytes are read and <code>0</code>
     * is returned. Otherwise this will block until <code>len</code>
     * bytes have been decompressed, the end of the LZMA stream is reached,
     * or an exception is thrown.
     *
     * @param       buf         target buffer for uncompressed data
     * @param       off         start offset in <code>buf</code>
     * @param       len         maximum number of uncompressed bytes to read
     *
     * @return      number of bytes read, or <code>-1</code> to indicate
     *              the end of the compressed stream
     *
     * @throws      CorruptedInputException
     *                          if the compressed input is corrupt
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      EOFException compressed input is truncated or corrupt
     *
     * @throws      IOException may be thrown by <code>in</code>
     */
    public int read(byte[] buf, int off, int len) throws IOException {
        // off + len < 0 catches int overflow of off + len.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();

        if (len == 0)
            return 0;

        // in is set to null by close().
        if (in == null)
            throw new XZIOException("Stream closed");

        // Once an I/O error has occurred, keep rethrowing it so that
        // a broken stream cannot be read past the error.
        if (exception != null)
            throw exception;

        if (endReached)
            return -1;

        try {
            int size = 0;

            while (len > 0) {
                // If uncompressed size is known and thus no end marker will
                // be present, set the limit so that the uncompressed size
                // won't be exceeded.
                int copySizeMax = len;
                if (remainingSize >= 0 && remainingSize < len)
                    copySizeMax = (int)remainingSize;

                lz.setLimit(copySizeMax);

                // Decode into the dictionary buffer.
                try {
                    lzma.decode();
                } catch (CorruptedInputException e) {
                    // The end marker is encoded with a LZMA symbol that
                    // indicates maximum match distance. This is larger
                    // than any supported dictionary and thus causes
                    // CorruptedInputException from LZDecoder.repeat.
                    if (remainingSize != -1 || !lzma.endMarkerDetected())
                        throw e;

                    endReached = true;

                    // The exception makes lzma.decode() miss the last range
                    // decoder normalization, so do it here. This might
                    // cause an IOException if it needs to read a byte
                    // from the input stream.
                    rc.normalize();
                }

                // Copy from the dictionary to buf.
                int copiedSize = lz.flush(buf, off);
                off += copiedSize;
                len -= copiedSize;
                size += copiedSize;

                if (remainingSize >= 0) {
                    // Update the number of bytes left to be decompressed.
                    remainingSize -= copiedSize;
                    assert remainingSize >= 0;

                    if (remainingSize == 0)
                        endReached = true;
                }

                if (endReached) {
                    // Checking these helps a lot when catching corrupt
                    // or truncated .lzma files. LZMA Utils doesn't do
                    // the first check and thus it accepts many invalid
                    // files that this implementation and XZ Utils don't.
                    if (!rc.isFinished() || lz.hasPending())
                        throw new CorruptedInputException();

                    // The dictionary is no longer needed; return the large
                    // arrays to the cache.
                    putArraysToCache();
                    return size == 0 ? -1 : size;
                }
            }

            return size;

        } catch (IOException e) {
            exception = e;
            throw e;
        }
    }
+
+ private void putArraysToCache() {
+ if (lz != null) {
+ lz.putArraysToCache(arrayCache);
+ lz = null;
+ }
+ }
+
+ /**
+ * Closes the stream and calls <code>in.close()</code>.
+ * If the stream was already closed, this does nothing.
+ *
+ * @throws IOException if thrown by <code>in.close()</code>
+ */
+ public void close() throws IOException {
+ if (in != null) {
+ putArraysToCache();
+
+ try {
+ in.close();
+ } finally {
+ in = null;
+ }
+ }
+ }
+}
diff --git a/src/org/tukaani/xz/LZMAOutputStream.java b/src/org/tukaani/xz/LZMAOutputStream.java
new file mode 100644
index 0000000..3a1b7b1
--- /dev/null
+++ b/src/org/tukaani/xz/LZMAOutputStream.java
@@ -0,0 +1,331 @@
+/*
+ * LZMAOutputStream
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.OutputStream;
+import java.io.IOException;
+import org.tukaani.xz.lz.LZEncoder;
+import org.tukaani.xz.rangecoder.RangeEncoderToStream;
+import org.tukaani.xz.lzma.LZMAEncoder;
+
/**
 * Compresses into the legacy .lzma file format or into a raw LZMA stream.
 *
 * @since 1.6
 */
public class LZMAOutputStream extends FinishableOutputStream {
    // Underlying stream; set to null by close().
    private OutputStream out;

    private final ArrayCache arrayCache;

    private LZEncoder lz;
    private final RangeEncoderToStream rc;
    private LZMAEncoder lzma;

    // lc/lp/pb packed into a single byte as (pb * 5 + lp) * 9 + lc.
    private final int props;
    private final boolean useEndMarker;
    // -1 when the input size is unknown; otherwise write() enforces it.
    private final long expectedUncompressedSize;
    private long currentUncompressedSize = 0;

    private boolean finished = false;
    // Pending I/O error; once set, it is rethrown by later calls.
    private IOException exception = null;

    // Scratch buffer for the single-byte write(int).
    private final byte[] tempBuf = new byte[1];

    // Common constructor: when useHeader is true, the classic .lzma
    // header (props byte, dictionary size, uncompressed size) is written
    // to "out" immediately.
    private LZMAOutputStream(OutputStream out, LZMA2Options options,
                             boolean useHeader, boolean useEndMarker,
                             long expectedUncompressedSize,
                             ArrayCache arrayCache)
            throws IOException {
        if (out == null)
            throw new NullPointerException();

        // -1 indicates unknown and >= 0 are for known sizes.
        if (expectedUncompressedSize < -1)
            throw new IllegalArgumentException(
                    "Invalid expected input size (less than -1)");

        this.useEndMarker = useEndMarker;
        this.expectedUncompressedSize = expectedUncompressedSize;

        this.arrayCache = arrayCache;

        this.out = out;
        rc = new RangeEncoderToStream(out);

        int dictSize = options.getDictSize();
        lzma = LZMAEncoder.getInstance(rc,
                options.getLc(), options.getLp(), options.getPb(),
                options.getMode(),
                dictSize, 0, options.getNiceLen(),
                options.getMatchFinder(), options.getDepthLimit(),
                arrayCache);

        lz = lzma.getLZEncoder();

        byte[] presetDict = options.getPresetDict();
        if (presetDict != null && presetDict.length > 0) {
            if (useHeader)
                throw new UnsupportedOptionsException(
                        "Preset dictionary cannot be used in .lzma files "
                        + "(try a raw LZMA stream instead)");

            lz.setPresetDict(dictSize, presetDict);
        }

        props = (options.getPb() * 5 + options.getLp()) * 9 + options.getLc();

        if (useHeader) {
            // Props byte stores lc, lp, and pb.
            out.write(props);

            // Dictionary size is stored as a 32-bit unsigned little endian
            // integer.
            for (int i = 0; i < 4; ++i) {
                out.write(dictSize & 0xFF);
                dictSize >>>= 8;
            }

            // Uncompressed size is stored as a 64-bit unsigned little endian
            // integer. The max value (-1 in two's complement) indicates
            // unknown size.
            for (int i = 0; i < 8; ++i)
                out.write((int)(expectedUncompressedSize >>> (8 * i)) & 0xFF);
        }
    }

    /**
     * Creates a new compressor for the legacy .lzma file format.
     * <p>
     * If the uncompressed size of the input data is known, it will be stored
     * in the .lzma header and no end of stream marker will be used. Otherwise
     * the header will indicate unknown uncompressed size and the end of stream
     * marker will be used.
     * <p>
     * Note that a preset dictionary cannot be used in .lzma files but
     * it can be used for raw LZMA streams.
     *
     * @param       out         output stream to which the compressed data
     *                          will be written
     *
     * @param       options     LZMA compression options; the same class
     *                          is used here as is for LZMA2
     *
     * @param       inputSize   uncompressed size of the data to be compressed;
     *                          use <code>-1</code> when unknown
     *
     * @throws      IOException may be thrown from <code>out</code>
     */
    public LZMAOutputStream(OutputStream out, LZMA2Options options,
                            long inputSize)
            throws IOException {
        this(out, options, inputSize, ArrayCache.getDefaultCache());
    }

    /**
     * Creates a new compressor for the legacy .lzma file format.
     * <p>
     * This is identical to
     * <code>LZMAOutputStream(OutputStream, LZMA2Options, long)</code>
     * except that this also takes the <code>arrayCache</code> argument.
     *
     * @param       out         output stream to which the compressed data
     *                          will be written
     *
     * @param       options     LZMA compression options; the same class
     *                          is used here as is for LZMA2
     *
     * @param       inputSize   uncompressed size of the data to be compressed;
     *                          use <code>-1</code> when unknown
     *
     * @param       arrayCache  cache to be used for allocating large arrays
     *
     * @throws      IOException may be thrown from <code>out</code>
     *
     * @since 1.7
     */
    public LZMAOutputStream(OutputStream out, LZMA2Options options,
                            long inputSize, ArrayCache arrayCache)
            throws IOException {
        // The end marker is needed exactly when the size isn't known.
        this(out, options, true, inputSize == -1, inputSize, arrayCache);
    }

    /**
     * Creates a new compressor for raw LZMA (also known as LZMA1) stream.
     * <p>
     * Raw LZMA streams can be encoded with or without end of stream marker.
     * When decompressing the stream, one must know if the end marker was used
     * and tell it to the decompressor. If the end marker wasn't used, the
     * decompressor will also need to know the uncompressed size.
     *
     * @param       out         output stream to which the compressed data
     *                          will be written
     *
     * @param       options     LZMA compression options; the same class
     *                          is used here as is for LZMA2
     *
     * @param       useEndMarker
     *                          if end of stream marker should be written
     *
     * @throws      IOException may be thrown from <code>out</code>
     */
    public LZMAOutputStream(OutputStream out, LZMA2Options options,
                            boolean useEndMarker) throws IOException {
        this(out, options, useEndMarker, ArrayCache.getDefaultCache());
    }

    /**
     * Creates a new compressor for raw LZMA (also known as LZMA1) stream.
     * <p>
     * This is identical to
     * <code>LZMAOutputStream(OutputStream, LZMA2Options, boolean)</code>
     * except that this also takes the <code>arrayCache</code> argument.
     *
     * @param       out         output stream to which the compressed data
     *                          will be written
     *
     * @param       options     LZMA compression options; the same class
     *                          is used here as is for LZMA2
     *
     * @param       useEndMarker
     *                          if end of stream marker should be written
     *
     * @param       arrayCache  cache to be used for allocating large arrays
     *
     * @throws      IOException may be thrown from <code>out</code>
     *
     * @since 1.7
     */
    public LZMAOutputStream(OutputStream out, LZMA2Options options,
                            boolean useEndMarker, ArrayCache arrayCache)
            throws IOException {
        this(out, options, false, useEndMarker, -1, arrayCache);
    }

    /**
     * Returns the LZMA lc/lp/pb properties encoded into a single byte.
     * This might be useful when handling file formats other than .lzma
     * that use the same encoding for the LZMA properties as .lzma does.
     */
    public int getProps() {
        return props;
    }

    /**
     * Gets the amount of uncompressed data written to the stream.
     * This is useful when creating raw LZMA streams without
     * the end of stream marker.
     */
    public long getUncompressedSize() {
        return currentUncompressedSize;
    }

    /**
     * Writes one byte to be compressed.
     *
     * @throws XZIOException if the stream is finished or closed, or if
     *                       the expected uncompressed size would be exceeded
     * @throws IOException   may be thrown from <code>out</code>
     */
    public void write(int b) throws IOException {
        tempBuf[0] = (byte)b;
        write(tempBuf, 0, 1);
    }

    /**
     * Writes an array of bytes to be compressed.
     *
     * @throws XZIOException if the stream is finished or closed, or if
     *                       the expected uncompressed size would be exceeded
     * @throws IOException   may be thrown from <code>out</code>
     */
    public void write(byte[] buf, int off, int len) throws IOException {
        // off + len < 0 catches int overflow of off + len.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();

        if (exception != null)
            throw exception;

        if (finished)
            throw new XZIOException("Stream finished or closed");

        if (expectedUncompressedSize != -1
                && expectedUncompressedSize - currentUncompressedSize < len)
            throw new XZIOException("Expected uncompressed input size ("
                    + expectedUncompressedSize + " bytes) was exceeded");

        currentUncompressedSize += len;

        try {
            // Feed the match finder window and encode as it fills up.
            while (len > 0) {
                int used = lz.fillWindow(buf, off, len);
                off += used;
                len -= used;
                lzma.encodeForLZMA1();
            }
        } catch (IOException e) {
            exception = e;
            throw e;
        }
    }

    /**
     * Flushing isn't supported and will throw XZIOException.
     */
    public void flush() throws IOException {
        throw new XZIOException("LZMAOutputStream does not support flushing");
    }

    /**
     * Finishes the stream without closing the underlying OutputStream.
     */
    public void finish() throws IOException {
        if (!finished) {
            if (exception != null)
                throw exception;

            try {
                if (expectedUncompressedSize != -1
                        && expectedUncompressedSize != currentUncompressedSize)
                    throw new XZIOException("Expected uncompressed size ("
                            + expectedUncompressedSize + ") doesn't equal "
                            + "the number of bytes written to the stream ("
                            + currentUncompressedSize + ")");

                // Flush the remaining window contents, then terminate the
                // range-encoded data.
                lz.setFinishing();
                lzma.encodeForLZMA1();

                if (useEndMarker)
                    lzma.encodeLZMA1EndMarker();

                rc.finish();
            } catch (IOException e) {
                exception = e;
                throw e;
            }

            finished = true;

            // Encoder structures are no longer needed.
            lzma.putArraysToCache(arrayCache);
            lzma = null;
            lz = null;
        }
    }

    /**
     * Finishes the stream and closes the underlying OutputStream.
     */
    public void close() throws IOException {
        if (out != null) {
            // The empty catch is intentional: any failure from finish()
            // is recorded in "exception" and rethrown below, so closing
            // "out" still happens. NOTE(review): this assumes every
            // exception finish() can throw is an IOException recorded in
            // "exception" (XZIOException included) -- confirm.
            try {
                finish();
            } catch (IOException e) {}

            try {
                out.close();
            } catch (IOException e) {
                if (exception == null)
                    exception = e;
            }

            out = null;
        }

        if (exception != null)
            throw exception;
    }
}
diff --git a/src/org/tukaani/xz/MemoryLimitException.java b/src/org/tukaani/xz/MemoryLimitException.java
new file mode 100644
index 0000000..9d766bd
--- /dev/null
+++ b/src/org/tukaani/xz/MemoryLimitException.java
@@ -0,0 +1,60 @@
+/*
+ * MemoryLimitException
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+/**
+ * Thrown when the memory usage limit given to the XZ decompressor
+ * would be exceeded.
+ * <p>
+ * The amount of memory required and the memory usage limit are
+ * included in the error detail message in human readable format.
+ */
+public class MemoryLimitException extends XZIOException {
+ private static final long serialVersionUID = 3L;
+
+ private final int memoryNeeded;
+ private final int memoryLimit;
+
+ /**
+ * Creates a new MemoryLimitException.
+ * <p>
+ * The amount of memory needed and the memory usage limit are
+ * included in the error detail message.
+ *
+ * @param memoryNeeded amount of memory needed as kibibytes (KiB)
+ * @param memoryLimit specified memory usage limit as kibibytes (KiB)
+ */
+ public MemoryLimitException(int memoryNeeded, int memoryLimit) {
+ super("" + memoryNeeded + " KiB of memory would be needed; limit was "
+ + memoryLimit + " KiB");
+
+ this.memoryNeeded = memoryNeeded;
+ this.memoryLimit = memoryLimit;
+ }
+
+ /**
+ * Gets how much memory is required to decompress the data.
+ *
+ * @return amount of memory needed as kibibytes (KiB)
+ */
+ public int getMemoryNeeded() {
+ return memoryNeeded;
+ }
+
+ /**
+ * Gets what the memory usage limit was at the time the exception
+ * was created.
+ *
+ * @return memory usage limit as kibibytes (KiB)
+ */
+ public int getMemoryLimit() {
+ return memoryLimit;
+ }
+}
diff --git a/src/org/tukaani/xz/PowerPCOptions.java b/src/org/tukaani/xz/PowerPCOptions.java
new file mode 100644
index 0000000..9b6fce1
--- /dev/null
+++ b/src/org/tukaani/xz/PowerPCOptions.java
@@ -0,0 +1,37 @@
+/*
+ * PowerPCOptions
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import org.tukaani.xz.simple.PowerPC;
+
+/**
+ * BCJ filter for big endian PowerPC instructions.
+ */
+public class PowerPCOptions extends BCJOptions {
+ private static final int ALIGNMENT = 4;
+
+ public PowerPCOptions() {
+ super(ALIGNMENT);
+ }
+
+ public FinishableOutputStream getOutputStream(FinishableOutputStream out,
+ ArrayCache arrayCache) {
+ return new SimpleOutputStream(out, new PowerPC(true, startOffset));
+ }
+
+ public InputStream getInputStream(InputStream in, ArrayCache arrayCache) {
+ return new SimpleInputStream(in, new PowerPC(false, startOffset));
+ }
+
+ FilterEncoder getFilterEncoder() {
+ return new BCJEncoder(this, BCJCoder.POWERPC_FILTER_ID);
+ }
+}
diff --git a/src/org/tukaani/xz/RawCoder.java b/src/org/tukaani/xz/RawCoder.java
new file mode 100644
index 0000000..12c7da8
--- /dev/null
+++ b/src/org/tukaani/xz/RawCoder.java
@@ -0,0 +1,33 @@
+/*
+ * RawCoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+class RawCoder {
+ static void validate(FilterCoder[] filters)
+ throws UnsupportedOptionsException {
+ for (int i = 0; i < filters.length - 1; ++i)
+ if (!filters[i].nonLastOK())
+ throw new UnsupportedOptionsException(
+ "Unsupported XZ filter chain");
+
+ if (!filters[filters.length - 1].lastOK())
+ throw new UnsupportedOptionsException(
+ "Unsupported XZ filter chain");
+
+ int changesSizeCount = 0;
+ for (int i = 0; i < filters.length; ++i)
+ if (filters[i].changesSize())
+ ++changesSizeCount;
+
+ if (changesSizeCount > 3)
+ throw new UnsupportedOptionsException(
+ "Unsupported XZ filter chain");
+ }
+}
diff --git a/src/org/tukaani/xz/ResettableArrayCache.java b/src/org/tukaani/xz/ResettableArrayCache.java
new file mode 100644
index 0000000..2f89c1d
--- /dev/null
+++ b/src/org/tukaani/xz/ResettableArrayCache.java
@@ -0,0 +1,120 @@
+/*
+ * ResettableArrayCache
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.util.ArrayList;
+import java.util.List;
+
/**
 * An ArrayCache wrapper that remembers what has been allocated
 * and allows returning all allocations to the underlying cache at once.
 *
 * @since 1.7
 */
public class ResettableArrayCache extends ArrayCache {
    // The cache that actually provides and stores the arrays.
    private final ArrayCache arrayCache;

    // Lists of arrays that have been allocated from the arrayCache.
    // Both are null when arrayCache is the dummy cache (see constructor),
    // in which case no tracking is needed. Each list is used as its own
    // synchronization lock.
    private final List<byte[]> byteArrays;
    private final List<int[]> intArrays;

    /**
     * Creates a new ResettableArrayCache based on the given ArrayCache.
     */
    public ResettableArrayCache(ArrayCache arrayCache) {
        this.arrayCache = arrayCache;

        // Treat the dummy cache as a special case since it's a common case.
        // With it we don't need to put the arrays back to the cache and
        // thus we don't need to remember what has been allocated.
        if (arrayCache == ArrayCache.getDummyCache()) {
            byteArrays = null;
            intArrays = null;
        } else {
            byteArrays = new ArrayList<byte[]>();
            intArrays = new ArrayList<int[]>();
        }
    }

    public byte[] getByteArray(int size, boolean fillWithZeros) {
        byte[] array = arrayCache.getByteArray(size, fillWithZeros);

        // Remember the allocation so that reset() can return it later.
        if (byteArrays != null) {
            synchronized(byteArrays) {
                byteArrays.add(array);
            }
        }

        return array;
    }

    public void putArray(byte[] array) {
        if (byteArrays != null) {
            // The array is more likely to be near the end of the list so
            // start the search from the end.
            synchronized(byteArrays) {
                int i = byteArrays.lastIndexOf(array);
                if (i != -1)
                    byteArrays.remove(i);
            }

            // Forward to the underlying cache outside the lock; with the
            // dummy cache (byteArrays == null) this is skipped entirely
            // since the dummy cache has nothing to store.
            arrayCache.putArray(array);
        }
    }

    public int[] getIntArray(int size, boolean fillWithZeros) {
        int[] array = arrayCache.getIntArray(size, fillWithZeros);

        if (intArrays != null) {
            synchronized(intArrays) {
                intArrays.add(array);
            }
        }

        return array;
    }

    public void putArray(int[] array) {
        if (intArrays != null) {
            synchronized(intArrays) {
                int i = intArrays.lastIndexOf(array);
                if (i != -1)
                    intArrays.remove(i);
            }

            arrayCache.putArray(array);
        }
    }

    /**
     * Puts all allocated arrays back to the underlying ArrayCache
     * that haven't already been put there with a call to
     * {@code putArray}.
     */
    public void reset() {
        if (byteArrays != null) {
            // Put the arrays to the cache in reverse order: the array that
            // was allocated first is returned last.
            synchronized(byteArrays) {
                for (int i = byteArrays.size() - 1; i >= 0; --i)
                    arrayCache.putArray(byteArrays.get(i));

                byteArrays.clear();
            }

            // The two lists are locked separately; byte arrays are
            // returned before int arrays.
            synchronized(intArrays) {
                for (int i = intArrays.size() - 1; i >= 0; --i)
                    arrayCache.putArray(intArrays.get(i));

                intArrays.clear();
            }
        }
    }
}
diff --git a/src/org/tukaani/xz/SPARCOptions.java b/src/org/tukaani/xz/SPARCOptions.java
new file mode 100644
index 0000000..a49dd9e
--- /dev/null
+++ b/src/org/tukaani/xz/SPARCOptions.java
@@ -0,0 +1,37 @@
+/*
+ * SPARCOptions
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import org.tukaani.xz.simple.SPARC;
+
+/**
+ * BCJ filter for SPARC.
+ */
+public class SPARCOptions extends BCJOptions {
+ private static final int ALIGNMENT = 4;
+
+ public SPARCOptions() {
+ super(ALIGNMENT);
+ }
+
+ public FinishableOutputStream getOutputStream(FinishableOutputStream out,
+ ArrayCache arrayCache) {
+ return new SimpleOutputStream(out, new SPARC(true, startOffset));
+ }
+
+ public InputStream getInputStream(InputStream in, ArrayCache arrayCache) {
+ return new SimpleInputStream(in, new SPARC(false, startOffset));
+ }
+
+ FilterEncoder getFilterEncoder() {
+ return new BCJEncoder(this, BCJCoder.SPARC_FILTER_ID);
+ }
+}
diff --git a/src/org/tukaani/xz/SeekableFileInputStream.java b/src/org/tukaani/xz/SeekableFileInputStream.java
new file mode 100644
index 0000000..fe2d685
--- /dev/null
+++ b/src/org/tukaani/xz/SeekableFileInputStream.java
@@ -0,0 +1,102 @@
+/*
+ * SeekableFileInputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.File;
+import java.io.RandomAccessFile;
+import java.io.IOException;
+import java.io.FileNotFoundException;
+
/**
 * Wraps a {@link java.io.RandomAccessFile RandomAccessFile}
 * in a SeekableInputStream.
 * <p>
 * All methods simply delegate to the wrapped RandomAccessFile.
 */
public class SeekableFileInputStream extends SeekableInputStream {
    /**
     * The RandomAccessFile that has been wrapped
     * into a SeekableFileInputStream.
     */
    protected RandomAccessFile randomAccessFile;

    /**
     * Creates a new seekable input stream that reads from the specified file.
     */
    public SeekableFileInputStream(File file) throws FileNotFoundException {
        // "r" = read-only access.
        randomAccessFile = new RandomAccessFile(file, "r");
    }

    /**
     * Creates a new seekable input stream that reads from a file with
     * the specified name.
     */
    public SeekableFileInputStream(String name) throws FileNotFoundException {
        randomAccessFile = new RandomAccessFile(name, "r");
    }

    /**
     * Creates a new seekable input stream from an existing
     * <code>RandomAccessFile</code> object.
     */
    public SeekableFileInputStream(RandomAccessFile randomAccessFile) {
        this.randomAccessFile = randomAccessFile;
    }

    /**
     * Calls {@link RandomAccessFile#read() randomAccessFile.read()}.
     */
    public int read() throws IOException {
        return randomAccessFile.read();
    }

    /**
     * Calls {@link RandomAccessFile#read(byte[]) randomAccessFile.read(buf)}.
     */
    public int read(byte[] buf) throws IOException {
        return randomAccessFile.read(buf);
    }

    /**
     * Calls
     * {@link RandomAccessFile#read(byte[],int,int)
     *        randomAccessFile.read(buf, off, len)}.
     */
    public int read(byte[] buf, int off, int len) throws IOException {
        return randomAccessFile.read(buf, off, len);
    }

    /**
     * Calls {@link RandomAccessFile#close() randomAccessFile.close()}.
     */
    public void close() throws IOException {
        randomAccessFile.close();
    }

    /**
     * Calls {@link RandomAccessFile#length() randomAccessFile.length()}.
     */
    public long length() throws IOException {
        return randomAccessFile.length();
    }

    /**
     * Calls {@link RandomAccessFile#getFilePointer()
     *        randomAccessFile.getFilePointer()}.
     */
    public long position() throws IOException {
        return randomAccessFile.getFilePointer();
    }

    /**
     * Calls {@link RandomAccessFile#seek(long) randomAccessFile.seek(long)}.
     */
    public void seek(long pos) throws IOException {
        randomAccessFile.seek(pos);
    }
}
diff --git a/src/org/tukaani/xz/SeekableInputStream.java b/src/org/tukaani/xz/SeekableInputStream.java
new file mode 100644
index 0000000..a2f908a
--- /dev/null
+++ b/src/org/tukaani/xz/SeekableInputStream.java
@@ -0,0 +1,81 @@
+/*
+ * SeekableInputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import java.io.IOException;
+
/**
 * Input stream with random access support.
 */
public abstract class SeekableInputStream extends InputStream {
    /**
     * Seeks <code>n</code> bytes forward in this stream.
     * <p>
     * This will not seek past the end of the file. If the current position
     * is already at or past the end of the file, this doesn't seek at all
     * and returns <code>0</code>. Otherwise, if skipping <code>n</code> bytes
     * would cause the position to exceed the stream size, this will do
     * equivalent of <code>seek(length())</code> and the return value will
     * be adjusted accordingly.
     * <p>
     * If <code>n</code> is negative, the position isn't changed and
     * the return value is <code>0</code>. It doesn't seek backward
     * because it would conflict with the specification of
     * {@link java.io.InputStream#skip(long) InputStream.skip}.
     *
     * @return <code>0</code> if <code>n</code> is negative,
     *         less than <code>n</code> if skipping <code>n</code>
     *         bytes would seek past the end of the file,
     *         <code>n</code> otherwise
     *
     * @throws IOException might be thrown by {@link #seek(long)}
     */
    public long skip(long n) throws IOException {
        // Backward seeks are not allowed by the InputStream.skip contract.
        if (n <= 0)
            return 0;

        long size = length();
        long pos = position();

        // Already at or past the end: nothing to skip.
        if (pos >= size)
            return 0;

        // Clamp so that the position never moves past the end of the stream.
        long amount = Math.min(n, size - pos);
        seek(pos + amount);
        return amount;
    }

    /**
     * Gets the size of the stream.
     */
    public abstract long length() throws IOException;

    /**
     * Gets the current position in the stream.
     */
    public abstract long position() throws IOException;

    /**
     * Seeks to the specified absolute position in the stream.
     * <p>
     * Seeking past the end of the file should be supported by the subclasses
     * unless there is a good reason to do otherwise. If one has seeked
     * past the end of the stream, <code>read</code> will return
     * <code>-1</code> to indicate end of stream.
     *
     * @param pos new read position in the stream
     *
     * @throws IOException if <code>pos</code> is negative or if
     *                     a stream-specific I/O error occurs
     */
    public abstract void seek(long pos) throws IOException;
}
diff --git a/src/org/tukaani/xz/SeekableXZInputStream.java b/src/org/tukaani/xz/SeekableXZInputStream.java
new file mode 100644
index 0000000..74f130e
--- /dev/null
+++ b/src/org/tukaani/xz/SeekableXZInputStream.java
@@ -0,0 +1,1152 @@
+/*
+ * SeekableXZInputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.EOFException;
+import org.tukaani.xz.common.DecoderUtil;
+import org.tukaani.xz.common.StreamFlags;
+import org.tukaani.xz.check.Check;
+import org.tukaani.xz.index.IndexDecoder;
+import org.tukaani.xz.index.BlockInfo;
+
+/**
+ * Decompresses a .xz file in random access mode.
+ * This supports decompressing concatenated .xz files.
+ * <p>
+ * Each .xz file consists of one or more Streams. Each Stream consists of zero
+ * or more Blocks. Each Stream contains an Index of Streams' Blocks.
+ * The Indexes from all Streams are loaded in RAM by a constructor of this
+ * class. A typical .xz file has only one Stream, and parsing its Index will
+ * need only three or four seeks.
+ * <p>
+ * To make random access possible, the data in a .xz file must be split
+ * into multiple Blocks of reasonable size. Decompression can only start at
+ * a Block boundary. When seeking to an uncompressed position that is not at
+ * a Block boundary, decompression starts at the beginning of the Block and
+ * throws away data until the target position is reached. Thus, smaller Blocks
+ * mean faster seeks to arbitrary uncompressed positions. On the other hand,
+ * smaller Blocks mean worse compression. So one has to make a compromise
+ * between random access speed and compression ratio.
+ * <p>
+ * Implementation note: This class uses linear search to locate the correct
+ * Stream from the data structures in RAM. It was the simplest to implement
+ * and should be fine as long as there aren't too many Streams. The correct
+ * Block inside a Stream is located using binary search and thus is fast
+ * even with a huge number of Blocks.
+ *
+ * <h4>Memory usage</h4>
+ * <p>
+ * The amount of memory needed for the Indexes is taken into account when
+ * checking the memory usage limit. Each Stream is calculated to need at
+ * least 1&nbsp;KiB of memory and each Block 16 bytes of memory, rounded up
+ * to the next kibibyte. So unless the file has a huge number of Streams or
+ * Blocks, these don't take significant amount of memory.
+ *
+ * <h4>Creating random-accessible .xz files</h4>
+ * <p>
+ * When using {@link XZOutputStream}, a new Block can be started by calling
+ * its {@link XZOutputStream#endBlock() endBlock} method. If you know
+ * that the decompressor will only need to seek to certain uncompressed
+ * positions, it can be a good idea to start a new Block at (some of) these
+ * positions (and only at these positions to get better compression ratio).
+ * <p>
+ * liblzma in XZ Utils supports starting a new Block with
+ * <code>LZMA_FULL_FLUSH</code>. XZ Utils 5.1.1alpha added threaded
+ * compression which creates multi-Block .xz files. XZ Utils 5.1.1alpha
+ * also added the option <code>--block-size=SIZE</code> to the xz command
+ * line tool. XZ Utils 5.1.2alpha added a partial implementation of
+ * <code>--block-list=SIZES</code> which allows specifying sizes of
+ * individual Blocks.
+ *
+ * @see SeekableFileInputStream
+ * @see XZInputStream
+ * @see XZOutputStream
+ */
+public class SeekableXZInputStream extends SeekableInputStream {
+ /**
+ * Cache for big arrays.
+ */
+ private final ArrayCache arrayCache;
+
+ /**
+ * The input stream containing XZ compressed data.
+ */
+ private SeekableInputStream in;
+
+ /**
+ * Memory usage limit after the memory usage of the IndexDecoders has
+ * been subtracted.
+ */
+ private final int memoryLimit;
+
+ /**
+ * Memory usage of the IndexDecoders.
+ * <code>memoryLimit + indexMemoryUsage</code> equals the original
+ * memory usage limit that was passed to the constructor.
+ */
+ private int indexMemoryUsage = 0;
+
+ /**
+ * List of IndexDecoders, one for each Stream in the file.
+ * The list is in reverse order: The first element is
+ * the last Stream in the file.
+ */
+ private final ArrayList<IndexDecoder> streams
+ = new ArrayList<IndexDecoder>();
+
+ /**
+ * Bitmask of all Check IDs seen.
+ */
+ private int checkTypes = 0;
+
+ /**
+ * Uncompressed size of the file (all Streams).
+ */
+ private long uncompressedSize = 0;
+
+ /**
+ * Uncompressed size of the largest XZ Block in the file.
+ */
+ private long largestBlockSize = 0;
+
+ /**
+ * Number of XZ Blocks in the file.
+ */
+ private int blockCount = 0;
+
+ /**
+ * Size and position information about the current Block.
+ * If there are no Blocks, all values will be <code>-1</code>.
+ */
+ private final BlockInfo curBlockInfo;
+
+ /**
+ * Temporary (and cached) information about the Block whose information
+ * is queried via <code>getBlockPos</code> and related functions.
+ */
+ private final BlockInfo queriedBlockInfo;
+
+ /**
+ * Integrity Check in the current XZ Stream. The constructor leaves
+ * this to point to the Check of the first Stream.
+ */
+ private Check check;
+
+ /**
+ * Flag indicating if the integrity checks will be verified.
+ */
+ private final boolean verifyCheck;
+
+ /**
+ * Decoder of the current XZ Block, if any.
+ */
+ private BlockInputStream blockDecoder = null;
+
+ /**
+ * Current uncompressed position.
+ */
+ private long curPos = 0;
+
+ /**
+ * Target position for seeking.
+ */
+ private long seekPos;
+
+ /**
+ * True when <code>seek(long)</code> has been called but the actual
+ * seeking hasn't been done yet.
+ */
+ private boolean seekNeeded = false;
+
+ /**
+ * True when end of the file was reached. This can be cleared by
+ * calling <code>seek(long)</code>.
+ */
+ private boolean endReached = false;
+
+ /**
+ * Pending exception from an earlier error.
+ */
+ private IOException exception = null;
+
+ /**
+ * Temporary buffer for read(). This avoids reallocating memory
+ * on every read() call.
+ */
+ private final byte[] tempBuf = new byte[1];
+
+ /**
+ * Creates a new seekable XZ decompressor without a memory usage limit.
+ *
+ * @param in seekable input stream containing one or more
+ * XZ Streams; the whole input stream is used
+ *
+ * @throws XZFormatException
+ * input is not in the XZ format
+ *
+ * @throws CorruptedInputException
+ * XZ data is corrupt or truncated
+ *
+ * @throws UnsupportedOptionsException
+ * XZ headers seem valid but they specify
+ * options not supported by this implementation
+ *
+ * @throws EOFException
+ * less than 6 bytes of input was available
+ * from <code>in</code>, or (unlikely) the size
+ * of the underlying stream got smaller while
+ * this was reading from it
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ */
+ public SeekableXZInputStream(SeekableInputStream in)
+ throws IOException {
+ this(in, -1);
+ }
+
+ /**
+ * Creates a new seekable XZ decompressor without a memory usage limit.
+ * <p>
+ * This is identical to
+ * <code>SeekableXZInputStream(SeekableInputStream)</code> except that
+ * this also takes the <code>arrayCache</code> argument.
+ *
+ * @param in seekable input stream containing one or more
+ * XZ Streams; the whole input stream is used
+ *
+ * @param arrayCache cache to be used for allocating large arrays
+ *
+ * @throws XZFormatException
+ * input is not in the XZ format
+ *
+ * @throws CorruptedInputException
+ * XZ data is corrupt or truncated
+ *
+ * @throws UnsupportedOptionsException
+ * XZ headers seem valid but they specify
+ * options not supported by this implementation
+ *
+ * @throws EOFException
+ * less than 6 bytes of input was available
+ * from <code>in</code>, or (unlikely) the size
+ * of the underlying stream got smaller while
+ * this was reading from it
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ *
+ * @since 1.7
+ */
+ public SeekableXZInputStream(SeekableInputStream in, ArrayCache arrayCache)
+ throws IOException {
+ this(in, -1, arrayCache);
+ }
+
+ /**
+ * Creates a new seekable XZ decompressor with an optional
+ * memory usage limit.
+ *
+ * @param in seekable input stream containing one or more
+ * XZ Streams; the whole input stream is used
+ *
+ * @param memoryLimit memory usage limit in kibibytes (KiB)
+ * or <code>-1</code> to impose no
+ * memory usage limit
+ *
+ * @throws XZFormatException
+ * input is not in the XZ format
+ *
+ * @throws CorruptedInputException
+ * XZ data is corrupt or truncated
+ *
+ * @throws UnsupportedOptionsException
+ * XZ headers seem valid but they specify
+ * options not supported by this implementation
+ *
+ * @throws MemoryLimitException
+ * decoded XZ Indexes would need more memory
+ * than allowed by the memory usage limit
+ *
+ * @throws EOFException
+ * less than 6 bytes of input was available
+ * from <code>in</code>, or (unlikely) the size
+ * of the underlying stream got smaller while
+ * this was reading from it
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ */
+ public SeekableXZInputStream(SeekableInputStream in, int memoryLimit)
+ throws IOException {
+ this(in, memoryLimit, true);
+ }
+
+ /**
+ * Creates a new seekable XZ decompressor with an optional
+ * memory usage limit.
+ * <p>
+ * This is identical to
+ * <code>SeekableXZInputStream(SeekableInputStream,int)</code>
+ * except that this also takes the <code>arrayCache</code> argument.
+ *
+ * @param in seekable input stream containing one or more
+ * XZ Streams; the whole input stream is used
+ *
+ * @param memoryLimit memory usage limit in kibibytes (KiB)
+ * or <code>-1</code> to impose no
+ * memory usage limit
+ *
+ * @param arrayCache cache to be used for allocating large arrays
+ *
+ * @throws XZFormatException
+ * input is not in the XZ format
+ *
+ * @throws CorruptedInputException
+ * XZ data is corrupt or truncated
+ *
+ * @throws UnsupportedOptionsException
+ * XZ headers seem valid but they specify
+ * options not supported by this implementation
+ *
+ * @throws MemoryLimitException
+ * decoded XZ Indexes would need more memory
+ * than allowed by the memory usage limit
+ *
+ * @throws EOFException
+ * less than 6 bytes of input was available
+ * from <code>in</code>, or (unlikely) the size
+ * of the underlying stream got smaller while
+ * this was reading from it
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ *
+ * @since 1.7
+ */
+ public SeekableXZInputStream(SeekableInputStream in, int memoryLimit,
+ ArrayCache arrayCache)
+ throws IOException {
+ this(in, memoryLimit, true, arrayCache);
+ }
+
+ /**
+ * Creates a new seekable XZ decompressor with an optional
+ * memory usage limit and ability to disable verification
+ * of integrity checks.
+ * <p>
+ * Note that integrity check verification should almost never be disabled.
+ * Possible reasons to disable integrity check verification:
+ * <ul>
+ * <li>Trying to recover data from a corrupt .xz file.</li>
+ * <li>Speeding up decompression. This matters mostly with SHA-256
+ * or with files that have compressed extremely well. It's recommended
+ * that integrity checking isn't disabled for performance reasons
+ * unless the file integrity is verified externally in some other
+ * way.</li>
+ * </ul>
+ * <p>
+ * <code>verifyCheck</code> only affects the integrity check of
+ * the actual compressed data. The CRC32 fields in the headers
+ * are always verified.
+ *
+ * @param in seekable input stream containing one or more
+ * XZ Streams; the whole input stream is used
+ *
+ * @param memoryLimit memory usage limit in kibibytes (KiB)
+ * or <code>-1</code> to impose no
+ * memory usage limit
+ *
+ * @param verifyCheck if <code>true</code>, the integrity checks
+ * will be verified; this should almost never
+ * be set to <code>false</code>
+ *
+ * @throws XZFormatException
+ * input is not in the XZ format
+ *
+ * @throws CorruptedInputException
+ * XZ data is corrupt or truncated
+ *
+ * @throws UnsupportedOptionsException
+ * XZ headers seem valid but they specify
+ * options not supported by this implementation
+ *
+ * @throws MemoryLimitException
+ * decoded XZ Indexes would need more memory
+ * than allowed by the memory usage limit
+ *
+ * @throws EOFException
+ * less than 6 bytes of input was available
+ * from <code>in</code>, or (unlikely) the size
+ * of the underlying stream got smaller while
+ * this was reading from it
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ *
+ * @since 1.6
+ */
+ public SeekableXZInputStream(SeekableInputStream in, int memoryLimit,
+ boolean verifyCheck)
+ throws IOException {
+ this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
+ }
+
+ /**
+ * Creates a new seekable XZ decompressor with an optional
+ * memory usage limit and ability to disable verification
+ * of integrity checks.
+ * <p>
+ * This is identical to
+ * <code>SeekableXZInputStream(SeekableInputStream,int,boolean)</code>
+ * except that this also takes the <code>arrayCache</code> argument.
+ *
+ * @param in seekable input stream containing one or more
+ * XZ Streams; the whole input stream is used
+ *
+ * @param memoryLimit memory usage limit in kibibytes (KiB)
+ * or <code>-1</code> to impose no
+ * memory usage limit
+ *
+ * @param verifyCheck if <code>true</code>, the integrity checks
+ * will be verified; this should almost never
+ * be set to <code>false</code>
+ *
+ * @param arrayCache cache to be used for allocating large arrays
+ *
+ * @throws XZFormatException
+ * input is not in the XZ format
+ *
+ * @throws CorruptedInputException
+ * XZ data is corrupt or truncated
+ *
+ * @throws UnsupportedOptionsException
+ * XZ headers seem valid but they specify
+ * options not supported by this implementation
+ *
+ * @throws MemoryLimitException
+ * decoded XZ Indexes would need more memory
+ * than allowed by the memory usage limit
+ *
+ * @throws EOFException
+ * less than 6 bytes of input was available
+ * from <code>in</code>, or (unlikely) the size
+ * of the underlying stream got smaller while
+ * this was reading from it
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ *
+ * @since 1.7
+ */
+ public SeekableXZInputStream(SeekableInputStream in, int memoryLimit,
+ boolean verifyCheck, ArrayCache arrayCache)
+ throws IOException {
+ this.arrayCache = arrayCache;
+ this.verifyCheck = verifyCheck;
+ this.in = in;
+ DataInputStream inData = new DataInputStream(in);
+
+ // Check the magic bytes in the beginning of the file.
+ {
+ in.seek(0);
+ byte[] buf = new byte[XZ.HEADER_MAGIC.length];
+ inData.readFully(buf);
+ if (!Arrays.equals(buf, XZ.HEADER_MAGIC))
+ throw new XZFormatException();
+ }
+
+ // Get the file size and verify that it is a multiple of 4 bytes.
+ long pos = in.length();
+ if ((pos & 3) != 0)
+ throw new CorruptedInputException(
+ "XZ file size is not a multiple of 4 bytes");
+
+ // Parse the headers starting from the end of the file.
+ byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
+ long streamPadding = 0;
+
+ while (pos > 0) {
+ if (pos < DecoderUtil.STREAM_HEADER_SIZE)
+ throw new CorruptedInputException();
+
+ // Read the potential Stream Footer.
+ in.seek(pos - DecoderUtil.STREAM_HEADER_SIZE);
+ inData.readFully(buf);
+
+ // Skip Stream Padding four bytes at a time.
+ // Skipping more at once would be faster,
+ // but usually there isn't much Stream Padding.
+ if (buf[8] == 0x00 && buf[9] == 0x00 && buf[10] == 0x00
+ && buf[11] == 0x00) {
+ streamPadding += 4;
+ pos -= 4;
+ continue;
+ }
+
+ // It's not Stream Padding. Update pos.
+ pos -= DecoderUtil.STREAM_HEADER_SIZE;
+
+ // Decode the Stream Footer and check if Backward Size
+ // looks reasonable.
+ StreamFlags streamFooter = DecoderUtil.decodeStreamFooter(buf);
+ if (streamFooter.backwardSize >= pos)
+ throw new CorruptedInputException(
+ "Backward Size in XZ Stream Footer is too big");
+
+ // Check that the Check ID is supported. Store it in case this
+ // is the first Stream in the file.
+ check = Check.getInstance(streamFooter.checkType);
+
+ // Remember which Check IDs have been seen.
+ checkTypes |= 1 << streamFooter.checkType;
+
+ // Seek to the beginning of the Index.
+ in.seek(pos - streamFooter.backwardSize);
+
+ // Decode the Index field.
+ IndexDecoder index;
+ try {
+ index = new IndexDecoder(in, streamFooter, streamPadding,
+ memoryLimit);
+ } catch (MemoryLimitException e) {
+ // IndexDecoder doesn't know how much memory we had
+ // already needed so we need to recreate the exception.
+ assert memoryLimit >= 0;
+ throw new MemoryLimitException(
+ e.getMemoryNeeded() + indexMemoryUsage,
+ memoryLimit + indexMemoryUsage);
+ }
+
+ // Update the memory usage and limit counters.
+ indexMemoryUsage += index.getMemoryUsage();
+ if (memoryLimit >= 0) {
+ memoryLimit -= index.getMemoryUsage();
+ assert memoryLimit >= 0;
+ }
+
+ // Remember the uncompressed size of the largest Block.
+ if (largestBlockSize < index.getLargestBlockSize())
+ largestBlockSize = index.getLargestBlockSize();
+
+ // Calculate the offset to the beginning of this XZ Stream and
+ // check that it looks sane.
+ long off = index.getStreamSize() - DecoderUtil.STREAM_HEADER_SIZE;
+ if (pos < off)
+ throw new CorruptedInputException("XZ Index indicates "
+ + "too big compressed size for the XZ Stream");
+
+ // Seek to the beginning of this Stream.
+ pos -= off;
+ in.seek(pos);
+
+ // Decode the Stream Header.
+ inData.readFully(buf);
+ StreamFlags streamHeader = DecoderUtil.decodeStreamHeader(buf);
+
+ // Verify that the Stream Header matches the Stream Footer.
+ if (!DecoderUtil.areStreamFlagsEqual(streamHeader, streamFooter))
+ throw new CorruptedInputException(
+ "XZ Stream Footer does not match Stream Header");
+
+ // Update the total uncompressed size of the file and check that
+ // it doesn't overflow.
+ uncompressedSize += index.getUncompressedSize();
+ if (uncompressedSize < 0)
+ throw new UnsupportedOptionsException("XZ file is too big");
+
+ // Update the Block count and check that it fits into an int.
+ blockCount += index.getRecordCount();
+ if (blockCount < 0)
+ throw new UnsupportedOptionsException(
+ "XZ file has over " + Integer.MAX_VALUE + " Blocks");
+
+ // Add this Stream to the list of Streams.
+ streams.add(index);
+
+ // Reset to be ready to parse the next Stream.
+ streamPadding = 0;
+ }
+
+ assert pos == 0;
+
+ // Save it now that indexMemoryUsage has been subtracted from it.
+ this.memoryLimit = memoryLimit;
+
+ // Store the relative offsets of the Streams. This way we don't
+ // need to recalculate them in this class when seeking; the
+ // IndexDecoder instances will handle them.
+ IndexDecoder prev = streams.get(streams.size() - 1);
+ for (int i = streams.size() - 2; i >= 0; --i) {
+ IndexDecoder cur = streams.get(i);
+ cur.setOffsets(prev);
+ prev = cur;
+ }
+
+ // Initialize curBlockInfo to point to the first Stream.
+ // The blockNumber will be left to -1 so that .hasNext()
+ // and .setNext() work to get the first Block when starting
+ // to decompress from the beginning of the file.
+ IndexDecoder first = streams.get(streams.size() - 1);
+ curBlockInfo = new BlockInfo(first);
+
+ // queriedBlockInfo needs to be allocated too. The Stream used for
+ // initialization doesn't matter though.
+ queriedBlockInfo = new BlockInfo(first);
+ }
+
+ /**
+ * Gets the types of integrity checks used in the .xz file.
+ * Multiple checks are possible only if there are multiple
+ * concatenated XZ Streams.
+ * <p>
+ * The returned value has a bit set for every check type that is present.
+ * For example, if CRC64 and SHA-256 were used, the return value is
+ * <code>(1&nbsp;&lt;&lt;&nbsp;XZ.CHECK_CRC64)
+ * | (1&nbsp;&lt;&lt;&nbsp;XZ.CHECK_SHA256)</code>.
+ */
+ public int getCheckTypes() {
+ return checkTypes;
+ }
+
+ /**
+ * Gets the amount of memory in kibibytes (KiB) used by
+ * the data structures needed to locate the XZ Blocks.
+ * This is usually useless information but since it is calculated
+ * for the memory usage limit anyway, it is nice to make it available too.
+ */
+ public int getIndexMemoryUsage() {
+ return indexMemoryUsage;
+ }
+
+ /**
+ * Gets the uncompressed size of the largest XZ Block in bytes.
+ * This can be useful if you want to check that the file doesn't
+ * have huge XZ Blocks which could make seeking to arbitrary offsets
+ * very slow. Note that huge Blocks don't automatically mean that
+ * seeking would be slow, for example, seeking to the beginning of
+ * any Block is always fast.
+ */
+ public long getLargestBlockSize() {
+ return largestBlockSize;
+ }
+
+ /**
+ * Gets the number of Streams in the .xz file.
+ *
+ * @since 1.3
+ */
+ public int getStreamCount() {
+ return streams.size();
+ }
+
+ /**
+ * Gets the number of Blocks in the .xz file.
+ *
+ * @since 1.3
+ */
+ public int getBlockCount() {
+ return blockCount;
+ }
+
+ /**
+ * Gets the uncompressed start position of the given Block.
+ *
+ * @throws IndexOutOfBoundsException if
+ * <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
+ * <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
+ *
+ * @since 1.3
+ */
+ public long getBlockPos(int blockNumber) {
+ locateBlockByNumber(queriedBlockInfo, blockNumber);
+ return queriedBlockInfo.uncompressedOffset;
+ }
+
+ /**
+ * Gets the uncompressed size of the given Block.
+ *
+ * @throws IndexOutOfBoundsException if
+ * <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
+ * <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
+ *
+ * @since 1.3
+ */
+ public long getBlockSize(int blockNumber) {
+ locateBlockByNumber(queriedBlockInfo, blockNumber);
+ return queriedBlockInfo.uncompressedSize;
+ }
+
+ /**
+ * Gets the position where the given compressed Block starts in
+ * the underlying .xz file.
+ * This information is rarely useful to the users of this class.
+ *
+ * @throws IndexOutOfBoundsException if
+ * <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
+ * <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
+ *
+ * @since 1.3
+ */
+ public long getBlockCompPos(int blockNumber) {
+ locateBlockByNumber(queriedBlockInfo, blockNumber);
+ return queriedBlockInfo.compressedOffset;
+ }
+
+ /**
+ * Gets the compressed size of the given Block.
+ * This together with the uncompressed size can be used to calculate
+ * the compression ratio of the specific Block.
+ *
+ * @throws IndexOutOfBoundsException if
+ * <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
+ * <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
+ *
+ * @since 1.3
+ */
+ public long getBlockCompSize(int blockNumber) {
+ locateBlockByNumber(queriedBlockInfo, blockNumber);
+ return (queriedBlockInfo.unpaddedSize + 3) & ~3;
+ }
+
+ /**
+ * Gets integrity check type (Check ID) of the given Block.
+ *
+ * @throws IndexOutOfBoundsException if
+ * <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
+ * <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
+ *
+ * @see #getCheckTypes()
+ *
+ * @since 1.3
+ */
+ public int getBlockCheckType(int blockNumber) {
+ locateBlockByNumber(queriedBlockInfo, blockNumber);
+ return queriedBlockInfo.getCheckType();
+ }
+
+ /**
+ * Gets the number of the Block that contains the byte at the given
+ * uncompressed position.
+ *
+ * @throws IndexOutOfBoundsException if
+ * <code>pos&nbsp;&lt;&nbsp;0</code> or
+ * <code>pos&nbsp;&gt;=&nbsp;length()</code>.
+ *
+ * @since 1.3
+ */
+ public int getBlockNumber(long pos) {
+ locateBlockByPos(queriedBlockInfo, pos);
+ return queriedBlockInfo.blockNumber;
+ }
+
+ /**
+ * Decompresses the next byte from this input stream.
+ *
+ * @return the next decompressed byte, or <code>-1</code>
+ * to indicate the end of the compressed stream
+ *
+ * @throws CorruptedInputException
+ * @throws UnsupportedOptionsException
+ * @throws MemoryLimitException
+ *
+ * @throws XZIOException if the stream has been closed
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ */
+ public int read() throws IOException {
+ return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
+ }
+
+ /**
+ * Decompresses into an array of bytes.
+ * <p>
+ * If <code>len</code> is zero, no bytes are read and <code>0</code>
+ * is returned. Otherwise this will try to decompress <code>len</code>
+ * bytes of uncompressed data. Less than <code>len</code> bytes may
+ * be read only in the following situations:
+ * <ul>
+ * <li>The end of the compressed data was reached successfully.</li>
+ * <li>An error is detected after at least one but less than
+ * <code>len</code> bytes have already been successfully
+ * decompressed. The next call with non-zero <code>len</code>
+ * will immediately throw the pending exception.</li>
+ * <li>An exception is thrown.</li>
+ * </ul>
+ *
+ * @param buf target buffer for uncompressed data
+ * @param off start offset in <code>buf</code>
+ * @param len maximum number of uncompressed bytes to read
+ *
+ * @return number of bytes read, or <code>-1</code> to indicate
+ * the end of the compressed stream
+ *
+ * @throws CorruptedInputException
+ * @throws UnsupportedOptionsException
+ * @throws MemoryLimitException
+ *
+ * @throws XZIOException if the stream has been closed
+ *
+ * @throws IOException may be thrown by <code>in</code>
+ */
+ public int read(byte[] buf, int off, int len) throws IOException {
+ if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
+ throw new IndexOutOfBoundsException();
+
+ if (len == 0)
+ return 0;
+
+ if (in == null)
+ throw new XZIOException("Stream closed");
+
+ if (exception != null)
+ throw exception;
+
+ int size = 0;
+
+ try {
+ if (seekNeeded)
+ seek();
+
+ if (endReached)
+ return -1;
+
+ while (len > 0) {
+ if (blockDecoder == null) {
+ seek();
+ if (endReached)
+ break;
+ }
+
+ int ret = blockDecoder.read(buf, off, len);
+
+ if (ret > 0) {
+ curPos += ret;
+ size += ret;
+ off += ret;
+ len -= ret;
+ } else if (ret == -1) {
+ blockDecoder = null;
+ }
+ }
+ } catch (IOException e) {
+ // We know that the file isn't simply truncated because we could
+ // parse the Indexes in the constructor. So convert EOFException
+ // to CorruptedInputException.
+ if (e instanceof EOFException)
+ e = new CorruptedInputException();
+
+ exception = e;
+ if (size == 0)
+ throw e;
+ }
+
+ return size;
+ }
+
+ /**
+ * Returns the number of uncompressed bytes that can be read
+ * without blocking. The value is returned with an assumption
+ * that the compressed input data will be valid. If the compressed
+ * data is corrupt, <code>CorruptedInputException</code> may get
+ * thrown before the number of bytes claimed to be available have
+ * been read from this input stream.
+ *
+ * @return the number of uncompressed bytes that can be read
+ * without blocking
+ */
+ public int available() throws IOException {
+ if (in == null)
+ throw new XZIOException("Stream closed");
+
+ if (exception != null)
+ throw exception;
+
+ if (endReached || seekNeeded || blockDecoder == null)
+ return 0;
+
+ return blockDecoder.available();
+ }
+
+ /**
+ * Closes the stream and calls <code>in.close()</code>.
+ * If the stream was already closed, this does nothing.
+ * <p>
+ * This is equivalent to <code>close(true)</code>.
+ *
+ * @throws IOException if thrown by <code>in.close()</code>
+ */
+ public void close() throws IOException {
+ close(true);
+ }
+
+ /**
+ * Closes the stream and optionally calls <code>in.close()</code>.
+ * If the stream was already closed, this does nothing.
+ * If <code>close(false)</code> has been called, a further
+ * call of <code>close(true)</code> does nothing (it doesn't call
+ * <code>in.close()</code>).
+ * <p>
+ * If you don't want to close the underlying <code>InputStream</code>,
+ * there is usually no need to worry about closing this stream either;
+ * it's fine to do nothing and let the garbage collector handle it.
+ * However, if you are using {@link ArrayCache}, <code>close(false)</code>
+ * can be useful to put the allocated arrays back to the cache without
+ * closing the underlying <code>InputStream</code>.
+ * <p>
+ * Note that if you successfully reach the end of the stream
+ * (<code>read</code> returns <code>-1</code>), the arrays are
+ * automatically put back to the cache by that <code>read</code> call. In
+ * this situation <code>close(false)</code> is redundant (but harmless).
+ *
+ * @throws IOException if thrown by <code>in.close()</code>
+ *
+ * @since 1.7
+ */
+ public void close(boolean closeInput) throws IOException {
+ if (in != null) {
+ if (blockDecoder != null) {
+ blockDecoder.close();
+ blockDecoder = null;
+ }
+
+ try {
+ if (closeInput)
+ in.close();
+ } finally {
+ in = null;
+ }
+ }
+ }
+
+ /**
+ * Gets the uncompressed size of this input stream. If there are multiple
+ * XZ Streams, the total uncompressed size of all XZ Streams is returned.
+ */
+ public long length() {
+ return uncompressedSize;
+ }
+
+ /**
+ * Gets the current uncompressed position in this input stream.
+ *
+ * @throws XZIOException if the stream has been closed
+ */
+ public long position() throws IOException {
+ if (in == null)
+ throw new XZIOException("Stream closed");
+
+ return seekNeeded ? seekPos : curPos;
+ }
+
+ /**
+ * Seeks to the specified absolute uncompressed position in the stream.
+ * This only stores the new position, so this function itself is always
+ * very fast. The actual seek is done when <code>read</code> is called
+ * to read at least one byte.
+ * <p>
+ * Seeking past the end of the stream is possible. In that case
+ * <code>read</code> will return <code>-1</code> to indicate
+ * the end of the stream.
+ *
+ * @param pos new uncompressed read position
+ *
+ * @throws XZIOException
+ * if <code>pos</code> is negative, or
+ * if stream has been closed
+ */
+ public void seek(long pos) throws IOException {
+ if (in == null)
+ throw new XZIOException("Stream closed");
+
+ if (pos < 0)
+ throw new XZIOException("Negative seek position: " + pos);
+
+ seekPos = pos;
+ seekNeeded = true;
+ }
+
+ /**
+ * Seeks to the beginning of the given XZ Block.
+ *
+ * @throws XZIOException
+ * if <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
+ * <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>,
+ * or if stream has been closed
+ *
+ * @since 1.3
+ */
+ public void seekToBlock(int blockNumber) throws IOException {
+ if (in == null)
+ throw new XZIOException("Stream closed");
+
+ if (blockNumber < 0 || blockNumber >= blockCount)
+ throw new XZIOException("Invalid XZ Block number: " + blockNumber);
+
+ // This is a bit silly implementation. Here we locate the uncompressed
+ // offset of the specified Block, then when doing the actual seek in
+ // seek(), we need to find the Block number based on seekPos.
+ seekPos = getBlockPos(blockNumber);
+ seekNeeded = true;
+ }
+
+ /**
+ * Does the actual seeking. This is also called when <code>read</code>
+ * needs a new Block to decode.
+ */
+ private void seek() throws IOException {
+ // If seek(long) wasn't called, we simply need to get the next Block
+ // from the same Stream. If there are no more Blocks in this Stream,
+ // then we behave as if seek(long) had been called.
+ if (!seekNeeded) {
+ if (curBlockInfo.hasNext()) {
+ curBlockInfo.setNext();
+ initBlockDecoder();
+ return;
+ }
+
+ seekPos = curPos;
+ }
+
+ seekNeeded = false;
+
+ // Check if we are seeking to or past the end of the file.
+ if (seekPos >= uncompressedSize) {
+ curPos = seekPos;
+
+ if (blockDecoder != null) {
+ blockDecoder.close();
+ blockDecoder = null;
+ }
+
+ endReached = true;
+ return;
+ }
+
+ endReached = false;
+
+ // Locate the Block that contains the uncompressed target position.
+ locateBlockByPos(curBlockInfo, seekPos);
+
+ // Seek in the underlying stream and create a new Block decoder
+ // only if really needed. We can skip it if the current position
+ // is already in the correct Block and the target position hasn't
+ // been decompressed yet.
+ //
+ // NOTE: If curPos points to the beginning of this Block, it's
+ // because it was left there after decompressing an earlier Block.
+ // In that case, decoding of the current Block hasn't been started
+ // yet. (Decoding of a Block won't be started until at least one
+ // byte will also be read from it.)
+ if (!(curPos > curBlockInfo.uncompressedOffset && curPos <= seekPos)) {
+ // Seek to the beginning of the Block.
+ in.seek(curBlockInfo.compressedOffset);
+
+ // Since it is possible that this Block is from a different
+ // Stream than the previous Block, initialize a new Check.
+ check = Check.getInstance(curBlockInfo.getCheckType());
+
+ // Create a new Block decoder.
+ initBlockDecoder();
+ curPos = curBlockInfo.uncompressedOffset;
+ }
+
+ // If the target wasn't at a Block boundary, decompress and throw
+ // away data to reach the target position.
+ if (seekPos > curPos) {
+ // NOTE: The "if" below is there just in case. In this situation,
+ // blockDecoder.skip will always skip the requested amount
+ // or throw an exception.
+ long skipAmount = seekPos - curPos;
+ if (blockDecoder.skip(skipAmount) != skipAmount)
+ throw new CorruptedInputException();
+
+ curPos = seekPos;
+ }
+ }
+
+ /**
+ * Locates the Block that contains the given uncompressed position.
+ */
+ private void locateBlockByPos(BlockInfo info, long pos) {
+ if (pos < 0 || pos >= uncompressedSize)
+ throw new IndexOutOfBoundsException(
+ "Invalid uncompressed position: " + pos);
+
+ // Locate the Stream that contains the target position.
+ IndexDecoder index;
+ for (int i = 0; ; ++i) {
+ index = streams.get(i);
+ if (index.hasUncompressedOffset(pos))
+ break;
+ }
+
+ // Locate the Block from the Stream that contains the target position.
+ index.locateBlock(info, pos);
+
+ assert (info.compressedOffset & 3) == 0;
+ assert info.uncompressedSize > 0;
+ assert pos >= info.uncompressedOffset;
+ assert pos < info.uncompressedOffset + info.uncompressedSize;
+ }
+
+ /**
+ * Locates the given Block and stores information about it
+ * to <code>info</code>.
+ */
+ private void locateBlockByNumber(BlockInfo info, int blockNumber) {
+ // Validate.
+ if (blockNumber < 0 || blockNumber >= blockCount)
+ throw new IndexOutOfBoundsException(
+ "Invalid XZ Block number: " + blockNumber);
+
+ // Skip the search if info already points to the correct Block.
+ if (info.blockNumber == blockNumber)
+ return;
+
+ // Search the Stream that contains the given Block and then
+ // search the Block from that Stream.
+ for (int i = 0; ; ++i) {
+ IndexDecoder index = streams.get(i);
+ if (index.hasRecord(blockNumber)) {
+ index.setBlockInfo(info, blockNumber);
+ return;
+ }
+ }
+ }
+
+ /**
+ * Initializes a new BlockInputStream. This is a helper function for
+ * <code>seek()</code>.
+ */
+ private void initBlockDecoder() throws IOException {
+ try {
+ // Set it to null first so that GC can collect it if memory
+ // runs tight when initializing a new BlockInputStream.
+ if (blockDecoder != null) {
+ blockDecoder.close();
+ blockDecoder = null;
+ }
+
+ blockDecoder = new BlockInputStream(
+ in, check, verifyCheck, memoryLimit,
+ curBlockInfo.unpaddedSize, curBlockInfo.uncompressedSize,
+ arrayCache);
+ } catch (MemoryLimitException e) {
+ // BlockInputStream doesn't know how much memory we had
+ // already needed so we need to recreate the exception.
+ assert memoryLimit >= 0;
+ throw new MemoryLimitException(
+ e.getMemoryNeeded() + indexMemoryUsage,
+ memoryLimit + indexMemoryUsage);
+ } catch (IndexIndicatorException e) {
+ // It cannot be Index so the file must be corrupt.
+ throw new CorruptedInputException();
+ }
+ }
+}
diff --git a/src/org/tukaani/xz/SimpleInputStream.java b/src/org/tukaani/xz/SimpleInputStream.java
new file mode 100644
index 0000000..afd40c7
--- /dev/null
+++ b/src/org/tukaani/xz/SimpleInputStream.java
@@ -0,0 +1,138 @@
+/*
+ * SimpleInputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import java.io.IOException;
+import org.tukaani.xz.simple.SimpleFilter;
+
+class SimpleInputStream extends InputStream {
+    private static final int FILTER_BUF_SIZE = 4096;
+
+    // Underlying input stream; set to null once this stream is closed.
+    private InputStream in;
+    private final SimpleFilter simpleFilter;
+
+    // filterBuf layout: [0, pos) is already-consumed space,
+    // [pos, pos + filtered) holds filtered bytes ready to be returned,
+    // and the next `unfiltered` bytes are raw input not yet filtered.
+    private final byte[] filterBuf = new byte[FILTER_BUF_SIZE];
+    private int pos = 0;
+    private int filtered = 0;
+    private int unfiltered = 0;
+
+    private boolean endReached = false;
+    // Sticky exception: once a read fails, later calls rethrow the same
+    // exception instead of continuing from an inconsistent state.
+    private IOException exception = null;
+
+    // One-byte buffer for the single-byte read() variant.
+    private final byte[] tempBuf = new byte[1];
+
+    // Memory usage estimate in KiB (buffer size rounded up plus a
+    // little overhead).
+    static int getMemoryUsage() {
+        return 1 + FILTER_BUF_SIZE / 1024;
+    }
+
+    SimpleInputStream(InputStream in, SimpleFilter simpleFilter) {
+        // Check for null because otherwise null wouldn't be detected
+        // in this constructor.
+        if (in == null)
+            throw new NullPointerException();
+
+        // The simpleFilter argument comes from this package
+        // so it is known to be non-null already.
+        assert simpleFilter != null;
+
+        this.in = in;
+        this.simpleFilter = simpleFilter;
+    }
+
+    public int read() throws IOException {
+        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
+    }
+
+    public int read(byte[] buf, int off, int len) throws IOException {
+        // off + len < 0 catches int overflow of off + len.
+        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
+            throw new IndexOutOfBoundsException();
+
+        if (len == 0)
+            return 0;
+
+        if (in == null)
+            throw new XZIOException("Stream closed");
+
+        if (exception != null)
+            throw exception;
+
+        try {
+            int size = 0;
+
+            while (true) {
+                // Copy filtered data into the caller-provided buffer.
+                int copySize = Math.min(filtered, len);
+                System.arraycopy(filterBuf, pos, buf, off, copySize);
+                pos += copySize;
+                filtered -= copySize;
+                off += copySize;
+                len -= copySize;
+                size += copySize;
+
+                // If end of filterBuf was reached, move the pending data to
+                // the beginning of the buffer so that more data can be
+                // copied into filterBuf on the next loop iteration.
+                if (pos + filtered + unfiltered == FILTER_BUF_SIZE) {
+                    System.arraycopy(filterBuf, pos, filterBuf, 0,
+                                     filtered + unfiltered);
+                    pos = 0;
+                }
+
+                if (len == 0 || endReached)
+                    return size > 0 ? size : -1;
+
+                // len > 0 here, so the copy above must have drained all
+                // currently filtered bytes.
+                assert filtered == 0;
+
+                // Get more data into the temporary buffer.
+                int inSize = FILTER_BUF_SIZE - (pos + filtered + unfiltered);
+                inSize = in.read(filterBuf, pos + filtered + unfiltered,
+                                 inSize);
+
+                if (inSize == -1) {
+                    // Mark the remaining unfiltered bytes to be ready
+                    // to be copied out.
+                    endReached = true;
+                    filtered = unfiltered;
+                    unfiltered = 0;
+                } else {
+                    // Filter the data in filterBuf.
+                    unfiltered += inSize;
+                    filtered = simpleFilter.code(filterBuf, pos, unfiltered);
+                    assert filtered <= unfiltered;
+                    unfiltered -= filtered;
+                }
+            }
+        } catch (IOException e) {
+            exception = e;
+            throw e;
+        }
+    }
+
+    public int available() throws IOException {
+        if (in == null)
+            throw new XZIOException("Stream closed");
+
+        if (exception != null)
+            throw exception;
+
+        // Only already-filtered bytes can be returned without reading
+        // (and possibly blocking on) the underlying stream.
+        return filtered;
+    }
+
+    public void close() throws IOException {
+        if (in != null) {
+            try {
+                in.close();
+            } finally {
+                // Mark closed even if in.close() throws.
+                in = null;
+            }
+        }
+    }
+}
diff --git a/src/org/tukaani/xz/SimpleOutputStream.java b/src/org/tukaani/xz/SimpleOutputStream.java
new file mode 100644
index 0000000..771b1fb
--- /dev/null
+++ b/src/org/tukaani/xz/SimpleOutputStream.java
@@ -0,0 +1,151 @@
+/*
+ * SimpleOutputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.IOException;
+import org.tukaani.xz.simple.SimpleFilter;
+
+class SimpleOutputStream extends FinishableOutputStream {
+    private static final int FILTER_BUF_SIZE = 4096;
+
+    // Downstream output stream; set to null once this stream is closed.
+    private FinishableOutputStream out;
+    private final SimpleFilter simpleFilter;
+
+    // filterBuf layout: [0, pos) has already been filtered and written out,
+    // [pos, pos + unfiltered) holds bytes still waiting to be filtered.
+    private final byte[] filterBuf = new byte[FILTER_BUF_SIZE];
+    private int pos = 0;
+    private int unfiltered = 0;
+
+    // Sticky exception: once a write fails, later calls rethrow it.
+    private IOException exception = null;
+    private boolean finished = false;
+
+    // One-byte buffer for the single-byte write() variant.
+    private final byte[] tempBuf = new byte[1];
+
+    // Memory usage estimate in KiB (buffer size rounded up plus a
+    // little overhead).
+    static int getMemoryUsage() {
+        return 1 + FILTER_BUF_SIZE / 1024;
+    }
+
+    SimpleOutputStream(FinishableOutputStream out,
+                       SimpleFilter simpleFilter) {
+        if (out == null)
+            throw new NullPointerException();
+
+        // NOTE(review): unlike SimpleInputStream, simpleFilter is not
+        // null-checked or asserted here; presumably all callers are in
+        // this package and pass non-null — confirm.
+        this.out = out;
+        this.simpleFilter = simpleFilter;
+    }
+
+    public void write(int b) throws IOException {
+        tempBuf[0] = (byte)b;
+        write(tempBuf, 0, 1);
+    }
+
+    public void write(byte[] buf, int off, int len) throws IOException {
+        // off + len < 0 catches int overflow of off + len.
+        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
+            throw new IndexOutOfBoundsException();
+
+        if (exception != null)
+            throw exception;
+
+        if (finished)
+            throw new XZIOException("Stream finished or closed");
+
+        while (len > 0) {
+            // Copy more unfiltered data into filterBuf.
+            int copySize = Math.min(len, FILTER_BUF_SIZE - (pos + unfiltered));
+            System.arraycopy(buf, off, filterBuf, pos + unfiltered, copySize);
+            off += copySize;
+            len -= copySize;
+            unfiltered += copySize;
+
+            // Filter the data in filterBuf. The filter may leave a tail
+            // of bytes unfiltered until more input arrives.
+            int filtered = simpleFilter.code(filterBuf, pos, unfiltered);
+            assert filtered <= unfiltered;
+            unfiltered -= filtered;
+
+            // Write out the filtered data.
+            try {
+                out.write(filterBuf, pos, filtered);
+            } catch (IOException e) {
+                exception = e;
+                throw e;
+            }
+
+            pos += filtered;
+
+            // If end of filterBuf was reached, move the pending unfiltered
+            // data to the beginning of the buffer so that more data can
+            // be copied into filterBuf on the next loop iteration.
+            if (pos + unfiltered == FILTER_BUF_SIZE) {
+                System.arraycopy(filterBuf, pos, filterBuf, 0, unfiltered);
+                pos = 0;
+            }
+        }
+    }
+
+    // Flushes the remaining unfiltered bytes as-is and marks the stream
+    // finished. Helper for finish() and close().
+    private void writePending() throws IOException {
+        assert !finished;
+
+        if (exception != null)
+            throw exception;
+
+        try {
+            out.write(filterBuf, pos, unfiltered);
+        } catch (IOException e) {
+            exception = e;
+            throw e;
+        }
+
+        finished = true;
+    }
+
+    public void flush() throws IOException {
+        // Flushing mid-stream would force unfiltered bytes out and break
+        // the filter state, so it is rejected outright.
+        throw new UnsupportedOptionsException("Flushing is not supported");
+    }
+
+    public void finish() throws IOException {
+        if (!finished) {
+            // If it fails, don't call out.finish().
+            writePending();
+
+            try {
+                out.finish();
+            } catch (IOException e) {
+                exception = e;
+                throw e;
+            }
+        }
+    }
+
+    public void close() throws IOException {
+        if (out != null) {
+            if (!finished) {
+                // out.close() must be called even if writePending() fails.
+                // writePending() saves the possible exception so we can
+                // ignore exceptions here.
+                try {
+                    writePending();
+                } catch (IOException e) {}
+            }
+
+            try {
+                out.close();
+            } catch (IOException e) {
+                // If there is an earlier exception, the exception
+                // from out.close() is lost.
+                if (exception == null)
+                    exception = e;
+            }
+
+            out = null;
+        }
+
+        if (exception != null)
+            throw exception;
+    }
+}
diff --git a/src/org/tukaani/xz/SingleXZInputStream.java b/src/org/tukaani/xz/SingleXZInputStream.java
new file mode 100644
index 0000000..8da2be0
--- /dev/null
+++ b/src/org/tukaani/xz/SingleXZInputStream.java
@@ -0,0 +1,535 @@
+/*
+ * SingleXZInputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.EOFException;
+import org.tukaani.xz.common.DecoderUtil;
+import org.tukaani.xz.common.StreamFlags;
+import org.tukaani.xz.index.IndexHash;
+import org.tukaani.xz.check.Check;
+
+/**
+ * Decompresses exactly one XZ Stream in streamed mode (no seeking).
+ * The decompression stops after the first XZ Stream has been decompressed,
+ * and the read position in the input stream is left at the first byte
+ * after the end of the XZ Stream. This can be useful when XZ data has
+ * been stored inside some other file format or protocol.
+ * <p>
+ * Unless you know what you are doing, don't use this class to decompress
+ * standalone .xz files. For that purpose, use <code>XZInputStream</code>.
+ *
+ * <h4>When uncompressed size is known beforehand</h4>
+ * <p>
+ * If you are decompressing complete XZ streams and your application knows
+ * exactly how much uncompressed data there should be, it is good to try
+ * reading one more byte by calling <code>read()</code> and checking
+ * that it returns <code>-1</code>. This way the decompressor will parse the
+ * file footers and verify the integrity checks, giving the caller more
+ * confidence that the uncompressed data is valid.
+ *
+ * @see XZInputStream
+ */
+public class SingleXZInputStream extends InputStream {
+    // Underlying input stream; set to null once this stream is closed.
+    private InputStream in;
+    private final ArrayCache arrayCache;
+    private final int memoryLimit;
+    // Stream Flags parsed from the Stream Header; compared against the
+    // Stream Footer in validateStreamFooter().
+    private final StreamFlags streamHeaderFlags;
+    private final Check check;
+    private final boolean verifyCheck;
+    // Decoder of the current Block; null between Blocks.
+    private BlockInputStream blockDecoder = null;
+    // Accumulates Block sizes so the Index field can be validated
+    // when the end of the Stream is reached.
+    private final IndexHash indexHash = new IndexHash();
+    private boolean endReached = false;
+    // Sticky exception: once a read fails, later calls rethrow it.
+    private IOException exception = null;
+
+    // One-byte buffer for the single-byte read() variant.
+    private final byte[] tempBuf = new byte[1];
+
+    /**
+     * Reads the Stream Header into a buffer.
+     * This is a helper function for the constructors.
+     */
+    private static byte[] readStreamHeader(InputStream in) throws IOException {
+        byte[] streamHeader = new byte[DecoderUtil.STREAM_HEADER_SIZE];
+        new DataInputStream(in).readFully(streamHeader);
+        return streamHeader;
+    }
+
+    /**
+     * Creates a new XZ decompressor that decompresses exactly one
+     * XZ Stream from <code>in</code> without a memory usage limit.
+     * <p>
+     * This constructor reads and parses the XZ Stream Header (12 bytes)
+     * from <code>in</code>. The header of the first Block is not read
+     * until <code>read</code> is called.
+     *
+     * @param       in          input stream from which XZ-compressed
+     *                          data is read
+     *
+     * @throws      XZFormatException
+     *                          input is not in the XZ format
+     *
+     * @throws      CorruptedInputException
+     *                          XZ header CRC32 doesn't match
+     *
+     * @throws      UnsupportedOptionsException
+     *                          XZ header is valid but specifies options
+     *                          not supported by this implementation
+     *
+     * @throws      EOFException
+     *                          less than 12 bytes of input was available
+     *                          from <code>in</code>
+     *
+     * @throws      IOException may be thrown by <code>in</code>
+     */
+    public SingleXZInputStream(InputStream in) throws IOException {
+        this(in, -1);
+    }
+
+    /**
+     * Creates a new XZ decompressor that decompresses exactly one
+     * XZ Stream from <code>in</code> without a memory usage limit.
+     * <p>
+     * This is identical to <code>SingleXZInputStream(InputStream)</code>
+     * except that this also takes the <code>arrayCache</code> argument.
+     *
+     * @param       in          input stream from which XZ-compressed
+     *                          data is read
+     *
+     * @param       arrayCache  cache to be used for allocating large arrays
+     *
+     * @throws      XZFormatException
+     *                          input is not in the XZ format
+     *
+     * @throws      CorruptedInputException
+     *                          XZ header CRC32 doesn't match
+     *
+     * @throws      UnsupportedOptionsException
+     *                          XZ header is valid but specifies options
+     *                          not supported by this implementation
+     *
+     * @throws      EOFException
+     *                          less than 12 bytes of input was available
+     *                          from <code>in</code>
+     *
+     * @throws      IOException may be thrown by <code>in</code>
+     *
+     * @since 1.7
+     */
+    public SingleXZInputStream(InputStream in, ArrayCache arrayCache)
+            throws IOException {
+        this(in, -1, arrayCache);
+    }
+
+    /**
+     * Creates a new XZ decompressor that decompresses exactly one
+     * XZ Stream from <code>in</code> with an optional memory usage limit.
+     * <p>
+     * This is identical to <code>SingleXZInputStream(InputStream)</code>
+     * except that this also takes the <code>memoryLimit</code> argument.
+     *
+     * @param       in          input stream from which XZ-compressed
+     *                          data is read
+     *
+     * @param       memoryLimit memory usage limit in kibibytes (KiB)
+     *                          or <code>-1</code> to impose no
+     *                          memory usage limit
+     *
+     * @throws      XZFormatException
+     *                          input is not in the XZ format
+     *
+     * @throws      CorruptedInputException
+     *                          XZ header CRC32 doesn't match
+     *
+     * @throws      UnsupportedOptionsException
+     *                          XZ header is valid but specifies options
+     *                          not supported by this implementation
+     *
+     * @throws      EOFException
+     *                          less than 12 bytes of input was available
+     *                          from <code>in</code>
+     *
+     * @throws      IOException may be thrown by <code>in</code>
+     */
+    public SingleXZInputStream(InputStream in, int memoryLimit)
+            throws IOException {
+        this(in, memoryLimit, true);
+    }
+
+    /**
+     * Creates a new XZ decompressor that decompresses exactly one
+     * XZ Stream from <code>in</code> with an optional memory usage limit.
+     * <p>
+     * This is identical to <code>SingleXZInputStream(InputStream)</code>
+     * except that this also takes the <code>memoryLimit</code> and
+     * <code>arrayCache</code> arguments.
+     *
+     * @param       in          input stream from which XZ-compressed
+     *                          data is read
+     *
+     * @param       memoryLimit memory usage limit in kibibytes (KiB)
+     *                          or <code>-1</code> to impose no
+     *                          memory usage limit
+     *
+     * @param       arrayCache  cache to be used for allocating large arrays
+     *
+     * @throws      XZFormatException
+     *                          input is not in the XZ format
+     *
+     * @throws      CorruptedInputException
+     *                          XZ header CRC32 doesn't match
+     *
+     * @throws      UnsupportedOptionsException
+     *                          XZ header is valid but specifies options
+     *                          not supported by this implementation
+     *
+     * @throws      EOFException
+     *                          less than 12 bytes of input was available
+     *                          from <code>in</code>
+     *
+     * @throws      IOException may be thrown by <code>in</code>
+     *
+     * @since 1.7
+     */
+    public SingleXZInputStream(InputStream in, int memoryLimit,
+                               ArrayCache arrayCache) throws IOException {
+        this(in, memoryLimit, true, arrayCache);
+    }
+
+    /**
+     * Creates a new XZ decompressor that decompresses exactly one
+     * XZ Stream from <code>in</code> with an optional memory usage limit
+     * and ability to disable verification of integrity checks.
+     * <p>
+     * This is identical to <code>SingleXZInputStream(InputStream,int)</code>
+     * except that this also takes the <code>verifyCheck</code> argument.
+     * <p>
+     * Note that integrity check verification should almost never be disabled.
+     * Possible reasons to disable integrity check verification:
+     * <ul>
+     *   <li>Trying to recover data from a corrupt .xz file.</li>
+     *   <li>Speeding up decompression. This matters mostly with SHA-256
+     *   or with files that have compressed extremely well. It's recommended
+     *   that integrity checking isn't disabled for performance reasons
+     *   unless the file integrity is verified externally in some other
+     *   way.</li>
+     * </ul>
+     * <p>
+     * <code>verifyCheck</code> only affects the integrity check of
+     * the actual compressed data. The CRC32 fields in the headers
+     * are always verified.
+     *
+     * @param       in          input stream from which XZ-compressed
+     *                          data is read
+     *
+     * @param       memoryLimit memory usage limit in kibibytes (KiB)
+     *                          or <code>-1</code> to impose no
+     *                          memory usage limit
+     *
+     * @param       verifyCheck if <code>true</code>, the integrity checks
+     *                          will be verified; this should almost never
+     *                          be set to <code>false</code>
+     *
+     * @throws      XZFormatException
+     *                          input is not in the XZ format
+     *
+     * @throws      CorruptedInputException
+     *                          XZ header CRC32 doesn't match
+     *
+     * @throws      UnsupportedOptionsException
+     *                          XZ header is valid but specifies options
+     *                          not supported by this implementation
+     *
+     * @throws      EOFException
+     *                          less than 12 bytes of input was available
+     *                          from <code>in</code>
+     *
+     * @throws      IOException may be thrown by <code>in</code>
+     *
+     * @since 1.6
+     */
+    public SingleXZInputStream(InputStream in, int memoryLimit,
+                               boolean verifyCheck) throws IOException {
+        this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
+    }
+
+    /**
+     * Creates a new XZ decompressor that decompresses exactly one
+     * XZ Stream from <code>in</code> with an optional memory usage limit
+     * and ability to disable verification of integrity checks.
+     * <p>
+     * This is identical to
+     * <code>SingleXZInputStream(InputStream,int,boolean)</code>
+     * except that this also takes the <code>arrayCache</code> argument.
+     *
+     * @param       in          input stream from which XZ-compressed
+     *                          data is read
+     *
+     * @param       memoryLimit memory usage limit in kibibytes (KiB)
+     *                          or <code>-1</code> to impose no
+     *                          memory usage limit
+     *
+     * @param       verifyCheck if <code>true</code>, the integrity checks
+     *                          will be verified; this should almost never
+     *                          be set to <code>false</code>
+     *
+     * @param       arrayCache  cache to be used for allocating large arrays
+     *
+     * @throws      XZFormatException
+     *                          input is not in the XZ format
+     *
+     * @throws      CorruptedInputException
+     *                          XZ header CRC32 doesn't match
+     *
+     * @throws      UnsupportedOptionsException
+     *                          XZ header is valid but specifies options
+     *                          not supported by this implementation
+     *
+     * @throws      EOFException
+     *                          less than 12 bytes of input was available
+     *                          from <code>in</code>
+     *
+     * @throws      IOException may be thrown by <code>in</code>
+     *
+     * @since 1.7
+     */
+    public SingleXZInputStream(InputStream in, int memoryLimit,
+                               boolean verifyCheck, ArrayCache arrayCache)
+            throws IOException {
+        this(in, memoryLimit, verifyCheck, readStreamHeader(in), arrayCache);
+    }
+
+    // Package-private constructor used by XZInputStream: the caller has
+    // already read the 12-byte Stream Header and passes it in so that no
+    // input needs to be re-read.
+    SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck,
+                        byte[] streamHeader, ArrayCache arrayCache)
+            throws IOException {
+        this.arrayCache = arrayCache;
+        this.in = in;
+        this.memoryLimit = memoryLimit;
+        this.verifyCheck = verifyCheck;
+        streamHeaderFlags = DecoderUtil.decodeStreamHeader(streamHeader);
+        check = Check.getInstance(streamHeaderFlags.checkType);
+    }
+
+    /**
+     * Gets the ID of the integrity check used in this XZ Stream.
+     *
+     * @return      the Check ID specified in the XZ Stream Header
+     */
+    public int getCheckType() {
+        return streamHeaderFlags.checkType;
+    }
+
+    /**
+     * Gets the name of the integrity check used in this XZ Stream.
+     *
+     * @return      the name of the check specified in the XZ Stream Header
+     */
+    public String getCheckName() {
+        return check.getName();
+    }
+
+    /**
+     * Decompresses the next byte from this input stream.
+     * <p>
+     * Reading lots of data with <code>read()</code> from this input stream
+     * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
+     * if you need to read lots of data one byte at a time.
+     *
+     * @return      the next decompressed byte, or <code>-1</code>
+     *              to indicate the end of the compressed stream
+     *
+     * @throws      CorruptedInputException
+     * @throws      UnsupportedOptionsException
+     * @throws      MemoryLimitException
+     *
+     * @throws      XZIOException if the stream has been closed
+     *
+     * @throws      EOFException
+     *                          compressed input is truncated or corrupt
+     *
+     * @throws      IOException may be thrown by <code>in</code>
+     */
+    public int read() throws IOException {
+        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
+    }
+
+    /**
+     * Decompresses into an array of bytes.
+     * <p>
+     * If <code>len</code> is zero, no bytes are read and <code>0</code>
+     * is returned. Otherwise this will try to decompress <code>len</code>
+     * bytes of uncompressed data. Less than <code>len</code> bytes may
+     * be read only in the following situations:
+     * <ul>
+     *   <li>The end of the compressed data was reached successfully.</li>
+     *   <li>An error is detected after at least one but less than
+     *   <code>len</code> bytes have already been successfully decompressed.
+     *   The next call with non-zero <code>len</code> will immediately
+     *   throw the pending exception.</li>
+     *   <li>An exception is thrown.</li>
+     * </ul>
+     *
+     * @param       buf         target buffer for uncompressed data
+     * @param       off         start offset in <code>buf</code>
+     * @param       len         maximum number of uncompressed bytes to read
+     *
+     * @return      number of bytes read, or <code>-1</code> to indicate
+     *              the end of the compressed stream
+     *
+     * @throws      CorruptedInputException
+     * @throws      UnsupportedOptionsException
+     * @throws      MemoryLimitException
+     *
+     * @throws      XZIOException if the stream has been closed
+     *
+     * @throws      EOFException
+     *                          compressed input is truncated or corrupt
+     *
+     * @throws      IOException may be thrown by <code>in</code>
+     */
+    public int read(byte[] buf, int off, int len) throws IOException {
+        // off + len < 0 catches int overflow of off + len.
+        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
+            throw new IndexOutOfBoundsException();
+
+        if (len == 0)
+            return 0;
+
+        if (in == null)
+            throw new XZIOException("Stream closed");
+
+        if (exception != null)
+            throw exception;
+
+        if (endReached)
+            return -1;
+
+        int size = 0;
+
+        try {
+            // Decode Block by Block until the request is satisfied or
+            // the end of the Stream (the Index field) is reached.
+            while (len > 0) {
+                if (blockDecoder == null) {
+                    try {
+                        blockDecoder = new BlockInputStream(
+                                in, check, verifyCheck, memoryLimit, -1, -1,
+                                arrayCache);
+                    } catch (IndexIndicatorException e) {
+                        // The Index indicator marks the end of the Blocks:
+                        // validate the Index and the Stream Footer.
+                        indexHash.validate(in);
+                        validateStreamFooter();
+                        endReached = true;
+                        return size > 0 ? size : -1;
+                    }
+                }
+
+                int ret = blockDecoder.read(buf, off, len);
+
+                if (ret > 0) {
+                    size += ret;
+                    off += ret;
+                    len -= ret;
+                } else if (ret == -1) {
+                    // Block finished: record its sizes for Index
+                    // validation and move on to the next Block.
+                    indexHash.add(blockDecoder.getUnpaddedSize(),
+                                  blockDecoder.getUncompressedSize());
+                    blockDecoder = null;
+                }
+            }
+        } catch (IOException e) {
+            // Remember the error but return the successfully decompressed
+            // bytes first; the next call rethrows the saved exception.
+            exception = e;
+            if (size == 0)
+                throw e;
+        }
+
+        return size;
+    }
+
+    // Reads the Stream Footer and checks that it agrees with the Stream
+    // Header and with the Index size accumulated in indexHash.
+    private void validateStreamFooter() throws IOException {
+        byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
+        new DataInputStream(in).readFully(buf);
+        StreamFlags streamFooterFlags = DecoderUtil.decodeStreamFooter(buf);
+
+        if (!DecoderUtil.areStreamFlagsEqual(streamHeaderFlags,
+                                             streamFooterFlags)
+                || indexHash.getIndexSize() != streamFooterFlags.backwardSize)
+            throw new CorruptedInputException(
+                    "XZ Stream Footer does not match Stream Header");
+    }
+
+    /**
+     * Returns the number of uncompressed bytes that can be read
+     * without blocking. The value is returned with an assumption
+     * that the compressed input data will be valid. If the compressed
+     * data is corrupt, <code>CorruptedInputException</code> may get
+     * thrown before the number of bytes claimed to be available have
+     * been read from this input stream.
+     *
+     * @return      the number of uncompressed bytes that can be read
+     *              without blocking
+     */
+    public int available() throws IOException {
+        if (in == null)
+            throw new XZIOException("Stream closed");
+
+        if (exception != null)
+            throw exception;
+
+        return blockDecoder == null ? 0 : blockDecoder.available();
+    }
+
+    /**
+     * Closes the stream and calls <code>in.close()</code>.
+     * If the stream was already closed, this does nothing.
+     * <p>
+     * This is equivalent to <code>close(true)</code>.
+     *
+     * @throws  IOException if thrown by <code>in.close()</code>
+     */
+    public void close() throws IOException {
+        close(true);
+    }
+
+    /**
+     * Closes the stream and optionally calls <code>in.close()</code>.
+     * If the stream was already closed, this does nothing.
+     * If <code>close(false)</code> has been called, a further
+     * call of <code>close(true)</code> does nothing (it doesn't call
+     * <code>in.close()</code>).
+     * <p>
+     * If you don't want to close the underlying <code>InputStream</code>,
+     * there is usually no need to worry about closing this stream either;
+     * it's fine to do nothing and let the garbage collector handle it.
+     * However, if you are using {@link ArrayCache}, <code>close(false)</code>
+     * can be useful to put the allocated arrays back to the cache without
+     * closing the underlying <code>InputStream</code>.
+     * <p>
+     * Note that if you successfully reach the end of the stream
+     * (<code>read</code> returns <code>-1</code>), the arrays are
+     * automatically put back to the cache by that <code>read</code> call. In
+     * this situation <code>close(false)</code> is redundant (but harmless).
+     *
+     * @throws  IOException if thrown by <code>in.close()</code>
+     *
+     * @since 1.7
+     */
+    public void close(boolean closeInput) throws IOException {
+        if (in != null) {
+            // Closing the Block decoder returns its arrays to arrayCache.
+            if (blockDecoder != null) {
+                blockDecoder.close();
+                blockDecoder = null;
+            }
+
+            try {
+                if (closeInput)
+                    in.close();
+            } finally {
+                // Mark closed even if in.close() throws.
+                in = null;
+            }
+        }
+    }
+}
diff --git a/src/org/tukaani/xz/UncompressedLZMA2OutputStream.java b/src/org/tukaani/xz/UncompressedLZMA2OutputStream.java
new file mode 100644
index 0000000..5d0e65f
--- /dev/null
+++ b/src/org/tukaani/xz/UncompressedLZMA2OutputStream.java
@@ -0,0 +1,164 @@
+/*
+ * UncompressedLZMA2OutputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+class UncompressedLZMA2OutputStream extends FinishableOutputStream {
+    private final ArrayCache arrayCache;
+
+    // Downstream output stream; set to null once this stream is closed.
+    private FinishableOutputStream out;
+    private final DataOutputStream outData;
+
+    // Buffer of pending uncompressed bytes; flushed as one chunk when
+    // full (see writeChunk).
+    private final byte[] uncompBuf;
+    private int uncompPos = 0;
+    // The very first chunk must signal a dictionary reset (0x01).
+    private boolean dictResetNeeded = true;
+
+    private boolean finished = false;
+    // Sticky exception: once a write fails, later calls rethrow it.
+    private IOException exception = null;
+
+    // One-byte buffer for the single-byte write() variant.
+    private final byte[] tempBuf = new byte[1];
+
+    static int getMemoryUsage() {
+        // uncompBuf + a little extra
+        return 70;
+    }
+
+    UncompressedLZMA2OutputStream(FinishableOutputStream out,
+                                  ArrayCache arrayCache) {
+        if (out == null)
+            throw new NullPointerException();
+
+        this.out = out;
+        outData = new DataOutputStream(out);
+
+        // We only allocate one array from the cache. We will call
+        // putArray directly in writeEndMarker and thus we don't use
+        // ResettableArrayCache here.
+        this.arrayCache = arrayCache;
+        uncompBuf = arrayCache.getByteArray(
+                LZMA2OutputStream.COMPRESSED_SIZE_MAX, false);
+    }
+
+    public void write(int b) throws IOException {
+        tempBuf[0] = (byte)b;
+        write(tempBuf, 0, 1);
+    }
+
+    public void write(byte[] buf, int off, int len) throws IOException {
+        // off + len < 0 catches int overflow of off + len.
+        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
+            throw new IndexOutOfBoundsException();
+
+        if (exception != null)
+            throw exception;
+
+        if (finished)
+            throw new XZIOException("Stream finished or closed");
+
+        try {
+            while (len > 0) {
+                // Fill uncompBuf and emit a chunk whenever it gets full.
+                int copySize = Math.min(LZMA2OutputStream.COMPRESSED_SIZE_MAX
+                                        - uncompPos, len);
+                System.arraycopy(buf, off, uncompBuf, uncompPos, copySize);
+                len -= copySize;
+                uncompPos += copySize;
+
+                if (uncompPos == LZMA2OutputStream.COMPRESSED_SIZE_MAX)
+                    writeChunk();
+            }
+        } catch (IOException e) {
+            exception = e;
+            throw e;
+        }
+    }
+
+    // Writes one LZMA2 uncompressed chunk: a control byte (0x01 with
+    // dictionary reset, 0x02 without), the data size minus one as a
+    // 16-bit big-endian integer, and then the data itself.
+    private void writeChunk() throws IOException {
+        outData.writeByte(dictResetNeeded ? 0x01 : 0x02);
+        outData.writeShort(uncompPos - 1);
+        outData.write(uncompBuf, 0, uncompPos);
+        uncompPos = 0;
+        dictResetNeeded = false;
+    }
+
+    // Flushes any pending data and writes the LZMA2 end marker (0x00).
+    // Also returns uncompBuf to the cache; helper for finish()/close().
+    private void writeEndMarker() throws IOException {
+        if (exception != null)
+            throw exception;
+
+        if (finished)
+            throw new XZIOException("Stream finished or closed");
+
+        try {
+            if (uncompPos > 0)
+                writeChunk();
+
+            out.write(0x00);
+        } catch (IOException e) {
+            exception = e;
+            throw e;
+        }
+
+        finished = true;
+        arrayCache.putArray(uncompBuf);
+    }
+
+    public void flush() throws IOException {
+        if (exception != null)
+            throw exception;
+
+        if (finished)
+            throw new XZIOException("Stream finished or closed");
+
+        try {
+            // Emit the pending bytes as a (possibly short) chunk before
+            // flushing downstream.
+            if (uncompPos > 0)
+                writeChunk();
+
+            out.flush();
+        } catch (IOException e) {
+            exception = e;
+            throw e;
+        }
+    }
+
+    public void finish() throws IOException {
+        if (!finished) {
+            writeEndMarker();
+
+            try {
+                out.finish();
+            } catch (IOException e) {
+                exception = e;
+                throw e;
+            }
+        }
+    }
+
+    public void close() throws IOException {
+        if (out != null) {
+            if (!finished) {
+                // writeEndMarker() saves the possible exception so it is
+                // safe to ignore it here; out.close() must still run.
+                try {
+                    writeEndMarker();
+                } catch (IOException e) {}
+            }
+
+            try {
+                out.close();
+            } catch (IOException e) {
+                // If there is an earlier exception, the exception
+                // from out.close() is lost.
+                if (exception == null)
+                    exception = e;
+            }
+
+            out = null;
+        }
+
+        if (exception != null)
+            throw exception;
+    }
+}
diff --git a/src/org/tukaani/xz/UnsupportedOptionsException.java b/src/org/tukaani/xz/UnsupportedOptionsException.java
new file mode 100644
index 0000000..9aa16e8
--- /dev/null
+++ b/src/org/tukaani/xz/UnsupportedOptionsException.java
@@ -0,0 +1,34 @@
+/*
+ * UnsupportedOptionsException
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+/**
+ * Thrown when compression options not supported by this implementation
+ * are detected. Some other implementation might support those options.
+ */
+public class UnsupportedOptionsException extends XZIOException {
+    // Fixed serialization version shared by the exception classes
+    // in this package.
+    private static final long serialVersionUID = 3L;
+
+    /**
+     * Creates a new UnsupportedOptionsException with null
+     * as its error detail message.
+     */
+    public UnsupportedOptionsException() {}
+
+    /**
+     * Creates a new UnsupportedOptionsException with the given
+     * error detail message.
+     *
+     * @param       s           error detail message
+     */
+    public UnsupportedOptionsException(String s) {
+        super(s);
+    }
+}
diff --git a/src/org/tukaani/xz/X86Options.java b/src/org/tukaani/xz/X86Options.java
new file mode 100644
index 0000000..671ec6d
--- /dev/null
+++ b/src/org/tukaani/xz/X86Options.java
@@ -0,0 +1,37 @@
+/*
+ * X86Options
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import org.tukaani.xz.simple.X86;
+
+/**
+ * BCJ filter for x86 (32-bit and 64-bit) instructions.
+ */
+public class X86Options extends BCJOptions {
+    // x86 BCJ has no alignment requirement beyond a single byte.
+    private static final int ALIGNMENT = 1;
+
+    public X86Options() {
+        super(ALIGNMENT);
+    }
+
+    // Encoder side: X86(true, ...) selects encoding mode.
+    // arrayCache is unused because SimpleOutputStream allocates no
+    // large arrays.
+    public FinishableOutputStream getOutputStream(FinishableOutputStream out,
+                                                  ArrayCache arrayCache) {
+        return new SimpleOutputStream(out, new X86(true, startOffset));
+    }
+
+    // Decoder side: X86(false, ...) selects decoding mode.
+    public InputStream getInputStream(InputStream in, ArrayCache arrayCache) {
+        return new SimpleInputStream(in, new X86(false, startOffset));
+    }
+
+    FilterEncoder getFilterEncoder() {
+        return new BCJEncoder(this, BCJCoder.X86_FILTER_ID);
+    }
+}
diff --git a/src/org/tukaani/xz/XZ.java b/src/org/tukaani/xz/XZ.java
new file mode 100644
index 0000000..4e0857f
--- /dev/null
+++ b/src/org/tukaani/xz/XZ.java
@@ -0,0 +1,53 @@
+/*
+ * XZ
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+/**
+ * XZ constants.
+ */
+public class XZ {
+    /**
+     * XZ Header Magic Bytes begin a XZ file.
+     * This can be useful to detect XZ compressed data.
+     */
+    public static final byte[] HEADER_MAGIC = {
+            (byte)0xFD, '7', 'z', 'X', 'Z', '\0' };
+
+    /**
+     * XZ Footer Magic Bytes are the last bytes of a XZ Stream.
+     */
+    public static final byte[] FOOTER_MAGIC = { 'Y', 'Z' };
+
+    /**
+     * Integrity check ID indicating that no integrity check is calculated.
+     * <p>
+     * Omitting the integrity check is strongly discouraged except when
+     * the integrity of the data will be verified by other means anyway,
+     * and calculating the check twice would be useless.
+     */
+    public static final int CHECK_NONE = 0;
+
+    /**
+     * Integrity check ID for CRC32.
+     */
+    public static final int CHECK_CRC32 = 1;
+
+    /**
+     * Integrity check ID for CRC64.
+     */
+    public static final int CHECK_CRC64 = 4;
+
+    /**
+     * Integrity check ID for SHA-256.
+     */
+    public static final int CHECK_SHA256 = 10;
+
+    // Constants-only class; prevent instantiation.
+    private XZ() {}
+}
diff --git a/src/org/tukaani/xz/XZFormatException.java b/src/org/tukaani/xz/XZFormatException.java
new file mode 100644
index 0000000..6f63020
--- /dev/null
+++ b/src/org/tukaani/xz/XZFormatException.java
@@ -0,0 +1,24 @@
+/*
+ * XZFormatException
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+/**
+ * Thrown when the input data is not in the XZ format.
+ */
+public class XZFormatException extends XZIOException {
+    // Fixed serialization version shared by the exception classes
+    // in this package.
+    private static final long serialVersionUID = 3L;
+
+    /**
+     * Creates a new exception with the default error detail message.
+     */
+    public XZFormatException() {
+        super("Input is not in the XZ format");
+    }
+}
diff --git a/src/org/tukaani/xz/XZIOException.java b/src/org/tukaani/xz/XZIOException.java
new file mode 100644
index 0000000..14675f5
--- /dev/null
+++ b/src/org/tukaani/xz/XZIOException.java
@@ -0,0 +1,27 @@
+/*
+ * XZIOException
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
/**
 * Generic {@link java.io.IOException IOException} specific to this package.
 * The other IOExceptions in this package extend
 * from <code>XZIOException</code>.
 */
public class XZIOException extends java.io.IOException {
    private static final long serialVersionUID = 3L;

    /**
     * Creates a new exception with no error detail message.
     */
    public XZIOException() {
        super();
    }

    /**
     * Creates a new exception with the given error detail message.
     *
     * @param s error detail message
     */
    public XZIOException(String s) {
        super(s);
    }
}
diff --git a/src/org/tukaani/xz/XZInputStream.java b/src/org/tukaani/xz/XZInputStream.java
new file mode 100644
index 0000000..680f647
--- /dev/null
+++ b/src/org/tukaani/xz/XZInputStream.java
@@ -0,0 +1,527 @@
+/*
+ * XZInputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.EOFException;
+import org.tukaani.xz.common.DecoderUtil;
+
+/**
+ * Decompresses a .xz file in streamed mode (no seeking).
+ * <p>
+ * Use this to decompress regular standalone .xz files. This reads from
+ * its input stream until the end of the input or until an error occurs.
+ * This supports decompressing concatenated .xz files.
+ *
+ * <h4>Typical use cases</h4>
+ * <p>
+ * Getting an input stream to decompress a .xz file:
+ * <p><blockquote><pre>
+ * InputStream infile = new FileInputStream("foo.xz");
+ * XZInputStream inxz = new XZInputStream(infile);
+ * </pre></blockquote>
+ * <p>
+ * It's important to keep in mind that decompressor memory usage depends
+ * on the settings used to compress the file. The worst-case memory usage
+ * of XZInputStream is currently 1.5&nbsp;GiB. Still, very few files will
+ * require more than about 65&nbsp;MiB because that's how much decompressing
+ * a file created with the highest preset level will need, and only a few
+ * people use settings other than the predefined presets.
+ * <p>
+ * It is possible to specify a memory usage limit for
+ * <code>XZInputStream</code>. If decompression requires more memory than
+ * the specified limit, MemoryLimitException will be thrown when reading
+ * from the stream. For example, the following sets the memory usage limit
+ * to 100&nbsp;MiB:
+ * <p><blockquote><pre>
+ * InputStream infile = new FileInputStream("foo.xz");
+ * XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
+ * </pre></blockquote>
+ *
+ * <h4>When uncompressed size is known beforehand</h4>
+ * <p>
+ * If you are decompressing complete files and your application knows
+ * exactly how much uncompressed data there should be, it is good to try
+ * reading one more byte by calling <code>read()</code> and checking
+ * that it returns <code>-1</code>. This way the decompressor will parse the
+ * file footers and verify the integrity checks, giving the caller more
+ * confidence that the uncompressed data is valid. (This advice seems to
+ * apply to
+ * {@link java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} too.)
+ *
+ * @see SingleXZInputStream
+ */
public class XZInputStream extends InputStream {
    // Cache used for allocating the large internal arrays.
    private final ArrayCache arrayCache;

    // Memory usage limit in kibibytes (KiB), or -1 for no limit.
    private final int memoryLimit;

    // Underlying compressed input stream; set to null once closed.
    private InputStream in;

    // Decoder of the current .xz Stream; null between concatenated Streams.
    private SingleXZInputStream xzIn;

    // If true, the integrity check of the uncompressed data is verified.
    private final boolean verifyCheck;

    // True once the end of the input (after the last Stream) is reached.
    private boolean endReached = false;

    // Pending exception from an earlier call; rethrown by later calls.
    private IOException exception = null;

    // Single-byte buffer reused by read() to avoid a per-call allocation.
    private final byte[] tempBuf = new byte[1];

    /**
     * Creates a new XZ decompressor without a memory usage limit.
     * <p>
     * This constructor reads and parses the XZ Stream Header (12 bytes)
     * from <code>in</code>. The header of the first Block is not read
     * until <code>read</code> is called.
     *
     * @param in input stream from which XZ-compressed
     *           data is read
     *
     * @throws XZFormatException
     *             input is not in the XZ format
     *
     * @throws CorruptedInputException
     *             XZ header CRC32 doesn't match
     *
     * @throws UnsupportedOptionsException
     *             XZ header is valid but specifies options
     *             not supported by this implementation
     *
     * @throws EOFException
     *             less than 12 bytes of input was available
     *             from <code>in</code>
     *
     * @throws IOException may be thrown by <code>in</code>
     */
    public XZInputStream(InputStream in) throws IOException {
        this(in, -1);
    }

    /**
     * Creates a new XZ decompressor without a memory usage limit.
     * <p>
     * This is identical to <code>XZInputStream(InputStream)</code>
     * except that this takes also the <code>arrayCache</code> argument.
     *
     * @param in input stream from which XZ-compressed
     *           data is read
     *
     * @param arrayCache cache to be used for allocating large arrays
     *
     * @throws XZFormatException
     *             input is not in the XZ format
     *
     * @throws CorruptedInputException
     *             XZ header CRC32 doesn't match
     *
     * @throws UnsupportedOptionsException
     *             XZ header is valid but specifies options
     *             not supported by this implementation
     *
     * @throws EOFException
     *             less than 12 bytes of input was available
     *             from <code>in</code>
     *
     * @throws IOException may be thrown by <code>in</code>
     *
     * @since 1.7
     */
    public XZInputStream(InputStream in, ArrayCache arrayCache)
            throws IOException {
        this(in, -1, arrayCache);
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit.
     * <p>
     * This is identical to <code>XZInputStream(InputStream)</code> except
     * that this takes also the <code>memoryLimit</code> argument.
     *
     * @param in input stream from which XZ-compressed
     *           data is read
     *
     * @param memoryLimit memory usage limit in kibibytes (KiB)
     *                    or <code>-1</code> to impose no
     *                    memory usage limit
     *
     * @throws XZFormatException
     *             input is not in the XZ format
     *
     * @throws CorruptedInputException
     *             XZ header CRC32 doesn't match
     *
     * @throws UnsupportedOptionsException
     *             XZ header is valid but specifies options
     *             not supported by this implementation
     *
     * @throws EOFException
     *             less than 12 bytes of input was available
     *             from <code>in</code>
     *
     * @throws IOException may be thrown by <code>in</code>
     */
    public XZInputStream(InputStream in, int memoryLimit) throws IOException {
        this(in, memoryLimit, true);
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit.
     * <p>
     * This is identical to <code>XZInputStream(InputStream)</code> except
     * that this takes also the <code>memoryLimit</code> and
     * <code>arrayCache</code> arguments.
     *
     * @param in input stream from which XZ-compressed
     *           data is read
     *
     * @param memoryLimit memory usage limit in kibibytes (KiB)
     *                    or <code>-1</code> to impose no
     *                    memory usage limit
     *
     * @param arrayCache cache to be used for allocating large arrays
     *
     * @throws XZFormatException
     *             input is not in the XZ format
     *
     * @throws CorruptedInputException
     *             XZ header CRC32 doesn't match
     *
     * @throws UnsupportedOptionsException
     *             XZ header is valid but specifies options
     *             not supported by this implementation
     *
     * @throws EOFException
     *             less than 12 bytes of input was available
     *             from <code>in</code>
     *
     * @throws IOException may be thrown by <code>in</code>
     *
     * @since 1.7
     */
    public XZInputStream(InputStream in, int memoryLimit,
                         ArrayCache arrayCache) throws IOException {
        this(in, memoryLimit, true, arrayCache);
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit
     * and ability to disable verification of integrity checks.
     * <p>
     * This is identical to <code>XZInputStream(InputStream,int)</code> except
     * that this takes also the <code>verifyCheck</code> argument.
     * <p>
     * Note that integrity check verification should almost never be disabled.
     * Possible reasons to disable integrity check verification:
     * <ul>
     *   <li>Trying to recover data from a corrupt .xz file.</li>
     *   <li>Speeding up decompression. This matters mostly with SHA-256
     *   or with files that have compressed extremely well. It's recommended
     *   that integrity checking isn't disabled for performance reasons
     *   unless the file integrity is verified externally in some other
     *   way.</li>
     * </ul>
     * <p>
     * <code>verifyCheck</code> only affects the integrity check of
     * the actual compressed data. The CRC32 fields in the headers
     * are always verified.
     *
     * @param in input stream from which XZ-compressed
     *           data is read
     *
     * @param memoryLimit memory usage limit in kibibytes (KiB)
     *                    or <code>-1</code> to impose no
     *                    memory usage limit
     *
     * @param verifyCheck if <code>true</code>, the integrity checks
     *                    will be verified; this should almost never
     *                    be set to <code>false</code>
     *
     * @throws XZFormatException
     *             input is not in the XZ format
     *
     * @throws CorruptedInputException
     *             XZ header CRC32 doesn't match
     *
     * @throws UnsupportedOptionsException
     *             XZ header is valid but specifies options
     *             not supported by this implementation
     *
     * @throws EOFException
     *             less than 12 bytes of input was available
     *             from <code>in</code>
     *
     * @throws IOException may be thrown by <code>in</code>
     *
     * @since 1.6
     */
    public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck)
            throws IOException {
        this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit
     * and ability to disable verification of integrity checks.
     * <p>
     * This is identical to <code>XZInputStream(InputStream,int,boolean)</code>
     * except that this takes also the <code>arrayCache</code> argument.
     *
     * @param in input stream from which XZ-compressed
     *           data is read
     *
     * @param memoryLimit memory usage limit in kibibytes (KiB)
     *                    or <code>-1</code> to impose no
     *                    memory usage limit
     *
     * @param verifyCheck if <code>true</code>, the integrity checks
     *                    will be verified; this should almost never
     *                    be set to <code>false</code>
     *
     * @param arrayCache cache to be used for allocating large arrays
     *
     * @throws XZFormatException
     *             input is not in the XZ format
     *
     * @throws CorruptedInputException
     *             XZ header CRC32 doesn't match
     *
     * @throws UnsupportedOptionsException
     *             XZ header is valid but specifies options
     *             not supported by this implementation
     *
     * @throws EOFException
     *             less than 12 bytes of input was available
     *             from <code>in</code>
     *
     * @throws IOException may be thrown by <code>in</code>
     *
     * @since 1.7
     */
    public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck,
                         ArrayCache arrayCache) throws IOException {
        this.arrayCache = arrayCache;
        this.in = in;
        this.memoryLimit = memoryLimit;
        this.verifyCheck = verifyCheck;
        // This parses the Stream Header of the first .xz Stream right away,
        // so header errors are reported from the constructor.
        this.xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck,
                                            arrayCache);
    }

    /**
     * Decompresses the next byte from this input stream.
     * <p>
     * Reading lots of data with <code>read()</code> from this input stream
     * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
     * if you need to read lots of data one byte at a time.
     *
     * @return the next decompressed byte, or <code>-1</code>
     *         to indicate the end of the compressed stream
     *
     * @throws CorruptedInputException
     * @throws UnsupportedOptionsException
     * @throws MemoryLimitException
     *
     * @throws XZIOException if the stream has been closed
     *
     * @throws EOFException
     *             compressed input is truncated or corrupt
     *
     * @throws IOException may be thrown by <code>in</code>
     */
    public int read() throws IOException {
        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    }

    /**
     * Decompresses into an array of bytes.
     * <p>
     * If <code>len</code> is zero, no bytes are read and <code>0</code>
     * is returned. Otherwise this will try to decompress <code>len</code>
     * bytes of uncompressed data. Less than <code>len</code> bytes may
     * be read only in the following situations:
     * <ul>
     *   <li>The end of the compressed data was reached successfully.</li>
     *   <li>An error is detected after at least one but less <code>len</code>
     *       bytes have already been successfully decompressed.
     *       The next call with non-zero <code>len</code> will immediately
     *       throw the pending exception.</li>
     *   <li>An exception is thrown.</li>
     * </ul>
     *
     * @param buf target buffer for uncompressed data
     * @param off start offset in <code>buf</code>
     * @param len maximum number of uncompressed bytes to read
     *
     * @return number of bytes read, or <code>-1</code> to indicate
     *         the end of the compressed stream
     *
     * @throws CorruptedInputException
     * @throws UnsupportedOptionsException
     * @throws MemoryLimitException
     *
     * @throws XZIOException if the stream has been closed
     *
     * @throws EOFException
     *             compressed input is truncated or corrupt
     *
     * @throws IOException may be thrown by <code>in</code>
     */
    public int read(byte[] buf, int off, int len) throws IOException {
        // The "off + len < 0" comparison catches int overflow of off + len.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();

        if (len == 0)
            return 0;

        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        if (endReached)
            return -1;

        // Number of bytes copied to buf so far by this call.
        int size = 0;

        try {
            while (len > 0) {
                if (xzIn == null) {
                    // The previous Stream ended; look for the header of
                    // the next concatenated Stream (or the end of input).
                    prepareNextStream();
                    if (endReached)
                        return size == 0 ? -1 : size;
                }

                int ret = xzIn.read(buf, off, len);

                if (ret > 0) {
                    size += ret;
                    off += ret;
                    len -= ret;
                } else if (ret == -1) {
                    xzIn = null;
                }
            }
        } catch (IOException e) {
            // Remember the error so later calls rethrow it. If some bytes
            // were already decompressed, report those first; the exception
            // is thrown on the next call instead.
            exception = e;
            if (size == 0)
                throw e;
        }

        return size;
    }

    /**
     * Skips the Stream Padding after the previous .xz Stream and, if more
     * input is available, reads the Stream Header of the next Stream and
     * initializes a new decoder for it. Sets <code>endReached</code> if
     * the end of the input is reached instead.
     */
    private void prepareNextStream() throws IOException {
        DataInputStream inData = new DataInputStream(in);
        byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];

        // The size of Stream Padding must be a multiple of four bytes,
        // all bytes zero.
        do {
            // First try to read one byte to see if we have reached the end
            // of the file.
            int ret = inData.read(buf, 0, 1);
            if (ret == -1) {
                endReached = true;
                return;
            }

            // Since we got one byte of input, there must be at least
            // three more available in a valid file.
            inData.readFully(buf, 1, 3);

        } while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0);

        // Not all bytes are zero. In a valid Stream it indicates the
        // beginning of the next Stream. Read the rest of the Stream Header
        // and initialize the XZ decoder.
        inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4);

        try {
            xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck, buf,
                                           arrayCache);
        } catch (XZFormatException e) {
            // Since this isn't the first .xz Stream, it is more
            // logical to tell that the data is corrupt.
            throw new CorruptedInputException(
                    "Garbage after a valid XZ Stream");
        }
    }

    /**
     * Returns the number of uncompressed bytes that can be read
     * without blocking. The value is returned with an assumption
     * that the compressed input data will be valid. If the compressed
     * data is corrupt, <code>CorruptedInputException</code> may get
     * thrown before the number of bytes claimed to be available have
     * been read from this input stream.
     *
     * @return the number of uncompressed bytes that can be read
     *         without blocking
     */
    public int available() throws IOException {
        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        // Between Streams (xzIn == null) nothing is buffered.
        return xzIn == null ? 0 : xzIn.available();
    }

    /**
     * Closes the stream and calls <code>in.close()</code>.
     * If the stream was already closed, this does nothing.
     * <p>
     * This is equivalent to <code>close(true)</code>.
     *
     * @throws IOException if thrown by <code>in.close()</code>
     */
    public void close() throws IOException {
        close(true);
    }

    /**
     * Closes the stream and optionally calls <code>in.close()</code>.
     * If the stream was already closed, this does nothing.
     * If <code>close(false)</code> has been called, a further
     * call of <code>close(true)</code> does nothing (it doesn't call
     * <code>in.close()</code>).
     * <p>
     * If you don't want to close the underlying <code>InputStream</code>,
     * there is usually no need to worry about closing this stream either;
     * it's fine to do nothing and let the garbage collector handle it.
     * However, if you are using {@link ArrayCache}, <code>close(false)</code>
     * can be useful to put the allocated arrays back to the cache without
     * closing the underlying <code>InputStream</code>.
     * <p>
     * Note that if you successfully reach the end of the stream
     * (<code>read</code> returns <code>-1</code>), the arrays are
     * automatically put back to the cache by that <code>read</code> call. In
     * this situation <code>close(false)</code> is redundant (but harmless).
     *
     * @param closeInput if <code>true</code>, the underlying input stream
     *                   <code>in</code> is closed too
     *
     * @throws IOException if thrown by <code>in.close()</code>
     *
     * @since 1.7
     */
    public void close(boolean closeInput) throws IOException {
        if (in != null) {
            if (xzIn != null) {
                // Returns the decoder's large arrays to the cache.
                xzIn.close(false);
                xzIn = null;
            }

            try {
                if (closeInput)
                    in.close();
            } finally {
                // Mark this stream closed even if in.close() throws.
                in = null;
            }
        }
    }
}
diff --git a/src/org/tukaani/xz/XZOutputStream.java b/src/org/tukaani/xz/XZOutputStream.java
new file mode 100644
index 0000000..107ef7f
--- /dev/null
+++ b/src/org/tukaani/xz/XZOutputStream.java
@@ -0,0 +1,606 @@
+/*
+ * XZOutputStream
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.OutputStream;
+import java.io.IOException;
+import org.tukaani.xz.common.EncoderUtil;
+import org.tukaani.xz.common.StreamFlags;
+import org.tukaani.xz.check.Check;
+import org.tukaani.xz.index.IndexEncoder;
+
+/**
+ * Compresses into the .xz file format.
+ *
+ * <h4>Examples</h4>
+ * <p>
+ * Getting an output stream to compress with LZMA2 using the default
+ * settings and the default integrity check type (CRC64):
+ * <p><blockquote><pre>
+ * FileOutputStream outfile = new FileOutputStream("foo.xz");
+ * XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options());
+ * </pre></blockquote>
+ * <p>
+ * Using the preset level <code>8</code> for LZMA2 (the default
+ * is <code>6</code>) and SHA-256 instead of CRC64 for integrity checking:
+ * <p><blockquote><pre>
+ * XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options(8),
+ * XZ.CHECK_SHA256);
+ * </pre></blockquote>
+ * <p>
+ * Using the x86 BCJ filter together with LZMA2 to compress x86 executables
+ * and printing the memory usage information before creating the
+ * XZOutputStream:
+ * <p><blockquote><pre>
+ * X86Options x86 = new X86Options();
+ * LZMA2Options lzma2 = new LZMA2Options();
+ * FilterOptions[] options = { x86, lzma2 };
+ * System.out.println("Encoder memory usage: "
+ * + FilterOptions.getEncoderMemoryUsage(options)
+ * + " KiB");
+ * System.out.println("Decoder memory usage: "
+ * + FilterOptions.getDecoderMemoryUsage(options)
+ * + " KiB");
+ * XZOutputStream outxz = new XZOutputStream(outfile, options);
+ * </pre></blockquote>
+ */
public class XZOutputStream extends FinishableOutputStream {
    // Cache used for allocating the large internal arrays.
    private final ArrayCache arrayCache;

    // Underlying output stream; set to null once closed.
    private OutputStream out;

    // Stream Flags (currently only the check type) written to the
    // Stream Header and Stream Footer.
    private final StreamFlags streamFlags = new StreamFlags();

    // Integrity check calculated from the uncompressed data.
    private final Check check;

    // Collects Block sizes for the Index field that finish() encodes.
    private final IndexEncoder index = new IndexEncoder();

    // Encoder of the currently open Block; null when no Block is open.
    private BlockOutputStream blockEncoder = null;

    // Filter chain used to encode the next Block.
    private FilterEncoder[] filters;

    /**
     * True if the current filter chain supports flushing.
     * If it doesn't support flushing, <code>flush()</code>
     * will use <code>endBlock()</code> as a fallback.
     */
    private boolean filtersSupportFlushing;

    // Pending exception from an earlier call; rethrown by later calls.
    private IOException exception = null;

    // True once finish() has completed successfully.
    private boolean finished = false;

    // Single-byte buffer reused by write(int) to avoid per-call allocation.
    private final byte[] tempBuf = new byte[1];

    /**
     * Creates a new XZ compressor using one filter and CRC64 as
     * the integrity check. This constructor is equivalent to passing
     * a single-member FilterOptions array to
     * <code>XZOutputStream(OutputStream, FilterOptions[])</code>.
     *
     * @param out output stream to which the compressed data
     *            will be written
     *
     * @param filterOptions
     *            filter options to use
     *
     * @throws UnsupportedOptionsException
     *             invalid filter chain
     *
     * @throws IOException may be thrown from <code>out</code>
     */
    public XZOutputStream(OutputStream out, FilterOptions filterOptions)
            throws IOException {
        this(out, filterOptions, XZ.CHECK_CRC64);
    }

    /**
     * Creates a new XZ compressor using one filter and CRC64 as
     * the integrity check. This constructor is equivalent to passing
     * a single-member FilterOptions array to
     * <code>XZOutputStream(OutputStream, FilterOptions[], ArrayCache)</code>.
     *
     * @param out output stream to which the compressed data
     *            will be written
     *
     * @param filterOptions
     *            filter options to use
     *
     * @param arrayCache cache to be used for allocating large arrays
     *
     * @throws UnsupportedOptionsException
     *             invalid filter chain
     *
     * @throws IOException may be thrown from <code>out</code>
     *
     * @since 1.7
     */
    public XZOutputStream(OutputStream out, FilterOptions filterOptions,
                          ArrayCache arrayCache)
            throws IOException {
        this(out, filterOptions, XZ.CHECK_CRC64, arrayCache);
    }

    /**
     * Creates a new XZ compressor using one filter and the specified
     * integrity check type. This constructor is equivalent to
     * passing a single-member FilterOptions array to
     * <code>XZOutputStream(OutputStream, FilterOptions[], int)</code>.
     *
     * @param out output stream to which the compressed data
     *            will be written
     *
     * @param filterOptions
     *            filter options to use
     *
     * @param checkType type of the integrity check,
     *                  for example XZ.CHECK_CRC32
     *
     * @throws UnsupportedOptionsException
     *             invalid filter chain
     *
     * @throws IOException may be thrown from <code>out</code>
     */
    public XZOutputStream(OutputStream out, FilterOptions filterOptions,
                          int checkType) throws IOException {
        this(out, new FilterOptions[] { filterOptions }, checkType);
    }

    /**
     * Creates a new XZ compressor using one filter and the specified
     * integrity check type. This constructor is equivalent to
     * passing a single-member FilterOptions array to
     * <code>XZOutputStream(OutputStream, FilterOptions[], int,
     * ArrayCache)</code>.
     *
     * @param out output stream to which the compressed data
     *            will be written
     *
     * @param filterOptions
     *            filter options to use
     *
     * @param checkType type of the integrity check,
     *                  for example XZ.CHECK_CRC32
     *
     * @param arrayCache cache to be used for allocating large arrays
     *
     * @throws UnsupportedOptionsException
     *             invalid filter chain
     *
     * @throws IOException may be thrown from <code>out</code>
     *
     * @since 1.7
     */
    public XZOutputStream(OutputStream out, FilterOptions filterOptions,
                          int checkType, ArrayCache arrayCache)
            throws IOException {
        this(out, new FilterOptions[] { filterOptions }, checkType,
             arrayCache);
    }

    /**
     * Creates a new XZ compressor using 1-4 filters and CRC64 as
     * the integrity check. This constructor is equivalent
     * <code>XZOutputStream(out, filterOptions, XZ.CHECK_CRC64)</code>.
     *
     * @param out output stream to which the compressed data
     *            will be written
     *
     * @param filterOptions
     *            array of filter options to use
     *
     * @throws UnsupportedOptionsException
     *             invalid filter chain
     *
     * @throws IOException may be thrown from <code>out</code>
     */
    public XZOutputStream(OutputStream out, FilterOptions[] filterOptions)
            throws IOException {
        this(out, filterOptions, XZ.CHECK_CRC64);
    }

    /**
     * Creates a new XZ compressor using 1-4 filters and CRC64 as
     * the integrity check. This constructor is equivalent
     * <code>XZOutputStream(out, filterOptions, XZ.CHECK_CRC64,
     * arrayCache)</code>.
     *
     * @param out output stream to which the compressed data
     *            will be written
     *
     * @param filterOptions
     *            array of filter options to use
     *
     * @param arrayCache cache to be used for allocating large arrays
     *
     * @throws UnsupportedOptionsException
     *             invalid filter chain
     *
     * @throws IOException may be thrown from <code>out</code>
     *
     * @since 1.7
     */
    public XZOutputStream(OutputStream out, FilterOptions[] filterOptions,
                          ArrayCache arrayCache)
            throws IOException {
        this(out, filterOptions, XZ.CHECK_CRC64, arrayCache);
    }

    /**
     * Creates a new XZ compressor using 1-4 filters and the specified
     * integrity check type.
     *
     * @param out output stream to which the compressed data
     *            will be written
     *
     * @param filterOptions
     *            array of filter options to use
     *
     * @param checkType type of the integrity check,
     *                  for example XZ.CHECK_CRC32
     *
     * @throws UnsupportedOptionsException
     *             invalid filter chain
     *
     * @throws IOException may be thrown from <code>out</code>
     */
    public XZOutputStream(OutputStream out, FilterOptions[] filterOptions,
                          int checkType) throws IOException {
        this(out, filterOptions, checkType, ArrayCache.getDefaultCache());
    }

    /**
     * Creates a new XZ compressor using 1-4 filters and the specified
     * integrity check type.
     *
     * @param out output stream to which the compressed data
     *            will be written
     *
     * @param filterOptions
     *            array of filter options to use
     *
     * @param checkType type of the integrity check,
     *                  for example XZ.CHECK_CRC32
     *
     * @param arrayCache cache to be used for allocating large arrays
     *
     * @throws UnsupportedOptionsException
     *             invalid filter chain
     *
     * @throws IOException may be thrown from <code>out</code>
     *
     * @since 1.7
     */
    public XZOutputStream(OutputStream out, FilterOptions[] filterOptions,
                          int checkType, ArrayCache arrayCache)
            throws IOException {
        this.arrayCache = arrayCache;
        this.out = out;
        // Validates the filter chain before anything is written.
        updateFilters(filterOptions);

        streamFlags.checkType = checkType;
        check = Check.getInstance(checkType);

        // The 12-byte Stream Header is written immediately.
        encodeStreamHeader();
    }

    /**
     * Updates the filter chain with a single filter.
     * This is equivalent to passing a single-member FilterOptions array
     * to <code>updateFilters(FilterOptions[])</code>.
     *
     * @param filterOptions
     *            new filter to use
     *
     * @throws UnsupportedOptionsException
     *             unsupported filter chain, or trying to change
     *             the filter chain in the middle of a Block
     */
    public void updateFilters(FilterOptions filterOptions)
            throws XZIOException {
        FilterOptions[] opts = new FilterOptions[1];
        opts[0] = filterOptions;
        updateFilters(opts);
    }

    /**
     * Updates the filter chain with 1-4 filters.
     * <p>
     * Currently this cannot be used to update e.g. LZMA2 options in the
     * middle of a XZ Block. Use <code>endBlock()</code> to finish the
     * current XZ Block before calling this function. The new filter chain
     * will then be used for the next XZ Block.
     *
     * @param filterOptions
     *            new filter chain to use
     *
     * @throws UnsupportedOptionsException
     *             unsupported filter chain, or trying to change
     *             the filter chain in the middle of a Block
     */
    public void updateFilters(FilterOptions[] filterOptions)
            throws XZIOException {
        if (blockEncoder != null)
            throw new UnsupportedOptionsException("Changing filter options "
                    + "in the middle of a XZ Block not implemented");

        if (filterOptions.length < 1 || filterOptions.length > 4)
            throw new UnsupportedOptionsException(
                        "XZ filter chain must be 1-4 filters");

        // The whole chain supports flushing only if every filter does.
        filtersSupportFlushing = true;
        FilterEncoder[] newFilters = new FilterEncoder[filterOptions.length];
        for (int i = 0; i < filterOptions.length; ++i) {
            newFilters[i] = filterOptions[i].getFilterEncoder();
            filtersSupportFlushing &= newFilters[i].supportsFlushing();
        }

        // Validate first; "filters" is replaced only if the chain is OK.
        RawCoder.validate(newFilters);
        filters = newFilters;
    }

    /**
     * Writes one byte to be compressed.
     *
     * @throws XZIOException
     *             XZ Stream has grown too big
     *
     * @throws XZIOException
     *             <code>finish()</code> or <code>close()</code>
     *             was already called
     *
     * @throws IOException may be thrown by the underlying output stream
     */
    public void write(int b) throws IOException {
        tempBuf[0] = (byte)b;
        write(tempBuf, 0, 1);
    }

    /**
     * Writes an array of bytes to be compressed.
     * The compressors tend to do internal buffering and thus the written
     * data won't be readable from the compressed output immediately.
     * Use <code>flush()</code> to force everything written so far to
     * be written to the underlaying output stream, but be aware that
     * flushing reduces compression ratio.
     *
     * @param buf buffer of bytes to be written
     * @param off start offset in <code>buf</code>
     * @param len number of bytes to write
     *
     * @throws XZIOException
     *             XZ Stream has grown too big: total file size
     *             about 8&nbsp;EiB or the Index field exceeds
     *             16&nbsp;GiB; you shouldn't reach these sizes
     *             in practice
     *
     * @throws XZIOException
     *             <code>finish()</code> or <code>close()</code>
     *             was already called and len &gt; 0
     *
     * @throws IOException may be thrown by the underlying output stream
     */
    public void write(byte[] buf, int off, int len) throws IOException {
        // The "off + len < 0" comparison catches int overflow of off + len.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();

        if (exception != null)
            throw exception;

        if (finished)
            throw new XZIOException("Stream finished or closed");

        try {
            // A new Block is started lazily on the first write after
            // construction or endBlock().
            if (blockEncoder == null)
                blockEncoder = new BlockOutputStream(out, filters, check,
                                                     arrayCache);

            blockEncoder.write(buf, off, len);
        } catch (IOException e) {
            exception = e;
            throw e;
        }
    }

    /**
     * Finishes the current XZ Block (but not the whole XZ Stream).
     * This doesn't flush the stream so it's possible that not all data will
     * be decompressible from the output stream when this function returns.
     * Call also <code>flush()</code> if flushing is wanted in addition to
     * finishing the current XZ Block.
     * <p>
     * If there is no unfinished Block open, this function will do nothing.
     * (No empty XZ Block will be created.)
     * <p>
     * This function can be useful, for example, to create
     * random-accessible .xz files.
     * <p>
     * Starting a new XZ Block means that the encoder state is reset.
     * Doing this very often will increase the size of the compressed
     * file a lot (more than plain <code>flush()</code> would do).
     *
     * @throws XZIOException
     *             XZ Stream has grown too big
     *
     * @throws XZIOException
     *             stream finished or closed
     *
     * @throws IOException may be thrown by the underlying output stream
     */
    public void endBlock() throws IOException {
        if (exception != null)
            throw exception;

        if (finished)
            throw new XZIOException("Stream finished or closed");

        // NOTE: Once there is threading with multiple Blocks, it's possible
        // that this function will be more like a barrier that returns
        // before the last Block has been finished.
        if (blockEncoder != null) {
            try {
                blockEncoder.finish();
                // Record the Block sizes for the Index field that is
                // written when the whole Stream is finished.
                index.add(blockEncoder.getUnpaddedSize(),
                          blockEncoder.getUncompressedSize());
                blockEncoder = null;
            } catch (IOException e) {
                exception = e;
                throw e;
            }
        }
    }

    /**
     * Flushes the encoder and calls <code>out.flush()</code>.
     * All buffered pending data will then be decompressible from
     * the output stream.
     * <p>
     * Calling this function very often may increase the compressed
     * file size a lot. The filter chain options may affect the size
     * increase too. For example, with LZMA2 the HC4 match finder has
     * smaller penalty with flushing than BT4.
     * <p>
     * Some filters don't support flushing. If the filter chain has
     * such a filter, <code>flush()</code> will call <code>endBlock()</code>
     * before flushing.
     *
     * @throws XZIOException
     *             XZ Stream has grown too big
     *
     * @throws XZIOException
     *             stream finished or closed
     *
     * @throws IOException may be thrown by the underlying output stream
     */
    public void flush() throws IOException {
        if (exception != null)
            throw exception;

        if (finished)
            throw new XZIOException("Stream finished or closed");

        try {
            if (blockEncoder != null) {
                if (filtersSupportFlushing) {
                    // This will eventually call out.flush() so
                    // no need to do it here again.
                    blockEncoder.flush();
                } else {
                    // Fallback: finish the Block, which makes all data
                    // written so far decompressible.
                    endBlock();
                    out.flush();
                }
            } else {
                out.flush();
            }
        } catch (IOException e) {
            exception = e;
            throw e;
        }
    }

    /**
     * Finishes compression without closing the underlying stream.
     * No more data can be written to this stream after finishing
     * (calling <code>write</code> with an empty buffer is OK).
     * <p>
     * Repeated calls to <code>finish()</code> do nothing unless
     * an exception was thrown by this stream earlier. In that case
     * the same exception is thrown again.
     * <p>
     * After finishing, the stream may be closed normally with
     * <code>close()</code>. If the stream will be closed anyway, there
     * usually is no need to call <code>finish()</code> separately.
     *
     * @throws XZIOException
     *             XZ Stream has grown too big
     *
     * @throws IOException may be thrown by the underlying output stream
     */
    public void finish() throws IOException {
        if (!finished) {
            // This checks for pending exceptions so we don't need to
            // worry about it here.
            endBlock();

            try {
                index.encode(out);
                encodeStreamFooter();
            } catch (IOException e) {
                exception = e;
                throw e;
            }

            // Set it to true only if everything goes fine. Setting it earlier
            // would cause repeated calls to finish() do nothing instead of
            // throwing an exception to indicate an earlier error.
            finished = true;
        }
    }

    /**
     * Finishes compression and closes the underlying stream.
     * The underlying stream <code>out</code> is closed even if finishing
     * fails. If both finishing and closing fail, the exception thrown
     * by <code>finish()</code> is thrown and the exception from the failed
     * <code>out.close()</code> is lost.
     *
     * @throws XZIOException
     *             XZ Stream has grown too big
     *
     * @throws IOException may be thrown by the underlying output stream
     */
    public void close() throws IOException {
        if (out != null) {
            // If finish() throws an exception, it stores the exception to
            // the variable "exception". So we can ignore the possible
            // exception here.
            try {
                finish();
            } catch (IOException e) {}

            try {
                out.close();
            } catch (IOException e) {
                // Remember the exception but only if there is no previous
                // pending exception.
                if (exception == null)
                    exception = e;
            }

            out = null;
        }

        if (exception != null)
            throw exception;
    }

    // Encodes the two-byte Stream Flags field into buf at off.
    // The first byte of Stream Flags is always 0x00; the second
    // byte holds the check type.
    private void encodeStreamFlags(byte[] buf, int off) {
        buf[off] = 0x00;
        buf[off + 1] = (byte)streamFlags.checkType;
    }

    // Writes the 12-byte XZ Stream Header: header magic, Stream Flags,
    // and the CRC32 of the Stream Flags.
    private void encodeStreamHeader() throws IOException {
        out.write(XZ.HEADER_MAGIC);

        byte[] buf = new byte[2];
        encodeStreamFlags(buf, 0);
        out.write(buf);

        EncoderUtil.writeCRC32(out, buf);
    }

    // Writes the 12-byte XZ Stream Footer: CRC32, Backward Size,
    // Stream Flags, and the footer magic. Note that unlike in the
    // Stream Header, here the CRC32 field comes before the data
    // it protects.
    private void encodeStreamFooter() throws IOException {
        byte[] buf = new byte[6];
        // Backward Size is stored as (real size / 4) - 1, little endian.
        long backwardSize = index.getIndexSize() / 4 - 1;
        for (int i = 0; i < 4; ++i)
            buf[i] = (byte)(backwardSize >>> (i * 8));

        encodeStreamFlags(buf, 4);

        EncoderUtil.writeCRC32(out, buf);
        out.write(buf);
        out.write(XZ.FOOTER_MAGIC);
    }
}
diff --git a/src/org/tukaani/xz/check/CRC32.java b/src/org/tukaani/xz/check/CRC32.java
new file mode 100644
index 0000000..f182898
--- /dev/null
+++ b/src/org/tukaani/xz/check/CRC32.java
@@ -0,0 +1,33 @@
+/*
+ * CRC32
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.check;
+
+public class CRC32 extends Check {
+ private final java.util.zip.CRC32 state = new java.util.zip.CRC32();
+
+ public CRC32() {
+ size = 4;
+ name = "CRC32";
+ }
+
+ public void update(byte[] buf, int off, int len) {
+ state.update(buf, off, len);
+ }
+
+ public byte[] finish() {
+ long value = state.getValue();
+ byte[] buf = { (byte)(value),
+ (byte)(value >>> 8),
+ (byte)(value >>> 16),
+ (byte)(value >>> 24) };
+ state.reset();
+ return buf;
+ }
+}
diff --git a/src/org/tukaani/xz/check/CRC64.java b/src/org/tukaani/xz/check/CRC64.java
new file mode 100644
index 0000000..02b15b7
--- /dev/null
+++ b/src/org/tukaani/xz/check/CRC64.java
@@ -0,0 +1,54 @@
+/*
+ * CRC64
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.check;
+
+public class CRC64 extends Check {
+ private static final long poly = 0xC96C5795D7870F42L;
+ private static final long[] crcTable = new long[256];
+
+ private long crc = -1;
+
+ static {
+ for (int b = 0; b < crcTable.length; ++b) {
+ long r = b;
+ for (int i = 0; i < 8; ++i) {
+ if ((r & 1) == 1)
+ r = (r >>> 1) ^ poly;
+ else
+ r >>>= 1;
+ }
+
+ crcTable[b] = r;
+ }
+ }
+
+ public CRC64() {
+ size = 8;
+ name = "CRC64";
+ }
+
+ public void update(byte[] buf, int off, int len) {
+ int end = off + len;
+
+ while (off < end)
+ crc = crcTable[(buf[off++] ^ (int)crc) & 0xFF] ^ (crc >>> 8);
+ }
+
+ public byte[] finish() {
+ long value = ~crc;
+ crc = -1;
+
+ byte[] buf = new byte[8];
+ for (int i = 0; i < buf.length; ++i)
+ buf[i] = (byte)(value >> (i * 8));
+
+ return buf;
+ }
+}
diff --git a/src/org/tukaani/xz/check/Check.java b/src/org/tukaani/xz/check/Check.java
new file mode 100644
index 0000000..02c011e
--- /dev/null
+++ b/src/org/tukaani/xz/check/Check.java
@@ -0,0 +1,57 @@
+/*
+ * Check
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.check;
+
+import org.tukaani.xz.XZ;
+import org.tukaani.xz.UnsupportedOptionsException;
+
+public abstract class Check {
+ int size;
+ String name;
+
+ public abstract void update(byte[] buf, int off, int len);
+ public abstract byte[] finish();
+
+ public void update(byte[] buf) {
+ update(buf, 0, buf.length);
+ }
+
+ public int getSize() {
+ return size;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public static Check getInstance(int checkType)
+ throws UnsupportedOptionsException {
+ switch (checkType) {
+ case XZ.CHECK_NONE:
+ return new None();
+
+ case XZ.CHECK_CRC32:
+ return new CRC32();
+
+ case XZ.CHECK_CRC64:
+ return new CRC64();
+
+ case XZ.CHECK_SHA256:
+ try {
+ return new SHA256();
+ } catch (java.security.NoSuchAlgorithmException e) {}
+
+ break;
+ }
+
+ throw new UnsupportedOptionsException(
+ "Unsupported Check ID " + checkType);
+ }
+}
diff --git a/src/org/tukaani/xz/check/None.java b/src/org/tukaani/xz/check/None.java
new file mode 100644
index 0000000..b07c8e6
--- /dev/null
+++ b/src/org/tukaani/xz/check/None.java
@@ -0,0 +1,24 @@
+/*
+ * None
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.check;
+
+public class None extends Check {
+ public None() {
+ size = 0;
+ name = "None";
+ }
+
+ public void update(byte[] buf, int off, int len) {}
+
+ public byte[] finish() {
+ byte[] empty = new byte[0];
+ return empty;
+ }
+}
diff --git a/src/org/tukaani/xz/check/SHA256.java b/src/org/tukaani/xz/check/SHA256.java
new file mode 100644
index 0000000..66503c7
--- /dev/null
+++ b/src/org/tukaani/xz/check/SHA256.java
@@ -0,0 +1,30 @@
+/*
+ * SHA256
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.check;
+
public class SHA256 extends Check {
    // JCE digest instance; getInstance() in the constructor throws
    // if the runtime has no SHA-256 provider.
    private final java.security.MessageDigest sha256;

    public SHA256() throws java.security.NoSuchAlgorithmException {
        size = 32;
        name = "SHA-256";
        sha256 = java.security.MessageDigest.getInstance("SHA-256");
    }

    // Feeds len bytes starting at off into the digest.
    public void update(byte[] buf, int off, int len) {
        sha256.update(buf, off, len);
    }

    // Returns the 32-byte digest and resets the state for reuse.
    // (MessageDigest.digest() already resets; the explicit reset()
    // is defensive.)
    public byte[] finish() {
        byte[] buf = sha256.digest();
        sha256.reset();
        return buf;
    }
}
diff --git a/src/org/tukaani/xz/common/DecoderUtil.java b/src/org/tukaani/xz/common/DecoderUtil.java
new file mode 100644
index 0000000..77ba441
--- /dev/null
+++ b/src/org/tukaani/xz/common/DecoderUtil.java
@@ -0,0 +1,121 @@
+/*
+ * DecoderUtil
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.common;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.EOFException;
+import java.util.zip.CRC32;
+import org.tukaani.xz.XZ;
+import org.tukaani.xz.XZFormatException;
+import org.tukaani.xz.CorruptedInputException;
+import org.tukaani.xz.UnsupportedOptionsException;
+
+public class DecoderUtil extends Util {
+ public static boolean isCRC32Valid(byte[] buf, int off, int len,
+ int ref_off) {
+ CRC32 crc32 = new CRC32();
+ crc32.update(buf, off, len);
+ long value = crc32.getValue();
+
+ for (int i = 0; i < 4; ++i)
+ if ((byte)(value >>> (i * 8)) != buf[ref_off + i])
+ return false;
+
+ return true;
+ }
+
+ public static StreamFlags decodeStreamHeader(byte[] buf)
+ throws IOException {
+ for (int i = 0; i < XZ.HEADER_MAGIC.length; ++i)
+ if (buf[i] != XZ.HEADER_MAGIC[i])
+ throw new XZFormatException();
+
+ if (!isCRC32Valid(buf, XZ.HEADER_MAGIC.length, 2,
+ XZ.HEADER_MAGIC.length + 2))
+ throw new CorruptedInputException("XZ Stream Header is corrupt");
+
+ try {
+ return decodeStreamFlags(buf, XZ.HEADER_MAGIC.length);
+ } catch (UnsupportedOptionsException e) {
+ throw new UnsupportedOptionsException(
+ "Unsupported options in XZ Stream Header");
+ }
+ }
+
+ public static StreamFlags decodeStreamFooter(byte[] buf)
+ throws IOException {
+ if (buf[10] != XZ.FOOTER_MAGIC[0] || buf[11] != XZ.FOOTER_MAGIC[1]) {
+ // NOTE: The exception could be XZFormatException too.
+ // It depends on the situation which one is better.
+ throw new CorruptedInputException("XZ Stream Footer is corrupt");
+ }
+
+ if (!isCRC32Valid(buf, 4, 6, 0))
+ throw new CorruptedInputException("XZ Stream Footer is corrupt");
+
+ StreamFlags streamFlags;
+ try {
+ streamFlags = decodeStreamFlags(buf, 8);
+ } catch (UnsupportedOptionsException e) {
+ throw new UnsupportedOptionsException(
+ "Unsupported options in XZ Stream Footer");
+ }
+
+ streamFlags.backwardSize = 0;
+ for (int i = 0; i < 4; ++i)
+ streamFlags.backwardSize |= (buf[i + 4] & 0xFF) << (i * 8);
+
+ streamFlags.backwardSize = (streamFlags.backwardSize + 1) * 4;
+
+ return streamFlags;
+ }
+
+ private static StreamFlags decodeStreamFlags(byte[] buf, int off)
+ throws UnsupportedOptionsException {
+ if (buf[off] != 0x00 || (buf[off + 1] & 0xFF) >= 0x10)
+ throw new UnsupportedOptionsException();
+
+ StreamFlags streamFlags = new StreamFlags();
+ streamFlags.checkType = buf[off + 1];
+
+ return streamFlags;
+ }
+
+ public static boolean areStreamFlagsEqual(StreamFlags a, StreamFlags b) {
+ // backwardSize is intentionally not compared.
+ return a.checkType == b.checkType;
+ }
+
+ public static long decodeVLI(InputStream in) throws IOException {
+ int b = in.read();
+ if (b == -1)
+ throw new EOFException();
+
+ long num = b & 0x7F;
+ int i = 0;
+
+ while ((b & 0x80) != 0x00) {
+ if (++i >= VLI_SIZE_MAX)
+ throw new CorruptedInputException();
+
+ b = in.read();
+ if (b == -1)
+ throw new EOFException();
+
+ if (b == 0x00)
+ throw new CorruptedInputException();
+
+ num |= (long)(b & 0x7F) << (i * 7);
+ }
+
+ return num;
+ }
+}
diff --git a/src/org/tukaani/xz/common/EncoderUtil.java b/src/org/tukaani/xz/common/EncoderUtil.java
new file mode 100644
index 0000000..57f688b
--- /dev/null
+++ b/src/org/tukaani/xz/common/EncoderUtil.java
@@ -0,0 +1,36 @@
+/*
+ * EncoderUtil
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.common;
+
+import java.io.OutputStream;
+import java.io.IOException;
+import java.util.zip.CRC32;
+
+public class EncoderUtil extends Util {
+ public static void writeCRC32(OutputStream out, byte[] buf)
+ throws IOException {
+ CRC32 crc32 = new CRC32();
+ crc32.update(buf);
+ long value = crc32.getValue();
+
+ for (int i = 0; i < 4; ++i)
+ out.write((byte)(value >>> (i * 8)));
+ }
+
+ public static void encodeVLI(OutputStream out, long num)
+ throws IOException {
+ while (num >= 0x80) {
+ out.write((byte)(num | 0x80));
+ num >>>= 7;
+ }
+
+ out.write((byte)num);
+ }
+}
diff --git a/src/org/tukaani/xz/common/StreamFlags.java b/src/org/tukaani/xz/common/StreamFlags.java
new file mode 100644
index 0000000..b306987
--- /dev/null
+++ b/src/org/tukaani/xz/common/StreamFlags.java
@@ -0,0 +1,15 @@
+/*
+ * StreamFlags
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.common;
+
/**
 * Holds the fields stored in the XZ Stream Header and Stream Footer.
 * A value of -1 means "not set".
 */
public class StreamFlags {
    // Check ID; valid decoded values are in the range 0x00-0x0F.
    public int checkType = -1;
    // Backward Size in bytes; filled in only when these flags were
    // decoded from a Stream Footer.
    public long backwardSize = -1;
}
diff --git a/src/org/tukaani/xz/common/Util.java b/src/org/tukaani/xz/common/Util.java
new file mode 100644
index 0000000..c4324ce
--- /dev/null
+++ b/src/org/tukaani/xz/common/Util.java
@@ -0,0 +1,28 @@
+/*
+ * Util
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.common;
+
/** Constants and helpers shared by the .xz encoder and decoder. */
public class Util {
    public static final int STREAM_HEADER_SIZE = 12;
    public static final long BACKWARD_SIZE_MAX = 1L << 34;
    public static final int BLOCK_HEADER_SIZE_MAX = 1024;
    public static final long VLI_MAX = Long.MAX_VALUE;
    public static final int VLI_SIZE_MAX = 9;

    /**
     * Returns the number of bytes needed to encode {@code num} as a
     * variable-length integer (1-9 for valid non-negative inputs).
     */
    public static int getVLISize(long num) {
        int size = 0;
        do {
            ++size;
            // Unsigned shift: the previous signed shift (>>=) never
            // reaches zero for a negative argument, causing an
            // infinite loop. Valid VLIs are non-negative, for which
            // >>> and >> behave identically.
            num >>>= 7;
        } while (num != 0);

        return size;
    }
}
diff --git a/src/org/tukaani/xz/delta/DeltaCoder.java b/src/org/tukaani/xz/delta/DeltaCoder.java
new file mode 100644
index 0000000..d94eb66
--- /dev/null
+++ b/src/org/tukaani/xz/delta/DeltaCoder.java
@@ -0,0 +1,27 @@
+/*
+ * DeltaCoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.delta;
+
/**
 * Shared state of the Delta filter: the distance and a 256-byte
 * cyclic history buffer.
 */
abstract class DeltaCoder {
    static final int DISTANCE_MIN = 1;
    static final int DISTANCE_MAX = 256;
    static final int DISTANCE_MASK = DISTANCE_MAX - 1;

    final int distance;
    final byte[] history = new byte[DISTANCE_MAX];
    int pos = 0;

    DeltaCoder(int distance) {
        // The Delta filter only supports distances 1-256.
        if (distance > DISTANCE_MAX || distance < DISTANCE_MIN)
            throw new IllegalArgumentException();

        this.distance = distance;
    }
}
diff --git a/src/org/tukaani/xz/delta/DeltaDecoder.java b/src/org/tukaani/xz/delta/DeltaDecoder.java
new file mode 100644
index 0000000..154cbf3
--- /dev/null
+++ b/src/org/tukaani/xz/delta/DeltaDecoder.java
@@ -0,0 +1,24 @@
+/*
+ * DeltaDecoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.delta;
+
+public class DeltaDecoder extends DeltaCoder {
+ public DeltaDecoder(int distance) {
+ super(distance);
+ }
+
+ public void decode(byte[] buf, int off, int len) {
+ int end = off + len;
+ for (int i = off; i < end; ++i) {
+ buf[i] += history[(distance + pos) & DISTANCE_MASK];
+ history[pos-- & DISTANCE_MASK] = buf[i];
+ }
+ }
+}
diff --git a/src/org/tukaani/xz/delta/DeltaEncoder.java b/src/org/tukaani/xz/delta/DeltaEncoder.java
new file mode 100644
index 0000000..17accce
--- /dev/null
+++ b/src/org/tukaani/xz/delta/DeltaEncoder.java
@@ -0,0 +1,24 @@
+/*
+ * DeltaEncoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.delta;
+
+public class DeltaEncoder extends DeltaCoder {
+ public DeltaEncoder(int distance) {
+ super(distance);
+ }
+
+ public void encode(byte[] in, int in_off, int len, byte[] out) {
+ for (int i = 0; i < len; ++i) {
+ byte tmp = history[(distance + pos) & DISTANCE_MASK];
+ history[pos-- & DISTANCE_MASK] = in[in_off + i];
+ out[i] = (byte)(in[in_off + i] - tmp);
+ }
+ }
+}
diff --git a/src/org/tukaani/xz/index/BlockInfo.java b/src/org/tukaani/xz/index/BlockInfo.java
new file mode 100644
index 0000000..babae7f
--- /dev/null
+++ b/src/org/tukaani/xz/index/BlockInfo.java
@@ -0,0 +1,38 @@
+/*
+ * BlockInfo
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.index;
+
+import org.tukaani.xz.common.StreamFlags;
+
/**
 * Information about one Block of an .xz file; the fields are filled in
 * by IndexDecoder.setBlockInfo. A value of -1 means "not set".
 */
public class BlockInfo {
    public int blockNumber = -1;
    public long compressedOffset = -1;
    public long uncompressedOffset = -1;
    public long unpaddedSize = -1;
    public long uncompressedSize = -1;

    // The Index that holds the Record of the current Block.
    IndexDecoder index;

    public BlockInfo(IndexDecoder indexOfFirstStream) {
        index = indexOfFirstStream;
    }

    // Check ID of the Stream containing the current Block.
    public int getCheckType() {
        return index.getStreamFlags().checkType;
    }

    // True if the current Index has a Record after the current Block.
    public boolean hasNext() {
        return index.hasRecord(blockNumber + 1);
    }

    // Advances this object to the next Block.
    public void setNext() {
        index.setBlockInfo(this, blockNumber + 1);
    }
}
diff --git a/src/org/tukaani/xz/index/IndexBase.java b/src/org/tukaani/xz/index/IndexBase.java
new file mode 100644
index 0000000..e556105
--- /dev/null
+++ b/src/org/tukaani/xz/index/IndexBase.java
@@ -0,0 +1,56 @@
+/*
+ * IndexBase
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.index;
+
+import org.tukaani.xz.common.Util;
+import org.tukaani.xz.XZIOException;
+
+abstract class IndexBase {
+ private final XZIOException invalidIndexException;
+ long blocksSum = 0;
+ long uncompressedSum = 0;
+ long indexListSize = 0;
+ long recordCount = 0;
+
+ IndexBase(XZIOException invalidIndexException) {
+ this.invalidIndexException = invalidIndexException;
+ }
+
+ private long getUnpaddedIndexSize() {
+ // Index Indicator + Number of Records + List of Records + CRC32
+ return 1 + Util.getVLISize(recordCount) + indexListSize + 4;
+ }
+
+ public long getIndexSize() {
+ return (getUnpaddedIndexSize() + 3) & ~3;
+ }
+
+ public long getStreamSize() {
+ return Util.STREAM_HEADER_SIZE + blocksSum + getIndexSize()
+ + Util.STREAM_HEADER_SIZE;
+ }
+
+ int getIndexPaddingSize() {
+ return (int)((4 - getUnpaddedIndexSize()) & 3);
+ }
+
+ void add(long unpaddedSize, long uncompressedSize) throws XZIOException {
+ blocksSum += (unpaddedSize + 3) & ~3;
+ uncompressedSum += uncompressedSize;
+ indexListSize += Util.getVLISize(unpaddedSize)
+ + Util.getVLISize(uncompressedSize);
+ ++recordCount;
+
+ if (blocksSum < 0 || uncompressedSum < 0
+ || getIndexSize() > Util.BACKWARD_SIZE_MAX
+ || getStreamSize() < 0)
+ throw invalidIndexException;
+ }
+}
diff --git a/src/org/tukaani/xz/index/IndexDecoder.java b/src/org/tukaani/xz/index/IndexDecoder.java
new file mode 100644
index 0000000..a3ae986
--- /dev/null
+++ b/src/org/tukaani/xz/index/IndexDecoder.java
@@ -0,0 +1,223 @@
+/*
+ * IndexDecoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.index;
+
+import java.io.IOException;
+import java.io.EOFException;
+import java.util.zip.CheckedInputStream;
+import org.tukaani.xz.common.DecoderUtil;
+import org.tukaani.xz.common.StreamFlags;
+import org.tukaani.xz.SeekableInputStream;
+import org.tukaani.xz.CorruptedInputException;
+import org.tukaani.xz.MemoryLimitException;
+import org.tukaani.xz.UnsupportedOptionsException;
+
/**
 * Decodes and stores the Index field of one XZ Stream, and maps
 * uncompressed positions and Block numbers to the matching Records.
 */
public class IndexDecoder extends IndexBase {
    private final StreamFlags streamFlags;
    private final long streamPadding;
    private final int memoryUsage;

    // Unpadded Size and Uncompressed Size fields
    private final long[] unpadded;
    private final long[] uncompressed;

    // Uncompressed size of the largest Block. It is used by
    // SeekableXZInputStream to find out the largest Block of the .xz file.
    private long largestBlockSize = 0;

    // Offsets relative to the beginning of the .xz file. These are all zero
    // for the first Stream in the file.
    private int recordOffset = 0;
    private long compressedOffset = 0;
    private long uncompressedOffset = 0;

    /**
     * Decodes the Index field. <code>in</code> must be positioned at
     * the beginning of the Index; <code>streamFooterFlags</code> must
     * already contain the decoded Stream Footer (its backwardSize
     * tells how big the Index field has to be).
     *
     * @throws CorruptedInputException if the Index doesn't match
     *         Backward Size or its CRC32
     * @throws MemoryLimitException if the estimated memory usage
     *         would exceed memoryLimit (negative means no limit)
     */
    public IndexDecoder(SeekableInputStream in, StreamFlags streamFooterFlags,
                        long streamPadding, int memoryLimit)
            throws IOException {
        super(new CorruptedInputException("XZ Index is corrupt"));
        this.streamFlags = streamFooterFlags;
        this.streamPadding = streamPadding;

        // If endPos is exceeded before the CRC32 field has been decoded,
        // the Index is corrupt.
        long endPos = in.position() + streamFooterFlags.backwardSize - 4;

        java.util.zip.CRC32 crc32 = new java.util.zip.CRC32();
        CheckedInputStream inChecked = new CheckedInputStream(in, crc32);

        // Index Indicator
        if (inChecked.read() != 0x00)
            throw new CorruptedInputException("XZ Index is corrupt");

        try {
            // Number of Records
            long count = DecoderUtil.decodeVLI(inChecked);

            // Catch Record counts that are obviously too high to be valid.
            // This test isn't exact because it ignores Index Indicator,
            // Number of Records, and CRC32 fields, but this is good enough
            // to catch the most obvious problems.
            if (count >= streamFooterFlags.backwardSize / 2)
                throw new CorruptedInputException("XZ Index is corrupt");

            // If the Record count doesn't fit into an int, we cannot
            // allocate the arrays to hold the Records.
            if (count > Integer.MAX_VALUE)
                throw new UnsupportedOptionsException("XZ Index has over "
                        + Integer.MAX_VALUE + " Records");

            // Calculate approximate memory requirements and check the
            // memory usage limit.
            memoryUsage = 1 + (int)((16L * count + 1023) / 1024);
            if (memoryLimit >= 0 && memoryUsage > memoryLimit)
                throw new MemoryLimitException(memoryUsage, memoryLimit);

            // Allocate the arrays for the Records.
            unpadded = new long[(int)count];
            uncompressed = new long[(int)count];
            int record = 0;

            // Decode the Records. The stored values are cumulative so
            // that locateBlock() can binary-search them directly.
            for (int i = (int)count; i > 0; --i) {
                // Get the next Record.
                long unpaddedSize = DecoderUtil.decodeVLI(inChecked);
                long uncompressedSize = DecoderUtil.decodeVLI(inChecked);

                // Check that the input position stays sane. Since this is
                // checked only once per loop iteration instead of for
                // every input byte read, it's still possible that
                // EOFException gets thrown with corrupt input.
                if (in.position() > endPos)
                    throw new CorruptedInputException("XZ Index is corrupt");

                // Add the new Record.
                unpadded[record] = blocksSum + unpaddedSize;
                uncompressed[record] = uncompressedSum + uncompressedSize;
                ++record;
                super.add(unpaddedSize, uncompressedSize);
                assert record == recordCount;

                // Remember the uncompressed size of the largest Block.
                if (largestBlockSize < uncompressedSize)
                    largestBlockSize = uncompressedSize;
            }
        } catch (EOFException e) {
            // EOFException is caught just in case a corrupt input causes
            // DecoderUtil.decodeVLI to read too much at once.
            throw new CorruptedInputException("XZ Index is corrupt");
        }

        // Validate that the size of the Index field matches
        // Backward Size.
        int indexPaddingSize = getIndexPaddingSize();
        if (in.position() + indexPaddingSize != endPos)
            throw new CorruptedInputException("XZ Index is corrupt");

        // Index Padding
        while (indexPaddingSize-- > 0)
            if (inChecked.read() != 0x00)
                throw new CorruptedInputException("XZ Index is corrupt");

        // CRC32: compare the calculated value against the four stored
        // little-endian bytes. These are deliberately read from "in"
        // instead of "inChecked" so that they aren't added to the
        // checksum itself.
        long value = crc32.getValue();
        for (int i = 0; i < 4; ++i)
            if (((value >>> (i * 8)) & 0xFF) != in.read())
                throw new CorruptedInputException("XZ Index is corrupt");
    }

    /**
     * Sets the file offsets of this Stream based on the Stream that
     * precedes it in a concatenated .xz file.
     */
    public void setOffsets(IndexDecoder prev) {
        // NOTE: SeekableXZInputStream checks that the total number of Blocks
        // in concatenated Streams fits into an int.
        recordOffset = prev.recordOffset + (int)prev.recordCount;
        compressedOffset = prev.compressedOffset
                           + prev.getStreamSize() + prev.streamPadding;
        assert (compressedOffset & 3) == 0;
        uncompressedOffset = prev.uncompressedOffset + prev.uncompressedSum;
    }

    public int getMemoryUsage() {
        return memoryUsage;
    }

    public StreamFlags getStreamFlags() {
        return streamFlags;
    }

    public int getRecordCount() {
        // It was already checked in the constructor that it fits into an int.
        // Otherwise we couldn't have allocated the arrays.
        return (int)recordCount;
    }

    public long getUncompressedSize() {
        return uncompressedSum;
    }

    public long getLargestBlockSize() {
        return largestBlockSize;
    }

    // True if the given uncompressed file position is inside this Stream.
    public boolean hasUncompressedOffset(long pos) {
        return pos >= uncompressedOffset
               && pos < uncompressedOffset + uncompressedSum;
    }

    // True if the given Block number belongs to this Stream.
    public boolean hasRecord(int blockNumber) {
        return blockNumber >= recordOffset
               && blockNumber < recordOffset + recordCount;
    }

    /**
     * Binary-searches the cumulative uncompressed sizes for the Block
     * that contains the uncompressed position <code>target</code> and
     * fills <code>info</code> accordingly. The caller must have
     * checked hasUncompressedOffset(target) first.
     */
    public void locateBlock(BlockInfo info, long target) {
        assert target >= uncompressedOffset;
        target -= uncompressedOffset;
        assert target < uncompressedSum;

        int left = 0;
        int right = unpadded.length - 1;

        // Find the first Record whose cumulative uncompressed end
        // offset is greater than target.
        while (left < right) {
            int i = left + (right - left) / 2;

            if (uncompressed[i] <= target)
                left = i + 1;
            else
                right = i;
        }

        setBlockInfo(info, recordOffset + left);
    }

    /**
     * Fills <code>info</code> with the sizes and absolute file offsets
     * of the given Block (numbered across all Streams).
     */
    public void setBlockInfo(BlockInfo info, int blockNumber) {
        // The caller has checked that the given Block number is inside
        // this Index.
        assert blockNumber >= recordOffset;
        assert blockNumber - recordOffset < recordCount;

        info.index = this;
        info.blockNumber = blockNumber;

        int pos = blockNumber - recordOffset;

        if (pos == 0) {
            info.compressedOffset = 0;
            info.uncompressedOffset = 0;
        } else {
            // The cumulative sizes give the end of the previous Block.
            // Compressed offsets are rounded up to a multiple of four
            // because of Block Padding.
            info.compressedOffset = (unpadded[pos - 1] + 3) & ~3;
            info.uncompressedOffset = uncompressed[pos - 1];
        }

        info.unpaddedSize = unpadded[pos] - info.compressedOffset;
        info.uncompressedSize = uncompressed[pos] - info.uncompressedOffset;

        // Convert Stream-relative offsets into absolute file offsets.
        info.compressedOffset += compressedOffset
                                 + DecoderUtil.STREAM_HEADER_SIZE;
        info.uncompressedOffset += uncompressedOffset;
    }
}
diff --git a/src/org/tukaani/xz/index/IndexEncoder.java b/src/org/tukaani/xz/index/IndexEncoder.java
new file mode 100644
index 0000000..3028802
--- /dev/null
+++ b/src/org/tukaani/xz/index/IndexEncoder.java
@@ -0,0 +1,59 @@
+/*
+ * IndexEncoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.index;
+
+import java.io.OutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.zip.CheckedOutputStream;
+import org.tukaani.xz.common.EncoderUtil;
+import org.tukaani.xz.XZIOException;
+
+public class IndexEncoder extends IndexBase {
+ private final ArrayList<IndexRecord> records
+ = new ArrayList<IndexRecord>();
+
+ public IndexEncoder() {
+ super(new XZIOException("XZ Stream or its Index has grown too big"));
+ }
+
+ public void add(long unpaddedSize, long uncompressedSize)
+ throws XZIOException {
+ super.add(unpaddedSize, uncompressedSize);
+ records.add(new IndexRecord(unpaddedSize, uncompressedSize));
+ }
+
+ public void encode(OutputStream out) throws IOException {
+ java.util.zip.CRC32 crc32 = new java.util.zip.CRC32();
+ CheckedOutputStream outChecked = new CheckedOutputStream(out, crc32);
+
+ // Index Indicator
+ outChecked.write(0x00);
+
+ // Number of Records
+ EncoderUtil.encodeVLI(outChecked, recordCount);
+
+ // List of Records
+ for (IndexRecord record : records) {
+ EncoderUtil.encodeVLI(outChecked, record.unpadded);
+ EncoderUtil.encodeVLI(outChecked, record.uncompressed);
+ }
+
+ // Index Padding
+ for (int i = getIndexPaddingSize(); i > 0; --i)
+ outChecked.write(0x00);
+
+ // CRC32
+ long value = crc32.getValue();
+ for (int i = 0; i < 4; ++i)
+ out.write((byte)(value >>> (i * 8)));
+ }
+}
diff --git a/src/org/tukaani/xz/index/IndexHash.java b/src/org/tukaani/xz/index/IndexHash.java
new file mode 100644
index 0000000..61725a5
--- /dev/null
+++ b/src/org/tukaani/xz/index/IndexHash.java
@@ -0,0 +1,98 @@
+/*
+ * IndexHash
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.index;
+
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.zip.CheckedInputStream;
+import org.tukaani.xz.common.DecoderUtil;
+import org.tukaani.xz.XZIOException;
+import org.tukaani.xz.CorruptedInputException;
+
/**
 * Incrementally hashes the Records of the Blocks that have been
 * decoded, then validates that against the Records actually stored
 * in the Stream's Index field.
 */
public class IndexHash extends IndexBase {
    // SHA-256 when the JRE provides it, CRC32 otherwise. The hash is
    // only compared against another hash computed the same way, so
    // either works.
    private org.tukaani.xz.check.Check hash;

    public IndexHash() {
        super(new CorruptedInputException());

        try {
            hash = new org.tukaani.xz.check.SHA256();
        } catch (java.security.NoSuchAlgorithmException e) {
            hash = new org.tukaani.xz.check.CRC32();
        }
    }

    /** Adds one Block's sizes to the running totals and the hash. */
    public void add(long unpaddedSize, long uncompressedSize)
            throws XZIOException {
        super.add(unpaddedSize, uncompressedSize);

        // Hash both size fields as fixed-width big-endian longs
        // (ByteBuffer's default byte order).
        ByteBuffer buf = ByteBuffer.allocate(2 * 8);
        buf.putLong(unpaddedSize);
        buf.putLong(uncompressedSize);
        hash.update(buf.array());
    }

    /**
     * Reads the Index field from <code>in</code> and throws
     * CorruptedInputException if it doesn't match what was observed
     * while decoding the Blocks.
     */
    public void validate(InputStream in) throws IOException {
        // Index Indicator (0x00) has already been read by BlockInputStream
        // so add 0x00 to the CRC32 here.
        java.util.zip.CRC32 crc32 = new java.util.zip.CRC32();
        crc32.update('\0');
        CheckedInputStream inChecked = new CheckedInputStream(in, crc32);

        // Get and validate the Number of Records field.
        // If Block Header Size was corrupt and became Index Indicator,
        // this error would actually be about corrupt Block Header.
        // This is why the error message mentions both possibilities.
        long storedRecordCount = DecoderUtil.decodeVLI(inChecked);
        if (storedRecordCount != recordCount)
            throw new CorruptedInputException(
                    "XZ Block Header or the start of XZ Index is corrupt");

        // Decode and hash the Index field and compare it to
        // the hash value calculated from the decoded Blocks.
        IndexHash stored = new IndexHash();
        for (long i = 0; i < recordCount; ++i) {
            long unpaddedSize = DecoderUtil.decodeVLI(inChecked);
            long uncompressedSize = DecoderUtil.decodeVLI(inChecked);

            try {
                stored.add(unpaddedSize, uncompressedSize);
            } catch (XZIOException e) {
                throw new CorruptedInputException("XZ Index is corrupt");
            }

            // Fail early if the stored totals have already exceeded
            // the observed ones.
            if (stored.blocksSum > blocksSum
                    || stored.uncompressedSum > uncompressedSum
                    || stored.indexListSize > indexListSize)
                throw new CorruptedInputException("XZ Index is corrupt");
        }

        if (stored.blocksSum != blocksSum
                || stored.uncompressedSum != uncompressedSum
                || stored.indexListSize != indexListSize
                || !Arrays.equals(stored.hash.finish(), hash.finish()))
            throw new CorruptedInputException("XZ Index is corrupt");

        // Index Padding: must be zero bytes up to a multiple of four.
        DataInputStream inData = new DataInputStream(inChecked);
        for (int i = getIndexPaddingSize(); i > 0; --i)
            if (inData.readUnsignedByte() != 0x00)
                throw new CorruptedInputException("XZ Index is corrupt");

        // CRC32: compare the value calculated so far against the four
        // stored little-endian bytes.
        long value = crc32.getValue();
        for (int i = 0; i < 4; ++i)
            if (((value >>> (i * 8)) & 0xFF) != inData.readUnsignedByte())
                throw new CorruptedInputException("XZ Index is corrupt");
    }
}
diff --git a/src/org/tukaani/xz/index/IndexRecord.java b/src/org/tukaani/xz/index/IndexRecord.java
new file mode 100644
index 0000000..5f6ba0f
--- /dev/null
+++ b/src/org/tukaani/xz/index/IndexRecord.java
@@ -0,0 +1,20 @@
+/*
+ * IndexRecord
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.index;
+
/**
 * One Record of an XZ Index: the Unpadded Size and the Uncompressed
 * Size of a single Block.
 */
class IndexRecord {
    final long unpadded;
    final long uncompressed;

    IndexRecord(long unpadded, long uncompressed) {
        this.unpadded = unpadded;
        this.uncompressed = uncompressed;
    }
}
diff --git a/src/org/tukaani/xz/lz/BT4.java b/src/org/tukaani/xz/lz/BT4.java
new file mode 100644
index 0000000..6c46feb
--- /dev/null
+++ b/src/org/tukaani/xz/lz/BT4.java
@@ -0,0 +1,265 @@
+/*
+ * Binary Tree match finder with 2-, 3-, and 4-byte hashing
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lz;
+
+import org.tukaani.xz.ArrayCache;
+
/**
 * Binary Tree match finder with 2-, 3-, and 4-byte hashing.
 * <p>
 * The tree array holds two nodes per dictionary position
 * (hence <code>cyclicSize * 2</code>): one subtree of positions whose
 * data compares smaller and one of positions that compare bigger.
 */
final class BT4 extends LZEncoder {
    private final Hash234 hash;
    private final int[] tree;
    private final Matches matches;
    private final int depthLimit;

    // Ring buffer of dictSize + 1 positions; cyclicPos walks it,
    // lzPos is the monotonically increasing virtual position.
    private final int cyclicSize;
    private int cyclicPos = -1;
    private int lzPos;

    /**
     * Gets approximate memory usage of the match finder as kibibytes.
     */
    static int getMemoryUsage(int dictSize) {
        return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 8) + 10;
    }

    /**
     * Creates a new LZEncoder with the BT4 match finder.
     * See <code>LZEncoder.getInstance</code> for parameter descriptions.
     */
    BT4(int dictSize, int beforeSizeMin, int readAheadMax,
            int niceLen, int matchLenMax, int depthLimit,
            ArrayCache arrayCache) {
        super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax,
              arrayCache);

        cyclicSize = dictSize + 1;
        lzPos = cyclicSize;

        hash = new Hash234(dictSize, arrayCache);
        tree = arrayCache.getIntArray(cyclicSize * 2, false);

        // Subtracting 1 because the shortest match that this match
        // finder can find is 2 bytes, so there's no need to reserve
        // space for one-byte matches.
        matches = new Matches(niceLen - 1);

        // Use a default depth limit if no other value was specified.
        this.depthLimit = depthLimit > 0 ? depthLimit : 16 + niceLen / 2;
    }

    public void putArraysToCache(ArrayCache arrayCache) {
        arrayCache.putArray(tree);
        hash.putArraysToCache(arrayCache);
        super.putArraysToCache(arrayCache);
    }

    /**
     * Moves to the next byte, checks that there is enough available space,
     * and possibly normalizes the hash tables and the binary tree.
     *
     * @return number of bytes available, including the current byte
     */
    private int movePos() {
        int avail = movePos(niceLen, 4);

        if (avail != 0) {
            // Rescale all stored positions before lzPos would overflow.
            if (++lzPos == Integer.MAX_VALUE) {
                int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
                hash.normalize(normalizationOffset);
                normalize(tree, cyclicSize * 2, normalizationOffset);
                lzPos -= normalizationOffset;
            }

            if (++cyclicPos == cyclicSize)
                cyclicPos = 0;
        }

        return avail;
    }

    /**
     * Runs the match finder for the current byte and returns the matches
     * found, updating the binary tree as a side effect.
     */
    public Matches getMatches() {
        matches.count = 0;

        int matchLenLimit = matchLenMax;
        int niceLenLimit = niceLen;
        int avail = movePos();

        if (avail < matchLenLimit) {
            if (avail == 0)
                return matches;

            matchLenLimit = avail;
            if (niceLenLimit > avail)
                niceLenLimit = avail;
        }

        hash.calcHashes(buf, readPos);
        int delta2 = lzPos - hash.getHash2Pos();
        int delta3 = lzPos - hash.getHash3Pos();
        int currentMatch = hash.getHash4Pos();
        hash.updateTables(lzPos);

        int lenBest = 0;

        // See if the hash from the first two bytes found a match.
        // The hashing algorithm guarantees that if the first byte
        // matches, also the second byte does, so there's no need to
        // test the second byte.
        if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
            lenBest = 2;
            matches.len[0] = 2;
            matches.dist[0] = delta2 - 1;
            matches.count = 1;
        }

        // See if the hash from the first three bytes found a match that
        // is different from the match possibly found by the two-byte hash.
        // Also here the hashing algorithm guarantees that if the first byte
        // matches, also the next two bytes do.
        if (delta2 != delta3 && delta3 < cyclicSize
                && buf[readPos - delta3] == buf[readPos]) {
            lenBest = 3;
            matches.dist[matches.count++] = delta3 - 1;
            delta2 = delta3;
        }

        // If a match was found, see how long it is.
        if (matches.count > 0) {
            while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
                    == buf[readPos + lenBest])
                ++lenBest;

            matches.len[matches.count - 1] = lenBest;

            // Return if it is long enough (niceLen or reached the end of
            // the dictionary). The tree still must be updated for this
            // position, which skip() does.
            if (lenBest >= niceLenLimit) {
                skip(niceLenLimit, currentMatch);
                return matches;
            }
        }

        // Long enough match wasn't found so easily. Look for better matches
        // from the binary tree.
        if (lenBest < 3)
            lenBest = 3;

        int depth = depthLimit;

        // ptr0/ptr1 are the tree slots where the "bigger" and "smaller"
        // subtrees of the current position will be attached.
        int ptr0 = (cyclicPos << 1) + 1;
        int ptr1 = cyclicPos << 1;
        int len0 = 0;
        int len1 = 0;

        while (true) {
            int delta = lzPos - currentMatch;

            // Return if the search depth limit has been reached or
            // if the distance of the potential match exceeds the
            // dictionary size.
            if (depth-- == 0 || delta >= cyclicSize) {
                tree[ptr0] = 0;
                tree[ptr1] = 0;
                return matches;
            }

            int pair = (cyclicPos - delta
                    + (delta > cyclicPos ? cyclicSize : 0)) << 1;
            // Both subtrees share at least min(len0, len1) leading bytes
            // with the current position, so comparison can start there.
            int len = Math.min(len0, len1);

            if (buf[readPos + len - delta] == buf[readPos + len]) {
                while (++len < matchLenLimit)
                    if (buf[readPos + len - delta] != buf[readPos + len])
                        break;

                if (len > lenBest) {
                    lenBest = len;
                    matches.len[matches.count] = len;
                    matches.dist[matches.count] = delta - 1;
                    ++matches.count;

                    if (len >= niceLenLimit) {
                        tree[ptr1] = tree[pair];
                        tree[ptr0] = tree[pair + 1];
                        return matches;
                    }
                }
            }

            // Descend into the subtree whose data compares on the correct
            // side of the current position, re-rooting as we go.
            if ((buf[readPos + len - delta] & 0xFF)
                    < (buf[readPos + len] & 0xFF)) {
                tree[ptr1] = currentMatch;
                ptr1 = pair + 1;
                currentMatch = tree[ptr1];
                len1 = len;
            } else {
                tree[ptr0] = currentMatch;
                ptr0 = pair;
                currentMatch = tree[ptr0];
                len0 = len;
            }
        }
    }

    /**
     * Updates the binary tree for the current position without collecting
     * matches for the caller.
     */
    private void skip(int niceLenLimit, int currentMatch) {
        int depth = depthLimit;

        int ptr0 = (cyclicPos << 1) + 1;
        int ptr1 = cyclicPos << 1;
        int len0 = 0;
        int len1 = 0;

        while (true) {
            int delta = lzPos - currentMatch;

            if (depth-- == 0 || delta >= cyclicSize) {
                tree[ptr0] = 0;
                tree[ptr1] = 0;
                return;
            }

            int pair = (cyclicPos - delta
                    + (delta > cyclicPos ? cyclicSize : 0)) << 1;
            int len = Math.min(len0, len1);

            if (buf[readPos + len - delta] == buf[readPos + len]) {
                // No need to look for longer matches than niceLenLimit
                // because we only are updating the tree, not returning
                // matches found to the caller.
                do {
                    if (++len == niceLenLimit) {
                        tree[ptr1] = tree[pair];
                        tree[ptr0] = tree[pair + 1];
                        return;
                    }
                } while (buf[readPos + len - delta] == buf[readPos + len]);
            }

            if ((buf[readPos + len - delta] & 0xFF)
                    < (buf[readPos + len] & 0xFF)) {
                tree[ptr1] = currentMatch;
                ptr1 = pair + 1;
                currentMatch = tree[ptr1];
                len1 = len;
            } else {
                tree[ptr0] = currentMatch;
                ptr0 = pair;
                currentMatch = tree[ptr0];
                len0 = len;
            }
        }
    }

    /**
     * Skips the given number of bytes, still feeding each position into
     * the hash tables and the binary tree so future searches see them.
     */
    public void skip(int len) {
        while (len-- > 0) {
            int niceLenLimit = niceLen;
            int avail = movePos();

            if (avail < niceLenLimit) {
                // Not enough input yet; movePos() marked it pending.
                if (avail == 0)
                    continue;

                niceLenLimit = avail;
            }

            hash.calcHashes(buf, readPos);
            int currentMatch = hash.getHash4Pos();
            hash.updateTables(lzPos);

            skip(niceLenLimit, currentMatch);
        }
    }
}
diff --git a/src/org/tukaani/xz/lz/CRC32Hash.java b/src/org/tukaani/xz/lz/CRC32Hash.java
new file mode 100644
index 0000000..2adfdbf
--- /dev/null
+++ b/src/org/tukaani/xz/lz/CRC32Hash.java
@@ -0,0 +1,35 @@
+/*
+ * CRC32Hash
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lz;
+
+/**
+ * Provides a CRC32 table using the polynomial from IEEE 802.3.
+ */
/**
 * Provides a CRC32 table using the polynomial from IEEE 802.3.
 * The table is built once in a static initializer and shared by
 * all subclasses.
 */
class CRC32Hash {
    private static final int CRC32_POLY = 0xEDB88320;

    static final int[] crcTable = new int[256];

    static {
        // Standard bitwise table construction: for every possible byte
        // value, run eight rounds of the shift-and-conditionally-xor step.
        for (int b = 0; b < 256; ++b) {
            int r = b;

            for (int round = 8; round > 0; --round)
                r = (r & 1) != 0 ? (r >>> 1) ^ CRC32_POLY : r >>> 1;

            crcTable[b] = r;
        }
    }
}
diff --git a/src/org/tukaani/xz/lz/HC4.java b/src/org/tukaani/xz/lz/HC4.java
new file mode 100644
index 0000000..d2b4e84
--- /dev/null
+++ b/src/org/tukaani/xz/lz/HC4.java
@@ -0,0 +1,210 @@
+/*
+ * Hash Chain match finder with 2-, 3-, and 4-byte hashing
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lz;
+
+import org.tukaani.xz.ArrayCache;
+
/**
 * Hash Chain match finder with 2-, 3-, and 4-byte hashing.
 * <p>
 * Faster but weaker than BT4: candidate positions are kept in a
 * singly-linked chain indexed by the cyclic buffer position.
 */
final class HC4 extends LZEncoder {
    private final Hash234 hash;
    private final int[] chain;
    private final Matches matches;
    private final int depthLimit;

    // Ring buffer of dictSize + 1 positions; cyclicPos walks it,
    // lzPos is the monotonically increasing virtual position.
    private final int cyclicSize;
    private int cyclicPos = -1;
    private int lzPos;

    /**
     * Gets approximate memory usage of the match finder as kibibytes.
     */
    static int getMemoryUsage(int dictSize) {
        return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 4) + 10;
    }

    /**
     * Creates a new LZEncoder with the HC4 match finder.
     * See <code>LZEncoder.getInstance</code> for parameter descriptions.
     */
    HC4(int dictSize, int beforeSizeMin, int readAheadMax,
            int niceLen, int matchLenMax, int depthLimit,
            ArrayCache arrayCache) {
        super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax,
              arrayCache);

        hash = new Hash234(dictSize, arrayCache);

        // +1 because we need dictSize bytes of history + the current byte.
        cyclicSize = dictSize + 1;
        chain = arrayCache.getIntArray(cyclicSize, false);
        lzPos = cyclicSize;

        // Subtracting 1 because the shortest match that this match
        // finder can find is 2 bytes, so there's no need to reserve
        // space for one-byte matches.
        matches = new Matches(niceLen - 1);

        // Use a default depth limit if no other value was specified.
        // The default is just something based on experimentation;
        // it's nothing magic.
        this.depthLimit = (depthLimit > 0) ? depthLimit : 4 + niceLen / 4;
    }

    public void putArraysToCache(ArrayCache arrayCache) {
        arrayCache.putArray(chain);
        hash.putArraysToCache(arrayCache);
        super.putArraysToCache(arrayCache);
    }

    /**
     * Moves to the next byte, checks that there is enough available space,
     * and possibly normalizes the hash tables and the hash chain.
     *
     * @return number of bytes available, including the current byte
     */
    private int movePos() {
        int avail = movePos(4, 4);

        if (avail != 0) {
            // Rescale all stored positions before lzPos would overflow.
            if (++lzPos == Integer.MAX_VALUE) {
                int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
                hash.normalize(normalizationOffset);
                normalize(chain, cyclicSize, normalizationOffset);
                lzPos -= normalizationOffset;
            }

            if (++cyclicPos == cyclicSize)
                cyclicPos = 0;
        }

        return avail;
    }

    /**
     * Runs the match finder for the current byte and returns the matches
     * found, updating the hash chain as a side effect.
     */
    public Matches getMatches() {
        matches.count = 0;
        int matchLenLimit = matchLenMax;
        int niceLenLimit = niceLen;
        int avail = movePos();

        if (avail < matchLenLimit) {
            if (avail == 0)
                return matches;

            matchLenLimit = avail;
            if (niceLenLimit > avail)
                niceLenLimit = avail;
        }

        hash.calcHashes(buf, readPos);
        int delta2 = lzPos - hash.getHash2Pos();
        int delta3 = lzPos - hash.getHash3Pos();
        int currentMatch = hash.getHash4Pos();
        hash.updateTables(lzPos);

        // Link the current position into the 4-byte hash chain.
        chain[cyclicPos] = currentMatch;

        int lenBest = 0;

        // See if the hash from the first two bytes found a match.
        // The hashing algorithm guarantees that if the first byte
        // matches, also the second byte does, so there's no need to
        // test the second byte.
        if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
            lenBest = 2;
            matches.len[0] = 2;
            matches.dist[0] = delta2 - 1;
            matches.count = 1;
        }

        // See if the hash from the first three bytes found a match that
        // is different from the match possibly found by the two-byte hash.
        // Also here the hashing algorithm guarantees that if the first byte
        // matches, also the next two bytes do.
        if (delta2 != delta3 && delta3 < cyclicSize
                && buf[readPos - delta3] == buf[readPos]) {
            lenBest = 3;
            matches.dist[matches.count++] = delta3 - 1;
            delta2 = delta3;
        }

        // If a match was found, see how long it is.
        if (matches.count > 0) {
            while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
                    == buf[readPos + lenBest])
                ++lenBest;

            matches.len[matches.count - 1] = lenBest;

            // Return if it is long enough (niceLen or reached the end of
            // the dictionary).
            if (lenBest >= niceLenLimit)
                return matches;
        }

        // Long enough match wasn't found so easily. Look for better matches
        // from the hash chain.
        if (lenBest < 3)
            lenBest = 3;

        int depth = depthLimit;

        while (true) {
            int delta = lzPos - currentMatch;

            // Return if the search depth limit has been reached or
            // if the distance of the potential match exceeds the
            // dictionary size.
            if (depth-- == 0 || delta >= cyclicSize)
                return matches;

            currentMatch = chain[cyclicPos - delta
                    + (delta > cyclicPos ? cyclicSize : 0)];

            // Test the first byte and the first new byte that would give us
            // a match that is at least one byte longer than lenBest. This
            // way too-short matches get quickly skipped.
            if (buf[readPos + lenBest - delta] == buf[readPos + lenBest]
                    && buf[readPos - delta] == buf[readPos]) {
                // Calculate the length of the match.
                int len = 0;
                while (++len < matchLenLimit)
                    if (buf[readPos + len - delta] != buf[readPos + len])
                        break;

                // Use the match if and only if it is better than the longest
                // match found so far.
                if (len > lenBest) {
                    lenBest = len;
                    matches.len[matches.count] = len;
                    matches.dist[matches.count] = delta - 1;
                    ++matches.count;

                    // Return if it is long enough (niceLen or reached the
                    // end of the dictionary).
                    if (len >= niceLenLimit)
                        return matches;
                }
            }
        }
    }

    /**
     * Skips the given number of bytes, still feeding each position into
     * the hash tables and the hash chain so future searches see them.
     */
    public void skip(int len) {
        assert len >= 0;

        while (len-- > 0) {
            if (movePos() != 0) {
                // Update the hash chain and hash tables.
                hash.calcHashes(buf, readPos);
                chain[cyclicPos] = hash.getHash4Pos();
                hash.updateTables(lzPos);
            }
        }
    }
}
diff --git a/src/org/tukaani/xz/lz/Hash234.java b/src/org/tukaani/xz/lz/Hash234.java
new file mode 100644
index 0000000..299ec44
--- /dev/null
+++ b/src/org/tukaani/xz/lz/Hash234.java
@@ -0,0 +1,102 @@
+/*
+ * 2-, 3-, and 4-byte hashing
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lz;
+
+import org.tukaani.xz.ArrayCache;
+
+final class Hash234 extends CRC32Hash {
+ private static final int HASH_2_SIZE = 1 << 10;
+ private static final int HASH_2_MASK = HASH_2_SIZE - 1;
+
+ private static final int HASH_3_SIZE = 1 << 16;
+ private static final int HASH_3_MASK = HASH_3_SIZE - 1;
+
+ private final int hash4Mask;
+
+ private final int[] hash2Table;
+ private final int[] hash3Table;
+ private final int[] hash4Table;
+ private final int hash4Size;
+
+ private int hash2Value = 0;
+ private int hash3Value = 0;
+ private int hash4Value = 0;
+
+ static int getHash4Size(int dictSize) {
+ int h = dictSize - 1;
+ h |= h >>> 1;
+ h |= h >>> 2;
+ h |= h >>> 4;
+ h |= h >>> 8;
+ h >>>= 1;
+ h |= 0xFFFF;
+ if (h > (1 << 24))
+ h >>>= 1;
+
+ return h + 1;
+ }
+
+ static int getMemoryUsage(int dictSize) {
+ // Sizes of the hash arrays + a little extra
+ return (HASH_2_SIZE + HASH_3_SIZE + getHash4Size(dictSize))
+ / (1024 / 4) + 4;
+ }
+
+ Hash234(int dictSize, ArrayCache arrayCache) {
+ hash2Table = arrayCache.getIntArray(HASH_2_SIZE, true);
+ hash3Table = arrayCache.getIntArray(HASH_3_SIZE, true);
+
+ hash4Size = getHash4Size(dictSize);
+ hash4Table = arrayCache.getIntArray(hash4Size, true);
+ hash4Mask = hash4Size - 1;
+ }
+
+ void putArraysToCache(ArrayCache arrayCache) {
+ arrayCache.putArray(hash4Table);
+ arrayCache.putArray(hash3Table);
+ arrayCache.putArray(hash2Table);
+ }
+
+ void calcHashes(byte[] buf, int off) {
+ int temp = crcTable[buf[off] & 0xFF] ^ (buf[off + 1] & 0xFF);
+ hash2Value = temp & HASH_2_MASK;
+
+ temp ^= (buf[off + 2] & 0xFF) << 8;
+ hash3Value = temp & HASH_3_MASK;
+
+ temp ^= crcTable[buf[off + 3] & 0xFF] << 5;
+ hash4Value = temp & hash4Mask;
+ }
+
+ int getHash2Pos() {
+ return hash2Table[hash2Value];
+ }
+
+ int getHash3Pos() {
+ return hash3Table[hash3Value];
+ }
+
+ int getHash4Pos() {
+ return hash4Table[hash4Value];
+ }
+
+ void updateTables(int pos) {
+ hash2Table[hash2Value] = pos;
+ hash3Table[hash3Value] = pos;
+ hash4Table[hash4Value] = pos;
+ }
+
+ void normalize(int normalizeOffset) {
+ LZEncoder.normalize(hash2Table, HASH_2_SIZE, normalizeOffset);
+ LZEncoder.normalize(hash3Table, HASH_3_SIZE, normalizeOffset);
+ LZEncoder.normalize(hash4Table, hash4Size, normalizeOffset);
+ }
+}
diff --git a/src/org/tukaani/xz/lz/LZDecoder.java b/src/org/tukaani/xz/lz/LZDecoder.java
new file mode 100644
index 0000000..85b2ca1
--- /dev/null
+++ b/src/org/tukaani/xz/lz/LZDecoder.java
@@ -0,0 +1,133 @@
+/*
+ * LZDecoder
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lz;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import org.tukaani.xz.ArrayCache;
+import org.tukaani.xz.CorruptedInputException;
+
/**
 * Sliding-window dictionary buffer for LZ-based decoding.
 * <p>
 * Decoded bytes are appended at <code>pos</code>; <code>repeat</code>
 * copies already-decoded bytes from a backward distance, wrapping around
 * the end of the buffer when needed. <code>flush</code> hands finished
 * bytes to the caller.
 */
public final class LZDecoder {
    private final byte[] buf;
    private final int bufSize; // To avoid buf.length with an array-cached buf.
    private int start = 0;     // First byte not yet flushed to the caller.
    private int pos = 0;       // Next write position.
    private int full = 0;      // Number of valid history bytes in buf.
    private int limit = 0;     // Writing stops when pos reaches this.
    private int pendingLen = 0;  // Remainder of a repeat() cut off by limit.
    private int pendingDist = 0; // Distance of that pending repeat.

    public LZDecoder(int dictSize, byte[] presetDict, ArrayCache arrayCache) {
        bufSize = dictSize;
        buf = arrayCache.getByteArray(bufSize, false);

        if (presetDict != null) {
            // Only the tail of a too-long preset dictionary is used.
            pos = Math.min(presetDict.length, dictSize);
            full = pos;
            start = pos;
            System.arraycopy(presetDict, presetDict.length - pos, buf, 0, pos);
        }
    }

    public void putArraysToCache(ArrayCache arrayCache) {
        arrayCache.putArray(buf);
    }

    public void reset() {
        start = 0;
        pos = 0;
        full = 0;
        limit = 0;
        // NOTE(review): only the last byte is cleared here — presumably
        // enough because full == 0 prevents reads of other stale bytes;
        // confirm against getByte()/repeat() callers.
        buf[bufSize - 1] = 0x00;
    }

    /**
     * Limits how many new bytes may be produced before the caller
     * must flush; capped at the end of the buffer.
     */
    public void setLimit(int outMax) {
        if (bufSize - pos <= outMax)
            limit = bufSize;
        else
            limit = pos + outMax;
    }

    public boolean hasSpace() {
        return pos < limit;
    }

    public boolean hasPending() {
        return pendingLen > 0;
    }

    public int getPos() {
        return pos;
    }

    /**
     * Gets the byte at the given backward distance (0 = most recent byte),
     * wrapping to the end of the buffer when the distance reaches past
     * the current position.
     */
    public int getByte(int dist) {
        int offset = pos - dist - 1;
        if (dist >= pos)
            offset += bufSize;

        return buf[offset] & 0xFF;
    }

    public void putByte(byte b) {
        buf[pos++] = b;

        if (full < pos)
            full = pos;
    }

    /**
     * Copies <code>len</code> bytes from the given backward distance.
     * If the output limit is hit first, the remainder is remembered in
     * pendingLen/pendingDist and finished by <code>repeatPending</code>.
     *
     * @throws CorruptedInputException if dist reaches outside the
     *                                 decoded history
     */
    public void repeat(int dist, int len) throws IOException {
        if (dist < 0 || dist >= full)
            throw new CorruptedInputException();

        int left = Math.min(limit - pos, len);
        pendingLen = len - left;
        pendingDist = dist;

        int back = pos - dist - 1;
        if (dist >= pos)
            back += bufSize;

        // Byte-by-byte copy: source and destination may overlap
        // (dist < len is valid LZ77), and the source wraps around.
        do {
            buf[pos++] = buf[back++];
            if (back == bufSize)
                back = 0;
        } while (--left > 0);

        if (full < pos)
            full = pos;
    }

    /**
     * Finishes a repeat that was previously truncated by the limit.
     */
    public void repeatPending() throws IOException {
        if (pendingLen > 0)
            repeat(pendingDist, pendingLen);
    }

    /**
     * Reads up to <code>len</code> literal bytes from the stream directly
     * into the dictionary (LZMA2 uncompressed chunks).
     */
    public void copyUncompressed(DataInputStream inData, int len)
            throws IOException {
        int copySize = Math.min(bufSize - pos, len);
        inData.readFully(buf, pos, copySize);
        pos += copySize;

        if (full < pos)
            full = pos;
    }

    /**
     * Copies the not-yet-flushed bytes [start, pos) into <code>out</code>
     * and wraps the write position when the end of the buffer is reached.
     *
     * @return number of bytes copied
     */
    public int flush(byte[] out, int outOff) {
        int copySize = pos - start;
        if (pos == bufSize)
            pos = 0;

        System.arraycopy(buf, start, out, outOff, copySize);
        start = pos;

        return copySize;
    }
}
diff --git a/src/org/tukaani/xz/lz/LZEncoder.java b/src/org/tukaani/xz/lz/LZEncoder.java
new file mode 100644
index 0000000..0f13029
--- /dev/null
+++ b/src/org/tukaani/xz/lz/LZEncoder.java
@@ -0,0 +1,428 @@
+/*
+ * LZEncoder
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lz;
+
+import java.io.OutputStream;
+import java.io.IOException;
+import org.tukaani.xz.ArrayCache;
+
/**
 * Base class of the LZ match finders: owns the sliding input window and
 * the read/write bookkeeping; subclasses (HC4, BT4) implement the actual
 * match searching.
 */
public abstract class LZEncoder {
    public static final int MF_HC4 = 0x04;
    public static final int MF_BT4 = 0x14;

    /**
     * Number of bytes to keep available before the current byte
     * when moving the LZ window.
     */
    private final int keepSizeBefore;

    /**
     * Number of bytes that must be available, the current byte included,
     * to make hasEnoughData return true. Flushing and finishing are
     * naturally exceptions to this since there cannot be any data after
     * the end of the uncompressed input.
     */
    private final int keepSizeAfter;

    final int matchLenMax;
    final int niceLen;

    final byte[] buf;
    final int bufSize; // To avoid buf.length with an array-cached buf.

    int readPos = -1;            // Position of the current byte in buf.
    private int readLimit = -1;  // Last position the encoder may read.
    private boolean finishing = false;
    private int writePos = 0;    // End of the data copied into buf so far.
    private int pendingSize = 0; // Bytes not yet run through the match finder.

    /**
     * Subtracts the normalization offset from every stored position,
     * clamping entries that would go non-positive to zero.
     */
    static void normalize(int[] positions, int positionsCount,
                          int normalizationOffset) {
        for (int i = 0; i < positionsCount; ++i) {
            if (positions[i] <= normalizationOffset)
                positions[i] = 0;
            else
                positions[i] -= normalizationOffset;
        }
    }

    /**
     * Gets the size of the LZ window buffer that needs to be allocated.
     */
    private static int getBufSize(
            int dictSize, int extraSizeBefore, int extraSizeAfter,
            int matchLenMax) {
        int keepSizeBefore = extraSizeBefore + dictSize;
        int keepSizeAfter = extraSizeAfter + matchLenMax;
        // Extra room so the window isn't moved on every call; capped
        // at 512 MiB.
        int reserveSize = Math.min(dictSize / 2 + (256 << 10), 512 << 20);
        return keepSizeBefore + keepSizeAfter + reserveSize;
    }

    /**
     * Gets approximate memory usage of the LZEncoder base structure and
     * the match finder as kibibytes.
     */
    public static int getMemoryUsage(
            int dictSize, int extraSizeBefore, int extraSizeAfter,
            int matchLenMax, int mf) {
        // Buffer size + a little extra
        int m = getBufSize(dictSize, extraSizeBefore, extraSizeAfter,
                           matchLenMax) / 1024 + 10;

        switch (mf) {
            case MF_HC4:
                m += HC4.getMemoryUsage(dictSize);
                break;

            case MF_BT4:
                m += BT4.getMemoryUsage(dictSize);
                break;

            default:
                throw new IllegalArgumentException();
        }

        return m;
    }

    /**
     * Creates a new LZEncoder.
     * <p>
     * @param dictSize    dictionary size
     *
     * @param extraSizeBefore
     *                    number of bytes to keep available in the
     *                    history in addition to dictSize
     *
     * @param extraSizeAfter
     *                    number of bytes that must be available
     *                    after current position + matchLenMax
     *
     * @param niceLen     if a match of at least <code>niceLen</code>
     *                    bytes is found, be happy with it and don't
     *                    stop looking for longer matches
     *
     * @param matchLenMax don't test for matches longer than
     *                    <code>matchLenMax</code> bytes
     *
     * @param mf          match finder ID
     *
     * @param depthLimit  match finder search depth limit
     */
    public static LZEncoder getInstance(
            int dictSize, int extraSizeBefore, int extraSizeAfter,
            int niceLen, int matchLenMax, int mf, int depthLimit,
            ArrayCache arrayCache) {
        switch (mf) {
            case MF_HC4:
                return new HC4(dictSize, extraSizeBefore, extraSizeAfter,
                               niceLen, matchLenMax, depthLimit, arrayCache);

            case MF_BT4:
                return new BT4(dictSize, extraSizeBefore, extraSizeAfter,
                               niceLen, matchLenMax, depthLimit, arrayCache);
        }

        throw new IllegalArgumentException();
    }

    /**
     * Creates a new LZEncoder. See <code>getInstance</code>.
     */
    LZEncoder(int dictSize, int extraSizeBefore, int extraSizeAfter,
              int niceLen, int matchLenMax, ArrayCache arrayCache) {
        bufSize = getBufSize(dictSize, extraSizeBefore, extraSizeAfter,
                             matchLenMax);
        buf = arrayCache.getByteArray(bufSize, false);

        keepSizeBefore = extraSizeBefore + dictSize;
        keepSizeAfter = extraSizeAfter + matchLenMax;

        this.matchLenMax = matchLenMax;
        this.niceLen = niceLen;
    }

    public void putArraysToCache(ArrayCache arrayCache) {
        arrayCache.putArray(buf);
    }

    /**
     * Sets a preset dictionary. If a preset dictionary is wanted, this
     * function must be called immediately after creating the LZEncoder
     * before any data has been encoded.
     */
    public void setPresetDict(int dictSize, byte[] presetDict) {
        assert !isStarted();
        assert writePos == 0;

        if (presetDict != null) {
            // If the preset dictionary buffer is bigger than the dictionary
            // size, copy only the tail of the preset dictionary.
            int copySize = Math.min(presetDict.length, dictSize);
            int offset = presetDict.length - copySize;
            System.arraycopy(presetDict, offset, buf, 0, copySize);
            writePos += copySize;
            // Feed the preset bytes through the match finder without
            // encoding them.
            skip(copySize);
        }
    }

    /**
     * Moves data from the end of the buffer to the beginning, discarding
     * old data and making space for new input.
     */
    private void moveWindow() {
        // Align the move to a multiple of 16 bytes. LZMA2 needs this
        // because it uses the lowest bits from readPos to get the
        // alignment of the uncompressed data.
        int moveOffset = (readPos + 1 - keepSizeBefore) & ~15;
        int moveSize = writePos - moveOffset;
        System.arraycopy(buf, moveOffset, buf, 0, moveSize);

        readPos -= moveOffset;
        readLimit -= moveOffset;
        writePos -= moveOffset;
    }

    /**
     * Copies new data into the LZEncoder's buffer.
     *
     * @return the number of bytes actually copied; may be less
     *         than <code>len</code> if the buffer became full
     */
    public int fillWindow(byte[] in, int off, int len) {
        assert !finishing;

        // Move the sliding window if needed.
        if (readPos >= bufSize - keepSizeAfter)
            moveWindow();

        // Try to fill the dictionary buffer. If it becomes full,
        // some of the input bytes may be left unused.
        if (len > bufSize - writePos)
            len = bufSize - writePos;

        System.arraycopy(in, off, buf, writePos, len);
        writePos += len;

        // Set the new readLimit but only if there's enough data to allow
        // encoding of at least one more byte.
        if (writePos >= keepSizeAfter)
            readLimit = writePos - keepSizeAfter;

        processPendingBytes();

        // Tell the caller how much input we actually copied into
        // the dictionary.
        return len;
    }

    /**
     * Process pending bytes remaining from preset dictionary initialization
     * or encoder flush operation.
     */
    private void processPendingBytes() {
        // After flushing or setting a preset dictionary there will be
        // pending data that hasn't been run through the match finder yet.
        // Run it through the match finder now if there is enough new data
        // available (readPos < readLimit) that the encoder may encode at
        // least one more input byte. This way we don't waste any time
        // looping in the match finder (and marking the same bytes as
        // pending again) if the application provides very little new data
        // per write call.
        if (pendingSize > 0 && readPos < readLimit) {
            readPos -= pendingSize;
            int oldPendingSize = pendingSize;
            pendingSize = 0;
            skip(oldPendingSize);
            assert pendingSize < oldPendingSize;
        }
    }

    /**
     * Returns true if at least one byte has already been run through
     * the match finder.
     */
    public boolean isStarted() {
        return readPos != -1;
    }

    /**
     * Marks that all the input needs to be made available in
     * the encoded output.
     */
    public void setFlushing() {
        readLimit = writePos - 1;
        processPendingBytes();
    }

    /**
     * Marks that there is no more input remaining. The read position
     * can be advanced until the end of the data.
     */
    public void setFinishing() {
        readLimit = writePos - 1;
        finishing = true;
        processPendingBytes();
    }

    /**
     * Tests if there is enough input available to let the caller encode
     * at least one more byte.
     */
    public boolean hasEnoughData(int alreadyReadLen) {
        return readPos - alreadyReadLen < readLimit;
    }

    /**
     * Writes <code>len</code> already-read bytes, starting
     * <code>backward</code> bytes behind the read position, to the stream.
     */
    public void copyUncompressed(OutputStream out, int backward, int len)
            throws IOException {
        out.write(buf, readPos + 1 - backward, len);
    }

    /**
     * Get the number of bytes available, including the current byte.
     * <p>
     * Note that the result is undefined if <code>getMatches</code> or
     * <code>skip</code> hasn't been called yet and no preset dictionary
     * is being used.
     */
    public int getAvail() {
        assert isStarted();
        return writePos - readPos;
    }

    /**
     * Gets the lowest four bits of the absolute offset of the current byte.
     * Bits other than the lowest four are undefined.
     */
    public int getPos() {
        return readPos;
    }

    /**
     * Gets the byte from the given backward offset.
     * <p>
     * The current byte is at <code>0</code>, the previous byte
     * at <code>1</code> etc. To get a byte at zero-based distance,
     * use <code>getByte(dist + 1)</code>.
     * <p>
     * This function is equivalent to <code>getByte(0, backward)</code>.
     */
    public int getByte(int backward) {
        return buf[readPos - backward] & 0xFF;
    }

    /**
     * Gets the byte from the given forward minus backward offset.
     * The forward offset is added to the current position. This lets
     * one read bytes ahead of the current byte.
     */
    public int getByte(int forward, int backward) {
        return buf[readPos + forward - backward] & 0xFF;
    }

    /**
     * Get the length of a match at the given distance.
     *
     * @param dist     zero-based distance of the match to test
     * @param lenLimit don't test for a match longer than this
     *
     * @return length of the match; it is in the range [0, lenLimit]
     */
    public int getMatchLen(int dist, int lenLimit) {
        int backPos = readPos - dist - 1;
        int len = 0;

        while (len < lenLimit && buf[readPos + len] == buf[backPos + len])
            ++len;

        return len;
    }

    /**
     * Get the length of a match at the given distance and forward offset.
     *
     * @param forward  forward offset
     * @param dist     zero-based distance of the match to test
     * @param lenLimit don't test for a match longer than this
     *
     * @return length of the match; it is in the range [0, lenLimit]
     */
    public int getMatchLen(int forward, int dist, int lenLimit) {
        int curPos = readPos + forward;
        int backPos = curPos - dist - 1;
        int len = 0;

        while (len < lenLimit && buf[curPos + len] == buf[backPos + len])
            ++len;

        return len;
    }

    /**
     * Verifies that the matches returned by the match finder are valid.
     * This is meant to be used in an assert statement. This is totally
     * useless for actual encoding since match finder's results should
     * naturally always be valid if it isn't broken.
     *
     * @param matches return value from <code>getMatches</code>
     *
     * @return true if matches are valid, false if match finder is broken
     */
    public boolean verifyMatches(Matches matches) {
        int lenLimit = Math.min(getAvail(), matchLenMax);

        for (int i = 0; i < matches.count; ++i)
            if (getMatchLen(matches.dist[i], lenLimit) != matches.len[i])
                return false;

        return true;
    }

    /**
     * Moves to the next byte, checks if there is enough input available,
     * and returns the amount of input available.
     *
     * @param requiredForFlushing
     *                      minimum number of available bytes when
     *                      flushing; encoding may be continued with
     *                      new input after flushing
     * @param requiredForFinishing
     *                      minimum number of available bytes when
     *                      finishing; encoding must not be continued
     *                      after finishing or the match finder state
     *                      may be corrupt
     *
     * @return the number of bytes available or zero if there
     *         is not enough input available
     */
    int movePos(int requiredForFlushing, int requiredForFinishing) {
        assert requiredForFlushing >= requiredForFinishing;

        ++readPos;
        int avail = writePos - readPos;

        if (avail < requiredForFlushing) {
            if (avail < requiredForFinishing || !finishing) {
                // Not enough data; remember the byte as pending so
                // processPendingBytes() can replay it later.
                ++pendingSize;
                avail = 0;
            }
        }

        return avail;
    }

    /**
     * Runs match finder for the next byte and returns the matches found.
     */
    public abstract Matches getMatches();

    /**
     * Skips the given number of bytes in the match finder.
     */
    public abstract void skip(int len);
}
diff --git a/src/org/tukaani/xz/lz/Matches.java b/src/org/tukaani/xz/lz/Matches.java
new file mode 100644
index 0000000..2fbee11
--- /dev/null
+++ b/src/org/tukaani/xz/lz/Matches.java
@@ -0,0 +1,22 @@
+/*
+ * Matches
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lz;
+
/**
 * Holds the matches found by a match finder for one input position.
 * The arrays are preallocated to the maximum possible match count.
 */
public final class Matches {
    /** Lengths of the found matches. */
    public final int[] len;

    /** Distances of the found matches (zero-based). */
    public final int[] dist;

    /** Number of valid entries in <code>len</code> and <code>dist</code>. */
    public int count = 0;

    Matches(int countMax) {
        len = new int[countMax];
        dist = new int[countMax];
    }
}
diff --git a/src/org/tukaani/xz/lzma/LZMACoder.java b/src/org/tukaani/xz/lzma/LZMACoder.java
new file mode 100644
index 0000000..c31c9a6
--- /dev/null
+++ b/src/org/tukaani/xz/lzma/LZMACoder.java
@@ -0,0 +1,140 @@
+/*
+ * LZMACoder
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lzma;
+
+import org.tukaani.xz.rangecoder.RangeCoder;
+
+abstract class LZMACoder {
+ static final int POS_STATES_MAX = 1 << 4;
+
+ static final int MATCH_LEN_MIN = 2;
+ static final int MATCH_LEN_MAX = MATCH_LEN_MIN + LengthCoder.LOW_SYMBOLS
+ + LengthCoder.MID_SYMBOLS
+ + LengthCoder.HIGH_SYMBOLS - 1;
+
+ static final int DIST_STATES = 4;
+ static final int DIST_SLOTS = 1 << 6;
+ static final int DIST_MODEL_START = 4;
+ static final int DIST_MODEL_END = 14;
+ static final int FULL_DISTANCES = 1 << (DIST_MODEL_END / 2);
+
+ static final int ALIGN_BITS = 4;
+ static final int ALIGN_SIZE = 1 << ALIGN_BITS;
+ static final int ALIGN_MASK = ALIGN_SIZE - 1;
+
+ static final int REPS = 4;
+
+ final int posMask;
+
+ final int[] reps = new int[REPS];
+ final State state = new State();
+
+ final short[][] isMatch = new short[State.STATES][POS_STATES_MAX];
+ final short[] isRep = new short[State.STATES];
+ final short[] isRep0 = new short[State.STATES];
+ final short[] isRep1 = new short[State.STATES];
+ final short[] isRep2 = new short[State.STATES];
+ final short[][] isRep0Long = new short[State.STATES][POS_STATES_MAX];
+ final short[][] distSlots = new short[DIST_STATES][DIST_SLOTS];
+ final short[][] distSpecial = { new short[2], new short[2],
+ new short[4], new short[4],
+ new short[8], new short[8],
+ new short[16], new short[16],
+ new short[32], new short[32] };
+ final short[] distAlign = new short[ALIGN_SIZE];
+
+ static final int getDistState(int len) {
+ return len < DIST_STATES + MATCH_LEN_MIN
+ ? len - MATCH_LEN_MIN
+ : DIST_STATES - 1;
+ }
+
+ LZMACoder(int pb) {
+ posMask = (1 << pb) - 1;
+ }
+
+ void reset() {
+ reps[0] = 0;
+ reps[1] = 0;
+ reps[2] = 0;
+ reps[3] = 0;
+ state.reset();
+
+ for (int i = 0; i < isMatch.length; ++i)
+ RangeCoder.initProbs(isMatch[i]);
+
+ RangeCoder.initProbs(isRep);
+ RangeCoder.initProbs(isRep0);
+ RangeCoder.initProbs(isRep1);
+ RangeCoder.initProbs(isRep2);
+
+ for (int i = 0; i < isRep0Long.length; ++i)
+ RangeCoder.initProbs(isRep0Long[i]);
+
+ for (int i = 0; i < distSlots.length; ++i)
+ RangeCoder.initProbs(distSlots[i]);
+
+ for (int i = 0; i < distSpecial.length; ++i)
+ RangeCoder.initProbs(distSpecial[i]);
+
+ RangeCoder.initProbs(distAlign);
+ }
+
+
+ abstract class LiteralCoder {
+ private final int lc;
+ private final int literalPosMask;
+
+ LiteralCoder(int lc, int lp) {
+ this.lc = lc;
+ this.literalPosMask = (1 << lp) - 1;
+ }
+
+ final int getSubcoderIndex(int prevByte, int pos) {
+ int low = prevByte >> (8 - lc);
+ int high = (pos & literalPosMask) << lc;
+ return low + high;
+ }
+
+
+ abstract class LiteralSubcoder {
+ final short[] probs = new short[0x300];
+
+ void reset() {
+ RangeCoder.initProbs(probs);
+ }
+ }
+ }
+
+
+ abstract class LengthCoder {
+ static final int LOW_SYMBOLS = 1 << 3;
+ static final int MID_SYMBOLS = 1 << 3;
+ static final int HIGH_SYMBOLS = 1 << 8;
+
+ final short[] choice = new short[2];
+ final short[][] low = new short[POS_STATES_MAX][LOW_SYMBOLS];
+ final short[][] mid = new short[POS_STATES_MAX][MID_SYMBOLS];
+ final short[] high = new short[HIGH_SYMBOLS];
+
+ void reset() {
+ RangeCoder.initProbs(choice);
+
+ for (int i = 0; i < low.length; ++i)
+ RangeCoder.initProbs(low[i]);
+
+ for (int i = 0; i < low.length; ++i)
+ RangeCoder.initProbs(mid[i]);
+
+ RangeCoder.initProbs(high);
+ }
+ }
+}
diff --git a/src/org/tukaani/xz/lzma/LZMADecoder.java b/src/org/tukaani/xz/lzma/LZMADecoder.java
new file mode 100644
index 0000000..ccf1960
--- /dev/null
+++ b/src/org/tukaani/xz/lzma/LZMADecoder.java
@@ -0,0 +1,199 @@
+/*
+ * LZMADecoder
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lzma;
+
+import java.io.IOException;
+import org.tukaani.xz.lz.LZDecoder;
+import org.tukaani.xz.rangecoder.RangeDecoder;
+
/**
 * Decodes a raw LZMA symbol stream: literals, matches, and repeated
 * matches are read from a range decoder and the resulting bytes are
 * written into the LZ dictionary/window.
 */
public final class LZMADecoder extends LZMACoder {
    private final LZDecoder lz;
    private final RangeDecoder rc;
    private final LiteralDecoder literalDecoder;
    private final LengthDecoder matchLenDecoder = new LengthDecoder();
    private final LengthDecoder repLenDecoder = new LengthDecoder();

    /**
     * Creates a new LZMA decoder reading from the given range decoder
     * and writing to the given LZ window.
     *
     * @param lc number of literal context bits
     * @param lp number of literal position bits
     * @param pb number of position bits
     */
    public LZMADecoder(LZDecoder lz, RangeDecoder rc, int lc, int lp, int pb) {
        super(pb);
        this.lz = lz;
        this.rc = rc;
        this.literalDecoder = new LiteralDecoder(lc, lp);
        reset();
    }

    /** Resets the shared coder state and all three sub-decoders. */
    public void reset() {
        super.reset();
        literalDecoder.reset();
        matchLenDecoder.reset();
        repLenDecoder.reset();
    }

    /**
     * Returns true if LZMA end marker was detected. It is encoded as
     * the maximum match distance which with signed ints becomes -1. This
     * function is needed only for LZMA1. LZMA2 doesn't use the end marker
     * in the LZMA layer.
     */
    public boolean endMarkerDetected() {
        return reps[0] == -1;
    }

    /**
     * Decodes symbols until the LZ output window runs out of space.
     * Each iteration decodes one literal byte or one (rep)match.
     *
     * @throws IOException on corrupt input or read failure
     */
    public void decode() throws IOException {
        lz.repeatPending();

        while (lz.hasSpace()) {
            int posState = lz.getPos() & posMask;

            if (rc.decodeBit(isMatch[state.get()], posState) == 0) {
                literalDecoder.decode();
            } else {
                int len = rc.decodeBit(isRep, state.get()) == 0
                          ? decodeMatch(posState)
                          : decodeRepMatch(posState);

                // NOTE: With LZMA1 streams that have the end marker,
                // this will throw CorruptedInputException. LZMAInputStream
                // handles it specially.
                lz.repeat(reps[0], len);
            }
        }

        rc.normalize();
    }

    /**
     * Decodes a normal match: a length followed by a distance slot and
     * the extra distance bits. Shifts the rep distance history so that
     * the new distance becomes reps[0].
     *
     * @return the match length
     */
    private int decodeMatch(int posState) throws IOException {
        state.updateMatch();

        reps[3] = reps[2];
        reps[2] = reps[1];
        reps[1] = reps[0];

        int len = matchLenDecoder.decode(posState);
        int distSlot = rc.decodeBitTree(distSlots[getDistState(len)]);

        if (distSlot < DIST_MODEL_START) {
            // Small distances are encoded directly in the slot value.
            reps[0] = distSlot;
        } else {
            int limit = (distSlot >> 1) - 1;
            reps[0] = (2 | (distSlot & 1)) << limit;

            if (distSlot < DIST_MODEL_END) {
                // Mid-range distance: low bits via a reverse bit tree.
                reps[0] |= rc.decodeReverseBitTree(
                        distSpecial[distSlot - DIST_MODEL_START]);
            } else {
                // Large distance: direct bits plus ALIGN_BITS aligned bits.
                reps[0] |= rc.decodeDirectBits(limit - ALIGN_BITS)
                           << ALIGN_BITS;
                reps[0] |= rc.decodeReverseBitTree(distAlign);
            }
        }

        return len;
    }

    /**
     * Decodes a repeated match, i.e. one that reuses a distance from the
     * four-entry history. A "short rep" reuses reps[0] with an implicit
     * length of one; otherwise the selected distance is moved to the
     * front of the history and a length is decoded.
     *
     * @return the match length
     */
    private int decodeRepMatch(int posState) throws IOException {
        if (rc.decodeBit(isRep0, state.get()) == 0) {
            if (rc.decodeBit(isRep0Long[state.get()], posState) == 0) {
                state.updateShortRep();
                return 1;
            }
        } else {
            int tmp;

            if (rc.decodeBit(isRep1, state.get()) == 0) {
                tmp = reps[1];
            } else {
                if (rc.decodeBit(isRep2, state.get()) == 0) {
                    tmp = reps[2];
                } else {
                    tmp = reps[3];
                    reps[3] = reps[2];
                }

                reps[2] = reps[1];
            }

            reps[1] = reps[0];
            reps[0] = tmp;
        }

        state.updateLongRep();

        return repLenDecoder.decode(posState);
    }


    /** Decodes literal bytes via per-context probability subcoders. */
    private class LiteralDecoder extends LiteralCoder {
        private final LiteralSubdecoder[] subdecoders;

        LiteralDecoder(int lc, int lp) {
            super(lc, lp);

            subdecoders = new LiteralSubdecoder[1 << (lc + lp)];
            for (int i = 0; i < subdecoders.length; ++i)
                subdecoders[i] = new LiteralSubdecoder();
        }

        void reset() {
            for (int i = 0; i < subdecoders.length; ++i)
                subdecoders[i].reset();
        }

        /** Decodes one literal using the subcoder chosen by context. */
        void decode() throws IOException {
            int i = getSubcoderIndex(lz.getByte(0), lz.getPos());
            subdecoders[i].decode();
        }


        private class LiteralSubdecoder extends LiteralSubcoder {
            void decode() throws IOException {
                int symbol = 1;

                if (state.isLiteral()) {
                    // Plain literal: decode eight bits, MSB first, using
                    // the partially decoded symbol as the tree index.
                    do {
                        symbol = (symbol << 1) | rc.decodeBit(probs, symbol);
                    } while (symbol < 0x100);

                } else {
                    // Right after a match: also use the byte at the most
                    // recent match distance as context, for as long as
                    // its bits agree with the bits decoded so far.
                    int matchByte = lz.getByte(reps[0]);
                    int offset = 0x100;
                    int matchBit;
                    int bit;

                    do {
                        matchByte <<= 1;
                        matchBit = matchByte & offset;
                        bit = rc.decodeBit(probs, offset + matchBit + symbol);
                        symbol = (symbol << 1) | bit;
                        // Drops the match-byte context once a decoded bit
                        // disagrees with the match byte.
                        offset &= (0 - bit) ^ ~matchBit;
                    } while (symbol < 0x100);
                }

                lz.putByte((byte)symbol);
                state.updateLiteral();
            }
        }
    }


    /** Decodes match lengths: two choice bits select a bit tree. */
    private class LengthDecoder extends LengthCoder {
        int decode(int posState) throws IOException {
            if (rc.decodeBit(choice, 0) == 0)
                return rc.decodeBitTree(low[posState]) + MATCH_LEN_MIN;

            if (rc.decodeBit(choice, 1) == 0)
                return rc.decodeBitTree(mid[posState])
                       + MATCH_LEN_MIN + LOW_SYMBOLS;

            return rc.decodeBitTree(high)
                   + MATCH_LEN_MIN + LOW_SYMBOLS + MID_SYMBOLS;
        }
    }
}
diff --git a/src/org/tukaani/xz/lzma/LZMAEncoder.java b/src/org/tukaani/xz/lzma/LZMAEncoder.java
new file mode 100644
index 0000000..02d5172
--- /dev/null
+++ b/src/org/tukaani/xz/lzma/LZMAEncoder.java
@@ -0,0 +1,750 @@
+/*
+ * LZMAEncoder
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lzma;
+
+import java.io.IOException;
+import org.tukaani.xz.ArrayCache;
+import org.tukaani.xz.lz.LZEncoder;
+import org.tukaani.xz.lz.Matches;
+import org.tukaani.xz.rangecoder.RangeEncoder;
+
/**
 * Base class of the LZMA encoders. Implements symbol encoding (literals,
 * matches, repeated matches) through a range encoder and maintains the
 * price tables used by the match-selection logic in the subclasses.
 * Subclasses supply the match-selection strategy via getNextSymbol().
 */
public abstract class LZMAEncoder extends LZMACoder {
    /** Match selection mode of LZMAEncoderFast. */
    public static final int MODE_FAST = 1;

    /** Match selection mode of LZMAEncoderNormal. */
    public static final int MODE_NORMAL = 2;

    /**
     * LZMA2 chunk is considered full when its uncompressed size exceeds
     * <code>LZMA2_UNCOMPRESSED_LIMIT</code>.
     * <p>
     * A compressed LZMA2 chunk can hold 2 MiB of uncompressed data.
     * A single LZMA symbol may indicate up to MATCH_LEN_MAX bytes
     * of data, so the LZMA2 chunk is considered full when there is
     * less space than MATCH_LEN_MAX bytes.
     */
    private static final int LZMA2_UNCOMPRESSED_LIMIT
            = (2 << 20) - MATCH_LEN_MAX;

    /**
     * LZMA2 chunk is considered full when its compressed size exceeds
     * <code>LZMA2_COMPRESSED_LIMIT</code>.
     * <p>
     * The maximum compressed size of a LZMA2 chunk is 64 KiB.
     * A single LZMA symbol might use 20 bytes of space even though
     * it usually takes just one byte or so. Two more bytes are needed
     * for LZMA2 uncompressed chunks (see LZMA2OutputStream.writeChunk).
     * Leave a little safety margin and use 26 bytes.
     */
    private static final int LZMA2_COMPRESSED_LIMIT = (64 << 10) - 26;

    // How many symbols may be encoded before the corresponding price
    // tables must be recalculated (see updatePrices()).
    private static final int DIST_PRICE_UPDATE_INTERVAL = FULL_DISTANCES;
    private static final int ALIGN_PRICE_UPDATE_INTERVAL = ALIGN_SIZE;

    private final RangeEncoder rc;
    final LZEncoder lz;
    final LiteralEncoder literalEncoder;
    final LengthEncoder matchLenEncoder;
    final LengthEncoder repLenEncoder;
    final int niceLen;

    // Countdown counters; price tables are refreshed when these reach
    // zero or below (see updatePrices()).
    private int distPriceCount = 0;
    private int alignPriceCount = 0;

    // Cached price tables used by the match-selection heuristics.
    private final int distSlotPricesSize;
    private final int[][] distSlotPrices;
    private final int[][] fullDistPrices
            = new int[DIST_STATES][FULL_DISTANCES];
    private final int[] alignPrices = new int[ALIGN_SIZE];

    // Communication between getNextSymbol() and encodeSymbol(); see the
    // javadoc of getNextSymbol() for the meaning of back.
    int back = 0;
    int readAhead = -1;
    private int uncompressedSize = 0;

    /**
     * Returns an estimate of the memory usage in kibibytes of an encoder
     * created with the given settings.
     *
     * @throws IllegalArgumentException if mode is not MODE_FAST or
     *                                  MODE_NORMAL
     */
    public static int getMemoryUsage(int mode, int dictSize,
                                     int extraSizeBefore, int mf) {
        int m = 80;

        switch (mode) {
            case MODE_FAST:
                m += LZMAEncoderFast.getMemoryUsage(
                        dictSize, extraSizeBefore, mf);
                break;

            case MODE_NORMAL:
                m += LZMAEncoderNormal.getMemoryUsage(
                        dictSize, extraSizeBefore, mf);
                break;

            default:
                throw new IllegalArgumentException();
        }

        return m;
    }

    /**
     * Creates a fast or normal mode encoder depending on
     * <code>mode</code>.
     *
     * @throws IllegalArgumentException if mode is not MODE_FAST or
     *                                  MODE_NORMAL
     */
    public static LZMAEncoder getInstance(
                RangeEncoder rc, int lc, int lp, int pb, int mode,
                int dictSize, int extraSizeBefore,
                int niceLen, int mf, int depthLimit,
                ArrayCache arrayCache) {
        switch (mode) {
            case MODE_FAST:
                return new LZMAEncoderFast(rc, lc, lp, pb,
                                           dictSize, extraSizeBefore,
                                           niceLen, mf, depthLimit,
                                           arrayCache);

            case MODE_NORMAL:
                return new LZMAEncoderNormal(rc, lc, lp, pb,
                                             dictSize, extraSizeBefore,
                                             niceLen, mf, depthLimit,
                                             arrayCache);
        }

        throw new IllegalArgumentException();
    }

    /** Returns the LZ window's arrays to the given cache for reuse. */
    public void putArraysToCache(ArrayCache arrayCache) {
        lz.putArraysToCache(arrayCache);
    }

    /**
     * Gets an integer [0, 63] matching the highest two bits of an integer.
     * This is like bit scan reverse (BSR) on x86 except that this also
     * cares about the second highest bit.
     */
    public static int getDistSlot(int dist) {
        // Small distances map directly to their own slot.
        if (dist <= DIST_MODEL_START && dist >= 0)
            return dist;

        // Binary search for the highest set bit of dist.
        int n = dist;
        int i = 31;

        if ((n & 0xFFFF0000) == 0) {
            n <<= 16;
            i = 15;
        }

        if ((n & 0xFF000000) == 0) {
            n <<= 8;
            i -= 8;
        }

        if ((n & 0xF0000000) == 0) {
            n <<= 4;
            i -= 4;
        }

        if ((n & 0xC0000000) == 0) {
            n <<= 2;
            i -= 2;
        }

        if ((n & 0x80000000) == 0)
            --i;

        // Slot = 2 * (position of highest bit) + (second highest bit).
        return (i << 1) + ((dist >>> (i - 1)) & 1);
    }

    /**
     * Gets the next LZMA symbol.
     * <p>
     * There are three types of symbols: literal (a single byte),
     * repeated match, and normal match. The symbol is indicated
     * by the return value and by the variable <code>back</code>.
     * <p>
     * Literal: <code>back == -1</code> and return value is <code>1</code>.
     * The literal itself needs to be read from <code>lz</code> separately.
     * <p>
     * Repeated match: <code>back</code> is in the range [0, 3] and
     * the return value is the length of the repeated match.
     * <p>
     * Normal match: <code>back - REPS</code> (<code>back - 4</code>)
     * is the distance of the match and the return value is the length
     * of the match.
     */
    abstract int getNextSymbol();

    LZMAEncoder(RangeEncoder rc, LZEncoder lz,
                int lc, int lp, int pb, int dictSize, int niceLen) {
        super(pb);
        this.rc = rc;
        this.lz = lz;
        this.niceLen = niceLen;

        literalEncoder = new LiteralEncoder(lc, lp);
        matchLenEncoder = new LengthEncoder(pb, niceLen);
        repLenEncoder = new LengthEncoder(pb, niceLen);

        // Only slots reachable with this dictionary size need prices.
        distSlotPricesSize = getDistSlot(dictSize - 1) + 1;
        distSlotPrices = new int[DIST_STATES][distSlotPricesSize];

        reset();
    }

    /** Returns the LZ encoder (window + match finder) used by this coder. */
    public LZEncoder getLZEncoder() {
        return lz;
    }

    /** Resets the coder state, sub-coders, and price counters. */
    public void reset() {
        super.reset();
        literalEncoder.reset();
        matchLenEncoder.reset();
        repLenEncoder.reset();
        distPriceCount = 0;
        alignPriceCount = 0;

        // NOTE(review): read-ahead bytes that were never encoded are
        // folded into uncompressedSize here, presumably so they are not
        // lost across a reset — confirm against LZMA2OutputStream usage.
        uncompressedSize += readAhead + 1;
        readAhead = -1;
    }

    /** Returns the number of uncompressed bytes encoded so far. */
    public int getUncompressedSize() {
        return uncompressedSize;
    }

    /** Resets the uncompressed byte counter (used per LZMA2 chunk). */
    public void resetUncompressedSize() {
        uncompressedSize = 0;
    }

    /**
     * Compress for LZMA1.
     */
    public void encodeForLZMA1() throws IOException {
        if (!lz.isStarted() && !encodeInit())
            return;

        // Encode symbols until input runs out.
        while (encodeSymbol()) {}
    }

    /** Encodes the LZMA1 end-of-stream marker. */
    public void encodeLZMA1EndMarker() throws IOException {
        // End of stream marker is encoded as a match with the maximum
        // possible distance. The length is ignored by the decoder,
        // but the minimum length has been used by the LZMA SDK.
        //
        // Distance is a 32-bit unsigned integer in LZMA.
        // With Java's signed int, UINT32_MAX becomes -1.
        int posState = (lz.getPos() - readAhead) & posMask;
        rc.encodeBit(isMatch[state.get()], posState, 1);
        rc.encodeBit(isRep, state.get(), 0);
        encodeMatch(-1, MATCH_LEN_MIN, posState);
    }

    /**
     * Compresses for LZMA2.
     *
     * @return true if the LZMA2 chunk became full, false otherwise
     */
    public boolean encodeForLZMA2() {
        // LZMA2 uses RangeEncoderToBuffer so IOExceptions aren't possible.
        try {
            if (!lz.isStarted() && !encodeInit())
                return false;

            while (uncompressedSize <= LZMA2_UNCOMPRESSED_LIMIT
                    && rc.getPendingSize() <= LZMA2_COMPRESSED_LIMIT)
                if (!encodeSymbol())
                    return false;
        } catch (IOException e) {
            // Unreachable per the comment above the try block.
            throw new Error();
        }

        return true;
    }

    /**
     * Encodes the very first byte of the stream as a literal.
     *
     * @return false if no input is available yet, true on success
     */
    private boolean encodeInit() throws IOException {
        assert readAhead == -1;
        if (!lz.hasEnoughData(0))
            return false;

        // The first symbol must be a literal unless using
        // a preset dictionary. This code isn't run if using
        // a preset dictionary.
        skip(1);
        rc.encodeBit(isMatch[state.get()], 0, 0);
        literalEncoder.encodeInit();

        --readAhead;
        assert readAhead == -1;

        ++uncompressedSize;
        assert uncompressedSize == 1;

        return true;
    }

    /**
     * Encodes one symbol (literal, rep match, or normal match) selected
     * by the subclass's getNextSymbol().
     *
     * @return false if there is not enough input available, true otherwise
     */
    private boolean encodeSymbol() throws IOException {
        if (!lz.hasEnoughData(readAhead + 1))
            return false;

        int len = getNextSymbol();

        assert readAhead >= 0;
        int posState = (lz.getPos() - readAhead) & posMask;

        if (back == -1) {
            // Literal i.e. eight-bit byte
            assert len == 1;
            rc.encodeBit(isMatch[state.get()], posState, 0);
            literalEncoder.encode();
        } else {
            // Some type of match
            rc.encodeBit(isMatch[state.get()], posState, 1);
            if (back < REPS) {
                // Repeated match i.e. the same distance
                // has been used earlier.
                assert lz.getMatchLen(-readAhead, reps[back], len) == len;
                rc.encodeBit(isRep, state.get(), 1);
                encodeRepMatch(back, len, posState);
            } else {
                // Normal match
                assert lz.getMatchLen(-readAhead, back - REPS, len) == len;
                rc.encodeBit(isRep, state.get(), 0);
                encodeMatch(back - REPS, len, posState);
            }
        }

        readAhead -= len;
        uncompressedSize += len;

        return true;
    }

    /**
     * Encodes a normal match: length, distance slot, and the extra
     * distance bits. Shifts the rep distance history so that
     * <code>dist</code> becomes reps[0].
     */
    private void encodeMatch(int dist, int len, int posState)
            throws IOException {
        state.updateMatch();
        matchLenEncoder.encode(len, posState);

        int distSlot = getDistSlot(dist);
        rc.encodeBitTree(distSlots[getDistState(len)], distSlot);

        if (distSlot >= DIST_MODEL_START) {
            int footerBits = (distSlot >>> 1) - 1;
            int base = (2 | (distSlot & 1)) << footerBits;
            int distReduced = dist - base;

            if (distSlot < DIST_MODEL_END) {
                rc.encodeReverseBitTree(
                        distSpecial[distSlot - DIST_MODEL_START],
                        distReduced);
            } else {
                rc.encodeDirectBits(distReduced >>> ALIGN_BITS,
                                    footerBits - ALIGN_BITS);
                rc.encodeReverseBitTree(distAlign, distReduced & ALIGN_MASK);
                // Schedule an align price refresh (see updatePrices()).
                --alignPriceCount;
            }
        }

        reps[3] = reps[2];
        reps[2] = reps[1];
        reps[1] = reps[0];
        reps[0] = dist;

        // Schedule a distance price refresh (see updatePrices()).
        --distPriceCount;
    }

    /**
     * Encodes a repeated match that reuses reps[rep]. A length-one
     * rep0 match is encoded as a "short rep"; otherwise the distance
     * is moved to the front of the history and the length is encoded.
     */
    private void encodeRepMatch(int rep, int len, int posState)
            throws IOException {
        if (rep == 0) {
            rc.encodeBit(isRep0, state.get(), 0);
            rc.encodeBit(isRep0Long[state.get()], posState, len == 1 ? 0 : 1);
        } else {
            int dist = reps[rep];
            rc.encodeBit(isRep0, state.get(), 1);

            if (rep == 1) {
                rc.encodeBit(isRep1, state.get(), 0);
            } else {
                rc.encodeBit(isRep1, state.get(), 1);
                rc.encodeBit(isRep2, state.get(), rep - 2);

                if (rep == 3)
                    reps[3] = reps[2];

                reps[2] = reps[1];
            }

            reps[1] = reps[0];
            reps[0] = dist;
        }

        if (len == 1) {
            state.updateShortRep();
        } else {
            repLenEncoder.encode(len, posState);
            state.updateLongRep();
        }
    }

    /**
     * Advances the read-ahead position by one byte and returns the
     * matches found at the new position.
     */
    Matches getMatches() {
        ++readAhead;
        Matches matches = lz.getMatches();
        assert lz.verifyMatches(matches);
        return matches;
    }

    /**
     * Advances the read-ahead position by <code>len</code> bytes,
     * keeping the match finder in sync without requesting matches.
     */
    void skip(int len) {
        readAhead += len;
        lz.skip(len);
    }

    // ----- Price helpers used by the subclasses' match selection -----

    /** Price of signaling "this symbol is some kind of match". */
    int getAnyMatchPrice(State state, int posState) {
        return RangeEncoder.getBitPrice(isMatch[state.get()][posState], 1);
    }

    /** Price of a normal (non-repeated) match indicator. */
    int getNormalMatchPrice(int anyMatchPrice, State state) {
        return anyMatchPrice
               + RangeEncoder.getBitPrice(isRep[state.get()], 0);
    }

    /** Price of a repeated-match indicator. */
    int getAnyRepPrice(int anyMatchPrice, State state) {
        return anyMatchPrice
               + RangeEncoder.getBitPrice(isRep[state.get()], 1);
    }

    /** Price of a length-one rep0 ("short rep") match. */
    int getShortRepPrice(int anyRepPrice, State state, int posState) {
        return anyRepPrice
               + RangeEncoder.getBitPrice(isRep0[state.get()], 0)
               + RangeEncoder.getBitPrice(isRep0Long[state.get()][posState],
                                          0);
    }

    /** Price of selecting reps[rep] as a long repeated match. */
    int getLongRepPrice(int anyRepPrice, int rep, State state, int posState) {
        int price = anyRepPrice;

        if (rep == 0) {
            price += RangeEncoder.getBitPrice(isRep0[state.get()], 0)
                     + RangeEncoder.getBitPrice(
                            isRep0Long[state.get()][posState], 1);
        } else {
            price += RangeEncoder.getBitPrice(isRep0[state.get()], 1);

            if (rep == 1)
                price += RangeEncoder.getBitPrice(isRep1[state.get()], 0);
            else
                price += RangeEncoder.getBitPrice(isRep1[state.get()], 1)
                         + RangeEncoder.getBitPrice(isRep2[state.get()],
                                                    rep - 2);
        }

        return price;
    }

    /** Total price of a long repeated match including its length. */
    int getLongRepAndLenPrice(int rep, int len, State state, int posState) {
        int anyMatchPrice = getAnyMatchPrice(state, posState);
        int anyRepPrice = getAnyRepPrice(anyMatchPrice, state);
        int longRepPrice = getLongRepPrice(anyRepPrice, rep, state, posState);
        return longRepPrice + repLenEncoder.getPrice(len, posState);
    }

    /** Total price of a normal match including its length and distance. */
    int getMatchAndLenPrice(int normalMatchPrice,
                            int dist, int len, int posState) {
        int price = normalMatchPrice
                    + matchLenEncoder.getPrice(len, posState);
        int distState = getDistState(len);

        if (dist < FULL_DISTANCES) {
            price += fullDistPrices[distState][dist];
        } else {
            // Note that distSlotPrices includes also
            // the price of direct bits.
            int distSlot = getDistSlot(dist);
            price += distSlotPrices[distState][distSlot]
                     + alignPrices[dist & ALIGN_MASK];
        }

        return price;
    }

    /**
     * Recalculates the distance-slot and full-distance price tables and
     * restarts the distance price countdown.
     */
    private void updateDistPrices() {
        distPriceCount = DIST_PRICE_UPDATE_INTERVAL;

        for (int distState = 0; distState < DIST_STATES; ++distState) {
            for (int distSlot = 0; distSlot < distSlotPricesSize; ++distSlot)
                distSlotPrices[distState][distSlot]
                        = RangeEncoder.getBitTreePrice(
                                distSlots[distState], distSlot);

            // Large slots additionally pay for their direct bits.
            for (int distSlot = DIST_MODEL_END; distSlot < distSlotPricesSize;
                    ++distSlot) {
                int count = (distSlot >>> 1) - 1 - ALIGN_BITS;
                distSlotPrices[distState][distSlot]
                        += RangeEncoder.getDirectBitsPrice(count);
            }

            for (int dist = 0; dist < DIST_MODEL_START; ++dist)
                fullDistPrices[distState][dist]
                        = distSlotPrices[distState][dist];
        }

        // Precompute full prices for every distance below FULL_DISTANCES.
        int dist = DIST_MODEL_START;
        for (int distSlot = DIST_MODEL_START; distSlot < DIST_MODEL_END;
                ++distSlot) {
            int footerBits = (distSlot >>> 1) - 1;
            int base = (2 | (distSlot & 1)) << footerBits;

            int limit = distSpecial[distSlot - DIST_MODEL_START].length;
            for (int i = 0; i < limit; ++i) {
                int distReduced = dist - base;
                int price = RangeEncoder.getReverseBitTreePrice(
                        distSpecial[distSlot - DIST_MODEL_START],
                        distReduced);

                for (int distState = 0; distState < DIST_STATES; ++distState)
                    fullDistPrices[distState][dist]
                            = distSlotPrices[distState][distSlot] + price;

                ++dist;
            }
        }

        assert dist == FULL_DISTANCES;
    }

    /**
     * Recalculates the aligned-bits price table and restarts the
     * align price countdown.
     */
    private void updateAlignPrices() {
        alignPriceCount = ALIGN_PRICE_UPDATE_INTERVAL;

        for (int i = 0; i < ALIGN_SIZE; ++i)
            alignPrices[i] = RangeEncoder.getReverseBitTreePrice(distAlign,
                                                                 i);
    }

    /**
     * Updates the lookup tables used for calculating match distance
     * and length prices. The updating is skipped for performance reasons
     * if the tables haven't changed much since the previous update.
     */
    void updatePrices() {
        if (distPriceCount <= 0)
            updateDistPrices();

        if (alignPriceCount <= 0)
            updateAlignPrices();

        matchLenEncoder.updatePrices();
        repLenEncoder.updatePrices();
    }


    /** Encodes literal bytes via per-context probability subencoders. */
    class LiteralEncoder extends LiteralCoder {
        private final LiteralSubencoder[] subencoders;

        LiteralEncoder(int lc, int lp) {
            super(lc, lp);

            subencoders = new LiteralSubencoder[1 << (lc + lp)];
            for (int i = 0; i < subencoders.length; ++i)
                subencoders[i] = new LiteralSubencoder();
        }

        void reset() {
            for (int i = 0; i < subencoders.length; ++i)
                subencoders[i].reset();
        }

        /** Encodes the first literal of the stream (context index 0). */
        void encodeInit() throws IOException {
            // When encoding the first byte of the stream, there is
            // no previous byte in the dictionary so the encode function
            // wouldn't work.
            assert readAhead >= 0;
            subencoders[0].encode();
        }

        /** Encodes one literal using the subencoder chosen by context. */
        void encode() throws IOException {
            assert readAhead >= 0;
            int i = getSubcoderIndex(lz.getByte(1 + readAhead),
                                     lz.getPos() - readAhead);
            subencoders[i].encode();
        }

        /**
         * Returns the price of encoding <code>curByte</code> as a
         * literal in the given context.
         */
        int getPrice(int curByte, int matchByte,
                     int prevByte, int pos, State state) {
            int price = RangeEncoder.getBitPrice(
                    isMatch[state.get()][pos & posMask], 0);

            int i = getSubcoderIndex(prevByte, pos);
            price += state.isLiteral()
                     ? subencoders[i].getNormalPrice(curByte)
                     : subencoders[i].getMatchedPrice(curByte, matchByte);

            return price;
        }

        private class LiteralSubencoder extends LiteralSubcoder {
            /** Encodes the byte at the current read-ahead position. */
            void encode() throws IOException {
                int symbol = lz.getByte(readAhead) | 0x100;

                if (state.isLiteral()) {
                    // Plain literal: encode eight bits, MSB first.
                    int subencoderIndex;
                    int bit;

                    do {
                        subencoderIndex = symbol >>> 8;
                        bit = (symbol >>> 7) & 1;
                        rc.encodeBit(probs, subencoderIndex, bit);
                        symbol <<= 1;
                    } while (symbol < 0x10000);

                } else {
                    // Right after a match: also use the byte at the most
                    // recent match distance as context, for as long as
                    // its bits agree with the bits encoded so far.
                    int matchByte = lz.getByte(reps[0] + 1 + readAhead);
                    int offset = 0x100;
                    int subencoderIndex;
                    int matchBit;
                    int bit;

                    do {
                        matchByte <<= 1;
                        matchBit = matchByte & offset;
                        subencoderIndex = offset + matchBit + (symbol >>> 8);
                        bit = (symbol >>> 7) & 1;
                        rc.encodeBit(probs, subencoderIndex, bit);
                        symbol <<= 1;
                        offset &= ~(matchByte ^ symbol);
                    } while (symbol < 0x10000);
                }

                state.updateLiteral();
            }

            /** Price of encoding symbol without match-byte context. */
            int getNormalPrice(int symbol) {
                int price = 0;
                int subencoderIndex;
                int bit;

                symbol |= 0x100;

                do {
                    subencoderIndex = symbol >>> 8;
                    bit = (symbol >>> 7) & 1;
                    price += RangeEncoder.getBitPrice(probs[subencoderIndex],
                                                      bit);
                    symbol <<= 1;
                } while (symbol < (0x100 << 8));

                return price;
            }

            /** Price of encoding symbol using match-byte context. */
            int getMatchedPrice(int symbol, int matchByte) {
                int price = 0;
                int offset = 0x100;
                int subencoderIndex;
                int matchBit;
                int bit;

                symbol |= 0x100;

                do {
                    matchByte <<= 1;
                    matchBit = matchByte & offset;
                    subencoderIndex = offset + matchBit + (symbol >>> 8);
                    bit = (symbol >>> 7) & 1;
                    price += RangeEncoder.getBitPrice(probs[subencoderIndex],
                                                      bit);
                    symbol <<= 1;
                    offset &= ~(matchByte ^ symbol);
                } while (symbol < (0x100 << 8));

                return price;
            }
        }
    }


    /** Encodes match lengths and caches their prices per posState. */
    class LengthEncoder extends LengthCoder {
        /**
         * The prices are updated after at least
         * <code>PRICE_UPDATE_INTERVAL</code> many lengths
         * have been encoded with the same posState.
         */
        private static final int PRICE_UPDATE_INTERVAL = 32; // FIXME?

        // Per-posState countdown until the price table is refreshed.
        private final int[] counters;
        // Cached length prices, indexed [posState][len - MATCH_LEN_MIN].
        private final int[][] prices;

        LengthEncoder(int pb, int niceLen) {
            int posStates = 1 << pb;
            counters = new int[posStates];

            // Always allocate at least LOW_SYMBOLS + MID_SYMBOLS because
            // it makes updatePrices slightly simpler. The prices aren't
            // usually needed anyway if niceLen < 18.
            int lenSymbols = Math.max(niceLen - MATCH_LEN_MIN + 1,
                                      LOW_SYMBOLS + MID_SYMBOLS);
            prices = new int[posStates][lenSymbols];
        }

        void reset() {
            super.reset();

            // Reset counters to zero to force price update before
            // the prices are needed.
            for (int i = 0; i < counters.length; ++i)
                counters[i] = 0;
        }

        /** Encodes one length: two choice bits then a bit tree. */
        void encode(int len, int posState) throws IOException {
            len -= MATCH_LEN_MIN;

            if (len < LOW_SYMBOLS) {
                rc.encodeBit(choice, 0, 0);
                rc.encodeBitTree(low[posState], len);
            } else {
                rc.encodeBit(choice, 0, 1);
                len -= LOW_SYMBOLS;

                if (len < MID_SYMBOLS) {
                    rc.encodeBit(choice, 1, 0);
                    rc.encodeBitTree(mid[posState], len);
                } else {
                    rc.encodeBit(choice, 1, 1);
                    rc.encodeBitTree(high, len - MID_SYMBOLS);
                }
            }

            --counters[posState];
        }

        /** Returns the cached price of encoding len in posState. */
        int getPrice(int len, int posState) {
            return prices[posState][len - MATCH_LEN_MIN];
        }

        /** Refreshes the price tables whose counters have run out. */
        void updatePrices() {
            for (int posState = 0; posState < counters.length; ++posState) {
                if (counters[posState] <= 0) {
                    counters[posState] = PRICE_UPDATE_INTERVAL;
                    updatePrices(posState);
                }
            }
        }

        /** Recomputes every length price for one posState. */
        private void updatePrices(int posState) {
            int choice0Price = RangeEncoder.getBitPrice(choice[0], 0);

            int i = 0;
            for (; i < LOW_SYMBOLS; ++i)
                prices[posState][i] = choice0Price
                        + RangeEncoder.getBitTreePrice(low[posState], i);

            choice0Price = RangeEncoder.getBitPrice(choice[0], 1);
            int choice1Price = RangeEncoder.getBitPrice(choice[1], 0);

            for (; i < LOW_SYMBOLS + MID_SYMBOLS; ++i)
                prices[posState][i] = choice0Price + choice1Price
                        + RangeEncoder.getBitTreePrice(mid[posState],
                                                       i - LOW_SYMBOLS);

            choice1Price = RangeEncoder.getBitPrice(choice[1], 1);

            for (; i < prices[posState].length; ++i)
                prices[posState][i] = choice0Price + choice1Price
                        + RangeEncoder.getBitTreePrice(high, i - LOW_SYMBOLS
                                                             - MID_SYMBOLS);
        }
    }
}
diff --git a/src/org/tukaani/xz/lzma/LZMAEncoderFast.java b/src/org/tukaani/xz/lzma/LZMAEncoderFast.java
new file mode 100644
index 0000000..f8230ee
--- /dev/null
+++ b/src/org/tukaani/xz/lzma/LZMAEncoderFast.java
@@ -0,0 +1,153 @@
+/*
+ * LZMAEncoderFast
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lzma;
+
+import org.tukaani.xz.ArrayCache;
+import org.tukaani.xz.lz.LZEncoder;
+import org.tukaani.xz.lz.Matches;
+import org.tukaani.xz.rangecoder.RangeEncoder;
+
/**
 * LZMA encoder for MODE_FAST: picks the next symbol with quick greedy
 * heuristics (inherited from the LZMA SDK) instead of the optimal-parse
 * search used by LZMAEncoderNormal.
 */
final class LZMAEncoderFast extends LZMAEncoder {
    // Extra window space needed before/after the current position.
    private static final int EXTRA_SIZE_BEFORE = 1;
    private static final int EXTRA_SIZE_AFTER = MATCH_LEN_MAX - 1;

    // Matches for the current position; reused across calls.
    private Matches matches = null;

    /** Memory usage estimate in kibibytes for the given settings. */
    static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) {
        return LZEncoder.getMemoryUsage(
                dictSize, Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE),
                EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf);
    }

    LZMAEncoderFast(RangeEncoder rc, int lc, int lp, int pb,
                    int dictSize, int extraSizeBefore,
                    int niceLen, int mf, int depthLimit,
                    ArrayCache arrayCache) {
        super(rc, LZEncoder.getInstance(dictSize,
                                        Math.max(extraSizeBefore,
                                                 EXTRA_SIZE_BEFORE),
                                        EXTRA_SIZE_AFTER,
                                        niceLen, MATCH_LEN_MAX,
                                        mf, depthLimit, arrayCache),
              lc, lp, pb, dictSize, niceLen);
    }

    /**
     * Returns true if the bigger distance is enough larger (by a factor
     * of about 128) that the smaller-distance match should be preferred.
     */
    private boolean changePair(int smallDist, int bigDist) {
        return smallDist < (bigDist >>> 7);
    }

    int getNextSymbol() {
        // Get the matches for the next byte unless readAhead indicates
        // that we already got the new matches during the previous call
        // to this function.
        if (readAhead == -1)
            matches = getMatches();

        back = -1;

        // Get the number of bytes available in the dictionary, but
        // not more than the maximum match length. If there aren't
        // enough bytes remaining to encode a match at all, return
        // immediately to encode this byte as a literal.
        int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX);
        if (avail < MATCH_LEN_MIN)
            return 1;

        // Look for a match from the previous four match distances.
        int bestRepLen = 0;
        int bestRepIndex = 0;
        for (int rep = 0; rep < REPS; ++rep) {
            int len = lz.getMatchLen(reps[rep], avail);
            if (len < MATCH_LEN_MIN)
                continue;

            // If it is long enough, return it.
            if (len >= niceLen) {
                back = rep;
                skip(len - 1);
                return len;
            }

            // Remember the index and length of the best repeated match.
            if (len > bestRepLen) {
                bestRepIndex = rep;
                bestRepLen = len;
            }
        }

        int mainLen = 0;
        int mainDist = 0;

        if (matches.count > 0) {
            // The match finder returns matches sorted so the longest
            // is last; start from that one.
            mainLen = matches.len[matches.count - 1];
            mainDist = matches.dist[matches.count - 1];

            if (mainLen >= niceLen) {
                back = mainDist + REPS;
                skip(mainLen - 1);
                return mainLen;
            }

            // Prefer a one-byte-shorter match when its distance is
            // much smaller (cheaper to encode).
            while (matches.count > 1
                    && mainLen == matches.len[matches.count - 2] + 1) {
                if (!changePair(matches.dist[matches.count - 2], mainDist))
                    break;

                --matches.count;
                mainLen = matches.len[matches.count - 1];
                mainDist = matches.dist[matches.count - 1];
            }

            // A minimum-length match with a large distance is not worth
            // encoding as a match; fall back to a literal.
            if (mainLen == MATCH_LEN_MIN && mainDist >= 0x80)
                mainLen = 1;
        }

        // Prefer a repeated match over a normal match unless the normal
        // match is clearly longer relative to the rep distance cost.
        if (bestRepLen >= MATCH_LEN_MIN) {
            if (bestRepLen + 1 >= mainLen
                    || (bestRepLen + 2 >= mainLen && mainDist >= (1 << 9))
                    || (bestRepLen + 3 >= mainLen && mainDist >= (1 << 15))) {
                back = bestRepIndex;
                skip(bestRepLen - 1);
                return bestRepLen;
            }
        }

        if (mainLen < MATCH_LEN_MIN || avail <= MATCH_LEN_MIN)
            return 1;

        // Get the next match. Test if it is better than the current match.
        // If so, encode the current byte as a literal.
        matches = getMatches();

        if (matches.count > 0) {
            int newLen = matches.len[matches.count - 1];
            int newDist = matches.dist[matches.count - 1];

            if ((newLen >= mainLen && newDist < mainDist)
                    || (newLen == mainLen + 1
                        && !changePair(mainDist, newDist))
                    || newLen > mainLen + 1
                    || (newLen + 1 >= mainLen
                        && mainLen >= MATCH_LEN_MIN + 1
                        && changePair(newDist, mainDist)))
                return 1;
        }

        // If any rep distance matches nearly as long as the main match,
        // encode a literal now and let the rep match be found next time.
        int limit = Math.max(mainLen - 1, MATCH_LEN_MIN);
        for (int rep = 0; rep < REPS; ++rep)
            if (lz.getMatchLen(reps[rep], limit) == limit)
                return 1;

        back = mainDist + REPS;
        // getMatches() above already advanced readAhead by one byte,
        // so skip two fewer than the match length.
        skip(mainLen - 2);
        return mainLen;
    }
}
diff --git a/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java b/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java
new file mode 100644
index 0000000..8079cd2
--- /dev/null
+++ b/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java
@@ -0,0 +1,568 @@
+/*
+ * LZMAEncoderNormal
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lzma;
+
+import org.tukaani.xz.ArrayCache;
+import org.tukaani.xz.lz.LZEncoder;
+import org.tukaani.xz.lz.Matches;
+import org.tukaani.xz.rangecoder.RangeEncoder;
+
+/**
+ * LZMA encoder using near-optimal parsing: candidate encodings for a
+ * window of upcoming bytes are priced in the opts[] graph and the
+ * cheapest chain of symbols is emitted.
+ */
+final class LZMAEncoderNormal extends LZMAEncoder {
+    // Number of byte positions that can be optimized in one pass;
+    // also bounds how far getNextSymbol() looks ahead.
+    private static final int OPTS = 4096;
+
+    private static final int EXTRA_SIZE_BEFORE = OPTS;
+    private static final int EXTRA_SIZE_AFTER = OPTS;
+
+    // Price graph: opts[i] holds the cheapest known way to arrive at
+    // the byte i positions ahead of the current position.
+    private final Optimum[] opts = new Optimum[OPTS];
+    private int optCur = 0;
+    private int optEnd = 0;
+
+    private Matches matches;
+
+    // These are fields solely to avoid allocating the objects again and
+    // again on each function call.
+    private final int[] repLens = new int[REPS];
+    private final State nextState = new State();
+
+    /** Returns the memory usage estimate in KiB for the given options. */
+    static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) {
+        return LZEncoder.getMemoryUsage(dictSize,
+                Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE),
+                EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf)
+                + OPTS * 64 / 1024;
+    }
+
+    LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb,
+                      int dictSize, int extraSizeBefore,
+                      int niceLen, int mf, int depthLimit,
+                      ArrayCache arrayCache) {
+        super(rc, LZEncoder.getInstance(dictSize,
+                                        Math.max(extraSizeBefore,
+                                                 EXTRA_SIZE_BEFORE),
+                                        EXTRA_SIZE_AFTER,
+                                        niceLen, MATCH_LEN_MAX,
+                                        mf, depthLimit, arrayCache),
+              lc, lp, pb, dictSize, niceLen);
+
+        for (int i = 0; i < OPTS; ++i)
+            opts[i] = new Optimum();
+    }
+
+    /** Resets the pending-symbol window and the base encoder state. */
+    public void reset() {
+        optCur = 0;
+        optEnd = 0;
+        super.reset();
+    }
+
+    /**
+     * Converts the opts array from backward indexes to forward indexes.
+     * Then it will be simple to get the next symbol from the array
+     * in later calls to <code>getNextSymbol()</code>.
+     */
+    private int convertOpts() {
+        optEnd = optCur;
+
+        int optPrev = opts[optCur].optPrev;
+
+        do {
+            Optimum opt = opts[optCur];
+
+            if (opt.prev1IsLiteral) {
+                // Insert the extra literal (and, with hasPrev2, the
+                // extra rep/match) as explicit forward links.
+                opts[optPrev].optPrev = optCur;
+                opts[optPrev].backPrev = -1;
+                optCur = optPrev--;
+
+                if (opt.hasPrev2) {
+                    opts[optPrev].optPrev = optPrev + 1;
+                    opts[optPrev].backPrev = opt.backPrev2;
+                    optCur = optPrev;
+                    optPrev = opt.optPrev2;
+                }
+            }
+
+            int temp = opts[optPrev].optPrev;
+            opts[optPrev].optPrev = optCur;
+            optCur = optPrev;
+            optPrev = temp;
+        } while (optCur > 0);
+
+        optCur = opts[0].optPrev;
+        back = opts[optCur].backPrev;
+        return optCur;
+    }
+
+    /**
+     * Picks the next symbol (literal, rep, or match) to encode.
+     * Returns its length in bytes and stores the distance/rep index
+     * in <code>back</code> (-1 for a literal).
+     */
+    int getNextSymbol() {
+        // If there are pending symbols from an earlier call to this
+        // function, return those symbols first.
+        if (optCur < optEnd) {
+            int len = opts[optCur].optPrev - optCur;
+            optCur = opts[optCur].optPrev;
+            back = opts[optCur].backPrev;
+            return len;
+        }
+
+        assert optCur == optEnd;
+        optCur = 0;
+        optEnd = 0;
+        back = -1;
+
+        if (readAhead == -1)
+            matches = getMatches();
+
+        // Get the number of bytes available in the dictionary, but
+        // not more than the maximum match length. If there aren't
+        // enough bytes remaining to encode a match at all, return
+        // immediately to encode this byte as a literal.
+        int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX);
+        if (avail < MATCH_LEN_MIN)
+            return 1;
+
+        // Get the lengths of repeated matches.
+        int repBest = 0;
+        for (int rep = 0; rep < REPS; ++rep) {
+            repLens[rep] = lz.getMatchLen(reps[rep], avail);
+
+            if (repLens[rep] < MATCH_LEN_MIN) {
+                repLens[rep] = 0;
+                continue;
+            }
+
+            if (repLens[rep] > repLens[repBest])
+                repBest = rep;
+        }
+
+        // Return if the best repeated match is at least niceLen bytes long.
+        if (repLens[repBest] >= niceLen) {
+            back = repBest;
+            skip(repLens[repBest] - 1);
+            return repLens[repBest];
+        }
+
+        // Initialize mainLen and mainDist to the longest match found
+        // by the match finder.
+        int mainLen = 0;
+        int mainDist = 0;
+        if (matches.count > 0) {
+            mainLen = matches.len[matches.count - 1];
+            mainDist = matches.dist[matches.count - 1];
+
+            // Return if it is at least niceLen bytes long.
+            if (mainLen >= niceLen) {
+                back = mainDist + REPS;
+                skip(mainLen - 1);
+                return mainLen;
+            }
+        }
+
+        int curByte = lz.getByte(0);
+        int matchByte = lz.getByte(reps[0] + 1);
+
+        // If the match finder found no matches and this byte cannot be
+        // encoded as a repeated match (short or long), we must return
+        // to have the byte encoded as a literal.
+        if (mainLen < MATCH_LEN_MIN && curByte != matchByte
+                && repLens[repBest] < MATCH_LEN_MIN)
+            return 1;
+
+
+        int pos = lz.getPos();
+        int posState = pos & posMask;
+
+        // Calculate the price of encoding the current byte as a literal.
+        {
+            int prevByte = lz.getByte(1);
+            int literalPrice = literalEncoder.getPrice(curByte, matchByte,
+                                                       prevByte, pos, state);
+            opts[1].set1(literalPrice, 0, -1);
+        }
+
+        int anyMatchPrice = getAnyMatchPrice(state, posState);
+        int anyRepPrice = getAnyRepPrice(anyMatchPrice, state);
+
+        // If it is possible to encode this byte as a short rep, see if
+        // it is cheaper than encoding it as a literal.
+        if (matchByte == curByte) {
+            int shortRepPrice = getShortRepPrice(anyRepPrice,
+                                                 state, posState);
+            if (shortRepPrice < opts[1].price)
+                opts[1].set1(shortRepPrice, 0, 0);
+        }
+
+        // Return if there is neither normal nor long repeated match. Use
+        // a short match instead of a literal if it is possible and cheaper.
+        optEnd = Math.max(mainLen, repLens[repBest]);
+        if (optEnd < MATCH_LEN_MIN) {
+            assert optEnd == 0 : optEnd;
+            back = opts[1].backPrev;
+            return 1;
+        }
+
+
+        // Update the lookup tables for distances and lengths before using
+        // those price calculation functions. (The price functions above
+        // don't need these tables.)
+        updatePrices();
+
+        // Initialize the state and reps of this position in opts[].
+        // updateOptStateAndReps() will need these to get the new
+        // state and reps for the next byte.
+        opts[0].state.set(state);
+        System.arraycopy(reps, 0, opts[0].reps, 0, REPS);
+
+        // Initialize the prices for later opts that will be used below.
+        for (int i = optEnd; i >= MATCH_LEN_MIN; --i)
+            opts[i].reset();
+
+        // Calculate the prices of repeated matches of all lengths.
+        for (int rep = 0; rep < REPS; ++rep) {
+            int repLen = repLens[rep];
+            if (repLen < MATCH_LEN_MIN)
+                continue;
+
+            int longRepPrice = getLongRepPrice(anyRepPrice, rep,
+                                               state, posState);
+            do {
+                int price = longRepPrice + repLenEncoder.getPrice(repLen,
+                                                                  posState);
+                if (price < opts[repLen].price)
+                    opts[repLen].set1(price, 0, rep);
+            } while (--repLen >= MATCH_LEN_MIN);
+        }
+
+        // Calculate the prices of normal matches that are longer than rep0.
+        {
+            int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN);
+            if (len <= mainLen) {
+                int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
+                                                           state);
+
+                // Set i to the index of the shortest match that is
+                // at least len bytes long.
+                int i = 0;
+                while (len > matches.len[i])
+                    ++i;
+
+                while (true) {
+                    int dist = matches.dist[i];
+                    int price = getMatchAndLenPrice(normalMatchPrice,
+                                                    dist, len, posState);
+                    if (price < opts[len].price)
+                        opts[len].set1(price, 0, dist + REPS);
+
+                    if (len == matches.len[i])
+                        if (++i == matches.count)
+                            break;
+
+                    ++len;
+                }
+            }
+        }
+
+
+        avail = Math.min(lz.getAvail(), OPTS - 1);
+
+        // Get matches for later bytes and optimize the use of LZMA symbols
+        // by calculating the prices and picking the cheapest symbol
+        // combinations.
+        while (++optCur < optEnd) {
+            matches = getMatches();
+            if (matches.count > 0
+                    && matches.len[matches.count - 1] >= niceLen)
+                break;
+
+            --avail;
+            ++pos;
+            posState = pos & posMask;
+
+            updateOptStateAndReps();
+            anyMatchPrice = opts[optCur].price
+                            + getAnyMatchPrice(opts[optCur].state, posState);
+            anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state);
+
+            calc1BytePrices(pos, posState, avail, anyRepPrice);
+
+            if (avail >= MATCH_LEN_MIN) {
+                int startLen = calcLongRepPrices(pos, posState,
+                                                 avail, anyRepPrice);
+                if (matches.count > 0)
+                    calcNormalMatchPrices(pos, posState, avail,
+                                          anyMatchPrice, startLen);
+            }
+        }
+
+        return convertOpts();
+    }
+
+    /**
+     * Updates the state and reps for the current byte in the opts array.
+     */
+    private void updateOptStateAndReps() {
+        int optPrev = opts[optCur].optPrev;
+        assert optPrev < optCur;
+
+        if (opts[optCur].prev1IsLiteral) {
+            --optPrev;
+
+            if (opts[optCur].hasPrev2) {
+                opts[optCur].state.set(opts[opts[optCur].optPrev2].state);
+                if (opts[optCur].backPrev2 < REPS)
+                    opts[optCur].state.updateLongRep();
+                else
+                    opts[optCur].state.updateMatch();
+            } else {
+                opts[optCur].state.set(opts[optPrev].state);
+            }
+
+            opts[optCur].state.updateLiteral();
+        } else {
+            opts[optCur].state.set(opts[optPrev].state);
+        }
+
+        if (optPrev == optCur - 1) {
+            // Must be either a short rep or a literal.
+            assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1;
+
+            if (opts[optCur].backPrev == 0)
+                opts[optCur].state.updateShortRep();
+            else
+                opts[optCur].state.updateLiteral();
+
+            System.arraycopy(opts[optPrev].reps, 0,
+                             opts[optCur].reps, 0, REPS);
+        } else {
+            int back;
+            if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) {
+                optPrev = opts[optCur].optPrev2;
+                back = opts[optCur].backPrev2;
+                opts[optCur].state.updateLongRep();
+            } else {
+                back = opts[optCur].backPrev;
+                if (back < REPS)
+                    opts[optCur].state.updateLongRep();
+                else
+                    opts[optCur].state.updateMatch();
+            }
+
+            if (back < REPS) {
+                // Repeated match: move the used rep distance to the
+                // front of the rep list.
+                opts[optCur].reps[0] = opts[optPrev].reps[back];
+
+                int rep;
+                for (rep = 1; rep <= back; ++rep)
+                    opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1];
+
+                for (; rep < REPS; ++rep)
+                    opts[optCur].reps[rep] = opts[optPrev].reps[rep];
+            } else {
+                // Normal match: the new distance becomes rep0.
+                opts[optCur].reps[0] = back - REPS;
+                System.arraycopy(opts[optPrev].reps, 0,
+                                 opts[optCur].reps, 1, REPS - 1);
+            }
+        }
+    }
+
+    /**
+     * Calculates prices of a literal, a short rep, and literal + rep0.
+     */
+    private void calc1BytePrices(int pos, int posState,
+                                 int avail, int anyRepPrice) {
+        // This will be set to true if using a literal or a short rep.
+        boolean nextIsByte = false;
+
+        int curByte = lz.getByte(0);
+        int matchByte = lz.getByte(opts[optCur].reps[0] + 1);
+
+        // Try a literal.
+        int literalPrice = opts[optCur].price
+                + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1),
+                                          pos, opts[optCur].state);
+        if (literalPrice < opts[optCur + 1].price) {
+            opts[optCur + 1].set1(literalPrice, optCur, -1);
+            nextIsByte = true;
+        }
+
+        // Try a short rep.
+        if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur
+                || opts[optCur + 1].backPrev != 0)) {
+            int shortRepPrice = getShortRepPrice(anyRepPrice,
+                                                 opts[optCur].state,
+                                                 posState);
+            if (shortRepPrice <= opts[optCur + 1].price) {
+                opts[optCur + 1].set1(shortRepPrice, optCur, 0);
+                nextIsByte = true;
+            }
+        }
+
+        // If neither a literal nor a short rep was the cheapest choice,
+        // try literal + long rep0.
+        if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) {
+            int lenLimit = Math.min(niceLen, avail - 1);
+            int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit);
+
+            if (len >= MATCH_LEN_MIN) {
+                nextState.set(opts[optCur].state);
+                nextState.updateLiteral();
+                int nextPosState = (pos + 1) & posMask;
+                int price = literalPrice
+                            + getLongRepAndLenPrice(0, len,
+                                                    nextState, nextPosState);
+
+                int i = optCur + 1 + len;
+                while (optEnd < i)
+                    opts[++optEnd].reset();
+
+                if (price < opts[i].price)
+                    opts[i].set2(price, optCur, 0);
+            }
+        }
+    }
+
+    /**
+     * Calculates prices of long rep and long rep + literal + rep0.
+     */
+    private int calcLongRepPrices(int pos, int posState,
+                                  int avail, int anyRepPrice) {
+        int startLen = MATCH_LEN_MIN;
+        int lenLimit = Math.min(avail, niceLen);
+
+        for (int rep = 0; rep < REPS; ++rep) {
+            int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit);
+            if (len < MATCH_LEN_MIN)
+                continue;
+
+            // Extend the initialized part of the price graph to cover
+            // every position this rep can reach.
+            while (optEnd < optCur + len)
+                opts[++optEnd].reset();
+
+            int longRepPrice = getLongRepPrice(anyRepPrice, rep,
+                                               opts[optCur].state, posState);
+
+            for (int i = len; i >= MATCH_LEN_MIN; --i) {
+                int price = longRepPrice
+                            + repLenEncoder.getPrice(i, posState);
+                if (price < opts[optCur + i].price)
+                    opts[optCur + i].set1(price, optCur, rep);
+            }
+
+            if (rep == 0)
+                startLen = len + 1;
+
+            int len2Limit = Math.min(niceLen, avail - len - 1);
+            int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep],
+                                      len2Limit);
+
+            if (len2 >= MATCH_LEN_MIN) {
+                // Rep
+                int price = longRepPrice
+                            + repLenEncoder.getPrice(len, posState);
+                nextState.set(opts[optCur].state);
+                nextState.updateLongRep();
+
+                // Literal
+                int curByte = lz.getByte(len, 0);
+                int matchByte = lz.getByte(0); // lz.getByte(len, len)
+                int prevByte = lz.getByte(len, 1);
+                price += literalEncoder.getPrice(curByte, matchByte, prevByte,
+                                                 pos + len, nextState);
+                nextState.updateLiteral();
+
+                // Rep0
+                int nextPosState = (pos + len + 1) & posMask;
+                price += getLongRepAndLenPrice(0, len2,
+                                               nextState, nextPosState);
+
+                int i = optCur + len + 1 + len2;
+                while (optEnd < i)
+                    opts[++optEnd].reset();
+
+                if (price < opts[i].price)
+                    opts[i].set3(price, optCur, rep, len, 0);
+            }
+        }
+
+        return startLen;
+    }
+
+    /**
+     * Calculates prices of a normal match and normal match + literal + rep0.
+     */
+    private void calcNormalMatchPrices(int pos, int posState, int avail,
+                                       int anyMatchPrice, int startLen) {
+        // If the longest match is so long that it would not fit into
+        // the opts array, shorten the matches.
+        if (matches.len[matches.count - 1] > avail) {
+            matches.count = 0;
+            while (matches.len[matches.count] < avail)
+                ++matches.count;
+
+            matches.len[matches.count++] = avail;
+        }
+
+        if (matches.len[matches.count - 1] < startLen)
+            return;
+
+        while (optEnd < optCur + matches.len[matches.count - 1])
+            opts[++optEnd].reset();
+
+        int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
+                                                   opts[optCur].state);
+
+        int match = 0;
+        while (startLen > matches.len[match])
+            ++match;
+
+        for (int len = startLen; ; ++len) {
+            int dist = matches.dist[match];
+
+            // Calculate the price of a match of len bytes from the nearest
+            // possible distance.
+            int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice,
+                                                       dist, len, posState);
+            if (matchAndLenPrice < opts[optCur + len].price)
+                opts[optCur + len].set1(matchAndLenPrice,
+                                        optCur, dist + REPS);
+
+            if (len != matches.len[match])
+                continue;
+
+            // Try match + literal + rep0. First get the length of the rep0.
+            int len2Limit = Math.min(niceLen, avail - len - 1);
+            int len2 = lz.getMatchLen(len + 1, dist, len2Limit);
+
+            if (len2 >= MATCH_LEN_MIN) {
+                nextState.set(opts[optCur].state);
+                nextState.updateMatch();
+
+                // Literal
+                int curByte = lz.getByte(len, 0);
+                int matchByte = lz.getByte(0); // lz.getByte(len, len)
+                int prevByte = lz.getByte(len, 1);
+                int price = matchAndLenPrice
+                            + literalEncoder.getPrice(curByte, matchByte,
+                                                      prevByte, pos + len,
+                                                      nextState);
+                nextState.updateLiteral();
+
+                // Rep0
+                int nextPosState = (pos + len + 1) & posMask;
+                price += getLongRepAndLenPrice(0, len2,
+                                               nextState, nextPosState);
+
+                int i = optCur + len + 1 + len2;
+                while (optEnd < i)
+                    opts[++optEnd].reset();
+
+                if (price < opts[i].price)
+                    opts[i].set3(price, optCur, dist + REPS, len, 0);
+            }
+
+            if (++match == matches.count)
+                break;
+        }
+    }
+}
diff --git a/src/org/tukaani/xz/lzma/Optimum.java b/src/org/tukaani/xz/lzma/Optimum.java
new file mode 100644
index 0000000..845ac97
--- /dev/null
+++ b/src/org/tukaani/xz/lzma/Optimum.java
@@ -0,0 +1,73 @@
+/*
+ * Optimum
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lzma;
+
+final class Optimum {
+ private static final int INFINITY_PRICE = 1 << 30;
+
+ final State state = new State();
+ final int[] reps = new int[LZMACoder.REPS];
+
+ /**
+ * Cumulative price of arriving to this byte.
+ */
+ int price;
+
+ int optPrev;
+ int backPrev;
+ boolean prev1IsLiteral;
+
+ boolean hasPrev2;
+ int optPrev2;
+ int backPrev2;
+
+ /**
+ * Resets the price.
+ */
+ void reset() {
+ price = INFINITY_PRICE;
+ }
+
+ /**
+ * Sets to indicate one LZMA symbol (literal, rep, or match).
+ */
+ void set1(int newPrice, int optCur, int back) {
+ price = newPrice;
+ optPrev = optCur;
+ backPrev = back;
+ prev1IsLiteral = false;
+ }
+
+ /**
+ * Sets to indicate two LZMA symbols of which the first one is a literal.
+ */
+ void set2(int newPrice, int optCur, int back) {
+ price = newPrice;
+ optPrev = optCur + 1;
+ backPrev = back;
+ prev1IsLiteral = true;
+ hasPrev2 = false;
+ }
+
+ /**
+ * Sets to indicate three LZMA symbols of which the second one
+ * is a literal.
+ */
+ void set3(int newPrice, int optCur, int back2, int len2, int back) {
+ price = newPrice;
+ optPrev = optCur + len2 + 1;
+ backPrev = back;
+ prev1IsLiteral = true;
+ hasPrev2 = true;
+ optPrev2 = optCur;
+ backPrev2 = back2;
+ }
+}
diff --git a/src/org/tukaani/xz/lzma/State.java b/src/org/tukaani/xz/lzma/State.java
new file mode 100644
index 0000000..0ece860
--- /dev/null
+++ b/src/org/tukaani/xz/lzma/State.java
@@ -0,0 +1,75 @@
+/*
+ * State
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lzma;
+
/**
 * The 12-value LZMA state machine. The state encodes which kinds of
 * symbols (literal, match, long rep, short rep) were coded most
 * recently; it selects probability models elsewhere in the coder.
 */
final class State {
    static final int STATES = 12;

    // States below this value mean that the previous symbol
    // was a literal.
    private static final int LIT_STATES = 7;

    private static final int LIT_LIT = 0;
    private static final int MATCH_LIT_LIT = 1;
    private static final int REP_LIT_LIT = 2;
    private static final int SHORTREP_LIT_LIT = 3;
    private static final int MATCH_LIT = 4;
    private static final int REP_LIT = 5;
    private static final int SHORTREP_LIT = 6;
    private static final int LIT_MATCH = 7;
    private static final int LIT_LONGREP = 8;
    private static final int LIT_SHORTREP = 9;
    private static final int NONLIT_MATCH = 10;
    private static final int NONLIT_REP = 11;

    // Current state number, always in [0, STATES).
    private int current;

    State() {}

    State(State other) {
        current = other.current;
    }

    void reset() {
        current = LIT_LIT;
    }

    int get() {
        return current;
    }

    void set(State other) {
        current = other.current;
    }

    /** Transition taken after coding a literal. */
    void updateLiteral() {
        if (current <= SHORTREP_LIT_LIT)
            current = LIT_LIT;
        else
            current -= (current <= LIT_SHORTREP) ? 3 : 6;
    }

    /** Transition taken after coding a normal match. */
    void updateMatch() {
        current = isLiteral() ? LIT_MATCH : NONLIT_MATCH;
    }

    /** Transition taken after coding a long repeated match. */
    void updateLongRep() {
        current = isLiteral() ? LIT_LONGREP : NONLIT_REP;
    }

    /** Transition taken after coding a short repeated match. */
    void updateShortRep() {
        current = isLiteral() ? LIT_SHORTREP : NONLIT_REP;
    }

    /** Returns true if the previous symbol was a literal. */
    boolean isLiteral() {
        return current < LIT_STATES;
    }
}
diff --git a/src/org/tukaani/xz/package-info.java b/src/org/tukaani/xz/package-info.java
new file mode 100644
index 0000000..4e961df
--- /dev/null
+++ b/src/org/tukaani/xz/package-info.java
@@ -0,0 +1,45 @@
+/*
+ * package-info
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+/**
+ * XZ data compression support.
+ *
+ * <h4>Introduction</h4>
+ * <p>
+ * This aims to be a complete implementation of XZ data compression
+ * in pure Java. Features:
+ * <ul>
+ * <li>Full support for the .xz file format specification version 1.0.4</li>
+ * <li>Single-threaded streamed compression and decompression</li>
+ * <li>Single-threaded decompression with limited random access support</li>
+ * <li>Raw streams (no .xz headers) for advanced users, including LZMA2
+ * with preset dictionary</li>
+ * </ul>
+ * <p>
+ * Threading is planned but it is unknown when it will be implemented.
+ * <p>
+ * For the latest source code, see the
+ * <a href="http://tukaani.org/xz/java.html">home page of XZ for Java</a>.
+ *
+ * <h4>Getting started</h4>
+ * <p>
+ * Start by reading the documentation of {@link org.tukaani.xz.XZOutputStream}
+ * and {@link org.tukaani.xz.XZInputStream}.
+ * If you use XZ inside another file format or protocol,
+ * see also {@link org.tukaani.xz.SingleXZInputStream}.
+ *
+ * <h4>Licensing</h4>
+ * <p>
+ * XZ for Java has been put into the public domain, thus you can do
+ * whatever you want with it. All the files in the package have been
+ * written by Lasse Collin and/or Igor Pavlov.
+ * <p>
+ * This software is provided "as is", without any warranty.
+ */
+package org.tukaani.xz;
diff --git a/src/org/tukaani/xz/rangecoder/RangeCoder.java b/src/org/tukaani/xz/rangecoder/RangeCoder.java
new file mode 100644
index 0000000..df9b0c4
--- /dev/null
+++ b/src/org/tukaani/xz/rangecoder/RangeCoder.java
@@ -0,0 +1,26 @@
+/*
+ * RangeCoder
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.rangecoder;
+
+import java.util.Arrays;
+
/**
 * Constants shared by the range encoder and range decoder. Both
 * sides must use identical values so their probability models stay
 * in sync.
 */
public abstract class RangeCoder {
    static final int SHIFT_BITS = 8;
    static final int TOP_MASK = 0xFF000000;
    static final int BIT_MODEL_TOTAL_BITS = 11;
    static final int BIT_MODEL_TOTAL = 1 << BIT_MODEL_TOTAL_BITS;
    static final short PROB_INIT = (short)(BIT_MODEL_TOTAL / 2);
    static final int MOVE_BITS = 5;

    /**
     * Fills the given probability array with the neutral initial
     * estimate (probability 0.5, i.e. BIT_MODEL_TOTAL / 2).
     */
    public static final void initProbs(short[] probs) {
        for (int i = 0; i < probs.length; ++i)
            probs[i] = PROB_INIT;
    }
}
diff --git a/src/org/tukaani/xz/rangecoder/RangeDecoder.java b/src/org/tukaani/xz/rangecoder/RangeDecoder.java
new file mode 100644
index 0000000..e63532e
--- /dev/null
+++ b/src/org/tukaani/xz/rangecoder/RangeDecoder.java
@@ -0,0 +1,83 @@
+/*
+ * RangeDecoder
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.rangecoder;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+
+/**
+ * Base class for range decoders. Subclasses supply the input source
+ * by implementing normalize(), which shifts more input into the low
+ * bits of <code>code</code> whenever <code>range</code> gets small.
+ */
+public abstract class RangeDecoder extends RangeCoder {
+    // Current range and code value; both are treated as
+    // unsigned 32-bit integers.
+    int range = 0;
+    int code = 0;
+
+    public abstract void normalize() throws IOException;
+
+    /**
+     * Decodes one bit using the adaptive probability model
+     * probs[index], updating the model toward the decoded bit.
+     */
+    public int decodeBit(short[] probs, int index) throws IOException {
+        normalize();
+
+        int prob = probs[index];
+        int bound = (range >>> BIT_MODEL_TOTAL_BITS) * prob;
+        int bit;
+
+        // Compare code and bound as if they were unsigned 32-bit integers.
+        if ((code ^ 0x80000000) < (bound ^ 0x80000000)) {
+            range = bound;
+            probs[index] = (short)(
+                    prob + ((BIT_MODEL_TOTAL - prob) >>> MOVE_BITS));
+            bit = 0;
+        } else {
+            range -= bound;
+            code -= bound;
+            probs[index] = (short)(prob - (prob >>> MOVE_BITS));
+            bit = 1;
+        }
+
+        return bit;
+    }
+
+    /**
+     * Decodes a symbol with a bit tree, most significant bit first.
+     * Returns a value in [0, probs.length).
+     */
+    public int decodeBitTree(short[] probs) throws IOException {
+        int symbol = 1;
+
+        do {
+            symbol = (symbol << 1) | decodeBit(probs, symbol);
+        } while (symbol < probs.length);
+
+        // Drop the leading 1 that was used as the tree index.
+        return symbol - probs.length;
+    }
+
+    /**
+     * Decodes a symbol with a bit tree, least significant bit first.
+     */
+    public int decodeReverseBitTree(short[] probs) throws IOException {
+        int symbol = 1;
+        int i = 0;
+        int result = 0;
+
+        do {
+            int bit = decodeBit(probs, symbol);
+            symbol = (symbol << 1) | bit;
+            result |= bit << i++;
+        } while (symbol < probs.length);
+
+        return result;
+    }
+
+    /**
+     * Decodes count bits with fixed 0.5 probability (no adaptive model).
+     */
+    public int decodeDirectBits(int count) throws IOException {
+        int result = 0;
+
+        do {
+            normalize();
+
+            range >>>= 1;
+            // Branch-free compare: t is the sign bit of (code - range),
+            // so the decoded bit is (1 - t).
+            int t = (code - range) >>> 31;
+            // Subtracts range from code only when the decoded bit is 1
+            // (t == 0 makes the mask all ones).
+            code -= range & (t - 1);
+            result = (result << 1) | (1 - t);
+        } while (--count != 0);
+
+        return result;
+    }
+}
diff --git a/src/org/tukaani/xz/rangecoder/RangeDecoderFromBuffer.java b/src/org/tukaani/xz/rangecoder/RangeDecoderFromBuffer.java
new file mode 100644
index 0000000..dd2f7cc
--- /dev/null
+++ b/src/org/tukaani/xz/rangecoder/RangeDecoderFromBuffer.java
@@ -0,0 +1,71 @@
+/*
+ * RangeDecoderFromBuffer
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.rangecoder;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import org.tukaani.xz.ArrayCache;
+import org.tukaani.xz.CorruptedInputException;
+
+/**
+ * Range decoder that reads from an in-memory buffer holding one
+ * compressed chunk at a time. The data is placed at the *end* of
+ * the buffer so that a corrupt stream that makes the decoder read
+ * too far simply runs off the end of the array.
+ */
+public final class RangeDecoderFromBuffer extends RangeDecoder {
+    // Bytes consumed directly in prepareInputBuffer(): one control
+    // byte plus the four initial code bytes.
+    private static final int INIT_SIZE = 5;
+
+    private final byte[] buf;
+    private int pos;
+
+    public RangeDecoderFromBuffer(int inputSizeMax, ArrayCache arrayCache) {
+        // We will use the *end* of the array so if the cache gives us
+        // a bigger-than-requested array, we still want to use buf.length.
+        buf = arrayCache.getByteArray(inputSizeMax - INIT_SIZE, false);
+        pos = buf.length;
+    }
+
+    /** Returns the internal buffer to the cache for reuse. */
+    public void putArraysToCache(ArrayCache arrayCache) {
+        arrayCache.putArray(buf);
+    }
+
+    /**
+     * Reads one chunk of len bytes from in, validating the range
+     * coder preamble (a zero byte followed by the initial code value).
+     *
+     * @throws CorruptedInputException if len is too small or the
+     *         first byte is not 0x00
+     */
+    public void prepareInputBuffer(DataInputStream in, int len)
+            throws IOException {
+        if (len < INIT_SIZE)
+            throw new CorruptedInputException();
+
+        if (in.readUnsignedByte() != 0x00)
+            throw new CorruptedInputException();
+
+        code = in.readInt();
+        range = 0xFFFFFFFF;
+
+        // Read the data to the end of the buffer. If the data is corrupt
+        // and the decoder, reading from buf, tries to read past the end of
+        // the data, ArrayIndexOutOfBoundsException will be thrown and
+        // the problem is detected immediately.
+        len -= INIT_SIZE;
+        pos = buf.length - len;
+        in.readFully(buf, pos, len);
+    }
+
+    /**
+     * True when the whole chunk has been consumed and the range coder
+     * has reached its expected final state (code == 0).
+     */
+    public boolean isFinished() {
+        return pos == buf.length && code == 0;
+    }
+
+    public void normalize() throws IOException {
+        if ((range & TOP_MASK) == 0) {
+            try {
+                // If the input is corrupt, this might throw
+                // ArrayIndexOutOfBoundsException.
+                code = (code << SHIFT_BITS) | (buf[pos++] & 0xFF);
+                range <<= SHIFT_BITS;
+            } catch (ArrayIndexOutOfBoundsException e) {
+                throw new CorruptedInputException();
+            }
+        }
+    }
+}
diff --git a/src/org/tukaani/xz/rangecoder/RangeDecoderFromStream.java b/src/org/tukaani/xz/rangecoder/RangeDecoderFromStream.java
new file mode 100644
index 0000000..142b518
--- /dev/null
+++ b/src/org/tukaani/xz/rangecoder/RangeDecoderFromStream.java
@@ -0,0 +1,41 @@
+/*
+ * RangeDecoderFromStream
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.rangecoder;
+
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import org.tukaani.xz.CorruptedInputException;
+
+public final class RangeDecoderFromStream extends RangeDecoder {
+ private final DataInputStream inData;
+
+ public RangeDecoderFromStream(InputStream in) throws IOException {
+ inData = new DataInputStream(in);
+
+ if (inData.readUnsignedByte() != 0x00)
+ throw new CorruptedInputException();
+
+ code = inData.readInt();
+ range = 0xFFFFFFFF;
+ }
+
+ public boolean isFinished() {
+ return code == 0;
+ }
+
+ public void normalize() throws IOException {
+ if ((range & TOP_MASK) == 0) {
+ code = (code << SHIFT_BITS) | inData.readUnsignedByte();
+ range <<= SHIFT_BITS;
+ }
+ }
+}
diff --git a/src/org/tukaani/xz/rangecoder/RangeEncoder.java b/src/org/tukaani/xz/rangecoder/RangeEncoder.java
new file mode 100644
index 0000000..daa84a3
--- /dev/null
+++ b/src/org/tukaani/xz/rangecoder/RangeEncoder.java
@@ -0,0 +1,200 @@
+/*
+ * RangeEncoder
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.rangecoder;
+
+import java.io.IOException;
+
+/**
+ * Base class for range encoders. Subclasses provide the output sink
+ * by implementing writeByte().
+ */
+public abstract class RangeEncoder extends RangeCoder {
+    private static final int MOVE_REDUCING_BITS = 4;
+    private static final int BIT_PRICE_SHIFT_BITS = 4;
+
+    // Price lookup table: one entry per probability bucket of width
+    // (1 << MOVE_REDUCING_BITS), filled by the static block below.
+    private static final int[] prices
+            = new int[BIT_MODEL_TOTAL >>> MOVE_REDUCING_BITS];
+
+    private long low;
+    private int range;
+
+    // NOTE: int is OK for LZMA2 because a compressed chunk
+    // is not more than 64 KiB, but with LZMA1 there is no chunking
+    // so in theory cacheSize can grow very big. To be very safe,
+    // use long instead of int since this code is used for LZMA1 too.
+    long cacheSize;
+    private byte cache;
+
+    static {
+        // Precompute bit prices: for the midpoint of each probability
+        // bucket, approximate -log2(probability) scaled by
+        // (1 << BIT_PRICE_SHIFT_BITS) via repeated squaring of w
+        // (each squaring doubles the accumulated bit count).
+        for (int i = (1 << MOVE_REDUCING_BITS) / 2; i < BIT_MODEL_TOTAL;
+                i += (1 << MOVE_REDUCING_BITS)) {
+            int w = i;
+            int bitCount = 0;
+
+            for (int j = 0; j < BIT_PRICE_SHIFT_BITS; ++j) {
+                w *= w;
+                bitCount <<= 1;
+
+                while ((w & 0xFFFF0000) != 0) {
+                    w >>>= 1;
+                    ++bitCount;
+                }
+            }
+
+            prices[i >> MOVE_REDUCING_BITS]
+                    = (BIT_MODEL_TOTAL_BITS << BIT_PRICE_SHIFT_BITS)
+                      - 15 - bitCount;
+        }
+    }
+
+    /** Resets the encoder to its initial state. */
+    public void reset() {
+        low = 0;
+        range = 0xFFFFFFFF;
+        cache = 0x00;
+        cacheSize = 1;
+    }
+
+    public int getPendingSize() {
+        // This function is only needed by users of RangeEncoderToBuffer,
+        // but providing a must-be-never-called version here makes
+        // LZMAEncoder simpler.
+        throw new Error();
+    }
+
+    /** Flushes the remaining pending bytes of the range coder. */
+    public int finish() throws IOException {
+        for (int i = 0; i < 5; ++i)
+            shiftLow();
+
+        // RangeEncoderToBuffer.finish() needs a return value to tell
+        // how big the finished buffer is. RangeEncoderToStream has no
+        // buffer and thus no return value is needed. Here we use a dummy
+        // value which can be overridden in RangeEncoderToBuffer.finish().
+        return -1;
+    }
+
+    abstract void writeByte(int b) throws IOException;
+
+    private void shiftLow() throws IOException {
+        // lowHi is the carry out of the low 32 bits; it is added to
+        // the cached byte and propagated through any pending 0xFF
+        // bytes counted by cacheSize.
+        int lowHi = (int)(low >>> 32);
+
+        if (lowHi != 0 || low < 0xFF000000L) {
+            int temp = cache;
+
+            do {
+                writeByte(temp + lowHi);
+                temp = 0xFF;
+            } while (--cacheSize != 0);
+
+            cache = (byte)(low >>> 24);
+        }
+
+        ++cacheSize;
+        low = (low & 0x00FFFFFF) << 8;
+    }
+
+    /**
+     * Encodes one bit using the adaptive probability model
+     * probs[index], updating the model toward the encoded bit.
+     */
+    public void encodeBit(short[] probs, int index, int bit)
+            throws IOException {
+        int prob = probs[index];
+        int bound = (range >>> BIT_MODEL_TOTAL_BITS) * prob;
+
+        // NOTE: Any non-zero value for bit is taken as 1.
+        if (bit == 0) {
+            range = bound;
+            probs[index] = (short)(
+                    prob + ((BIT_MODEL_TOTAL - prob) >>> MOVE_BITS));
+        } else {
+            low += bound & 0xFFFFFFFFL;
+            range -= bound;
+            probs[index] = (short)(prob - (prob >>> MOVE_BITS));
+        }
+
+        if ((range & TOP_MASK) == 0) {
+            range <<= SHIFT_BITS;
+            shiftLow();
+        }
+    }
+
+    /** Returns the price (scaled bit cost) of encoding the given bit. */
+    public static int getBitPrice(int prob, int bit) {
+        // NOTE: Unlike in encodeBit(), here bit must be 0 or 1.
+        assert bit == 0 || bit == 1;
+        return prices[(prob ^ ((-bit) & (BIT_MODEL_TOTAL - 1)))
+                      >>> MOVE_REDUCING_BITS];
+    }
+
+    /** Encodes a symbol with a bit tree, most significant bit first. */
+    public void encodeBitTree(short[] probs, int symbol) throws IOException {
+        int index = 1;
+        int mask = probs.length;
+
+        do {
+            mask >>>= 1;
+            int bit = symbol & mask;
+            encodeBit(probs, index, bit);
+
+            index <<= 1;
+            if (bit != 0)
+                index |= 1;
+
+        } while (mask != 1);
+    }
+
+    /** Returns the total price of encoding symbol with the bit tree. */
+    public static int getBitTreePrice(short[] probs, int symbol) {
+        int price = 0;
+        symbol |= probs.length;
+
+        do {
+            int bit = symbol & 1;
+            symbol >>>= 1;
+            price += getBitPrice(probs[symbol], bit);
+        } while (symbol != 1);
+
+        return price;
+    }
+
+    /** Encodes a symbol with a bit tree, least significant bit first. */
+    public void encodeReverseBitTree(short[] probs, int symbol)
+            throws IOException {
+        int index = 1;
+        symbol |= probs.length;
+
+        do {
+            int bit = symbol & 1;
+            symbol >>>= 1;
+            encodeBit(probs, index, bit);
+            index = (index << 1) | bit;
+        } while (symbol != 1);
+    }
+
+    /** Returns the price of encoding symbol with the reverse bit tree. */
+    public static int getReverseBitTreePrice(short[] probs, int symbol) {
+        int price = 0;
+        int index = 1;
+        symbol |= probs.length;
+
+        do {
+            int bit = symbol & 1;
+            symbol >>>= 1;
+            price += getBitPrice(probs[index], bit);
+            index = (index << 1) | bit;
+        } while (symbol != 1);
+
+        return price;
+    }
+
+    /** Encodes count bits with fixed 0.5 probability (no model). */
+    public void encodeDirectBits(int value, int count) throws IOException {
+        do {
+            range >>>= 1;
+            low += range & (0 - ((value >>> --count) & 1));
+
+            if ((range & TOP_MASK) == 0) {
+                range <<= SHIFT_BITS;
+                shiftLow();
+            }
+        } while (count != 0);
+    }
+
+    /** Price of count direct bits: exactly one bit each, scaled. */
+    public static int getDirectBitsPrice(int count) {
+        return count << BIT_PRICE_SHIFT_BITS;
+    }
+}
diff --git a/src/org/tukaani/xz/rangecoder/RangeEncoderToBuffer.java b/src/org/tukaani/xz/rangecoder/RangeEncoderToBuffer.java
new file mode 100644
index 0000000..aee2e3d
--- /dev/null
+++ b/src/org/tukaani/xz/rangecoder/RangeEncoderToBuffer.java
@@ -0,0 +1,59 @@
+/*
+ * RangeEncoderToBuffer
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.rangecoder;
+
+import java.io.OutputStream;
+import java.io.IOException;
+import org.tukaani.xz.ArrayCache;
+
+public final class RangeEncoderToBuffer extends RangeEncoder { // range encoder that writes into a fixed-size in-memory buffer
+    private final byte[] buf; // output buffer, obtained from the ArrayCache
+    private int bufPos; // next write position in buf
+
+    public RangeEncoderToBuffer(int bufSize, ArrayCache arrayCache) {
+        buf = arrayCache.getByteArray(bufSize, false); // false: buffer contents need not be cleared
+        reset();
+    }
+
+    public void putArraysToCache(ArrayCache arrayCache) { // return buf to the cache; the encoder must not be used afterwards
+        arrayCache.putArray(buf);
+    }
+
+    public void reset() { // restart encoding from an empty buffer
+        super.reset();
+        bufPos = 0;
+    }
+
+    public int getPendingSize() { // bytes written so far plus bytes still pending inside the base encoder
+        // With LZMA2 it is known that cacheSize fits into an int.
+        return bufPos + (int)cacheSize + 5 - 1; // presumably 5 - 1 accounts for the unflushed low/cache bytes — TODO confirm against RangeEncoder
+    }
+
+    public int finish() { // flush the base encoder; returns the total number of bytes in buf
+        // super.finish() cannot throw an IOException because writeByte()
+        // provided in this file cannot throw an IOException.
+        try {
+            super.finish();
+        } catch (IOException e) {
+            throw new Error(); // unreachable; NOTE(review): new Error(e) would preserve the cause if this ever triggered
+        }
+
+        return bufPos;
+    }
+
+    public void write(OutputStream out) throws IOException { // copy the finished output to out
+        out.write(buf, 0, bufPos);
+    }
+
+    void writeByte(int b) { // sink used by the base class for each encoded byte; cannot throw
+        buf[bufPos++] = (byte)b;
+    }
+}
diff --git a/src/org/tukaani/xz/rangecoder/RangeEncoderToStream.java b/src/org/tukaani/xz/rangecoder/RangeEncoderToStream.java
new file mode 100644
index 0000000..9199c59
--- /dev/null
+++ b/src/org/tukaani/xz/rangecoder/RangeEncoderToStream.java
@@ -0,0 +1,27 @@
+/*
+ * RangeEncoderToStream
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.rangecoder;
+
+import java.io.OutputStream;
+import java.io.IOException;
+
+public final class RangeEncoderToStream extends RangeEncoder { // range encoder that writes each output byte straight to a stream
+    private final OutputStream out; // destination stream; never closed by this class
+
+    public RangeEncoderToStream(OutputStream out) {
+        this.out = out;
+        reset();
+    }
+
+    void writeByte(int b) throws IOException { // sink used by the base class for each encoded byte
+        out.write(b);
+    }
+}
diff --git a/src/org/tukaani/xz/simple/ARM.java b/src/org/tukaani/xz/simple/ARM.java
new file mode 100644
index 0000000..6febf78
--- /dev/null
+++ b/src/org/tukaani/xz/simple/ARM.java
@@ -0,0 +1,50 @@
+/*
+ * BCJ filter for little endian ARM instructions
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.simple;
+
+public final class ARM implements SimpleFilter { // BCJ filter: converts ARM branch offsets between relative and absolute form
+    private final boolean isEncoder; // true: add the position (relative -> absolute); false: subtract it back
+    private int pos; // running stream position used as the conversion base
+
+    public ARM(boolean isEncoder, int startPos) {
+        this.isEncoder = isEncoder;
+        pos = startPos + 8; // +8: presumably the ARM PC read-ahead — TODO confirm against upstream xz
+    }
+
+    public int code(byte[] buf, int off, int len) { // filters buf[off, off+len) in place; returns bytes processed (multiple of 4)
+        int end = off + len - 4; // last index where a full 4-byte instruction fits
+        int i;
+
+        for (i = off; i <= end; i += 4) {
+            if ((buf[i + 3] & 0xFF) == 0xEB) { // 0xEB: top byte of the branch instruction this filter rewrites
+                int src = ((buf[i + 2] & 0xFF) << 16) // little endian 24-bit displacement, in words
+                          | ((buf[i + 1] & 0xFF) << 8)
+                          | (buf[i] & 0xFF);
+                src <<= 2; // words -> bytes
+
+                int dest;
+                if (isEncoder)
+                    dest = src + (pos + i - off);
+                else
+                    dest = src - (pos + i - off);
+
+                dest >>>= 2; // bytes -> words; wraps modulo 2^24 when stored back
+                buf[i + 2] = (byte)(dest >>> 16);
+                buf[i + 1] = (byte)(dest >>> 8);
+                buf[i] = (byte)dest;
+            }
+        }
+
+        i -= off; // number of bytes actually processed
+        pos += i; // advance the base position for the next call
+        return i;
+    }
+}
diff --git a/src/org/tukaani/xz/simple/ARMThumb.java b/src/org/tukaani/xz/simple/ARMThumb.java
new file mode 100644
index 0000000..b8e7ca9
--- /dev/null
+++ b/src/org/tukaani/xz/simple/ARMThumb.java
@@ -0,0 +1,53 @@
+/*
+ * BCJ filter for little endian ARM-Thumb instructions
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.simple;
+
+public final class ARMThumb implements SimpleFilter { // BCJ filter: converts ARM-Thumb branch offsets between relative and absolute
+    private final boolean isEncoder; // true: add the position; false: subtract it back
+    private int pos; // running stream position used as the conversion base
+
+    public ARMThumb(boolean isEncoder, int startPos) {
+        this.isEncoder = isEncoder;
+        pos = startPos + 4; // +4: presumably the Thumb PC read-ahead — TODO confirm against upstream xz
+    }
+
+    public int code(byte[] buf, int off, int len) { // filters buf[off, off+len) in place; returns bytes processed (multiple of 2)
+        int end = off + len - 4; // the matched instruction pair needs 4 bytes
+        int i;
+
+        for (i = off; i <= end; i += 2) { // Thumb instructions are 2-byte aligned
+            if ((buf[i + 1] & 0xF8) == 0xF0 && (buf[i + 3] & 0xF8) == 0xF8) { // prefix + suffix halfword pattern of the branch pair
+                int src = ((buf[i + 1] & 0x07) << 19) // 22-bit displacement split across the two halfwords
+                          | ((buf[i] & 0xFF) << 11)
+                          | ((buf[i + 3] & 0x07) << 8)
+                          | (buf[i + 2] & 0xFF);
+                src <<= 1; // halfwords -> bytes
+
+                int dest;
+                if (isEncoder)
+                    dest = src + (pos + i - off);
+                else
+                    dest = src - (pos + i - off);
+
+                dest >>>= 1; // bytes -> halfwords
+                buf[i + 1] = (byte)(0xF0 | ((dest >>> 19) & 0x07));
+                buf[i] = (byte)(dest >>> 11);
+                buf[i + 3] = (byte)(0xF8 | ((dest >>> 8) & 0x07));
+                buf[i + 2] = (byte)dest;
+                i += 2; // skip the second halfword of the rewritten pair
+            }
+        }
+
+        i -= off; // number of bytes actually processed
+        pos += i; // advance the base position for the next call
+        return i;
+    }
+}
diff --git a/src/org/tukaani/xz/simple/IA64.java b/src/org/tukaani/xz/simple/IA64.java
new file mode 100644
index 0000000..776a1b7
--- /dev/null
+++ b/src/org/tukaani/xz/simple/IA64.java
@@ -0,0 +1,81 @@
+/*
+ * BCJ filter for Itanium (IA-64) instructions
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.simple;
+
+public final class IA64 implements SimpleFilter { // BCJ filter: rewrites IA-64 branch targets inside 16-byte instruction bundles
+    private static final int[] BRANCH_TABLE = { // per 5-bit template: bitmask of the slots that may hold a branch
+        0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0,
+        4, 4, 6, 6, 0, 0, 7, 7,
+        4, 4, 0, 0, 4, 4, 0, 0 };
+
+    private final boolean isEncoder; // true: add the position; false: subtract it back
+    private int pos; // running stream position used as the conversion base
+
+    public IA64(boolean isEncoder, int startPos) {
+        this.isEncoder = isEncoder;
+        pos = startPos;
+    }
+
+    public int code(byte[] buf, int off, int len) { // filters buf[off, off+len) in place; returns bytes processed (multiple of 16)
+        int end = off + len - 16; // each bundle is 16 bytes
+        int i;
+
+        for (i = off; i <= end; i += 16) {
+            int instrTemplate = buf[i] & 0x1F; // low 5 bits select the bundle template
+            int mask = BRANCH_TABLE[instrTemplate];
+
+            for (int slot = 0, bitPos = 5; slot < 3; ++slot, bitPos += 41) { // three 41-bit slots follow the 5 template bits
+                if (((mask >>> slot) & 1) == 0) // this slot cannot hold a branch
+                    continue;
+
+                int bytePos = bitPos >>> 3; // first byte covering the slot
+                int bitRes = bitPos & 7; // bit offset of the slot within that byte
+
+                long instr = 0;
+                for (int j = 0; j < 6; ++j) // load 6 little endian bytes covering the 41-bit slot
+                    instr |= (buf[i + bytePos + j] & 0xFFL) << (8 * j);
+
+                long instrNorm = instr >>> bitRes; // slot contents aligned to bit 0
+
+                if (((instrNorm >>> 37) & 0x0F) != 0x05 // not the branch form this filter handles
+                    || ((instrNorm >>> 9) & 0x07) != 0x00)
+                    continue;
+
+                int src = (int)((instrNorm >>> 13) & 0x0FFFFF); // low 20 displacement bits ...
+                src |= ((int)(instrNorm >>> 36) & 1) << 20; // ... bit 20 of the displacement lives at instruction bit 36
+                src <<= 4; // bundles -> bytes
+
+                int dest;
+                if (isEncoder)
+                    dest = src + (pos + i - off);
+                else
+                    dest = src - (pos + i - off);
+
+                dest >>>= 4; // bytes -> bundles
+
+                instrNorm &= ~(0x8FFFFFL << 13); // clear the old low 20 bits and bit 36
+                instrNorm |= (dest & 0x0FFFFFL) << 13; // insert the new low 20 bits
+                instrNorm |= (dest & 0x100000L) << (36 - 20); // and the new bit 20 at instruction bit 36
+
+                instr &= (1 << bitRes) - 1; // keep the bits below the slot untouched
+                instr |= instrNorm << bitRes; // splice the updated slot back in
+
+                for (int j = 0; j < 6; ++j) // store the 6 bytes back, little endian
+                    buf[i + bytePos + j] = (byte)(instr >>> (8 * j));
+            }
+        }
+
+        i -= off; // number of bytes actually processed
+        pos += i; // advance the base position for the next call
+        return i;
+    }
+}
diff --git a/src/org/tukaani/xz/simple/PowerPC.java b/src/org/tukaani/xz/simple/PowerPC.java
new file mode 100644
index 0000000..b7400ab
--- /dev/null
+++ b/src/org/tukaani/xz/simple/PowerPC.java
@@ -0,0 +1,50 @@
+/*
+ * BCJ filter for big endian PowerPC instructions
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.simple;
+
+public final class PowerPC implements SimpleFilter { // BCJ filter: converts big endian PowerPC branch displacements
+    private final boolean isEncoder; // true: add the position; false: subtract it back
+    private int pos; // running stream position used as the conversion base
+
+    public PowerPC(boolean isEncoder, int startPos) {
+        this.isEncoder = isEncoder;
+        pos = startPos;
+    }
+
+    public int code(byte[] buf, int off, int len) { // filters buf[off, off+len) in place; returns bytes processed (multiple of 4)
+        int end = off + len - 4; // last index where a full 4-byte instruction fits
+        int i;
+
+        for (i = off; i <= end; i += 4) {
+            if ((buf[i] & 0xFC) == 0x48 && (buf[i + 3] & 0x03) == 0x01) { // presumably a branch with the link bit set — per the file header this is a PowerPC BCJ match
+                int src = ((buf[i] & 0x03) << 24) // 26-bit displacement; low 2 bits stay zero (word aligned)
+                          | ((buf[i + 1] & 0xFF) << 16)
+                          | ((buf[i + 2] & 0xFF) << 8)
+                          | (buf[i + 3] & 0xFC);
+
+                int dest;
+                if (isEncoder)
+                    dest = src + (pos + i - off);
+                else
+                    dest = src - (pos + i - off);
+
+                buf[i] = (byte)(0x48 | ((dest >>> 24) & 0x03)); // restore the opcode bits around the new displacement
+                buf[i + 1] = (byte)(dest >>> 16);
+                buf[i + 2] = (byte)(dest >>> 8);
+                buf[i + 3] = (byte)((buf[i + 3] & 0x03) | dest); // keep the low flag bits, replace the offset bits
+            }
+        }
+
+        i -= off; // number of bytes actually processed
+        pos += i; // advance the base position for the next call
+        return i;
+    }
+}
diff --git a/src/org/tukaani/xz/simple/SPARC.java b/src/org/tukaani/xz/simple/SPARC.java
new file mode 100644
index 0000000..913c8ac
--- /dev/null
+++ b/src/org/tukaani/xz/simple/SPARC.java
@@ -0,0 +1,56 @@
+/*
+ * BCJ filter for SPARC instructions
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.simple;
+
+public final class SPARC implements SimpleFilter { // BCJ filter: converts SPARC call displacements between relative and absolute
+    private final boolean isEncoder; // true: add the position; false: subtract it back
+    private int pos; // running stream position used as the conversion base
+
+    public SPARC(boolean isEncoder, int startPos) {
+        this.isEncoder = isEncoder;
+        pos = startPos;
+    }
+
+    public int code(byte[] buf, int off, int len) { // filters buf[off, off+len) in place; returns bytes processed (multiple of 4)
+        int end = off + len - 4; // last index where a full 4-byte instruction fits
+        int i;
+
+        for (i = off; i <= end; i += 4) {
+            if ((buf[i] == 0x40 && (buf[i + 1] & 0xC0) == 0x00) // forward-displacement pattern ...
+                || (buf[i] == 0x7F && (buf[i + 1] & 0xC0) == 0xC0)) { // ... or backward-displacement pattern
+                int src = ((buf[i] & 0xFF) << 24) // big endian 32-bit instruction word
+                          | ((buf[i + 1] & 0xFF) << 16)
+                          | ((buf[i + 2] & 0xFF) << 8)
+                          | (buf[i + 3] & 0xFF);
+                src <<= 2; // words -> bytes
+
+                int dest;
+                if (isEncoder)
+                    dest = src + (pos + i - off);
+                else
+                    dest = src - (pos + i - off);
+
+                dest >>>= 2; // bytes -> words
+                dest = (((0 - ((dest >>> 22) & 1)) << 22) & 0x3FFFFFFF) // sign-extend bit 22 up through bit 29 ...
+                       | (dest & 0x3FFFFF) | 0x40000000; // ... keep the low 22 bits and restore the opcode bit
+
+                buf[i] = (byte)(dest >>> 24);
+                buf[i + 1] = (byte)(dest >>> 16);
+                buf[i + 2] = (byte)(dest >>> 8);
+                buf[i + 3] = (byte)dest;
+            }
+        }
+
+        i -= off; // number of bytes actually processed
+        pos += i; // advance the base position for the next call
+        return i;
+    }
+}
diff --git a/src/org/tukaani/xz/simple/SimpleFilter.java b/src/org/tukaani/xz/simple/SimpleFilter.java
new file mode 100644
index 0000000..6f72906
--- /dev/null
+++ b/src/org/tukaani/xz/simple/SimpleFilter.java
@@ -0,0 +1,14 @@
+/*
+ * Common interface for the simple (BCJ) filters
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.simple;
+
+public interface SimpleFilter { // implemented by ARM, ARMThumb, IA64, PowerPC, SPARC, and X86 in this package
+    int code(byte[] buf, int off, int len); // filter buf[off, off+len) in place; returns the number of bytes fully processed
+}
diff --git a/src/org/tukaani/xz/simple/X86.java b/src/org/tukaani/xz/simple/X86.java
new file mode 100644
index 0000000..a05e08b
--- /dev/null
+++ b/src/org/tukaani/xz/simple/X86.java
@@ -0,0 +1,98 @@
+/*
+ * BCJ filter for x86 instructions
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.simple;
+
+public final class X86 implements SimpleFilter { // BCJ filter: converts the 32-bit operands of opcodes 0xE8/0xE9 (x86 near CALL/JMP)
+    private static final boolean[] MASK_TO_ALLOWED_STATUS // indexed by prevMask: whether conversion may be attempted
+        = {true, true, true, false, true, false, false, false};
+
+    private static final int[] MASK_TO_BIT_NUMBER = {0, 1, 2, 2, 3, 3, 3, 3}; // indexed by prevMask: which earlier operand byte to test
+
+    private final boolean isEncoder; // true: add the position; false: subtract it back
+    private int pos; // running stream position used as the conversion base
+    private int prevMask = 0; // history of recent 0xE8/0xE9 sightings, carried across calls
+
+    private static boolean test86MSByte(byte b) { // true for 0x00 or 0xFF, i.e. a sign-extension byte
+        int i = b & 0xFF;
+        return i == 0x00 || i == 0xFF;
+    }
+
+    public X86(boolean isEncoder, int startPos) {
+        this.isEncoder = isEncoder;
+        pos = startPos + 5; // +5: opcode byte plus the 4-byte operand read below
+    }
+
+    public int code(byte[] buf, int off, int len) { // filters buf[off, off+len) in place; returns bytes processed
+        int prevPos = off - 1; // position of the previous 0xE8/0xE9 candidate
+        int end = off + len - 5; // a full instruction needs 5 bytes
+        int i;
+
+        for (i = off; i <= end; ++i) {
+            if ((buf[i] & 0xFE) != 0xE8) // matches neither 0xE8 nor 0xE9
+                continue;
+
+            prevPos = i - prevPos; // distance since the previous candidate
+            if ((prevPos & ~3) != 0) { // (unsigned)prevPos > 3
+                prevMask = 0; // too far back: forget the history
+            } else {
+                prevMask = (prevMask << (prevPos - 1)) & 7; // age the history by the distance
+                if (prevMask != 0) {
+                    if (!MASK_TO_ALLOWED_STATUS[prevMask] || test86MSByte(
+                            buf[i + 4 - MASK_TO_BIT_NUMBER[prevMask]])) {
+                        prevPos = i;
+                        prevMask = (prevMask << 1) | 1; // record this candidate but leave it unconverted
+                        continue;
+                    }
+                }
+            }
+
+            prevPos = i;
+
+            if (test86MSByte(buf[i + 4])) { // convert only when the operand's top byte is a sign extension
+                int src = (buf[i + 1] & 0xFF) // little endian 32-bit relative offset
+                          | ((buf[i + 2] & 0xFF) << 8)
+                          | ((buf[i + 3] & 0xFF) << 16)
+                          | ((buf[i + 4] & 0xFF) << 24);
+                int dest;
+                while (true) {
+                    if (isEncoder)
+                        dest = src + (pos + i - off);
+                    else
+                        dest = src - (pos + i - off);
+
+                    if (prevMask == 0) // no interfering history: accept the result
+                        break;
+
+                    int index = MASK_TO_BIT_NUMBER[prevMask] * 8;
+                    if (!test86MSByte((byte)(dest >>> (24 - index)))) // result no longer looks convertible: accept it
+                        break;
+
+                    src = dest ^ ((1 << (32 - index)) - 1); // adjust and retry; presumably mirrors the upstream C x86 filter — TODO confirm
+                }
+
+                buf[i + 1] = (byte)dest;
+                buf[i + 2] = (byte)(dest >>> 8);
+                buf[i + 3] = (byte)(dest >>> 16);
+                buf[i + 4] = (byte)(~(((dest >>> 24) & 1) - 1)); // normalize the top byte to 0x00 or 0xFF from its low bit
+                i += 4; // skip over the rewritten operand
+            } else {
+                prevMask = (prevMask << 1) | 1; // remember this unconverted candidate
+            }
+        }
+
+        prevPos = i - prevPos;
+        prevMask = ((prevPos & ~3) != 0) ? 0 : prevMask << (prevPos - 1); // carry the history into the next call
+
+        i -= off; // number of bytes actually processed
+        pos += i; // advance the base position for the next call
+        return i;
+    }
+}