aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2021-05-06 20:18:46 +0300
committerLasse Collin <lasse.collin@tukaani.org>2021-05-06 20:18:46 +0300
commit6f0e0c41e3682254c2e0be245f275f77df821ffe (patch)
treedfc5c38f1b6fee2660fae9825aa7d0e1daf2e5a3
parentd8a12bc0c61282b38439ee76b05dbde0200002e1 (diff)
downloadxz-embedded-6f0e0c41e3682254c2e0be245f275f77df821ffe.tar.gz
Add xz_dec_catrun() to support concatenated .xz files.
-rw-r--r--README15
-rw-r--r--linux/include/linux/xz.h44
-rw-r--r--linux/lib/xz/xz_dec_stream.c80
-rw-r--r--userspace/Makefile2
-rw-r--r--userspace/xz_config.h3
-rw-r--r--userspace/xzminidec.c8
6 files changed, 148 insertions, 4 deletions
diff --git a/README b/README
index 566d604..c8ca09c 100644
--- a/README
+++ b/README
@@ -84,6 +84,21 @@ Embedding into userspace applications
environment. Probably you should at least skim through it even if the
default file works as is.
+Supporting concatenated .xz files
+
+ Regular .xz files can be concatenated as is and the xz command line
+ tool will decompress all streams from a concatenated file (a few
+ other popular formats and tools support this too). Such .xz files
+ aren't as uncommon as one might think because pxz, an early
+ threaded XZ compressor, created files of this kind.
+
+ The xz_dec_run() function will stop after decompressing one stream.
+ This is good when XZ data is stored inside some other file format.
+ However, if one is decompressing regular standalone .xz files, one
+ will want to decompress all streams in the file. This is easy with
+ xz_dec_catrun(). To include support for xz_dec_catrun(), you need
+ to #define XZ_DEC_CONCATENATED in xz_config.h or in compiler flags.
+
Integrity check support
XZ Embedded always supports the integrity check types None and
diff --git a/linux/include/linux/xz.h b/linux/include/linux/xz.h
index d24b94a..8c25dd5 100644
--- a/linux/include/linux/xz.h
+++ b/linux/include/linux/xz.h
@@ -198,7 +198,7 @@ struct xz_dec;
XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max);
/**
- * xz_dec_run() - Run the XZ decoder
+ * xz_dec_run() - Run the XZ decoder for a single XZ stream
* @s: Decoder state allocated using xz_dec_init()
* @b: Input and output buffers
*
@@ -214,10 +214,52 @@ XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max);
* cannot give the single-call decoder a too small buffer and then expect to
* get that amount valid data from the beginning of the stream. You must use
* the multi-call decoder if you don't want to uncompress the whole stream.
+ *
+ * Use xz_dec_run() when XZ data is stored inside some other file format.
+ * The decoding will stop after one XZ stream has been decompressed. To
+ * decompress regular .xz files which might have multiple concatenated
+ * streams, use xz_dec_catrun() instead.
*/
XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b);
/**
+ * xz_dec_catrun() - Run the XZ decoder with support for concatenated streams
+ * @s: Decoder state allocated using xz_dec_init()
+ * @b: Input and output buffers
+ * @finish: This is an int instead of bool to avoid requiring stdbool.h.
+ * As long as more input might be coming, finish must be false.
+ * When the caller knows that it has provided all the input to
+ * the decoder (some possibly still in b->in), it must set finish
+ * to true. Only when finish is true can this function return
+ * XZ_STREAM_END to indicate successful decompression of the
+ * file. In single-call mode (XZ_SINGLE) finish is assumed to
+ * always be true; the caller-provided value is ignored.
+ *
+ * This is like xz_dec_run() except that this makes it easy to decode .xz
+ * files with multiple streams (multiple .xz files concatenated as is).
+ * The rarely-used Stream Padding feature is supported too, that is, there
+ * can be null bytes after or between the streams. The number of null bytes
+ * must be a multiple of four.
+ *
+ * When finish is false and b->in_pos == b->in_size, it is possible that
+ * XZ_BUF_ERROR isn't returned even when no progress is possible (XZ_OK is
+ * returned instead). This shouldn't matter because in this situation a
+ * reasonable caller will attempt to provide more input or set finish to
+ * true for the next xz_dec_catrun() call anyway.
+ *
+ * For any struct xz_dec that has been initialized for multi-call mode:
+ * Once decoding has been started with xz_dec_run() or xz_dec_catrun(),
+ * the same function must be used until xz_dec_reset() or xz_dec_end().
+ * Switching between the two decoding functions without resetting results
+ * in undefined behavior.
+ *
+ * xz_dec_catrun() is only available if XZ_DEC_CONCATENATED was defined
+ * at compile time.
+ */
+XZ_EXTERN enum xz_ret xz_dec_catrun(struct xz_dec *s, struct xz_buf *b,
+ int finish);
+
+/**
* xz_dec_reset() - Reset an already allocated decoder state
* @s: Decoder state allocated using xz_dec_init()
*
diff --git a/linux/lib/xz/xz_dec_stream.c b/linux/lib/xz/xz_dec_stream.c
index f69581b..e4aab73 100644
--- a/linux/lib/xz/xz_dec_stream.c
+++ b/linux/lib/xz/xz_dec_stream.c
@@ -35,7 +35,8 @@ struct xz_dec {
SEQ_INDEX,
SEQ_INDEX_PADDING,
SEQ_INDEX_CRC32,
- SEQ_STREAM_FOOTER
+ SEQ_STREAM_FOOTER,
+ SEQ_STREAM_PADDING
} sequence;
/* Position in variable-length integers and Check fields */
@@ -742,6 +743,10 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
return XZ_OK;
return dec_stream_footer(s);
+
+ case SEQ_STREAM_PADDING:
+ /* Never reached, only silencing a warning */
+ break;
}
}
@@ -809,6 +814,79 @@ XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
return ret;
}
+#ifdef XZ_DEC_CONCATENATED
+XZ_EXTERN enum xz_ret xz_dec_catrun(struct xz_dec *s, struct xz_buf *b,
+ int finish)
+{
+ enum xz_ret ret;
+
+ if (DEC_IS_SINGLE(s->mode)) {
+ xz_dec_reset(s);
+ finish = true;
+ }
+
+ while (true) {
+ if (s->sequence == SEQ_STREAM_PADDING) {
+ /*
+ * Skip Stream Padding. Its size must be a multiple
+ * of four bytes which is tracked with s->pos.
+ *
+ * s->pos holds the number of skipped null bytes
+ * modulo four, so a nonzero s->pos when the padding
+ * ends (at the end of the input with finish set, or
+ * at the first non-null byte) is reported as
+ * XZ_DATA_ERROR.
+ */
+ while (true) {
+ if (b->in_pos == b->in_size) {
+ /*
+ * Note that if we are repeatedly
+ * given no input and finish is false,
+ * we will keep returning XZ_OK even
+ * though no progress is being made.
+ * The lack of XZ_BUF_ERROR support
+ * isn't a problem here because a
+ * reasonable caller will eventually
+ * provide more input or set finish
+ * to true.
+ */
+ if (!finish)
+ return XZ_OK;
+
+ if (s->pos != 0)
+ return XZ_DATA_ERROR;
+
+ return XZ_STREAM_END;
+ }
+
+ if (b->in[b->in_pos] != 0x00) {
+ if (s->pos != 0)
+ return XZ_DATA_ERROR;
+
+ break;
+ }
+
+ ++b->in_pos;
+ s->pos = (s->pos + 1) & 3;
+ }
+
+ /*
+ * More input remains. It should be a new Stream.
+ *
+ * In single-call mode xz_dec_run() will always call
+ * xz_dec_reset(). Thus, we need to do it here only
+ * in multi-call mode.
+ *
+ * After this block the new Stream is handed to
+ * xz_dec_run(). If that returns XZ_STREAM_END, the
+ * sequence is set back to SEQ_STREAM_PADDING and the
+ * outer loop runs again; any other return value is
+ * returned to the caller as is.
+ */
+ if (DEC_IS_MULTI(s->mode))
+ xz_dec_reset(s);
+ }
+
+ ret = xz_dec_run(s, b);
+
+ if (ret != XZ_STREAM_END)
+ break;
+
+ s->sequence = SEQ_STREAM_PADDING;
+ }
+
+ return ret;
+}
+#endif
+
XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max)
{
struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL);
diff --git a/userspace/Makefile b/userspace/Makefile
index d39d041..1acacf0 100644
--- a/userspace/Makefile
+++ b/userspace/Makefile
@@ -13,7 +13,7 @@
CC = gcc -std=gnu89
BCJ_CPPFLAGS = -DXZ_DEC_X86 -DXZ_DEC_POWERPC -DXZ_DEC_IA64 \
-DXZ_DEC_ARM -DXZ_DEC_ARMTHUMB -DXZ_DEC_SPARC
-CPPFLAGS = -DXZ_USE_CRC64 -DXZ_DEC_ANY_CHECK
+CPPFLAGS = -DXZ_USE_CRC64 -DXZ_DEC_ANY_CHECK -DXZ_DEC_CONCATENATED
CFLAGS = -ggdb3 -O2 -pedantic -Wall -Wextra -Wno-long-long
RM = rm -f
VPATH = ../linux/include/linux ../linux/lib/xz
diff --git a/userspace/xz_config.h b/userspace/xz_config.h
index eb9dac1..ee590d7 100644
--- a/userspace/xz_config.h
+++ b/userspace/xz_config.h
@@ -10,6 +10,9 @@
#ifndef XZ_CONFIG_H
#define XZ_CONFIG_H
+/* Uncomment to enable building of xz_dec_catrun(). */
+/* #define XZ_DEC_CONCATENATED */
+
/* Uncomment to enable CRC64 support. */
/* #define XZ_USE_CRC64 */
diff --git a/userspace/xzminidec.c b/userspace/xzminidec.c
index 753bbc9..b542109 100644
--- a/userspace/xzminidec.c
+++ b/userspace/xzminidec.c
@@ -70,7 +70,13 @@ int main(int argc, char **argv)
b.in_pos = 0;
}
- ret = xz_dec_run(s, &b);
+ /*
+ * There are a few ways to set the "finish" (the third)
+ * argument. We could use feof(stdin) but testing in_size
+ * is fine too and may also work in applications that don't
+ * use FILEs.
+ */
+ ret = xz_dec_catrun(s, &b, b.in_size == 0);
if (b.out_pos == sizeof(out)) {
if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {