diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2021-05-06 20:18:46 +0300 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2021-05-06 20:18:46 +0300 |
commit | 6f0e0c41e3682254c2e0be245f275f77df821ffe (patch) | |
tree | dfc5c38f1b6fee2660fae9825aa7d0e1daf2e5a3 | |
parent | d8a12bc0c61282b38439ee76b05dbde0200002e1 (diff) | |
download | xz-embedded-6f0e0c41e3682254c2e0be245f275f77df821ffe.tar.gz |
Add xz_dec_catrun() to support concatenated .xz files.
-rw-r--r-- | README | 15 | ||||
-rw-r--r-- | linux/include/linux/xz.h | 44 | ||||
-rw-r--r-- | linux/lib/xz/xz_dec_stream.c | 80 | ||||
-rw-r--r-- | userspace/Makefile | 2 | ||||
-rw-r--r-- | userspace/xz_config.h | 3 | ||||
-rw-r--r-- | userspace/xzminidec.c | 8 |
6 files changed, 148 insertions, 4 deletions
@@ -84,6 +84,21 @@ Embedding into userspace applications environment. Probably you should at least skim through it even if the default file works as is. +Supporting concatenated .xz files + + Regular .xz files can be concatenated as is and the xz command line + tool will decompress all streams from a concatenated file (a few + other popular formats and tools support this too). This kind of .xz + files aren't as uncommon as one might think because pxz, an early + threaded XZ compressor, created this kind of .xz files. + + The xz_dec_run() function will stop after decompressing one stream. + This is good when XZ data is stored inside some other file format. + However, if one is decompressing regular standalone .xz files, one + will want to decompress all streams in the file. This is easy with + xz_dec_catrun(). To include support for xz_dec_catrun(), you need + to #define XZ_DEC_CONCATENATED in xz_config.h or in compiler flags. + Integrity check support XZ Embedded always supports the integrity check types None and diff --git a/linux/include/linux/xz.h b/linux/include/linux/xz.h index d24b94a..8c25dd5 100644 --- a/linux/include/linux/xz.h +++ b/linux/include/linux/xz.h @@ -198,7 +198,7 @@ struct xz_dec; XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); /** - * xz_dec_run() - Run the XZ decoder + * xz_dec_run() - Run the XZ decoder for a single XZ stream * @s: Decoder state allocated using xz_dec_init() * @b: Input and output buffers * @@ -214,10 +214,52 @@ XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); * cannot give the single-call decoder a too small buffer and then expect to * get that amount valid data from the beginning of the stream. You must use * the multi-call decoder if you don't want to uncompress the whole stream. + * + * Use xz_dec_run() when XZ data is stored inside some other file format. + * The decoding will stop after one XZ stream has been decompressed. 
To + * decompress regular .xz files which might have multiple concatenated + * streams, use xz_dec_catrun() instead. */ XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); /** + * xz_dec_catrun() - Run the XZ decoder with support for concatenated streams + * @s: Decoder state allocated using xz_dec_init() + * @b: Input and output buffers + * @finish: This is an int instead of bool to avoid requiring stdbool.h. + * As long as more input might be coming, finish must be false. + * When the caller knows that it has provided all the input to + * the decoder (some possibly still in b->in), it must set finish + * to true. Only when finish is true can this function return + * XZ_STREAM_END to indicate successful decompression of the + * file. In single-call mode (XZ_SINGLE) finish is assumed to + * always be true; the caller-provided value is ignored. + * + * This is like xz_dec_run() except that this makes it easy to decode .xz + * files with multiple streams (multiple .xz files concatenated as is). + * The rarely-used Stream Padding feature is supported too, that is, there + * can be null bytes after or between the streams. The number of null bytes + * must be a multiple of four. + * + * When finish is false and b->in_pos == b->in_size, it is possible that + * XZ_BUF_ERROR isn't returned even when no progress is possible (XZ_OK is + * returned instead). This shouldn't matter because in this situation a + * reasonable caller will attempt to provide more input or set finish to + * true for the next xz_dec_catrun() call anyway. + * + * For any struct xz_dec that has been initialized for multi-call mode: + * Once decoding has been started with xz_dec_run() or xz_dec_catrun(), + * the same function must be used until xz_dec_reset() or xz_dec_end(). + * Switching between the two decoding functions without resetting results + * in undefined behavior. + * + * xz_dec_catrun() is only available if XZ_DEC_CONCATENATED was defined + * at compile time. 
+ */ +XZ_EXTERN enum xz_ret xz_dec_catrun(struct xz_dec *s, struct xz_buf *b, + int finish); + +/** * xz_dec_reset() - Reset an already allocated decoder state * @s: Decoder state allocated using xz_dec_init() * diff --git a/linux/lib/xz/xz_dec_stream.c b/linux/lib/xz/xz_dec_stream.c index f69581b..e4aab73 100644 --- a/linux/lib/xz/xz_dec_stream.c +++ b/linux/lib/xz/xz_dec_stream.c @@ -35,7 +35,8 @@ struct xz_dec { SEQ_INDEX, SEQ_INDEX_PADDING, SEQ_INDEX_CRC32, - SEQ_STREAM_FOOTER + SEQ_STREAM_FOOTER, + SEQ_STREAM_PADDING } sequence; /* Position in variable-length integers and Check fields */ @@ -742,6 +743,10 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) return XZ_OK; return dec_stream_footer(s); + + case SEQ_STREAM_PADDING: + /* Never reached, only silencing a warning */ + break; } } @@ -809,6 +814,79 @@ XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) return ret; } +#ifdef XZ_DEC_CONCATENATED +XZ_EXTERN enum xz_ret xz_dec_catrun(struct xz_dec *s, struct xz_buf *b, + int finish) +{ + enum xz_ret ret; + + if (DEC_IS_SINGLE(s->mode)) { + xz_dec_reset(s); + finish = true; + } + + while (true) { + if (s->sequence == SEQ_STREAM_PADDING) { + /* + * Skip Stream Padding. Its size must be a multiple + * of four bytes which is tracked with s->pos. + */ + while (true) { + if (b->in_pos == b->in_size) { + /* + * Note that if we are repeatedly + * given no input and finish is false, + * we will keep returning XZ_OK even + * though no progress is being made. + * The lack of XZ_BUF_ERROR support + * isn't a problem here because a + * reasonable caller will eventually + * provide more input or set finish + * to true. + */ + if (!finish) + return XZ_OK; + + if (s->pos != 0) + return XZ_DATA_ERROR; + + return XZ_STREAM_END; + } + + if (b->in[b->in_pos] != 0x00) { + if (s->pos != 0) + return XZ_DATA_ERROR; + + break; + } + + ++b->in_pos; + s->pos = (s->pos + 1) & 3; + } + + /* + * More input remains. It should be a new Stream. 
+ * + * In single-call mode xz_dec_run() will always call + * xz_dec_reset(). Thus, we need to do it here only + * in multi-call mode. + */ + if (DEC_IS_MULTI(s->mode)) + xz_dec_reset(s); + } + + ret = xz_dec_run(s, b); + + if (ret != XZ_STREAM_END) + break; + + s->sequence = SEQ_STREAM_PADDING; + } + + return ret; +} +#endif + XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max) { struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); diff --git a/userspace/Makefile b/userspace/Makefile index d39d041..1acacf0 100644 --- a/userspace/Makefile +++ b/userspace/Makefile @@ -13,7 +13,7 @@ CC = gcc -std=gnu89 BCJ_CPPFLAGS = -DXZ_DEC_X86 -DXZ_DEC_POWERPC -DXZ_DEC_IA64 \ -DXZ_DEC_ARM -DXZ_DEC_ARMTHUMB -DXZ_DEC_SPARC -CPPFLAGS = -DXZ_USE_CRC64 -DXZ_DEC_ANY_CHECK +CPPFLAGS = -DXZ_USE_CRC64 -DXZ_DEC_ANY_CHECK -DXZ_DEC_CONCATENATED CFLAGS = -ggdb3 -O2 -pedantic -Wall -Wextra -Wno-long-long RM = rm -f VPATH = ../linux/include/linux ../linux/lib/xz diff --git a/userspace/xz_config.h b/userspace/xz_config.h index eb9dac1..ee590d7 100644 --- a/userspace/xz_config.h +++ b/userspace/xz_config.h @@ -10,6 +10,9 @@ #ifndef XZ_CONFIG_H #define XZ_CONFIG_H +/* Uncomment to enable building of xz_dec_catrun(). */ +/* #define XZ_DEC_CONCATENATED */ + /* Uncomment to enable CRC64 support. */ /* #define XZ_USE_CRC64 */ diff --git a/userspace/xzminidec.c b/userspace/xzminidec.c index 753bbc9..b542109 100644 --- a/userspace/xzminidec.c +++ b/userspace/xzminidec.c @@ -70,7 +70,13 @@ int main(int argc, char **argv) b.in_pos = 0; } - ret = xz_dec_run(s, &b); + /* + * There are a few ways to set the "finish" (the third) + * argument. We could use feof(stdin) but testing in_size + * is fine too and may also work in applications that don't + * use FILEs. + */ + ret = xz_dec_catrun(s, &b, b.in_size == 0); if (b.out_pos == sizeof(out)) { if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) { |