From 6f0e0c41e3682254c2e0be245f275f77df821ffe Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Thu, 6 May 2021 20:18:46 +0300 Subject: Add xz_dec_catrun() to support concatenated .xz files. --- linux/include/linux/xz.h | 44 +++++++++++++++++++++++- linux/lib/xz/xz_dec_stream.c | 80 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 122 insertions(+), 2 deletions(-) (limited to 'linux') diff --git a/linux/include/linux/xz.h b/linux/include/linux/xz.h index d24b94a..8c25dd5 100644 --- a/linux/include/linux/xz.h +++ b/linux/include/linux/xz.h @@ -198,7 +198,7 @@ struct xz_dec; XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); /** - * xz_dec_run() - Run the XZ decoder + * xz_dec_run() - Run the XZ decoder for a single XZ stream * @s: Decoder state allocated using xz_dec_init() * @b: Input and output buffers * @@ -214,9 +214,51 @@ XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); * cannot give the single-call decoder a too small buffer and then expect to * get that amount valid data from the beginning of the stream. You must use * the multi-call decoder if you don't want to uncompress the whole stream. + * + * Use xz_dec_run() when XZ data is stored inside some other file format. + * The decoding will stop after one XZ stream has been decompresed. To + * decompress regular .xz files which might have multiple concatenated + * streams, use xz_dec_catrun() instead. */ XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); +/** + * xz_dec_catrun() - Run the XZ decoder with support for concatenated streams + * @s: Decoder state allocated using xz_dec_init() + * @b: Input and output buffers + * @finish: This is an int instead of bool to avoid requiring stdbool.h. + * As long as more input might be coming, finish must be false. + * When the caller knows that it has provided all the input to + * the decoder (some possibly still in b->in), it must set finish + * to true. Only when finish is true can this function return + * XZ_STREAM_END to indicate successful decompression of the + * file. In single-call mode (XZ_SINGLE) finish is assumed to + * always be true; the caller-provided value is ignored. + * + * This is like xz_dec_run() except that this makes it easy to decode .xz + * files with multiple streams (multiple .xz files concatenated as is). + * The rarely-used Stream Padding feature is supported too, that is, there + * can be null bytes after or between the streams. The number of null bytes + * must be a multiple of four. + * + * When finish is false and b->in_pos == b->in_size, it is possible that + * XZ_BUF_ERROR isn't returned even when no progress is possible (XZ_OK is + * returned instead). This shouldn't matter because in this situation a + * reasonable caller will attempt to provide more input or set finish to + * true for the next xz_dec_catrun() call anyway. + * + * For any struct xz_dec that has been initialized for multi-call mode: + * Once decoding has been started with xz_dec_run() or xz_dec_catrun(), + * the same function must be used until xz_dec_reset() or xz_dec_end(). + * Switching between the two decoding functions without resetting results + * in undefined behavior. + * + * xz_dec_catrun() is only available if XZ_DEC_CONCATENATED was defined + * at compile time. + */ +XZ_EXTERN enum xz_ret xz_dec_catrun(struct xz_dec *s, struct xz_buf *b, + int finish); + /** * xz_dec_reset() - Reset an already allocated decoder state * @s: Decoder state allocated using xz_dec_init() diff --git a/linux/lib/xz/xz_dec_stream.c b/linux/lib/xz/xz_dec_stream.c index f69581b..e4aab73 100644 --- a/linux/lib/xz/xz_dec_stream.c +++ b/linux/lib/xz/xz_dec_stream.c @@ -35,7 +35,8 @@ struct xz_dec { SEQ_INDEX, SEQ_INDEX_PADDING, SEQ_INDEX_CRC32, - SEQ_STREAM_FOOTER + SEQ_STREAM_FOOTER, + SEQ_STREAM_PADDING } sequence; /* Position in variable-length integers and Check fields */ @@ -742,6 +743,10 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) return XZ_OK; return dec_stream_footer(s); + + case SEQ_STREAM_PADDING: + /* Never reached, only silencing a warning */ + break; } } @@ -809,6 +814,79 @@ XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) return ret; } +#ifdef XZ_DEC_CONCATENATED +XZ_EXTERN enum xz_ret xz_dec_catrun(struct xz_dec *s, struct xz_buf *b, + int finish) +{ + enum xz_ret ret; + + if (DEC_IS_SINGLE(s->mode)) { + xz_dec_reset(s); + finish = true; + } + + while (true) { + if (s->sequence == SEQ_STREAM_PADDING) { + /* + * Skip Stream Padding. Its size must be a multiple + * of four bytes which is tracked with s->pos. + */ + while (true) { + if (b->in_pos == b->in_size) { + /* + * Note that if we are repeatedly + * given no input and finish is false, + * we will keep returning XZ_OK even + * though no progress is being made. + * The lack of XZ_BUF_ERROR support + * isn't a problem here because a + * reasonable caller will eventually + * provide more input or set finish + * to true. + */ + if (!finish) + return XZ_OK; + + if (s->pos != 0) + return XZ_DATA_ERROR; + + return XZ_STREAM_END; + } + + if (b->in[b->in_pos] != 0x00) { + if (s->pos != 0) + return XZ_DATA_ERROR; + + break; + } + + ++b->in_pos; + s->pos = (s->pos + 1) & 3; + } + + /* + * More input remains. It should be a new Stream. + * + * In single-call mode xz_dec_run() will always call + * xz_dec_reset(). Thus, we need to do it here only + * in multi-call mode. + */ + if (DEC_IS_MULTI(s->mode)) + xz_dec_reset(s); + } + + ret = xz_dec_run(s, b); + + if (ret != XZ_STREAM_END) + break; + + s->sequence = SEQ_STREAM_PADDING; + } + + return ret; +} +#endif + XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max) { struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); -- cgit v1.2.3