Avoid overlapping memcpy() with invalid input with in-place decompression.

With valid files, the safety margin described in lib/decompress_unxz.c ensures that these buffers cannot overlap. But if the uncompressed size of the input is larger than the caller thought, which is possible when the input file is invalid/corrupt, the buffers can overlap. Obviously the result will then be garbage (and usually the decoder will return an error too) but no other harm will happen when such an over-run occurs. This change only affects uncompressed LZMA2 chunks and so this should have no effect on performance.
author: Lasse Collin <lasse.collin@tukaani.org> 2020-02-03 00:18:32 +0200
committer: Lasse Collin <lasse.collin@tukaani.org> 2020-02-03 00:18:32 +0200
commit: cfc1499e9fc23d8caa6dfdf1cc3ccf60d6fcd947 (patch)
tree: dc3e070899450cfc8e6453e8b0e0c42c49cb5506
parent: 40d291b609d0cc6344f3e26ed34b4fd755e403da (diff)
download: xz-embedded-cfc1499e9fc23d8caa6dfdf1cc3ccf60d6fcd947.tar.gz
2 files changed, 20 insertions, 3 deletions
diff --git a/linux/lib/decompress_unxz.c b/linux/lib/decompress_unxz.c
index 9f34eb5..3033b20 100644
--- a/linux/lib/decompress_unxz.c
+++ b/linux/lib/decompress_unxz.c
@@ -167,7 +167,7 @@
  * memeq and memzero are not used much and any remotely sane implementation
  * is fast enough. memcpy/memmove speed matters in multi-call mode, but
  * the kernel image is decompressed in single-call mode, in which only
- * memcpy speed can matter and only if there is a lot of uncompressible data
+ * memmove speed can matter and only if there is a lot of uncompressible data
  * (LZMA2 stores uncompressible chunks in uncompressed form). Thus, the
  * functions below should just be kept small; it's probably not worth
  * optimizing for speed.
diff --git a/linux/lib/xz/xz_dec_lzma2.c b/linux/lib/xz/xz_dec_lzma2.c
index 156f26f..dd80989 100644
--- a/linux/lib/xz/xz_dec_lzma2.c
+++ b/linux/lib/xz/xz_dec_lzma2.c
@@ -387,7 +387,14 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
 
 		*left -= copy_size;
 
-		memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
+		/*
+		 * If doing in-place decompression in single-call mode and the
+		 * uncompressed size of the file is larger than the caller
+		 * thought (i.e. it is invalid input!), the buffers below may
+		 * overlap and cause undefined behavior with memcpy().
+		 * With valid inputs memcpy() would be fine here.
+		 */
+		memmove(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
 		dict->pos += copy_size;
 
 		if (dict->full < dict->pos)
@@ -397,7 +404,11 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
 			if (dict->pos == dict->end)
 				dict->pos = 0;
 
-			memcpy(b->out + b->out_pos, b->in + b->in_pos,
+			/*
+			 * Like above but for multi-call mode: use memmove()
+			 * to avoid undefined behavior with invalid input.
+			 */
+			memmove(b->out + b->out_pos, b->in + b->in_pos,
 					copy_size);
 		}
 
@@ -421,6 +432,12 @@ static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
 		if (dict->pos == dict->end)
 			dict->pos = 0;
 
+		/*
+		 * These buffers cannot overlap even if doing in-place
+		 * decompression because in multi-call mode dict->buf
+		 * has been allocated by us in this file; it's not
+		 * provided by the caller like in single-call mode.
+		 */
 		memcpy(b->out + b->out_pos, dict->buf + dict->start,
 				copy_size);
 	}
author	Lasse Collin <lasse.collin@tukaani.org>	2020-02-03 00:18:32 +0200
committer	Lasse Collin <lasse.collin@tukaani.org>	2020-02-03 00:18:32 +0200
commit	cfc1499e9fc23d8caa6dfdf1cc3ccf60d6fcd947 (patch)
tree	dc3e070899450cfc8e6453e8b0e0c42c49cb5506
parent	40d291b609d0cc6344f3e26ed34b4fd755e403da (diff)
download	xz-embedded-cfc1499e9fc23d8caa6dfdf1cc3ccf60d6fcd947.tar.gz