aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2009-04-18 00:43:42 +0300
committerLasse Collin <lasse.collin@tukaani.org>2009-04-18 00:43:42 +0300
commitd85055697ab0919d5ab8d70af61d8fa46e7dc330 (patch)
tree714ff057f10720d94dac4b8d02363bf96c014c66
parent8da29ff834299c180a2fb22bc179dd6bbfde88e3 (diff)
downloadxz-embedded-d85055697ab0919d5ab8d70af61d8fa46e7dc330.tar.gz
Added BCJ filter decoders.
-rw-r--r--README63
-rw-r--r--linux/Documentation/xz.txt11
-rw-r--r--linux/lib/xz/Kconfig46
-rw-r--r--linux/lib/xz/Makefile3
-rw-r--r--linux/lib/xz/xz_boot.c36
-rw-r--r--linux/lib/xz/xz_dec_bcj.c566
-rw-r--r--linux/lib/xz/xz_dec_stream.c69
-rw-r--r--linux/lib/xz/xz_private.h70
-rw-r--r--userspace/Makefile13
9 files changed, 800 insertions, 77 deletions
diff --git a/README b/README
index 252f39d..fad17b8 100644
--- a/README
+++ b/README
@@ -17,13 +17,30 @@ XZ Embedded
Compiling the Linux kernel module
- cd linux/lib/xz
- make -C /path/to/kernel/source \
- CONFIG_XZ_DEC=m KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)"
-
The xz_dec module depends on crc32 module, so make sure that you have
it enabled (CONFIG_CRC32).
+ Building the xz_dec module without support for BCJ filters:
+
+ cd linux/lib/xz
+ make -C /path/to/kernel/source \
+ KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \
+ CONFIG_XZ_DEC=m
+
+ Building the xz_dec module with support for BCJ filters:
+
+ cd linux/lib/xz
+ make -C /path/to/kernel/source \
+ KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \
+ CONFIG_XZ_DEC=m CONFIG_XZ_DEC_BCJ=y \
+ CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y \
+ CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y \
+ CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y
+
+ If you want only one or a few of the BCJ filters, omit the appropriate
+ variables. CONFIG_XZ_DEC_BCJ=y is always required to build the support
+ code shared between all BCJ filters.
+
Compiler requirements
XZ Embedded should compile as either GNU-C89 (used in the Linux
@@ -57,6 +74,34 @@ Embedding into userspace applications
Your code should use only the functions declared in xz.h. The rest of
the .h files are meant only for internal use in XZ Embedded.
+BCJ filter support
+
+ If you want support for one or more BCJ filters, you also need to copy
+ linux/lib/xz/xz_dec_bcj.c into your application and use the appropriate
+ #defines when compiling XZ Embedded. You don't need these #defines in
+ the code that just uses XZ Embedded via xz.h, but having them always
+ #defined doesn't hurt either.
+
+ #define Instruction set BCJ filter endianness
+ XZ_DEC_X86 x86 or x86-64 Little endian only
+ XZ_DEC_POWERPC PowerPC Big endian only
+ XZ_DEC_IA64 Itanium (IA-64) Big or little endian
+ XZ_DEC_ARM ARM Little endian only
+ XZ_DEC_ARMTHUMB ARM-Thumb Little endian only
+ XZ_DEC_SPARC SPARC Big or little endian
+
+ While some architectures are (partially) bi-endian, the endianness
+ setting doesn't change the endianness of the instructions on all
+ architectures. That's why Itanium and SPARC filters work for both big
+ and little endian executables (Itanium has little endian instructions
+ and SPARC has big endian instructions).
+
+ There currently is no filter for little endian PowerPC or big endian
+ ARM or ARM-Thumb. Implementing filters for them can be considered if
+ there is a need for such filters in real-world applications.
+
+Notes about shared libraries
+
If you are including XZ Embedded into a shared library, you very
probably should rename the xz_* functions to prevent symbol
conflicts in case your library is linked against some other library
@@ -64,11 +109,11 @@ Embedding into userspace applications
a different version of XZ Embedded). TODO: Provide an easy way
to do this.
- NOTE: Please don't create a shared library of XZ Embedded itself
- unless it is fine to rebuild everything depending on that shared
- library everytime you upgrade to a newer version of XZ Embedded.
- There are no API or ABI stability guarantees between different
- versions of XZ Embedded.
+ Please don't create a shared library of XZ Embedded itself unless
+ it is fine to rebuild everything depending on that shared library
+ every time you upgrade to a newer version of XZ Embedded. There are
+ no API or ABI stability guarantees between different versions of
+ XZ Embedded.
Specifying the calling convention
diff --git a/linux/Documentation/xz.txt b/linux/Documentation/xz.txt
index d0f91b2..5319dd6 100644
--- a/linux/Documentation/xz.txt
+++ b/linux/Documentation/xz.txt
@@ -3,8 +3,9 @@ XZ data compression in Linux
============================
The xz_dec module provides XZ decoder which supports the LZMA2
- filter and CRC32 for integrity checking. The usage of the xz_dec
- module is documented in include/linux/xz.h.
+ filter, and optionally also Branch/Call/Jump (BCJ) filters for
+ executable data. CRC32 is supported for integrity checking. The
+ usage of the xz_dec module is documented in include/linux/xz.h.
Userspace tools
@@ -51,12 +52,6 @@ Notes on compression options
Future plans
- Add support for BCJ (Branch/Call/Jump) filters for different
- instruction sets. This could be useful both at boot and with
- compressed file systems that store executables. BCJ filters are
- small and improve the compression ratio a little, and have minimal
- effect on performance.
-
Creating a limited XZ encoder may be considered if people think it is
useful. LZMA2 is slower to compress than e.g. Deflate or LZO even at
the fastest settings, so it isn't clear if LZMA2 encoder is wanted
diff --git a/linux/lib/xz/Kconfig b/linux/lib/xz/Kconfig
index be2e2fa..9858748 100644
--- a/linux/lib/xz/Kconfig
+++ b/linux/lib/xz/Kconfig
@@ -2,6 +2,46 @@ config XZ_DEC
tristate "XZ decompression support"
select CRC32
help
- Currently this supports the LZMA2 compression algorithm and
- CRC32 for integrity checking. Other algorithms may be added
- later. See Documentation/xz.txt for more information.
+ LZMA2 compression algorithm and BCJ filters are supported using
+ the .xz file format as the container. For integrity checking,
+ CRC32 is supported. See Documentation/xz.txt for more information.
+
+config XZ_DEC_X86
+ bool "x86 BCJ filter decoder" if EMBEDDED
+ default y
+ depends on XZ_DEC
+ select XZ_DEC_BCJ
+
+config XZ_DEC_POWERPC
+ bool "PowerPC BCJ filter decoder" if EMBEDDED
+ default y
+ depends on XZ_DEC
+ select XZ_DEC_BCJ
+
+config XZ_DEC_IA64
+ bool "IA-64 BCJ filter decoder" if EMBEDDED
+ default y
+ depends on XZ_DEC
+ select XZ_DEC_BCJ
+
+config XZ_DEC_ARM
+ bool "ARM BCJ filter decoder" if EMBEDDED
+ default y
+ depends on XZ_DEC
+ select XZ_DEC_BCJ
+
+config XZ_DEC_ARMTHUMB
+ bool "ARM-Thumb BCJ filter decoder" if EMBEDDED
+ default y
+ depends on XZ_DEC
+ select XZ_DEC_BCJ
+
+config XZ_DEC_SPARC
+ bool "SPARC BCJ filter decoder" if EMBEDDED
+ default y
+ depends on XZ_DEC
+ select XZ_DEC_BCJ
+
+config XZ_DEC_BCJ
+ bool
+ default n
diff --git a/linux/lib/xz/Makefile b/linux/lib/xz/Makefile
index a9e358f..bd3809f 100644
--- a/linux/lib/xz/Makefile
+++ b/linux/lib/xz/Makefile
@@ -1,2 +1,3 @@
obj-$(CONFIG_XZ_DEC) += xz_dec.o
-xz_dec-objs := xz_dec_stream.o xz_dec_lzma2.o xz_dec_syms.o
+xz_dec-y := xz_dec_syms.o xz_dec_stream.o xz_dec_lzma2.o
+xz_dec-$(CONFIG_XZ_DEC_BCJ) += xz_dec_bcj.o
diff --git a/linux/lib/xz/xz_boot.c b/linux/lib/xz/xz_boot.c
index 9a1adfb..71a9886 100644
--- a/linux/lib/xz/xz_boot.c
+++ b/linux/lib/xz/xz_boot.c
@@ -44,10 +44,43 @@
* is in b.in_pos and the amount of output used is in b.out_pos.
*/
+/*
+ * Allow using the macro INIT to mark all functions with __init.
+ * INIT is already used for this purpose in the Deflate decoder,
+ * so this should ease things a little.
+ */
#if !defined(XZ_FUNC) && defined(INIT)
# define XZ_FUNC INIT
#endif
+
+/*
+ * Use the internal CRC32 code instead of the kernel's CRC32 module, which
+ * is not available in the early phase of booting.
+ */
#define XZ_INTERNAL_CRC32
+
+/*
+ * Ignore the configuration specified in the kernel config for the xz_dec
+ * module. For boot time use, we enable only the BCJ filter of the current
+ * architecture, or none if no BCJ filter is available for the architecture.
+ *
+ * The macros defined here must use exactly the XZ_DEC_* names that
+ * xz_private.h and xz_dec_bcj.c test for (XZ_DEC_X86, XZ_DEC_POWERPC,
+ * XZ_DEC_IA64, XZ_DEC_ARM, XZ_DEC_ARMTHUMB, XZ_DEC_SPARC). A differently
+ * spelled macro is silently ignored and the filter is left out of the
+ * boot time decoder.
+ */
+#define XZ_IGNORE_KCONFIG
+#ifdef CONFIG_X86
+# define XZ_DEC_X86
+#endif
+#ifdef CONFIG_PPC
+# define XZ_DEC_POWERPC
+#endif
+#ifdef CONFIG_ARM
+# define XZ_DEC_ARM
+#endif
+#ifdef CONFIG_IA64
+# define XZ_DEC_IA64
+#endif
+#ifdef CONFIG_SPARC
+# define XZ_DEC_SPARC
+#endif
+
#include "xz_private.h"
#ifdef XZ_MEM_FUNCS
@@ -125,6 +158,9 @@ static void * XZ_FUNC memmove(void *dest, const void *src, size_t size)
#include "xz_crc32.c"
#include "xz_dec_stream.c"
#include "xz_dec_lzma2.c"
+#ifdef XZ_DEC_BCJ
+# include "xz_dec_bcj.c"
+#endif
/**
* xz_dec_buf() - Single-call XZ decoder
diff --git a/linux/lib/xz/xz_dec_bcj.c b/linux/lib/xz/xz_dec_bcj.c
new file mode 100644
index 0000000..c660347
--- /dev/null
+++ b/linux/lib/xz/xz_dec_bcj.c
@@ -0,0 +1,566 @@
+/*
+ * Branch/Call/Jump (BCJ) filter decoders
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+#include "xz_private.h"
+
+struct xz_dec_bcj { /* State of one BCJ filter decoder */
+ /*
+ * Type of the BCJ filter being used. The values are the Filter IDs
+ * that xz_dec_bcj_reset() accepts and stores here.
+ */
+ enum {
+ BCJ_X86 = 4, /* x86 or x86-64 */
+ BCJ_POWERPC = 5, /* Big endian only */
+ BCJ_IA64 = 6, /* Big or little endian */
+ BCJ_ARM = 7, /* Little endian only */
+ BCJ_ARMTHUMB = 8, /* Little endian only */
+ BCJ_SPARC = 9 /* Big or little endian */
+ } type;
+
+ /*
+ * Return value of the next filter in the chain. We need to preserve
+ * this information across calls, because we must not call the next
+ * filter anymore once it has returned XZ_STREAM_END.
+ */
+ enum xz_ret ret;
+
+ /*
+ * Absolute position relative to the beginning of the uncompressed
+ * data (in a single .xz Block). We care only about the lowest 32
+ * bits so this doesn't need to be uint64_t even with big files.
+ * bcj_apply() advances this by the number of bytes it filtered.
+ */
+ uint32_t pos;
+
+ /*
+ * x86 filter state: bcj_x86() loads this at entry and stores the
+ * updated value before returning, so it carries over between calls.
+ */
+ uint32_t x86_prev_mask;
+
+ /*
+ * Temporary space to hold the variables from struct xz_buf while
+ * xz_dec_bcj_run() makes b->out point to temp.buf.
+ */
+ uint8_t *out;
+ size_t out_pos;
+ size_t out_size;
+
+ struct { /* Small staging buffer used by xz_dec_bcj_run() */
+ /* Amount of already filtered data in the beginning of buf */
+ size_t filtered;
+
+ /* Total amount of data currently stored in buf */
+ size_t size;
+
+ /*
+ * Buffer to hold a mix of filtered and unfiltered data. This
+ * needs to be big enough to hold Alignment + 2 * Look-ahead:
+ *
+ * Type Alignment Look-ahead
+ * x86 1 4
+ * PowerPC 4 0
+ * IA-64 16 0
+ * ARM 4 0
+ * ARM-Thumb 2 2
+ * SPARC 4 0
+ */
+ uint8_t buf[16];
+ } temp;
+};
+
+#ifdef XZ_DEC_X86
+/*
+ * This is a macro used to test the most significant byte of a memory address
+ * in an x86 instruction. It is true only for the values 0x00 and 0xFF.
+ */
+#define bcj_x86_test_msbyte(b) ((b) == 0x00 || (b) == 0xFF)
+/*
+ * x86 BCJ decoder. Scans buf for the bytes 0xE8 and 0xE9 (presumably the
+ * CALL/JMP rel32 opcodes -- confirm against the x86 manual) and, when the
+ * most significant byte of the following 32-bit little endian operand is
+ * 0x00 or 0xFF, replaces the operand with operand - (s->pos + i + 5).
+ *
+ * The last four bytes of buf are never used as an opcode position because
+ * the operand needs four bytes after the opcode byte.
+ *
+ * s->x86_prev_mask is read at entry and updated before returning.
+ *
+ * Returns the number of bytes processed from the beginning of buf, or
+ * zero if size <= 4.
+ */
+static noinline_for_stack size_t XZ_FUNC bcj_x86(
+ struct xz_dec_bcj *s, uint8_t *buf, size_t size)
+{
+ static const bool mask_to_allowed_status[8]
+ = { true, true, true, false, true, false, false, false };
+
+ static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
+
+ size_t i;
+ size_t prev_pos = (size_t)-1; /* position of the previous candidate */
+ uint32_t prev_mask = s->x86_prev_mask;
+ uint32_t src;
+ uint32_t dest;
+ uint32_t j;
+ uint8_t b;
+
+ if (size <= 4)
+ return 0;
+
+ size -= 4; /* buf[i + 4] must stay inside the buffer */
+ for (i = 0; i < size; ++i) {
+ if ((buf[i] & 0xFE) != 0xE8) /* neither 0xE8 nor 0xE9 */
+ continue;
+
+ prev_pos = i - prev_pos; /* distance from the previous candidate */
+ if (prev_pos > 3) {
+ prev_mask = 0;
+ } else {
+ prev_mask = (prev_mask << (prev_pos - 1)) & 7;
+ if (prev_mask != 0) {
+ b = buf[i + 4 - mask_to_bit_num[prev_mask]];
+ if (!mask_to_allowed_status[prev_mask]
+ || bcj_x86_test_msbyte(b)) {
+ prev_pos = i;
+ prev_mask = (prev_mask << 1) | 1;
+ continue;
+ }
+ }
+ }
+
+ prev_pos = i;
+
+ if (bcj_x86_test_msbyte(buf[i + 4])) {
+ /* Read the 32-bit little endian operand. */
+ src =(uint32_t)buf[i + 1]
+ | ((uint32_t)buf[i + 2] << 8)
+ | ((uint32_t)buf[i + 3] << 16)
+ | ((uint32_t)buf[i + 4] << 24);
+ while (true) {
+ dest = src - (s->pos + (uint32_t)i + 5);
+ if (prev_mask == 0)
+ break;
+
+ j = mask_to_bit_num[prev_mask] * 8;
+ b = (uint8_t)(dest >> (24 - j));
+ if (!bcj_x86_test_msbyte(b))
+ break;
+
+ src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
+ }
+
+ /* Keep the low 25 bits and sign-extend from bit 24. */
+ dest &= 0x01FFFFFF;
+ dest |= (uint32_t)0 - (dest & 0x01000000);
+ buf[i + 1] = (uint8_t)dest;
+ buf[i + 2] = (uint8_t)(dest >> 8);
+ buf[i + 3] = (uint8_t)(dest >> 16);
+ buf[i + 4] = (uint8_t)(dest >> 24);
+ i += 4; /* skip the operand that was just rewritten */
+ } else {
+ prev_mask = (prev_mask << 1) | 1;
+ }
+ }
+
+ /* Save the mask so that the next call continues from this state. */
+ prev_pos = i - prev_pos;
+ s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
+ return i;
+}
+#endif
+
+#ifdef XZ_DEC_POWERPC /*
+ * PowerPC BCJ decoder. Reads buf in 4-byte groups as big endian 32-bit
+ * words. For every word matching (instr & 0xFC000003) == 0x48000001
+ * (presumably the "bl" instruction -- confirm), the start position of the
+ * word in the uncompressed data (s->pos + i) is subtracted from the field
+ * selected by the mask 0x03FFFFFC, and the word is stored back.
+ *
+ * Returns the number of bytes processed, which is a multiple of four;
+ * up to three trailing bytes of buf are left untouched.
+ */
+static noinline_for_stack size_t XZ_FUNC bcj_powerpc(
+ struct xz_dec_bcj *s, uint8_t *buf, size_t size)
+{
+ size_t i;
+ uint32_t instr;
+
+ for (i = 0; i + 4 <= size; i += 4) {
+ instr = ((uint32_t)buf[i] << 24)
+ | ((uint32_t)buf[i + 1] << 16)
+ | ((uint32_t)buf[i + 2] << 8)
+ | (uint32_t)buf[i + 3];
+ if ((instr & 0xFC000003) == 0x48000001) {
+ instr &= 0x03FFFFFC; /* isolate the address field */
+ instr -= s->pos + (uint32_t)i;
+ instr &= 0x03FFFFFC; /* drop bits that left the field */
+ instr |= 0x48000001; /* restore the fixed bits */
+ buf[i] = (uint8_t)(instr >> 24);
+ buf[i + 1] = (uint8_t)(instr >> 16);
+ buf[i + 2] = (uint8_t)(instr >> 8);
+ buf[i + 3] = (uint8_t)instr;
+ }
+ }
+
+ return i;
+}
+#endif
+
+#ifdef XZ_DEC_IA64 /*
+ * IA-64 BCJ decoder. Processes buf in 16-byte groups. The five lowest bits
+ * of the first byte of a group index branch_table, which gives a bit mask
+ * telling which of the three 41-bit slots (starting at bit 5) to examine.
+ * In an examined slot that passes the two bit-field tests below, a 21-bit
+ * address (20 bits at bit 13 plus one bit at bit 36) is scaled by 16, has
+ * s->pos + i subtracted from it, is scaled back and stored in place.
+ *
+ * Returns the number of bytes processed, which is a multiple of 16.
+ */
+static noinline_for_stack size_t XZ_FUNC bcj_ia64(
+ struct xz_dec_bcj *s, uint8_t *buf, size_t size)
+{
+ static const uint8_t branch_table[32] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 4, 4, 6, 6, 0, 0, 7, 7,
+ 4, 4, 0, 0, 4, 4, 0, 0
+ };
+
+ /*
+ * The local variables take a little bit stack space, but it's less
+ * than what LZMA2 decoder takes, so it doesn't make sense to reduce
+ * stack usage here without doing that for the LZMA2 decoder too.
+ */
+
+ /* Loop counters */
+ size_t i;
+ size_t j;
+
+ /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
+ uint32_t slot;
+
+ /* Bitwise offset of the instruction indicated by slot */
+ uint32_t bit_pos;
+
+ /* bit_pos split into byte and bit parts */
+ uint32_t byte_pos;
+ uint32_t bit_res;
+
+ /* Address part of an instruction */
+ uint32_t addr;
+
+ /* Mask used to detect which instructions to convert */
+ uint32_t mask;
+
+ /* 41-bit instruction stored somewhere in the lowest 48 bits */
+ uint64_t instr;
+
+ /* Instruction normalized with bit_res for easier manipulation */
+ uint64_t norm;
+
+ for (i = 0; i + 16 <= size; i += 16) {
+ mask = branch_table[buf[i] & 0x1F];
+ for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
+ if (((mask >> slot) & 1) == 0)
+ continue;
+
+ byte_pos = bit_pos >> 3;
+ bit_res = bit_pos & 7;
+ instr = 0;
+ /* Load six bytes as a little endian value. */
+ for (j = 0; j < 6; ++j)
+ instr |= (uint64_t)(buf[i + j + byte_pos])
+ << (8 * j);
+
+ norm = instr >> bit_res;
+
+ if (((norm >> 37) & 0x0F) == 0x05
+ && ((norm >> 9) & 0x07) == 0) {
+ addr = (norm >> 13) & 0x0FFFFF;
+ addr |= ((uint32_t)(norm >> 36) & 1) << 20;
+ addr <<= 4;
+ addr -= s->pos + (uint32_t)i;
+ addr >>= 4;
+
+ /* Clear the old address bits, then insert the new ones. */
+ norm &= ~((uint64_t)0x8FFFFF << 13);
+ norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
+ norm |= (uint64_t)(addr & 0x100000)
+ << (36 - 20);
+
+ /* Keep the bit_res low bits that precede this slot. */
+ instr &= (1 << bit_res) - 1;
+ instr |= norm << bit_res;
+
+ for (j = 0; j < 6; j++)
+ buf[i + j + byte_pos]
+ = (uint8_t)(instr >> (8 * j));
+ }
+ }
+ }
+
+ return i;
+}
+#endif
+
+#ifdef XZ_DEC_ARM /*
+ * ARM BCJ decoder. Processes buf in 4-byte groups. When the fourth byte of
+ * a group is 0xEB (presumably BL with the "always" condition -- confirm),
+ * the first three bytes are read as a 24-bit little endian value, which is
+ * multiplied by four, has s->pos + i + 8 subtracted from it, is divided by
+ * four again and is stored back into the same three bytes.
+ *
+ * Returns the number of bytes processed, which is a multiple of four.
+ */
+static noinline_for_stack size_t XZ_FUNC bcj_arm(
+ struct xz_dec_bcj *s, uint8_t *buf, size_t size)
+{
+ size_t i;
+ uint32_t addr;
+
+ for (i = 0; i + 4 <= size; i += 4) {
+ if (buf[i + 3] == 0xEB) {
+ addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
+ | ((uint32_t)buf[i + 2] << 16);
+ addr <<= 2;
+ addr -= s->pos + (uint32_t)i + 8;
+ addr >>= 2;
+ buf[i] = (uint8_t)addr;
+ buf[i + 1] = (uint8_t)(addr >> 8);
+ buf[i + 2] = (uint8_t)(addr >> 16);
+ }
+ }
+
+ return i;
+}
+#endif
+
+#ifdef XZ_DEC_ARMTHUMB /*
+ * ARM-Thumb BCJ decoder. Advances through buf two bytes at a time and
+ * looks at four bytes at once. When the five highest bits of buf[i + 1]
+ * are 11110 and those of buf[i + 3] are 11111, a 22-bit value is collected
+ * from the remaining bits, multiplied by two, has s->pos + i + 4
+ * subtracted from it, is divided by two again and is stored back. After
+ * a conversion the second half of the four bytes is skipped too.
+ *
+ * Returns the number of bytes processed.
+ */
+static noinline_for_stack size_t XZ_FUNC bcj_armthumb(
+ struct xz_dec_bcj *s, uint8_t *buf, size_t size)
+{
+ size_t i;
+ uint32_t addr;
+
+ for (i = 0; i + 4 <= size; i += 2) {
+ if ((buf[i + 1] & 0xF8) == 0xF0
+ && (buf[i + 3] & 0xF8) == 0xF8) {
+ addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
+ | ((uint32_t)buf[i] << 11)
+ | (((uint32_t)buf[i + 3] & 0x07) << 8)
+ | (uint32_t)buf[i + 2];
+ addr <<= 1;
+ addr -= s->pos + (uint32_t)i + 4;
+ addr >>= 1;
+ buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
+ buf[i] = (uint8_t)(addr >> 11);
+ buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
+ buf[i + 2] = (uint8_t)addr;
+ i += 2; /* together with the loop step this skips all four bytes */
+ }
+ }
+
+ return i;
+}
+#endif
+
+#ifdef XZ_DEC_SPARC /*
+ * SPARC BCJ decoder. Reads buf in 4-byte groups as big endian 32-bit words.
+ * Words whose ten highest bits are 0x100 or 0x1FF are converted: the word
+ * is shifted left by two, s->pos + i is subtracted, and the result is
+ * shifted right by two. The stored word gets the low 22 bits of that
+ * result, copies of bit 22 in bits 22-29, bit 30 set and bit 31 clear.
+ *
+ * Returns the number of bytes processed, which is a multiple of four.
+ */
+static noinline_for_stack size_t XZ_FUNC bcj_sparc(
+ struct xz_dec_bcj *s, uint8_t *buf, size_t size)
+{
+ size_t i;
+ uint32_t instr;
+
+ for (i = 0; i + 4 <= size; i += 4) {
+ instr = ((uint32_t)buf[i] << 24)
+ | ((uint32_t)buf[i + 1] << 16)
+ | ((uint32_t)buf[i + 2] << 8)
+ | (uint32_t)buf[i + 3];
+ if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
+ instr <<= 2;
+ instr -= s->pos + (uint32_t)i;
+ instr >>= 2;
+ instr = ((uint32_t)0x40000000 - (instr & 0x400000))
+ | 0x40000000 | (instr & 0x3FFFFF);
+ buf[i] = (uint8_t)(instr >> 24);
+ buf[i + 1] = (uint8_t)(instr >> 16);
+ buf[i + 2] = (uint8_t)(instr >> 8);
+ buf[i + 3] = (uint8_t)instr;
+ }
+ }
+
+ return i;
+}
+#endif
+
+/*
+ * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
+ * of data that got filtered.
+ *
+ * buf and size describe the whole buffer. Filtering starts at buf + *pos
+ * and covers the remaining size - *pos bytes. The filter may process fewer
+ * bytes than that; only the amount it reports is added to the positions.
+ * If s->type doesn't match any filter that was compiled in, nothing is
+ * filtered and the positions stay unchanged.
+ *
+ * NOTE: This is implemented as a switch statement to avoid using function
+ * pointers, which could be problematic in the kernel boot code, which must
+ * avoid pointers to static data (at least on x86).
+ */
+static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s,
+ uint8_t *buf, size_t *pos, size_t size)
+{
+ size_t filtered;
+
+ buf += *pos;
+ size -= *pos;
+
+ switch (s->type) {
+#ifdef XZ_DEC_X86
+ case BCJ_X86:
+ filtered = bcj_x86(s, buf, size);
+ break;
+#endif
+#ifdef XZ_DEC_POWERPC
+ case BCJ_POWERPC:
+ filtered = bcj_powerpc(s, buf, size);
+ break;
+#endif
+#ifdef XZ_DEC_IA64
+ case BCJ_IA64:
+ filtered = bcj_ia64(s, buf, size);
+ break;
+#endif
+#ifdef XZ_DEC_ARM
+ case BCJ_ARM:
+ filtered = bcj_arm(s, buf, size);
+ break;
+#endif
+#ifdef XZ_DEC_ARMTHUMB
+ case BCJ_ARMTHUMB:
+ filtered = bcj_armthumb(s, buf, size);
+ break;
+#endif
+#ifdef XZ_DEC_SPARC
+ case BCJ_SPARC:
+ filtered = bcj_sparc(s, buf, size);
+ break;
+#endif
+ default:
+ /* Never reached but silence compiler warnings. */
+ filtered = 0;
+ break;
+ }
+
+ *pos += filtered;
+ s->pos += filtered;
+}
+
+/*
+ * Flush pending filtered data from temp to the output buffer.
+ * Move the remaining mixture of possibly filtered and unfiltered
+ * data to the beginning of temp.
+ *
+ * At most as many bytes are copied as there is free space in b->out.
+ * If that is less than s->temp.filtered, the rest stays pending and
+ * s->temp.filtered remains non-zero after the call.
+ */
+static void XZ_FUNC bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
+{
+ size_t copy_size;
+
+ copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
+ memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
+ b->out_pos += copy_size;
+
+ s->temp.filtered -= copy_size;
+ s->temp.size -= copy_size;
+ /* Source and destination may overlap, thus memmove(). */
+ memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
+}
+
+/*
+ * The BCJ filter functions are primitive in sense that they process the
+ * data in chunks of 1-16 bytes. To hide this issue, this function does
+ * some buffering.
+ *
+ * Decoded data is obtained by calling xz_dec_lzma2_run() on lzma2, then
+ * the BCJ filter selected with xz_dec_bcj_reset() is applied to it. Bytes
+ * that cannot be filtered yet are kept in s->temp until more data arrives.
+ *
+ * Returns XZ_OK when filtered data is still pending in s->temp,
+ * XZ_STREAM_END once the LZMA2 decoder has finished and everything has
+ * been copied to b->out, and otherwise whatever xz_dec_lzma2_run()
+ * returned, including its error codes.
+ */
+enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
+ struct xz_dec_lzma2 *lzma2, struct xz_buf *b)
+{
+ size_t out_start;
+
+ /*
+ * Flush pending already filtered data to the output buffer. Return
+ * immediately if we couldn't flush everything, or if the next
+ * filter in the chain had already returned XZ_STREAM_END.
+ */
+ if (s->temp.filtered > 0) {
+ bcj_flush(s, b);
+ if (s->temp.filtered > 0)
+ return XZ_OK;
+
+ if (s->ret == XZ_STREAM_END)
+ return XZ_STREAM_END;
+ }
+
+ /*
+ * If we have more output space than what is currently pending in
+ * temp, copy the unfiltered data from temp to the output buffer
+ * and try to fill the output buffer by decoding more data from the
+ * next filter in the chain. Apply the BCJ filter on the new data
+ * in the output buffer. If everything cannot be filtered, copy it
+ * to temp and rewind the output buffer position accordingly.
+ */
+ if (s->temp.size < b->out_size - b->out_pos) {
+ out_start = b->out_pos;
+ memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
+ b->out_pos += s->temp.size;
+
+ s->ret = xz_dec_lzma2_run(lzma2, b);
+ if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
+ return s->ret;
+
+ /* bcj_apply() advances out_start past the filtered bytes. */
+ bcj_apply(s, b->out, &out_start, b->out_pos);
+
+ /*
+ * As an exception, if the next filter returned XZ_STREAM_END,
+ * we can do that too, since the last few bytes that remain
+ * unfiltered are meant to remain unfiltered.
+ */
+ if (s->ret == XZ_STREAM_END)
+ return XZ_STREAM_END;
+
+ /* Take the unfiltered tail back from the output buffer. */
+ s->temp.size = b->out_pos - out_start;
+ b->out_pos -= s->temp.size;
+ memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
+ }
+
+ /*
+ * If we have unfiltered data in temp, try to fill by decoding more
+ * data from the next filter. Apply the BCJ filter on temp. Then we
+ * hopefully can fill the actual output buffer by copying filtered
+ * data from temp. A mix of filtered and unfiltered data may be left
+ * in temp; it will be taken care on the next call to this function.
+ */
+ if (s->temp.size > 0) {
+ /* Make b->out{,_pos,_size} temporarily point to s->temp. */
+ s->out = b->out;
+ s->out_pos = b->out_pos;
+ s->out_size = b->out_size;
+ b->out = s->temp.buf;
+ b->out_pos = s->temp.size;
+ b->out_size = sizeof(s->temp.buf);
+
+ s->ret = xz_dec_lzma2_run(lzma2, b);
+
+ /* Restore the caller's output buffer before any return. */
+ s->temp.size = b->out_pos;
+ b->out = s->out;
+ b->out_pos = s->out_pos;
+ b->out_size = s->out_size;
+
+ if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
+ return s->ret;
+
+ bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
+
+ /*
+ * If the next filter returned XZ_STREAM_END, we mark that
+ * everything is filtered, since the last unfiltered bytes
+ * of the stream are meant to be left as is.
+ */
+ if (s->ret == XZ_STREAM_END)
+ s->temp.filtered = s->temp.size;
+
+ bcj_flush(s, b);
+ if (s->temp.filtered > 0)
+ return XZ_OK;
+ }
+
+ return s->ret;
+}
+
+struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(void)
+{ /*
+ * Allocate the BCJ decoder state with kmalloc() and return the result
+ * as is, so the caller has to check it against NULL. Nothing is
+ * initialized here; xz_dec_bcj_reset() sets up the fields.
+ */
+ struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ return s;
+}
+
+enum xz_ret XZ_FUNC xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
+{ /*
+ * Select the BCJ filter given by Filter ID id and reset the decoder
+ * state: position, x86 mask and the temp buffer counters are zeroed.
+ * Returns XZ_OK on success. If support for the filter hasn't been
+ * compiled in, XZ_OPTIONS_ERROR is returned and *s is left untouched.
+ */
+ switch (id) {
+#ifdef XZ_DEC_X86
+ case BCJ_X86:
+#endif
+#ifdef XZ_DEC_POWERPC
+ case BCJ_POWERPC:
+#endif
+#ifdef XZ_DEC_IA64
+ case BCJ_IA64:
+#endif
+#ifdef XZ_DEC_ARM
+ case BCJ_ARM:
+#endif
+#ifdef XZ_DEC_ARMTHUMB
+ case BCJ_ARMTHUMB:
+#endif
+#ifdef XZ_DEC_SPARC
+ case BCJ_SPARC:
+#endif
+ break;
+
+ default:
+ /* Unsupported Filter ID */
+ return XZ_OPTIONS_ERROR;
+ }
+
+ s->type = id;
+ s->ret = XZ_OK;
+ s->pos = 0;
+ s->x86_prev_mask = 0;
+ s->temp.filtered = 0;
+ s->temp.size = 0;
+
+ return XZ_OK;
+}
diff --git a/linux/lib/xz/xz_dec_stream.c b/linux/lib/xz/xz_dec_stream.c
index cdc0d67..302559d 100644
--- a/linux/lib/xz/xz_dec_stream.c
+++ b/linux/lib/xz/xz_dec_stream.c
@@ -88,11 +88,6 @@ struct xz_dec {
/* Size of the Block Header field */
uint32_t size;
-
-#ifdef XZ_DEC_BCJ
- /* Type of the BCJ filter */
- enum xz_bcj_type bcj_type;
-#endif
} block_header;
/* Information collected when decoding Blocks */
@@ -145,6 +140,7 @@ struct xz_dec {
#ifdef XZ_DEC_BCJ
struct xz_dec_bcj *bcj;
+ bool bcj_active;
#endif
};
@@ -152,8 +148,8 @@ struct xz_dec {
static enum xz_ret XZ_FUNC xz_dec_raw(struct xz_dec *s, struct xz_buf *b)
{
#ifdef XZ_DEC_BCJ
- if (s->block_header.bcj_type != XZ_BCJ_NONE)
- return xz_dec_bcj_run(s, b);
+ if (s->bcj_active)
+ return xz_dec_bcj_run(s->bcj, s->lzma2, b);
#endif
return xz_dec_lzma2_run(s->lzma2, b);
@@ -347,36 +343,14 @@ static enum xz_ret XZ_FUNC dec_block_header(struct xz_dec *s)
#ifdef XZ_DEC_BCJ
/* If there are two filters, the first one must be a BCJ filter. */
- if (s->temp.buf[1] & 0x01) {
+ s->bcj_active = s->temp.buf[1] & 0x01;
+ if (s->bcj_active) {
if (s->temp.size - s->temp.pos < 2)
return XZ_OPTIONS_ERROR;
- switch (s->temp.buf[s->temp.pos]) {
-#ifdef XZ_DEC_BCJ_X86
- case XZ_BCJ_X86:
-#endif
-#ifdef XZ_DEC_BCJ_POWERPC
- case XZ_BCJ_POWERPC:
-#endif
-#ifdef XZ_DEC_BCJ_IA64
- case XZ_BCJ_IA64:
-#endif
-#ifdef XZ_DEC_BCJ_ARM
- case XZ_BCJ_ARM:
-#endif
-#ifdef XZ_DEC_BCJ_ARMTHUMB
- case XZ_BCJ_ARMTHUMB:
-#endif
-#ifdef XZ_DEC_BCJ_SPARC
- case XZ_BCJ_SPARC:
-#endif
- break;
-
- default:
- return XZ_OPTIONS_ERROR;
- }
-
- s->block_header.bcj_type = s->temp.buf[s->temp.pos++];
+ ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
+ if (ret != XZ_OK)
+ return ret;
/*
* We don't support custom start offset,
@@ -384,10 +358,6 @@ static enum xz_ret XZ_FUNC dec_block_header(struct xz_dec *s)
*/
if (s->temp.buf[s->temp.pos++] != 0x00)
return XZ_OPTIONS_ERROR;
-
- xz_dec_bcj_reset(s);
- } else {
- s->bcj.type = XZ_BCJ_NONE;
}
#endif
@@ -719,15 +689,27 @@ struct xz_dec * XZ_FUNC xz_dec_init(uint32_t dict_max)
if (s == NULL)
return NULL;
+#ifdef XZ_DEC_BCJ
+ s->bcj = xz_dec_bcj_create();
+ if (s->bcj == NULL)
+ goto error_bcj;
+#endif
+
s->lzma2 = xz_dec_lzma2_create(dict_max);
- if (s->lzma2 == NULL) {
- kfree(s);
- return NULL;
- }
+ if (s->lzma2 == NULL)
+ goto error_lzma2;
s->single_call = dict_max == 0;
xz_dec_reset(s);
return s;
+
+error_lzma2:
+#ifdef XZ_DEC_BCJ
+ kfree(s->bcj);
+error_bcj:
+#endif
+ kfree(s);
+ return NULL;
}
void XZ_FUNC xz_dec_reset(struct xz_dec *s)
@@ -753,6 +735,9 @@ void XZ_FUNC xz_dec_end(struct xz_dec *s)
{
if (s != NULL) {
xz_dec_lzma2_end(s->lzma2);
+#ifdef XZ_DEC_BCJ
+ xz_dec_bcj_end(s->bcj);
+#endif
kfree(s);
}
}
diff --git a/linux/lib/xz/xz_private.h b/linux/lib/xz/xz_private.h
index bd47708..0844d3e 100644
--- a/linux/lib/xz/xz_private.h
+++ b/linux/lib/xz/xz_private.h
@@ -16,6 +16,26 @@
# include <linux/vmalloc.h>
# include <linux/string.h>
# endif
+# ifndef XZ_IGNORE_KCONFIG
+# ifdef CONFIG_XZ_DEC_X86
+# define XZ_DEC_X86
+# endif
+# ifdef CONFIG_XZ_DEC_POWERPC
+# define XZ_DEC_POWERPC
+# endif
+# ifdef CONFIG_XZ_DEC_IA64
+# define XZ_DEC_IA64
+# endif
+# ifdef CONFIG_XZ_DEC_ARM
+# define XZ_DEC_ARM
+# endif
+# ifdef CONFIG_XZ_DEC_ARMTHUMB
+# define XZ_DEC_ARMTHUMB
+# endif
+# ifdef CONFIG_XZ_DEC_SPARC
+# define XZ_DEC_SPARC
+# endif
+# endif
# include <linux/xz.h>
#else
# ifndef XZ_MEM_FUNCS
@@ -38,6 +58,13 @@
# define __always_inline inline
# endif
# endif
+# ifndef noinline_for_stack
+# ifdef __GNUC__
+# define noinline_for_stack __attribute__((__noinline__))
+# else
+# define noinline_for_stack
+# endif
+# endif
#endif
#ifdef XZ_MEM_FUNCS
@@ -50,6 +77,15 @@
# define memzero(buf, size) memset(buf, 0, size)
#endif
+#ifndef XZ_DEC_BCJ /*
+ * Enable the support code shared between all BCJ filters automatically
+ * when at least one of the individual BCJ filter decoders is enabled.
+ * Each of the six filter macros is listed exactly once.
+ */
+# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
+ || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
+ || defined(XZ_DEC_ARMTHUMB) || defined(XZ_DEC_SPARC)
+# define XZ_DEC_BCJ
+# endif
+#endif
+
/*
* Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
* before calling xz_dec_lzma2_run().
@@ -72,15 +108,29 @@ extern enum xz_ret XZ_FUNC xz_dec_lzma2_run(
/* Free the memory allocated for the LZMA2 decoder. */
extern void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s);
-/* BCJ filter types (Branch/Jump/Call) */
-enum xz_bcj_type {
- XZ_BCJ_NONE = 0,
- XZ_BCJ_X86 = 4,
- XZ_BCJ_POWERPC = 5, /* Big endian */
- XZ_BCJ_IA64 = 6,
- XZ_BCJ_ARM = 7, /* Little endian */
- XZ_BCJ_ARMTHUMB = 8, /* Little endian */
- XZ_BCJ_SPARC = 9
-};
+/*
+ * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before
+ * calling xz_dec_bcj_run().
+ */
+extern struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(void);
+
+/*
+ * Decode the Filter ID of a BCJ filter. This implementation doesn't
+ * support custom start offsets, so no decoding of Filter Properties
+ * is needed. Returns XZ_OK if the given Filter ID is supported.
+ * Otherwise XZ_OPTIONS_ERROR is returned.
+ */
+extern enum xz_ret XZ_FUNC xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);
+
+/*
+ * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
+ * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
+ * must be called directly.
+ */
+extern enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
+ struct xz_dec_lzma2 *lzma2, struct xz_buf *b);
+
+/* Free the memory allocated for the BCJ filters. */
+#define xz_dec_bcj_end(s) kfree(s)
#endif
diff --git a/userspace/Makefile b/userspace/Makefile
index 61ed75c..979e745 100644
--- a/userspace/Makefile
+++ b/userspace/Makefile
@@ -8,20 +8,25 @@
#
CC = gcc -std=gnu89
+BCJ_CPPFLAGS = -DXZ_DEC_X86 -DXZ_DEC_POWERPC -DXZ_DEC_IA64 \
+ -DXZ_DEC_ARM -DXZ_DEC_ARMTHUMB -DXZ_DEC_SPARC
CFLAGS = -ggdb3 -O2 -pedantic -Wall -Wextra
RM = rm -f
VPATH = ../linux/include/linux ../linux/lib/xz
-COMMON_OBJS = xz_crc32.o xz_dec_stream.o xz_dec_lzma2.o
+COMMON_SRCS = xz_crc32.c xz_dec_stream.c xz_dec_lzma2.c xz_dec_bcj.c
+COMMON_OBJS = $(COMMON_SRCS:.c=.o)
XZMINIDEC_OBJS = xzminidec.o
BUFTEST_OBJS = buftest.o
BOOTTEST_OBJS = boottest.o
XZ_HEADERS = xz.h xz_private.h xz_stream.h xz_lzma2.h
PROGRAMS = xzminidec buftest boottest
+ALL_CPPFLAGS = $(BCJ_CPPFLAGS) $(CPPFLAGS)
+
all: $(PROGRAMS)
%.o: %.c $(XZ_HEADERS)
- $(CC) -I../linux/include/linux $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
+ $(CC) -I../linux/include/linux $(ALL_CPPFLAGS) $(CFLAGS) -c -o $@ $<
xzminidec: $(COMMON_OBJS) $(XZMINIDEC_OBJS)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(COMMON_OBJS) $(XZMINIDEC_OBJS)
@@ -29,8 +34,8 @@ xzminidec: $(COMMON_OBJS) $(XZMINIDEC_OBJS)
buftest: $(COMMON_OBJS) $(BUFTEST_OBJS)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(COMMON_OBJS) $(BUFTEST_OBJS)
-boottest: $(BOOTTEST_OBJS) xz_crc32.c xz_dec_stream.c xz_dec_lzma2.c
- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTTEST_OBJS)
+boottest: $(BOOTTEST_OBJS) $(COMMON_SRCS)
+ $(CC) $(ALL_CPPFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTTEST_OBJS)
.PHONY: clean
clean: