diff options
author | Eugene Kliuchnikov <eustas@google.com> | 2018-02-26 09:04:36 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-02-26 09:04:36 -0500 |
commit | 35e69fc7cf9421ab04ffc9d52cb36d07fa12984a (patch) | |
tree | a1ed614391936d455da2b0610ef8e8caf88b4289 /c/dec | |
parent | 3af18990f50d8f040038aaa08c41f5d27d62efb5 (diff) | |
download | brotli-35e69fc7cf9421ab04ffc9d52cb36d07fa12984a.tar.gz |
New feature: "Large Window Brotli" (#640)
* New feature: "Large Window Brotli"
By setting special encoder/decoder flag it is now possible to extend
LZ-window up to 30 bits; though produced stream will not be RFC7932
compliant.
Added new dictionary generator - "DSH". It combines speed of "Sieve"
and quality of "DM". Plus utilities to prepare train corpora
(remove unique strings).
Improved compression ratio: now two sub-blocks could be stitched:
the last copy command could be extended to span the next sub-block.
Fixed compression ineffectiveness caused by floating numbers rounding and
wrong cost heuristic.
Other C changes:
- combined / moved `context.h` to `common`
- moved transforms to `common`
- unified some aspects of code formatting
- added an abstraction for encoder (static) dictionary
- moved default allocator/deallocator functions to `common`
brotli CLI:
- window size is auto-adjusted if not specified explicitly
Java:
- added "eager" decoding both to JNI wrapper and pure decoder
- huge speed-up of `DictionaryData` initialization
* Add dictionaryless compressed dictionary
* Fix `sources.lst`
* Fix `sources.lst` and add a note that `libtool` is also required.
* Update setup.py
* Fix `EagerStreamTest`
* Fix BUILD file
* Add missing `libdivsufsort` dependency
* Fix "unused parameter" warning.
Diffstat (limited to 'c/dec')
-rw-r--r-- | c/dec/bit_reader.h | 33 | ||||
-rw-r--r-- | c/dec/context.h | 251 | ||||
-rw-r--r-- | c/dec/decode.c | 455 | ||||
-rw-r--r-- | c/dec/huffman.c | 22 | ||||
-rw-r--r-- | c/dec/huffman.h | 20 | ||||
-rw-r--r-- | c/dec/prefix.h | 7 | ||||
-rw-r--r-- | c/dec/state.c | 37 | ||||
-rw-r--r-- | c/dec/state.h | 47 | ||||
-rw-r--r-- | c/dec/transform.h | 300 |
9 files changed, 362 insertions, 810 deletions
diff --git a/c/dec/bit_reader.h b/c/dec/bit_reader.h index 21c59d7..39e4873 100644 --- a/c/dec/bit_reader.h +++ b/c/dec/bit_reader.h @@ -20,7 +20,7 @@ extern "C" { #define BROTLI_SHORT_FILL_BIT_WINDOW_READ (sizeof(brotli_reg_t) >> 1) -static const uint32_t kBitMask[33] = { 0x0000, +static const uint32_t kBitMask[33] = { 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF, @@ -35,7 +35,7 @@ static BROTLI_INLINE uint32_t BitMask(uint32_t n) { if (BROTLI_IS_CONSTANT(n) || BROTLI_HAS_UBFX) { /* Masking with this expression turns to a single "Unsigned Bit Field Extract" UBFX instruction on ARM. */ - return ~((0xffffffffU) << n); + return ~((0xFFFFFFFFu) << n); } else { return kBitMask[n]; } @@ -58,8 +58,9 @@ typedef struct { /* Initializes the BrotliBitReader fields. */ BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br); -/* Ensures that accumulator is not empty. May consume one byte of input. - Returns 0 if data is required but there is no input available. +/* Ensures that accumulator is not empty. + May consume up to sizeof(brotli_reg_t) - 1 bytes of input. + Returns BROTLI_FALSE if data is required but there is no input available. For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned reading. */ BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br); @@ -98,9 +99,9 @@ static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount( return TO_BROTLI_BOOL(br->avail_in >= num); } -/* Guarantees that there are at least n_bits + 1 bits in accumulator. +/* Guarantees that there are at least |n_bits| + 1 bits in accumulator. Precondition: accumulator contains at least 1 bit. - n_bits should be in the range [1..24] for regular build. For portable + |n_bits| should be in the range [1..24] for regular build. For portable non-64-bit little-endian build only 16 bits are safe to request. */ static BROTLI_INLINE void BrotliFillBitWindow( BrotliBitReader* const br, uint32_t n_bits) { @@ -158,7 +159,8 @@ static BROTLI_INLINE void BrotliFillBitWindow16(BrotliBitReader* const br) { BrotliFillBitWindow(br, 17); } -/* Pulls one byte of input to accumulator. */ +/* Tries to pull one byte of input to accumulator. + Returns BROTLI_FALSE if there is no input available. */ static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) { if (br->avail_in == 0) { return BROTLI_FALSE; @@ -190,15 +192,16 @@ static BROTLI_INLINE uint32_t BrotliGet16BitsUnmasked( return (uint32_t)BrotliGetBitsUnmasked(br); } -/* Returns the specified number of bits from |br| without advancing bit pos. */ +/* Returns the specified number of bits from |br| without advancing bit + position. */ static BROTLI_INLINE uint32_t BrotliGetBits( BrotliBitReader* const br, uint32_t n_bits) { BrotliFillBitWindow(br, n_bits); return (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits); } -/* Tries to peek the specified amount of bits. Returns 0, if there is not - enough input. */ +/* Tries to peek the specified amount of bits. Returns BROTLI_FALSE, if there + is not enough input. */ static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits( BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) { while (BrotliGetAvailableBits(br) < n_bits) { @@ -210,7 +213,7 @@ static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits( return BROTLI_TRUE; } -/* Advances the bit pos by n_bits. */ +/* Advances the bit pos by |n_bits|. */ static BROTLI_INLINE void BrotliDropBits( BrotliBitReader* const br, uint32_t n_bits) { br->bit_pos_ += n_bits; @@ -230,7 +233,7 @@ static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) { } /* Reads the specified number of bits from |br| and advances the bit pos. - Precondition: accumulator MUST contain at least n_bits. */ + Precondition: accumulator MUST contain at least |n_bits|. */ static BROTLI_INLINE void BrotliTakeBits( BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) { *val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits); @@ -259,8 +262,8 @@ static BROTLI_INLINE uint32_t BrotliReadBits( } } -/* Tries to read the specified amount of bits. Returns 0, if there is not - enough input. n_bits MUST be positive. */ +/* Tries to read the specified amount of bits. Returns BROTLI_FALSE, if there + is not enough input. |n_bits| MUST be positive. */ static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits( BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) { while (BrotliGetAvailableBits(br) < n_bits) { @@ -284,7 +287,7 @@ static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) { } /* Copies remaining input bytes stored in the bit reader to the output. Value - num may not be larger than BrotliGetRemainingBytes. The bit reader must be + |num| may not be larger than BrotliGetRemainingBytes. The bit reader must be warmed up again after this. */ static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest, BrotliBitReader* br, size_t num) { diff --git a/c/dec/context.h b/c/dec/context.h deleted file mode 100644 index 9402cbe..0000000 --- a/c/dec/context.h +++ /dev/null @@ -1,251 +0,0 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. - - Distributed under MIT license. - See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Lookup table to map the previous two bytes to a context id. - - There are four different context modeling modes defined here: - CONTEXT_LSB6: context id is the least significant 6 bits of the last byte, - CONTEXT_MSB6: context id is the most significant 6 bits of the last byte, - CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text, - CONTEXT_SIGNED: second-order context model tuned for signed integers. - - The context id for the UTF8 context model is calculated as follows. If p1 - and p2 are the previous two bytes, we calculate the context as - - context = kContextLookup[p1] | kContextLookup[p2 + 256]. - - If the previous two bytes are ASCII characters (i.e. < 128), this will be - equivalent to - - context = 4 * context1(p1) + context2(p2), - - where context1 is based on the previous byte in the following way: - - 0 : non-ASCII control - 1 : \t, \n, \r - 2 : space - 3 : other punctuation - 4 : " ' - 5 : % - 6 : ( < [ { - 7 : ) > ] } - 8 : , ; : - 9 : . - 10 : = - 11 : number - 12 : upper-case vowel - 13 : upper-case consonant - 14 : lower-case vowel - 15 : lower-case consonant - - and context2 is based on the second last byte: - - 0 : control, space - 1 : punctuation - 2 : upper-case letter, number - 3 : lower-case letter - - If the last byte is ASCII, and the second last byte is not (in a valid UTF8 - stream it will be a continuation byte, value between 128 and 191), the - context is the same as if the second last byte was an ASCII control or space. - - If the last byte is a UTF8 lead byte (value >= 192), then the next byte will - be a continuation byte and the context id is 2 or 3 depending on the LSB of - the last byte and to a lesser extent on the second last byte if it is ASCII. - - If the last byte is a UTF8 continuation byte, the second last byte can be: - - continuation byte: the next byte is probably ASCII or lead byte (assuming - 4-byte UTF8 characters are rare) and the context id is 0 or 1. - - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 - - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 - - The possible value combinations of the previous two bytes, the range of - context ids and the type of the next byte is summarized in the table below: - - |--------\-----------------------------------------------------------------| - | \ Last byte | - | Second \---------------------------------------------------------------| - | last byte \ ASCII | cont. byte | lead byte | - | \ (0-127) | (128-191) | (192-) | - |=============|===================|=====================|==================| - | ASCII | next: ASCII/lead | not valid | next: cont. | - | (0-127) | context: 4 - 63 | | context: 2 - 3 | - |-------------|-------------------|---------------------|------------------| - | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | - | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | - |-------------|-------------------|---------------------|------------------| - | lead byte | not valid | next: ASCII/lead | not valid | - | (192-207) | | context: 0 - 1 | | - |-------------|-------------------|---------------------|------------------| - | lead byte | not valid | next: cont. | not valid | - | (208-) | | context: 2 - 3 | | - |-------------|-------------------|---------------------|------------------| - - The context id for the signed context mode is calculated as: - - context = (kContextLookup[512 + p1] << 3) | kContextLookup[512 + p2]. - - For any context modeling modes, the context ids can be calculated by |-ing - together two lookups from one table using context model dependent offsets: - - context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2]. - - where offset1 and offset2 are dependent on the context mode. -*/ - -#ifndef BROTLI_DEC_CONTEXT_H_ -#define BROTLI_DEC_CONTEXT_H_ - -#include <brotli/types.h> - -enum ContextType { - CONTEXT_LSB6 = 0, - CONTEXT_MSB6 = 1, - CONTEXT_UTF8 = 2, - CONTEXT_SIGNED = 3 -}; - -/* Common context lookup table for all context modes. */ -static const uint8_t kContextLookup[1792] = { - /* CONTEXT_UTF8, last byte. */ - /* ASCII range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, - 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, - 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, - 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, - 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, - 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, - /* UTF8 continuation byte range. */ - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - /* UTF8 lead byte range. */ - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - /* CONTEXT_UTF8 second last byte. */ - /* ASCII range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, - 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, - /* UTF8 continuation byte range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* UTF8 lead byte range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - /* CONTEXT_SIGNED, second last byte. */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, - /* CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits. */ - 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56, - /* CONTEXT_LSB6, last byte. */ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - /* CONTEXT_MSB6, last byte. */ - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, - 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, - 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, - 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, - 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, - 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, - 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, - 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, - 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, - 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, - 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, - 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, - 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, - 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, - 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, - /* CONTEXT_{M,L}SB6, second last byte, */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static const int kContextLookupOffsets[8] = { - /* CONTEXT_LSB6 */ - 1024, 1536, - /* CONTEXT_MSB6 */ - 1280, 1536, - /* CONTEXT_UTF8 */ - 0, 256, - /* CONTEXT_SIGNED */ - 768, 512, -}; - -#endif /* BROTLI_DEC_CONTEXT_H_ */ diff --git a/c/dec/decode.c b/c/dec/decode.c index 846557a..630edeb 100644 --- a/c/dec/decode.c +++ b/c/dec/decode.c @@ -14,15 +14,15 @@ #include <string.h> /* memcpy, memset */ #include "../common/constants.h" +#include "../common/context.h" #include "../common/dictionary.h" #include "../common/platform.h" +#include "../common/transform.h" #include "../common/version.h" #include "./bit_reader.h" -#include "./context.h" #include "./huffman.h" #include "./prefix.h" #include "./state.h" -#include "./transform.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { @@ -37,7 +37,7 @@ extern "C" { (unsigned long)(idx), (unsigned long)array_name[idx])) #define HUFFMAN_TABLE_BITS 8U -#define HUFFMAN_TABLE_MASK 0xff +#define HUFFMAN_TABLE_MASK 0xFF /* We need the slack region for the following reasons: - doing up to two 16-byte copies for fast backward copying @@ -59,11 +59,16 @@ static const uint8_t kCodeLengthPrefixValue[16] = { BROTLI_BOOL BrotliDecoderSetParameter( BrotliDecoderState* state, BrotliDecoderParameter p, uint32_t value) { + if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE; switch (p) { case BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION: state->canny_ringbuffer_allocation = !!value ? 0 : 1; return BROTLI_TRUE; + case BROTLI_DECODER_PARAM_LARGE_WINDOW: + state->large_window = TO_BROTLI_BOOL(!!value); + return BROTLI_TRUE; + default: return BROTLI_FALSE; } } @@ -80,8 +85,15 @@ BrotliDecoderState* BrotliDecoderCreateInstance( BROTLI_DUMP(); return 0; } - BrotliDecoderStateInitWithCustomAllocators( - state, alloc_func, free_func, opaque); + if (!BrotliDecoderStateInit(state, alloc_func, free_func, opaque)) { + BROTLI_DUMP(); + if (!alloc_func && !free_func) { + free(state); + } else if (alloc_func && free_func) { + free_func(opaque, state); + } + return 0; + } return state; } @@ -97,39 +109,61 @@ void BrotliDecoderDestroyInstance(BrotliDecoderState* state) { } } -/* Saves error code and converts it to BrotliDecoderResult */ +/* Saves error code and converts it to BrotliDecoderResult. */ static BROTLI_NOINLINE BrotliDecoderResult SaveErrorCode( BrotliDecoderState* s, BrotliDecoderErrorCode e) { s->error_code = (int)e; switch (e) { case BROTLI_DECODER_SUCCESS: return BROTLI_DECODER_RESULT_SUCCESS; + case BROTLI_DECODER_NEEDS_MORE_INPUT: return BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT; + case BROTLI_DECODER_NEEDS_MORE_OUTPUT: return BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT; + default: return BROTLI_DECODER_RESULT_ERROR; } } -/* Decodes a number in the range [9..24], by reading 1 - 7 bits. - Precondition: bit-reader accumulator has at least 7 bits. */ -static uint32_t DecodeWindowBits(BrotliBitReader* br) { +/* Decodes WBITS by reading 1 - 7 bits, or 0x11 for "Large Window Brotli". + Precondition: bit-reader accumulator has at least 8 bits. */ +static BrotliDecoderErrorCode DecodeWindowBits(BrotliDecoderState* s, + BrotliBitReader* br) { uint32_t n; + BROTLI_BOOL large_window = s->large_window; + s->large_window = BROTLI_FALSE; BrotliTakeBits(br, 1, &n); if (n == 0) { - return 16; + s->window_bits = 16; + return BROTLI_DECODER_SUCCESS; } BrotliTakeBits(br, 3, &n); if (n != 0) { - return 17 + n; + s->window_bits = 17 + n; + return BROTLI_DECODER_SUCCESS; } BrotliTakeBits(br, 3, &n); + if (n == 1) { + if (large_window) { + BrotliTakeBits(br, 1, &n); + if (n == 1) { + return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS); + } + s->large_window = BROTLI_TRUE; + return BROTLI_DECODER_SUCCESS; + } else { + return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS); + } + } if (n != 0) { - return 8 + n; + s->window_bits = 8 + n; + return BROTLI_DECODER_SUCCESS; } - return 17; + s->window_bits = 17; + return BROTLI_DECODER_SUCCESS; } static BROTLI_INLINE void memmove16(uint8_t* dst, uint8_t* src) { @@ -342,7 +376,7 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol( *result = table->value; return BROTLI_TRUE; } - return BROTLI_FALSE; /* No valid bits at all. */ + return BROTLI_FALSE; /* No valid bits at all. */ } val = (uint32_t)BrotliGetBitsUnmasked(br); table += val & HUFFMAN_TABLE_MASK; @@ -352,11 +386,11 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol( *result = table->value; return BROTLI_TRUE; } else { - return BROTLI_FALSE; /* Not enough bits for the first level. */ + return BROTLI_FALSE; /* Not enough bits for the first level. */ } } if (available_bits <= HUFFMAN_TABLE_BITS) { - return BROTLI_FALSE; /* Not enough bits to move to the second level. */ + return BROTLI_FALSE; /* Not enough bits to move to the second level. */ } /* Speculatively drop HUFFMAN_TABLE_BITS. */ @@ -364,7 +398,7 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol( available_bits -= HUFFMAN_TABLE_BITS; table += table->value + val; if (available_bits < table->bits) { - return BROTLI_FALSE; /* Not enough bits for the second level. */ + return BROTLI_FALSE; /* Not enough bits for the second level. */ } BrotliDropBits(br, HUFFMAN_TABLE_BITS + table->bits); @@ -428,12 +462,11 @@ static BROTLI_INLINE uint32_t Log2Floor(uint32_t x) { } /* Reads (s->symbol + 1) symbols. - Totally 1..4 symbols are read, 1..10 bits each. - The list of symbols MUST NOT contain duplicates. - */ + Totally 1..4 symbols are read, 1..11 bits each. + The list of symbols MUST NOT contain duplicates. */ static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols( - uint32_t alphabet_size, BrotliDecoderState* s) { - /* max_bits == 1..10; symbol == 0..3; 1..40 bits will be read. */ + uint32_t alphabet_size, uint32_t max_symbol, BrotliDecoderState* s) { + /* max_bits == 1..11; symbol == 0..3; 1..44 bits will be read. */ BrotliBitReader* br = &s->br; uint32_t max_bits = Log2Floor(alphabet_size - 1); uint32_t i = s->sub_loop_counter; @@ -445,7 +478,7 @@ static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols( s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_READ; return BROTLI_DECODER_NEEDS_MORE_INPUT; } - if (v >= alphabet_size) { + if (v >= max_symbol) { return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_ALPHABET); } @@ -471,14 +504,13 @@ static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols( B) remember code length (if it is not 0) C) extend corresponding index-chain D) reduce the Huffman space - E) update the histogram - */ + E) update the histogram */ static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len, uint32_t* symbol, uint32_t* repeat, uint32_t* space, uint32_t* prev_code_len, uint16_t* symbol_lists, uint16_t* code_length_histo, int* next_symbol) { *repeat = 0; - if (code_len != 0) { /* code_len == 1..15 */ + if (code_len != 0) { /* code_len == 1..15 */ symbol_lists[next_symbol[code_len]] = (uint16_t)(*symbol); next_symbol[code_len] = (int)(*symbol); *prev_code_len = code_len; @@ -498,8 +530,7 @@ static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len, D) For each symbol do the same operations as in ProcessSingleCodeLength PRECONDITION: code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH or - code_len == BROTLI_REPEAT_ZERO_CODE_LENGTH - */ + code_len == BROTLI_REPEAT_ZERO_CODE_LENGTH */ static BROTLI_INLINE void ProcessRepeatedCodeLength(uint32_t code_len, uint32_t repeat_delta, uint32_t alphabet_size, uint32_t* symbol, uint32_t* repeat, uint32_t* space, uint32_t* prev_code_len, @@ -576,12 +607,12 @@ static BrotliDecoderErrorCode ReadSymbolCodeLengths( BrotliFillBitWindow16(br); p += BrotliGetBitsUnmasked(br) & BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH); - BrotliDropBits(br, p->bits); /* Use 1..5 bits */ + BrotliDropBits(br, p->bits); /* Use 1..5 bits. */ code_len = p->value; /* code_len == 0..17 */ if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) { ProcessSingleCodeLength(code_len, &symbol, &repeat, &space, &prev_code_len, symbol_lists, code_length_histo, next_symbol); - } else { /* code_len == 16..17, extra_bits == 2..3 */ + } else { /* code_len == 16..17, extra_bits == 2..3 */ uint32_t extra_bits = (code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) ? 2 : 3; uint32_t repeat_delta = @@ -616,13 +647,13 @@ static BrotliDecoderErrorCode SafeReadSymbolCodeLengths( get_byte = BROTLI_TRUE; continue; } - code_len = p->value; /* code_len == 0..17 */ + code_len = p->value; /* code_len == 0..17 */ if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) { BrotliDropBits(br, p->bits); ProcessSingleCodeLength(code_len, &s->symbol, &s->repeat, &s->space, &s->prev_code_len, s->symbol_lists, s->code_length_histo, s->next_symbol); - } else { /* code_len == 16..17, extra_bits == 2..3 */ + } else { /* code_len == 16..17, extra_bits == 2..3 */ uint32_t extra_bits = code_len - 14U; uint32_t repeat_delta = (bits >> p->bits) & BitMask(extra_bits); if (available_bits < p->bits + extra_bits) { @@ -674,7 +705,7 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) { ++num_codes; ++s->code_length_histo[v]; if (space - 1U >= 32U) { - /* space is 0 or wrapped around */ + /* space is 0 or wrapped around. */ break; } } @@ -689,22 +720,22 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) { There are 2 scenarios: A) Huffman code contains only few symbols (1..4). Those symbols are read directly; their code lengths are defined by the number of symbols. - For this scenario 4 - 45 bits will be read. + For this scenario 4 - 49 bits will be read. B) 2-phase decoding: B.1) Small Huffman table is decoded; it is specified with code lengths encoded with predefined entropy code. 32 - 74 bits are used. B.2) Decoded table is used to decode code lengths of symbols in resulting - Huffman table. In worst case 3520 bits are read. -*/ + Huffman table. In worst case 3520 bits are read. */ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, + uint32_t max_symbol, HuffmanCode* table, uint32_t* opt_table_size, BrotliDecoderState* s) { BrotliBitReader* br = &s->br; /* Unnecessary masking, but might be good for safety. */ - alphabet_size &= 0x3ff; - /* State machine */ + alphabet_size &= 0x7FF; + /* State machine. */ for (;;) { switch (s->substate_huffman) { case BROTLI_STATE_HUFFMAN_NONE: @@ -717,7 +748,7 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, 0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */ if (s->sub_loop_counter != 1) { s->space = 32; - s->repeat = 0; /* num_codes */ + s->repeat = 0; /* num_codes */ memset(&s->code_length_histo[0], 0, sizeof(s->code_length_histo[0]) * (BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1)); memset(&s->code_length_code_lengths[0], 0, @@ -729,20 +760,22 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, case BROTLI_STATE_HUFFMAN_SIMPLE_SIZE: /* Read symbols, codes & code lengths directly. */ - if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */ + if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */ s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_SIZE; return BROTLI_DECODER_NEEDS_MORE_INPUT; } s->sub_loop_counter = 0; /* No break, transit to the next state. */ + case BROTLI_STATE_HUFFMAN_SIMPLE_READ: { BrotliDecoderErrorCode result = - ReadSimpleHuffmanSymbols(alphabet_size, s); + ReadSimpleHuffmanSymbols(alphabet_size, max_symbol, s); if (result != BROTLI_DECODER_SUCCESS) { return result; } /* No break, transit to the next state. */ } + case BROTLI_STATE_HUFFMAN_SIMPLE_BUILD: { uint32_t table_size; if (s->symbol == 3) { @@ -787,11 +820,12 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, s->substate_huffman = BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS; /* No break, transit to the next state. */ } + case BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS: { uint32_t table_size; - BrotliDecoderErrorCode result = ReadSymbolCodeLengths(alphabet_size, s); + BrotliDecoderErrorCode result = ReadSymbolCodeLengths(max_symbol, s); if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) { - result = SafeReadSymbolCodeLengths(alphabet_size, s); + result = SafeReadSymbolCodeLengths(max_symbol, s); } if (result != BROTLI_DECODER_SUCCESS) { return result; @@ -823,7 +857,7 @@ static BROTLI_INLINE uint32_t ReadBlockLength(const HuffmanCode* table, uint32_t code; uint32_t nbits; code = ReadSymbol(table, br); - nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */ + nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */ return kBlockLengthPrefixCode[code].offset + BrotliReadBits(br, nbits); } @@ -842,7 +876,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength( } { uint32_t bits; - uint32_t nbits = kBlockLengthPrefixCode[index].nbits; /* nbits == 2..24 */ + uint32_t nbits = kBlockLengthPrefixCode[index].nbits; /* nbits == 2..24 */ if (!BrotliSafeReadBits(br, nbits, &bits)) { s->block_length_index = index; s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX; @@ -867,8 +901,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength( of Y values, and reinitialize only first elements in L. Most of input values are 0 and 1. To reduce number of branches, we replace - inner for loop with do-while. - */ + inner for loop with do-while. */ static BROTLI_NOINLINE void InverseMoveToFrontTransform( uint8_t* v, uint32_t v_len, BrotliDecoderState* state) { /* Reinitialize elements that could have been changed. */ @@ -884,7 +917,7 @@ static BROTLI_NOINLINE void InverseMoveToFrontTransform( /* Initialize list using 4 consequent values pattern. */ mtf[0] = pattern; do { - pattern += 0x04040404; /* Advance all 4 values by 4. */ + pattern += 0x04040404; /* Advance all 4 values by 4. */ mtf[i] = pattern; i++; } while (i <= upper_bound); @@ -917,7 +950,8 @@ static BrotliDecoderErrorCode HuffmanTreeGroupDecode( while (s->htree_index < group->num_htrees) { uint32_t table_size; BrotliDecoderErrorCode result = - ReadHuffmanCode(group->alphabet_size, s->next, &table_size, s); + ReadHuffmanCode(group->alphabet_size, group->max_symbol, + s->next, &table_size, s); if (result != BROTLI_DECODER_SUCCESS) return result; group->htrees[s->htree_index] = s->next; s->next += table_size; @@ -934,8 +968,7 @@ static BrotliDecoderErrorCode HuffmanTreeGroupDecode( 2) Decode Huffman table using ReadHuffmanCode function. This table will be used for reading context map items. 3) Read context map items; "0" values could be run-length encoded. - 4) Optionally, apply InverseMoveToFront transform to the resulting map. - */ + 4) Optionally, apply InverseMoveToFront transform to the resulting map. */ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, uint32_t* num_htrees, uint8_t** context_map_arg, @@ -964,6 +997,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, } s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX; /* No break, continue to next state. */ + case BROTLI_STATE_CONTEXT_MAP_READ_PREFIX: { uint32_t bits; /* In next stage ReadHuffmanCode uses at least 4 bits, so it is safe @@ -982,13 +1016,17 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN; /* No break, continue to next state. */ } - case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: - result = ReadHuffmanCode(*num_htrees + s->max_run_length_prefix, + + case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: { + uint32_t alphabet_size = *num_htrees + s->max_run_length_prefix; + result = ReadHuffmanCode(alphabet_size, alphabet_size, s->context_map_table, NULL, s); if (result != BROTLI_DECODER_SUCCESS) return result; s->code = 0xFFFF; s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE; /* No break, continue to next state. */ + } + case BROTLI_STATE_CONTEXT_MAP_DECODE: { uint32_t context_index = s->context_index; uint32_t max_run_length_prefix = s->max_run_length_prefix; @@ -1037,6 +1075,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, } /* No break, continue to next state. */ } + case BROTLI_STATE_CONTEXT_MAP_TRANSFORM: { uint32_t bits; if (!BrotliSafeReadBits(br, 1, &bits)) { @@ -1049,6 +1088,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE; return BROTLI_DECODER_SUCCESS; } + default: return BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE); @@ -1067,8 +1107,11 @@ static BROTLI_INLINE BROTLI_BOOL DecodeBlockTypeAndLength( BrotliBitReader* br = &s->br; uint32_t* ringbuffer = &s->block_type_rb[tree_type * 2]; uint32_t block_type; + if (max_block_type <= 1) { + return BROTLI_FALSE; + } - /* Read 0..15 + 3..39 bits */ + /* Read 0..15 + 3..39 bits. */ if (!safe) { block_type = ReadSymbol(type_tree, br); s->block_length[tree_type] = ReadBlockLength(len_tree, br); @@ -1125,9 +1168,8 @@ static BROTLI_INLINE void PrepareLiteralDecoding(BrotliDecoderState* s) { trivial = s->trivial_literal_contexts[block_type >> 5]; s->trivial_literal_context = (trivial >> (block_type & 31)) & 1; s->literal_htree = s->literal_hgroup.htrees[s->context_map_slice[0]]; - context_mode = s->context_modes[block_type]; - s->context_lookup1 = &kContextLookup[kContextLookupOffsets[context_mode]]; - s->context_lookup2 = &kContextLookup[kContextLookupOffsets[context_mode + 1]]; + context_mode = s->context_modes[block_type] & 3; + s->context_lookup = BROTLI_CONTEXT_LUT(context_mode); } /* Decodes the block type and updates the state for literal context. @@ -1164,6 +1206,7 @@ static BROTLI_INLINE BROTLI_BOOL DecodeCommandBlockSwitchInternal( static void BROTLI_NOINLINE DecodeCommandBlockSwitch(BrotliDecoderState* s) { DecodeCommandBlockSwitchInternal(0, s); } + static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeCommandBlockSwitch( BrotliDecoderState* s) { return DecodeCommandBlockSwitchInternal(1, s); @@ -1200,8 +1243,7 @@ static size_t UnwrittenBytes(const BrotliDecoderState* s, BROTLI_BOOL wrap) { /* Dumps output. Returns BROTLI_DECODER_NEEDS_MORE_OUTPUT only if there is more output to push - and either ring-buffer is as big as window size, or |force| is true. - */ + and either ring-buffer is as big as window size, or |force| is true. */ static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer( BrotliDecoderState* s, size_t* available_out, uint8_t** next_out, size_t* total_out, BROTLI_BOOL force) { @@ -1260,8 +1302,7 @@ static void BROTLI_NOINLINE WrapRingBuffer(BrotliDecoderState* s) { this function is called. Last two bytes of ring-buffer are initialized to 0, so context calculation - could be done uniformly for the first two and all other positions. -*/ + could be done uniformly for the first two and all other positions. */ static BROTLI_BOOL BROTLI_NOINLINE BrotliEnsureRingBuffer( BrotliDecoderState* s) { uint8_t* old_ringbuffer = s->ringbuffer; @@ -1321,8 +1362,9 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput( return BROTLI_DECODER_NEEDS_MORE_INPUT; } s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE; - /* No break, continue to next state */ + /* No break, continue to next state. */ } + case BROTLI_STATE_UNCOMPRESSED_WRITE: { BrotliDecoderErrorCode result; result = WriteRingBuffer( @@ -1346,8 +1388,7 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput( If we know the data size is small, do not allocate more ring buffer size than needed to reduce memory usage. - When this method is called, metablock size and flags MUST be decoded. -*/ + When this method is called, metablock size and flags MUST be decoded. */ static void BROTLI_NOINLINE BrotliCalculateRingBufferSize( BrotliDecoderState* s) { int window_size = 1 << s->window_bits; @@ -1378,7 +1419,7 @@ static void BROTLI_NOINLINE BrotliCalculateRingBufferSize( if (!!s->canny_ringbuffer_allocation) { /* Reduce ring buffer size to save memory when server is unscrupulous. In worst case memory usage might be 1.5x bigger for a short period of - ring buffer reallocation.*/ + ring buffer reallocation. */ while ((new_ringbuffer_size >> 1) >= min_size) { new_ringbuffer_size >>= 1; } @@ -1398,7 +1439,7 @@ static BrotliDecoderErrorCode ReadContextModes(BrotliDecoderState* s) { s->loop_counter = i; return BROTLI_DECODER_NEEDS_MORE_INPUT; } - s->context_modes[i] = (uint8_t)(bits << 1); + s->context_modes[i] = (uint8_t)bits; BROTLI_LOG_ARRAY_INDEX(s->context_modes, i); i++; } @@ -1413,12 +1454,12 @@ static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) { s->distance_context = 1; } else { int distance_code = s->distance_code << 1; - /* kDistanceShortCodeIndexOffset has 2-bit values from LSB: */ - /* 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */ - const uint32_t kDistanceShortCodeIndexOffset = 0xaaafff1b; - /* kDistanceShortCodeValueOffset has 2-bit values from LSB: */ - /*-0, 0,-0, 0,-1, 1,-2, 2,-3, 3,-1, 1,-2, 2,-3, 3 */ - const uint32_t kDistanceShortCodeValueOffset = 0xfa5fa500; + /* kDistanceShortCodeIndexOffset has 2-bit values from LSB: + 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */ + const uint32_t kDistanceShortCodeIndexOffset = 0xAAAFFF1B; + /* kDistanceShortCodeValueOffset has 2-bit values from LSB: + -0, 0,-0, 0,-1, 1,-2, 2,-3, 3,-1, 1,-2, 2,-3, 3 */ + const uint32_t kDistanceShortCodeValueOffset = 0xFA5FA500; int v = (s->dist_rb_idx + (int)(kDistanceShortCodeIndexOffset >> distance_code)) & 0x3; s->distance_code = s->dist_rb[v]; @@ -1428,9 +1469,9 @@ static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) { } else { s->distance_code -= v; if (s->distance_code <= 0) { - /* A huge distance will cause a BROTLI_FAILURE() soon. */ - /* This is a little faster than failing here. */ - s->distance_code = 0x0fffffff; + /* A huge distance will cause a BROTLI_FAILURE() soon. + This is a little faster than failing here. */ + s->distance_code = 0x7FFFFFFF; } } } @@ -1446,7 +1487,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBits( } } -/* Precondition: s->distance_code < 0 */ +/* Precondition: s->distance_code < 0. */ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal( int safe, BrotliDecoderState* s, BrotliBitReader* br) { int distval; @@ -1462,10 +1503,10 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal( } s->distance_code = (int)code; } - /* Convert the distance code to the actual distance by possibly */ - /* looking up past distances from the s->ringbuffer. */ + /* Convert the distance code to the actual distance by possibly + looking up past distances from the s->ringbuffer. */ s->distance_context = 0; - if ((s->distance_code & ~0xf) == 0) { + if ((s->distance_code & ~0xF) == 0) { TakeDistanceFromRingBuffer(s); --s->block_length[2]; return BROTLI_TRUE; @@ -1481,14 +1522,14 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal( s->distance_code = (int)s->num_direct_distance_codes + offset + (int)BrotliReadBits(br, nbits); } else { - /* This branch also works well when s->distance_postfix_bits == 0 */ + /* This branch also works well when s->distance_postfix_bits == 0. */ uint32_t bits; postfix = distval & s->distance_postfix_mask; distval >>= s->distance_postfix_bits; nbits = ((uint32_t)distval >> 1) + 1; if (safe) { if (!SafeReadBits(br, nbits, &bits)) { - s->distance_code = -1; /* Restore precondition. */ + s->distance_code = -1; /* Restore precondition. */ BrotliBitReaderRestoreState(br, &memento); return BROTLI_FALSE; } @@ -1615,7 +1656,7 @@ CommandBegin: if (safe) { s->state = BROTLI_STATE_COMMAND_BEGIN; } - if (!CheckInputAmount(safe, br, 28)) { /* 156 bits + 7 bytes */ + if (!CheckInputAmount(safe, br, 28)) { /* 156 bits + 7 bytes */ s->state = BROTLI_STATE_COMMAND_BEGIN; result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; @@ -1624,7 +1665,7 @@ CommandBegin: BROTLI_SAFE(DecodeCommandBlockSwitch(s)); goto CommandBegin; } - /* Read the insert/copy length in the command */ + /* Read the insert/copy length in the command. */ BROTLI_SAFE(ReadCommand(s, br, &i)); BROTLI_LOG(("[ProcessCommandsInternal] pos = %d insert = %d copy = %d\n", pos, i, s->copy_length)); @@ -1637,13 +1678,13 @@ CommandInner: if (safe) { s->state = BROTLI_STATE_COMMAND_INNER; } - /* Read the literals in the command */ + /* Read the literals in the command. */ if (s->trivial_literal_context) { uint32_t bits; uint32_t value; PreloadSymbol(safe, s->literal_htree, br, &bits, &value); do { - if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ + if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ s->state = BROTLI_STATE_COMMAND_INNER; result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; @@ -1679,7 +1720,7 @@ CommandInner: do { const HuffmanCode* hc; uint8_t context; - if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ + if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ s->state = BROTLI_STATE_COMMAND_INNER; result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; @@ -1688,7 +1729,7 @@ CommandInner: BROTLI_SAFE(DecodeLiteralBlockSwitch(s)); if (s->trivial_literal_context) goto CommandInner; } - context = s->context_lookup1[p1] | s->context_lookup2[p2]; + context = BROTLI_CONTEXT(p1, p2, s->context_lookup); BROTLI_LOG_UINT(context); hc = s->literal_hgroup.htrees[s->context_map_slice[context]]; p2 = p1; @@ -1744,14 +1785,25 @@ CommandPostDecodeLiterals: } i = s->copy_length; /* Apply copy of LZ77 back-reference, or static dictionary reference if - the distance is larger than the max LZ77 distance */ + the distance is larger than the max LZ77 distance */ if (s->distance_code > s->max_distance) { - int address = s->distance_code - s->max_distance - 1; + /* The maximum allowed distance is BROTLI_MAX_ALLOWED_DISTANCE = 0x7FFFFFFC. + With this choice, no signed overflow can occur after decoding + a special distance code (e.g., after adding 3 to the last distance). */ + if (s->distance_code > BROTLI_MAX_ALLOWED_DISTANCE) { + BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d " + "len: %d bytes left: %d\n", + pos, s->distance_code, i, s->meta_block_remaining_len)); + return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DISTANCE); + } if (i >= BROTLI_MIN_DICTIONARY_WORD_LENGTH && i <= BROTLI_MAX_DICTIONARY_WORD_LENGTH) { + int address = s->distance_code - s->max_distance - 1; const BrotliDictionary* words = s->dictionary; + const BrotliTransforms* transforms = s->transforms; int offset = (int)s->dictionary->offsets_by_length[i]; uint32_t shift = s->dictionary->size_bits_by_length[i]; + int mask = (int)BitMask(shift); int word_idx = address & mask; int transform_idx = address >> shift; @@ -1761,16 +1813,16 @@ CommandPostDecodeLiterals: if (BROTLI_PREDICT_FALSE(!words->data)) { return BROTLI_FAILURE(BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET); } - if (transform_idx < kNumTransforms) { + if (transform_idx < (int)transforms->num_transforms) { const uint8_t* word = &words->data[offset]; int len = i; - if (transform_idx == 0) { + if (transform_idx == transforms->cutOffTransforms[0]) { memcpy(&s->ringbuffer[pos], word, (size_t)len); BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s]\n", len, word)); } else { len = BrotliTransformDictionaryWord(&s->ringbuffer[pos], word, len, - transform_idx); + transforms, transform_idx); BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s]," " transform_idx = %d, transformed: [%.*s]\n", i, word, transform_idx, len, &s->ringbuffer[pos])); @@ -1778,7 +1830,6 @@ CommandPostDecodeLiterals: pos += len; s->meta_block_remaining_len -= len; if (pos >= s->ringbuffer_size) { - /*s->partial_pos_rb += (size_t)s->ringbuffer_size;*/ s->state = BROTLI_STATE_COMMAND_POST_WRITE_1; goto saveStateAndReturn; } @@ -1800,14 +1851,13 @@ CommandPostDecodeLiterals: uint8_t* copy_src = &s->ringbuffer[src_start]; int dst_end = pos + i; int src_end = src_start + i; - /* update the recent distances cache */ + /* Update the recent distances cache. */ s->dist_rb[s->dist_rb_idx & 3] = s->distance_code; ++s->dist_rb_idx; s->meta_block_remaining_len -= i; /* There are 32+ bytes of slack in the ring-buffer allocation. Also, we have 16 short codes, that make these 16 bytes irrelevant - in the ring-buffer. Let's copy over them as a first guess. - */ + in the ring-buffer. Let's copy over them as a first guess. */ memmove16(copy_dst, copy_src); if (src_end > pos && dst_end > src_start) { /* Regions intersect. */ @@ -1830,7 +1880,7 @@ CommandPostDecodeLiterals: } BROTLI_LOG_UINT(s->meta_block_remaining_len); if (s->meta_block_remaining_len <= 0) { - /* Next metablock, if any */ + /* Next metablock, if any. */ s->state = BROTLI_STATE_METABLOCK_DONE; goto saveStateAndReturn; } else { @@ -1850,7 +1900,7 @@ CommandPostWrapCopy: } } if (s->meta_block_remaining_len <= 0) { - /* Next metablock, if any */ + /* Next metablock, if any. */ s->state = BROTLI_STATE_METABLOCK_DONE; goto saveStateAndReturn; } else { @@ -1875,6 +1925,21 @@ static BROTLI_NOINLINE BrotliDecoderErrorCode SafeProcessCommands( return ProcessCommandsInternal(1, s); } +/* Returns the maximum number of distance symbols which can only represent + distances not exceeding BROTLI_MAX_ALLOWED_DISTANCE. */ +static uint32_t BrotliMaxDistanceSymbol(uint32_t ndirect, uint32_t npostfix) { + static const uint32_t bound[BROTLI_MAX_NPOSTFIX + 1] = {0, 4, 12, 28}; + static const uint32_t diff[BROTLI_MAX_NPOSTFIX + 1] = {73, 126, 228, 424}; + uint32_t postfix = 1U << npostfix; + if (ndirect < bound[npostfix]) { + return ndirect + diff[npostfix] + postfix; + } else if (ndirect > bound[npostfix] + postfix) { + return ndirect + diff[npostfix]; + } else { + return bound[npostfix] + diff[npostfix] + postfix; + } +} + BrotliDecoderResult BrotliDecoderDecompress( size_t encoded_size, const uint8_t* encoded_buffer, size_t* decoded_size, uint8_t* decoded_buffer) { @@ -1885,7 +1950,9 @@ BrotliDecoderResult BrotliDecoderDecompress( const uint8_t* next_in = encoded_buffer; size_t available_out = *decoded_size; uint8_t* next_out = decoded_buffer; - BrotliDecoderStateInit(&s); + if (!BrotliDecoderStateInit(&s, 0, 0, 0)) { + return BROTLI_DECODER_RESULT_ERROR; + } result = BrotliDecoderDecompressStream( &s, &available_in, &next_in, &available_out, &next_out, &total_out); *decoded_size = total_out; @@ -1897,23 +1964,22 @@ BrotliDecoderResult BrotliDecoderDecompress( } /* Invariant: input stream is never overconsumed: - * invalid input implies that the whole stream is invalid -> any amount of + - invalid input implies that the whole stream is invalid -> any amount of input could be read and discarded - * when result is "needs more input", then at least one more byte is REQUIRED + - when result is "needs more input", then at least one more byte is REQUIRED to complete decoding; all input data MUST be consumed by decoder, so client could swap the input buffer - * when result is "needs more output" decoder MUST ensure that it doesn't + - when result is "needs more output" decoder MUST ensure that it doesn't hold more than 7 bits in bit reader; this saves client from swapping input buffer ahead of time - * when result is "success" decoder MUST return all unused data back to input - buffer; this is possible because the invariant is hold on enter -*/ + - when result is "success" decoder MUST return all unused data back to input + buffer; this is possible because the invariant is held on enter */ BrotliDecoderResult BrotliDecoderDecompressStream( BrotliDecoderState* s, size_t* available_in, const uint8_t** next_in, size_t* available_out, uint8_t** next_out, size_t* total_out) { BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS; BrotliBitReader* br = &s->br; - /* Ensure that *total_out is set, even if no data will ever be pushed out. */ + /* Ensure that |total_out| is set, even if no data will ever be pushed out. */ if (total_out) { *total_out = s->partial_pos_out; } @@ -1926,7 +1992,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s, BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS)); } if (!*available_out) next_out = 0; - if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */ + if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */ br->avail_in = *available_in; br->next_in = *next_in; } else { @@ -1938,9 +2004,10 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } /* State machine */ for (;;) { - if (result != BROTLI_DECODER_SUCCESS) { /* Error, needs more input/output */ + if (result != BROTLI_DECODER_SUCCESS) { + /* Error, needs more input/output. */ if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) { - if (s->ringbuffer != 0) { /* Pro-actively push output. */ + if (s->ringbuffer != 0) { /* Pro-actively push output. */ BrotliDecoderErrorCode intermediate_result = WriteRingBuffer(s, available_out, next_out, total_out, BROTLI_TRUE); /* WriteRingBuffer checks s->meta_block_remaining_len validity. */ @@ -1949,9 +2016,10 @@ BrotliDecoderResult BrotliDecoderDecompressStream( break; } } - if (s->buffer_length != 0) { /* Used with internal buffer. */ - if (br->avail_in == 0) { /* Successfully finished read transaction. */ - /* Accumulator contains less than 8 bits, because internal buffer + if (s->buffer_length != 0) { /* Used with internal buffer. */ + if (br->avail_in == 0) { + /* Successfully finished read transaction. + Accumulator contains less than 8 bits, because internal buffer is expanded byte-by-byte until it is enough to complete read. */ s->buffer_length = 0; /* Switch to input stream and restart. */ @@ -1971,9 +2039,9 @@ BrotliDecoderResult BrotliDecoderDecompressStream( /* Retry with more data in buffer. */ continue; } - /* Can't finish reading and no more input.*/ + /* Can't finish reading and no more input. */ break; - } else { /* Input stream doesn't contain enough input. */ + } else { /* Input stream doesn't contain enough input. */ /* Copy tail to internal buffer and return. */ *next_in = br->next_in; *available_in = br->avail_in; @@ -1992,7 +2060,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( if (s->buffer_length != 0) { /* Just consumed the buffered input and produced some output. Otherwise - it would result in "needs more input". Reset internal buffer.*/ + it would result in "needs more input". Reset internal buffer. */ s->buffer_length = 0; } else { /* Using input stream in last iteration. When decoder switches to input @@ -2012,13 +2080,32 @@ BrotliDecoderResult BrotliDecoderDecompressStream( break; } /* Decode window size. */ - s->window_bits = DecodeWindowBits(br); /* Reads 1..7 bits. */ - BROTLI_LOG_UINT(s->window_bits); - if (s->window_bits == 9) { - /* Value 9 is reserved for future use. */ + result = DecodeWindowBits(s, br); /* Reads 1..8 bits. */ + if (result != BROTLI_DECODER_SUCCESS) { + break; + } + if (s->large_window) { + s->state = BROTLI_STATE_LARGE_WINDOW_BITS; + break; + } + s->state = BROTLI_STATE_INITIALIZE; + break; + + case BROTLI_STATE_LARGE_WINDOW_BITS: + if (!BrotliSafeReadBits(br, 6, &s->window_bits)) { + result = BROTLI_DECODER_NEEDS_MORE_INPUT; + break; + } + if (s->window_bits < BROTLI_LARGE_MIN_WBITS || + s->window_bits > BROTLI_LARGE_MAX_WBITS) { result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS); break; } + s->state = BROTLI_STATE_INITIALIZE; + /* No break, continue to next state */ + + case BROTLI_STATE_INITIALIZE: + BROTLI_LOG_UINT(s->window_bits); /* Maximum distance, see section 9.1. of the spec. */ s->max_backward_distance = (1 << s->window_bits) - BROTLI_WINDOW_GAP; @@ -2034,14 +2121,16 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->block_type_trees + 3 * BROTLI_HUFFMAN_MAX_SIZE_258; s->state = BROTLI_STATE_METABLOCK_BEGIN; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_METABLOCK_BEGIN: BrotliDecoderStateMetablockBegin(s); BROTLI_LOG_UINT(s->pos); s->state = BROTLI_STATE_METABLOCK_HEADER; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_METABLOCK_HEADER: - result = DecodeMetaBlockLength(s, br); /* Reads 2 - 31 bits. */ + result = DecodeMetaBlockLength(s, br); /* Reads 2 - 31 bits. */ if (result != BROTLI_DECODER_SUCCESS) { break; } @@ -2071,6 +2160,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->loop_counter = 0; s->state = BROTLI_STATE_HUFFMAN_CODE_0; break; + case BROTLI_STATE_UNCOMPRESSED: { result = CopyUncompressedBlockToOutput( available_out, next_out, total_out, s); @@ -2080,6 +2170,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_METABLOCK_DONE; break; } + case BROTLI_STATE_METADATA: for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) { uint32_t bits; @@ -2093,6 +2184,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_METABLOCK_DONE; } break; + case BROTLI_STATE_HUFFMAN_CODE_0: if (s->loop_counter >= 3) { s->state = BROTLI_STATE_METABLOCK_HEADER_2; @@ -2110,23 +2202,28 @@ BrotliDecoderResult BrotliDecoderDecompressStream( break; } s->state = BROTLI_STATE_HUFFMAN_CODE_1; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_HUFFMAN_CODE_1: { + uint32_t alphabet_size = s->num_block_types[s->loop_counter] + 2; int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_258; - result = ReadHuffmanCode(s->num_block_types[s->loop_counter] + 2, + result = ReadHuffmanCode(alphabet_size, alphabet_size, &s->block_type_trees[tree_offset], NULL, s); if (result != BROTLI_DECODER_SUCCESS) break; s->state = BROTLI_STATE_HUFFMAN_CODE_2; - /* No break, continue to next state */ + /* No break, continue to next state. */ } + case BROTLI_STATE_HUFFMAN_CODE_2: { + uint32_t alphabet_size = BROTLI_NUM_BLOCK_LEN_SYMBOLS; int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26; - result = ReadHuffmanCode(BROTLI_NUM_BLOCK_LEN_SYMBOLS, + result = ReadHuffmanCode(alphabet_size, alphabet_size, &s->block_len_trees[tree_offset], NULL, s); if (result != BROTLI_DECODER_SUCCESS) break; s->state = BROTLI_STATE_HUFFMAN_CODE_3; - /* No break, continue to next state */ + /* No break, continue to next state. */ } + case BROTLI_STATE_HUFFMAN_CODE_3: { int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26; if (!SafeReadBlockLength(s, &s->block_length[s->loop_counter], @@ -2139,6 +2236,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_HUFFMAN_CODE_0; break; } + case BROTLI_STATE_METABLOCK_HEADER_2: { uint32_t bits; if (!BrotliSafeReadBits(br, 6, &bits)) { @@ -2160,15 +2258,17 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } s->loop_counter = 0; s->state = BROTLI_STATE_CONTEXT_MODES; - /* No break, continue to next state */ + /* No break, continue to next state. */ } + case BROTLI_STATE_CONTEXT_MODES: result = ReadContextModes(s); if (result != BROTLI_DECODER_SUCCESS) { break; } s->state = BROTLI_STATE_CONTEXT_MAP_1; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_CONTEXT_MAP_1: result = DecodeContextMap( s->num_block_types[0] << BROTLI_LITERAL_CONTEXT_BITS, @@ -2178,54 +2278,54 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } DetectTrivialLiteralBlockTypes(s); s->state = BROTLI_STATE_CONTEXT_MAP_2; - /* No break, continue to next state */ - case BROTLI_STATE_CONTEXT_MAP_2: - { - uint32_t num_distance_codes = s->num_direct_distance_codes + - ((2 * BROTLI_MAX_DISTANCE_BITS) << s->distance_postfix_bits); - BROTLI_BOOL allocation_success = BROTLI_TRUE; - result = DecodeContextMap( - s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS, - &s->num_dist_htrees, &s->dist_context_map, s); - if (result != BROTLI_DECODER_SUCCESS) { - break; - } - allocation_success &= BrotliDecoderHuffmanTreeGroupInit( - s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS, - s->num_literal_htrees); - allocation_success &= BrotliDecoderHuffmanTreeGroupInit( - s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS, - s->num_block_types[1]); - allocation_success &= BrotliDecoderHuffmanTreeGroupInit( - s, &s->distance_hgroup, num_distance_codes, - s->num_dist_htrees); - if (!allocation_success) { - return SaveErrorCode(s, - BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS)); - } + /* No break, continue to next state. */ + + case BROTLI_STATE_CONTEXT_MAP_2: { + uint32_t num_direct_codes = + s->num_direct_distance_codes - BROTLI_NUM_DISTANCE_SHORT_CODES; + uint32_t num_distance_codes = BROTLI_DISTANCE_ALPHABET_SIZE( + num_direct_codes, s->distance_postfix_bits, + (s->large_window ? BROTLI_LARGE_MAX_DISTANCE_BITS : + BROTLI_MAX_DISTANCE_BITS)); + uint32_t max_distance_symbol = (s->large_window ? + BrotliMaxDistanceSymbol( + num_direct_codes, s->distance_postfix_bits) : + num_distance_codes); + BROTLI_BOOL allocation_success = BROTLI_TRUE; + result = DecodeContextMap( + s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS, + &s->num_dist_htrees, &s->dist_context_map, s); + if (result != BROTLI_DECODER_SUCCESS) { + break; + } + allocation_success &= BrotliDecoderHuffmanTreeGroupInit( + s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS, + BROTLI_NUM_LITERAL_SYMBOLS, s->num_literal_htrees); + allocation_success &= BrotliDecoderHuffmanTreeGroupInit( + s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS, + BROTLI_NUM_COMMAND_SYMBOLS, s->num_block_types[1]); + allocation_success &= BrotliDecoderHuffmanTreeGroupInit( + s, &s->distance_hgroup, num_distance_codes, + max_distance_symbol, s->num_dist_htrees); + if (!allocation_success) { + return SaveErrorCode(s, + BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS)); } s->loop_counter = 0; s->state = BROTLI_STATE_TREE_GROUP; - /* No break, continue to next state */ - case BROTLI_STATE_TREE_GROUP: - { - HuffmanTreeGroup* hgroup = NULL; - switch (s->loop_counter) { - case 0: - hgroup = &s->literal_hgroup; - break; - case 1: - hgroup = &s->insert_copy_hgroup; - break; - case 2: - hgroup = &s->distance_hgroup; - break; - default: - return SaveErrorCode(s, BROTLI_FAILURE( - BROTLI_DECODER_ERROR_UNREACHABLE)); - } - result = HuffmanTreeGroupDecode(hgroup, s); + /* No break, continue to next state. */ + } + + case BROTLI_STATE_TREE_GROUP: { + HuffmanTreeGroup* hgroup = NULL; + switch (s->loop_counter) { + case 0: hgroup = &s->literal_hgroup; break; + case 1: hgroup = &s->insert_copy_hgroup; break; + case 2: hgroup = &s->distance_hgroup; break; + default: return SaveErrorCode(s, BROTLI_FAILURE( + BROTLI_DECODER_ERROR_UNREACHABLE)); } + result = HuffmanTreeGroupDecode(hgroup, s); if (result != BROTLI_DECODER_SUCCESS) break; s->loop_counter++; if (s->loop_counter >= 3) { @@ -2239,6 +2339,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_COMMAND_BEGIN; } break; + } + case BROTLI_STATE_COMMAND_BEGIN: case BROTLI_STATE_COMMAND_INNER: case BROTLI_STATE_COMMAND_POST_DECODE_LITERALS: @@ -2248,6 +2350,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( result = SafeProcessCommands(s); } break; + case BROTLI_STATE_COMMAND_INNER_WRITE: case BROTLI_STATE_COMMAND_POST_WRITE_1: case BROTLI_STATE_COMMAND_POST_WRITE_2: @@ -2262,7 +2365,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) { if (s->meta_block_remaining_len == 0) { - /* Next metablock, if any */ + /* Next metablock, if any. */ s->state = BROTLI_STATE_METABLOCK_DONE; } else { s->state = BROTLI_STATE_COMMAND_BEGIN; @@ -2282,6 +2385,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_COMMAND_INNER; } break; + case BROTLI_STATE_METABLOCK_DONE: if (s->meta_block_remaining_len < 0) { result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2); @@ -2302,7 +2406,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream( *next_in = br->next_in; } s->state = BROTLI_STATE_DONE; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_DONE: if (s->ringbuffer != 0) { result = WriteRingBuffer( diff --git a/c/dec/huffman.c b/c/dec/huffman.c index 4fe7bfa..f142442 100644 --- a/c/dec/huffman.c +++ b/c/dec/huffman.c @@ -86,9 +86,9 @@ static BROTLI_INLINE void ReplicateValue(HuffmanCode* table, } while (end > 0); } -/* Returns the table width of the next 2nd level table. count is the histogram - of bit lengths for the remaining symbols, len is the code length of the next - processed symbol */ +/* Returns the table width of the next 2nd level table. |count| is the histogram + of bit lengths for the remaining symbols, |len| is the code length of the + next processed symbol. */ static BROTLI_INLINE int NextTableBitSize(const uint16_t* const count, int len, int root_bits) { int left = 1 << (len - root_bits); @@ -118,7 +118,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <= BROTLI_REVERSE_BITS_MAX); - /* generate offsets into sorted symbol table by code length */ + /* Generate offsets into sorted symbol table by code length. */ symbol = -1; bits = 1; BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, { @@ -129,7 +129,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, /* Symbols with code length 0 are placed after all other symbols. */ offset[0] = BROTLI_CODE_LENGTH_CODES - 1; - /* sort symbols by length, by symbol order within each length */ + /* Sort symbols by length, by symbol order within each length. */ symbol = BROTLI_CODE_LENGTH_CODES; do { BROTLI_REPEAT(6, { @@ -150,7 +150,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, return; } - /* fill in table */ + /* Fill in table. */ key = 0; key_step = BROTLI_REVERSE_BITS_LOWEST; symbol = 0; @@ -200,9 +200,8 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, table_size = 1 << table_bits; total_size = table_size; - /* fill in root table */ - /* let's reduce the table size to a smaller size if possible, and */ - /* create the repetitions by memcpy if possible in the coming loop */ + /* Fill in the root table. Reduce the table size to if possible, + and create the repetitions by memcpy. */ if (table_bits > max_length) { table_bits = max_length; table_size = 1 << table_bits; @@ -224,15 +223,14 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, key_step >>= 1; } while (++bits <= table_bits); - /* if root_bits != table_bits we only created one fraction of the */ - /* table, and we need to replicate it now. */ + /* If root_bits != table_bits then replicate to fill the remaining slots. */ while (total_size != table_size) { memcpy(&table[table_size], &table[0], (size_t)table_size * sizeof(table[0])); table_size <<= 1; } - /* fill in 2nd level tables and add pointers to root table */ + /* Fill in 2nd level tables and add pointers to root table. */ key_step = BROTLI_REVERSE_BITS_LOWEST >> (root_bits - 1); sub_key = (BROTLI_REVERSE_BITS_LOWEST << 1); sub_key_step = BROTLI_REVERSE_BITS_LOWEST; diff --git a/c/dec/huffman.h b/c/dec/huffman.h index 730af88..521ec6e 100644 --- a/c/dec/huffman.h +++ b/c/dec/huffman.h @@ -19,10 +19,11 @@ extern "C" { #define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15 /* Maximum possible Huffman table size for an alphabet size of (index * 32), - * max code length 15 and root table bits 8. */ + max code length 15 and root table bits 8. */ static const uint16_t kMaxHuffmanTableSize[] = { 256, 402, 436, 468, 500, 534, 566, 598, 630, 662, 694, 726, 758, 790, 822, - 854, 886, 920, 952, 984, 1016, 1048, 1080}; + 854, 886, 920, 952, 984, 1016, 1048, 1080, 1112, 1144, 1176, 1208, 1240, 1272, + 1304, 1336, 1368, 1400, 1432, 1464, 1496, 1528}; /* BROTLI_NUM_BLOCK_LEN_SYMBOLS == 26 */ #define BROTLI_HUFFMAN_MAX_SIZE_26 396 /* BROTLI_MAX_BLOCK_TYPE_SYMBOLS == 258 */ @@ -41,23 +42,26 @@ typedef struct { BROTLI_INTERNAL void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table, const uint8_t* const code_lengths, uint16_t* count); -/* Builds Huffman lookup table assuming code lengths are in symbol order. */ -/* Returns size of resulting table. */ +/* Builds Huffman lookup table assuming code lengths are in symbol order. + Returns size of resulting table. */ BROTLI_INTERNAL uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, int root_bits, const uint16_t* const symbol_lists, uint16_t* count_arg); -/* Builds a simple Huffman table. The num_symbols parameter is to be */ -/* interpreted as follows: 0 means 1 symbol, 1 means 2 symbols, 2 means 3 */ -/* symbols, 3 means 4 symbols with lengths 2,2,2,2, 4 means 4 symbols with */ -/* lengths 1,2,3,3. */ +/* Builds a simple Huffman table. The |num_symbols| parameter is to be + interpreted as follows: 0 means 1 symbol, 1 means 2 symbols, + 2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2], + 4 means 4 symbols with lengths [1, 2, 3, 3]. */ BROTLI_INTERNAL uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table, int root_bits, uint16_t* symbols, uint32_t num_symbols); /* Contains a collection of Huffman trees with the same alphabet size. */ +/* max_symbol is needed due to simple codes since log2(alphabet_size) could be + greater than log2(max_symbol). */ typedef struct { HuffmanCode** htrees; HuffmanCode* codes; uint16_t alphabet_size; + uint16_t max_symbol; uint16_t num_htrees; } HuffmanTreeGroup; diff --git a/c/dec/prefix.h b/c/dec/prefix.h index aa776c7..3ea062d 100644 --- a/c/dec/prefix.h +++ b/c/dec/prefix.h @@ -5,8 +5,7 @@ */ /* Lookup tables to map prefix codes to value ranges. This is used during - decoding of the block lengths, literal insertion lengths and copy lengths. -*/ + decoding of the block lengths, literal insertion lengths and copy lengths. */ #ifndef BROTLI_DEC_PREFIX_H_ #define BROTLI_DEC_PREFIX_H_ @@ -14,8 +13,8 @@ #include "../common/constants.h" #include <brotli/types.h> -/* Represents the range of values belonging to a prefix code: */ -/* [offset, offset + 2^nbits) */ +/* Represents the range of values belonging to a prefix code: + [offset, offset + 2^nbits) */ struct PrefixCodeRange { uint16_t offset; uint8_t nbits; diff --git a/c/dec/state.c b/c/dec/state.c index eaec823..e0b37c2 100644 --- a/c/dec/state.c +++ b/c/dec/state.c @@ -15,25 +15,11 @@ extern "C" { #endif -static void* DefaultAllocFunc(void* opaque, size_t size) { - BROTLI_UNUSED(opaque); - return malloc(size); -} - -static void DefaultFreeFunc(void* opaque, void* address) { - BROTLI_UNUSED(opaque); - free(address); -} - -void BrotliDecoderStateInit(BrotliDecoderState* s) { - BrotliDecoderStateInitWithCustomAllocators(s, 0, 0, 0); -} - -void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s, +BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s, brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { if (!alloc_func) { - s->alloc_func = DefaultAllocFunc; - s->free_func = DefaultFreeFunc; + s->alloc_func = BrotliDefaultAllocFunc; + s->free_func = BrotliDefaultFreeFunc; s->memory_manager_opaque = 0; } else { s->alloc_func = alloc_func; @@ -45,6 +31,7 @@ void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s, BrotliInitBitReader(&s->br); s->state = BROTLI_STATE_UNINITED; + s->large_window = 0; s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE; s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE; s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE; @@ -103,13 +90,16 @@ void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s, s->mtf_upper_bound = 63; s->dictionary = BrotliGetDictionary(); + s->transforms = BrotliGetTransforms(); + + return BROTLI_TRUE; } void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) { s->meta_block_remaining_len = 0; - s->block_length[0] = 1U << 28; - s->block_length[1] = 1U << 28; - s->block_length[2] = 1U << 28; + s->block_length[0] = 1U << 24; + s->block_length[1] = 1U << 24; + s->block_length[2] = 1U << 24; s->num_block_types[0] = 1; s->num_block_types[1] = 1; s->num_block_types[2] = 1; @@ -126,8 +116,7 @@ void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) { s->literal_htree = NULL; s->dist_context_map_slice = NULL; s->dist_htree_index = 0; - s->context_lookup1 = NULL; - s->context_lookup2 = NULL; + s->context_lookup = NULL; s->literal_hgroup.codes = NULL; s->literal_hgroup.htrees = NULL; s->insert_copy_hgroup.codes = NULL; @@ -153,7 +142,8 @@ void BrotliDecoderStateCleanup(BrotliDecoderState* s) { } BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s, - HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t ntrees) { + HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t max_symbol, + uint32_t ntrees) { /* Pack two allocations into one */ const size_t max_table_size = kMaxHuffmanTableSize[(alphabet_size + 31) >> 5]; const size_t code_size = sizeof(HuffmanCode) * ntrees * max_table_size; @@ -162,6 +152,7 @@ BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s, HuffmanCode** p = (HuffmanCode**)BROTLI_DECODER_ALLOC(s, code_size + htree_size); group->alphabet_size = (uint16_t)alphabet_size; + group->max_symbol = (uint16_t)max_symbol; group->num_htrees = (uint16_t)ntrees; group->htrees = p; group->codes = (HuffmanCode*)(&p[ntrees]); diff --git a/c/dec/state.h b/c/dec/state.h index 069beca..d28b639 100644 --- a/c/dec/state.h +++ b/c/dec/state.h @@ -12,6 +12,7 @@ #include "../common/constants.h" #include "../common/dictionary.h" #include "../common/platform.h" +#include "../common/transform.h" #include <brotli/types.h> #include "./bit_reader.h" #include "./huffman.h" @@ -22,6 +23,8 @@ extern "C" { typedef enum { BROTLI_STATE_UNINITED, + BROTLI_STATE_LARGE_WINDOW_BITS, + BROTLI_STATE_INITIALIZE, BROTLI_STATE_METABLOCK_BEGIN, BROTLI_STATE_METABLOCK_HEADER, BROTLI_STATE_METABLOCK_HEADER_2, @@ -126,23 +129,22 @@ struct BrotliDecoderStateStruct { uint8_t* ringbuffer; uint8_t* ringbuffer_end; HuffmanCode* htree_command; - const uint8_t* context_lookup1; - const uint8_t* context_lookup2; + const uint8_t* context_lookup; uint8_t* context_map_slice; uint8_t* dist_context_map_slice; - /* This ring buffer holds a few past copy distances that will be used by */ - /* some special distance codes. */ + /* This ring buffer holds a few past copy distances that will be used by + some special distance codes. */ HuffmanTreeGroup literal_hgroup; HuffmanTreeGroup insert_copy_hgroup; HuffmanTreeGroup distance_hgroup; HuffmanCode* block_type_trees; HuffmanCode* block_len_trees; /* This is true if the literal context map histogram type always matches the - block type. It is then not needed to keep the context (faster decoding). */ + block type. It is then not needed to keep the context (faster decoding). */ int trivial_literal_context; - /* Distance context is actual after command is decoded and before distance - is computed. After distance computation it is used as a temporary variable. */ + /* Distance context is actual after command is decoded and before distance is + computed. After distance computation it is used as a temporary variable. */ int distance_context; int meta_block_remaining_len; uint32_t block_length_index; @@ -162,11 +164,11 @@ struct BrotliDecoderStateStruct { int copy_length; int distance_code; - /* For partial write operations */ - size_t rb_roundtrips; /* How many times we went around the ring-buffer */ - size_t partial_pos_out; /* How much output to the user in total */ + /* For partial write operations. */ + size_t rb_roundtrips; /* how many times we went around the ring-buffer */ + size_t partial_pos_out; /* how much output to the user in total */ - /* For ReadHuffmanCode */ + /* For ReadHuffmanCode. */ uint32_t symbol; uint32_t repeat; uint32_t space; @@ -180,25 +182,26 @@ struct BrotliDecoderStateStruct { /* Tails of symbol chains. */ int next_symbol[32]; uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES]; - /* Population counts for the code lengths */ + /* Population counts for the code lengths. */ uint16_t code_length_histo[16]; - /* For HuffmanTreeGroupDecode */ + /* For HuffmanTreeGroupDecode. */ int htree_index; HuffmanCode* next; - /* For DecodeContextMap */ + /* For DecodeContextMap. */ uint32_t context_index; uint32_t max_run_length_prefix; uint32_t code; HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272]; - /* For InverseMoveToFrontTransform */ + /* For InverseMoveToFrontTransform. */ uint32_t mtf_upper_bound; uint32_t mtf[64 + 1]; - /* less used attributes are in the end of this struct */ - /* States inside function calls */ + /* Less used attributes are at the end of this struct. */ + + /* States inside function calls. */ BrotliRunningMetablockHeaderState substate_metablock_header; BrotliRunningTreeGroupState substate_tree_group; BrotliRunningContextMapState substate_context_map; @@ -212,6 +215,7 @@ struct BrotliDecoderStateStruct { unsigned int is_metadata : 1; unsigned int should_wrap_ringbuffer : 1; unsigned int canny_ringbuffer_allocation : 1; + unsigned int large_window : 1; unsigned int size_nibbles : 8; uint32_t window_bits; @@ -222,6 +226,7 @@ struct BrotliDecoderStateStruct { uint8_t* context_modes; const BrotliDictionary* dictionary; + const BrotliTransforms* transforms; uint32_t trivial_literal_contexts[8]; /* 256 bits */ }; @@ -229,17 +234,15 @@ struct BrotliDecoderStateStruct { typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal; #define BrotliDecoderState BrotliDecoderStateInternal -BROTLI_INTERNAL void BrotliDecoderStateInit(BrotliDecoderState* s); -BROTLI_INTERNAL void BrotliDecoderStateInitWithCustomAllocators( - BrotliDecoderState* s, brotli_alloc_func alloc_func, - brotli_free_func free_func, void* opaque); +BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s, + brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque); BROTLI_INTERNAL void BrotliDecoderStateCleanup(BrotliDecoderState* s); BROTLI_INTERNAL void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s); BROTLI_INTERNAL void BrotliDecoderStateCleanupAfterMetablock( BrotliDecoderState* s); BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit( BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size, - uint32_t ntrees); + uint32_t max_symbol, uint32_t ntrees); #define BROTLI_DECODER_ALLOC(S, L) S->alloc_func(S->memory_manager_opaque, L) diff --git a/c/dec/transform.h b/c/dec/transform.h deleted file mode 100644 index e1d96ff..0000000 --- a/c/dec/transform.h +++ /dev/null @@ -1,300 +0,0 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. - - Distributed under MIT license. - See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Transformations on dictionary words. */ - -#ifndef BROTLI_DEC_TRANSFORM_H_ -#define BROTLI_DEC_TRANSFORM_H_ - -#include "../common/platform.h" -#include <brotli/types.h> - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -enum WordTransformType { - kIdentity = 0, - kOmitLast1 = 1, - kOmitLast2 = 2, - kOmitLast3 = 3, - kOmitLast4 = 4, - kOmitLast5 = 5, - kOmitLast6 = 6, - kOmitLast7 = 7, - kOmitLast8 = 8, - kOmitLast9 = 9, - kUppercaseFirst = 10, - kUppercaseAll = 11, - kOmitFirst1 = 12, - kOmitFirst2 = 13, - kOmitFirst3 = 14, - kOmitFirst4 = 15, - kOmitFirst5 = 16, - kOmitFirst6 = 17, - kOmitFirst7 = 18, - kOmitFirst8 = 19, - kOmitFirst9 = 20 -}; - -typedef struct { - const uint8_t prefix_id; - const uint8_t transform; - const uint8_t suffix_id; -} Transform; - -static const char kPrefixSuffix[208] = - "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0" - " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0" - " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0" - " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous "; - -enum { - /* EMPTY = "" - SP = " " - DQUOT = "\"" - SQUOT = "'" - CLOSEBR = "]" - OPEN = "(" - SLASH = "/" - NBSP = non-breaking space "\0xc2\xa0" - */ - kPFix_EMPTY = 0, - kPFix_SP = 1, - kPFix_COMMASP = 3, - kPFix_SPofSPtheSP = 6, - kPFix_SPtheSP = 9, - kPFix_eSP = 12, - kPFix_SPofSP = 15, - kPFix_sSP = 20, - kPFix_DOT = 23, - kPFix_SPandSP = 25, - kPFix_SPinSP = 31, - kPFix_DQUOT = 36, - kPFix_SPtoSP = 38, - kPFix_DQUOTGT = 43, - kPFix_NEWLINE = 46, - kPFix_DOTSP = 48, - kPFix_CLOSEBR = 51, - kPFix_SPforSP = 53, - kPFix_SPaSP = 59, - kPFix_SPthatSP = 63, - kPFix_SQUOT = 70, - kPFix_SPwithSP = 72, - kPFix_SPfromSP = 79, - kPFix_SPbySP = 86, - kPFix_OPEN = 91, - kPFix_DOTSPTheSP = 93, - kPFix_SPonSP = 100, - kPFix_SPasSP = 105, - kPFix_SPisSP = 110, - kPFix_ingSP = 115, - kPFix_NEWLINETAB = 120, - kPFix_COLON = 123, - kPFix_edSP = 125, - kPFix_EQDQUOT = 129, - kPFix_SPatSP = 132, - kPFix_lySP = 137, - kPFix_COMMA = 141, - kPFix_EQSQUOT = 143, - kPFix_DOTcomSLASH = 146, - kPFix_DOTSPThisSP = 152, - kPFix_SPnotSP = 160, - kPFix_erSP = 166, - kPFix_alSP = 170, - kPFix_fulSP = 174, - kPFix_iveSP = 179, - kPFix_lessSP = 184, - kPFix_estSP = 190, - kPFix_izeSP = 195, - kPFix_NBSP = 200, - kPFix_ousSP = 203 -}; - -static const Transform kTransforms[] = { - { kPFix_EMPTY, kIdentity, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SP }, - { kPFix_SP, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_SPtheSP }, - { kPFix_SP, kIdentity, kPFix_EMPTY }, - { kPFix_sSP, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_SPofSP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPandSP }, - { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY }, - { kPFix_COMMASP, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_COMMASP }, - { kPFix_SP, kUppercaseFirst, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_SPinSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPtoSP }, - { kPFix_eSP, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_DQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_DOT }, - { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT }, - { kPFix_EMPTY, kIdentity, kPFix_NEWLINE }, - { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR }, - { kPFix_EMPTY, kIdentity, kPFix_SPforSP }, - { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPaSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPthatSP }, - { kPFix_SP, kUppercaseFirst, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_DOTSP }, - { kPFix_DOT, kIdentity, kPFix_EMPTY }, - { kPFix_SP, kIdentity, kPFix_COMMASP }, - { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPwithSP }, - { kPFix_EMPTY, kIdentity, kPFix_SQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_SPfromSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPbySP }, - { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY }, - { kPFix_SPtheSP, kIdentity, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPonSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPasSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPisSP }, - { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast1, kPFix_ingSP }, - { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB }, - { kPFix_EMPTY, kIdentity, kPFix_COLON }, - { kPFix_SP, kIdentity, kPFix_DOTSP }, - { kPFix_EMPTY, kIdentity, kPFix_edSP }, - { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_OPEN }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP }, - { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPatSP }, - { kPFix_EMPTY, kIdentity, kPFix_lySP }, - { kPFix_SPtheSP, kIdentity, kPFix_SPofSP }, - { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY }, - { kPFix_SP, kUppercaseFirst, kPFix_COMMASP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT }, - { kPFix_DOT, kIdentity, kPFix_OPEN }, - { kPFix_EMPTY, kUppercaseAll, kPFix_SP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT }, - { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT }, - { kPFix_SP, kIdentity, kPFix_DOT }, - { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY }, - { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP }, - { kPFix_EMPTY, kIdentity, kPFix_COMMA }, - { kPFix_DOT, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT }, - { kPFix_EMPTY, kIdentity, kPFix_SPnotSP }, - { kPFix_SP, kIdentity, kPFix_EQDQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_erSP }, - { kPFix_SP, kUppercaseAll, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_alSP }, - { kPFix_SP, kUppercaseAll, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT }, - { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP }, - { kPFix_SP, kIdentity, kPFix_OPEN }, - { kPFix_EMPTY, kIdentity, kPFix_fulSP }, - { kPFix_SP, kUppercaseFirst, kPFix_DOTSP }, - { kPFix_EMPTY, kIdentity, kPFix_iveSP }, - { kPFix_EMPTY, kIdentity, kPFix_lessSP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_estSP }, - { kPFix_SP, kUppercaseFirst, kPFix_DOT }, - { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT }, - { kPFix_SP, kIdentity, kPFix_EQSQUOT }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA }, - { kPFix_EMPTY, kIdentity, kPFix_izeSP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_DOT }, - { kPFix_NBSP, kIdentity, kPFix_EMPTY }, - { kPFix_SP, kIdentity, kPFix_COMMA }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT }, - { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_ousSP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT }, - { kPFix_SP, kUppercaseFirst, kPFix_COMMA }, - { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT }, - { kPFix_SP, kUppercaseAll, kPFix_COMMASP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA }, - { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN }, - { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP }, - { kPFix_SP, kUppercaseAll, kPFix_DOT }, - { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT }, - { kPFix_SP, kUppercaseAll, kPFix_DOTSP }, - { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT }, - { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT }, - { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT }, -}; - -static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]); - -static int ToUpperCase(uint8_t* p) { - if (p[0] < 0xc0) { - if (p[0] >= 'a' && p[0] <= 'z') { - p[0] ^= 32; - } - return 1; - } - /* An overly simplified uppercasing model for UTF-8. */ - if (p[0] < 0xe0) { - p[1] ^= 32; - return 2; - } - /* An arbitrary transform for three byte characters. */ - p[2] ^= 5; - return 3; -} - -static BROTLI_NOINLINE int BrotliTransformDictionaryWord( - uint8_t* dst, const uint8_t* word, int len, int transform) { - int idx = 0; - { - const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id]; - while (*prefix) { dst[idx++] = (uint8_t)*prefix++; } - } - { - const int t = kTransforms[transform].transform; - int i = 0; - int skip = t - (kOmitFirst1 - 1); - if (skip > 0) { - word += skip; - len -= skip; - } else if (t <= kOmitLast9) { - len -= t; - } - while (i < len) { dst[idx++] = word[i++]; } - if (t == kUppercaseFirst) { - ToUpperCase(&dst[idx - len]); - } else if (t == kUppercaseAll) { - uint8_t* uppercase = &dst[idx - len]; - while (len > 0) { - int step = ToUpperCase(uppercase); - uppercase += step; - len -= step; - } - } - } - { - const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id]; - while (*suffix) { dst[idx++] = (uint8_t)*suffix++; } - return idx; - } -} - -#if defined(__cplusplus) || defined(c_plusplus) -} /* extern "C" */ -#endif - -#endif /* BROTLI_DEC_TRANSFORM_H_ */ |