aboutsummaryrefslogtreecommitdiff
path: root/c
diff options
context:
space:
mode:
authorEugene Kliuchnikov <eustas@google.com>2018-02-26 09:04:36 -0500
committerGitHub <noreply@github.com>2018-02-26 09:04:36 -0500
commit35e69fc7cf9421ab04ffc9d52cb36d07fa12984a (patch)
treea1ed614391936d455da2b0610ef8e8caf88b4289 /c
parent3af18990f50d8f040038aaa08c41f5d27d62efb5 (diff)
downloadbrotli-35e69fc7cf9421ab04ffc9d52cb36d07fa12984a.tar.gz
New feature: "Large Window Brotli" (#640)
* New feature: "Large Window Brotli" By setting special encoder/decoder flag it is now possible to extend LZ-window up to 30 bits; though produced stream will not be RFC7932 compliant. Added new dictionary generator - "DSH". It combines speed of "Sieve" and quality of "DM". Plus utilities to prepare train corpora (remove unique strings). Improved compression ratio: now two sub-blocks could be stitched: the last copy command could be extended to span the next sub-block. Fixed compression ineffectiveness caused by floating numbers rounding and wrong cost heuristic. Other C changes: - combined / moved `context.h` to `common` - moved transforms to `common` - unified some aspects of code formatting - added an abstraction for encoder (static) dictionary - moved default allocator/deallocator functions to `common` brotli CLI: - window size is auto-adjusted if not specified explicitly Java: - added "eager" decoding both to JNI wrapper and pure decoder - huge speed-up of `DictionaryData` initialization * Add dictionaryless compressed dictionary * Fix `sources.lst` * Fix `sources.lst` and add a note that `libtool` is also required. * Update setup.py * Fix `EagerStreamTest` * Fix BUILD file * Add missing `libdivsufsort` dependency * Fix "unused parameter" warning.
Diffstat (limited to 'c')
-rw-r--r--c/common/constants.h19
-rwxr-xr-x[-rw-r--r--]c/common/context.h (renamed from c/dec/context.h)354
-rwxr-xr-xc/common/dictionary.bin.brbin0 -> 51687 bytes
-rwxr-xr-xc/common/platform.h22
-rwxr-xr-xc/common/transform.c236
-rwxr-xr-xc/common/transform.h80
-rw-r--r--c/dec/bit_reader.h33
-rw-r--r--c/dec/decode.c455
-rw-r--r--c/dec/huffman.c22
-rw-r--r--c/dec/huffman.h20
-rw-r--r--c/dec/prefix.h7
-rw-r--r--c/dec/state.c37
-rw-r--r--c/dec/state.h47
-rw-r--r--c/dec/transform.h300
-rw-r--r--c/enc/backward_references.c21
-rw-r--r--c/enc/backward_references.h1
-rw-r--r--c/enc/backward_references_hq.c91
-rw-r--r--c/enc/backward_references_hq.h16
-rw-r--r--c/enc/backward_references_inc.h13
-rw-r--r--c/enc/bit_cost.h8
-rw-r--r--c/enc/block_encoder_inc.h13
-rw-r--r--c/enc/block_splitter.c2
-rw-r--r--c/enc/block_splitter_inc.h2
-rw-r--r--c/enc/brotli_bit_stream.c187
-rw-r--r--c/enc/brotli_bit_stream.h50
-rw-r--r--c/enc/command.h30
-rw-r--r--c/enc/compress_fragment.c4
-rw-r--r--c/enc/compress_fragment_two_pass.c2
-rw-r--r--c/enc/context.h184
-rw-r--r--c/enc/encode.c338
-rwxr-xr-xc/enc/encoder_dict.c32
-rwxr-xr-xc/enc/encoder_dict.h42
-rw-r--r--c/enc/entropy_encode.c22
-rw-r--r--c/enc/entropy_encode.h8
-rw-r--r--c/enc/entropy_encode_static.h4
-rw-r--r--c/enc/hash.h35
-rw-r--r--c/enc/hash_forgetful_chain_inc.h15
-rw-r--r--c/enc/hash_longest_match64_inc.h16
-rw-r--r--c/enc/hash_longest_match_inc.h14
-rw-r--r--c/enc/hash_longest_match_quickly_inc.h19
-rw-r--r--c/enc/hash_to_binary_tree_inc.h10
-rw-r--r--c/enc/histogram.c15
-rw-r--r--c/enc/histogram.h7
-rw-r--r--c/enc/histogram_inc.h2
-rw-r--r--c/enc/literal_cost.c8
-rw-r--r--c/enc/literal_cost.h2
-rw-r--r--c/enc/memory.c14
-rw-r--r--c/enc/metablock.c24
-rw-r--r--c/enc/metablock.h7
-rwxr-xr-xc/enc/params.h11
-rw-r--r--c/enc/prefix.h5
-rw-r--r--c/enc/quality.h11
-rw-r--r--c/enc/ringbuffer.h24
-rw-r--r--c/enc/static_dict.c80
-rw-r--r--c/enc/static_dict.h5
-rw-r--r--c/enc/static_dict_lut.h2
-rw-r--r--c/enc/utf8_util.c40
-rw-r--r--c/enc/write_bits.h18
-rw-r--r--c/include/brotli/decode.h15
-rw-r--r--c/include/brotli/encode.h11
-rw-r--r--c/tools/brotli.c189
61 files changed, 1778 insertions, 1523 deletions
diff --git a/c/common/constants.h b/c/common/constants.h
index 416ec55..26edcd5 100644
--- a/c/common/constants.h
+++ b/c/common/constants.h
@@ -28,18 +28,25 @@
/* "code length of 8 is repeated" */
#define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8
+/* "Large Window Brotli" */
+#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U
+#define BROTLI_LARGE_MIN_WBITS 10
+#define BROTLI_LARGE_MAX_WBITS 30
+
/* Specification: 4. Encoding of distances */
#define BROTLI_NUM_DISTANCE_SHORT_CODES 16
#define BROTLI_MAX_NPOSTFIX 3
#define BROTLI_MAX_NDIRECT 120
#define BROTLI_MAX_DISTANCE_BITS 24U
-/* BROTLI_NUM_DISTANCE_SYMBOLS == 520 */
-#define BROTLI_NUM_DISTANCE_SYMBOLS (BROTLI_NUM_DISTANCE_SHORT_CODES + \
- BROTLI_MAX_NDIRECT + \
- (BROTLI_MAX_DISTANCE_BITS << \
- (BROTLI_MAX_NPOSTFIX + 1)))
-/* Distance that is guaranteed to be representable in any stream. */
+#define BROTLI_DISTANCE_ALPHABET_SIZE(NDIRECT, NPOSTFIX, MAXNBITS) ( \
+ BROTLI_NUM_DISTANCE_SHORT_CODES + (NDIRECT) + \
+ ((MAXNBITS) << ((NPOSTFIX) + 1)))
+/* BROTLI_NUM_DISTANCE_SYMBOLS == 1128 */
+#define BROTLI_NUM_DISTANCE_SYMBOLS \
+ BROTLI_DISTANCE_ALPHABET_SIZE( \
+ BROTLI_MAX_NDIRECT, BROTLI_MAX_NPOSTFIX, BROTLI_LARGE_MAX_DISTANCE_BITS)
#define BROTLI_MAX_DISTANCE 0x3FFFFFC
+#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC
/* 7.1. Context modes and context ID lookup for literals */
/* "context IDs for literals are in the range of 0..63" */
diff --git a/c/dec/context.h b/c/common/context.h
index 9402cbe..24b3eb4 100644..100755
--- a/c/dec/context.h
+++ b/c/common/context.h
@@ -6,110 +6,171 @@
/* Lookup table to map the previous two bytes to a context id.
- There are four different context modeling modes defined here:
- CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
- CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
- CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
- CONTEXT_SIGNED: second-order context model tuned for signed integers.
-
- The context id for the UTF8 context model is calculated as follows. If p1
- and p2 are the previous two bytes, we calculate the context as
-
- context = kContextLookup[p1] | kContextLookup[p2 + 256].
-
- If the previous two bytes are ASCII characters (i.e. < 128), this will be
- equivalent to
-
- context = 4 * context1(p1) + context2(p2),
-
- where context1 is based on the previous byte in the following way:
-
- 0 : non-ASCII control
- 1 : \t, \n, \r
- 2 : space
- 3 : other punctuation
- 4 : " '
- 5 : %
- 6 : ( < [ {
- 7 : ) > ] }
- 8 : , ; :
- 9 : .
- 10 : =
- 11 : number
- 12 : upper-case vowel
- 13 : upper-case consonant
- 14 : lower-case vowel
- 15 : lower-case consonant
-
- and context2 is based on the second last byte:
-
- 0 : control, space
- 1 : punctuation
- 2 : upper-case letter, number
- 3 : lower-case letter
-
- If the last byte is ASCII, and the second last byte is not (in a valid UTF8
- stream it will be a continuation byte, value between 128 and 191), the
- context is the same as if the second last byte was an ASCII control or space.
-
- If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
- be a continuation byte and the context id is 2 or 3 depending on the LSB of
- the last byte and to a lesser extent on the second last byte if it is ASCII.
-
- If the last byte is a UTF8 continuation byte, the second last byte can be:
- - continuation byte: the next byte is probably ASCII or lead byte (assuming
- 4-byte UTF8 characters are rare) and the context id is 0 or 1.
- - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
- - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
-
- The possible value combinations of the previous two bytes, the range of
- context ids and the type of the next byte is summarized in the table below:
-
- |--------\-----------------------------------------------------------------|
- | \ Last byte |
- | Second \---------------------------------------------------------------|
- | last byte \ ASCII | cont. byte | lead byte |
- | \ (0-127) | (128-191) | (192-) |
- |=============|===================|=====================|==================|
- | ASCII | next: ASCII/lead | not valid | next: cont. |
- | (0-127) | context: 4 - 63 | | context: 2 - 3 |
- |-------------|-------------------|---------------------|------------------|
- | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
- | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
- |-------------|-------------------|---------------------|------------------|
- | lead byte | not valid | next: ASCII/lead | not valid |
- | (192-207) | | context: 0 - 1 | |
- |-------------|-------------------|---------------------|------------------|
- | lead byte | not valid | next: cont. | not valid |
- | (208-) | | context: 2 - 3 | |
- |-------------|-------------------|---------------------|------------------|
-
- The context id for the signed context mode is calculated as:
-
- context = (kContextLookup[512 + p1] << 3) | kContextLookup[512 + p2].
-
- For any context modeling modes, the context ids can be calculated by |-ing
- together two lookups from one table using context model dependent offsets:
-
- context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2].
-
- where offset1 and offset2 are dependent on the context mode.
+ There are four different context modeling modes defined here:
+ CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
+ CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
+ CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
+ CONTEXT_SIGNED: second-order context model tuned for signed integers.
+
+ If |p1| and |p2| are the previous two bytes, and |mode| is current context
+ mode, we calculate the context as:
+
+ context = ContextLut(mode)[p1] | ContextLut(mode)[p2 + 256].
+
+ For CONTEXT_UTF8 mode, if the previous two bytes are ASCII characters
+ (i.e. < 128), this will be equivalent to
+
+ context = 4 * context1(p1) + context2(p2),
+
+ where context1 is based on the previous byte in the following way:
+
+ 0 : non-ASCII control
+ 1 : \t, \n, \r
+ 2 : space
+ 3 : other punctuation
+ 4 : " '
+ 5 : %
+ 6 : ( < [ {
+ 7 : ) > ] }
+ 8 : , ; :
+ 9 : .
+ 10 : =
+ 11 : number
+ 12 : upper-case vowel
+ 13 : upper-case consonant
+ 14 : lower-case vowel
+ 15 : lower-case consonant
+
+ and context2 is based on the second last byte:
+
+ 0 : control, space
+ 1 : punctuation
+ 2 : upper-case letter, number
+ 3 : lower-case letter
+
+ If the last byte is ASCII, and the second last byte is not (in a valid UTF8
+ stream it will be a continuation byte, value between 128 and 191), the
+ context is the same as if the second last byte was an ASCII control or space.
+
+ If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
+ be a continuation byte and the context id is 2 or 3 depending on the LSB of
+ the last byte and to a lesser extent on the second last byte if it is ASCII.
+
+ If the last byte is a UTF8 continuation byte, the second last byte can be:
+ - continuation byte: the next byte is probably ASCII or lead byte (assuming
+ 4-byte UTF8 characters are rare) and the context id is 0 or 1.
+ - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
+ - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
+
+ The possible value combinations of the previous two bytes, the range of
+ context ids and the type of the next byte is summarized in the table below:
+
+ |--------\-----------------------------------------------------------------|
+ | \ Last byte |
+ | Second \---------------------------------------------------------------|
+ | last byte \ ASCII | cont. byte | lead byte |
+ | \ (0-127) | (128-191) | (192-) |
+ |=============|===================|=====================|==================|
+ | ASCII | next: ASCII/lead | not valid | next: cont. |
+ | (0-127) | context: 4 - 63 | | context: 2 - 3 |
+ |-------------|-------------------|---------------------|------------------|
+ | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
+ | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
+ |-------------|-------------------|---------------------|------------------|
+ | lead byte | not valid | next: ASCII/lead | not valid |
+ | (192-207) | | context: 0 - 1 | |
+ |-------------|-------------------|---------------------|------------------|
+ | lead byte | not valid | next: cont. | not valid |
+ | (208-) | | context: 2 - 3 | |
+ |-------------|-------------------|---------------------|------------------|
*/
-#ifndef BROTLI_DEC_CONTEXT_H_
-#define BROTLI_DEC_CONTEXT_H_
+#ifndef BROTLI_COMMON_CONTEXT_H_
+#define BROTLI_COMMON_CONTEXT_H_
#include <brotli/types.h>
-enum ContextType {
+typedef enum ContextType {
CONTEXT_LSB6 = 0,
CONTEXT_MSB6 = 1,
CONTEXT_UTF8 = 2,
CONTEXT_SIGNED = 3
-};
+} ContextType;
/* Common context lookup table for all context modes. */
-static const uint8_t kContextLookup[1792] = {
+static const uint8_t kContextLookup[2048] = {
+ /* CONTEXT_LSB6, last byte. */
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+
+ /* CONTEXT_LSB6, second last byte, */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ /* CONTEXT_MSB6, last byte. */
+ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+ 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
+ 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
+ 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
+ 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
+ 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
+ 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
+ 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
+ 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
+ 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
+ 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
+ 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
+ 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
+ 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
+ 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
+
+ /* CONTEXT_MSB6, second last byte, */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
/* CONTEXT_UTF8, last byte. */
/* ASCII range. */
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
@@ -130,6 +191,7 @@ static const uint8_t kContextLookup[1792] = {
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+
/* CONTEXT_UTF8 second last byte. */
/* ASCII range. */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -150,23 +212,7 @@ static const uint8_t kContextLookup[1792] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- /* CONTEXT_SIGNED, second last byte. */
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+
/* CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits. */
0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
@@ -184,68 +230,32 @@ static const uint8_t kContextLookup[1792] = {
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
- /* CONTEXT_LSB6, last byte. */
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
- /* CONTEXT_MSB6, last byte. */
- 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
- 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
- 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
- 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
- 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
- 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
- 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
- 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
- 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
- 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
- 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
- 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
- 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
- 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
- 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
- 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
- /* CONTEXT_{M,L}SB6, second last byte, */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-};
-static const int kContextLookupOffsets[8] = {
- /* CONTEXT_LSB6 */
- 1024, 1536,
- /* CONTEXT_MSB6 */
- 1280, 1536,
- /* CONTEXT_UTF8 */
- 0, 256,
- /* CONTEXT_SIGNED */
- 768, 512,
+ /* CONTEXT_SIGNED, second last byte. */
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
};
-#endif /* BROTLI_DEC_CONTEXT_H_ */
+typedef const uint8_t* ContextLut;
+
+/* typeof(MODE) == ContextType; returns ContextLut */
+#define BROTLI_CONTEXT_LUT(MODE) (&kContextLookup[(MODE) << 9])
+
+/* typeof(LUT) == ContextLut */
+#define BROTLI_CONTEXT(P1, P2, LUT) ((LUT)[P1] | ((LUT) + 256)[P2])
+
+#endif /* BROTLI_COMMON_CONTEXT_H_ */
diff --git a/c/common/dictionary.bin.br b/c/common/dictionary.bin.br
new file mode 100755
index 0000000..6a55d42
--- /dev/null
+++ b/c/common/dictionary.bin.br
Binary files differ
diff --git a/c/common/platform.h b/c/common/platform.h
index 804fd25..d6fd3ee 100755
--- a/c/common/platform.h
+++ b/c/common/platform.h
@@ -10,6 +10,7 @@
#define BROTLI_COMMON_PLATFORM_H_
#include <string.h> /* memcpy */
+#include <stdlib.h> /* malloc, free */
#include <brotli/port.h>
#include <brotli/types.h>
@@ -204,7 +205,7 @@ static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
-#else /* BROTLI_ALIGNED_READ */
+#else /* BROTLI_ALIGNED_READ */
/* Unaligned memory access is allowed: just cast pointer to requested type. */
static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) {
return *(const uint16_t*)p;
@@ -218,7 +219,7 @@ static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
*(uint64_t*)p = v;
}
-#endif /* BROTLI_ALIGNED_READ */
+#endif /* BROTLI_ALIGNED_READ */
#if BROTLI_LITTLE_ENDIAN
/* Straight endianness. Just read / write values. */
@@ -390,6 +391,18 @@ BROTLI_MIN_MAX(size_t) BROTLI_MIN_MAX(uint32_t) BROTLI_MIN_MAX(uint8_t)
(A)[(J)] = __brotli_swap_tmp; \
}
+/* Default brotli_alloc_func */
+static void* BrotliDefaultAllocFunc(void* opaque, size_t size) {
+ BROTLI_UNUSED(opaque);
+ return malloc(size);
+}
+
+/* Default brotli_free_func */
+static void BrotliDefaultFreeFunc(void* opaque, void* address) {
+ BROTLI_UNUSED(opaque);
+ free(address);
+}
+
BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) {
BROTLI_UNUSED(BrotliSuppressUnusedFunctions);
BROTLI_UNUSED(BrotliUnalignedRead16);
@@ -413,6 +426,11 @@ BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) {
BROTLI_UNUSED(brotli_max_uint32_t);
BROTLI_UNUSED(brotli_min_uint8_t);
BROTLI_UNUSED(brotli_max_uint8_t);
+ BROTLI_UNUSED(BrotliDefaultAllocFunc);
+ BROTLI_UNUSED(BrotliDefaultFreeFunc);
+#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG)
+ BROTLI_UNUSED(BrotliDump);
+#endif
}
#endif /* BROTLI_COMMON_PLATFORM_H_ */
diff --git a/c/common/transform.c b/c/common/transform.c
new file mode 100755
index 0000000..53fe4f6
--- /dev/null
+++ b/c/common/transform.c
@@ -0,0 +1,236 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./platform.h"
+#include "./transform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* RFC 7932 transforms string data */
+static const char kPrefixSuffix[217] =
+ "\1 \2, \10 of the \4 of \2s \1.\5 and \4 "
+/* 0x _0 _2 __5 _E _3 _6 _8 _E */
+ "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 "
+/* 2x _3_ _5 _A_ _D_ _F _2 _4 _A _E */
+ "that \1\'\6 with \6 from \4 by \1(\6. T"
+/* 4x _5_ _7 _E _5 _A _C */
+ "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed "
+/* 6x _3 _8 _D _2 _7_ _ _A _C */
+ "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5"
+/* 8x _0 _ _3 _8 _C _E _ _1 _7 _F */
+ " not \3er \3al \4ful \4ive \5less \4es"
+/* Ax _5 _9 _D _2 _7 _D */
+ "t \4ize \2\xc2\xa0\4ous \5 the \2e \0";
+/* Cx _2 _7___ ___ _A _F _5 _8 */
+
+static const uint16_t kPrefixSuffixMap[50] = {
+ 0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,
+ 0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E,
+ 0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C,
+ 0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9,
+ 0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8
+};
+
+/* RFC 7932 transforms */
+static const uint8_t kTransformsData[] = {
+ 49, BROTLI_TRANSFORM_IDENTITY, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 0,
+ 0, BROTLI_TRANSFORM_IDENTITY, 0,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
+ 49, BROTLI_TRANSFORM_IDENTITY, 47,
+ 0, BROTLI_TRANSFORM_IDENTITY, 49,
+ 4, BROTLI_TRANSFORM_IDENTITY, 0,
+ 49, BROTLI_TRANSFORM_IDENTITY, 3,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 6,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_1, 49,
+ 1, BROTLI_TRANSFORM_IDENTITY, 0,
+ 49, BROTLI_TRANSFORM_IDENTITY, 1,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
+ 49, BROTLI_TRANSFORM_IDENTITY, 7,
+ 49, BROTLI_TRANSFORM_IDENTITY, 9,
+ 48, BROTLI_TRANSFORM_IDENTITY, 0,
+ 49, BROTLI_TRANSFORM_IDENTITY, 8,
+ 49, BROTLI_TRANSFORM_IDENTITY, 5,
+ 49, BROTLI_TRANSFORM_IDENTITY, 10,
+ 49, BROTLI_TRANSFORM_IDENTITY, 11,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_3, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 13,
+ 49, BROTLI_TRANSFORM_IDENTITY, 14,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_2, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 15,
+ 49, BROTLI_TRANSFORM_IDENTITY, 16,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 12,
+ 5, BROTLI_TRANSFORM_IDENTITY, 49,
+ 0, BROTLI_TRANSFORM_IDENTITY, 1,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 18,
+ 49, BROTLI_TRANSFORM_IDENTITY, 17,
+ 49, BROTLI_TRANSFORM_IDENTITY, 19,
+ 49, BROTLI_TRANSFORM_IDENTITY, 20,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49,
+ 47, BROTLI_TRANSFORM_IDENTITY, 49,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_4, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 22,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 23,
+ 49, BROTLI_TRANSFORM_IDENTITY, 24,
+ 49, BROTLI_TRANSFORM_IDENTITY, 25,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_7, 49,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_1, 26,
+ 49, BROTLI_TRANSFORM_IDENTITY, 27,
+ 49, BROTLI_TRANSFORM_IDENTITY, 28,
+ 0, BROTLI_TRANSFORM_IDENTITY, 12,
+ 49, BROTLI_TRANSFORM_IDENTITY, 29,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_6, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 21,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_8, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 31,
+ 49, BROTLI_TRANSFORM_IDENTITY, 32,
+ 47, BROTLI_TRANSFORM_IDENTITY, 3,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_5, 49,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_9, 49,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8,
+ 5, BROTLI_TRANSFORM_IDENTITY, 21,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10,
+ 49, BROTLI_TRANSFORM_IDENTITY, 30,
+ 0, BROTLI_TRANSFORM_IDENTITY, 5,
+ 35, BROTLI_TRANSFORM_IDENTITY, 49,
+ 47, BROTLI_TRANSFORM_IDENTITY, 2,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17,
+ 49, BROTLI_TRANSFORM_IDENTITY, 36,
+ 49, BROTLI_TRANSFORM_IDENTITY, 33,
+ 5, BROTLI_TRANSFORM_IDENTITY, 0,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
+ 49, BROTLI_TRANSFORM_IDENTITY, 37,
+ 0, BROTLI_TRANSFORM_IDENTITY, 30,
+ 49, BROTLI_TRANSFORM_IDENTITY, 38,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
+ 49, BROTLI_TRANSFORM_IDENTITY, 39,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 34,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
+ 0, BROTLI_TRANSFORM_IDENTITY, 21,
+ 49, BROTLI_TRANSFORM_IDENTITY, 40,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
+ 49, BROTLI_TRANSFORM_IDENTITY, 41,
+ 49, BROTLI_TRANSFORM_IDENTITY, 42,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17,
+ 49, BROTLI_TRANSFORM_IDENTITY, 43,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10,
+ 0, BROTLI_TRANSFORM_IDENTITY, 34,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
+ 49, BROTLI_TRANSFORM_IDENTITY, 44,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
+ 45, BROTLI_TRANSFORM_IDENTITY, 49,
+ 0, BROTLI_TRANSFORM_IDENTITY, 33,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
+ 49, BROTLI_TRANSFORM_IDENTITY, 46,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
+};
+
+static BrotliTransforms kBrotliTransforms = {
+ sizeof(kPrefixSuffix),
+ (const uint8_t*)kPrefixSuffix,
+ kPrefixSuffixMap,
+ sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])),
+ kTransformsData,
+ {0, 12, 27, 23, 42, 63, 56, 48, 59, 64}
+};
+
+const BrotliTransforms* BrotliGetTransforms(void) {
+ return &kBrotliTransforms;
+}
+
+static int ToUpperCase(uint8_t* p) {
+ if (p[0] < 0xC0) {
+ if (p[0] >= 'a' && p[0] <= 'z') {
+ p[0] ^= 32;
+ }
+ return 1;
+ }
+ /* An overly simplified uppercasing model for UTF-8. */
+ if (p[0] < 0xE0) {
+ p[1] ^= 32;
+ return 2;
+ }
+ /* An arbitrary transform for three byte characters. */
+ p[2] ^= 5;
+ return 3;
+}
+
+int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len,
+ const BrotliTransforms* BROTLI_RESTRICT transforms, int transfom_idx) {
+ int idx = 0;
+ const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transfom_idx);
+ uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transfom_idx);
+ const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transfom_idx);
+ {
+ int prefix_len = *prefix++;
+ while (prefix_len--) { dst[idx++] = *prefix++; }
+ }
+ {
+ const int t = type;
+ int i = 0;
+ if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) {
+ len -= t;
+ } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1
+ && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) {
+ int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1);
+ word += skip;
+ len -= skip;
+ }
+ while (i < len) { dst[idx++] = word[i++]; }
+ if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) {
+ ToUpperCase(&dst[idx - len]);
+ } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) {
+ uint8_t* uppercase = &dst[idx - len];
+ while (len > 0) {
+ int step = ToUpperCase(uppercase);
+ uppercase += step;
+ len -= step;
+ }
+ }
+ }
+ {
+ int suffix_len = *suffix++;
+ while (suffix_len--) { dst[idx++] = *suffix++; }
+ return idx;
+ }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/c/common/transform.h b/c/common/transform.h
new file mode 100755
index 0000000..b279c04
--- /dev/null
+++ b/c/common/transform.h
@@ -0,0 +1,80 @@
+/* transforms is a part of ABI, nut not API.
+
+ It means that there are some functions that are supposed to be in "common"
+ library, but header itself is not placed into include/brotli. This way,
+ aforementioned functions will be available only to brotli internals.
+ */
+
+#ifndef BROTLI_COMMON_TRANSFORM_H_
+#define BROTLI_COMMON_TRANSFORM_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+enum BrotliWordTransformType {
+ BROTLI_TRANSFORM_IDENTITY = 0,
+ BROTLI_TRANSFORM_OMIT_LAST_1 = 1,
+ BROTLI_TRANSFORM_OMIT_LAST_2 = 2,
+ BROTLI_TRANSFORM_OMIT_LAST_3 = 3,
+ BROTLI_TRANSFORM_OMIT_LAST_4 = 4,
+ BROTLI_TRANSFORM_OMIT_LAST_5 = 5,
+ BROTLI_TRANSFORM_OMIT_LAST_6 = 6,
+ BROTLI_TRANSFORM_OMIT_LAST_7 = 7,
+ BROTLI_TRANSFORM_OMIT_LAST_8 = 8,
+ BROTLI_TRANSFORM_OMIT_LAST_9 = 9,
+ BROTLI_TRANSFORM_UPPERCASE_FIRST = 10,
+ BROTLI_TRANSFORM_UPPERCASE_ALL = 11,
+ BROTLI_TRANSFORM_OMIT_FIRST_1 = 12,
+ BROTLI_TRANSFORM_OMIT_FIRST_2 = 13,
+ BROTLI_TRANSFORM_OMIT_FIRST_3 = 14,
+ BROTLI_TRANSFORM_OMIT_FIRST_4 = 15,
+ BROTLI_TRANSFORM_OMIT_FIRST_5 = 16,
+ BROTLI_TRANSFORM_OMIT_FIRST_6 = 17,
+ BROTLI_TRANSFORM_OMIT_FIRST_7 = 18,
+ BROTLI_TRANSFORM_OMIT_FIRST_8 = 19,
+ BROTLI_TRANSFORM_OMIT_FIRST_9 = 20,
+ BROTLI_NUM_TRANSFORM_TYPES /* Counts transforms, not a transform itself. */
+};
+
+#define BROTLI_TRANSFORMS_MAX_CUT_OFF BROTLI_TRANSFORM_OMIT_LAST_9
+
+typedef struct BrotliTransforms {
+ uint16_t prefix_suffix_size;
+ /* Last character must be null, so prefix_suffix_size must be at least 1. */
+ const uint8_t* prefix_suffix;
+ const uint16_t* prefix_suffix_map;
+ uint32_t num_transforms;
+ /* Each entry is a [prefix_id, transform, suffix_id] triplet. */
+ const uint8_t* transforms;
+ /* Indices of transforms like ["", BROTLI_TRANSFORM_OMIT_LAST_#, ""].
+ 0-th element corresponds to ["", BROTLI_TRANSFORM_IDENTITY, ""].
+ -1, if cut-off transform does not exist. */
+ int16_t cutOffTransforms[BROTLI_TRANSFORMS_MAX_CUT_OFF + 1];
+} BrotliTransforms;
+
+/* T is BrotliTransforms*; result is uint8_t. */
+#define BROTLI_TRANSFORM_PREFIX_ID(T, I) ((T)->transforms[((I) * 3) + 0])
+#define BROTLI_TRANSFORM_TYPE(T, I) ((T)->transforms[((I) * 3) + 1])
+#define BROTLI_TRANSFORM_SUFFIX_ID(T, I) ((T)->transforms[((I) * 3) + 2])
+
+/* T is BrotliTransforms*; result is const uint8_t*. */
+#define BROTLI_TRANSFORM_PREFIX(T, I) (&(T)->prefix_suffix[ \
+ (T)->prefix_suffix_map[BROTLI_TRANSFORM_PREFIX_ID(T, I)]])
+#define BROTLI_TRANSFORM_SUFFIX(T, I) (&(T)->prefix_suffix[ \
+ (T)->prefix_suffix_map[BROTLI_TRANSFORM_SUFFIX_ID(T, I)]])
+
+BROTLI_COMMON_API const BrotliTransforms* BrotliGetTransforms(void);
+
+BROTLI_COMMON_API int BrotliTransformDictionaryWord(
+ uint8_t* dst, const uint8_t* word, int len,
+ const BrotliTransforms* transforms, int transform_idx);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif /* BROTLI_COMMON_TRANSFORM_H_ */
diff --git a/c/dec/bit_reader.h b/c/dec/bit_reader.h
index 21c59d7..39e4873 100644
--- a/c/dec/bit_reader.h
+++ b/c/dec/bit_reader.h
@@ -20,7 +20,7 @@ extern "C" {
#define BROTLI_SHORT_FILL_BIT_WINDOW_READ (sizeof(brotli_reg_t) >> 1)
-static const uint32_t kBitMask[33] = { 0x0000,
+static const uint32_t kBitMask[33] = { 0x00000000,
0x00000001, 0x00000003, 0x00000007, 0x0000000F,
0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
@@ -35,7 +35,7 @@ static BROTLI_INLINE uint32_t BitMask(uint32_t n) {
if (BROTLI_IS_CONSTANT(n) || BROTLI_HAS_UBFX) {
/* Masking with this expression turns to a single
"Unsigned Bit Field Extract" UBFX instruction on ARM. */
- return ~((0xffffffffU) << n);
+ return ~((0xFFFFFFFFu) << n);
} else {
return kBitMask[n];
}
@@ -58,8 +58,9 @@ typedef struct {
/* Initializes the BrotliBitReader fields. */
BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br);
-/* Ensures that accumulator is not empty. May consume one byte of input.
- Returns 0 if data is required but there is no input available.
+/* Ensures that accumulator is not empty.
+ May consume up to sizeof(brotli_reg_t) - 1 bytes of input.
+ Returns BROTLI_FALSE if data is required but there is no input available.
For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned
reading. */
BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br);
@@ -98,9 +99,9 @@ static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount(
return TO_BROTLI_BOOL(br->avail_in >= num);
}
-/* Guarantees that there are at least n_bits + 1 bits in accumulator.
+/* Guarantees that there are at least |n_bits| + 1 bits in accumulator.
Precondition: accumulator contains at least 1 bit.
- n_bits should be in the range [1..24] for regular build. For portable
+ |n_bits| should be in the range [1..24] for regular build. For portable
non-64-bit little-endian build only 16 bits are safe to request. */
static BROTLI_INLINE void BrotliFillBitWindow(
BrotliBitReader* const br, uint32_t n_bits) {
@@ -158,7 +159,8 @@ static BROTLI_INLINE void BrotliFillBitWindow16(BrotliBitReader* const br) {
BrotliFillBitWindow(br, 17);
}
-/* Pulls one byte of input to accumulator. */
+/* Tries to pull one byte of input to accumulator.
+ Returns BROTLI_FALSE if there is no input available. */
static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) {
if (br->avail_in == 0) {
return BROTLI_FALSE;
@@ -190,15 +192,16 @@ static BROTLI_INLINE uint32_t BrotliGet16BitsUnmasked(
return (uint32_t)BrotliGetBitsUnmasked(br);
}
-/* Returns the specified number of bits from |br| without advancing bit pos. */
+/* Returns the specified number of bits from |br| without advancing bit
+ position. */
static BROTLI_INLINE uint32_t BrotliGetBits(
BrotliBitReader* const br, uint32_t n_bits) {
BrotliFillBitWindow(br, n_bits);
return (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
}
-/* Tries to peek the specified amount of bits. Returns 0, if there is not
- enough input. */
+/* Tries to peek the specified amount of bits. Returns BROTLI_FALSE, if there
+ is not enough input. */
static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits(
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
while (BrotliGetAvailableBits(br) < n_bits) {
@@ -210,7 +213,7 @@ static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits(
return BROTLI_TRUE;
}
-/* Advances the bit pos by n_bits. */
+/* Advances the bit pos by |n_bits|. */
static BROTLI_INLINE void BrotliDropBits(
BrotliBitReader* const br, uint32_t n_bits) {
br->bit_pos_ += n_bits;
@@ -230,7 +233,7 @@ static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) {
}
/* Reads the specified number of bits from |br| and advances the bit pos.
- Precondition: accumulator MUST contain at least n_bits. */
+ Precondition: accumulator MUST contain at least |n_bits|. */
static BROTLI_INLINE void BrotliTakeBits(
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
*val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
@@ -259,8 +262,8 @@ static BROTLI_INLINE uint32_t BrotliReadBits(
}
}
-/* Tries to read the specified amount of bits. Returns 0, if there is not
- enough input. n_bits MUST be positive. */
+/* Tries to read the specified amount of bits. Returns BROTLI_FALSE, if there
+ is not enough input. |n_bits| MUST be positive. */
static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits(
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
while (BrotliGetAvailableBits(br) < n_bits) {
@@ -284,7 +287,7 @@ static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) {
}
/* Copies remaining input bytes stored in the bit reader to the output. Value
- num may not be larger than BrotliGetRemainingBytes. The bit reader must be
+ |num| may not be larger than BrotliGetRemainingBytes. The bit reader must be
warmed up again after this. */
static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest,
BrotliBitReader* br, size_t num) {
diff --git a/c/dec/decode.c b/c/dec/decode.c
index 846557a..630edeb 100644
--- a/c/dec/decode.c
+++ b/c/dec/decode.c
@@ -14,15 +14,15 @@
#include <string.h> /* memcpy, memset */
#include "../common/constants.h"
+#include "../common/context.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
+#include "../common/transform.h"
#include "../common/version.h"
#include "./bit_reader.h"
-#include "./context.h"
#include "./huffman.h"
#include "./prefix.h"
#include "./state.h"
-#include "./transform.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -37,7 +37,7 @@ extern "C" {
(unsigned long)(idx), (unsigned long)array_name[idx]))
#define HUFFMAN_TABLE_BITS 8U
-#define HUFFMAN_TABLE_MASK 0xff
+#define HUFFMAN_TABLE_MASK 0xFF
/* We need the slack region for the following reasons:
- doing up to two 16-byte copies for fast backward copying
@@ -59,11 +59,16 @@ static const uint8_t kCodeLengthPrefixValue[16] = {
BROTLI_BOOL BrotliDecoderSetParameter(
BrotliDecoderState* state, BrotliDecoderParameter p, uint32_t value) {
+ if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE;
switch (p) {
case BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION:
state->canny_ringbuffer_allocation = !!value ? 0 : 1;
return BROTLI_TRUE;
+ case BROTLI_DECODER_PARAM_LARGE_WINDOW:
+ state->large_window = TO_BROTLI_BOOL(!!value);
+ return BROTLI_TRUE;
+
default: return BROTLI_FALSE;
}
}
@@ -80,8 +85,15 @@ BrotliDecoderState* BrotliDecoderCreateInstance(
BROTLI_DUMP();
return 0;
}
- BrotliDecoderStateInitWithCustomAllocators(
- state, alloc_func, free_func, opaque);
+ if (!BrotliDecoderStateInit(state, alloc_func, free_func, opaque)) {
+ BROTLI_DUMP();
+ if (!alloc_func && !free_func) {
+ free(state);
+ } else if (alloc_func && free_func) {
+ free_func(opaque, state);
+ }
+ return 0;
+ }
return state;
}
@@ -97,39 +109,61 @@ void BrotliDecoderDestroyInstance(BrotliDecoderState* state) {
}
}
-/* Saves error code and converts it to BrotliDecoderResult */
+/* Saves error code and converts it to BrotliDecoderResult. */
static BROTLI_NOINLINE BrotliDecoderResult SaveErrorCode(
BrotliDecoderState* s, BrotliDecoderErrorCode e) {
s->error_code = (int)e;
switch (e) {
case BROTLI_DECODER_SUCCESS:
return BROTLI_DECODER_RESULT_SUCCESS;
+
case BROTLI_DECODER_NEEDS_MORE_INPUT:
return BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
+
case BROTLI_DECODER_NEEDS_MORE_OUTPUT:
return BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
+
default:
return BROTLI_DECODER_RESULT_ERROR;
}
}
-/* Decodes a number in the range [9..24], by reading 1 - 7 bits.
- Precondition: bit-reader accumulator has at least 7 bits. */
-static uint32_t DecodeWindowBits(BrotliBitReader* br) {
+/* Decodes WBITS by reading 1 - 7 bits, or 0x11 for "Large Window Brotli".
+ Precondition: bit-reader accumulator has at least 8 bits. */
+static BrotliDecoderErrorCode DecodeWindowBits(BrotliDecoderState* s,
+ BrotliBitReader* br) {
uint32_t n;
+ BROTLI_BOOL large_window = s->large_window;
+ s->large_window = BROTLI_FALSE;
BrotliTakeBits(br, 1, &n);
if (n == 0) {
- return 16;
+ s->window_bits = 16;
+ return BROTLI_DECODER_SUCCESS;
}
BrotliTakeBits(br, 3, &n);
if (n != 0) {
- return 17 + n;
+ s->window_bits = 17 + n;
+ return BROTLI_DECODER_SUCCESS;
}
BrotliTakeBits(br, 3, &n);
+ if (n == 1) {
+ if (large_window) {
+ BrotliTakeBits(br, 1, &n);
+ if (n == 1) {
+ return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
+ }
+ s->large_window = BROTLI_TRUE;
+ return BROTLI_DECODER_SUCCESS;
+ } else {
+ return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
+ }
+ }
if (n != 0) {
- return 8 + n;
+ s->window_bits = 8 + n;
+ return BROTLI_DECODER_SUCCESS;
}
- return 17;
+ s->window_bits = 17;
+ return BROTLI_DECODER_SUCCESS;
}
static BROTLI_INLINE void memmove16(uint8_t* dst, uint8_t* src) {
@@ -342,7 +376,7 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol(
*result = table->value;
return BROTLI_TRUE;
}
- return BROTLI_FALSE; /* No valid bits at all. */
+ return BROTLI_FALSE; /* No valid bits at all. */
}
val = (uint32_t)BrotliGetBitsUnmasked(br);
table += val & HUFFMAN_TABLE_MASK;
@@ -352,11 +386,11 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol(
*result = table->value;
return BROTLI_TRUE;
} else {
- return BROTLI_FALSE; /* Not enough bits for the first level. */
+ return BROTLI_FALSE; /* Not enough bits for the first level. */
}
}
if (available_bits <= HUFFMAN_TABLE_BITS) {
- return BROTLI_FALSE; /* Not enough bits to move to the second level. */
+ return BROTLI_FALSE; /* Not enough bits to move to the second level. */
}
/* Speculatively drop HUFFMAN_TABLE_BITS. */
@@ -364,7 +398,7 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol(
available_bits -= HUFFMAN_TABLE_BITS;
table += table->value + val;
if (available_bits < table->bits) {
- return BROTLI_FALSE; /* Not enough bits for the second level. */
+ return BROTLI_FALSE; /* Not enough bits for the second level. */
}
BrotliDropBits(br, HUFFMAN_TABLE_BITS + table->bits);
@@ -428,12 +462,11 @@ static BROTLI_INLINE uint32_t Log2Floor(uint32_t x) {
}
/* Reads (s->symbol + 1) symbols.
- Totally 1..4 symbols are read, 1..10 bits each.
- The list of symbols MUST NOT contain duplicates.
- */
+ Totally 1..4 symbols are read, 1..11 bits each.
+ The list of symbols MUST NOT contain duplicates. */
static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols(
- uint32_t alphabet_size, BrotliDecoderState* s) {
- /* max_bits == 1..10; symbol == 0..3; 1..40 bits will be read. */
+ uint32_t alphabet_size, uint32_t max_symbol, BrotliDecoderState* s) {
+ /* max_bits == 1..11; symbol == 0..3; 1..44 bits will be read. */
BrotliBitReader* br = &s->br;
uint32_t max_bits = Log2Floor(alphabet_size - 1);
uint32_t i = s->sub_loop_counter;
@@ -445,7 +478,7 @@ static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols(
s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_READ;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
- if (v >= alphabet_size) {
+ if (v >= max_symbol) {
return
BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_ALPHABET);
}
@@ -471,14 +504,13 @@ static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols(
B) remember code length (if it is not 0)
C) extend corresponding index-chain
D) reduce the Huffman space
- E) update the histogram
- */
+ E) update the histogram */
static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len,
uint32_t* symbol, uint32_t* repeat, uint32_t* space,
uint32_t* prev_code_len, uint16_t* symbol_lists,
uint16_t* code_length_histo, int* next_symbol) {
*repeat = 0;
- if (code_len != 0) { /* code_len == 1..15 */
+ if (code_len != 0) { /* code_len == 1..15 */
symbol_lists[next_symbol[code_len]] = (uint16_t)(*symbol);
next_symbol[code_len] = (int)(*symbol);
*prev_code_len = code_len;
@@ -498,8 +530,7 @@ static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len,
D) For each symbol do the same operations as in ProcessSingleCodeLength
PRECONDITION: code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH or
- code_len == BROTLI_REPEAT_ZERO_CODE_LENGTH
- */
+ code_len == BROTLI_REPEAT_ZERO_CODE_LENGTH */
static BROTLI_INLINE void ProcessRepeatedCodeLength(uint32_t code_len,
uint32_t repeat_delta, uint32_t alphabet_size, uint32_t* symbol,
uint32_t* repeat, uint32_t* space, uint32_t* prev_code_len,
@@ -576,12 +607,12 @@ static BrotliDecoderErrorCode ReadSymbolCodeLengths(
BrotliFillBitWindow16(br);
p += BrotliGetBitsUnmasked(br) &
BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH);
- BrotliDropBits(br, p->bits); /* Use 1..5 bits */
+ BrotliDropBits(br, p->bits); /* Use 1..5 bits. */
code_len = p->value; /* code_len == 0..17 */
if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
ProcessSingleCodeLength(code_len, &symbol, &repeat, &space,
&prev_code_len, symbol_lists, code_length_histo, next_symbol);
- } else { /* code_len == 16..17, extra_bits == 2..3 */
+ } else { /* code_len == 16..17, extra_bits == 2..3 */
uint32_t extra_bits =
(code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) ? 2 : 3;
uint32_t repeat_delta =
@@ -616,13 +647,13 @@ static BrotliDecoderErrorCode SafeReadSymbolCodeLengths(
get_byte = BROTLI_TRUE;
continue;
}
- code_len = p->value; /* code_len == 0..17 */
+ code_len = p->value; /* code_len == 0..17 */
if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
BrotliDropBits(br, p->bits);
ProcessSingleCodeLength(code_len, &s->symbol, &s->repeat, &s->space,
&s->prev_code_len, s->symbol_lists, s->code_length_histo,
s->next_symbol);
- } else { /* code_len == 16..17, extra_bits == 2..3 */
+ } else { /* code_len == 16..17, extra_bits == 2..3 */
uint32_t extra_bits = code_len - 14U;
uint32_t repeat_delta = (bits >> p->bits) & BitMask(extra_bits);
if (available_bits < p->bits + extra_bits) {
@@ -674,7 +705,7 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) {
++num_codes;
++s->code_length_histo[v];
if (space - 1U >= 32U) {
- /* space is 0 or wrapped around */
+ /* space is 0 or wrapped around. */
break;
}
}
@@ -689,22 +720,22 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) {
There are 2 scenarios:
A) Huffman code contains only few symbols (1..4). Those symbols are read
directly; their code lengths are defined by the number of symbols.
- For this scenario 4 - 45 bits will be read.
+ For this scenario 4 - 49 bits will be read.
B) 2-phase decoding:
B.1) Small Huffman table is decoded; it is specified with code lengths
encoded with predefined entropy code. 32 - 74 bits are used.
B.2) Decoded table is used to decode code lengths of symbols in resulting
- Huffman table. In worst case 3520 bits are read.
-*/
+ Huffman table. In worst case 3520 bits are read. */
static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
+ uint32_t max_symbol,
HuffmanCode* table,
uint32_t* opt_table_size,
BrotliDecoderState* s) {
BrotliBitReader* br = &s->br;
/* Unnecessary masking, but might be good for safety. */
- alphabet_size &= 0x3ff;
- /* State machine */
+ alphabet_size &= 0x7FF;
+ /* State machine. */
for (;;) {
switch (s->substate_huffman) {
case BROTLI_STATE_HUFFMAN_NONE:
@@ -717,7 +748,7 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
if (s->sub_loop_counter != 1) {
s->space = 32;
- s->repeat = 0; /* num_codes */
+ s->repeat = 0; /* num_codes */
memset(&s->code_length_histo[0], 0, sizeof(s->code_length_histo[0]) *
(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1));
memset(&s->code_length_code_lengths[0], 0,
@@ -729,20 +760,22 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
case BROTLI_STATE_HUFFMAN_SIMPLE_SIZE:
/* Read symbols, codes & code lengths directly. */
- if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */
+ if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */
s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_SIZE;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
s->sub_loop_counter = 0;
/* No break, transit to the next state. */
+
case BROTLI_STATE_HUFFMAN_SIMPLE_READ: {
BrotliDecoderErrorCode result =
- ReadSimpleHuffmanSymbols(alphabet_size, s);
+ ReadSimpleHuffmanSymbols(alphabet_size, max_symbol, s);
if (result != BROTLI_DECODER_SUCCESS) {
return result;
}
/* No break, transit to the next state. */
}
+
case BROTLI_STATE_HUFFMAN_SIMPLE_BUILD: {
uint32_t table_size;
if (s->symbol == 3) {
@@ -787,11 +820,12 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
s->substate_huffman = BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS;
/* No break, transit to the next state. */
}
+
case BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS: {
uint32_t table_size;
- BrotliDecoderErrorCode result = ReadSymbolCodeLengths(alphabet_size, s);
+ BrotliDecoderErrorCode result = ReadSymbolCodeLengths(max_symbol, s);
if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
- result = SafeReadSymbolCodeLengths(alphabet_size, s);
+ result = SafeReadSymbolCodeLengths(max_symbol, s);
}
if (result != BROTLI_DECODER_SUCCESS) {
return result;
@@ -823,7 +857,7 @@ static BROTLI_INLINE uint32_t ReadBlockLength(const HuffmanCode* table,
uint32_t code;
uint32_t nbits;
code = ReadSymbol(table, br);
- nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */
+ nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */
return kBlockLengthPrefixCode[code].offset + BrotliReadBits(br, nbits);
}
@@ -842,7 +876,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength(
}
{
uint32_t bits;
- uint32_t nbits = kBlockLengthPrefixCode[index].nbits; /* nbits == 2..24 */
+ uint32_t nbits = kBlockLengthPrefixCode[index].nbits; /* nbits == 2..24 */
if (!BrotliSafeReadBits(br, nbits, &bits)) {
s->block_length_index = index;
s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX;
@@ -867,8 +901,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength(
of Y values, and reinitialize only first elements in L.
Most of input values are 0 and 1. To reduce number of branches, we replace
- inner for loop with do-while.
- */
+ inner for loop with do-while. */
static BROTLI_NOINLINE void InverseMoveToFrontTransform(
uint8_t* v, uint32_t v_len, BrotliDecoderState* state) {
/* Reinitialize elements that could have been changed. */
@@ -884,7 +917,7 @@ static BROTLI_NOINLINE void InverseMoveToFrontTransform(
/* Initialize list using 4 consequent values pattern. */
mtf[0] = pattern;
do {
- pattern += 0x04040404; /* Advance all 4 values by 4. */
+ pattern += 0x04040404; /* Advance all 4 values by 4. */
mtf[i] = pattern;
i++;
} while (i <= upper_bound);
@@ -917,7 +950,8 @@ static BrotliDecoderErrorCode HuffmanTreeGroupDecode(
while (s->htree_index < group->num_htrees) {
uint32_t table_size;
BrotliDecoderErrorCode result =
- ReadHuffmanCode(group->alphabet_size, s->next, &table_size, s);
+ ReadHuffmanCode(group->alphabet_size, group->max_symbol,
+ s->next, &table_size, s);
if (result != BROTLI_DECODER_SUCCESS) return result;
group->htrees[s->htree_index] = s->next;
s->next += table_size;
@@ -934,8 +968,7 @@ static BrotliDecoderErrorCode HuffmanTreeGroupDecode(
2) Decode Huffman table using ReadHuffmanCode function.
This table will be used for reading context map items.
3) Read context map items; "0" values could be run-length encoded.
- 4) Optionally, apply InverseMoveToFront transform to the resulting map.
- */
+ 4) Optionally, apply InverseMoveToFront transform to the resulting map. */
static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
uint32_t* num_htrees,
uint8_t** context_map_arg,
@@ -964,6 +997,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
}
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX;
/* No break, continue to next state. */
+
case BROTLI_STATE_CONTEXT_MAP_READ_PREFIX: {
uint32_t bits;
/* In next stage ReadHuffmanCode uses at least 4 bits, so it is safe
@@ -982,13 +1016,17 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN;
/* No break, continue to next state. */
}
- case BROTLI_STATE_CONTEXT_MAP_HUFFMAN:
- result = ReadHuffmanCode(*num_htrees + s->max_run_length_prefix,
+
+ case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: {
+ uint32_t alphabet_size = *num_htrees + s->max_run_length_prefix;
+ result = ReadHuffmanCode(alphabet_size, alphabet_size,
s->context_map_table, NULL, s);
if (result != BROTLI_DECODER_SUCCESS) return result;
s->code = 0xFFFF;
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE;
/* No break, continue to next state. */
+ }
+
case BROTLI_STATE_CONTEXT_MAP_DECODE: {
uint32_t context_index = s->context_index;
uint32_t max_run_length_prefix = s->max_run_length_prefix;
@@ -1037,6 +1075,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
}
/* No break, continue to next state. */
}
+
case BROTLI_STATE_CONTEXT_MAP_TRANSFORM: {
uint32_t bits;
if (!BrotliSafeReadBits(br, 1, &bits)) {
@@ -1049,6 +1088,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
return BROTLI_DECODER_SUCCESS;
}
+
default:
return
BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
@@ -1067,8 +1107,11 @@ static BROTLI_INLINE BROTLI_BOOL DecodeBlockTypeAndLength(
BrotliBitReader* br = &s->br;
uint32_t* ringbuffer = &s->block_type_rb[tree_type * 2];
uint32_t block_type;
+ if (max_block_type <= 1) {
+ return BROTLI_FALSE;
+ }
- /* Read 0..15 + 3..39 bits */
+ /* Read 0..15 + 3..39 bits. */
if (!safe) {
block_type = ReadSymbol(type_tree, br);
s->block_length[tree_type] = ReadBlockLength(len_tree, br);
@@ -1125,9 +1168,8 @@ static BROTLI_INLINE void PrepareLiteralDecoding(BrotliDecoderState* s) {
trivial = s->trivial_literal_contexts[block_type >> 5];
s->trivial_literal_context = (trivial >> (block_type & 31)) & 1;
s->literal_htree = s->literal_hgroup.htrees[s->context_map_slice[0]];
- context_mode = s->context_modes[block_type];
- s->context_lookup1 = &kContextLookup[kContextLookupOffsets[context_mode]];
- s->context_lookup2 = &kContextLookup[kContextLookupOffsets[context_mode + 1]];
+ context_mode = s->context_modes[block_type] & 3;
+ s->context_lookup = BROTLI_CONTEXT_LUT(context_mode);
}
/* Decodes the block type and updates the state for literal context.
@@ -1164,6 +1206,7 @@ static BROTLI_INLINE BROTLI_BOOL DecodeCommandBlockSwitchInternal(
static void BROTLI_NOINLINE DecodeCommandBlockSwitch(BrotliDecoderState* s) {
DecodeCommandBlockSwitchInternal(0, s);
}
+
static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeCommandBlockSwitch(
BrotliDecoderState* s) {
return DecodeCommandBlockSwitchInternal(1, s);
@@ -1200,8 +1243,7 @@ static size_t UnwrittenBytes(const BrotliDecoderState* s, BROTLI_BOOL wrap) {
/* Dumps output.
Returns BROTLI_DECODER_NEEDS_MORE_OUTPUT only if there is more output to push
- and either ring-buffer is as big as window size, or |force| is true.
- */
+ and either ring-buffer is as big as window size, or |force| is true. */
static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer(
BrotliDecoderState* s, size_t* available_out, uint8_t** next_out,
size_t* total_out, BROTLI_BOOL force) {
@@ -1260,8 +1302,7 @@ static void BROTLI_NOINLINE WrapRingBuffer(BrotliDecoderState* s) {
this function is called.
Last two bytes of ring-buffer are initialized to 0, so context calculation
- could be done uniformly for the first two and all other positions.
-*/
+ could be done uniformly for the first two and all other positions. */
static BROTLI_BOOL BROTLI_NOINLINE BrotliEnsureRingBuffer(
BrotliDecoderState* s) {
uint8_t* old_ringbuffer = s->ringbuffer;
@@ -1321,8 +1362,9 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput(
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
}
+
case BROTLI_STATE_UNCOMPRESSED_WRITE: {
BrotliDecoderErrorCode result;
result = WriteRingBuffer(
@@ -1346,8 +1388,7 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput(
If we know the data size is small, do not allocate more ring buffer
size than needed to reduce memory usage.
- When this method is called, metablock size and flags MUST be decoded.
-*/
+ When this method is called, metablock size and flags MUST be decoded. */
static void BROTLI_NOINLINE BrotliCalculateRingBufferSize(
BrotliDecoderState* s) {
int window_size = 1 << s->window_bits;
@@ -1378,7 +1419,7 @@ static void BROTLI_NOINLINE BrotliCalculateRingBufferSize(
if (!!s->canny_ringbuffer_allocation) {
/* Reduce ring buffer size to save memory when server is unscrupulous.
In worst case memory usage might be 1.5x bigger for a short period of
- ring buffer reallocation.*/
+ ring buffer reallocation. */
while ((new_ringbuffer_size >> 1) >= min_size) {
new_ringbuffer_size >>= 1;
}
@@ -1398,7 +1439,7 @@ static BrotliDecoderErrorCode ReadContextModes(BrotliDecoderState* s) {
s->loop_counter = i;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
- s->context_modes[i] = (uint8_t)(bits << 1);
+ s->context_modes[i] = (uint8_t)bits;
BROTLI_LOG_ARRAY_INDEX(s->context_modes, i);
i++;
}
@@ -1413,12 +1454,12 @@ static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) {
s->distance_context = 1;
} else {
int distance_code = s->distance_code << 1;
- /* kDistanceShortCodeIndexOffset has 2-bit values from LSB: */
- /* 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */
- const uint32_t kDistanceShortCodeIndexOffset = 0xaaafff1b;
- /* kDistanceShortCodeValueOffset has 2-bit values from LSB: */
- /*-0, 0,-0, 0,-1, 1,-2, 2,-3, 3,-1, 1,-2, 2,-3, 3 */
- const uint32_t kDistanceShortCodeValueOffset = 0xfa5fa500;
+ /* kDistanceShortCodeIndexOffset has 2-bit values from LSB:
+ 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */
+ const uint32_t kDistanceShortCodeIndexOffset = 0xAAAFFF1B;
+ /* kDistanceShortCodeValueOffset has 2-bit values from LSB:
+ -0, 0,-0, 0,-1, 1,-2, 2,-3, 3,-1, 1,-2, 2,-3, 3 */
+ const uint32_t kDistanceShortCodeValueOffset = 0xFA5FA500;
int v = (s->dist_rb_idx +
(int)(kDistanceShortCodeIndexOffset >> distance_code)) & 0x3;
s->distance_code = s->dist_rb[v];
@@ -1428,9 +1469,9 @@ static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) {
} else {
s->distance_code -= v;
if (s->distance_code <= 0) {
- /* A huge distance will cause a BROTLI_FAILURE() soon. */
- /* This is a little faster than failing here. */
- s->distance_code = 0x0fffffff;
+ /* A huge distance will cause a BROTLI_FAILURE() soon.
+ This is a little faster than failing here. */
+ s->distance_code = 0x7FFFFFFF;
}
}
}
@@ -1446,7 +1487,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBits(
}
}
-/* Precondition: s->distance_code < 0 */
+/* Precondition: s->distance_code < 0. */
static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal(
int safe, BrotliDecoderState* s, BrotliBitReader* br) {
int distval;
@@ -1462,10 +1503,10 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal(
}
s->distance_code = (int)code;
}
- /* Convert the distance code to the actual distance by possibly */
- /* looking up past distances from the s->ringbuffer. */
+ /* Convert the distance code to the actual distance by possibly
+ looking up past distances from the s->ringbuffer. */
s->distance_context = 0;
- if ((s->distance_code & ~0xf) == 0) {
+ if ((s->distance_code & ~0xF) == 0) {
TakeDistanceFromRingBuffer(s);
--s->block_length[2];
return BROTLI_TRUE;
@@ -1481,14 +1522,14 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal(
s->distance_code = (int)s->num_direct_distance_codes + offset +
(int)BrotliReadBits(br, nbits);
} else {
- /* This branch also works well when s->distance_postfix_bits == 0 */
+ /* This branch also works well when s->distance_postfix_bits == 0. */
uint32_t bits;
postfix = distval & s->distance_postfix_mask;
distval >>= s->distance_postfix_bits;
nbits = ((uint32_t)distval >> 1) + 1;
if (safe) {
if (!SafeReadBits(br, nbits, &bits)) {
- s->distance_code = -1; /* Restore precondition. */
+ s->distance_code = -1; /* Restore precondition. */
BrotliBitReaderRestoreState(br, &memento);
return BROTLI_FALSE;
}
@@ -1615,7 +1656,7 @@ CommandBegin:
if (safe) {
s->state = BROTLI_STATE_COMMAND_BEGIN;
}
- if (!CheckInputAmount(safe, br, 28)) { /* 156 bits + 7 bytes */
+ if (!CheckInputAmount(safe, br, 28)) { /* 156 bits + 7 bytes */
s->state = BROTLI_STATE_COMMAND_BEGIN;
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
goto saveStateAndReturn;
@@ -1624,7 +1665,7 @@ CommandBegin:
BROTLI_SAFE(DecodeCommandBlockSwitch(s));
goto CommandBegin;
}
- /* Read the insert/copy length in the command */
+ /* Read the insert/copy length in the command. */
BROTLI_SAFE(ReadCommand(s, br, &i));
BROTLI_LOG(("[ProcessCommandsInternal] pos = %d insert = %d copy = %d\n",
pos, i, s->copy_length));
@@ -1637,13 +1678,13 @@ CommandInner:
if (safe) {
s->state = BROTLI_STATE_COMMAND_INNER;
}
- /* Read the literals in the command */
+ /* Read the literals in the command. */
if (s->trivial_literal_context) {
uint32_t bits;
uint32_t value;
PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
do {
- if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */
+ if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */
s->state = BROTLI_STATE_COMMAND_INNER;
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
goto saveStateAndReturn;
@@ -1679,7 +1720,7 @@ CommandInner:
do {
const HuffmanCode* hc;
uint8_t context;
- if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */
+ if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */
s->state = BROTLI_STATE_COMMAND_INNER;
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
goto saveStateAndReturn;
@@ -1688,7 +1729,7 @@ CommandInner:
BROTLI_SAFE(DecodeLiteralBlockSwitch(s));
if (s->trivial_literal_context) goto CommandInner;
}
- context = s->context_lookup1[p1] | s->context_lookup2[p2];
+ context = BROTLI_CONTEXT(p1, p2, s->context_lookup);
BROTLI_LOG_UINT(context);
hc = s->literal_hgroup.htrees[s->context_map_slice[context]];
p2 = p1;
@@ -1744,14 +1785,25 @@ CommandPostDecodeLiterals:
}
i = s->copy_length;
/* Apply copy of LZ77 back-reference, or static dictionary reference if
- the distance is larger than the max LZ77 distance */
+ the distance is larger than the max LZ77 distance */
if (s->distance_code > s->max_distance) {
- int address = s->distance_code - s->max_distance - 1;
+ /* The maximum allowed distance is BROTLI_MAX_ALLOWED_DISTANCE = 0x7FFFFFFC.
+ With this choice, no signed overflow can occur after decoding
+ a special distance code (e.g., after adding 3 to the last distance). */
+ if (s->distance_code > BROTLI_MAX_ALLOWED_DISTANCE) {
+ BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+ "len: %d bytes left: %d\n",
+ pos, s->distance_code, i, s->meta_block_remaining_len));
+ return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DISTANCE);
+ }
if (i >= BROTLI_MIN_DICTIONARY_WORD_LENGTH &&
i <= BROTLI_MAX_DICTIONARY_WORD_LENGTH) {
+ int address = s->distance_code - s->max_distance - 1;
const BrotliDictionary* words = s->dictionary;
+ const BrotliTransforms* transforms = s->transforms;
int offset = (int)s->dictionary->offsets_by_length[i];
uint32_t shift = s->dictionary->size_bits_by_length[i];
+
int mask = (int)BitMask(shift);
int word_idx = address & mask;
int transform_idx = address >> shift;
@@ -1761,16 +1813,16 @@ CommandPostDecodeLiterals:
if (BROTLI_PREDICT_FALSE(!words->data)) {
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET);
}
- if (transform_idx < kNumTransforms) {
+ if (transform_idx < (int)transforms->num_transforms) {
const uint8_t* word = &words->data[offset];
int len = i;
- if (transform_idx == 0) {
+ if (transform_idx == transforms->cutOffTransforms[0]) {
memcpy(&s->ringbuffer[pos], word, (size_t)len);
BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s]\n",
len, word));
} else {
len = BrotliTransformDictionaryWord(&s->ringbuffer[pos], word, len,
- transform_idx);
+ transforms, transform_idx);
BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s],"
" transform_idx = %d, transformed: [%.*s]\n",
i, word, transform_idx, len, &s->ringbuffer[pos]));
@@ -1778,7 +1830,6 @@ CommandPostDecodeLiterals:
pos += len;
s->meta_block_remaining_len -= len;
if (pos >= s->ringbuffer_size) {
- /*s->partial_pos_rb += (size_t)s->ringbuffer_size;*/
s->state = BROTLI_STATE_COMMAND_POST_WRITE_1;
goto saveStateAndReturn;
}
@@ -1800,14 +1851,13 @@ CommandPostDecodeLiterals:
uint8_t* copy_src = &s->ringbuffer[src_start];
int dst_end = pos + i;
int src_end = src_start + i;
- /* update the recent distances cache */
+ /* Update the recent distances cache. */
s->dist_rb[s->dist_rb_idx & 3] = s->distance_code;
++s->dist_rb_idx;
s->meta_block_remaining_len -= i;
/* There are 32+ bytes of slack in the ring-buffer allocation.
Also, we have 16 short codes, that make these 16 bytes irrelevant
- in the ring-buffer. Let's copy over them as a first guess.
- */
+ in the ring-buffer. Let's copy over them as a first guess. */
memmove16(copy_dst, copy_src);
if (src_end > pos && dst_end > src_start) {
/* Regions intersect. */
@@ -1830,7 +1880,7 @@ CommandPostDecodeLiterals:
}
BROTLI_LOG_UINT(s->meta_block_remaining_len);
if (s->meta_block_remaining_len <= 0) {
- /* Next metablock, if any */
+ /* Next metablock, if any. */
s->state = BROTLI_STATE_METABLOCK_DONE;
goto saveStateAndReturn;
} else {
@@ -1850,7 +1900,7 @@ CommandPostWrapCopy:
}
}
if (s->meta_block_remaining_len <= 0) {
- /* Next metablock, if any */
+ /* Next metablock, if any. */
s->state = BROTLI_STATE_METABLOCK_DONE;
goto saveStateAndReturn;
} else {
@@ -1875,6 +1925,21 @@ static BROTLI_NOINLINE BrotliDecoderErrorCode SafeProcessCommands(
return ProcessCommandsInternal(1, s);
}
+/* Returns the maximum number of distance symbols which can only represent
+ distances not exceeding BROTLI_MAX_ALLOWED_DISTANCE. */
+static uint32_t BrotliMaxDistanceSymbol(uint32_t ndirect, uint32_t npostfix) {
+ static const uint32_t bound[BROTLI_MAX_NPOSTFIX + 1] = {0, 4, 12, 28};
+ static const uint32_t diff[BROTLI_MAX_NPOSTFIX + 1] = {73, 126, 228, 424};
+ uint32_t postfix = 1U << npostfix;
+ if (ndirect < bound[npostfix]) {
+ return ndirect + diff[npostfix] + postfix;
+ } else if (ndirect > bound[npostfix] + postfix) {
+ return ndirect + diff[npostfix];
+ } else {
+ return bound[npostfix] + diff[npostfix] + postfix;
+ }
+}
+
BrotliDecoderResult BrotliDecoderDecompress(
size_t encoded_size, const uint8_t* encoded_buffer, size_t* decoded_size,
uint8_t* decoded_buffer) {
@@ -1885,7 +1950,9 @@ BrotliDecoderResult BrotliDecoderDecompress(
const uint8_t* next_in = encoded_buffer;
size_t available_out = *decoded_size;
uint8_t* next_out = decoded_buffer;
- BrotliDecoderStateInit(&s);
+ if (!BrotliDecoderStateInit(&s, 0, 0, 0)) {
+ return BROTLI_DECODER_RESULT_ERROR;
+ }
result = BrotliDecoderDecompressStream(
&s, &available_in, &next_in, &available_out, &next_out, &total_out);
*decoded_size = total_out;
@@ -1897,23 +1964,22 @@ BrotliDecoderResult BrotliDecoderDecompress(
}
/* Invariant: input stream is never overconsumed:
- * invalid input implies that the whole stream is invalid -> any amount of
+ - invalid input implies that the whole stream is invalid -> any amount of
input could be read and discarded
- * when result is "needs more input", then at least one more byte is REQUIRED
+ - when result is "needs more input", then at least one more byte is REQUIRED
to complete decoding; all input data MUST be consumed by decoder, so
client could swap the input buffer
- * when result is "needs more output" decoder MUST ensure that it doesn't
+ - when result is "needs more output" decoder MUST ensure that it doesn't
hold more than 7 bits in bit reader; this saves client from swapping input
buffer ahead of time
- * when result is "success" decoder MUST return all unused data back to input
- buffer; this is possible because the invariant is hold on enter
-*/
+ - when result is "success" decoder MUST return all unused data back to input
+ buffer; this is possible because the invariant is held on enter */
BrotliDecoderResult BrotliDecoderDecompressStream(
BrotliDecoderState* s, size_t* available_in, const uint8_t** next_in,
size_t* available_out, uint8_t** next_out, size_t* total_out) {
BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
BrotliBitReader* br = &s->br;
- /* Ensure that *total_out is set, even if no data will ever be pushed out. */
+ /* Ensure that |total_out| is set, even if no data will ever be pushed out. */
if (total_out) {
*total_out = s->partial_pos_out;
}
@@ -1926,7 +1992,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s, BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS));
}
if (!*available_out) next_out = 0;
- if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */
+ if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */
br->avail_in = *available_in;
br->next_in = *next_in;
} else {
@@ -1938,9 +2004,10 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
}
/* State machine */
for (;;) {
- if (result != BROTLI_DECODER_SUCCESS) { /* Error, needs more input/output */
+ if (result != BROTLI_DECODER_SUCCESS) {
+ /* Error, needs more input/output. */
if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
- if (s->ringbuffer != 0) { /* Pro-actively push output. */
+ if (s->ringbuffer != 0) { /* Pro-actively push output. */
BrotliDecoderErrorCode intermediate_result = WriteRingBuffer(s,
available_out, next_out, total_out, BROTLI_TRUE);
/* WriteRingBuffer checks s->meta_block_remaining_len validity. */
@@ -1949,9 +2016,10 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
break;
}
}
- if (s->buffer_length != 0) { /* Used with internal buffer. */
- if (br->avail_in == 0) { /* Successfully finished read transaction. */
- /* Accumulator contains less than 8 bits, because internal buffer
+ if (s->buffer_length != 0) { /* Used with internal buffer. */
+ if (br->avail_in == 0) {
+ /* Successfully finished read transaction.
+ Accumulator contains less than 8 bits, because internal buffer
is expanded byte-by-byte until it is enough to complete read. */
s->buffer_length = 0;
/* Switch to input stream and restart. */
@@ -1971,9 +2039,9 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
/* Retry with more data in buffer. */
continue;
}
- /* Can't finish reading and no more input.*/
+ /* Can't finish reading and no more input. */
break;
- } else { /* Input stream doesn't contain enough input. */
+ } else { /* Input stream doesn't contain enough input. */
/* Copy tail to internal buffer and return. */
*next_in = br->next_in;
*available_in = br->avail_in;
@@ -1992,7 +2060,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
if (s->buffer_length != 0) {
/* Just consumed the buffered input and produced some output. Otherwise
- it would result in "needs more input". Reset internal buffer.*/
+ it would result in "needs more input". Reset internal buffer. */
s->buffer_length = 0;
} else {
/* Using input stream in last iteration. When decoder switches to input
@@ -2012,13 +2080,32 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
break;
}
/* Decode window size. */
- s->window_bits = DecodeWindowBits(br); /* Reads 1..7 bits. */
- BROTLI_LOG_UINT(s->window_bits);
- if (s->window_bits == 9) {
- /* Value 9 is reserved for future use. */
+ result = DecodeWindowBits(s, br); /* Reads 1..8 bits. */
+ if (result != BROTLI_DECODER_SUCCESS) {
+ break;
+ }
+ if (s->large_window) {
+ s->state = BROTLI_STATE_LARGE_WINDOW_BITS;
+ break;
+ }
+ s->state = BROTLI_STATE_INITIALIZE;
+ break;
+
+ case BROTLI_STATE_LARGE_WINDOW_BITS:
+ if (!BrotliSafeReadBits(br, 6, &s->window_bits)) {
+ result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+ break;
+ }
+ if (s->window_bits < BROTLI_LARGE_MIN_WBITS ||
+ s->window_bits > BROTLI_LARGE_MAX_WBITS) {
result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
break;
}
+ s->state = BROTLI_STATE_INITIALIZE;
+ /* No break, continue to next state */
+
+ case BROTLI_STATE_INITIALIZE:
+ BROTLI_LOG_UINT(s->window_bits);
/* Maximum distance, see section 9.1. of the spec. */
s->max_backward_distance = (1 << s->window_bits) - BROTLI_WINDOW_GAP;
@@ -2034,14 +2121,16 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->block_type_trees + 3 * BROTLI_HUFFMAN_MAX_SIZE_258;
s->state = BROTLI_STATE_METABLOCK_BEGIN;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
+
case BROTLI_STATE_METABLOCK_BEGIN:
BrotliDecoderStateMetablockBegin(s);
BROTLI_LOG_UINT(s->pos);
s->state = BROTLI_STATE_METABLOCK_HEADER;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
+
case BROTLI_STATE_METABLOCK_HEADER:
- result = DecodeMetaBlockLength(s, br); /* Reads 2 - 31 bits. */
+ result = DecodeMetaBlockLength(s, br); /* Reads 2 - 31 bits. */
if (result != BROTLI_DECODER_SUCCESS) {
break;
}
@@ -2071,6 +2160,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->loop_counter = 0;
s->state = BROTLI_STATE_HUFFMAN_CODE_0;
break;
+
case BROTLI_STATE_UNCOMPRESSED: {
result = CopyUncompressedBlockToOutput(
available_out, next_out, total_out, s);
@@ -2080,6 +2170,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->state = BROTLI_STATE_METABLOCK_DONE;
break;
}
+
case BROTLI_STATE_METADATA:
for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) {
uint32_t bits;
@@ -2093,6 +2184,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->state = BROTLI_STATE_METABLOCK_DONE;
}
break;
+
case BROTLI_STATE_HUFFMAN_CODE_0:
if (s->loop_counter >= 3) {
s->state = BROTLI_STATE_METABLOCK_HEADER_2;
@@ -2110,23 +2202,28 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
break;
}
s->state = BROTLI_STATE_HUFFMAN_CODE_1;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
+
case BROTLI_STATE_HUFFMAN_CODE_1: {
+ uint32_t alphabet_size = s->num_block_types[s->loop_counter] + 2;
int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_258;
- result = ReadHuffmanCode(s->num_block_types[s->loop_counter] + 2,
+ result = ReadHuffmanCode(alphabet_size, alphabet_size,
&s->block_type_trees[tree_offset], NULL, s);
if (result != BROTLI_DECODER_SUCCESS) break;
s->state = BROTLI_STATE_HUFFMAN_CODE_2;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
}
+
case BROTLI_STATE_HUFFMAN_CODE_2: {
+ uint32_t alphabet_size = BROTLI_NUM_BLOCK_LEN_SYMBOLS;
int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26;
- result = ReadHuffmanCode(BROTLI_NUM_BLOCK_LEN_SYMBOLS,
+ result = ReadHuffmanCode(alphabet_size, alphabet_size,
&s->block_len_trees[tree_offset], NULL, s);
if (result != BROTLI_DECODER_SUCCESS) break;
s->state = BROTLI_STATE_HUFFMAN_CODE_3;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
}
+
case BROTLI_STATE_HUFFMAN_CODE_3: {
int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26;
if (!SafeReadBlockLength(s, &s->block_length[s->loop_counter],
@@ -2139,6 +2236,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->state = BROTLI_STATE_HUFFMAN_CODE_0;
break;
}
+
case BROTLI_STATE_METABLOCK_HEADER_2: {
uint32_t bits;
if (!BrotliSafeReadBits(br, 6, &bits)) {
@@ -2160,15 +2258,17 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
}
s->loop_counter = 0;
s->state = BROTLI_STATE_CONTEXT_MODES;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
}
+
case BROTLI_STATE_CONTEXT_MODES:
result = ReadContextModes(s);
if (result != BROTLI_DECODER_SUCCESS) {
break;
}
s->state = BROTLI_STATE_CONTEXT_MAP_1;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
+
case BROTLI_STATE_CONTEXT_MAP_1:
result = DecodeContextMap(
s->num_block_types[0] << BROTLI_LITERAL_CONTEXT_BITS,
@@ -2178,54 +2278,54 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
}
DetectTrivialLiteralBlockTypes(s);
s->state = BROTLI_STATE_CONTEXT_MAP_2;
- /* No break, continue to next state */
- case BROTLI_STATE_CONTEXT_MAP_2:
- {
- uint32_t num_distance_codes = s->num_direct_distance_codes +
- ((2 * BROTLI_MAX_DISTANCE_BITS) << s->distance_postfix_bits);
- BROTLI_BOOL allocation_success = BROTLI_TRUE;
- result = DecodeContextMap(
- s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS,
- &s->num_dist_htrees, &s->dist_context_map, s);
- if (result != BROTLI_DECODER_SUCCESS) {
- break;
- }
- allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
- s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS,
- s->num_literal_htrees);
- allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
- s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS,
- s->num_block_types[1]);
- allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
- s, &s->distance_hgroup, num_distance_codes,
- s->num_dist_htrees);
- if (!allocation_success) {
- return SaveErrorCode(s,
- BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS));
- }
+ /* No break, continue to next state. */
+
+ case BROTLI_STATE_CONTEXT_MAP_2: {
+ uint32_t num_direct_codes =
+ s->num_direct_distance_codes - BROTLI_NUM_DISTANCE_SHORT_CODES;
+ uint32_t num_distance_codes = BROTLI_DISTANCE_ALPHABET_SIZE(
+ num_direct_codes, s->distance_postfix_bits,
+ (s->large_window ? BROTLI_LARGE_MAX_DISTANCE_BITS :
+ BROTLI_MAX_DISTANCE_BITS));
+ uint32_t max_distance_symbol = (s->large_window ?
+ BrotliMaxDistanceSymbol(
+ num_direct_codes, s->distance_postfix_bits) :
+ num_distance_codes);
+ BROTLI_BOOL allocation_success = BROTLI_TRUE;
+ result = DecodeContextMap(
+ s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS,
+ &s->num_dist_htrees, &s->dist_context_map, s);
+ if (result != BROTLI_DECODER_SUCCESS) {
+ break;
+ }
+ allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+ s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS,
+ BROTLI_NUM_LITERAL_SYMBOLS, s->num_literal_htrees);
+ allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+ s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS,
+ BROTLI_NUM_COMMAND_SYMBOLS, s->num_block_types[1]);
+ allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+ s, &s->distance_hgroup, num_distance_codes,
+ max_distance_symbol, s->num_dist_htrees);
+ if (!allocation_success) {
+ return SaveErrorCode(s,
+ BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS));
}
s->loop_counter = 0;
s->state = BROTLI_STATE_TREE_GROUP;
- /* No break, continue to next state */
- case BROTLI_STATE_TREE_GROUP:
- {
- HuffmanTreeGroup* hgroup = NULL;
- switch (s->loop_counter) {
- case 0:
- hgroup = &s->literal_hgroup;
- break;
- case 1:
- hgroup = &s->insert_copy_hgroup;
- break;
- case 2:
- hgroup = &s->distance_hgroup;
- break;
- default:
- return SaveErrorCode(s, BROTLI_FAILURE(
- BROTLI_DECODER_ERROR_UNREACHABLE));
- }
- result = HuffmanTreeGroupDecode(hgroup, s);
+ /* No break, continue to next state. */
+ }
+
+ case BROTLI_STATE_TREE_GROUP: {
+ HuffmanTreeGroup* hgroup = NULL;
+ switch (s->loop_counter) {
+ case 0: hgroup = &s->literal_hgroup; break;
+ case 1: hgroup = &s->insert_copy_hgroup; break;
+ case 2: hgroup = &s->distance_hgroup; break;
+ default: return SaveErrorCode(s, BROTLI_FAILURE(
+ BROTLI_DECODER_ERROR_UNREACHABLE));
}
+ result = HuffmanTreeGroupDecode(hgroup, s);
if (result != BROTLI_DECODER_SUCCESS) break;
s->loop_counter++;
if (s->loop_counter >= 3) {
@@ -2239,6 +2339,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->state = BROTLI_STATE_COMMAND_BEGIN;
}
break;
+ }
+
case BROTLI_STATE_COMMAND_BEGIN:
case BROTLI_STATE_COMMAND_INNER:
case BROTLI_STATE_COMMAND_POST_DECODE_LITERALS:
@@ -2248,6 +2350,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
result = SafeProcessCommands(s);
}
break;
+
case BROTLI_STATE_COMMAND_INNER_WRITE:
case BROTLI_STATE_COMMAND_POST_WRITE_1:
case BROTLI_STATE_COMMAND_POST_WRITE_2:
@@ -2262,7 +2365,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
}
if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) {
if (s->meta_block_remaining_len == 0) {
- /* Next metablock, if any */
+ /* Next metablock, if any. */
s->state = BROTLI_STATE_METABLOCK_DONE;
} else {
s->state = BROTLI_STATE_COMMAND_BEGIN;
@@ -2282,6 +2385,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->state = BROTLI_STATE_COMMAND_INNER;
}
break;
+
case BROTLI_STATE_METABLOCK_DONE:
if (s->meta_block_remaining_len < 0) {
result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2);
@@ -2302,7 +2406,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
*next_in = br->next_in;
}
s->state = BROTLI_STATE_DONE;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
+
case BROTLI_STATE_DONE:
if (s->ringbuffer != 0) {
result = WriteRingBuffer(
diff --git a/c/dec/huffman.c b/c/dec/huffman.c
index 4fe7bfa..f142442 100644
--- a/c/dec/huffman.c
+++ b/c/dec/huffman.c
@@ -86,9 +86,9 @@ static BROTLI_INLINE void ReplicateValue(HuffmanCode* table,
} while (end > 0);
}
-/* Returns the table width of the next 2nd level table. count is the histogram
- of bit lengths for the remaining symbols, len is the code length of the next
- processed symbol */
+/* Returns the table width of the next 2nd level table. |count| is the histogram
+ of bit lengths for the remaining symbols, |len| is the code length of the
+ next processed symbol. */
static BROTLI_INLINE int NextTableBitSize(const uint16_t* const count,
int len, int root_bits) {
int left = 1 << (len - root_bits);
@@ -118,7 +118,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <=
BROTLI_REVERSE_BITS_MAX);
- /* generate offsets into sorted symbol table by code length */
+ /* Generate offsets into sorted symbol table by code length. */
symbol = -1;
bits = 1;
BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, {
@@ -129,7 +129,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
/* Symbols with code length 0 are placed after all other symbols. */
offset[0] = BROTLI_CODE_LENGTH_CODES - 1;
- /* sort symbols by length, by symbol order within each length */
+ /* Sort symbols by length, by symbol order within each length. */
symbol = BROTLI_CODE_LENGTH_CODES;
do {
BROTLI_REPEAT(6, {
@@ -150,7 +150,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
return;
}
- /* fill in table */
+ /* Fill in table. */
key = 0;
key_step = BROTLI_REVERSE_BITS_LOWEST;
symbol = 0;
@@ -200,9 +200,8 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
table_size = 1 << table_bits;
total_size = table_size;
- /* fill in root table */
- /* let's reduce the table size to a smaller size if possible, and */
- /* create the repetitions by memcpy if possible in the coming loop */
+ /* Fill in the root table. Reduce the table size to if possible,
+ and create the repetitions by memcpy. */
if (table_bits > max_length) {
table_bits = max_length;
table_size = 1 << table_bits;
@@ -224,15 +223,14 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
key_step >>= 1;
} while (++bits <= table_bits);
- /* if root_bits != table_bits we only created one fraction of the */
- /* table, and we need to replicate it now. */
+ /* If root_bits != table_bits then replicate to fill the remaining slots. */
while (total_size != table_size) {
memcpy(&table[table_size], &table[0],
(size_t)table_size * sizeof(table[0]));
table_size <<= 1;
}
- /* fill in 2nd level tables and add pointers to root table */
+ /* Fill in 2nd level tables and add pointers to root table. */
key_step = BROTLI_REVERSE_BITS_LOWEST >> (root_bits - 1);
sub_key = (BROTLI_REVERSE_BITS_LOWEST << 1);
sub_key_step = BROTLI_REVERSE_BITS_LOWEST;
diff --git a/c/dec/huffman.h b/c/dec/huffman.h
index 730af88..521ec6e 100644
--- a/c/dec/huffman.h
+++ b/c/dec/huffman.h
@@ -19,10 +19,11 @@ extern "C" {
#define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15
/* Maximum possible Huffman table size for an alphabet size of (index * 32),
- * max code length 15 and root table bits 8. */
+ max code length 15 and root table bits 8. */
static const uint16_t kMaxHuffmanTableSize[] = {
256, 402, 436, 468, 500, 534, 566, 598, 630, 662, 694, 726, 758, 790, 822,
- 854, 886, 920, 952, 984, 1016, 1048, 1080};
+ 854, 886, 920, 952, 984, 1016, 1048, 1080, 1112, 1144, 1176, 1208, 1240, 1272,
+ 1304, 1336, 1368, 1400, 1432, 1464, 1496, 1528};
/* BROTLI_NUM_BLOCK_LEN_SYMBOLS == 26 */
#define BROTLI_HUFFMAN_MAX_SIZE_26 396
/* BROTLI_MAX_BLOCK_TYPE_SYMBOLS == 258 */
@@ -41,23 +42,26 @@ typedef struct {
BROTLI_INTERNAL void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table,
const uint8_t* const code_lengths, uint16_t* count);
-/* Builds Huffman lookup table assuming code lengths are in symbol order. */
-/* Returns size of resulting table. */
+/* Builds Huffman lookup table assuming code lengths are in symbol order.
+ Returns size of resulting table. */
BROTLI_INTERNAL uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
int root_bits, const uint16_t* const symbol_lists, uint16_t* count_arg);
-/* Builds a simple Huffman table. The num_symbols parameter is to be */
-/* interpreted as follows: 0 means 1 symbol, 1 means 2 symbols, 2 means 3 */
-/* symbols, 3 means 4 symbols with lengths 2,2,2,2, 4 means 4 symbols with */
-/* lengths 1,2,3,3. */
+/* Builds a simple Huffman table. The |num_symbols| parameter is to be
+ interpreted as follows: 0 means 1 symbol, 1 means 2 symbols,
+ 2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2],
+ 4 means 4 symbols with lengths [1, 2, 3, 3]. */
BROTLI_INTERNAL uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
int root_bits, uint16_t* symbols, uint32_t num_symbols);
/* Contains a collection of Huffman trees with the same alphabet size. */
+/* max_symbol is needed due to simple codes since log2(alphabet_size) could be
+ greater than log2(max_symbol). */
typedef struct {
HuffmanCode** htrees;
HuffmanCode* codes;
uint16_t alphabet_size;
+ uint16_t max_symbol;
uint16_t num_htrees;
} HuffmanTreeGroup;
diff --git a/c/dec/prefix.h b/c/dec/prefix.h
index aa776c7..3ea062d 100644
--- a/c/dec/prefix.h
+++ b/c/dec/prefix.h
@@ -5,8 +5,7 @@
*/
/* Lookup tables to map prefix codes to value ranges. This is used during
- decoding of the block lengths, literal insertion lengths and copy lengths.
-*/
+ decoding of the block lengths, literal insertion lengths and copy lengths. */
#ifndef BROTLI_DEC_PREFIX_H_
#define BROTLI_DEC_PREFIX_H_
@@ -14,8 +13,8 @@
#include "../common/constants.h"
#include <brotli/types.h>
-/* Represents the range of values belonging to a prefix code: */
-/* [offset, offset + 2^nbits) */
+/* Represents the range of values belonging to a prefix code:
+ [offset, offset + 2^nbits) */
struct PrefixCodeRange {
uint16_t offset;
uint8_t nbits;
diff --git a/c/dec/state.c b/c/dec/state.c
index eaec823..e0b37c2 100644
--- a/c/dec/state.c
+++ b/c/dec/state.c
@@ -15,25 +15,11 @@
extern "C" {
#endif
-static void* DefaultAllocFunc(void* opaque, size_t size) {
- BROTLI_UNUSED(opaque);
- return malloc(size);
-}
-
-static void DefaultFreeFunc(void* opaque, void* address) {
- BROTLI_UNUSED(opaque);
- free(address);
-}
-
-void BrotliDecoderStateInit(BrotliDecoderState* s) {
- BrotliDecoderStateInitWithCustomAllocators(s, 0, 0, 0);
-}
-
-void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s,
+BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
if (!alloc_func) {
- s->alloc_func = DefaultAllocFunc;
- s->free_func = DefaultFreeFunc;
+ s->alloc_func = BrotliDefaultAllocFunc;
+ s->free_func = BrotliDefaultFreeFunc;
s->memory_manager_opaque = 0;
} else {
s->alloc_func = alloc_func;
@@ -45,6 +31,7 @@ void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s,
BrotliInitBitReader(&s->br);
s->state = BROTLI_STATE_UNINITED;
+ s->large_window = 0;
s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
@@ -103,13 +90,16 @@ void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s,
s->mtf_upper_bound = 63;
s->dictionary = BrotliGetDictionary();
+ s->transforms = BrotliGetTransforms();
+
+ return BROTLI_TRUE;
}
void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) {
s->meta_block_remaining_len = 0;
- s->block_length[0] = 1U << 28;
- s->block_length[1] = 1U << 28;
- s->block_length[2] = 1U << 28;
+ s->block_length[0] = 1U << 24;
+ s->block_length[1] = 1U << 24;
+ s->block_length[2] = 1U << 24;
s->num_block_types[0] = 1;
s->num_block_types[1] = 1;
s->num_block_types[2] = 1;
@@ -126,8 +116,7 @@ void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) {
s->literal_htree = NULL;
s->dist_context_map_slice = NULL;
s->dist_htree_index = 0;
- s->context_lookup1 = NULL;
- s->context_lookup2 = NULL;
+ s->context_lookup = NULL;
s->literal_hgroup.codes = NULL;
s->literal_hgroup.htrees = NULL;
s->insert_copy_hgroup.codes = NULL;
@@ -153,7 +142,8 @@ void BrotliDecoderStateCleanup(BrotliDecoderState* s) {
}
BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s,
- HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t ntrees) {
+ HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t max_symbol,
+ uint32_t ntrees) {
/* Pack two allocations into one */
const size_t max_table_size = kMaxHuffmanTableSize[(alphabet_size + 31) >> 5];
const size_t code_size = sizeof(HuffmanCode) * ntrees * max_table_size;
@@ -162,6 +152,7 @@ BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s,
HuffmanCode** p = (HuffmanCode**)BROTLI_DECODER_ALLOC(s,
code_size + htree_size);
group->alphabet_size = (uint16_t)alphabet_size;
+ group->max_symbol = (uint16_t)max_symbol;
group->num_htrees = (uint16_t)ntrees;
group->htrees = p;
group->codes = (HuffmanCode*)(&p[ntrees]);
diff --git a/c/dec/state.h b/c/dec/state.h
index 069beca..d28b639 100644
--- a/c/dec/state.h
+++ b/c/dec/state.h
@@ -12,6 +12,7 @@
#include "../common/constants.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
+#include "../common/transform.h"
#include <brotli/types.h>
#include "./bit_reader.h"
#include "./huffman.h"
@@ -22,6 +23,8 @@ extern "C" {
typedef enum {
BROTLI_STATE_UNINITED,
+ BROTLI_STATE_LARGE_WINDOW_BITS,
+ BROTLI_STATE_INITIALIZE,
BROTLI_STATE_METABLOCK_BEGIN,
BROTLI_STATE_METABLOCK_HEADER,
BROTLI_STATE_METABLOCK_HEADER_2,
@@ -126,23 +129,22 @@ struct BrotliDecoderStateStruct {
uint8_t* ringbuffer;
uint8_t* ringbuffer_end;
HuffmanCode* htree_command;
- const uint8_t* context_lookup1;
- const uint8_t* context_lookup2;
+ const uint8_t* context_lookup;
uint8_t* context_map_slice;
uint8_t* dist_context_map_slice;
- /* This ring buffer holds a few past copy distances that will be used by */
- /* some special distance codes. */
+ /* This ring buffer holds a few past copy distances that will be used by
+ some special distance codes. */
HuffmanTreeGroup literal_hgroup;
HuffmanTreeGroup insert_copy_hgroup;
HuffmanTreeGroup distance_hgroup;
HuffmanCode* block_type_trees;
HuffmanCode* block_len_trees;
/* This is true if the literal context map histogram type always matches the
- block type. It is then not needed to keep the context (faster decoding). */
+ block type. It is then not needed to keep the context (faster decoding). */
int trivial_literal_context;
- /* Distance context is actual after command is decoded and before distance
- is computed. After distance computation it is used as a temporary variable. */
+ /* Distance context is actual after command is decoded and before distance is
+ computed. After distance computation it is used as a temporary variable. */
int distance_context;
int meta_block_remaining_len;
uint32_t block_length_index;
@@ -162,11 +164,11 @@ struct BrotliDecoderStateStruct {
int copy_length;
int distance_code;
- /* For partial write operations */
- size_t rb_roundtrips; /* How many times we went around the ring-buffer */
- size_t partial_pos_out; /* How much output to the user in total */
+ /* For partial write operations. */
+ size_t rb_roundtrips; /* how many times we went around the ring-buffer */
+ size_t partial_pos_out; /* how much output to the user in total */
- /* For ReadHuffmanCode */
+ /* For ReadHuffmanCode. */
uint32_t symbol;
uint32_t repeat;
uint32_t space;
@@ -180,25 +182,26 @@ struct BrotliDecoderStateStruct {
/* Tails of symbol chains. */
int next_symbol[32];
uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES];
- /* Population counts for the code lengths */
+ /* Population counts for the code lengths. */
uint16_t code_length_histo[16];
- /* For HuffmanTreeGroupDecode */
+ /* For HuffmanTreeGroupDecode. */
int htree_index;
HuffmanCode* next;
- /* For DecodeContextMap */
+ /* For DecodeContextMap. */
uint32_t context_index;
uint32_t max_run_length_prefix;
uint32_t code;
HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272];
- /* For InverseMoveToFrontTransform */
+ /* For InverseMoveToFrontTransform. */
uint32_t mtf_upper_bound;
uint32_t mtf[64 + 1];
- /* less used attributes are in the end of this struct */
- /* States inside function calls */
+ /* Less used attributes are at the end of this struct. */
+
+ /* States inside function calls. */
BrotliRunningMetablockHeaderState substate_metablock_header;
BrotliRunningTreeGroupState substate_tree_group;
BrotliRunningContextMapState substate_context_map;
@@ -212,6 +215,7 @@ struct BrotliDecoderStateStruct {
unsigned int is_metadata : 1;
unsigned int should_wrap_ringbuffer : 1;
unsigned int canny_ringbuffer_allocation : 1;
+ unsigned int large_window : 1;
unsigned int size_nibbles : 8;
uint32_t window_bits;
@@ -222,6 +226,7 @@ struct BrotliDecoderStateStruct {
uint8_t* context_modes;
const BrotliDictionary* dictionary;
+ const BrotliTransforms* transforms;
uint32_t trivial_literal_contexts[8]; /* 256 bits */
};
@@ -229,17 +234,15 @@ struct BrotliDecoderStateStruct {
typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal;
#define BrotliDecoderState BrotliDecoderStateInternal
-BROTLI_INTERNAL void BrotliDecoderStateInit(BrotliDecoderState* s);
-BROTLI_INTERNAL void BrotliDecoderStateInitWithCustomAllocators(
- BrotliDecoderState* s, brotli_alloc_func alloc_func,
- brotli_free_func free_func, void* opaque);
+BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
+ brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
BROTLI_INTERNAL void BrotliDecoderStateCleanup(BrotliDecoderState* s);
BROTLI_INTERNAL void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s);
BROTLI_INTERNAL void BrotliDecoderStateCleanupAfterMetablock(
BrotliDecoderState* s);
BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(
BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size,
- uint32_t ntrees);
+ uint32_t max_symbol, uint32_t ntrees);
#define BROTLI_DECODER_ALLOC(S, L) S->alloc_func(S->memory_manager_opaque, L)
diff --git a/c/dec/transform.h b/c/dec/transform.h
deleted file mode 100644
index e1d96ff..0000000
--- a/c/dec/transform.h
+++ /dev/null
@@ -1,300 +0,0 @@
-/* Copyright 2013 Google Inc. All Rights Reserved.
-
- Distributed under MIT license.
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-
-/* Transformations on dictionary words. */
-
-#ifndef BROTLI_DEC_TRANSFORM_H_
-#define BROTLI_DEC_TRANSFORM_H_
-
-#include "../common/platform.h"
-#include <brotli/types.h>
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-enum WordTransformType {
- kIdentity = 0,
- kOmitLast1 = 1,
- kOmitLast2 = 2,
- kOmitLast3 = 3,
- kOmitLast4 = 4,
- kOmitLast5 = 5,
- kOmitLast6 = 6,
- kOmitLast7 = 7,
- kOmitLast8 = 8,
- kOmitLast9 = 9,
- kUppercaseFirst = 10,
- kUppercaseAll = 11,
- kOmitFirst1 = 12,
- kOmitFirst2 = 13,
- kOmitFirst3 = 14,
- kOmitFirst4 = 15,
- kOmitFirst5 = 16,
- kOmitFirst6 = 17,
- kOmitFirst7 = 18,
- kOmitFirst8 = 19,
- kOmitFirst9 = 20
-};
-
-typedef struct {
- const uint8_t prefix_id;
- const uint8_t transform;
- const uint8_t suffix_id;
-} Transform;
-
-static const char kPrefixSuffix[208] =
- "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0"
- " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0"
- " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0"
- " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous ";
-
-enum {
- /* EMPTY = ""
- SP = " "
- DQUOT = "\""
- SQUOT = "'"
- CLOSEBR = "]"
- OPEN = "("
- SLASH = "/"
- NBSP = non-breaking space "\0xc2\xa0"
- */
- kPFix_EMPTY = 0,
- kPFix_SP = 1,
- kPFix_COMMASP = 3,
- kPFix_SPofSPtheSP = 6,
- kPFix_SPtheSP = 9,
- kPFix_eSP = 12,
- kPFix_SPofSP = 15,
- kPFix_sSP = 20,
- kPFix_DOT = 23,
- kPFix_SPandSP = 25,
- kPFix_SPinSP = 31,
- kPFix_DQUOT = 36,
- kPFix_SPtoSP = 38,
- kPFix_DQUOTGT = 43,
- kPFix_NEWLINE = 46,
- kPFix_DOTSP = 48,
- kPFix_CLOSEBR = 51,
- kPFix_SPforSP = 53,
- kPFix_SPaSP = 59,
- kPFix_SPthatSP = 63,
- kPFix_SQUOT = 70,
- kPFix_SPwithSP = 72,
- kPFix_SPfromSP = 79,
- kPFix_SPbySP = 86,
- kPFix_OPEN = 91,
- kPFix_DOTSPTheSP = 93,
- kPFix_SPonSP = 100,
- kPFix_SPasSP = 105,
- kPFix_SPisSP = 110,
- kPFix_ingSP = 115,
- kPFix_NEWLINETAB = 120,
- kPFix_COLON = 123,
- kPFix_edSP = 125,
- kPFix_EQDQUOT = 129,
- kPFix_SPatSP = 132,
- kPFix_lySP = 137,
- kPFix_COMMA = 141,
- kPFix_EQSQUOT = 143,
- kPFix_DOTcomSLASH = 146,
- kPFix_DOTSPThisSP = 152,
- kPFix_SPnotSP = 160,
- kPFix_erSP = 166,
- kPFix_alSP = 170,
- kPFix_fulSP = 174,
- kPFix_iveSP = 179,
- kPFix_lessSP = 184,
- kPFix_estSP = 190,
- kPFix_izeSP = 195,
- kPFix_NBSP = 200,
- kPFix_ousSP = 203
-};
-
-static const Transform kTransforms[] = {
- { kPFix_EMPTY, kIdentity, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_SP },
- { kPFix_SP, kIdentity, kPFix_SP },
- { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_SP },
- { kPFix_EMPTY, kIdentity, kPFix_SPtheSP },
- { kPFix_SP, kIdentity, kPFix_EMPTY },
- { kPFix_sSP, kIdentity, kPFix_SP },
- { kPFix_EMPTY, kIdentity, kPFix_SPofSP },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_SPandSP },
- { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY },
- { kPFix_COMMASP, kIdentity, kPFix_SP },
- { kPFix_EMPTY, kIdentity, kPFix_COMMASP },
- { kPFix_SP, kUppercaseFirst, kPFix_SP },
- { kPFix_EMPTY, kIdentity, kPFix_SPinSP },
- { kPFix_EMPTY, kIdentity, kPFix_SPtoSP },
- { kPFix_eSP, kIdentity, kPFix_SP },
- { kPFix_EMPTY, kIdentity, kPFix_DQUOT },
- { kPFix_EMPTY, kIdentity, kPFix_DOT },
- { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT },
- { kPFix_EMPTY, kIdentity, kPFix_NEWLINE },
- { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR },
- { kPFix_EMPTY, kIdentity, kPFix_SPforSP },
- { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_SPaSP },
- { kPFix_EMPTY, kIdentity, kPFix_SPthatSP },
- { kPFix_SP, kUppercaseFirst, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_DOTSP },
- { kPFix_DOT, kIdentity, kPFix_EMPTY },
- { kPFix_SP, kIdentity, kPFix_COMMASP },
- { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_SPwithSP },
- { kPFix_EMPTY, kIdentity, kPFix_SQUOT },
- { kPFix_EMPTY, kIdentity, kPFix_SPfromSP },
- { kPFix_EMPTY, kIdentity, kPFix_SPbySP },
- { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY },
- { kPFix_SPtheSP, kIdentity, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP },
- { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_SPonSP },
- { kPFix_EMPTY, kIdentity, kPFix_SPasSP },
- { kPFix_EMPTY, kIdentity, kPFix_SPisSP },
- { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitLast1, kPFix_ingSP },
- { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB },
- { kPFix_EMPTY, kIdentity, kPFix_COLON },
- { kPFix_SP, kIdentity, kPFix_DOTSP },
- { kPFix_EMPTY, kIdentity, kPFix_edSP },
- { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_OPEN },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP },
- { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_SPatSP },
- { kPFix_EMPTY, kIdentity, kPFix_lySP },
- { kPFix_SPtheSP, kIdentity, kPFix_SPofSP },
- { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY },
- { kPFix_SP, kUppercaseFirst, kPFix_COMMASP },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT },
- { kPFix_DOT, kIdentity, kPFix_OPEN },
- { kPFix_EMPTY, kUppercaseAll, kPFix_SP },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT },
- { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT },
- { kPFix_SP, kIdentity, kPFix_DOT },
- { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY },
- { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT },
- { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP },
- { kPFix_EMPTY, kIdentity, kPFix_COMMA },
- { kPFix_DOT, kIdentity, kPFix_SP },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT },
- { kPFix_EMPTY, kIdentity, kPFix_SPnotSP },
- { kPFix_SP, kIdentity, kPFix_EQDQUOT },
- { kPFix_EMPTY, kIdentity, kPFix_erSP },
- { kPFix_SP, kUppercaseAll, kPFix_SP },
- { kPFix_EMPTY, kIdentity, kPFix_alSP },
- { kPFix_SP, kUppercaseAll, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT },
- { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP },
- { kPFix_SP, kIdentity, kPFix_OPEN },
- { kPFix_EMPTY, kIdentity, kPFix_fulSP },
- { kPFix_SP, kUppercaseFirst, kPFix_DOTSP },
- { kPFix_EMPTY, kIdentity, kPFix_iveSP },
- { kPFix_EMPTY, kIdentity, kPFix_lessSP },
- { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT },
- { kPFix_EMPTY, kIdentity, kPFix_estSP },
- { kPFix_SP, kUppercaseFirst, kPFix_DOT },
- { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT },
- { kPFix_SP, kIdentity, kPFix_EQSQUOT },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA },
- { kPFix_EMPTY, kIdentity, kPFix_izeSP },
- { kPFix_EMPTY, kUppercaseAll, kPFix_DOT },
- { kPFix_NBSP, kIdentity, kPFix_EMPTY },
- { kPFix_SP, kIdentity, kPFix_COMMA },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT },
- { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT },
- { kPFix_EMPTY, kIdentity, kPFix_ousSP },
- { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT },
- { kPFix_SP, kUppercaseFirst, kPFix_COMMA },
- { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT },
- { kPFix_SP, kUppercaseAll, kPFix_COMMASP },
- { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA },
- { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN },
- { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP },
- { kPFix_SP, kUppercaseAll, kPFix_DOT },
- { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT },
- { kPFix_SP, kUppercaseAll, kPFix_DOTSP },
- { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT },
- { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT },
- { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT },
-};
-
-static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]);
-
-static int ToUpperCase(uint8_t* p) {
- if (p[0] < 0xc0) {
- if (p[0] >= 'a' && p[0] <= 'z') {
- p[0] ^= 32;
- }
- return 1;
- }
- /* An overly simplified uppercasing model for UTF-8. */
- if (p[0] < 0xe0) {
- p[1] ^= 32;
- return 2;
- }
- /* An arbitrary transform for three byte characters. */
- p[2] ^= 5;
- return 3;
-}
-
-static BROTLI_NOINLINE int BrotliTransformDictionaryWord(
- uint8_t* dst, const uint8_t* word, int len, int transform) {
- int idx = 0;
- {
- const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id];
- while (*prefix) { dst[idx++] = (uint8_t)*prefix++; }
- }
- {
- const int t = kTransforms[transform].transform;
- int i = 0;
- int skip = t - (kOmitFirst1 - 1);
- if (skip > 0) {
- word += skip;
- len -= skip;
- } else if (t <= kOmitLast9) {
- len -= t;
- }
- while (i < len) { dst[idx++] = word[i++]; }
- if (t == kUppercaseFirst) {
- ToUpperCase(&dst[idx - len]);
- } else if (t == kUppercaseAll) {
- uint8_t* uppercase = &dst[idx - len];
- while (len > 0) {
- int step = ToUpperCase(uppercase);
- uppercase += step;
- len -= step;
- }
- }
- }
- {
- const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id];
- while (*suffix) { dst[idx++] = (uint8_t)*suffix++; }
- return idx;
- }
-}
-
-#if defined(__cplusplus) || defined(c_plusplus)
-} /* extern "C" */
-#endif
-
-#endif /* BROTLI_DEC_TRANSFORM_H_ */
diff --git a/c/enc/backward_references.c b/c/enc/backward_references.c
index cce0cd4..62ecea7 100644
--- a/c/enc/backward_references.c
+++ b/c/enc/backward_references.c
@@ -102,23 +102,16 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
#undef CAT
#undef EXPAND_CAT
-void BrotliCreateBackwardReferences(const BrotliDictionary* dictionary,
- size_t num_bytes,
- size_t position,
- const uint8_t* ringbuffer,
- size_t ringbuffer_mask,
- const BrotliEncoderParams* params,
- HasherHandle hasher,
- int* dist_cache,
- size_t* last_insert_len,
- Command* commands,
- size_t* num_commands,
- size_t* num_literals) {
+void BrotliCreateBackwardReferences(
+ size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+ size_t ringbuffer_mask, const BrotliEncoderParams* params,
+ HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
+ Command* commands, size_t* num_commands, size_t* num_literals) {
switch (params->hasher.type) {
#define CASE_(N) \
case N: \
- CreateBackwardReferencesNH ## N(dictionary, \
- kStaticDictionaryHash, num_bytes, position, ringbuffer, \
+ CreateBackwardReferencesNH ## N( \
+ num_bytes, position, ringbuffer, \
ringbuffer_mask, params, hasher, dist_cache, \
last_insert_len, commands, num_commands, num_literals); \
return;
diff --git a/c/enc/backward_references.h b/c/enc/backward_references.h
index 631c2f6..3a41466 100644
--- a/c/enc/backward_references.h
+++ b/c/enc/backward_references.h
@@ -26,7 +26,6 @@ extern "C" {
CreateBackwardReferences calls, and must be incremented by the amount written
by this call. */
BROTLI_INTERNAL void BrotliCreateBackwardReferences(
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
diff --git a/c/enc/backward_references_hq.c b/c/enc/backward_references_hq.c
index b05cb4f..f2f9918 100644
--- a/c/enc/backward_references_hq.c
+++ b/c/enc/backward_references_hq.c
@@ -25,6 +25,10 @@
extern "C" {
#endif
+#define BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE ( \
+ BROTLI_NUM_DISTANCE_SHORT_CODES + (2 * BROTLI_LARGE_MAX_DISTANCE_BITS))
+/* BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE == 74 */
+
static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */
static const uint32_t kDistanceCacheIndex[] = {
@@ -39,40 +43,40 @@ void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
size_t i;
stub.length = 1;
stub.distance = 0;
- stub.insert_length = 0;
+ stub.dcode_insert_length = 0;
stub.u.cost = kInfinity;
for (i = 0; i < length; ++i) array[i] = stub;
}
static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) {
- return self->length & 0xffffff;
+ return self->length & 0x1FFFFFF;
}
static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) {
- const uint32_t modifier = self->length >> 24;
+ const uint32_t modifier = self->length >> 25;
return ZopfliNodeCopyLength(self) + 9u - modifier;
}
static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) {
- return self->distance & 0x7ffffff;
+ return self->distance;
}
static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) {
- const uint32_t short_code = self->distance >> 27;
+ const uint32_t short_code = self->dcode_insert_length >> 27;
return short_code == 0 ?
ZopfliNodeCopyDistance(self) + BROTLI_NUM_DISTANCE_SHORT_CODES - 1 :
short_code - 1;
}
static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
- return ZopfliNodeCopyLength(self) + self->insert_length;
+ return ZopfliNodeCopyLength(self) + (self->dcode_insert_length & 0x7FFFFFF);
}
/* Histogram based cost model for zopflification. */
typedef struct ZopfliCostModel {
/* The insert and copy length symbols. */
float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS];
- float cost_dist_[BROTLI_NUM_DISTANCE_SYMBOLS];
+ float cost_dist_[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE];
/* Cumulative costs of literals per position in the stream. */
float* literal_costs_;
float min_cost_cmd_;
@@ -91,17 +95,26 @@ static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) {
}
static void SetCost(const uint32_t* histogram, size_t histogram_size,
- float* cost) {
+ BROTLI_BOOL literal_histogram, float* cost) {
size_t sum = 0;
+ size_t missing_symbol_sum;
float log2sum;
+ float missing_symbol_cost;
size_t i;
for (i = 0; i < histogram_size; i++) {
sum += histogram[i];
}
log2sum = (float)FastLog2(sum);
+ missing_symbol_sum = sum;
+ if (!literal_histogram) {
+ for (i = 0; i < histogram_size; i++) {
+ if (histogram[i] == 0) missing_symbol_sum++;
+ }
+ }
+ missing_symbol_cost = (float)FastLog2(missing_symbol_sum) + 2;
for (i = 0; i < histogram_size; i++) {
if (histogram[i] == 0) {
- cost[i] = log2sum + 2;
+ cost[i] = missing_symbol_cost;
continue;
}
@@ -122,7 +135,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
size_t last_insert_len) {
uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
- uint32_t histogram_dist[BROTLI_NUM_DISTANCE_SYMBOLS];
+ uint32_t histogram_dist[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE];
float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
size_t pos = position - last_insert_len;
float min_cost_cmd = kInfinity;
@@ -136,7 +149,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
for (i = 0; i < num_commands; i++) {
size_t inslength = commands[i].insert_len_;
size_t copylength = CommandCopyLen(&commands[i]);
- size_t distcode = commands[i].dist_prefix_;
+ size_t distcode = commands[i].dist_prefix_ & 0x3FF;
size_t cmdcode = commands[i].cmd_prefix_;
size_t j;
@@ -150,9 +163,12 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
pos += inslength + copylength;
}
- SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, cost_literal);
- SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, cost_cmd);
- SetCost(histogram_dist, BROTLI_NUM_DISTANCE_SYMBOLS, self->cost_dist_);
+ SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE,
+ cost_literal);
+ SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE,
+ cost_cmd);
+ SetCost(histogram_dist, BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE, BROTLI_FALSE,
+ self->cost_dist_);
for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
min_cost_cmd = BROTLI_MIN(float, min_cost_cmd, cost_cmd[i]);
@@ -161,11 +177,14 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
{
float* literal_costs = self->literal_costs_;
+ float literal_carry = 0.0;
size_t num_bytes = self->num_bytes_;
literal_costs[0] = 0.0;
for (i = 0; i < num_bytes; ++i) {
- literal_costs[i + 1] = literal_costs[i] +
+ literal_carry +=
cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
+ literal_costs[i + 1] = literal_costs[i] + literal_carry;
+ literal_carry -= literal_costs[i + 1] - literal_costs[i];
}
}
}
@@ -175,6 +194,7 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
float* literal_costs = self->literal_costs_;
+ float literal_carry = 0.0;
float* cost_dist = self->cost_dist_;
float* cost_cmd = self->cost_cmd_;
size_t num_bytes = self->num_bytes_;
@@ -183,12 +203,14 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
ringbuffer, &literal_costs[1]);
literal_costs[0] = 0.0;
for (i = 0; i < num_bytes; ++i) {
- literal_costs[i + 1] += literal_costs[i];
+ literal_carry += literal_costs[i + 1];
+ literal_costs[i + 1] = literal_costs[i] + literal_carry;
+ literal_carry -= literal_costs[i + 1] - literal_costs[i];
}
for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
cost_cmd[i] = (float)FastLog2(11 + (uint32_t)i);
}
- for (i = 0; i < BROTLI_NUM_DISTANCE_SYMBOLS; ++i) {
+ for (i = 0; i < BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE; ++i) {
cost_dist[i] = (float)FastLog2(20 + (uint32_t)i);
}
self->min_cost_cmd_ = (float)FastLog2(11);
@@ -221,9 +243,10 @@ static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, size_t pos,
size_t start_pos, size_t len, size_t len_code, size_t dist,
size_t short_code, float cost) {
ZopfliNode* next = &nodes[pos + len];
- next->length = (uint32_t)(len | ((len + 9u - len_code) << 24));
- next->distance = (uint32_t)(dist | (short_code << 27));
- next->insert_length = (uint32_t)(pos - start_pos);
+ next->length = (uint32_t)(len | ((len + 9u - len_code) << 25));
+ next->distance = (uint32_t)dist;
+ next->dcode_insert_length = (uint32_t)(
+ (short_code << 27) | (pos - start_pos));
next->u.cost = cost;
}
@@ -303,7 +326,7 @@ static uint32_t ComputeDistanceShortcut(const size_t block_start,
const size_t gap,
const ZopfliNode* nodes) {
const size_t clen = ZopfliNodeCopyLength(&nodes[pos]);
- const size_t ilen = nodes[pos].insert_length;
+ const size_t ilen = nodes[pos].dcode_insert_length & 0x7FFFFFF;
const size_t dist = ZopfliNodeCopyDistance(&nodes[pos]);
/* Since |block_start + pos| is the end position of the command, the copy part
starts from |block_start + pos - clen|. Distances that are greater than
@@ -335,7 +358,7 @@ static void ComputeDistanceCache(const size_t pos,
int idx = 0;
size_t p = nodes[pos].u.shortcut;
while (idx < 4 && p > 0) {
- const size_t ilen = nodes[p].insert_length;
+ const size_t ilen = nodes[p].dcode_insert_length & 0x7FFFFFF;
const size_t clen = ZopfliNodeCopyLength(&nodes[p]);
const size_t dist = ZopfliNodeCopyDistance(&nodes[p]);
dist_cache[idx++] = (int)dist;
@@ -483,9 +506,9 @@ static size_t UpdateNodes(
float dist_cost;
size_t max_match_len;
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
- distnumextra = distextra >> 24;
+ distnumextra = dist_symbol >> 10;
dist_cost = base_cost + (float)distnumextra +
- ZopfliCostModelGetDistanceCost(model, dist_symbol);
+ ZopfliCostModelGetDistanceCost(model, dist_symbol & 0x3FF);
/* Try all copy lengths up until the maximum copy length corresponding
to this distance. If the distance refers to the static dictionary, or
@@ -517,7 +540,8 @@ static size_t ComputeShortestPathFromNodes(size_t num_bytes,
ZopfliNode* nodes) {
size_t index = num_bytes;
size_t num_commands = 0;
- while (nodes[index].insert_length == 0 && nodes[index].length == 1) --index;
+ while ((nodes[index].dcode_insert_length & 0x7FFFFFF) == 0 &&
+ nodes[index].length == 1) --index;
nodes[index].u.next = BROTLI_UINT32_MAX;
while (index != 0) {
size_t len = ZopfliNodeCommandLength(&nodes[index]);
@@ -546,7 +570,7 @@ void BrotliZopfliCreateCommands(const size_t num_bytes,
for (i = 0; offset != BROTLI_UINT32_MAX; i++) {
const ZopfliNode* next = &nodes[pos + offset];
size_t copy_length = ZopfliNodeCopyLength(next);
- size_t insert_length = next->insert_length;
+ size_t insert_length = next->dcode_insert_length & 0x7FFFFFF;
pos += insert_length;
offset = next->u.next;
if (i == 0) {
@@ -624,7 +648,6 @@ static size_t ZopfliIterate(size_t num_bytes,
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
const size_t max_backward_limit, const int* dist_cache, HasherHandle hasher,
@@ -649,9 +672,9 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
const size_t pos = position + i;
const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
size_t skip;
- size_t num_matches = FindAllMatchesH10(hasher, dictionary, ringbuffer,
- ringbuffer_mask, pos, num_bytes - i, max_distance, gap, params,
- &matches[lz_matches_offset]);
+ size_t num_matches = FindAllMatchesH10(hasher, &params->dictionary,
+ ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance, gap,
+ params, &matches[lz_matches_offset]);
if (num_matches > 0 &&
BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
matches[0] = matches[num_matches - 1];
@@ -683,7 +706,6 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
}
void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
@@ -693,7 +715,7 @@ void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
if (BROTLI_IS_OOM(m)) return;
BrotliInitZopfliNodes(nodes, num_bytes + 1);
- *num_commands += BrotliZopfliComputeShortestPath(m, dictionary,
+ *num_commands += BrotliZopfliComputeShortestPath(m,
num_bytes, position, ringbuffer, ringbuffer_mask,
params, max_backward_limit, dist_cache, hasher, nodes);
if (BROTLI_IS_OOM(m)) return;
@@ -703,7 +725,6 @@ void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
}
void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
@@ -736,8 +757,8 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches);
if (BROTLI_IS_OOM(m)) return;
- num_found_matches = FindAllMatchesH10(hasher, dictionary,
- ringbuffer, ringbuffer_mask, pos, max_length,
+ num_found_matches = FindAllMatchesH10(hasher,
+ &params->dictionary, ringbuffer, ringbuffer_mask, pos, max_length,
max_distance, gap, params, &matches[cur_match_pos + shadow_matches]);
cur_match_end = cur_match_pos + num_found_matches;
for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
diff --git a/c/enc/backward_references_hq.h b/c/enc/backward_references_hq.h
index cc19544..7c38bd6 100644
--- a/c/enc/backward_references_hq.h
+++ b/c/enc/backward_references_hq.h
@@ -23,29 +23,26 @@ extern "C" {
#endif
BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals);
BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals);
typedef struct ZopfliNode {
- /* best length to get up to this byte (not including this byte itself)
- highest 8 bit is used to reconstruct the length code */
+ /* Best length to get up to this byte (not including this byte itself)
+ highest 7 bit is used to reconstruct the length code. */
uint32_t length;
- /* distance associated with the length; highest 5 bits contain distance
- short code + 1 (or zero if no short code); this way only distances shorter
- than 128MiB are allowed here */
+ /* Distance associated with the length. */
uint32_t distance;
- /* number of literal inserts before this copy */
- uint32_t insert_length;
+ /* Number of literal inserts before this copy; highest 5 bits contain
+ distance short code + 1 (or zero if no short code). */
+ uint32_t dcode_insert_length;
/* This union holds information used by dynamic-programming. During forward
pass |cost| it used to store the goal function. When node is processed its
@@ -78,7 +75,6 @@ BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
(2) nodes[i].command_length() <= i and
(3) nodes[i - nodes[i].command_length()].cost < kInfinity */
BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
const size_t max_backward_limit, const int* dist_cache, HasherHandle hasher,
diff --git a/c/enc/backward_references_inc.h b/c/enc/backward_references_inc.h
index 0a715b2..967545d 100644
--- a/c/enc/backward_references_inc.h
+++ b/c/enc/backward_references_inc.h
@@ -8,8 +8,6 @@
/* template parameters: EXPORT_FN, FN */
static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
- const BrotliDictionary* dictionary,
- const uint16_t* dictionary_hash,
size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
@@ -43,9 +41,10 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
sr.len_code_delta = 0;
sr.distance = 0;
sr.score = kMinScore;
- FN(FindLongestMatch)(hasher, dictionary, dictionary_hash, ringbuffer,
- ringbuffer_mask, dist_cache, position,
- max_length, max_distance, gap, &sr);
+ FN(FindLongestMatch)(hasher, &params->dictionary,
+ ringbuffer, ringbuffer_mask, dist_cache, position,
+ max_length, max_distance, gap,
+ params->dist.max_distance, &sr);
if (sr.score > kMinScore) {
/* Found a match. Let's look for something even better ahead. */
int delayed_backward_references_in_row = 0;
@@ -59,9 +58,9 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
sr2.distance = 0;
sr2.score = kMinScore;
max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
- FN(FindLongestMatch)(hasher, dictionary, dictionary_hash,
+ FN(FindLongestMatch)(hasher, &params->dictionary,
ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
- max_distance, gap, &sr2);
+ max_distance, gap, params->dist.max_distance, &sr2);
if (sr2.score >= sr.score + cost_diff_lazy) {
/* Ok, let's just write one byte for now and start a match from the
next byte. */
diff --git a/c/enc/bit_cost.h b/c/enc/bit_cost.h
index e8b7013..6586469 100644
--- a/c/enc/bit_cost.h
+++ b/c/enc/bit_cost.h
@@ -18,11 +18,11 @@
extern "C" {
#endif
-static BROTLI_INLINE double ShannonEntropy(const uint32_t *population,
- size_t size, size_t *total) {
+static BROTLI_INLINE double ShannonEntropy(
+ const uint32_t* population, size_t size, size_t* total) {
size_t sum = 0;
double retval = 0;
- const uint32_t *population_end = population + size;
+ const uint32_t* population_end = population + size;
size_t p;
if (size & 1) {
goto odd_number_of_elements_left;
@@ -42,7 +42,7 @@ static BROTLI_INLINE double ShannonEntropy(const uint32_t *population,
}
static BROTLI_INLINE double BitsEntropy(
- const uint32_t *population, size_t size) {
+ const uint32_t* population, size_t size) {
size_t sum;
double retval = ShannonEntropy(population, size, &sum);
if (retval < sum) {
diff --git a/c/enc/block_encoder_inc.h b/c/enc/block_encoder_inc.h
index 2a08f90..8cbd5ea 100644
--- a/c/enc/block_encoder_inc.h
+++ b/c/enc/block_encoder_inc.h
@@ -13,9 +13,9 @@
stream. */
static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self,
const HistogramType* histograms, const size_t histograms_size,
- HuffmanTree* tree, size_t* storage_ix, uint8_t* storage) {
- const size_t alphabet_size = self->alphabet_size_;
- const size_t table_size = histograms_size * alphabet_size;
+ const size_t alphabet_size, HuffmanTree* tree,
+ size_t* storage_ix, uint8_t* storage) {
+ const size_t table_size = histograms_size * self->histogram_length_;
self->depths_ = BROTLI_ALLOC(m, uint8_t, table_size);
self->bits_ = BROTLI_ALLOC(m, uint16_t, table_size);
if (BROTLI_IS_OOM(m)) return;
@@ -23,9 +23,10 @@ static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self,
{
size_t i;
for (i = 0; i < histograms_size; ++i) {
- size_t ix = i * alphabet_size;
- BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size, tree,
- &self->depths_[ix], &self->bits_[ix], storage_ix, storage);
+ size_t ix = i * self->histogram_length_;
+ BuildAndStoreHuffmanTree(&histograms[i].data_[0], self->histogram_length_,
+ alphabet_size, tree, &self->depths_[ix], &self->bits_[ix],
+ storage_ix, storage);
}
}
}
diff --git a/c/enc/block_splitter.c b/c/enc/block_splitter.c
index 6362211..d308eca 100644
--- a/c/enc/block_splitter.c
+++ b/c/enc/block_splitter.c
@@ -174,7 +174,7 @@ void BrotliSplitBlock(MemoryManager* m,
for (i = 0; i < num_commands; ++i) {
const Command* cmd = &cmds[i];
if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
- distance_prefixes[j++] = cmd->dist_prefix_;
+ distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF;
}
}
/* Create the block split on the array of distance prefixes. */
diff --git a/c/enc/block_splitter_inc.h b/c/enc/block_splitter_inc.h
index 5712572..023712b 100644
--- a/c/enc/block_splitter_inc.h
+++ b/c/enc/block_splitter_inc.h
@@ -70,7 +70,7 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
double* insert_cost,
double* cost,
uint8_t* switch_signal,
- uint8_t *block_id) {
+ uint8_t* block_id) {
const size_t data_size = FN(HistogramDataSize)();
const size_t bitmaplen = (num_histograms + 7) >> 3;
size_t num_blocks = 1;
diff --git a/c/enc/brotli_bit_stream.c b/c/enc/brotli_bit_stream.c
index cd9c594..aaf2dad 100644
--- a/c/enc/brotli_bit_stream.c
+++ b/c/enc/brotli_bit_stream.c
@@ -13,12 +13,13 @@
#include <string.h> /* memcpy, memset */
#include "../common/constants.h"
+#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
-#include "./context.h"
#include "./entropy_encode.h"
#include "./entropy_encode_static.h"
#include "./fast_log.h"
+#include "./histogram.h"
#include "./memory.h"
#include "./write_bits.h"
@@ -27,12 +28,11 @@ extern "C" {
#endif
#define MAX_HUFFMAN_TREE_SIZE (2 * BROTLI_NUM_COMMAND_SYMBOLS + 1)
-/* The size of Huffman dictionary for distances assuming that NPOSTFIX = 0 and
- NDIRECT = 0. */
-#define SIMPLE_DISTANCE_ALPHABET_SIZE (BROTLI_NUM_DISTANCE_SHORT_CODES + \
- (2 * BROTLI_MAX_DISTANCE_BITS))
-/* SIMPLE_DISTANCE_ALPHABET_SIZE == 64 */
-#define SIMPLE_DISTANCE_ALPHABET_BITS 6
+/* The maximum size of Huffman dictionary for distances assuming that
+ NPOSTFIX = 0 and NDIRECT = 0. */
+#define MAX_SIMPLE_DISTANCE_ALPHABET_SIZE \
+ BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_LARGE_MAX_DISTANCE_BITS)
+/* MAX_SIMPLE_DISTANCE_ALPHABET_SIZE == 140 */
/* Represents the range of values belonging to a prefix code:
[offset, offset + 2^nbits) */
@@ -258,7 +258,7 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths,
size_t symbols[4],
size_t num_symbols,
size_t max_bits,
- size_t *storage_ix, uint8_t *storage) {
+ size_t* storage_ix, uint8_t* storage) {
/* value of 1 indicates a simple Huffman code */
BrotliWriteBits(2, 1, storage_ix, storage);
BrotliWriteBits(2, num_symbols - 1, storage_ix, storage); /* NSYM - 1 */
@@ -297,7 +297,7 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths,
depths = symbol depths */
void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
HuffmanTree* tree,
- size_t *storage_ix, uint8_t *storage) {
+ size_t* storage_ix, uint8_t* storage) {
/* Write the Huffman tree into the brotli-representation.
The command alphabet is the largest, so this allocation will fit all
alphabets. */
@@ -360,8 +360,9 @@ void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
/* Builds a Huffman tree from histogram[0:length] into depth[0:length] and
bits[0:length] and stores the encoded tree to the bit stream. */
-static void BuildAndStoreHuffmanTree(const uint32_t *histogram,
- const size_t length,
+static void BuildAndStoreHuffmanTree(const uint32_t* histogram,
+ const size_t histogram_length,
+ const size_t alphabet_size,
HuffmanTree* tree,
uint8_t* depth,
uint16_t* bits,
@@ -371,7 +372,7 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram,
size_t s4[4] = { 0 };
size_t i;
size_t max_bits = 0;
- for (i = 0; i < length; i++) {
+ for (i = 0; i < histogram_length; i++) {
if (histogram[i]) {
if (count < 4) {
s4[count] = i;
@@ -383,7 +384,7 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram,
}
{
- size_t max_bits_counter = length - 1;
+ size_t max_bits_counter = alphabet_size - 1;
while (max_bits_counter) {
max_bits_counter >>= 1;
++max_bits;
@@ -398,14 +399,14 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram,
return;
}
- memset(depth, 0, length * sizeof(depth[0]));
- BrotliCreateHuffmanTree(histogram, length, 15, tree, depth);
- BrotliConvertBitDepthsToSymbols(depth, length, bits);
+ memset(depth, 0, histogram_length * sizeof(depth[0]));
+ BrotliCreateHuffmanTree(histogram, histogram_length, 15, tree, depth);
+ BrotliConvertBitDepthsToSymbols(depth, histogram_length, bits);
if (count <= 4) {
StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage);
} else {
- BrotliStoreHuffmanTree(depth, length, tree, storage_ix, storage);
+ BrotliStoreHuffmanTree(depth, histogram_length, tree, storage_ix, storage);
}
}
@@ -729,6 +730,7 @@ static void EncodeContextMap(MemoryManager* m,
}
}
BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix,
+ num_clusters + max_run_length_prefix,
tree, depths, bits, storage_ix, storage);
for (i = 0; i < num_rle_symbols; ++i) {
const uint32_t rle_symbol = rle_symbols[i] & kSymbolMask;
@@ -788,10 +790,11 @@ static void BuildAndStoreBlockSplitCode(const uint8_t* types,
}
StoreVarLenUint8(num_types - 1, storage_ix, storage);
if (num_types > 1) { /* TODO: else? could StoreBlockSwitch occur? */
- BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, tree,
+ BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, num_types + 2, tree,
&code->type_depths[0], &code->type_bits[0],
storage_ix, storage);
BuildAndStoreHuffmanTree(&length_histo[0], BROTLI_NUM_BLOCK_LEN_SYMBOLS,
+ BROTLI_NUM_BLOCK_LEN_SYMBOLS,
tree, &code->length_depths[0],
&code->length_bits[0], storage_ix, storage);
StoreBlockSwitch(code, lengths[0], types[0], 1, storage_ix, storage);
@@ -822,8 +825,8 @@ static void StoreTrivialContextMap(size_t num_types,
for (i = context_bits; i < alphabet_size; ++i) {
histogram[i] = 1;
}
- BuildAndStoreHuffmanTree(histogram, alphabet_size, tree,
- depths, bits, storage_ix, storage);
+ BuildAndStoreHuffmanTree(histogram, alphabet_size, alphabet_size,
+ tree, depths, bits, storage_ix, storage);
for (i = 0; i < num_types; ++i) {
size_t code = (i == 0 ? 0 : i + context_bits - 1);
BrotliWriteBits(depths[code], bits[code], storage_ix, storage);
@@ -838,7 +841,7 @@ static void StoreTrivialContextMap(size_t num_types,
/* Manages the encoding of one block category (literal, command or distance). */
typedef struct BlockEncoder {
- size_t alphabet_size_;
+ size_t histogram_length_;
size_t num_block_types_;
const uint8_t* block_types_; /* Not owned. */
const uint32_t* block_lengths_; /* Not owned. */
@@ -851,10 +854,10 @@ typedef struct BlockEncoder {
uint16_t* bits_;
} BlockEncoder;
-static void InitBlockEncoder(BlockEncoder* self, size_t alphabet_size,
+static void InitBlockEncoder(BlockEncoder* self, size_t histogram_length,
size_t num_block_types, const uint8_t* block_types,
const uint32_t* block_lengths, const size_t num_blocks) {
- self->alphabet_size_ = alphabet_size;
+ self->histogram_length_ = histogram_length;
self->num_block_types_ = num_block_types;
self->block_types_ = block_types;
self->block_lengths_ = block_lengths;
@@ -890,7 +893,7 @@ static void StoreSymbol(BlockEncoder* self, size_t symbol, size_t* storage_ix,
uint32_t block_len = self->block_lengths_[block_ix];
uint8_t block_type = self->block_types_[block_ix];
self->block_len_ = block_len;
- self->entropy_ix_ = block_type * self->alphabet_size_;
+ self->entropy_ix_ = block_type * self->histogram_length_;
StoreBlockSwitch(&self->block_split_code_, block_len, block_type, 0,
storage_ix, storage);
}
@@ -919,7 +922,7 @@ static void StoreSymbolWithContext(BlockEncoder* self, size_t symbol,
--self->block_len_;
{
size_t histo_ix = context_map[self->entropy_ix_ + context];
- size_t ix = histo_ix * self->alphabet_size_ + symbol;
+ size_t ix = histo_ix * self->histogram_length_ + symbol;
BrotliWriteBits(self->depths_[ix], self->bits_[ix], storage_ix, storage);
}
}
@@ -945,42 +948,38 @@ static void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) {
}
void BrotliStoreMetaBlock(MemoryManager* m,
- const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- uint8_t prev_byte,
- uint8_t prev_byte2,
- BROTLI_BOOL is_last,
- uint32_t num_direct_distance_codes,
- uint32_t distance_postfix_bits,
- ContextType literal_context_mode,
- const Command *commands,
- size_t n_commands,
- const MetaBlockSplit* mb,
- size_t *storage_ix,
- uint8_t *storage) {
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+ uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
+ const BrotliEncoderParams* params, ContextType literal_context_mode,
+ const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
+ size_t* storage_ix, uint8_t* storage) {
+
size_t pos = start_pos;
size_t i;
- size_t num_distance_codes =
- BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_distance_codes +
- (48u << distance_postfix_bits);
+ uint32_t num_distance_symbols = params->dist.alphabet_size;
+ uint32_t num_effective_distance_symbols = num_distance_symbols;
HuffmanTree* tree;
+ ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
BlockEncoder literal_enc;
BlockEncoder command_enc;
BlockEncoder distance_enc;
+ const BrotliDistanceParams* dist = &params->dist;
+ if (params->large_window &&
+ num_effective_distance_symbols > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
+ num_effective_distance_symbols = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
+ }
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE);
if (BROTLI_IS_OOM(m)) return;
- InitBlockEncoder(&literal_enc, 256, mb->literal_split.num_types,
- mb->literal_split.types, mb->literal_split.lengths,
- mb->literal_split.num_blocks);
+ InitBlockEncoder(&literal_enc, BROTLI_NUM_LITERAL_SYMBOLS,
+ mb->literal_split.num_types, mb->literal_split.types,
+ mb->literal_split.lengths, mb->literal_split.num_blocks);
InitBlockEncoder(&command_enc, BROTLI_NUM_COMMAND_SYMBOLS,
mb->command_split.num_types, mb->command_split.types,
mb->command_split.lengths, mb->command_split.num_blocks);
- InitBlockEncoder(&distance_enc, num_distance_codes,
+ InitBlockEncoder(&distance_enc, num_effective_distance_symbols,
mb->distance_split.num_types, mb->distance_split.types,
mb->distance_split.lengths, mb->distance_split.num_blocks);
@@ -989,9 +988,10 @@ void BrotliStoreMetaBlock(MemoryManager* m,
BuildAndStoreBlockSwitchEntropyCodes(
&distance_enc, tree, storage_ix, storage);
- BrotliWriteBits(2, distance_postfix_bits, storage_ix, storage);
- BrotliWriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
- storage_ix, storage);
+ BrotliWriteBits(2, dist->distance_postfix_bits, storage_ix, storage);
+ BrotliWriteBits(
+ 4, dist->num_direct_distance_codes >> dist->distance_postfix_bits,
+ storage_ix, storage);
for (i = 0; i < mb->literal_split.num_types; ++i) {
BrotliWriteBits(2, literal_context_mode, storage_ix, storage);
}
@@ -1017,13 +1017,16 @@ void BrotliStoreMetaBlock(MemoryManager* m,
}
BuildAndStoreEntropyCodesLiteral(m, &literal_enc, mb->literal_histograms,
- mb->literal_histograms_size, tree, storage_ix, storage);
+ mb->literal_histograms_size, BROTLI_NUM_LITERAL_SYMBOLS, tree,
+ storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
BuildAndStoreEntropyCodesCommand(m, &command_enc, mb->command_histograms,
- mb->command_histograms_size, tree, storage_ix, storage);
+ mb->command_histograms_size, BROTLI_NUM_COMMAND_SYMBOLS, tree,
+ storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
BuildAndStoreEntropyCodesDistance(m, &distance_enc, mb->distance_histograms,
- mb->distance_histograms_size, tree, storage_ix, storage);
+ mb->distance_histograms_size, num_distance_symbols, tree,
+ storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
BROTLI_FREE(m, tree);
@@ -1041,7 +1044,8 @@ void BrotliStoreMetaBlock(MemoryManager* m,
} else {
size_t j;
for (j = cmd.insert_len_; j != 0; --j) {
- size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
+ size_t context =
+ BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
uint8_t literal = input[pos & mask];
StoreSymbolWithContext(&literal_enc, literal, context,
mb->literal_context_map, storage_ix, storage,
@@ -1056,9 +1060,9 @@ void BrotliStoreMetaBlock(MemoryManager* m,
prev_byte2 = input[(pos - 2) & mask];
prev_byte = input[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
- size_t dist_code = cmd.dist_prefix_;
- uint32_t distnumextra = cmd.dist_extra_ >> 24;
- uint64_t distextra = cmd.dist_extra_ & 0xffffff;
+ size_t dist_code = cmd.dist_prefix_ & 0x3FF;
+ uint32_t distnumextra = cmd.dist_prefix_ >> 10;
+ uint64_t distextra = cmd.dist_extra_;
if (mb->distance_context_map_size == 0) {
StoreSymbol(&distance_enc, dist_code, storage_ix, storage);
} else {
@@ -1082,7 +1086,7 @@ void BrotliStoreMetaBlock(MemoryManager* m,
static void BuildHistograms(const uint8_t* input,
size_t start_pos,
size_t mask,
- const Command *commands,
+ const Command* commands,
size_t n_commands,
HistogramLiteral* lit_histo,
HistogramCommand* cmd_histo,
@@ -1099,7 +1103,7 @@ static void BuildHistograms(const uint8_t* input,
}
pos += CommandCopyLen(&cmd);
if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) {
- HistogramAddDistance(dist_histo, cmd.dist_prefix_);
+ HistogramAddDistance(dist_histo, cmd.dist_prefix_ & 0x3FF);
}
}
}
@@ -1107,7 +1111,7 @@ static void BuildHistograms(const uint8_t* input,
static void StoreDataWithHuffmanCodes(const uint8_t* input,
size_t start_pos,
size_t mask,
- const Command *commands,
+ const Command* commands,
size_t n_commands,
const uint8_t* lit_depth,
const uint16_t* lit_bits,
@@ -1134,9 +1138,9 @@ static void StoreDataWithHuffmanCodes(const uint8_t* input,
}
pos += CommandCopyLen(&cmd);
if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) {
- const size_t dist_code = cmd.dist_prefix_;
- const uint32_t distnumextra = cmd.dist_extra_ >> 24;
- const uint32_t distextra = cmd.dist_extra_ & 0xffffff;
+ const size_t dist_code = cmd.dist_prefix_ & 0x3FF;
+ const uint32_t distnumextra = cmd.dist_prefix_ >> 10;
+ const uint32_t distextra = cmd.dist_extra_;
BrotliWriteBits(dist_depth[dist_code], dist_bits[dist_code],
storage_ix, storage);
BrotliWriteBits(distnumextra, distextra, storage_ix, storage);
@@ -1145,15 +1149,10 @@ static void StoreDataWithHuffmanCodes(const uint8_t* input,
}
void BrotliStoreMetaBlockTrivial(MemoryManager* m,
- const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- BROTLI_BOOL is_last,
- const Command *commands,
- size_t n_commands,
- size_t *storage_ix,
- uint8_t *storage) {
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+ BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+ const Command* commands, size_t n_commands,
+ size_t* storage_ix, uint8_t* storage) {
HistogramLiteral lit_histo;
HistogramCommand cmd_histo;
HistogramDistance dist_histo;
@@ -1161,9 +1160,10 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m,
uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS];
uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS];
- uint8_t dist_depth[SIMPLE_DISTANCE_ALPHABET_SIZE];
- uint16_t dist_bits[SIMPLE_DISTANCE_ALPHABET_SIZE];
+ uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+ uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
HuffmanTree* tree;
+ uint32_t num_distance_symbols = params->dist.alphabet_size;
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
@@ -1178,14 +1178,16 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m,
tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE);
if (BROTLI_IS_OOM(m)) return;
- BuildAndStoreHuffmanTree(lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS, tree,
+ BuildAndStoreHuffmanTree(lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS,
+ BROTLI_NUM_LITERAL_SYMBOLS, tree,
lit_depth, lit_bits,
storage_ix, storage);
- BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS, tree,
+ BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS,
+ BROTLI_NUM_COMMAND_SYMBOLS, tree,
cmd_depth, cmd_bits,
storage_ix, storage);
- BuildAndStoreHuffmanTree(dist_histo.data_, SIMPLE_DISTANCE_ALPHABET_SIZE,
- tree,
+ BuildAndStoreHuffmanTree(dist_histo.data_, MAX_SIMPLE_DISTANCE_ALPHABET_SIZE,
+ num_distance_symbols, tree,
dist_depth, dist_bits,
storage_ix, storage);
BROTLI_FREE(m, tree);
@@ -1200,15 +1202,14 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m,
}
void BrotliStoreMetaBlockFast(MemoryManager* m,
- const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- BROTLI_BOOL is_last,
- const Command *commands,
- size_t n_commands,
- size_t *storage_ix,
- uint8_t *storage) {
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+ BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+ const Command* commands, size_t n_commands,
+ size_t* storage_ix, uint8_t* storage) {
+ uint32_t num_distance_symbols = params->dist.alphabet_size;
+ uint32_t distance_alphabet_bits =
+ Log2FloorNonZero(num_distance_symbols - 1) + 1;
+
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
BrotliWriteBits(13, 0, storage_ix, storage);
@@ -1252,8 +1253,8 @@ void BrotliStoreMetaBlockFast(MemoryManager* m,
uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS];
uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS];
- uint8_t dist_depth[SIMPLE_DISTANCE_ALPHABET_SIZE];
- uint16_t dist_bits[SIMPLE_DISTANCE_ALPHABET_SIZE];
+ uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+ uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
HistogramClearLiteral(&lit_histo);
HistogramClearCommand(&cmd_histo);
HistogramClearDistance(&dist_histo);
@@ -1274,7 +1275,7 @@ void BrotliStoreMetaBlockFast(MemoryManager* m,
BrotliBuildAndStoreHuffmanTreeFast(m, dist_histo.data_,
dist_histo.total_count_,
/* max_bits = */
- SIMPLE_DISTANCE_ALPHABET_BITS,
+ distance_alphabet_bits,
dist_depth, dist_bits,
storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
@@ -1293,11 +1294,11 @@ void BrotliStoreMetaBlockFast(MemoryManager* m,
/* This is for storing uncompressed blocks (simple raw storage of
bytes-as-bytes). */
void BrotliStoreUncompressedMetaBlock(BROTLI_BOOL is_final_block,
- const uint8_t * BROTLI_RESTRICT input,
+ const uint8_t* BROTLI_RESTRICT input,
size_t position, size_t mask,
size_t len,
- size_t * BROTLI_RESTRICT storage_ix,
- uint8_t * BROTLI_RESTRICT storage) {
+ size_t* BROTLI_RESTRICT storage_ix,
+ uint8_t* BROTLI_RESTRICT storage) {
size_t masked_pos = position & mask;
BrotliStoreUncompressedMetaBlockHeader(len, storage_ix, storage);
JumpToByteBoundary(storage_ix, storage);
diff --git a/c/enc/brotli_bit_stream.h b/c/enc/brotli_bit_stream.h
index 1324b18..9089b1d 100644
--- a/c/enc/brotli_bit_stream.h
+++ b/c/enc/brotli_bit_stream.h
@@ -16,10 +16,10 @@
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
+#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./command.h"
-#include "./context.h"
#include "./entropy_encode.h"
#include "./memory.h"
#include "./metablock.h"
@@ -32,7 +32,7 @@ extern "C" {
position for the current storage. */
BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
- HuffmanTree* tree, size_t *storage_ix, uint8_t *storage);
+ HuffmanTree* tree, size_t* storage_ix, uint8_t* storage);
BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
MemoryManager* m, const uint32_t* histogram, const size_t histogram_total,
@@ -42,51 +42,31 @@ BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
/* REQUIRES: length > 0 */
/* REQUIRES: length <= (1 << 24) */
BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
- const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- uint8_t prev_byte,
- uint8_t prev_byte2,
- BROTLI_BOOL is_final_block,
- uint32_t num_direct_distance_codes,
- uint32_t distance_postfix_bits,
- ContextType literal_context_mode,
- const Command* commands,
- size_t n_commands,
- const MetaBlockSplit* mb,
- size_t* storage_ix,
- uint8_t* storage);
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+ uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
+ const BrotliEncoderParams* params, ContextType literal_context_mode,
+ const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
+ size_t* storage_ix, uint8_t* storage);
/* Stores the meta-block without doing any block splitting, just collects
one histogram per block category and uses that for entropy coding.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
- const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- BROTLI_BOOL is_last,
- const Command *commands,
- size_t n_commands,
- size_t* storage_ix,
- uint8_t* storage);
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+ BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+ const Command* commands, size_t n_commands,
+ size_t* storage_ix, uint8_t* storage);
/* Same as above, but uses static prefix codes for histograms with a only a few
symbols, and uses static code length prefix codes for all other histograms.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
- const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- BROTLI_BOOL is_last,
- const Command *commands,
- size_t n_commands,
- size_t* storage_ix,
- uint8_t* storage);
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+ BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+ const Command* commands, size_t n_commands,
+ size_t* storage_ix, uint8_t* storage);
/* This is for storing uncompressed blocks (simple raw storage of
bytes-as-bytes).
diff --git a/c/enc/command.h b/c/enc/command.h
index 3bf0cf7..0526815 100644
--- a/c/enc/command.h
+++ b/c/enc/command.h
@@ -105,10 +105,13 @@ static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) {
typedef struct Command {
uint32_t insert_len_;
- /* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bit. */
+ /* Stores copy_len in low 25 bits and copy_code - copy_len in high 7 bit. */
uint32_t copy_len_;
+ /* Stores distance extra bits. */
uint32_t dist_extra_;
uint16_t cmd_prefix_;
+ /* Stores distance code in low 10 bits
+ and number of extra bits in high 6 bits. */
uint16_t dist_prefix_;
} Command;
@@ -118,7 +121,7 @@ static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen,
/* Don't rely on signed int representation, use honest casts. */
uint32_t delta = (uint8_t)((int8_t)copylen_code_delta);
self->insert_len_ = (uint32_t)insertlen;
- self->copy_len_ = (uint32_t)(copylen | (delta << 24));
+ self->copy_len_ = (uint32_t)(copylen | (delta << 25));
/* The distance prefix and extra bits are stored in this Command as if
npostfix and ndirect were 0, they are only recomputed later after the
clustering if needed. */
@@ -126,29 +129,29 @@ static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen,
distance_code, 0, 0, &self->dist_prefix_, &self->dist_extra_);
GetLengthCode(
insertlen, (size_t)((int)copylen + copylen_code_delta),
- TO_BROTLI_BOOL(self->dist_prefix_ == 0), &self->cmd_prefix_);
+ TO_BROTLI_BOOL((self->dist_prefix_ & 0x3FF) == 0), &self->cmd_prefix_);
}
static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) {
self->insert_len_ = (uint32_t)insertlen;
- self->copy_len_ = 4 << 24;
+ self->copy_len_ = 4 << 25;
self->dist_extra_ = 0;
self->dist_prefix_ = BROTLI_NUM_DISTANCE_SHORT_CODES;
GetLengthCode(insertlen, 4, BROTLI_FALSE, &self->cmd_prefix_);
}
static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(const Command* self) {
- if (self->dist_prefix_ < BROTLI_NUM_DISTANCE_SHORT_CODES) {
- return self->dist_prefix_;
+ if ((self->dist_prefix_ & 0x3FF) < BROTLI_NUM_DISTANCE_SHORT_CODES) {
+ return self->dist_prefix_ & 0x3FF;
} else {
- uint32_t nbits = self->dist_extra_ >> 24;
- uint32_t extra = self->dist_extra_ & 0xffffff;
+ uint32_t nbits = self->dist_prefix_ >> 10;
+ uint32_t extra = self->dist_extra_;
/* It is assumed that the distance was first encoded with NPOSTFIX = 0 and
NDIRECT = 0, so the code itself is of this form:
BROTLI_NUM_DISTANCE_SHORT_CODES + 2 * (nbits - 1) + prefix_bit
Therefore, the following expression results in (2 + prefix_bit). */
- uint32_t prefix =
- self->dist_prefix_ + 4u - BROTLI_NUM_DISTANCE_SHORT_CODES - 2u * nbits;
+ uint32_t prefix = (self->dist_prefix_ & 0x3FF) + 4u -
+ BROTLI_NUM_DISTANCE_SHORT_CODES - 2u * nbits;
/* Subtract 4 for offset (Chapter 4.) and
increase by BROTLI_NUM_DISTANCE_SHORT_CODES - 1 */
return (prefix << nbits) + extra + BROTLI_NUM_DISTANCE_SHORT_CODES - 4u;
@@ -165,12 +168,13 @@ static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) {
}
static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) {
- return self->copy_len_ & 0xFFFFFF;
+ return self->copy_len_ & 0x1FFFFFF;
}
static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) {
- int32_t delta = (int8_t)((uint8_t)(self->copy_len_ >> 24));
- return (uint32_t)((int32_t)(self->copy_len_ & 0xFFFFFF) + delta);
+ uint32_t modifier = self->copy_len_ >> 25;
+ int32_t delta = (int8_t)((uint8_t)(modifier | ((modifier & 0x40) << 1)));
+ return (uint32_t)((int32_t)(self->copy_len_ & 0x1FFFFFF) + delta);
}
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/c/enc/compress_fragment.c b/c/enc/compress_fragment.c
index 40dce3e..f75069b 100644
--- a/c/enc/compress_fragment.c
+++ b/c/enc/compress_fragment.c
@@ -38,7 +38,7 @@ extern "C" {
* There is no effort to ensure that it is a prime, the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
-static const uint32_t kHashMul32 = 0x1e35a7bd;
+static const uint32_t kHashMul32 = 0x1E35A7BD;
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;
@@ -343,7 +343,7 @@ static void BrotliStoreMetaBlockHeader(
}
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
- uint8_t *array) {
+ uint8_t* array) {
while (n_bits > 0) {
size_t byte_pos = pos >> 3;
size_t n_unchanged_bits = pos & 7;
diff --git a/c/enc/compress_fragment_two_pass.c b/c/enc/compress_fragment_two_pass.c
index 8259817..b8bd6e8 100644
--- a/c/enc/compress_fragment_two_pass.c
+++ b/c/enc/compress_fragment_two_pass.c
@@ -37,7 +37,7 @@ extern "C" {
* There is no effort to ensure that it is a prime, the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
-static const uint32_t kHashMul32 = 0x1e35a7bd;
+static const uint32_t kHashMul32 = 0x1E35A7BD;
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 16) * kHashMul32;
diff --git a/c/enc/context.h b/c/enc/context.h
deleted file mode 100644
index caa4230..0000000
--- a/c/enc/context.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/* Copyright 2013 Google Inc. All Rights Reserved.
-
- Distributed under MIT license.
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-
-/* Functions to map previous bytes into a context id. */
-
-#ifndef BROTLI_ENC_CONTEXT_H_
-#define BROTLI_ENC_CONTEXT_H_
-
-#include "../common/platform.h"
-#include <brotli/types.h>
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-/* Second-order context lookup table for UTF8 byte streams.
-
- If p1 and p2 are the previous two bytes, we calculate the context as
-
- context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
-
- If the previous two bytes are ASCII characters (i.e. < 128), this will be
- equivalent to
-
- context = 4 * context1(p1) + context2(p2),
-
- where context1 is based on the previous byte in the following way:
-
- 0 : non-ASCII control
- 1 : \t, \n, \r
- 2 : space
- 3 : other punctuation
- 4 : " '
- 5 : %
- 6 : ( < [ {
- 7 : ) > ] }
- 8 : , ; :
- 9 : .
- 10 : =
- 11 : number
- 12 : upper-case vowel
- 13 : upper-case consonant
- 14 : lower-case vowel
- 15 : lower-case consonant
-
- and context2 is based on the second last byte:
-
- 0 : control, space
- 1 : punctuation
- 2 : upper-case letter, number
- 3 : lower-case letter
-
- If the last byte is ASCII, and the second last byte is not (in a valid UTF8
- stream it will be a continuation byte, value between 128 and 191), the
- context is the same as if the second last byte was an ASCII control or space.
-
- If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
- be a continuation byte and the context id is 2 or 3 depending on the LSB of
- the last byte and to a lesser extent on the second last byte if it is ASCII.
-
- If the last byte is a UTF8 continuation byte, the second last byte can be:
- - continuation byte: the next byte is probably ASCII or lead byte (assuming
- 4-byte UTF8 characters are rare) and the context id is 0 or 1.
- - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
- - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
-
- The possible value combinations of the previous two bytes, the range of
- context ids and the type of the next byte is summarized in the table below:
-
- |--------\-----------------------------------------------------------------|
- | \ Last byte |
- | Second \---------------------------------------------------------------|
- | last byte \ ASCII | cont. byte | lead byte |
- | \ (0-127) | (128-191) | (192-) |
- |=============|===================|=====================|==================|
- | ASCII | next: ASCII/lead | not valid | next: cont. |
- | (0-127) | context: 4 - 63 | | context: 2 - 3 |
- |-------------|-------------------|---------------------|------------------|
- | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
- | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
- |-------------|-------------------|---------------------|------------------|
- | lead byte | not valid | next: ASCII/lead | not valid |
- | (192-207) | | context: 0 - 1 | |
- |-------------|-------------------|---------------------|------------------|
- | lead byte | not valid | next: cont. | not valid |
- | (208-) | | context: 2 - 3 | |
- |-------------|-------------------|---------------------|------------------|
-*/
-static const uint8_t kUTF8ContextLookup[512] = {
- /* Last byte. */
- /* */
- /* ASCII range. */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
- 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
- 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
- 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
- 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
- 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
- /* UTF8 continuation byte range. */
- 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
- 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
- 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
- 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
- /* UTF8 lead byte range. */
- 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
- 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
- 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
- 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
- /* Second last byte. */
- /* */
- /* ASCII range. */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
- 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
- /* UTF8 continuation byte range. */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* UTF8 lead byte range. */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-};
-
-/* Context lookup table for small signed integers. */
-static const uint8_t kSigned3BitContextLookup[] = {
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
-};
-
-typedef enum ContextType {
- CONTEXT_LSB6 = 0,
- CONTEXT_MSB6 = 1,
- CONTEXT_UTF8 = 2,
- CONTEXT_SIGNED = 3
-} ContextType;
-
-static BROTLI_INLINE uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
- switch (mode) {
- case CONTEXT_LSB6:
- return p1 & 0x3f;
- case CONTEXT_MSB6:
- return (uint8_t)(p1 >> 2);
- case CONTEXT_UTF8:
- return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
- case CONTEXT_SIGNED:
- return (uint8_t)((kSigned3BitContextLookup[p1] << 3) +
- kSigned3BitContextLookup[p2]);
- default:
- return 0;
- }
-}
-
-#if defined(__cplusplus) || defined(c_plusplus)
-} /* extern "C" */
-#endif
-
-#endif /* BROTLI_ENC_CONTEXT_H_ */
diff --git a/c/enc/encode.c b/c/enc/encode.c
index 794a409..4fd28d0 100644
--- a/c/enc/encode.c
+++ b/c/enc/encode.c
@@ -11,6 +11,8 @@
#include <stdlib.h> /* free, malloc */
#include <string.h> /* memcpy, memset */
+#include "../common/constants.h"
+#include "../common/context.h"
#include "../common/platform.h"
#include "../common/version.h"
#include "./backward_references.h"
@@ -19,7 +21,7 @@
#include "./brotli_bit_stream.h"
#include "./compress_fragment.h"
#include "./compress_fragment_two_pass.h"
-#include "./context.h"
+#include "./encoder_dict.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./hash.h"
@@ -69,8 +71,8 @@ typedef struct BrotliEncoderStateStruct {
uint64_t last_processed_pos_;
int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
int saved_dist_cache_[4];
- uint8_t last_byte_;
- uint8_t last_byte_bits_;
+ uint16_t last_bytes_;
+ uint8_t last_bytes_bits_;
uint8_t prev_byte_;
uint8_t prev_byte2_;
size_t storage_size_;
@@ -161,6 +163,10 @@ BROTLI_BOOL BrotliEncoderSetParameter(
state->params.size_hint = value;
return BROTLI_TRUE;
+ case BROTLI_PARAM_LARGE_WINDOW:
+ state->params.large_window = TO_BROTLI_BOOL(!!value);
+ return BROTLI_TRUE;
+
default: return BROTLI_FALSE;
}
}
@@ -251,20 +257,25 @@ static int* GetHashTable(BrotliEncoderState* s, int quality,
return table;
}
-static void EncodeWindowBits(int lgwin, uint8_t* last_byte,
- uint8_t* last_byte_bits) {
- if (lgwin == 16) {
- *last_byte = 0;
- *last_byte_bits = 1;
- } else if (lgwin == 17) {
- *last_byte = 1;
- *last_byte_bits = 7;
- } else if (lgwin > 17) {
- *last_byte = (uint8_t)(((lgwin - 17) << 1) | 1);
- *last_byte_bits = 4;
+static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,
+ uint16_t* last_bytes, uint8_t* last_bytes_bits) {
+ if (large_window) {
+ *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11);
+ *last_bytes_bits = 14;
} else {
- *last_byte = (uint8_t)(((lgwin - 8) << 4) | 1);
- *last_byte_bits = 7;
+ if (lgwin == 16) {
+ *last_bytes = 0;
+ *last_bytes_bits = 1;
+ } else if (lgwin == 17) {
+ *last_bytes = 1;
+ *last_bytes_bits = 7;
+ } else if (lgwin > 17) {
+ *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01);
+ *last_bytes_bits = 4;
+ } else {
+ *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01);
+ *last_bytes_bits = 7;
+ }
}
}
@@ -420,6 +431,7 @@ static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
double entropy[3];
size_t dummy;
size_t i;
+ ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
for (; start_pos + 64 <= end_pos; start_pos += 4096) {
const size_t stride_end_pos = start_pos + 64;
uint8_t prev2 = input[start_pos & mask];
@@ -430,7 +442,7 @@ static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
const uint8_t literal = input[pos & mask];
const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
- Context(prev1, prev2, CONTEXT_UTF8)];
+ BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
++total;
++combined_histo[literal >> 3];
++context_histo[context][literal >> 3];
@@ -519,12 +531,26 @@ static BROTLI_BOOL ShouldCompress(
return BROTLI_TRUE;
}
+/* Chooses the literal context mode for a metablock */
+static ContextType ChooseContextMode(const BrotliEncoderParams* params,
+ const uint8_t* data, const size_t pos, const size_t mask,
+ const size_t length) {
+ /* We only do the computation for the option of something else than
+ CONTEXT_UTF8 for the highest qualities */
+ if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING &&
+ !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) {
+ return CONTEXT_SIGNED;
+ }
+ return CONTEXT_UTF8;
+}
+
static void WriteMetaBlockInternal(MemoryManager* m,
const uint8_t* data,
const size_t mask,
const uint64_t last_flush_pos,
const size_t bytes,
const BROTLI_BOOL is_last,
+ ContextType literal_context_mode,
const BrotliEncoderParams* params,
const uint8_t prev_byte,
const uint8_t prev_byte2,
@@ -536,10 +562,9 @@ static void WriteMetaBlockInternal(MemoryManager* m,
size_t* storage_ix,
uint8_t* storage) {
const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
- uint8_t last_byte;
- uint8_t last_byte_bits;
- uint32_t num_direct_distance_codes = 0;
- uint32_t distance_postfix_bits = 0;
+ uint16_t last_bytes;
+ uint8_t last_bytes_bits;
+ ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
if (bytes == 0) {
/* Write the ISLAST and ISEMPTY bits. */
@@ -559,31 +584,29 @@ static void WriteMetaBlockInternal(MemoryManager* m,
return;
}
- last_byte = storage[0];
- last_byte_bits = (uint8_t)(*storage_ix & 0xff);
- if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES &&
- params->mode == BROTLI_MODE_FONT) {
- num_direct_distance_codes = 12;
- distance_postfix_bits = 1;
+ BROTLI_DCHECK(*storage_ix <= 14);
+ last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
+ last_bytes_bits = (uint8_t)(*storage_ix);
+ if (params->dist.num_direct_distance_codes != 0 ||
+ params->dist.distance_postfix_bits != 0) {
RecomputeDistancePrefixes(commands,
num_commands,
- num_direct_distance_codes,
- distance_postfix_bits);
+ params->dist.num_direct_distance_codes,
+ params->dist.distance_postfix_bits);
}
if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
- bytes, mask, is_last,
+ bytes, mask, is_last, params,
commands, num_commands,
storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
} else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
- bytes, mask, is_last,
+ bytes, mask, is_last, params,
commands, num_commands,
storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
} else {
- ContextType literal_context_mode = CONTEXT_UTF8;
MetaBlockSplit mb;
InitMetaBlockSplit(&mb);
if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
@@ -596,14 +619,10 @@ static void WriteMetaBlockInternal(MemoryManager* m,
&literal_context_map);
}
BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
- prev_byte, prev_byte2, literal_context_mode, num_literal_contexts,
+ prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
literal_context_map, commands, num_commands, &mb);
if (BROTLI_IS_OOM(m)) return;
} else {
- if (!BrotliIsMostlyUTF8(data, wrapped_last_flush_pos, mask, bytes,
- kMinUTF8Ratio)) {
- literal_context_mode = CONTEXT_SIGNED;
- }
BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, params,
prev_byte, prev_byte2,
commands, num_commands,
@@ -612,15 +631,18 @@ static void WriteMetaBlockInternal(MemoryManager* m,
if (BROTLI_IS_OOM(m)) return;
}
if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
- BrotliOptimizeHistograms(num_direct_distance_codes,
- distance_postfix_bits,
- &mb);
+ /* The number of distance symbols effectively used by
+ "Large Window Brotli" (32-bit). */
+ uint32_t num_effective_dist_codes = params->dist.alphabet_size;
+ if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
+ num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
+ }
+ BrotliOptimizeHistograms(num_effective_dist_codes, &mb);
}
BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
prev_byte, prev_byte2,
is_last,
- num_direct_distance_codes,
- distance_postfix_bits,
+ params,
literal_context_mode,
commands, num_commands,
&mb,
@@ -631,20 +653,54 @@ static void WriteMetaBlockInternal(MemoryManager* m,
if (bytes + 4 < (*storage_ix >> 3)) {
/* Restore the distance cache and last byte. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
- storage[0] = last_byte;
- *storage_ix = last_byte_bits;
+ storage[0] = (uint8_t)last_bytes;
+ storage[1] = (uint8_t)(last_bytes >> 8);
+ *storage_ix = last_bytes_bits;
BrotliStoreUncompressedMetaBlock(is_last, data,
wrapped_last_flush_pos, mask,
bytes, storage_ix, storage);
}
}
+static void ChooseDistanceParams(BrotliEncoderParams* params) {
+ uint32_t num_direct_distance_codes = 0;
+ uint32_t distance_postfix_bits = 0;
+ uint32_t alphabet_size;
+ size_t max_distance = BROTLI_MAX_DISTANCE;
+
+ if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES &&
+ params->mode == BROTLI_MODE_FONT) {
+ num_direct_distance_codes = 12;
+ distance_postfix_bits = 1;
+ max_distance = (1U << 27) + 4;
+ }
+
+ alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE(
+ num_direct_distance_codes, distance_postfix_bits,
+ BROTLI_MAX_DISTANCE_BITS);
+ if (params->large_window) {
+ max_distance = BROTLI_MAX_ALLOWED_DISTANCE;
+ if (num_direct_distance_codes != 0 || distance_postfix_bits != 0) {
+ max_distance = (3U << 29) - 4;
+ }
+ alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE(
+ num_direct_distance_codes, distance_postfix_bits,
+ BROTLI_LARGE_MAX_DISTANCE_BITS);
+ }
+
+ params->dist.num_direct_distance_codes = num_direct_distance_codes;
+ params->dist.distance_postfix_bits = distance_postfix_bits;
+ params->dist.alphabet_size = alphabet_size;
+ params->dist.max_distance = max_distance;
+}
+
static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;
if (s->is_initialized_) return BROTLI_TRUE;
SanitizeParams(&s->params);
s->params.lgblock = ComputeLgBlock(&s->params);
+ ChooseDistanceParams(&s->params);
s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
@@ -657,7 +713,8 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
lgwin = BROTLI_MAX(int, lgwin, 18);
}
- EncodeWindowBits(lgwin, &s->last_byte_, &s->last_byte_bits_);
+ EncodeWindowBits(lgwin, s->params.large_window,
+ &s->last_bytes_, &s->last_bytes_bits_);
}
if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
@@ -671,11 +728,18 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
params->mode = BROTLI_DEFAULT_MODE;
+ params->large_window = BROTLI_FALSE;
params->quality = BROTLI_DEFAULT_QUALITY;
params->lgwin = BROTLI_DEFAULT_WINDOW;
params->lgblock = 0;
params->size_hint = 0;
params->disable_literal_context_modeling = BROTLI_FALSE;
+ BrotliInitEncoderDictionary(&params->dictionary);
+ params->dist.num_direct_distance_codes = 0;
+ params->dist.distance_postfix_bits = 0;
+ params->dist.alphabet_size =
+ BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
+ params->dist.max_distance = BROTLI_MAX_DISTANCE;
}
static void BrotliEncoderInitState(BrotliEncoderState* s) {
@@ -837,6 +901,37 @@ static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos);
}
+static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
+ uint32_t* wrapped_last_processed_pos) {
+ Command* last_command = &s->commands_[s->num_commands_ - 1];
+ const uint8_t* data = s->ringbuffer_.buffer_;
+ const uint32_t mask = s->ringbuffer_.mask_;
+ uint64_t max_backward_distance = (1u << s->params.lgwin) - BROTLI_WINDOW_GAP;
+ uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF;
+ uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len;
+ uint64_t max_distance = last_processed_pos < max_backward_distance ?
+ last_processed_pos : max_backward_distance;
+ uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
+ uint32_t distance_code = CommandRestoreDistanceCode(last_command);
+ if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
+ distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
+ if (cmd_dist <= max_distance) {
+ while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
+ data[(*wrapped_last_processed_pos - cmd_dist) & mask]) {
+ last_command->copy_len_++;
+ (*bytes)--;
+ (*wrapped_last_processed_pos)++;
+ }
+ }
+ /* The copy length is at most the metablock size, and thus expressible. */
+ GetLengthCode(last_command->insert_len_,
+ (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) +
+ (int)(last_command->copy_len_ >> 25)),
+ TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0),
+ &last_command->cmd_prefix_);
+ }
+}
+
/*
Processes the accumulated input data and sets |*out_size| to the length of
the new output meta-block, or to zero if no new output meta-block has been
@@ -853,13 +948,12 @@ static BROTLI_BOOL EncodeData(
BrotliEncoderState* s, const BROTLI_BOOL is_last,
const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
const uint64_t delta = UnprocessedInputSize(s);
- const uint32_t bytes = (uint32_t)delta;
- const uint32_t wrapped_last_processed_pos =
- WrapPosition(s->last_processed_pos_);
+ uint32_t bytes = (uint32_t)delta;
+ uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
uint8_t* data;
uint32_t mask;
MemoryManager* m = &s->memory_manager_;
- const BrotliDictionary* dictionary = BrotliGetDictionary();
+ ContextType literal_context_mode;
if (!EnsureInitialized(s)) return BROTLI_FALSE;
data = s->ringbuffer_.buffer_;
@@ -884,7 +978,7 @@ static BROTLI_BOOL EncodeData(
if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
uint8_t* storage;
- size_t storage_ix = s->last_byte_bits_;
+ size_t storage_ix = s->last_bytes_bits_;
size_t table_size;
int* table;
@@ -894,9 +988,10 @@ static BROTLI_BOOL EncodeData(
*out_size = 0;
return BROTLI_TRUE;
}
- storage = GetBrotliStorage(s, 2 * bytes + 502);
+ storage = GetBrotliStorage(s, 2 * bytes + 503);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
- storage[0] = s->last_byte_;
+ storage[0] = (uint8_t)s->last_bytes_;
+ storage[1] = (uint8_t)(s->last_bytes_ >> 8);
table = GetHashTable(s, s->params.quality, bytes, &table_size);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
@@ -917,8 +1012,8 @@ static BROTLI_BOOL EncodeData(
&storage_ix, storage);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
}
- s->last_byte_ = storage[storage_ix >> 3];
- s->last_byte_bits_ = storage_ix & 7u;
+ s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+ s->last_bytes_bits_ = storage_ix & 7u;
UpdateLastProcessedPos(s);
*output = &storage[0];
*out_size = storage_ix >> 3;
@@ -946,27 +1041,36 @@ static BROTLI_BOOL EncodeData(
InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
wrapped_last_processed_pos, bytes, is_last);
+
+ literal_context_mode = ChooseContextMode(
+ &s->params, data, WrapPosition(s->last_flush_pos_),
+ mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
+
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+ if (s->num_commands_ && s->last_insert_len_ == 0) {
+ ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
+ }
+
if (s->params.quality == ZOPFLIFICATION_QUALITY) {
BROTLI_DCHECK(s->params.hasher.type == 10);
- BrotliCreateZopfliBackwardReferences(
- m, dictionary, bytes, wrapped_last_processed_pos,
+ BrotliCreateZopfliBackwardReferences(m,
+ bytes, wrapped_last_processed_pos,
data, mask, &s->params, s->hasher_, s->dist_cache_,
&s->last_insert_len_, &s->commands_[s->num_commands_],
&s->num_commands_, &s->num_literals_);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
} else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
BROTLI_DCHECK(s->params.hasher.type == 10);
- BrotliCreateHqZopfliBackwardReferences(
- m, dictionary, bytes, wrapped_last_processed_pos,
+ BrotliCreateHqZopfliBackwardReferences(m,
+ bytes, wrapped_last_processed_pos,
data, mask, &s->params, s->hasher_, s->dist_cache_,
&s->last_insert_len_, &s->commands_[s->num_commands_],
&s->num_commands_, &s->num_literals_);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
} else {
BrotliCreateBackwardReferences(
- dictionary, bytes, wrapped_last_processed_pos,
+ bytes, wrapped_last_processed_pos,
data, mask, &s->params, s->hasher_, s->dist_cache_,
&s->last_insert_len_, &s->commands_[s->num_commands_],
&s->num_commands_, &s->num_literals_);
@@ -1018,18 +1122,19 @@ static BROTLI_BOOL EncodeData(
{
const uint32_t metablock_size =
(uint32_t)(s->input_pos_ - s->last_flush_pos_);
- uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 502);
- size_t storage_ix = s->last_byte_bits_;
+ uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
+ size_t storage_ix = s->last_bytes_bits_;
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
- storage[0] = s->last_byte_;
+ storage[0] = (uint8_t)s->last_bytes_;
+ storage[1] = (uint8_t)(s->last_bytes_ >> 8);
WriteMetaBlockInternal(
m, data, mask, s->last_flush_pos_, metablock_size, is_last,
- &s->params, s->prev_byte_, s->prev_byte2_,
+ literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
s->dist_cache_, &storage_ix, storage);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
- s->last_byte_ = storage[storage_ix >> 3];
- s->last_byte_bits_ = storage_ix & 7u;
+ s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+ s->last_bytes_bits_ = storage_ix & 7u;
s->last_flush_pos_ = s->input_pos_;
if (UpdateLastProcessedPos(s)) {
HasherReset(s->hasher_);
@@ -1058,10 +1163,11 @@ static BROTLI_BOOL EncodeData(
static size_t WriteMetadataHeader(
BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
size_t storage_ix;
- storage_ix = s->last_byte_bits_;
- header[0] = s->last_byte_;
- s->last_byte_ = 0;
- s->last_byte_bits_ = 0;
+ storage_ix = s->last_bytes_bits_;
+ header[0] = (uint8_t)s->last_bytes_;
+ header[1] = (uint8_t)(s->last_bytes_ >> 8);
+ s->last_bytes_ = 0;
+ s->last_bytes_bits_ = 0;
BrotliWriteBits(1, 0, &storage_ix, header);
BrotliWriteBits(2, 3, &storage_ix, header);
@@ -1091,15 +1197,14 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
BROTLI_BOOL ok = BROTLI_TRUE;
const size_t max_out_size = *encoded_size;
size_t total_out_size = 0;
- uint8_t last_byte;
- uint8_t last_byte_bits;
+ uint16_t last_bytes;
+ uint8_t last_bytes_bits;
HasherHandle hasher = NULL;
const size_t hasher_eff_size =
BROTLI_MIN(size_t, input_size, max_backward_limit + BROTLI_WINDOW_GAP);
BrotliEncoderParams params;
- const BrotliDictionary* dictionary = BrotliGetDictionary();
const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
size_t max_block_size;
@@ -1113,14 +1218,18 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
BrotliEncoderInitParams(&params);
params.quality = 10;
params.lgwin = lgwin;
+ if (lgwin > BROTLI_MAX_WINDOW_BITS) {
+ params.large_window = BROTLI_TRUE;
+ }
SanitizeParams(&params);
params.lgblock = ComputeLgBlock(&params);
+ ChooseDistanceParams(&params);
max_block_size = (size_t)1 << params.lgblock;
BrotliInitMemoryManager(m, 0, 0, 0);
BROTLI_DCHECK(input_size <= mask + 1);
- EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);
+ EncodeWindowBits(lgwin, params.large_window, &last_bytes, &last_bytes_bits);
InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, &params,
0, hasher_eff_size, BROTLI_TRUE);
if (BROTLI_IS_OOM(m)) goto oom;
@@ -1140,6 +1249,9 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
uint8_t* storage;
size_t storage_ix;
+ ContextType literal_context_mode = ChooseContextMode(&params,
+ input_buffer, metablock_start, mask, metablock_end - metablock_start);
+
size_t block_start;
for (block_start = metablock_start; block_start < metablock_end; ) {
size_t block_size =
@@ -1151,10 +1263,9 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
BrotliInitZopfliNodes(nodes, block_size + 1);
StitchToPreviousBlockH10(hasher, block_size, block_start,
input_buffer, mask);
- path_size = BrotliZopfliComputeShortestPath(
- m, dictionary, block_size, block_start,
- input_buffer, mask, &params, max_backward_limit, dist_cache, hasher,
- nodes);
+ path_size = BrotliZopfliComputeShortestPath(m,
+ block_size, block_start, input_buffer, mask, &params,
+ max_backward_limit, dist_cache, hasher, nodes);
if (BROTLI_IS_OOM(m)) goto oom;
/* We allocate a command buffer in the first iteration of this loop that
will be likely big enough for the whole metablock, so that for most
@@ -1197,13 +1308,14 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size);
storage = NULL;
- storage_ix = last_byte_bits;
+ storage_ix = last_bytes_bits;
if (metablock_size == 0) {
/* Write the ISLAST and ISEMPTY bits. */
storage = BROTLI_ALLOC(m, uint8_t, 16);
if (BROTLI_IS_OOM(m)) goto oom;
- storage[0] = last_byte;
+ storage[0] = (uint8_t)last_bytes;
+ storage[1] = (uint8_t)(last_bytes >> 8);
BrotliWriteBits(2, 3, &storage_ix, storage);
storage_ix = (storage_ix + 7u) & ~7u;
} else if (!ShouldCompress(input_buffer, mask, metablock_start,
@@ -1213,37 +1325,35 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);
if (BROTLI_IS_OOM(m)) goto oom;
- storage[0] = last_byte;
+ storage[0] = (uint8_t)last_bytes;
+ storage[1] = (uint8_t)(last_bytes >> 8);
BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
metablock_start, mask, metablock_size,
&storage_ix, storage);
} else {
- uint32_t num_direct_distance_codes = 0;
- uint32_t distance_postfix_bits = 0;
- ContextType literal_context_mode = CONTEXT_UTF8;
MetaBlockSplit mb;
- InitMetaBlockSplit(&mb);
- if (!BrotliIsMostlyUTF8(input_buffer, metablock_start, mask,
- metablock_size, kMinUTF8Ratio)) {
- literal_context_mode = CONTEXT_SIGNED;
+ /* The number of distance symbols effectively used by
+ "Large Window Brotli" (32-bit). */
+ uint32_t num_effective_dist_codes = params.dist.alphabet_size;
+ if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
+ num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
}
+ InitMetaBlockSplit(&mb);
BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask, &params,
prev_byte, prev_byte2,
commands, num_commands,
literal_context_mode,
&mb);
if (BROTLI_IS_OOM(m)) goto oom;
- BrotliOptimizeHistograms(num_direct_distance_codes,
- distance_postfix_bits,
- &mb);
- storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 502);
+ BrotliOptimizeHistograms(num_effective_dist_codes, &mb);
+ storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503);
if (BROTLI_IS_OOM(m)) goto oom;
- storage[0] = last_byte;
+ storage[0] = (uint8_t)last_bytes;
+ storage[1] = (uint8_t)(last_bytes >> 8);
BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,
mask, prev_byte, prev_byte2,
is_last,
- num_direct_distance_codes,
- distance_postfix_bits,
+ &params,
literal_context_mode,
commands, num_commands,
&mb,
@@ -1252,16 +1362,17 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
if (metablock_size + 4 < (storage_ix >> 3)) {
/* Restore the distance cache and last byte. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
- storage[0] = last_byte;
- storage_ix = last_byte_bits;
+ storage[0] = (uint8_t)last_bytes;
+ storage[1] = (uint8_t)(last_bytes >> 8);
+ storage_ix = last_bytes_bits;
BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
metablock_start, mask,
metablock_size, &storage_ix, storage);
}
DestroyMetaBlockSplit(m, &mb);
}
- last_byte = storage[storage_ix >> 3];
- last_byte_bits = storage_ix & 7u;
+ last_bytes = (uint16_t)(storage[storage_ix >> 3]);
+ last_bytes_bits = storage_ix & 7u;
metablock_start += metablock_size;
if (metablock_start < input_size) {
prev_byte = input_buffer[metablock_start - 1];
@@ -1296,8 +1407,8 @@ oom:
size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
/* [window bits / empty metadata] + N * [uncompressed] + [last empty] */
- size_t num_small_blocks = input_size >> 14;
- size_t overhead = 2 + (4 * num_small_blocks) + 3 + 1;
+ size_t num_large_blocks = input_size >> 14;
+ size_t overhead = 2 + (4 * num_large_blocks) + 3 + 1;
size_t result = input_size + overhead;
if (input_size == 0) return 2;
return (result < input_size) ? 0 : result;
@@ -1360,7 +1471,7 @@ BROTLI_BOOL BrotliEncoderCompress(
}
if (quality == 10) {
/* TODO: Implement this direct path for all quality levels. */
- const int lg_win = BROTLI_MIN(int, BROTLI_MAX_WINDOW_BITS,
+ const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS,
BROTLI_MAX(int, 16, lgwin));
int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer,
encoded_size, encoded_buffer);
@@ -1384,6 +1495,9 @@ BROTLI_BOOL BrotliEncoderCompress(
BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
+ if (lgwin > BROTLI_MAX_WINDOW_BITS) {
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
+ }
result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
&available_in, &next_in, &available_out, &next_out, &total_out);
if (!BrotliEncoderIsFinished(s)) result = 0;
@@ -1406,11 +1520,11 @@ fallback:
}
static void InjectBytePaddingBlock(BrotliEncoderState* s) {
- uint32_t seal = s->last_byte_;
- size_t seal_bits = s->last_byte_bits_;
+ uint32_t seal = s->last_bytes_;
+ size_t seal_bits = s->last_bytes_bits_;
uint8_t* destination;
- s->last_byte_ = 0;
- s->last_byte_bits_ = 0;
+ s->last_bytes_ = 0;
+ s->last_bytes_bits_ = 0;
/* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
seal |= 0x6u << seal_bits;
seal_bits += 6;
@@ -1424,6 +1538,7 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) {
}
destination[0] = (uint8_t)seal;
if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8);
+ if (seal_bits > 16) destination[2] = (uint8_t)(seal >> 16);
s->available_out_ += (seal_bits + 7) >> 3;
}
@@ -1432,7 +1547,7 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) {
static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
size_t* available_out, uint8_t** next_out, size_t* total_out) {
if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
- s->last_byte_bits_ != 0) {
+ s->last_bytes_bits_ != 0) {
InjectBytePaddingBlock(s);
return BROTLI_TRUE;
}
@@ -1513,10 +1628,10 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
(*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
BROTLI_BOOL force_flush =
(*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
- size_t max_out_size = 2 * block_size + 502;
+ size_t max_out_size = 2 * block_size + 503;
BROTLI_BOOL inplace = BROTLI_TRUE;
uint8_t* storage = NULL;
- size_t storage_ix = s->last_byte_bits_;
+ size_t storage_ix = s->last_bytes_bits_;
size_t table_size;
int* table;
@@ -1531,7 +1646,8 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
storage = GetBrotliStorage(s, max_out_size);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
}
- storage[0] = s->last_byte_;
+ storage[0] = (uint8_t)s->last_bytes_;
+ storage[1] = (uint8_t)(s->last_bytes_ >> 8);
table = GetHashTable(s, s->params.quality, block_size, &table_size);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
@@ -1561,8 +1677,8 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
s->next_out_ = storage;
s->available_out_ = out_bytes;
}
- s->last_byte_ = storage[storage_ix >> 3];
- s->last_byte_bits_ = storage_ix & 7u;
+ s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+ s->last_bytes_bits_ = storage_ix & 7u;
if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
diff --git a/c/enc/encoder_dict.c b/c/enc/encoder_dict.c
new file mode 100755
index 0000000..8b2f6ad
--- /dev/null
+++ b/c/enc/encoder_dict.c
@@ -0,0 +1,32 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./encoder_dict.h"
+
+#include "../common/dictionary.h"
+#include "../common/transform.h"
+#include "./dictionary_hash.h"
+#include "./hash.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) {
+ dict->words = BrotliGetDictionary();
+
+ dict->hash_table = kStaticDictionaryHash;
+ dict->buckets = kStaticDictionaryBuckets;
+ dict->dict_words = kStaticDictionaryWords;
+
+ dict->cutoffTransformsCount = kCutoffTransformsCount;
+ dict->cutoffTransforms = kCutoffTransforms;
+
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/c/enc/encoder_dict.h b/c/enc/encoder_dict.h
new file mode 100755
index 0000000..9ac8b4a
--- /dev/null
+++ b/c/enc/encoder_dict.h
@@ -0,0 +1,42 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#ifndef BROTLI_ENC_ENCODER_DICT_H_
+#define BROTLI_ENC_ENCODER_DICT_H_
+
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./static_dict_lut.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Dictionary data (words and transforms) for 1 possible context */
+typedef struct BrotliEncoderDictionary {
+ const BrotliDictionary* words;
+
+ /* cut off for fast encoder */
+ uint32_t cutoffTransformsCount;
+ uint64_t cutoffTransforms;
+
+ /* from dictionary_hash.h, for fast encoder */
+ const uint16_t* hash_table;
+
+ /* from static_dict_lut.h, for slow encoder */
+ const uint16_t* buckets;
+ const DictWord* dict_words;
+} BrotliEncoderDictionary;
+
+BROTLI_INTERNAL void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict);
+
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif /* BROTLI_ENC_ENCODER_DICT_H_ */
diff --git a/c/enc/entropy_encode.c b/c/enc/entropy_encode.c
index 9e0ea11..97f9dfb 100644
--- a/c/enc/entropy_encode.c
+++ b/c/enc/entropy_encode.c
@@ -66,11 +66,11 @@ static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
we are not planning to use this with extremely long blocks.
See http://en.wikipedia.org/wiki/Huffman_coding */
-void BrotliCreateHuffmanTree(const uint32_t *data,
+void BrotliCreateHuffmanTree(const uint32_t* data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
- uint8_t *depth) {
+ uint8_t* depth) {
uint32_t count_limit;
HuffmanTree sentinel;
InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
@@ -371,8 +371,8 @@ void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
}
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
- BROTLI_BOOL *use_rle_for_non_zero,
- BROTLI_BOOL *use_rle_for_zero) {
+ BROTLI_BOOL* use_rle_for_non_zero,
+ BROTLI_BOOL* use_rle_for_zero) {
size_t total_reps_zero = 0;
size_t total_reps_non_zero = 0;
size_t count_reps_zero = 1;
@@ -454,26 +454,26 @@ void BrotliWriteHuffmanTree(const uint8_t* depth,
static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
static const size_t kLut[16] = { /* Pre-reversed 4-bit values. */
- 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
- 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
+ 0x00, 0x08, 0x04, 0x0C, 0x02, 0x0A, 0x06, 0x0E,
+ 0x01, 0x09, 0x05, 0x0D, 0x03, 0x0B, 0x07, 0x0F
};
- size_t retval = kLut[bits & 0xf];
+ size_t retval = kLut[bits & 0x0F];
size_t i;
for (i = 4; i < num_bits; i += 4) {
retval <<= 4;
bits = (uint16_t)(bits >> 4);
- retval |= kLut[bits & 0xf];
+ retval |= kLut[bits & 0x0F];
}
- retval >>= ((0 - num_bits) & 0x3);
+ retval >>= ((0 - num_bits) & 0x03);
return (uint16_t)retval;
}
/* 0..15 are values for bits */
#define MAX_HUFFMAN_BITS 16
-void BrotliConvertBitDepthsToSymbols(const uint8_t *depth,
+void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
size_t len,
- uint16_t *bits) {
+ uint16_t* bits) {
/* In Brotli, all bit depths are [1..15]
0 bit depth means that the symbol does not exist. */
uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 };
diff --git a/c/enc/entropy_encode.h b/c/enc/entropy_encode.h
index ef7c216..f23d9c3 100644
--- a/c/enc/entropy_encode.h
+++ b/c/enc/entropy_encode.h
@@ -46,11 +46,11 @@ BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth(
be at least 2 * length + 1 long.
See http://en.wikipedia.org/wiki/Huffman_coding */
-BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t *data,
+BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
- uint8_t *depth);
+ uint8_t* depth);
/* Change the population counts in a way that the consequent
Huffman tree compression, especially its RLE-part will be more
@@ -72,9 +72,9 @@ BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth,
uint8_t* extra_bits_data);
/* Get the actual bit values for a tree of bit depths. */
-BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t *depth,
+BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
size_t len,
- uint16_t *bits);
+ uint16_t* bits);
/* Input size optimized Shell sort. */
typedef BROTLI_BOOL (*HuffmanTreeComparator)(
diff --git a/c/enc/entropy_encode_static.h b/c/enc/entropy_encode_static.h
index b2c1fbb..62b99a9 100644
--- a/c/enc/entropy_encode_static.h
+++ b/c/enc/entropy_encode_static.h
@@ -83,7 +83,7 @@ static const uint32_t kCodeLengthBits[18] = {
static BROTLI_INLINE void StoreStaticCodeLengthCode(
size_t* storage_ix, uint8_t* storage) {
BrotliWriteBits(
- 40, BROTLI_MAKE_UINT64_T(0x0000ffU, 0x55555554U), storage_ix, storage);
+ 40, BROTLI_MAKE_UINT64_T(0x0000FFu, 0x55555554u), storage_ix, storage);
}
static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
@@ -529,7 +529,7 @@ static const uint16_t kStaticDistanceCodeBits[64] = {
static BROTLI_INLINE void StoreStaticDistanceHuffmanTree(
size_t* storage_ix, uint8_t* storage) {
- BrotliWriteBits(28, 0x0369dc03U, storage_ix, storage);
+ BrotliWriteBits(28, 0x0369DC03u, storage_ix, storage);
}
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/c/enc/hash.h b/c/enc/hash.h
index 2a1634d..1827ce6 100644
--- a/c/enc/hash.h
+++ b/c/enc/hash.h
@@ -16,6 +16,7 @@
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/types.h>
+#include "./encoder_dict.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"
@@ -73,10 +74,10 @@ typedef struct HasherSearchResult {
* There is no effort to ensure that it is a prime, the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
-static const uint32_t kHashMul32 = 0x1e35a7bd;
-static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1e35a7bd, 0x1e35a7bd);
+static const uint32_t kHashMul32 = 0x1E35A7BD;
+static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
static const uint64_t kHashMul64Long =
- BROTLI_MAKE_UINT64_T(0x1fe35a7bU, 0xd3579bd3U);
+ BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
@@ -146,8 +147,9 @@ static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
}
static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
- const BrotliDictionary* dictionary, size_t item, const uint8_t* data,
- size_t max_length, size_t max_backward, HasherSearchResult* out) {
+ const BrotliEncoderDictionary* dictionary, size_t item, const uint8_t* data,
+ size_t max_length, size_t max_backward, size_t max_distance,
+ HasherSearchResult* out) {
size_t len;
size_t dist;
size_t offset;
@@ -156,24 +158,24 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
score_t score;
len = item & 0x1F;
dist = item >> 5;
- offset = dictionary->offsets_by_length[len] + len * dist;
+ offset = dictionary->words->offsets_by_length[len] + len * dist;
if (len > max_length) {
return BROTLI_FALSE;
}
matchlen =
- FindMatchLengthWithLimit(data, &dictionary->data[offset], len);
- if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) {
+ FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len);
+ if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) {
return BROTLI_FALSE;
}
{
size_t cut = len - matchlen;
- size_t transform_id =
- (cut << 2) + (size_t)((kCutoffTransforms >> (cut * 6)) & 0x3F);
+ size_t transform_id = (cut << 2) +
+ (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
backward = max_backward + dist + 1 +
- (transform_id << dictionary->size_bits_by_length[len]);
+ (transform_id << dictionary->words->size_bits_by_length[len]);
}
- if (backward >= BROTLI_MAX_DISTANCE) {
+ if (backward > max_distance) {
return BROTLI_FALSE;
}
score = BackwardReferenceScore(matchlen, backward);
@@ -188,9 +190,10 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
}
static BROTLI_INLINE void SearchInStaticDictionary(
- const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
+ const BrotliEncoderDictionary* dictionary,
HasherHandle handle, const uint8_t* data, size_t max_length,
- size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) {
+ size_t max_backward, size_t max_distance,
+ HasherSearchResult* out, BROTLI_BOOL shallow) {
size_t key;
size_t i;
HasherCommon* self = GetHasherCommon(handle);
@@ -199,11 +202,11 @@ static BROTLI_INLINE void SearchInStaticDictionary(
}
key = Hash14(data) << 1;
for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
- size_t item = dictionary_hash[key];
+ size_t item = dictionary->hash_table[key];
self->dict_num_lookups++;
if (item != 0) {
BROTLI_BOOL item_matches = TestStaticDictionaryItem(
- dictionary, item, data, max_length, max_backward, out);
+ dictionary, item, data, max_length, max_backward, max_distance, out);
if (item_matches) {
self->dict_num_matches++;
}
diff --git a/c/enc/hash_forgetful_chain_inc.h b/c/enc/hash_forgetful_chain_inc.h
index 46d363c..41cb3ff 100644
--- a/c/enc/hash_forgetful_chain_inc.h
+++ b/c/enc/hash_forgetful_chain_inc.h
@@ -28,7 +28,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
/* HashBytes is the function that chooses the bucket to place the address in.*/
-static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t *data) {
+static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* data) {
const uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
@@ -115,7 +115,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
- const uint8_t *data, const size_t mask, const size_t ix_start,
+ const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
@@ -154,11 +154,12 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)(
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
- const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
+ const BrotliEncoderDictionary* dictionary,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache,
const size_t cur_ix, const size_t max_length, const size_t max_backward,
- const size_t gap, HasherSearchResult* BROTLI_RESTRICT out) {
+ const size_t gap, const size_t max_distance,
+ HasherSearchResult* BROTLI_RESTRICT out) {
HashForgetfulChain* self = FN(Self)(handle);
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
/* Don't accept a short copy from far away. */
@@ -240,9 +241,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
FN(Store)(handle, data, ring_buffer_mask, cur_ix);
}
if (out->score == min_score) {
- SearchInStaticDictionary(dictionary, dictionary_hash,
- handle, &data[cur_ix_masked], max_length, max_backward + gap, out,
- BROTLI_FALSE);
+ SearchInStaticDictionary(dictionary,
+ handle, &data[cur_ix_masked], max_length, max_backward + gap,
+ max_distance, out, BROTLI_FALSE);
}
}
diff --git a/c/enc/hash_longest_match64_inc.h b/c/enc/hash_longest_match64_inc.h
index 6b0697b..e099edf 100644
--- a/c/enc/hash_longest_match64_inc.h
+++ b/c/enc/hash_longest_match64_inc.h
@@ -20,7 +20,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
/* HashBytes is the function that chooses the bucket to place the address in. */
-static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t *data,
+static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* data,
const uint64_t mask,
const int shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(data) & mask) * kHashMul64Long;
@@ -105,7 +105,7 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
/* Look at 4 bytes at &data[ix & mask].
Compute a hash from these, and store the value of ix at that position. */
-static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data,
+static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
const size_t mask, const size_t ix) {
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
@@ -119,7 +119,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data,
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
- const uint8_t *data, const size_t mask, const size_t ix_start,
+ const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
@@ -158,11 +158,11 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)(
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
- const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
+ const BrotliEncoderDictionary* dictionary,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
const size_t max_length, const size_t max_backward, const size_t gap,
- HasherSearchResult* BROTLI_RESTRICT out) {
+ const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
HasherCommon* common = GetHasherCommon(handle);
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
@@ -257,9 +257,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
++num[key];
}
if (min_score == out->score) {
- SearchInStaticDictionary(dictionary, dictionary_hash,
- handle, &data[cur_ix_masked], max_length, max_backward + gap, out,
- BROTLI_FALSE);
+ SearchInStaticDictionary(dictionary,
+ handle, &data[cur_ix_masked], max_length, max_backward + gap,
+ max_distance, out, BROTLI_FALSE);
}
}
diff --git a/c/enc/hash_longest_match_inc.h b/c/enc/hash_longest_match_inc.h
index d24576d..951d7a4 100644
--- a/c/enc/hash_longest_match_inc.h
+++ b/c/enc/hash_longest_match_inc.h
@@ -20,7 +20,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
/* HashBytes is the function that chooses the bucket to place the address in. */
-static uint32_t FN(HashBytes)(const uint8_t *data, const int shift) {
+static uint32_t FN(HashBytes)(const uint8_t* data, const int shift) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
@@ -112,7 +112,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
- const uint8_t *data, const size_t mask, const size_t ix_start,
+ const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
@@ -151,11 +151,11 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)(
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
- const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
+ const BrotliEncoderDictionary* dictionary,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
const size_t max_length, const size_t max_backward, const size_t gap,
- HasherSearchResult* BROTLI_RESTRICT out) {
+ const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
HasherCommon* common = GetHasherCommon(handle);
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
@@ -249,9 +249,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
++num[key];
}
if (min_score == out->score) {
- SearchInStaticDictionary(dictionary, dictionary_hash,
- handle, &data[cur_ix_masked], max_length, max_backward + gap, out,
- BROTLI_FALSE);
+ SearchInStaticDictionary(dictionary,
+ handle, &data[cur_ix_masked], max_length, max_backward + gap,
+ max_distance, out, BROTLI_FALSE);
}
}
diff --git a/c/enc/hash_longest_match_quickly_inc.h b/c/enc/hash_longest_match_quickly_inc.h
index 2c78351..a7b9639 100644
--- a/c/enc/hash_longest_match_quickly_inc.h
+++ b/c/enc/hash_longest_match_quickly_inc.h
@@ -81,7 +81,7 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
Compute a hash from these, and store the value somewhere within
[ix .. ix+3]. */
static BROTLI_INLINE void FN(Store)(HasherHandle handle,
- const uint8_t *data, const size_t mask, const size_t ix) {
+ const uint8_t* data, const size_t mask, const size_t ix) {
const uint32_t key = FN(HashBytes)(&data[ix & mask]);
/* Wiggle the value with the bucket sweep range. */
const uint32_t off = (ix >> 3) % BUCKET_SWEEP;
@@ -89,7 +89,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle,
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
- const uint8_t *data, const size_t mask, const size_t ix_start,
+ const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
@@ -125,11 +125,12 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)(
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(
- HasherHandle handle, const BrotliDictionary* dictionary,
- const uint16_t* dictionary_hash, const uint8_t* BROTLI_RESTRICT data,
+ HasherHandle handle, const BrotliEncoderDictionary* dictionary,
+ const uint8_t* BROTLI_RESTRICT data,
const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
const size_t cur_ix, const size_t max_length, const size_t max_backward,
- const size_t gap, HasherSearchResult* BROTLI_RESTRICT out) {
+ const size_t gap, const size_t max_distance,
+ HasherSearchResult* BROTLI_RESTRICT out) {
HashLongestMatchQuickly* self = FN(Self)(handle);
const size_t best_len_in = out->len;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
@@ -191,7 +192,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
}
}
} else {
- uint32_t *bucket = self->buckets_ + key;
+ uint32_t* bucket = self->buckets_ + key;
int i;
prev_ix = *bucket++;
for (i = 0; i < BUCKET_SWEEP; ++i, prev_ix = *bucket++) {
@@ -221,9 +222,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
}
}
if (USE_DICTIONARY && min_score == out->score) {
- SearchInStaticDictionary(dictionary, dictionary_hash,
- handle, &data[cur_ix_masked], max_length, max_backward + gap, out,
- BROTLI_TRUE);
+ SearchInStaticDictionary(dictionary,
+ handle, &data[cur_ix_masked], max_length, max_backward + gap,
+ max_distance, out, BROTLI_TRUE);
}
self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix;
}
diff --git a/c/enc/hash_to_binary_tree_inc.h b/c/enc/hash_to_binary_tree_inc.h
index 73774b2..48097b1 100644
--- a/c/enc/hash_to_binary_tree_inc.h
+++ b/c/enc/hash_to_binary_tree_inc.h
@@ -24,7 +24,7 @@ static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
return MAX_TREE_COMP_LENGTH;
}
-static uint32_t FN(HashBytes)(const uint8_t *data) {
+static uint32_t FN(HashBytes)(const uint8_t* data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
@@ -200,7 +200,7 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
sorted by strictly increasing length and (non-strictly) increasing
distance. */
static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,
- const BrotliDictionary* dictionary, const uint8_t* data,
+ const BrotliEncoderDictionary* dictionary, const uint8_t* data,
const size_t ring_buffer_mask, const size_t cur_ix,
const size_t max_length, const size_t max_backward, const size_t gap,
const BrotliEncoderParams* params, BackwardMatch* matches) {
@@ -252,7 +252,7 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,
uint32_t dict_id = dict_matches[l];
if (dict_id < kInvalidMatch) {
size_t distance = max_backward + gap + (dict_id >> 5) + 1;
- if (distance < BROTLI_MAX_DISTANCE) {
+ if (distance <= params->dist.max_distance) {
InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31);
}
}
@@ -265,7 +265,7 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,
/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
current sequence, without returning any matches.
REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
-static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data,
+static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
const size_t mask, const size_t ix) {
HashToBinaryTree* self = FN(Self)(handle);
/* Maximum distance is window size - 16, see section 9.1. of the spec. */
@@ -275,7 +275,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data,
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
- const uint8_t *data, const size_t mask, const size_t ix_start,
+ const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i = ix_start;
size_t j = ix_start;
diff --git a/c/enc/histogram.c b/c/enc/histogram.c
index bb7b4c5..6da2ff6 100644
--- a/c/enc/histogram.c
+++ b/c/enc/histogram.c
@@ -8,9 +8,9 @@
#include "./histogram.h"
+#include "../common/context.h"
#include "./block_splitter.h"
#include "./command.h"
-#include "./context.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -63,13 +63,16 @@ void BrotliBuildHistogramsWithContext(
BlockSplitIteratorNext(&insert_and_copy_it);
HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
cmd->cmd_prefix_);
+ /* TODO: unwrap iterator blocks. */
for (j = cmd->insert_len_; j != 0; --j) {
size_t context;
BlockSplitIteratorNext(&literal_it);
- context = context_modes ?
- ((literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) +
- Context(prev_byte, prev_byte2, context_modes[literal_it.type_])) :
- literal_it.type_;
+ context = literal_it.type_;
+ if (context_modes) {
+ ContextLut lut = BROTLI_CONTEXT_LUT(context_modes[context]);
+ context = (context << BROTLI_LITERAL_CONTEXT_BITS) +
+ BROTLI_CONTEXT(prev_byte, prev_byte2, lut);
+ }
HistogramAddLiteral(&literal_histograms[context],
ringbuffer[pos & mask]);
prev_byte2 = prev_byte;
@@ -86,7 +89,7 @@ void BrotliBuildHistogramsWithContext(
context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
CommandDistanceContext(cmd);
HistogramAddDistance(&copy_dist_histograms[context],
- cmd->dist_prefix_);
+ cmd->dist_prefix_ & 0x3FF);
}
}
}
diff --git a/c/enc/histogram.h b/c/enc/histogram.h
index b1b8d11..42af3c3 100644
--- a/c/enc/histogram.h
+++ b/c/enc/histogram.h
@@ -12,16 +12,19 @@
#include <string.h> /* memset */
#include "../common/constants.h"
+#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./block_splitter.h"
#include "./command.h"
-#include "./context.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
+/* The distance symbols effectively used by "Large Window Brotli" (32-bit). */
+#define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544
+
#define FN(X) X ## Literal
#define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
#define DataType uint8_t
@@ -38,7 +41,7 @@ extern "C" {
#undef FN
#define FN(X) X ## Distance
-#define DATA_SIZE BROTLI_NUM_DISTANCE_SYMBOLS
+#define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS
#include "./histogram_inc.h" /* NOLINT(build/include) */
#undef DataType
#undef DATA_SIZE
diff --git a/c/enc/histogram_inc.h b/c/enc/histogram_inc.h
index 7807036..50eaf74 100644
--- a/c/enc/histogram_inc.h
+++ b/c/enc/histogram_inc.h
@@ -33,7 +33,7 @@ static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
}
static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
- const DataType *p, size_t n) {
+ const DataType* p, size_t n) {
self->total_count_ += n;
n += 1;
while (--n) ++self->data_[*p++];
diff --git a/c/enc/literal_cost.c b/c/enc/literal_cost.c
index 9bcb680..c231100 100644
--- a/c/enc/literal_cost.c
+++ b/c/enc/literal_cost.c
@@ -25,7 +25,7 @@ static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
return BROTLI_MIN(size_t, 1, clamp);
} else {
/* Let's decide over the last byte if this ends the sequence. */
- if (last < 0xe0) {
+ if (last < 0xE0) {
return 0; /* Completed two or three byte coding. */
} else { /* Next one is the 'Byte 3' of utf-8 encoding. */
return BROTLI_MIN(size_t, 2, clamp);
@@ -34,7 +34,7 @@ static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
}
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
- const uint8_t *data) {
+ const uint8_t* data) {
size_t counts[3] = { 0 };
size_t max_utf8 = 1; /* should be 2, but 1 compresses better. */
size_t last_c = 0;
@@ -54,7 +54,7 @@ static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
}
static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
- const uint8_t *data, float *cost) {
+ const uint8_t* data, float* cost) {
/* max_utf8 is 0 (normal ASCII single byte modeling),
1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
@@ -125,7 +125,7 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
}
void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
- const uint8_t *data, float *cost) {
+ const uint8_t* data, float* cost) {
if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
return;
diff --git a/c/enc/literal_cost.h b/c/enc/literal_cost.h
index d2f430c..8f53f39 100644
--- a/c/enc/literal_cost.h
+++ b/c/enc/literal_cost.h
@@ -21,7 +21,7 @@ extern "C" {
ring-buffer (data, mask) will take entropy coded and writes these estimates
to the cost[0..len) array. */
BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
- size_t pos, size_t len, size_t mask, const uint8_t *data, float *cost);
+ size_t pos, size_t len, size_t mask, const uint8_t* data, float* cost);
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
diff --git a/c/enc/memory.c b/c/enc/memory.c
index 3716b98..f6ed7e3 100644
--- a/c/enc/memory.c
+++ b/c/enc/memory.c
@@ -27,22 +27,12 @@ extern "C" {
#define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
#define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
-static void* DefaultAllocFunc(void* opaque, size_t size) {
- BROTLI_UNUSED(opaque);
- return malloc(size);
-}
-
-static void DefaultFreeFunc(void* opaque, void* address) {
- BROTLI_UNUSED(opaque);
- free(address);
-}
-
void BrotliInitMemoryManager(
MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
void* opaque) {
if (!alloc_func) {
- m->alloc_func = DefaultAllocFunc;
- m->free_func = DefaultFreeFunc;
+ m->alloc_func = BrotliDefaultAllocFunc;
+ m->free_func = BrotliDefaultFreeFunc;
m->opaque = 0;
} else {
m->alloc_func = alloc_func;
diff --git a/c/enc/metablock.c b/c/enc/metablock.c
index 50f2ea2..6219292 100644
--- a/c/enc/metablock.c
+++ b/c/enc/metablock.c
@@ -10,12 +10,12 @@
#include "./metablock.h"
#include "../common/constants.h"
+#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./bit_cost.h"
#include "./block_splitter.h"
#include "./cluster.h"
-#include "./context.h"
#include "./entropy_encode.h"
#include "./histogram.h"
#include "./memory.h"
@@ -398,9 +398,9 @@ static void MapStaticContexts(MemoryManager* m,
static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
- uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode,
+ uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
const size_t num_contexts, const uint32_t* static_context_map,
- const Command *commands, size_t n_commands, MetaBlockSplit* mb) {
+ const Command* commands, size_t n_commands, MetaBlockSplit* mb) {
union {
BlockSplitterLiteral plain;
ContextBlockSplitter ctx;
@@ -441,7 +441,8 @@ static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
if (num_contexts == 1) {
BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal);
} else {
- size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
+ size_t context =
+ BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal,
static_context_map[context]);
if (BROTLI_IS_OOM(m)) return;
@@ -455,7 +456,7 @@ static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
- BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_);
+ BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_ & 0x3FF);
}
}
}
@@ -482,7 +483,7 @@ void BrotliBuildMetaBlockGreedy(MemoryManager* m,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
- ContextType literal_context_mode,
+ ContextLut literal_context_lut,
size_t num_contexts,
const uint32_t* static_context_map,
const Command* commands,
@@ -490,19 +491,17 @@ void BrotliBuildMetaBlockGreedy(MemoryManager* m,
MetaBlockSplit* mb) {
if (num_contexts == 1) {
BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
- prev_byte2, literal_context_mode, 1, NULL, commands, n_commands, mb);
+ prev_byte2, literal_context_lut, 1, NULL, commands, n_commands, mb);
} else {
BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
- prev_byte2, literal_context_mode, num_contexts, static_context_map,
+ prev_byte2, literal_context_lut, num_contexts, static_context_map,
commands, n_commands, mb);
}
}
-void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
- size_t distance_postfix_bits,
+void BrotliOptimizeHistograms(uint32_t num_distance_codes,
MetaBlockSplit* mb) {
uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS];
- size_t num_distance_codes;
size_t i;
for (i = 0; i < mb->literal_histograms_size; ++i) {
BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_,
@@ -513,9 +512,6 @@ void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
mb->command_histograms[i].data_,
good_for_rle);
}
- num_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES +
- num_direct_distance_codes +
- ((2 * BROTLI_MAX_DISTANCE_BITS) << distance_postfix_bits);
for (i = 0; i < mb->distance_histograms_size; ++i) {
BrotliOptimizeHuffmanCountsForRle(num_distance_codes,
mb->distance_histograms[i].data_,
diff --git a/c/enc/metablock.h b/c/enc/metablock.h
index 3fa6d65..76a6594 100644
--- a/c/enc/metablock.h
+++ b/c/enc/metablock.h
@@ -10,11 +10,11 @@
#ifndef BROTLI_ENC_METABLOCK_H_
#define BROTLI_ENC_METABLOCK_H_
+#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./block_splitter.h"
#include "./command.h"
-#include "./context.h"
#include "./histogram.h"
#include "./memory.h"
#include "./quality.h"
@@ -85,12 +85,11 @@ BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
is the same for all block types. */
BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
- uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode,
+ uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
size_t num_contexts, const uint32_t* static_context_map,
const Command* commands, size_t n_commands, MetaBlockSplit* mb);
-BROTLI_INTERNAL void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
- size_t distance_postfix_bits,
+BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes,
MetaBlockSplit* mb);
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/c/enc/params.h b/c/enc/params.h
index acb3668..9bcf236 100755
--- a/c/enc/params.h
+++ b/c/enc/params.h
@@ -10,6 +10,7 @@
#define BROTLI_ENC_PARAMS_H_
#include <brotli/encode.h>
+#include "./encoder_dict.h"
typedef struct BrotliHasherParams {
int type;
@@ -19,6 +20,13 @@ typedef struct BrotliHasherParams {
int num_last_distances_to_check;
} BrotliHasherParams;
+typedef struct BrotliDistanceParams {
+ uint32_t num_direct_distance_codes;
+ uint32_t distance_postfix_bits;
+ uint32_t alphabet_size;
+ size_t max_distance;
+} BrotliDistanceParams;
+
/* Encoding parameters */
typedef struct BrotliEncoderParams {
BrotliEncoderMode mode;
@@ -27,7 +35,10 @@ typedef struct BrotliEncoderParams {
int lgblock;
size_t size_hint;
BROTLI_BOOL disable_literal_context_modeling;
+ BROTLI_BOOL large_window;
BrotliHasherParams hasher;
+ BrotliDistanceParams dist;
+ BrotliEncoderDictionary dictionary;
} BrotliEncoderParams;
#endif /* BROTLI_ENC_PARAMS_H_ */
diff --git a/c/enc/prefix.h b/c/enc/prefix.h
index 0168d4e..fd359a4 100644
--- a/c/enc/prefix.h
+++ b/c/enc/prefix.h
@@ -39,11 +39,10 @@ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
size_t prefix = (dist >> bucket) & 1;
size_t offset = (2 + prefix) << bucket;
size_t nbits = bucket - postfix_bits;
- *code = (uint16_t)(
+ *code = (uint16_t)((nbits << 10) |
(BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
- *extra_bits = (uint32_t)(
- (nbits << 24) | ((dist - offset) >> postfix_bits));
+ *extra_bits = (uint32_t)((dist - offset) >> postfix_bits);
}
}
diff --git a/c/enc/quality.h b/c/enc/quality.h
index 80b7051..f9b1111 100644
--- a/c/enc/quality.h
+++ b/c/enc/quality.h
@@ -31,7 +31,7 @@
/* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting,
so we buffer at most this much literals and commands. */
-#define MAX_NUM_DELAYED_SYMBOLS 0x2fff
+#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF
/* Returns hash-table size for quality levels 0 and 1. */
static BROTLI_INLINE size_t MaxHashTableSize(int quality) {
@@ -60,10 +60,15 @@ static BROTLI_INLINE size_t MaxZopfliCandidates(
static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) {
params->quality = BROTLI_MIN(int, BROTLI_MAX_QUALITY,
BROTLI_MAX(int, BROTLI_MIN_QUALITY, params->quality));
+ if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
+ params->large_window = BROTLI_FALSE;
+ }
if (params->lgwin < BROTLI_MIN_WINDOW_BITS) {
params->lgwin = BROTLI_MIN_WINDOW_BITS;
- } else if (params->lgwin > BROTLI_MAX_WINDOW_BITS) {
- params->lgwin = BROTLI_MAX_WINDOW_BITS;
+ } else {
+ int max_lgwin = params->large_window ? BROTLI_LARGE_MAX_WINDOW_BITS :
+ BROTLI_MAX_WINDOW_BITS;
+ if (params->lgwin > max_lgwin) params->lgwin = max_lgwin;
}
}
diff --git a/c/enc/ringbuffer.h b/c/enc/ringbuffer.h
index 4e58749..86079a8 100644
--- a/c/enc/ringbuffer.h
+++ b/c/enc/ringbuffer.h
@@ -41,9 +41,9 @@ typedef struct RingBuffer {
uint32_t pos_;
/* The actual ring buffer containing the copy of the last two bytes, the data,
and the copy of the beginning as a tail. */
- uint8_t *data_;
+ uint8_t* data_;
/* The start of the ring-buffer. */
- uint8_t *buffer_;
+ uint8_t* buffer_;
} RingBuffer;
static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
@@ -91,7 +91,7 @@ static BROTLI_INLINE void RingBufferInitBuffer(
}
static BROTLI_INLINE void RingBufferWriteTail(
- const uint8_t *bytes, size_t n, RingBuffer* rb) {
+ const uint8_t* bytes, size_t n, RingBuffer* rb) {
const size_t masked_pos = rb->pos_ & rb->mask_;
if (BROTLI_PREDICT_FALSE(masked_pos < rb->tail_size_)) {
/* Just fill the tail buffer with the beginning data. */
@@ -103,7 +103,7 @@ static BROTLI_INLINE void RingBufferWriteTail(
/* Push bytes into the ring buffer. */
static BROTLI_INLINE void RingBufferWrite(
- MemoryManager* m, const uint8_t *bytes, size_t n, RingBuffer* rb) {
+ MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) {
if (rb->pos_ == 0 && n < rb->tail_size_) {
/* Special case for the first write: to process the first block, we don't
need to allocate the whole ring-buffer and we don't need the tail
@@ -144,12 +144,16 @@ static BROTLI_INLINE void RingBufferWrite(
n - (rb->size_ - masked_pos));
}
}
- rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
- rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
- rb->pos_ += (uint32_t)n;
- if (rb->pos_ > (1u << 30)) {
- /* Wrap, but preserve not-a-first-lap feature. */
- rb->pos_ = (rb->pos_ & ((1u << 30) - 1)) | (1u << 30);
+ {
+ BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0;
+ uint32_t rb_pos_mask = (1u << 31) - 1;
+ rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
+ rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
+ rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask);
+ if (not_first_lap) {
+ /* Wrap, but preserve not-a-first-lap feature. */
+ rb->pos_ |= 1u << 31;
+ }
}
}
diff --git a/c/enc/static_dict.c b/c/enc/static_dict.c
index 36caa61..758ef80 100644
--- a/c/enc/static_dict.c
+++ b/c/enc/static_dict.c
@@ -8,19 +8,20 @@
#include "../common/dictionary.h"
#include "../common/platform.h"
+#include "../common/transform.h"
+#include "./encoder_dict.h"
#include "./find_match_length.h"
-#include "./static_dict_lut.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-static const uint8_t kUppercaseFirst = 10;
+/* TODO: use BrotliTransforms.cutOffTransforms instead. */
static const uint8_t kOmitLastNTransforms[10] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
};
-static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {
+static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
@@ -79,32 +80,33 @@ static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
}
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
- const BrotliDictionary* dictionary, const uint8_t* data,
+ const BrotliEncoderDictionary* dictionary, const uint8_t* data,
size_t min_length, size_t max_length, uint32_t* matches) {
BROTLI_BOOL has_found_match = BROTLI_FALSE;
{
- size_t offset = kStaticDictionaryBuckets[Hash(data)];
+ size_t offset = dictionary->buckets[Hash(data)];
BROTLI_BOOL end = !offset;
while (!end) {
- DictWord w = kStaticDictionaryWords[offset++];
+ DictWord w = dictionary->dict_words[offset++];
const size_t l = w.len & 0x1F;
- const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
+ const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0) {
const size_t matchlen =
- DictMatchLength(dictionary, data, id, l, max_length);
+ DictMatchLength(dictionary->words, data, id, l, max_length);
const uint8_t* s;
size_t minlen;
size_t maxlen;
size_t len;
- /* Transform "" + kIdentity + "" */
+ /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
if (matchlen == l) {
AddMatch(id, l, l, matches);
has_found_match = BROTLI_TRUE;
}
- /* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */
+ /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
+ "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
if (matchlen >= l - 1) {
AddMatch(id + 12 * n, l - 1, l, matches);
if (l + 2 < max_length &&
@@ -114,7 +116,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
}
has_found_match = BROTLI_TRUE;
}
- /* Transform "" + kOmitLastN + "" (N = 2 .. 9) */
+ /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
minlen = min_length;
if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
@@ -126,7 +128,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
continue;
}
s = &data[l];
- /* Transforms "" + kIdentity + <suffix> */
+ /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
if (s[0] == ' ') {
AddMatch(id + n, l + 1, l, matches);
if (s[1] == 'a') {
@@ -273,12 +275,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
}
}
} else {
- /* Set is_all_caps=0 for kUppercaseFirst and
- is_all_caps=1 otherwise (kUppercaseAll) transform. */
+ /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
+ is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
+ transform. */
const BROTLI_BOOL is_all_caps =
- TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
+ TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
const uint8_t* s;
- if (!IsMatch(dictionary, w, data, max_length)) {
+ if (!IsMatch(dictionary->words, w, data, max_length)) {
continue;
}
/* Transform "" + kUppercase{First,All} + "" */
@@ -323,27 +326,29 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
/* Transforms with prefixes " " and "." */
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
- size_t offset = kStaticDictionaryBuckets[Hash(&data[1])];
+ size_t offset = dictionary->buckets[Hash(&data[1])];
BROTLI_BOOL end = !offset;
while (!end) {
- DictWord w = kStaticDictionaryWords[offset++];
+ DictWord w = dictionary->dict_words[offset++];
const size_t l = w.len & 0x1F;
- const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
+ const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0) {
const uint8_t* s;
- if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
+ if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
continue;
}
- /* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
+ /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
+ "." + BROTLI_TRANSFORM_IDENTITY + "" */
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
has_found_match = BROTLI_TRUE;
if (l + 2 >= max_length) {
continue;
}
- /* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
+ /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
+ "." + BROTLI_TRANSFORM_IDENTITY + <suffix>
*/
s = &data[l + 1];
if (s[0] == ' ') {
@@ -370,12 +375,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
}
}
} else if (is_space) {
- /* Set is_all_caps=0 for kUppercaseFirst and
- is_all_caps=1 otherwise (kUppercaseAll) transform. */
+ /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
+ is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
+ transform. */
const BROTLI_BOOL is_all_caps =
- TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
+ TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
const uint8_t* s;
- if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
+ if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
continue;
}
/* Transforms " " + kUppercase{First,All} + "" */
@@ -411,22 +417,22 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
}
}
if (max_length >= 6) {
- /* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */
+ /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
if ((data[1] == ' ' &&
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
- (data[0] == 0xc2 && data[1] == 0xa0)) {
- size_t offset = kStaticDictionaryBuckets[Hash(&data[2])];
+ (data[0] == 0xC2 && data[1] == 0xA0)) {
+ size_t offset = dictionary->buckets[Hash(&data[2])];
BROTLI_BOOL end = !offset;
while (!end) {
- DictWord w = kStaticDictionaryWords[offset++];
+ DictWord w = dictionary->dict_words[offset++];
const size_t l = w.len & 0x1F;
- const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
+ const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0 &&
- IsMatch(dictionary, w, &data[2], max_length - 2)) {
- if (data[0] == 0xc2) {
+ IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
+ if (data[0] == 0xC2) {
AddMatch(id + 102 * n, l + 2, l, matches);
has_found_match = BROTLI_TRUE;
} else if (l + 2 < max_length && data[l + 2] == ' ') {
@@ -444,17 +450,17 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
data[3] == 'e' && data[4] == ' ') ||
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
data[3] == 'm' && data[4] == '/')) {
- size_t offset = kStaticDictionaryBuckets[Hash(&data[5])];
+ size_t offset = dictionary->buckets[Hash(&data[5])];
BROTLI_BOOL end = !offset;
while (!end) {
- DictWord w = kStaticDictionaryWords[offset++];
+ DictWord w = dictionary->dict_words[offset++];
const size_t l = w.len & 0x1F;
- const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
+ const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0 &&
- IsMatch(dictionary, w, &data[5], max_length - 5)) {
+ IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
has_found_match = BROTLI_TRUE;
if (l + 5 < max_length) {
diff --git a/c/enc/static_dict.h b/c/enc/static_dict.h
index 0b3f6b3..6b5d4eb 100644
--- a/c/enc/static_dict.h
+++ b/c/enc/static_dict.h
@@ -12,13 +12,14 @@
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/types.h>
+#include "./encoder_dict.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37
-static const uint32_t kInvalidMatch = 0xfffffff;
+static const uint32_t kInvalidMatch = 0xFFFFFFF;
/* Matches data against static dictionary words, and for each length l,
for which a match is found, updates matches[l] to be the minimum possible
@@ -28,7 +29,7 @@ static const uint32_t kInvalidMatch = 0xfffffff;
matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
all elements are initialized to kInvalidMatch */
BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
- const BrotliDictionary* dictionary,
+ const BrotliEncoderDictionary* dictionary,
const uint8_t* data, size_t min_length, size_t max_length,
uint32_t* matches);
diff --git a/c/enc/static_dict_lut.h b/c/enc/static_dict_lut.h
index ba94f76..e299cda 100644
--- a/c/enc/static_dict_lut.h
+++ b/c/enc/static_dict_lut.h
@@ -23,7 +23,7 @@ typedef struct DictWord {
} DictWord;
static const int kDictNumBits = 15;
-static const uint32_t kDictHashMul32 = 0x1e35a7bd;
+static const uint32_t kDictHashMul32 = 0x1E35A7BD;
static const uint16_t kStaticDictionaryBuckets[32768] = {
1,0,0,0,0,0,0,0,0,3,6,0,0,0,0,0,20,0,0,0,21,0,22,0,0,0,0,0,0,0,0,23,0,0,25,0,29,
diff --git a/c/enc/utf8_util.c b/c/enc/utf8_util.c
index a334927..04a7805 100644
--- a/c/enc/utf8_util.c
+++ b/c/enc/utf8_util.c
@@ -25,37 +25,37 @@ static size_t BrotliParseAsUTF8(
}
/* 2-byte UTF8 */
if (size > 1u &&
- (input[0] & 0xe0) == 0xc0 &&
- (input[1] & 0xc0) == 0x80) {
- *symbol = (((input[0] & 0x1f) << 6) |
- (input[1] & 0x3f));
- if (*symbol > 0x7f) {
+ (input[0] & 0xE0) == 0xC0 &&
+ (input[1] & 0xC0) == 0x80) {
+ *symbol = (((input[0] & 0x1F) << 6) |
+ (input[1] & 0x3F));
+ if (*symbol > 0x7F) {
return 2;
}
}
/* 3-byte UFT8 */
if (size > 2u &&
- (input[0] & 0xf0) == 0xe0 &&
- (input[1] & 0xc0) == 0x80 &&
- (input[2] & 0xc0) == 0x80) {
- *symbol = (((input[0] & 0x0f) << 12) |
- ((input[1] & 0x3f) << 6) |
- (input[2] & 0x3f));
- if (*symbol > 0x7ff) {
+ (input[0] & 0xF0) == 0xE0 &&
+ (input[1] & 0xC0) == 0x80 &&
+ (input[2] & 0xC0) == 0x80) {
+ *symbol = (((input[0] & 0x0F) << 12) |
+ ((input[1] & 0x3F) << 6) |
+ (input[2] & 0x3F));
+ if (*symbol > 0x7FF) {
return 3;
}
}
/* 4-byte UFT8 */
if (size > 3u &&
- (input[0] & 0xf8) == 0xf0 &&
- (input[1] & 0xc0) == 0x80 &&
- (input[2] & 0xc0) == 0x80 &&
- (input[3] & 0xc0) == 0x80) {
+ (input[0] & 0xF8) == 0xF0 &&
+ (input[1] & 0xC0) == 0x80 &&
+ (input[2] & 0xC0) == 0x80 &&
+ (input[3] & 0xC0) == 0x80) {
*symbol = (((input[0] & 0x07) << 18) |
- ((input[1] & 0x3f) << 12) |
- ((input[2] & 0x3f) << 6) |
- (input[3] & 0x3f));
- if (*symbol > 0xffff && *symbol <= 0x10ffff) {
+ ((input[1] & 0x3F) << 12) |
+ ((input[2] & 0x3F) << 6) |
+ (input[3] & 0x3F));
+ if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) {
return 4;
}
}
diff --git a/c/enc/write_bits.h b/c/enc/write_bits.h
index efa66f8..7733d92 100644
--- a/c/enc/write_bits.h
+++ b/c/enc/write_bits.h
@@ -35,25 +35,27 @@ extern "C" {
and locate the rest in BYTE+1, BYTE+2, etc. */
static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
uint64_t bits,
- size_t * BROTLI_RESTRICT pos,
- uint8_t * BROTLI_RESTRICT array) {
+ size_t* BROTLI_RESTRICT pos,
+ uint8_t* BROTLI_RESTRICT array) {
#ifdef BROTLI_LITTLE_ENDIAN
/* This branch of the code can write up to 56 bits at a time,
7 bits are lost by being perhaps already in *p and at least
1 bit is needed to initialize the bit-stream ahead (i.e. if 7
bits are in *p and we write 57 bits, then the next write will
access a byte that was never initialized). */
- uint8_t *p = &array[*pos >> 3];
+ uint8_t* p = &array[*pos >> 3];
uint64_t v = *p;
- BROTLI_LOG(("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos));
+ BROTLI_LOG(("WriteBits %2d 0x%08x%08x %10d\n", (int)n_bits,
+ (uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF),
+ (int)*pos));
BROTLI_DCHECK((bits >> n_bits) == 0);
BROTLI_DCHECK(n_bits <= 56);
v |= bits << (*pos & 7);
BROTLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. */
*pos += n_bits;
#else
- /* implicit & 0xff is assumed for uint8_t arithmetics */
- uint8_t *array_pos = &array[*pos >> 3];
+ /* implicit & 0xFF is assumed for uint8_t arithmetics */
+ uint8_t* array_pos = &array[*pos >> 3];
const size_t bits_reserved_in_first_byte = (*pos & 7);
size_t bits_left_to_write;
bits <<= bits_reserved_in_first_byte;
@@ -70,8 +72,8 @@ static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
}
static BROTLI_INLINE void BrotliWriteBitsPrepareStorage(
- size_t pos, uint8_t *array) {
- BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", pos));
+ size_t pos, uint8_t* array) {
+ BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", (int)pos));
BROTLI_DCHECK((pos & 7) == 0);
array[pos >> 3] = 0;
}
diff --git a/c/include/brotli/decode.h b/c/include/brotli/decode.h
index 1acf605..61a4326 100644
--- a/c/include/brotli/decode.h
+++ b/c/include/brotli/decode.h
@@ -34,11 +34,11 @@ typedef struct BrotliDecoderStateStruct BrotliDecoderState;
typedef enum {
/** Decoding error, e.g. corrupted input or memory allocation problem. */
BROTLI_DECODER_RESULT_ERROR = 0,
- /** Decoding successfully completed */
+ /** Decoding successfully completed. */
BROTLI_DECODER_RESULT_SUCCESS = 1,
- /** Partially done; should be called again with more input */
+ /** Partially done; should be called again with more input. */
BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT = 2,
- /** Partially done; should be called again with more output */
+ /** Partially done; should be called again with more output. */
BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT = 3
} BrotliDecoderResult;
@@ -83,8 +83,9 @@ typedef enum {
BROTLI_ERROR_CODE(_ERROR_FORMAT_, WINDOW_BITS, -13) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_1, -14) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_2, -15) SEPARATOR \
+ BROTLI_ERROR_CODE(_ERROR_FORMAT_, DISTANCE, -16) SEPARATOR \
\
- /* -16..-17 codes are reserved */ \
+ /* -17 code is reserved */ \
\
BROTLI_ERROR_CODE(_ERROR_, COMPOUND_DICTIONARY, -18) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_, DICTIONARY_NOT_SET, -19) SEPARATOR \
@@ -135,7 +136,11 @@ typedef enum BrotliDecoderParameter {
* Ring buffer is allocated according to window size, despite the real size of
* the content.
*/
- BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION = 0
+ BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION = 0,
+ /**
+ * Flag that determines if "Large Window Brotli" is used.
+ */
+ BROTLI_DECODER_PARAM_LARGE_WINDOW = 1
} BrotliDecoderParameter;
/**
diff --git a/c/include/brotli/encode.h b/c/include/brotli/encode.h
index 1fa85cc..0b5c8c7 100644
--- a/c/include/brotli/encode.h
+++ b/c/include/brotli/encode.h
@@ -27,6 +27,11 @@ extern "C" {
* @note equal to @c BROTLI_MAX_DISTANCE_BITS constant.
*/
#define BROTLI_MAX_WINDOW_BITS 24
+/**
+ * Maximal value for ::BROTLI_PARAM_LGWIN parameter
+ * in "Large Window Brotli" (32-bit).
+ */
+#define BROTLI_LARGE_MAX_WINDOW_BITS 30
/** Minimal value for ::BROTLI_PARAM_LGBLOCK parameter. */
#define BROTLI_MIN_INPUT_BLOCK_BITS 16
/** Maximal value for ::BROTLI_PARAM_LGBLOCK parameter. */
@@ -176,7 +181,11 @@ typedef enum BrotliEncoderParameter {
*
* The default value is 0, which means that the total input size is unknown.
*/
- BROTLI_PARAM_SIZE_HINT = 5
+ BROTLI_PARAM_SIZE_HINT = 5,
+ /**
+ * Flag that determines if "Large Window Brotli" is used.
+ */
+ BROTLI_PARAM_LARGE_WINDOW = 6
} BrotliEncoderParameter;
/**
diff --git a/c/tools/brotli.c b/c/tools/brotli.c
index 497ae65..2abfc27 100644
--- a/c/tools/brotli.c
+++ b/c/tools/brotli.c
@@ -111,8 +111,15 @@ typedef struct {
uint8_t* output;
const char* current_input_path;
const char* current_output_path;
+ int64_t input_file_length; /* -1, if impossible to calculate */
FILE* fin;
FILE* fout;
+
+ /* I/O buffers */
+ size_t available_in;
+ const uint8_t* next_in;
+ size_t available_out;
+ uint8_t* next_out;
} Context;
/* Parse up to 5 decimal digits. */
@@ -185,8 +192,8 @@ static Command ParseParams(Context* params) {
/* Too many options. The expected longest option list is:
"-q 0 -w 10 -o f -D d -S b -d -f -k -n -v --", i.e. 16 items in total.
- This check is an additinal guard that is never triggered, but provides an
- additional guard for future changes. */
+ This check is an additional guard that is never triggered, but provides
+ a guard for future changes. */
if (next_option_index > (MAX_OPTIONS - 2)) {
return COMMAND_INVALID;
}
@@ -414,8 +421,8 @@ static void PrintHelp(const char* name) {
" -t, --test test compressed file integrity\n"
" -v, --verbose verbose mode\n");
fprintf(stdout,
-" -w NUM, --lgwin=NUM set LZ77 window size (0, %d-%d) (default:%d)\n",
- BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS, DEFAULT_LGWIN);
+" -w NUM, --lgwin=NUM set LZ77 window size (0, %d-%d)\n",
+ BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS);
fprintf(stdout,
" window size = 2**NUM - 16\n"
" 0 lets compressor choose the optimal value\n");
@@ -473,6 +480,23 @@ static BROTLI_BOOL OpenOutputFile(const char* output_path, FILE** f,
return BROTLI_TRUE;
}
+static int64_t FileSize(const char* path) {
+ FILE* f = fopen(path, "rb");
+ int64_t retval;
+ if (f == NULL) {
+ return -1;
+ }
+ if (fseek(f, 0L, SEEK_END) != 0) {
+ fclose(f);
+ return -1;
+ }
+ retval = ftell(f);
+ if (fclose(f) != 0) {
+ return -1;
+ }
+ return retval;
+}
+
/* Copy file times and permissions.
TODO: this is a "best effort" implementation; honest cross-platform
fully featured implementation is way too hacky; add more hacks by request. */
@@ -513,6 +537,8 @@ static BROTLI_BOOL NextFile(Context* context) {
/* Iterator points to last used arg; increment to search for the next one. */
context->iterator++;
+ context->input_file_length = -1;
+
/* No input path; read from console. */
if (context->input_count == 0) {
if (context->iterator > 1) return BROTLI_FALSE;
@@ -542,6 +568,7 @@ static BROTLI_BOOL NextFile(Context* context) {
}
context->current_input_path = arg;
+ context->input_file_length = FileSize(arg);
context->current_output_path = context->output_path;
if (context->output_path) return BROTLI_TRUE;
@@ -626,44 +653,73 @@ static BROTLI_BOOL CloseFiles(Context* context, BROTLI_BOOL success) {
static const size_t kFileBufferSize = 1 << 16;
+static void InitializeBuffers(Context* context) {
+ context->available_in = 0;
+ context->next_in = NULL;
+ context->available_out = kFileBufferSize;
+ context->next_out = context->output;
+}
+
+static BROTLI_BOOL HasMoreInput(Context* context) {
+ return feof(context->fin) ? BROTLI_FALSE : BROTLI_TRUE;
+}
+
+static BROTLI_BOOL ProvideInput(Context* context) {
+ context->available_in =
+ fread(context->input, 1, kFileBufferSize, context->fin);
+ context->next_in = context->input;
+ if (ferror(context->fin)) {
+ fprintf(stderr, "failed to read input [%s]: %s\n",
+ PrintablePath(context->current_input_path), strerror(errno));
+ return BROTLI_FALSE;
+ }
+ return BROTLI_TRUE;
+}
+
+/* Internal: should be used only in Provide-/Flush-Output. */
+static BROTLI_BOOL WriteOutput(Context* context) {
+ size_t out_size = (size_t)(context->next_out - context->output);
+ if (out_size == 0) return BROTLI_TRUE;
+ if (context->test_integrity) return BROTLI_TRUE;
+
+ fwrite(context->output, 1, out_size, context->fout);
+ if (ferror(context->fout)) {
+ fprintf(stderr, "failed to write output [%s]: %s\n",
+ PrintablePath(context->current_output_path), strerror(errno));
+ return BROTLI_FALSE;
+ }
+ return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL ProvideOutput(Context* context) {
+ if (!WriteOutput(context)) return BROTLI_FALSE;
+ context->available_out = kFileBufferSize;
+ context->next_out = context->output;
+ return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL FlushOutput(Context* context) {
+ if (!WriteOutput(context)) return BROTLI_FALSE;
+ context->available_out = 0;
+ return BROTLI_TRUE;
+}
+
static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
- size_t available_in = 0;
- const uint8_t* next_in = NULL;
- size_t available_out = kFileBufferSize;
- uint8_t* next_out = context->output;
BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
+ InitializeBuffers(context);
for (;;) {
- if (next_out != context->output) {
- if (!context->test_integrity) {
- size_t out_size = (size_t)(next_out - context->output);
- fwrite(context->output, 1, out_size, context->fout);
- if (ferror(context->fout)) {
- fprintf(stderr, "failed to write output [%s]: %s\n",
- PrintablePath(context->current_output_path), strerror(errno));
- return BROTLI_FALSE;
- }
- }
- available_out = kFileBufferSize;
- next_out = context->output;
- }
-
if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
- if (feof(context->fin)) {
+ if (!HasMoreInput(context)) {
fprintf(stderr, "corrupt input [%s]\n",
PrintablePath(context->current_input_path));
return BROTLI_FALSE;
}
- available_in = fread(context->input, 1, kFileBufferSize, context->fin);
- next_in = context->input;
- if (ferror(context->fin)) {
- fprintf(stderr, "failed to read input [%s]: %s\n",
- PrintablePath(context->current_input_path), strerror(errno));
- return BROTLI_FALSE;
- }
+ if (!ProvideInput(context)) return BROTLI_FALSE;
} else if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
- /* Nothing to do - output is already written. */
+ if (!ProvideOutput(context)) return BROTLI_FALSE;
} else if (result == BROTLI_DECODER_RESULT_SUCCESS) {
- if (available_in != 0 || !feof(context->fin)) {
+ if (!FlushOutput(context)) return BROTLI_FALSE;
+ if (context->available_in != 0 || HasMoreInput(context)) {
fprintf(stderr, "corrupt input [%s]\n",
PrintablePath(context->current_input_path));
return BROTLI_FALSE;
@@ -675,8 +731,8 @@ static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
return BROTLI_FALSE;
}
- result = BrotliDecoderDecompressStream(
- s, &available_in, &next_in, &available_out, &next_out, 0);
+ result = BrotliDecoderDecompressStream(s, &context->available_in,
+ &context->next_in, &context->available_out, &context->next_out, 0);
}
}
@@ -703,46 +759,31 @@ static BROTLI_BOOL DecompressFiles(Context* context) {
}
static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) {
- size_t available_in = 0;
- const uint8_t* next_in = NULL;
- size_t available_out = kFileBufferSize;
- uint8_t* next_out = context->output;
BROTLI_BOOL is_eof = BROTLI_FALSE;
-
+ InitializeBuffers(context);
for (;;) {
- if (available_in == 0 && !is_eof) {
- available_in = fread(context->input, 1, kFileBufferSize, context->fin);
- next_in = context->input;
- if (ferror(context->fin)) {
- fprintf(stderr, "failed to read input [%s]: %s\n",
- PrintablePath(context->current_input_path), strerror(errno));
- return BROTLI_FALSE;
- }
- is_eof = feof(context->fin) ? BROTLI_TRUE : BROTLI_FALSE;
+ if (context->available_in == 0 && !is_eof) {
+ if (!ProvideInput(context)) return BROTLI_FALSE;
+ is_eof = !HasMoreInput(context);
}
if (!BrotliEncoderCompressStream(s,
is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
- &available_in, &next_in, &available_out, &next_out, NULL)) {
+ &context->available_in, &context->next_in,
+ &context->available_out, &context->next_out, NULL)) {
/* Should detect OOM? */
fprintf(stderr, "failed to compress data [%s]\n",
PrintablePath(context->current_input_path));
return BROTLI_FALSE;
}
- if (available_out != kFileBufferSize) {
- size_t out_size = kFileBufferSize - available_out;
- fwrite(context->output, 1, out_size, context->fout);
- if (ferror(context->fout)) {
- fprintf(stderr, "failed to write output [%s]: %s\n",
- PrintablePath(context->current_output_path), strerror(errno));
- return BROTLI_FALSE;
- }
- available_out = kFileBufferSize;
- next_out = context->output;
+ if (context->available_out == 0) {
+ if (!ProvideOutput(context)) return BROTLI_FALSE;
}
- if (BrotliEncoderIsFinished(s)) return BROTLI_TRUE;
+ if (BrotliEncoderIsFinished(s)) {
+ return FlushOutput(context);
+ }
}
}
@@ -756,8 +797,30 @@ static BROTLI_BOOL CompressFiles(Context* context) {
}
BrotliEncoderSetParameter(s,
BROTLI_PARAM_QUALITY, (uint32_t)context->quality);
- BrotliEncoderSetParameter(s,
- BROTLI_PARAM_LGWIN, (uint32_t)context->lgwin);
+ if (context->lgwin > 0) {
+ /* Specified by user. */
+ BrotliEncoderSetParameter(s,
+ BROTLI_PARAM_LGWIN, (uint32_t)context->lgwin);
+ } else {
+ /* 0, or not specified by user; could be chosen by compressor. */
+ uint32_t lgwin = DEFAULT_LGWIN;
+ /* Use file size to limit lgwin. */
+ if (context->input_file_length >= 0) {
+ int32_t size = 1 << BROTLI_MIN_WINDOW_BITS;
+ lgwin = BROTLI_MIN_WINDOW_BITS;
+ while (size < context->input_file_length) {
+ size <<= 1;
+ lgwin++;
+ if (lgwin == BROTLI_MAX_WINDOW_BITS) break;
+ }
+ }
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, lgwin);
+ }
+ if (context->input_file_length > 0) {
+ uint32_t size_hint = context->input_file_length < (1 << 30) ?
+ (uint32_t)context->input_file_length : (1u << 30);
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, size_hint);
+ }
is_ok = OpenFiles(context);
if (is_ok && !context->current_output_path &&
!context->force_overwrite && isatty(STDOUT_FILENO)) {
@@ -779,7 +842,7 @@ int main(int argc, char** argv) {
int i;
context.quality = 11;
- context.lgwin = DEFAULT_LGWIN;
+ context.lgwin = -1;
context.force_overwrite = BROTLI_FALSE;
context.junk_source = BROTLI_FALSE;
context.copy_stat = BROTLI_TRUE;