aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZoltan Szabadka <szabadka@google.com>2014-01-08 12:28:28 +0100
committerZoltan Szabadka <szabadka@google.com>2014-01-08 12:28:28 +0100
commit4c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348 (patch)
tree2c0e6096ed820a5b699eb6b66321504b1edb6b14
parentefbc1a896593be75066ba8769915f19a6c1d7485 (diff)
downloadsrc-4c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348.tar.gz
Brotli format change: small improvement to the encoding of Huffman codes
Combine the HSKIP and the simple/complex Huffman code type bits.
-rw-r--r--brotli/brotlispec.txt20
-rw-r--r--brotli/dec/decode.c13
-rw-r--r--brotli/enc/encode.cc22
-rw-r--r--brotli/enc/literal_cost.cc2
4 files changed, 32 insertions, 25 deletions
diff --git a/brotli/brotlispec.txt b/brotli/brotlispec.txt
index 765abfb..23de497 100644
--- a/brotli/brotlispec.txt
+++ b/brotli/brotlispec.txt
@@ -412,16 +412,16 @@ Abstract
3.4. Simple Huffman codes
- The first bit of the compressed representation of each Huffman
+ The first two bits of the compressed representation of each Huffman
code distinguishes between simple and complex Huffman codes. If
- the first bit is 1, then a simple, otherwise a complex Huffman
- code follows.
+ this value is 1, then a simple Huffman code follows. Otherwise
+ the value indicates the number of leading zeros.
A simple Huffman code can have only up to four symbols with non-
zero code length. The format of the simple Huffman code is as
follows:
- 1 bit: 1, indicating a simple Huffman code
+ 2 bits: value of 1 indicates a simple Huffman code
2 bits: NSYM - 1, where NSYM = # of symbols with non-zero
code length
@@ -507,8 +507,10 @@ Abstract
We can now define the format of the complex Huffman code as
follows:
- 1 bit: 0, indicating a complex Huffman code
- 1 bit : HSKIP, if 1, skip over first two code length codes
+ 2 bits: HSKIP, values of 0, 2 or 3 represent the respective
+ number of leading zeros. (Value of 1 indicates the
+ Simple Huffman code.)
+
Code lengths for symbols in the code length alphabet given
just above, in the order: 1, 2, 3, 4, 0, 17, 5, 6, 16, 7,
@@ -519,9 +521,9 @@ Abstract
the static Huffman code above. A code length of 0 means
the corresponding code length symbol is not used.
- If HSKIP is 1, code lengths of code length symbols 1 and
- 2 are implicit zeros and are not present in the code
- lengths sequence above. If there are at least two non-
+ If HSKIP is 2 or 3, a respective number of leading code
+ lengths are implicit zeros and are not present in the
+ code lengths sequence above. If there are at least two non-
zero code lengths, any trailing zero code lengths are
omitted, i.e. the last code length in the sequence must
be non-zero. In this case the sum of (32 >> code length)
diff --git a/brotli/dec/decode.c b/brotli/dec/decode.c
index 4dce7bb..df2e1f9 100644
--- a/brotli/dec/decode.c
+++ b/brotli/dec/decode.c
@@ -234,7 +234,7 @@ static int ReadHuffmanCode(int alphabet_size,
HuffmanTree* tree,
BrotliBitReader* br) {
int ok = 1;
- int simple_code;
+ int simple_code_or_skip;
uint8_t* code_lengths = NULL;
code_lengths =
@@ -247,9 +247,12 @@ static int ReadHuffmanCode(int alphabet_size,
printf("[ReadHuffmanCode] Unexpected end of input.\n");
return 0;
}
- simple_code = BrotliReadBits(br, 1);
- BROTLI_LOG_UINT(simple_code);
- if (simple_code) { /* Read symbols, codes & code lengths directly. */
+ /* simple_code_or_skip is used as follows:
+ 1 for simple code;
+ 0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
+ simple_code_or_skip = BrotliReadBits(br, 2);
+ BROTLI_LOG_UINT(simple_code_or_skip);
+ if (simple_code_or_skip == 1) { /* Read symbols, codes & code lengths directly. */
int i;
int max_bits_counter = alphabet_size - 1;
int max_bits = 0;
@@ -286,7 +289,7 @@ static int ReadHuffmanCode(int alphabet_size,
int i;
uint8_t code_length_code_lengths[CODE_LENGTH_CODES] = { 0 };
int space = 32;
- for (i = BrotliReadBits(br, 1) * 2;
+ for (i = simple_code_or_skip;
i < CODE_LENGTH_CODES && space > 0; ++i) {
int code_len_idx = kCodeLengthCodeOrder[i];
int v = BrotliReadBits(br, 2);
diff --git a/brotli/enc/encode.cc b/brotli/enc/encode.cc
index 8e497fa..b492421 100644
--- a/brotli/enc/encode.cc
+++ b/brotli/enc/encode.cc
@@ -136,12 +136,16 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
if (num_codes == 1) {
codes_to_store = kCodeLengthCodes;
}
- const int skip_two_first =
- code_length_bitdepth[kStorageOrder[0]] == 0 &&
- code_length_bitdepth[kStorageOrder[1]] == 0;
- WriteBits(1, skip_two_first, storage_ix, storage);
-
- for (int i = skip_two_first * 2; i < codes_to_store; ++i) {
+ int skip_some = 0; // skips none.
+ if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
+ code_length_bitdepth[kStorageOrder[1]] == 0) {
+ skip_some = 2; // skips two.
+ if (code_length_bitdepth[kStorageOrder[2]] == 0) {
+ skip_some = 3; // skips three.
+ }
+ }
+ WriteBits(2, skip_some, storage_ix, storage);
+ for (int i = skip_some; i < codes_to_store; ++i) {
uint8_t len[] = { 2, 4, 3, 2, 2, 4 };
uint8_t bits[] = { 0, 5, 1, 3, 2, 13 };
int v = code_length_bitdepth[kStorageOrder[i]];
@@ -182,7 +186,7 @@ void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
}
if (code.count_ == 0) { // emit minimal tree for empty cases
// bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0
- WriteBits(3 + max_bits, 0x01, storage_ix, storage);
+ WriteBits(4 + max_bits, 0x1, storage_ix, storage);
return;
}
if (code.count_ <= 4) {
@@ -202,7 +206,7 @@ void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
}
}
// Small tree marker to encode 1-4 symbols.
- WriteBits(1, 1, storage_ix, storage);
+ WriteBits(2, 1, storage_ix, storage);
WriteBits(2, code.count_ - 1, storage_ix, storage);
for (int i = 0; i < code.count_; ++i) {
WriteBits(max_bits, symbols[i], storage_ix, storage);
@@ -219,8 +223,6 @@ void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
}
return;
}
- WriteBits(1, 0, storage_ix, storage);
-
uint8_t huffman_tree[kSize];
uint8_t huffman_tree_extra_bits[kSize];
int huffman_tree_size = 0;
diff --git a/brotli/enc/literal_cost.cc b/brotli/enc/literal_cost.cc
index 2a388d7..bf05a98 100644
--- a/brotli/enc/literal_cost.cc
+++ b/brotli/enc/literal_cost.cc
@@ -51,7 +51,7 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
histo = 1;
}
cost[masked_pos] = log2(static_cast<double>(in_window) / histo);
- cost[masked_pos] += 0.03;
+ cost[masked_pos] += 0.029;
if (cost[masked_pos] < 1.0) {
cost[masked_pos] *= 0.5;
cost[masked_pos] += 0.5;