aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Chromium Automerger <chromium-automerger@android>2014-03-24 09:39:27 +0000
committerAndroid Chromium Automerger <chromium-automerger@android>2014-03-24 09:39:27 +0000
commitb0cb7a7abfbc2dd90a0720a0d35fb981fa884f2e (patch)
tree6698a5c00c8ad9b43a06522072d874de39cde5cb
parent1d0ca0b871a2c7297034c588d4117a3d5a9ec0c9 (diff)
parent494c85cebbaaa0db345df69ffa1b639aa4652022 (diff)
downloadsrc-b0cb7a7abfbc2dd90a0720a0d35fb981fa884f2e.tar.gz
Merge third_party/brotli/src from https://chromium.googlesource.com/external/font-compression-reference.git at 494c85cebbaaa0db345df69ffa1b639aa4652022
This commit was generated by merge_from_chromium.py. Change-Id: Ib3fbe3e72d8026481ab406195954e0ee382d7816
-rw-r--r--brotli/brotlispec.txt4
-rw-r--r--brotli/dec/decode.c27
-rw-r--r--brotli/enc/backward_references.cc74
-rw-r--r--brotli/enc/backward_references.h3
-rw-r--r--brotli/enc/bit_cost.h22
-rw-r--r--brotli/enc/block_splitter.cc1
-rw-r--r--brotli/enc/command.h8
-rw-r--r--brotli/enc/encode.cc304
-rw-r--r--brotli/enc/encode.h21
-rw-r--r--brotli/enc/hash.h257
-rw-r--r--brotli/enc/literal_cost.cc33
-rw-r--r--brotli/enc/literal_cost.h4
-rw-r--r--brotli/enc/prefix.cc2
-rw-r--r--brotli/enc/static_dict.h68
-rw-r--r--woff2/file.h6
-rw-r--r--woff2/font.h6
-rw-r--r--woff2/glyph.h6
-rw-r--r--woff2/normalize.h6
-rw-r--r--woff2/ots.h6
-rw-r--r--woff2/port.h6
-rw-r--r--woff2/round.h6
-rw-r--r--woff2/store_bytes.h6
-rw-r--r--woff2/transform.h6
-rw-r--r--woff2/woff2.cc34
-rw-r--r--woff2/woff2.h6
25 files changed, 587 insertions, 335 deletions
diff --git a/brotli/brotlispec.txt b/brotli/brotlispec.txt
index 0010bfb..594517f 100644
--- a/brotli/brotlispec.txt
+++ b/brotli/brotlispec.txt
@@ -589,7 +589,9 @@ Abstract
in the sequence of distances) is not pushed to the ring-buffer of
last distances, in other words, the expression "(second, third,
fourth) last distance" means the (second, third, fourth) last
- distance that was not represented by a 0 distance code.
+ distance that was not represented by a 0 distance code. Similarly,
+ distances that represent static dictionary words (see Section 8.) are
+ not pushed to the ringbuffer of last distances.
The next NDIRECT distance codes, from 16 to 15 + NDIRECT, represent
distances from 1 to NDIRECT. Neither the distance short codes, nor
diff --git a/brotli/dec/decode.c b/brotli/dec/decode.c
index fc66e15..5df28e0 100644
--- a/brotli/dec/decode.c
+++ b/brotli/dec/decode.c
@@ -247,12 +247,23 @@ static int ReadHuffmanCode(int alphabet_size,
code_lengths[symbols[0]] = 1;
switch (num_symbols) {
case 1:
+ break;
case 3:
+ ok = ((symbols[0] != symbols[1]) &&
+ (symbols[0] != symbols[2]) &&
+ (symbols[1] != symbols[2]));
break;
case 2:
+ ok = (symbols[0] != symbols[1]);
code_lengths[symbols[1]] = 1;
break;
case 4:
+ ok = ((symbols[0] != symbols[1]) &&
+ (symbols[0] != symbols[2]) &&
+ (symbols[0] != symbols[3]) &&
+ (symbols[1] != symbols[2]) &&
+ (symbols[1] != symbols[3]) &&
+ (symbols[2] != symbols[3]));
if (BrotliReadBits(br, 1)) {
code_lengths[symbols[2]] = 3;
code_lengths[symbols[3]] = 3;
@@ -266,6 +277,7 @@ static int ReadHuffmanCode(int alphabet_size,
int i;
uint8_t code_length_code_lengths[CODE_LENGTH_CODES] = { 0 };
int space = 32;
+ int num_codes = 0;
/* Static Huffman code for the code length code lengths */
static const HuffmanCode huff[16] = {
{2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 1},
@@ -283,10 +295,12 @@ static int ReadHuffmanCode(int alphabet_size,
BROTLI_LOG_ARRAY_INDEX(code_length_code_lengths, code_len_idx);
if (v != 0) {
space -= (32 >> v);
+ ++num_codes;
}
}
- ok = ReadHuffmanCodeLengths(code_length_code_lengths,
- alphabet_size, code_lengths, br);
+ ok = (num_codes == 1 || space == 0) &&
+ ReadHuffmanCodeLengths(code_length_code_lengths,
+ alphabet_size, code_lengths, br);
}
if (ok) {
table_size = BrotliBuildHuffmanTable(table, HUFFMAN_TABLE_BITS,
@@ -961,10 +975,6 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
ok = 0;
goto End;
}
- if (distance_code > 0) {
- dist_rb[dist_rb_idx & 3] = distance;
- ++dist_rb_idx;
- }
BROTLI_LOG_UINT(distance);
if (pos < max_backward_distance &&
@@ -1016,6 +1026,11 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
goto End;
}
} else {
+ if (distance_code > 0) {
+ dist_rb[dist_rb_idx & 3] = distance;
+ ++dist_rb_idx;
+ }
+
if (copy_length > meta_block_remaining_len) {
printf("Invalid backward reference. pos: %d distance: %d "
"len: %d bytes left: %d\n", pos, distance, copy_length,
diff --git a/brotli/enc/backward_references.cc b/brotli/enc/backward_references.cc
index f76d7d4..837dbf3 100644
--- a/brotli/enc/backward_references.cc
+++ b/brotli/enc/backward_references.cc
@@ -23,6 +23,7 @@
namespace brotli {
+template<typename Hasher>
void CreateBackwardReferences(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
@@ -38,6 +39,9 @@ void CreateBackwardReferences(size_t num_bytes,
const int i_diff = position - i;
const size_t i_end = i + num_bytes;
+ const int random_heuristics_window_size = 512;
+ int apply_random_heuristics = i + random_heuristics_window_size;
+
double average_cost = 0.0;
for (int k = position; k < position + num_bytes; ++k) {
average_cost += literal_cost[k & ringbuffer_mask];
@@ -65,7 +69,8 @@ void CreateBackwardReferences(size_t num_bytes,
bool match_found = hasher->FindLongestMatch(
ringbuffer, literal_cost, ringbuffer_mask,
i + i_diff, i_end - i, max_distance,
- &best_len, &best_len_code, &best_dist, &best_score, &in_dictionary);
+ &best_len, &best_len_code, &best_dist, &best_score,
+ &in_dictionary);
bool best_in_dictionary = in_dictionary;
if (match_found) {
if (match_found_M1 && best_score_M1 > best_score) {
@@ -138,16 +143,20 @@ void CreateBackwardReferences(size_t num_bytes,
}
}
}
+ apply_random_heuristics =
+ i + 2 * best_len + random_heuristics_window_size;
Command cmd;
cmd.insert_length_ = insert_length;
cmd.copy_length_ = best_len;
cmd.copy_length_code_ = best_len_code;
cmd.copy_distance_ = best_dist;
commands->push_back(cmd);
- hasher->set_last_distance(best_dist);
-
insert_length = 0;
++i;
+ if (best_dist <= std::min(i + i_diff, max_backward_limit)) {
+ hasher->set_last_distance(best_dist);
+ }
+
// Copy all copied literals to the hasher, except the last one.
// We cannot store the last one yet, otherwise we couldn't find
// the possible M1 match.
@@ -158,7 +167,8 @@ void CreateBackwardReferences(size_t num_bytes,
++i;
}
// Prepare M1 match.
- if (best_len >= 4 && i + 20 < i_end && !best_in_dictionary) {
+ if (hasher->HasStaticDictionary() &&
+ best_len >= 4 && i + 20 < i_end && !best_in_dictionary) {
max_distance = std::min(i + i_diff, max_backward_limit);
match_found_M1 = hasher->FindLongestMatch(
ringbuffer, literal_cost, ringbuffer_mask,
@@ -185,6 +195,32 @@ void CreateBackwardReferences(size_t num_bytes,
++insert_length;
hasher->Store(ringbuffer + i, i + i_diff);
++i;
+ // If we have not seen matches for a long time, we can skip some
+ // match lookups. Unsuccessful match lookups are very very expensive
+ // and this kind of a heuristic speeds up compression quite
+ // a lot.
+ if (i > apply_random_heuristics) {
+ // Going through uncompressible data, jump.
+ if (i > apply_random_heuristics + 4 * random_heuristics_window_size) {
+ // It is quite a long time since we saw a copy, so we assume
+ // that this data is not compressible, and store hashes less
+ // often. Hashes of non compressible data are less likely to
+ // turn out to be useful in the future, too, so we store less of
+ // them to not to flood out the hash table of good compressible
+ // data.
+ int i_jump = std::min(i + 16, i_end - 4);
+ for (; i < i_jump; i += 4) {
+ hasher->Store(ringbuffer + i, i + i_diff);
+ insert_length += 4;
+ }
+ } else {
+ int i_jump = std::min(i + 8, i_end - 2);
+ for (; i < i_jump; i += 2) {
+ hasher->Store(ringbuffer + i, i + i_diff);
+ insert_length += 2;
+ }
+ }
+ }
}
}
insert_length += (i_end - i);
@@ -198,4 +234,34 @@ void CreateBackwardReferences(size_t num_bytes,
}
}
+void CreateBackwardReferences(size_t num_bytes,
+ size_t position,
+ const uint8_t* ringbuffer,
+ const float* literal_cost,
+ size_t ringbuffer_mask,
+ const size_t max_backward_limit,
+ Hashers* hashers,
+ Hashers::Type hash_type,
+ std::vector<Command>* commands) {
+ switch (hash_type) {
+ case Hashers::HASH_15_8_4:
+ CreateBackwardReferences(
+ num_bytes, position, ringbuffer, literal_cost,
+ ringbuffer_mask, max_backward_limit,
+ hashers->hash_15_8_4.get(),
+ commands);
+ break;
+ case Hashers::HASH_15_8_2:
+ CreateBackwardReferences(
+ num_bytes, position, ringbuffer, literal_cost,
+ ringbuffer_mask, max_backward_limit,
+ hashers->hash_15_8_2.get(),
+ commands);
+ break;
+ default:
+ break;
+ }
+}
+
+
} // namespace brotli
diff --git a/brotli/enc/backward_references.h b/brotli/enc/backward_references.h
index f666ef6..bf90c7e 100644
--- a/brotli/enc/backward_references.h
+++ b/brotli/enc/backward_references.h
@@ -31,7 +31,8 @@ void CreateBackwardReferences(size_t num_bytes,
const float* literal_cost,
size_t ringbuffer_mask,
const size_t max_backward_limit,
- Hasher* hasher,
+ Hashers* hashers,
+ Hashers::Type hash_type,
std::vector<Command>* commands);
} // namespace brotli
diff --git a/brotli/enc/bit_cost.h b/brotli/enc/bit_cost.h
index c2fd3e4..46e6229 100644
--- a/brotli/enc/bit_cost.h
+++ b/brotli/enc/bit_cost.h
@@ -46,6 +46,7 @@ static inline int HuffmanBitCost(const uint8_t* depth, int length) {
int max_depth = 1;
int histogram[kCodeLengthCodes] = { 0 };
int tail_start = 0;
+ int prev_value = 8;
// compute histogram of compacted huffman tree
for (int i = 0; i < length;) {
const int value = depth[i];
@@ -57,29 +58,32 @@ static inline int HuffmanBitCost(const uint8_t* depth, int length) {
++reps;
}
i += reps;
+ if (i == length && value == 0)
+ break;
if (value == 0) {
if (reps < 3) {
histogram[0] += reps;
} else {
- reps -= 3;
- while (reps >= 0) {
+ reps -= 2;
+ while (reps > 0) {
++histogram[17];
reps >>= 3;
- --reps;
}
}
} else {
tail_start = i;
- ++histogram[value];
- --reps;
+ if (value != prev_value) {
+ ++histogram[value];
+ --reps;
+ }
+ prev_value = value;
if (reps < 3) {
histogram[value] += reps;
} else {
- reps -= 3;
- while (reps >= 0) {
+ reps -= 2;
+ while (reps > 0) {
++histogram[16];
reps >>= 2;
- --reps;
}
}
}
@@ -93,7 +97,7 @@ static inline int HuffmanBitCost(const uint8_t* depth, int length) {
cost[17] += 3;
int tree_size = 0;
- int bits = 6 + 2 * max_depth; // huffman tree of huffman tree cost
+ int bits = 18 + 2 * max_depth; // huffman tree of huffman tree cost
for (int i = 0; i < kCodeLengthCodes; ++i) {
bits += histogram[i] * cost[i]; // huffman tree bit cost
tree_size += histogram[i];
diff --git a/brotli/enc/block_splitter.cc b/brotli/enc/block_splitter.cc
index 57c1e90..a24d0fb 100644
--- a/brotli/enc/block_splitter.cc
+++ b/brotli/enc/block_splitter.cc
@@ -348,6 +348,7 @@ void SplitBlock(const std::vector<Command>& cmds,
&insert_and_copy_codes,
&distance_prefixes);
+
SplitByteVector<HistogramLiteral>(
literals,
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
diff --git a/brotli/enc/command.h b/brotli/enc/command.h
index 7a9f481..f979973 100644
--- a/brotli/enc/command.h
+++ b/brotli/enc/command.h
@@ -24,9 +24,13 @@ namespace brotli {
// Command holds a sequence of literals and a backward reference copy.
class Command {
public:
+ // distance_code_ is initialized to 17 because it refers to the distance
+ // code of a backward distance of 1, this way the last insert-only command
+ // won't use the last-distance short code, and accordingly distance_prefix_ is
+ // set to 16
Command() : insert_length_(0), copy_length_(0), copy_length_code_(0),
- copy_distance_(0), distance_code_(0),
- distance_prefix_(0), command_prefix_(0),
+ copy_distance_(0), distance_code_(17),
+ distance_prefix_(16), command_prefix_(0),
distance_extra_bits_(0), distance_extra_bits_value_(0) {}
uint32_t insert_length_;
diff --git a/brotli/enc/encode.cc b/brotli/enc/encode.cc
index fa04834..4ae96bc 100644
--- a/brotli/enc/encode.cc
+++ b/brotli/enc/encode.cc
@@ -168,15 +168,6 @@ void EncodeMetaBlockLength(size_t meta_block_size,
}
}
-template<int kSize>
-void EntropyEncode(int val, const EntropyCode<kSize>& code,
- int* storage_ix, uint8_t* storage) {
- if (code.count_ <= 1) {
- return;
- };
- WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
-}
-
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
const uint8_t* code_length_bitdepth,
int* storage_ix, uint8_t* storage) {
@@ -225,7 +216,9 @@ void StoreHuffmanTreeToBitMask(
for (int i = 0; i < huffman_tree_size; ++i) {
const int ix = huffman_tree[i];
const int extra_bits = huffman_tree_extra_bits[i];
- EntropyEncode(ix, entropy, storage_ix, storage);
+ if (entropy.count_ > 1) {
+ WriteBits(entropy.depth_[ix], entropy.bits_[ix], storage_ix, storage);
+ }
switch (ix) {
case 16:
WriteBits(2, extra_bits, storage_ix, storage);
@@ -240,8 +233,7 @@ void StoreHuffmanTreeToBitMask(
template<int kSize>
void StoreHuffmanCodeSimple(
const EntropyCode<kSize>& code, int alphabet_size,
- int max_bits,
- int* storage_ix, uint8_t* storage) {
+ int max_bits, int* storage_ix, uint8_t* storage) {
const uint8_t *depth = &code.depth_[0];
int symbols[4];
// Quadratic sort.
@@ -304,37 +296,66 @@ void StoreHuffmanCodeComplex(
storage_ix, storage);
}
-
template<int kSize>
-void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
- int* storage_ix, uint8_t* storage) {
+void BuildAndStoreEntropyCode(const Histogram<kSize>& histogram,
+ const int tree_limit,
+ const int alphabet_size,
+ EntropyCode<kSize>* code,
+ int* storage_ix, uint8_t* storage) {
+ memset(code->depth_, 0, sizeof(code->depth_));
+ memset(code->bits_, 0, sizeof(code->bits_));
+ memset(code->symbols_, 0, sizeof(code->symbols_));
+ code->count_ = 0;
+
int max_bits_counter = alphabet_size - 1;
int max_bits = 0;
while (max_bits_counter) {
max_bits_counter >>= 1;
++max_bits;
}
- if (code.count_ == 0) {
- // Emit a minimal tree for empty cases.
- // bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0
- WriteBits(4 + max_bits, 0x1, storage_ix, storage);
- } else if (code.count_ <= 4) {
- StoreHuffmanCodeSimple(
- code, alphabet_size, max_bits,
- storage_ix, storage);
+
+ for (size_t i = 0; i < alphabet_size; i++) {
+ if (histogram.data_[i] > 0) {
+ if (code->count_ < 4) code->symbols_[code->count_] = i;
+ ++code->count_;
+ }
+ }
+
+ if (code->count_ <= 1) {
+ WriteBits(2, 1, storage_ix, storage);
+ WriteBits(2, 0, storage_ix, storage);
+ WriteBits(max_bits, code->symbols_[0], storage_ix, storage);
+ return;
+ }
+
+ if (alphabet_size >= 50 && code->count_ >= 16) {
+ std::vector<int> counts(alphabet_size);
+ memcpy(&counts[0], histogram.data_, sizeof(counts[0]) * alphabet_size);
+ OptimizeHuffmanCountsForRle(alphabet_size, &counts[0]);
+ CreateHuffmanTree(&counts[0], alphabet_size, tree_limit, code->depth_);
} else {
- StoreHuffmanCodeComplex(
- code, alphabet_size,
- storage_ix, storage);
+ CreateHuffmanTree(histogram.data_, alphabet_size, tree_limit, code->depth_);
+ }
+ ConvertBitDepthsToSymbols(code->depth_, alphabet_size, code->bits_);
+
+ if (code->count_ <= 4) {
+ StoreHuffmanCodeSimple(*code, alphabet_size, max_bits, storage_ix, storage);
+ } else {
+ StoreHuffmanCodeComplex(*code, alphabet_size, storage_ix, storage);
}
}
template<int kSize>
-void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
- int alphabet_size,
- int* storage_ix, uint8_t* storage) {
- for (int i = 0; i < codes.size(); ++i) {
- StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
+void BuildAndStoreEntropyCodes(
+ const std::vector<Histogram<kSize> >& histograms,
+ int alphabet_size,
+ std::vector<EntropyCode<kSize> >* entropy_codes,
+ int* storage_ix, uint8_t* storage) {
+ entropy_codes->resize(histograms.size());
+ for (int i = 0; i < histograms.size(); ++i) {
+ BuildAndStoreEntropyCode(histograms[i], 15, alphabet_size,
+ &(*entropy_codes)[i],
+ storage_ix, storage);
}
}
@@ -342,7 +363,7 @@ void EncodeCommand(const Command& cmd,
const EntropyCodeCommand& entropy,
int* storage_ix, uint8_t* storage) {
int code = cmd.command_prefix_;
- EntropyEncode(code, entropy, storage_ix, storage);
+ WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
if (code >= 128) {
code -= 128;
}
@@ -364,13 +385,15 @@ void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
int code = cmd.distance_prefix_;
int extra_bits = cmd.distance_extra_bits_;
uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
- EntropyEncode(code, entropy, storage_ix, storage);
+ WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
if (extra_bits > 0) {
WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
}
}
void ComputeDistanceShortCodes(std::vector<Command>* cmds,
+ size_t pos,
+ const size_t max_backward,
int* dist_ringbuffer,
size_t* ringbuffer_idx) {
static const int kIndexOffset[16] = {
@@ -380,30 +403,40 @@ void ComputeDistanceShortCodes(std::vector<Command>* cmds,
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};
for (int i = 0; i < cmds->size(); ++i) {
+ pos += (*cmds)[i].insert_length_;
+ size_t max_distance = std::min(pos, max_backward);
int cur_dist = (*cmds)[i].copy_distance_;
- if (cur_dist == 0) break;
int dist_code = cur_dist + 16;
- int limits[16] = { 0, 4, 10, 11,
- 6, 6, 11, 11,
- 11, 11, 11, 11,
- 12, 12, 12, 12 };
- for (int k = 0; k < 16; ++k) {
- // Only accept more popular choices.
- if (cur_dist < limits[k]) {
- // Typically unpopular ranges, don't replace a short distance
- // with them.
- continue;
+ if (cur_dist <= max_distance) {
+ if (cur_dist == 0) break;
+ int limits[16] = { 0, 0, 0, 0,
+ 6, 6, 11, 11,
+ 11, 11, 11, 11,
+ 12, 12, 12, 12 };
+ for (int k = 0; k < 16; ++k) {
+ // Only accept more popular choices.
+ if (cur_dist < limits[k]) {
+ // Typically unpopular ranges, don't replace a short distance
+ // with them.
+ continue;
+ }
+ int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
+ kValueOffset[k]);
+ if (cur_dist == comp) {
+ dist_code = k + 1;
+ break;
+ }
}
- int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
- kValueOffset[k]);
- if (cur_dist == comp) {
- dist_code = k + 1;
- break;
+ if (dist_code > 1) {
+ dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
+ ++(*ringbuffer_idx);
}
- }
- if (dist_code > 1) {
- dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
- ++(*ringbuffer_idx);
+ pos += (*cmds)[i].copy_length_;
+ } else {
+ int word_idx = cur_dist - max_distance - 1;
+ const std::string word =
+ GetTransformedDictionaryWord((*cmds)[i].copy_length_code_, word_idx);
+ pos += word.size();
}
(*cmds)[i].distance_code_ = dist_code;
}
@@ -558,18 +591,20 @@ void EncodeContextMap(const std::vector<int>& context_map,
for (int i = 0; i < rle_symbols.size(); ++i) {
symbol_histogram.Add(rle_symbols[i]);
}
- EntropyCodeContextMap symbol_code;
- BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
- &symbol_code);
bool use_rle = max_run_length_prefix > 0;
WriteBits(1, use_rle, storage_ix, storage);
if (use_rle) {
WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
}
- StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
- storage_ix, storage);
+ EntropyCodeContextMap symbol_code;
+ BuildAndStoreEntropyCode(symbol_histogram, 15,
+ num_clusters + max_run_length_prefix,
+ &symbol_code,
+ storage_ix, storage);
for (int i = 0; i < rle_symbols.size(); ++i) {
- EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
+ WriteBits(symbol_code.depth_[rle_symbols[i]],
+ symbol_code.bits_[rle_symbols[i]],
+ storage_ix, storage);
if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
}
@@ -577,16 +612,6 @@ void EncodeContextMap(const std::vector<int>& context_map,
WriteBits(1, 1, storage_ix, storage); // use move-to-front
}
-template<int kSize>
-void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
- int alphabet_size,
- std::vector<EntropyCode<kSize> >* entropy_codes) {
- entropy_codes->resize(histograms.size());
- for (int i = 0; i < histograms.size(); ++i) {
- BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
- }
-}
-
struct BlockSplitCode {
EntropyCodeBlockType block_type_code;
EntropyCodeBlockLength block_len_code;
@@ -598,8 +623,8 @@ void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
int len_code = BlockLengthPrefix(length);
int extra_bits = BlockLengthExtraBits(len_code);
int extra_bits_value = length - BlockLengthOffset(len_code);
- EntropyEncode(len_code, entropy, storage_ix, storage);
-
+ WriteBits(entropy.depth_[len_code], entropy.bits_[len_code],
+ storage_ix, storage);
if (extra_bits > 0) {
WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
}
@@ -632,26 +657,25 @@ void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
BlockSplitCode* code,
int* storage_ix, uint8_t* storage) {
EncodeVarLenUint8(split.num_types_ - 1, storage_ix, storage);
+
if (split.num_types_ == 1) {
return;
}
HistogramBlockType type_histo;
- for (int i = 0; i < split.type_codes_.size(); ++i) {
+ for (int i = 1; i < split.type_codes_.size(); ++i) {
type_histo.Add(split.type_codes_[i]);
}
- BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
- &code->block_type_code);
HistogramBlockLength length_histo;
for (int i = 0; i < split.lengths_.size(); ++i) {
length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
}
- BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
- &code->block_len_code);
- StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
- storage_ix, storage);
- StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
- storage_ix, storage);
+ BuildAndStoreEntropyCode(type_histo, 15, split.num_types_ + 2,
+ &code->block_type_code,
+ storage_ix, storage);
+ BuildAndStoreEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
+ &code->block_len_code,
+ storage_ix, storage);
EncodeBlockLength(code->block_len_code, split.lengths_[0],
storage_ix, storage);
}
@@ -664,7 +688,9 @@ void MoveAndEncode(const BlockSplitCode& code,
it->type_ = it->split_.types_[it->idx_];
it->length_ = it->split_.lengths_[it->idx_];
int type_code = it->split_.type_codes_[it->idx_];
- EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
+ WriteBits(code.block_type_code.depth_[type_code],
+ code.block_type_code.bits_[type_code],
+ storage_ix, storage);
EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
}
--it->length_;
@@ -773,10 +799,8 @@ void StoreMetaBlock(const MetaBlock& mb,
int* storage_ix, uint8_t* storage) {
size_t length = MetaBlockLength(mb.cmds);
const size_t end_pos = *pos + length;
- EncodeMetaBlockLength(length,
- is_last,
- false,
- storage_ix, storage);
+ EncodeMetaBlockLength(length, is_last, false, storage_ix, storage);
+
if (length == 0) {
return;
}
@@ -792,26 +816,27 @@ void StoreMetaBlock(const MetaBlock& mb,
WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
WriteBits(4,
mb.params.num_direct_distance_codes >>
- mb.params.distance_postfix_bits, storage_ix, storage);
+ mb.params.distance_postfix_bits,
+ storage_ix, storage);
int num_distance_codes =
kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
(48 << mb.params.distance_postfix_bits);
for (int i = 0; i < mb.literal_split.num_types_; ++i) {
WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
}
- EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(), storage_ix, storage);
- EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(), storage_ix, storage);
+ EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(),
+ storage_ix, storage);
+ EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(),
+ storage_ix, storage);
std::vector<EntropyCodeLiteral> literal_codes;
std::vector<EntropyCodeCommand> command_codes;
std::vector<EntropyCodeDistance> distance_codes;
- BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
- BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
- &command_codes);
- BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
- &distance_codes);
- StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
- StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
- StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
+ BuildAndStoreEntropyCodes(mb.literal_histograms, 256, &literal_codes,
+ storage_ix, storage);
+ BuildAndStoreEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
+ &command_codes, storage_ix, storage);
+ BuildAndStoreEntropyCodes(mb.distance_histograms, num_distance_codes,
+ &distance_codes, storage_ix, storage);
BlockSplitIterator literal_it(mb.literal_split);
BlockSplitIterator command_it(mb.command_split);
BlockSplitIterator distance_it(mb.distance_split);
@@ -828,8 +853,10 @@ void StoreMetaBlock(const MetaBlock& mb,
Context(prev_byte, prev_byte2,
mb.literal_context_modes[literal_it.type_]));
histogram_idx = mb.literal_context_map[context];
- EntropyEncode(ringbuffer[*pos & mask],
- literal_codes[histogram_idx], storage_ix, storage);
+ int literal = ringbuffer[*pos & mask];
+ WriteBits(literal_codes[histogram_idx].depth_[literal],
+ literal_codes[histogram_idx].bits_[literal],
+ storage_ix, storage);
++(*pos);
}
if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
@@ -845,9 +872,10 @@ void StoreMetaBlock(const MetaBlock& mb,
}
}
-BrotliCompressor::BrotliCompressor()
- : window_bits_(kWindowBits),
- hasher_(new Hasher),
+BrotliCompressor::BrotliCompressor(BrotliParams params)
+ : params_(params),
+ window_bits_(kWindowBits),
+ hashers_(new Hashers()),
dist_ringbuffer_idx_(0),
input_pos_(0),
ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
@@ -859,28 +887,41 @@ BrotliCompressor::BrotliCompressor()
dist_ringbuffer_[2] = 11;
dist_ringbuffer_[3] = 4;
storage_[0] = 0;
- StoreDictionaryWordHashes();
+ switch (params.mode) {
+ case BrotliParams::MODE_TEXT: hash_type_ = Hashers::HASH_15_8_4; break;
+ case BrotliParams::MODE_FONT: hash_type_ = Hashers::HASH_15_8_2; break;
+ default: break;
+ }
+ hashers_->Init(hash_type_);
+ if (params.mode == BrotliParams::MODE_TEXT) {
+ StoreDictionaryWordHashes();
+ }
}
BrotliCompressor::~BrotliCompressor() {
- delete hasher_;
delete[] storage_;
}
+StaticDictionary *BrotliCompressor::static_dictionary_ = NULL;
+
void BrotliCompressor::StoreDictionaryWordHashes() {
- for (int t = kNumTransforms - 1; t >= 0; --t) {
- for (int i = kMaxDictionaryWordLength; i >= 3; --i) {
- const int num_words = 1 << kBrotliDictionarySizeBitsByLength[i];
- for (int j = num_words - 1; j >= 0; --j) {
- int word_id = t * num_words + j;
- std::string word = GetTransformedDictionaryWord(i, word_id);
- if (word.size() >= 3) {
- hasher_->Store(reinterpret_cast<const uint8_t*>(&word[0]),
- (-1) * ((i << 20) + word_id + 1));
+ const int num_transforms = kNumTransforms;
+ if (static_dictionary_ == NULL) {
+ static_dictionary_ = new StaticDictionary;
+ for (int t = num_transforms - 1; t >= 0; --t) {
+ for (int i = kMaxDictionaryWordLength; i >= 3; --i) {
+ const int num_words = 1 << kBrotliDictionarySizeBitsByLength[i];
+ for (int j = num_words - 1; j >= 0; --j) {
+ int word_id = t * num_words + j;
+ std::string word = GetTransformedDictionaryWord(i, word_id);
+ if (word.size() >= 3) {
+ static_dictionary_->Insert(word, i, word_id);
+ }
}
}
}
}
+ hashers_->SetStaticDictionary(static_dictionary_);
}
void BrotliCompressor::WriteStreamHeader() {
@@ -908,25 +949,30 @@ void BrotliCompressor::WriteMetaBlock(const size_t input_size,
input_size, kMinUTF8Ratio);
if (utf8_mode) {
EstimateBitCostsForLiteralsUTF8(input_pos_, input_size,
- kRingBufferMask, ringbuffer_.start(),
- &literal_cost_[0]);
+ kRingBufferMask, kRingBufferMask,
+ ringbuffer_.start(), &literal_cost_[0]);
} else {
EstimateBitCostsForLiterals(input_pos_, input_size,
- kRingBufferMask, ringbuffer_.start(),
- &literal_cost_[0]);
- }
- CreateBackwardReferences(input_size, input_pos_,
- ringbuffer_.start(),
- &literal_cost_[0],
- kRingBufferMask, kMaxBackwardDistance,
- hasher_,
- &commands);
- ComputeDistanceShortCodes(&commands, dist_ringbuffer_,
+ kRingBufferMask, kRingBufferMask,
+ ringbuffer_.start(), &literal_cost_[0]);
+ }
+ CreateBackwardReferences(
+ input_size, input_pos_,
+ ringbuffer_.start(),
+ &literal_cost_[0],
+ kRingBufferMask, kMaxBackwardDistance,
+ hashers_.get(),
+ hash_type_,
+ &commands);
+ ComputeDistanceShortCodes(&commands, input_pos_, kMaxBackwardDistance,
+ dist_ringbuffer_,
&dist_ringbuffer_idx_);
}
EncodingParams params;
- params.num_direct_distance_codes = 12;
- params.distance_postfix_bits = 1;
+ params.num_direct_distance_codes =
+ params_.mode == BrotliParams::MODE_FONT ? 12 : 0;
+ params.distance_postfix_bits =
+ params_.mode == BrotliParams::MODE_FONT ? 1 : 0;
params.literal_context_mode = CONTEXT_SIGNED;
const int storage_ix0 = storage_ix_;
MetaBlock mb;
@@ -935,6 +981,7 @@ void BrotliCompressor::WriteMetaBlock(const size_t input_size,
StoreMetaBlock(mb, is_last, ringbuffer_.start(), kRingBufferMask,
&input_pos_, &storage_ix_, storage_);
size_t output_size = is_last ? ((storage_ix_ + 7) >> 3) : (storage_ix_ >> 3);
+ output_size -= (storage_ix0 >> 3);
if (input_size + 4 < output_size) {
storage_ix_ = storage_ix0;
storage_[storage_ix_ >> 3] &= (1 << (storage_ix_ & 7)) - 1;
@@ -968,7 +1015,8 @@ void BrotliCompressor::FinishStream(
}
-int BrotliCompressBuffer(size_t input_size,
+int BrotliCompressBuffer(BrotliParams params,
+ size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer) {
@@ -978,7 +1026,7 @@ int BrotliCompressBuffer(size_t input_size,
return 1;
}
- BrotliCompressor compressor;
+ BrotliCompressor compressor(params);
compressor.WriteStreamHeader();
const int max_block_size = 1 << kMetaBlockSizeBits;
diff --git a/brotli/enc/encode.h b/brotli/enc/encode.h
index 49bd5b1..a128f7e 100644
--- a/brotli/enc/encode.h
+++ b/brotli/enc/encode.h
@@ -23,12 +23,23 @@
#include <vector>
#include "./hash.h"
#include "./ringbuffer.h"
+#include "./static_dict.h"
namespace brotli {
+struct BrotliParams {
+ enum Mode {
+ MODE_TEXT = 0,
+ MODE_FONT = 1,
+ };
+ Mode mode;
+
+ BrotliParams() : mode(MODE_TEXT) {}
+};
+
class BrotliCompressor {
public:
- BrotliCompressor();
+ explicit BrotliCompressor(BrotliParams params);
~BrotliCompressor();
// Writes the stream header into the internal output buffer.
@@ -53,8 +64,10 @@ class BrotliCompressor {
// Initializes the hasher with the hashes of dictionary words.
void StoreDictionaryWordHashes();
+ BrotliParams params_;
int window_bits_;
- Hasher* hasher_;
+ std::unique_ptr<Hashers> hashers_;
+ Hashers::Type hash_type_;
int dist_ringbuffer_[4];
size_t dist_ringbuffer_idx_;
size_t input_pos_;
@@ -62,12 +75,14 @@ class BrotliCompressor {
std::vector<float> literal_cost_;
int storage_ix_;
uint8_t* storage_;
+ static StaticDictionary *static_dictionary_;
};
// Compresses the data in input_buffer into encoded_buffer, and sets
// *encoded_size to the compressed length.
// Returns 0 if there was an error and 1 otherwise.
-int BrotliCompressBuffer(size_t input_size,
+int BrotliCompressBuffer(BrotliParams params,
+ size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer);
diff --git a/brotli/enc/hash.h b/brotli/enc/hash.h
index d6a415b..b07a538 100644
--- a/brotli/enc/hash.h
+++ b/brotli/enc/hash.h
@@ -24,12 +24,14 @@
#include <sys/types.h>
#include <algorithm>
#include <cstdlib>
+#include <memory>
#include <string>
#include "./transform.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./port.h"
+#include "./static_dict.h"
namespace brotli {
@@ -41,11 +43,21 @@ namespace brotli {
// * The number has been tuned heuristically against compression benchmarks.
static const uint32_t kHashMul32 = 0x1e35a7bd;
-inline uint32_t Hash3Bytes(const uint8_t *data, const int bits) {
- uint32_t h = (BROTLI_UNALIGNED_LOAD32(data) & 0xffffff) * kHashMul32;
- // The higher bits contain more mixture from the multiplication,
- // so we take our results from there.
- return h >> (32 - bits);
+template<int kShiftBits, int kMinLength>
+inline uint32_t Hash(const uint8_t *data) {
+ if (kMinLength <= 3) {
+ // If kMinLength is 2 or 3, we hash the first 3 bytes of data.
+ uint32_t h = (BROTLI_UNALIGNED_LOAD32(data) & 0xffffff) * kHashMul32;
+ // The higher bits contain more mixture from the multiplication,
+ // so we take our results from there.
+ return h >> (32 - kShiftBits);
+ } else {
+ // If kMinLength is at least 4, we hash the first 4 bytes of data.
+ uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
+ // The higher bits contain more mixture from the multiplication,
+ // so we take our results from there.
+ return h >> (32 - kShiftBits);
+ }
}
// Usually, we always choose the longest backward reference. This function
@@ -67,32 +79,35 @@ inline double BackwardReferenceScore(double average_cost,
double start_cost3,
double start_cost2,
int copy_length,
- int backward_reference_offset,
- int last_distance1,
- int last_distance2,
- int last_distance3,
- int last_distance4) {
+ int backward_reference_offset) {
double retval = 0;
switch (copy_length) {
case 2: retval = start_cost2; break;
case 3: retval = start_cost3; break;
default: retval = start_cost4 + (copy_length - 4) * average_cost; break;
}
- int diff_last1 = abs(backward_reference_offset - last_distance1);
- int diff_last2 = abs(backward_reference_offset - last_distance2);
- if (diff_last1 == 0) {
- retval += 0.6;
- } else if (diff_last1 < 4) {
- retval -= 0.9 + 0.03 * diff_last1;
- } else if (diff_last2 < 4) {
- retval -= 0.95 + 0.1 * diff_last2;
- } else if (backward_reference_offset == last_distance3) {
- retval -= 1.17;
- } else if (backward_reference_offset == last_distance4) {
- retval -= 1.27;
- } else {
- retval -= 1.20 * Log2Floor(backward_reference_offset);
+ retval -= 1.20 * Log2Floor(backward_reference_offset);
+ return retval;
+}
+
+inline double BackwardReferenceScoreUsingLastDistance(double average_cost,
+ double start_cost4,
+ double start_cost3,
+ double start_cost2,
+ int copy_length,
+ int distance_short_code) {
+ double retval = 0;
+ switch (copy_length) {
+ case 2: retval = start_cost2; break;
+ case 3: retval = start_cost3; break;
+ default: retval = start_cost4 + (copy_length - 4) * average_cost; break;
}
+ static const double kDistanceShortCodeBitCost[16] = {
+ -0.6, 0.95, 1.17, 1.27,
+ 0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
+ 1.05, 1.05, 1.15, 1.15, 1.25, 1.25
+ };
+ retval -= kDistanceShortCodeBitCost[distance_short_code];
return retval;
}
@@ -102,7 +117,7 @@ inline double BackwardReferenceScore(double average_cost,
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
// index positions of the given hash key in the compressed data.
-template <int kBucketBits, int kBlockBits>
+template <int kBucketBits, int kBlockBits, int kMinLength>
class HashLongestMatch {
public:
HashLongestMatch()
@@ -111,17 +126,24 @@ class HashLongestMatch {
last_distance3_(15),
last_distance4_(16),
insert_length_(0),
- average_cost_(5.4) {
+ average_cost_(5.4),
+ static_dict_(NULL) {
Reset();
}
void Reset() {
std::fill(&num_[0], &num_[sizeof(num_) / sizeof(num_[0])], 0);
}
+ void SetStaticDictionary(const StaticDictionary *dict) {
+ static_dict_ = dict;
+ }
+ bool HasStaticDictionary() const {
+ return static_dict_ != NULL;
+ }
// Look at 3 bytes at data.
// Compute a hash from these, and store the value of ix at that position.
inline void Store(const uint8_t *data, const int ix) {
- const uint32_t key = Hash3Bytes(data, kBucketBits);
+ const uint32_t key = Hash<kBucketBits, kMinLength>(data);
const int minor_ix = num_[key] & kBlockMask;
buckets_[key][minor_ix] = ix;
++num_[key];
@@ -218,19 +240,17 @@ class HashLongestMatch {
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
- if (len >= 3 || (len == 2 && i < 2)) {
+ if (len >= std::max(kMinLength, 3) ||
+ (kMinLength == 2 && len == 2 && i < 2)) {
// Comparing for >= 2 does not change the semantics, but just saves for
// a few unnecessary binary logarithms in backward reference score,
// since we are not interested in such short matches.
- const double score = BackwardReferenceScore(average_cost_,
- start_cost4,
- start_cost3,
- start_cost2,
- len, backward,
- last_distance1_,
- last_distance2_,
- last_distance3_,
- last_distance4_);
+ const double score = BackwardReferenceScoreUsingLastDistance(
+ average_cost_,
+ start_cost4,
+ start_cost3,
+ start_cost2,
+ len, i);
if (best_score < score) {
best_score = score;
best_len = len;
@@ -244,76 +264,41 @@ class HashLongestMatch {
}
}
}
- const uint32_t key = Hash3Bytes(&data[cur_ix_masked], kBucketBits);
- const int * __restrict const bucket = &buckets_[key][0];
- const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
- int stop = int(cur_ix) - 64;
- if (stop < 0) { stop = 0; }
+ if (kMinLength == 2) {
+ int stop = int(cur_ix) - 64;
+ if (stop < 0) { stop = 0; }
+ start_cost2 -= 1.0;
+ for (int i = cur_ix - 1; i > stop; --i) {
+ size_t prev_ix = i;
+ const size_t backward = cur_ix - prev_ix;
+ if (PREDICT_FALSE(backward > max_backward)) {
+ break;
+ }
+ prev_ix &= ring_buffer_mask;
+ if (data[cur_ix_masked] != data[prev_ix] ||
+ data[cur_ix_masked + 1] != data[prev_ix + 1]) {
+ continue;
+ }
+ int len = 2;
+ const double score = start_cost2 - 2.3 * Log2Floor(backward);
- start_cost2 -= 1.0;
- for (int i = cur_ix - 1; i > stop; --i) {
- size_t prev_ix = i;
- const size_t backward = cur_ix - prev_ix;
- if (PREDICT_FALSE(backward > max_backward)) {
- break;
- }
- prev_ix &= ring_buffer_mask;
- if (data[cur_ix_masked] != data[prev_ix] ||
- data[cur_ix_masked + 1] != data[prev_ix + 1]) {
- continue;
- }
- int len = 2;
- const double score = start_cost2 - 2.3 * Log2Floor(backward);
-
- if (best_score < score) {
- best_score = score;
- best_len = len;
- best_ix = backward;
- *best_len_out = best_len;
- *best_len_code_out = best_len;
- *best_distance_out = best_ix;
- match_found = true;
+ if (best_score < score) {
+ best_score = score;
+ best_len = len;
+ best_ix = backward;
+ *best_len_out = best_len;
+ *best_len_code_out = best_len;
+ *best_distance_out = best_ix;
+ match_found = true;
+ }
}
}
+ const uint32_t key = Hash<kBucketBits, kMinLength>(&data[cur_ix_masked]);
+ const int * __restrict const bucket = &buckets_[key][0];
+ const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (int i = num_[key] - 1; i >= down; --i) {
int prev_ix = bucket[i & kBlockMask];
- if (prev_ix < 0) {
- prev_ix *= -1;
- prev_ix -= 1;
- int copy_len_code = prev_ix >> 20;
- int word_id = prev_ix & 0xfffff;
- std::string word = GetTransformedDictionaryWord(copy_len_code, word_id);
- int len = word.size();
- const size_t backward = max_backward + word_id + 1;
- bool word_matched = (len >= 3 && len <= max_length);
- for (int k = 0; k < len && word_matched; ++k) {
- if ((uint8_t)(word[k]) != data[cur_ix_masked + k]) {
- word_matched = false;
- }
- }
- if (word_matched) {
- const double score = BackwardReferenceScore(average_cost_,
- start_cost4,
- start_cost3,
- start_cost2,
- len, backward,
- last_distance1_,
- last_distance2_,
- last_distance3_,
- last_distance4_);
- if (best_score < score) {
- best_score = score;
- best_len = len;
- best_ix = backward;
- *best_len_out = best_len;
- *best_len_code_out = copy_len_code;
- *best_distance_out = best_ix;
- *best_score_out = best_score;
- match_found = true;
- *in_dictionary = true;
- }
- }
- } else {
+ if (prev_ix >= 0) {
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
break;
@@ -327,7 +312,7 @@ class HashLongestMatch {
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
- if (len >= 3) {
+ if (len >= std::max(kMinLength, 3)) {
// Comparing for >= 3 does not change the semantics, but just saves
// for a few unnecessary binary logarithms in backward reference
// score, since we are not interested in such short matches.
@@ -335,11 +320,7 @@ class HashLongestMatch {
start_cost4,
start_cost3,
start_cost2,
- len, backward,
- last_distance1_,
- last_distance2_,
- last_distance3_,
- last_distance4_);
+ len, backward);
if (best_score < score) {
best_score = score;
best_len = len;
@@ -354,6 +335,36 @@ class HashLongestMatch {
}
}
}
+ if (static_dict_ != NULL) {
+ // We decide based on first 4 bytes how many bytes to test for.
+ int prefix = BROTLI_UNALIGNED_LOAD32(&data[cur_ix_masked]);
+ int maxlen = static_dict_->GetLength(prefix);
+ for (int len = std::min<size_t>(maxlen, max_length);
+ len > best_len && len >= 4; --len) {
+ std::string snippet((const char *)&data[cur_ix_masked], len);
+ int copy_len_code;
+ int word_id;
+ if (static_dict_->Get(snippet, &copy_len_code, &word_id)) {
+ const size_t backward = max_backward + word_id + 1;
+ const double score = BackwardReferenceScore(average_cost_,
+ start_cost4,
+ start_cost3,
+ start_cost2,
+ len, backward);
+ if (best_score < score) {
+ best_score = score;
+ best_len = len;
+ best_ix = backward;
+ *best_len_out = best_len;
+ *best_len_code_out = copy_len_code;
+ *best_distance_out = best_ix;
+ *best_score_out = best_score;
+ match_found = true;
+ *in_dictionary = true;
+ }
+ }
+ }
+ }
return match_found;
}
@@ -399,9 +410,37 @@ class HashLongestMatch {
int insert_length_;
double average_cost_;
+
+ const StaticDictionary *static_dict_;
};
-typedef HashLongestMatch<13, 11> Hasher;
+struct Hashers {
+ enum Type {
+ HASH_15_8_4 = 0,
+ HASH_15_8_2 = 1,
+ };
+
+ void Init(Type type) {
+ switch (type) {
+ case HASH_15_8_4:
+ hash_15_8_4.reset(new HashLongestMatch<15, 8, 4>());
+ break;
+ case HASH_15_8_2:
+ hash_15_8_2.reset(new HashLongestMatch<15, 8, 2>());
+ break;
+ default:
+ break;
+ }
+ }
+
+ void SetStaticDictionary(const StaticDictionary *dict) {
+ if (hash_15_8_4.get() != NULL) hash_15_8_4->SetStaticDictionary(dict);
+ if (hash_15_8_2.get() != NULL) hash_15_8_2->SetStaticDictionary(dict);
+ }
+
+ std::unique_ptr<HashLongestMatch<15, 8, 4> > hash_15_8_4;
+ std::unique_ptr<HashLongestMatch<15, 8, 2> > hash_15_8_2;
+};
} // namespace brotli
diff --git a/brotli/enc/literal_cost.cc b/brotli/enc/literal_cost.cc
index a944599..9179923 100644
--- a/brotli/enc/literal_cost.cc
+++ b/brotli/enc/literal_cost.cc
@@ -59,7 +59,8 @@ static int DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
}
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
- const uint8_t *data, float *cost) {
+ size_t cost_mask, const uint8_t *data,
+ float *cost) {
// max_utf8 is 0 (normal ascii single byte modeling),
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
@@ -110,18 +111,20 @@ void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
if (histo == 0) {
histo = 1;
}
- cost[masked_pos] = log2(static_cast<double>(in_window_utf8[utf8_pos])
- / histo);
- cost[masked_pos] += 0.02905;
- if (cost[masked_pos] < 1.0) {
- cost[masked_pos] *= 0.5;
- cost[masked_pos] += 0.5;
+ float lit_cost = log2(static_cast<double>(in_window_utf8[utf8_pos])
+ / histo);
+ lit_cost += 0.02905;
+ if (lit_cost < 1.0) {
+ lit_cost *= 0.5;
+ lit_cost += 0.5;
}
+ cost[(pos + i) & cost_mask] = lit_cost;
}
}
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
- const uint8_t *data, float *cost) {
+ size_t cost_mask, const uint8_t *data,
+ float *cost) {
int histogram[256] = { 0 };
int window_half = 2000;
int in_window = std::min(static_cast<size_t>(window_half), len);
@@ -143,17 +146,17 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
++histogram[data[(pos + i + window_half) & mask]];
++in_window;
}
- int masked_pos = (pos + i) & mask;
- int histo = histogram[data[masked_pos]];
+ int histo = histogram[data[(pos + i) & mask]];
if (histo == 0) {
histo = 1;
}
- cost[masked_pos] = log2(static_cast<double>(in_window) / histo);
- cost[masked_pos] += 0.029;
- if (cost[masked_pos] < 1.0) {
- cost[masked_pos] *= 0.5;
- cost[masked_pos] += 0.5;
+ float lit_cost = log2(static_cast<double>(in_window) / histo);
+ lit_cost += 0.029;
+ if (lit_cost < 1.0) {
+ lit_cost *= 0.5;
+ lit_cost += 0.5;
}
+ cost[(pos + i) & cost_mask] = lit_cost;
}
}
diff --git a/brotli/enc/literal_cost.h b/brotli/enc/literal_cost.h
index ca39a4e..5c1eca4 100644
--- a/brotli/enc/literal_cost.h
+++ b/brotli/enc/literal_cost.h
@@ -26,11 +26,11 @@ namespace brotli {
// ringbuffer (data, mask) will take entropy coded and writes these estimates
// to the ringbuffer (cost, mask).
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
- const uint8_t *data,
+ size_t cost_mask, const uint8_t *data,
float *cost);
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
- const uint8_t *data,
+ size_t cost_mask, const uint8_t *data,
float *cost);
} // namespace brotli
diff --git a/brotli/enc/prefix.cc b/brotli/enc/prefix.cc
index 3e43501..50b671e 100644
--- a/brotli/enc/prefix.cc
+++ b/brotli/enc/prefix.cc
@@ -90,7 +90,7 @@ int CopyLengthPrefix(int length) {
int CommandPrefix(int insert_length, int copy_length) {
if (copy_length == 0) {
- copy_length = 3;
+ copy_length = 4;
}
int insert_prefix = InsertLengthPrefix(insert_length);
int copy_prefix = CopyLengthPrefix(copy_length);
diff --git a/brotli/enc/static_dict.h b/brotli/enc/static_dict.h
new file mode 100644
index 0000000..1b5a77f
--- /dev/null
+++ b/brotli/enc/static_dict.h
@@ -0,0 +1,68 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Class to model the static dictionary.
+
+#ifndef BROTLI_ENC_STATIC_DICT_H_
+#define BROTLI_ENC_STATIC_DICT_H_
+
+#include <algorithm>
+#include <unordered_map>
+#include <string>
+
+namespace brotli {
+
+class StaticDictionary {
+ public:
+ StaticDictionary() {}
+ void Insert(const std::string &str, int len, int dist) {
+ int ix = (dist << 6) + len;
+ std::unordered_map<std::string, int>::const_iterator it = map_.find(str);
+ if (it != map_.end() && ix >= it->second) {
+ return;
+ }
+ map_[str] = ix;
+ int v = 0;
+ for (int i = 0; i < 4 && i < str.size(); ++i) {
+ v += str[i] << (8 * i);
+ }
+ if (prefix_map_[v] < str.size()) {
+ prefix_map_[v] = str.size();
+ }
+ }
+ int GetLength(int v) const {
+ std::unordered_map<int, int>::const_iterator it = prefix_map_.find(v);
+ if (it == prefix_map_.end()) {
+ return 0;
+ }
+ return it->second;
+ }
+ bool Get(const std::string &str, int *len, int *dist) const {
+ std::unordered_map<std::string, int>::const_iterator it = map_.find(str);
+ if (it == map_.end()) {
+ return false;
+ }
+ int v = it->second;
+ *len = v & 63;
+ *dist = v >> 6;
+ return true;
+ }
+ private:
+ std::unordered_map<std::string, int> map_;
+ std::unordered_map<int, int> prefix_map_;
+};
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_STATIC_DICT_H_
diff --git a/woff2/file.h b/woff2/file.h
index f93fdee..69a92f8 100644
--- a/woff2/file.h
+++ b/woff2/file.h
@@ -14,8 +14,8 @@
//
// File IO helpers
-#ifndef BROTLI_WOFF2_FILE_H_
-#define BROTLI_WOFF2_FILE_H_
+#ifndef WOFF2_FILE_H_
+#define WOFF2_FILE_H_
#include <fstream>
#include <iterator>
@@ -37,4 +37,4 @@ inline void SetFileContents(std::string filename, std::string content) {
}
} // namespace woff2
-#endif // BROTLI_WOFF2_FILE_H_
+#endif // WOFF2_FILE_H_
diff --git a/woff2/font.h b/woff2/font.h
index 21fd634..dd003fb 100644
--- a/woff2/font.h
+++ b/woff2/font.h
@@ -15,8 +15,8 @@
// Data model for a font file in sfnt format, reading and writing functions and
// accessors for the glyph data.
-#ifndef BROTLI_WOFF2_FONT_H_
-#define BROTLI_WOFF2_FONT_H_
+#ifndef WOFF2_FONT_H_
+#define WOFF2_FONT_H_
#include <stddef.h>
#include <inttypes.h>
@@ -78,4 +78,4 @@ bool GetGlyphData(const Font& font, int glyph_index,
} // namespace woff2
-#endif // BROTLI_WOFF2_FONT_H_
+#endif // WOFF2_FONT_H_
diff --git a/woff2/glyph.h b/woff2/glyph.h
index 2e249f6..0ee755c 100644
--- a/woff2/glyph.h
+++ b/woff2/glyph.h
@@ -15,8 +15,8 @@
// Data model and I/O for glyph data within sfnt format files for the purpose of
// performing the preprocessing step of the WOFF 2.0 conversion.
-#ifndef BROTLI_WOFF2_GLYPH_H_
-#define BROTLI_WOFF2_GLYPH_H_
+#ifndef WOFF2_GLYPH_H_
+#define WOFF2_GLYPH_H_
#include <stddef.h>
#include <inttypes.h>
@@ -68,4 +68,4 @@ bool StoreGlyph(const Glyph& glyph, uint8_t* dst, size_t* dst_size);
} // namespace woff2
-#endif // BROTLI_WOFF2_GLYPH_H_
+#endif // WOFF2_GLYPH_H_
diff --git a/woff2/normalize.h b/woff2/normalize.h
index b3d8331..dcb473b 100644
--- a/woff2/normalize.h
+++ b/woff2/normalize.h
@@ -16,8 +16,8 @@
// files in normalized form, the WOFF 2.0 conversion is guaranteed to be
// lossless (in a bitwise sense) only for normalized font files.
-#ifndef BROTLI_WOFF2_NORMALIZE_H_
-#define BROTLI_WOFF2_NORMALIZE_H_
+#ifndef WOFF2_NORMALIZE_H_
+#define WOFF2_NORMALIZE_H_
namespace woff2 {
@@ -42,4 +42,4 @@ bool NormalizeFont(Font* font);
} // namespace woff2
-#endif // BROTLI_WOFF2_NORMALIZE_H_
+#endif // WOFF2_NORMALIZE_H_
diff --git a/woff2/ots.h b/woff2/ots.h
index 4eac1cb..9cf91bc 100644
--- a/woff2/ots.h
+++ b/woff2/ots.h
@@ -15,8 +15,8 @@
// The parts of ots.h & opentype-sanitiser.h that we need, taken from the
// https://code.google.com/p/ots/ project.
-#ifndef BROTLI_WOFF2_OTS_H_
-#define BROTLI_WOFF2_OTS_H_
+#ifndef WOFF2_OTS_H_
+#define WOFF2_OTS_H_
#include <stdint.h>
#include <arpa/inet.h>
@@ -150,4 +150,4 @@ class Buffer {
} // namespace ots
-#endif // BROTLI_WOFF2_OTS_H_
+#endif // WOFF2_OTS_H_
diff --git a/woff2/port.h b/woff2/port.h
index e7a2708..fd5498e 100644
--- a/woff2/port.h
+++ b/woff2/port.h
@@ -14,8 +14,8 @@
//
// Helper function for bit twiddling
-#ifndef BROTLI_WOFF2_PORT_H_
-#define BROTLI_WOFF2_PORT_H_
+#ifndef WOFF2_PORT_H_
+#define WOFF2_PORT_H_
namespace woff2 {
@@ -43,4 +43,4 @@ inline int Log2Floor(uint32 n) {
}
} // namespace woff2
-#endif // BROTLI_WOFF2_PORT_H_
+#endif // WOFF2_PORT_H_
diff --git a/woff2/round.h b/woff2/round.h
index 4d88862..cd6e5aa 100644
--- a/woff2/round.h
+++ b/woff2/round.h
@@ -14,8 +14,8 @@
//
// Helper for rounding
-#ifndef BROTLI_WOFF2_ROUND_H_
-#define BROTLI_WOFF2_ROUND_H_
+#ifndef WOFF2_ROUND_H_
+#define WOFF2_ROUND_H_
namespace woff2 {
@@ -30,4 +30,4 @@ template<typename T> T Round4(T value) {
} // namespace woff2
-#endif // BROTLI_WOFF2_ROUND_H_
+#endif // WOFF2_ROUND_H_
diff --git a/woff2/store_bytes.h b/woff2/store_bytes.h
index 37054b2..a9a3401 100644
--- a/woff2/store_bytes.h
+++ b/woff2/store_bytes.h
@@ -15,8 +15,8 @@
// Helper functions for storing integer values into byte streams.
// No bounds checking is performed, that is the responsibility of the caller.
-#ifndef BROTLI_WOFF2_STORE_BYTES_H_
-#define BROTLI_WOFF2_STORE_BYTES_H_
+#ifndef WOFF2_STORE_BYTES_H_
+#define WOFF2_STORE_BYTES_H_
#include <inttypes.h>
#include <stddef.h>
@@ -58,4 +58,4 @@ inline void StoreBytes(const uint8_t* data, size_t len,
} // namespace woff2
-#endif // BROTLI_WOFF2_STORE_BYTES_H_
+#endif // WOFF2_STORE_BYTES_H_
diff --git a/woff2/transform.h b/woff2/transform.h
index dd63e73..3722e10 100644
--- a/woff2/transform.h
+++ b/woff2/transform.h
@@ -14,8 +14,8 @@
//
// Library for preprocessing fonts as part of the WOFF 2.0 conversion.
-#ifndef BROTLI_WOFF2_TRANSFORM_H_
-#define BROTLI_WOFF2_TRANSFORM_H_
+#ifndef WOFF2_TOOLS_TRANSFORM_H_
+#define WOFF2_TOOLS_TRANSFORM_H_
#include "./font.h"
@@ -28,4 +28,4 @@ bool TransformGlyfAndLocaTables(Font* font);
} // namespace woff2
-#endif // BROTLI_WOFF2_TRANSFORM_H_
+#endif // WOFF2_TRANSFORM_H_
diff --git a/woff2/woff2.cc b/woff2/woff2.cc
index 43e0861..1625c00 100644
--- a/woff2/woff2.cc
+++ b/woff2/woff2.cc
@@ -40,7 +40,6 @@ using std::string;
using std::vector;
-
// simple glyph flags
const int kGlyfOnCurve = 1 << 0;
const int kGlyfXShort = 1 << 1;
@@ -88,9 +87,7 @@ const size_t kLzmaHeaderSize = 13;
const uint32_t kCompressionTypeMask = 0xf;
const uint32_t kCompressionTypeNone = 0;
const uint32_t kCompressionTypeGzip = 1;
-const uint32_t kCompressionTypeLzma = 2;
-const uint32_t kCompressionTypeBrotli = 3;
-const uint32_t kCompressionTypeLzham = 4;
+const uint32_t kCompressionTypeBrotli = 2;
// This is a special value for the short format only, as described in
// "Design for compressed header format" in draft doc.
@@ -736,8 +733,9 @@ bool Woff2Compress(const uint8_t* data, const size_t len,
uint8_t* result, uint32_t* result_len) {
if (compression_type == kCompressionTypeBrotli) {
size_t compressed_len = *result_len;
-
- brotli::BrotliCompressBuffer(len, data, &compressed_len, result);
+ brotli::BrotliParams params;
+ params.mode = brotli::BrotliParams::MODE_FONT;
+ brotli::BrotliCompressBuffer(params, len, data, &compressed_len, result);
*result_len = compressed_len;
return true;
}
@@ -839,7 +837,7 @@ bool ReadShortDirectory(ots::Buffer* file, std::vector<Table>* tables,
} else {
if (flags == kCompressionTypeNone ||
flags == kCompressionTypeGzip ||
- flags == kCompressionTypeLzma) {
+ flags == kCompressionTypeBrotli) {
last_compression_type = flags;
} else {
return OTS_FAILURE();
@@ -911,19 +909,13 @@ bool ConvertWOFF2ToTTF(uint8_t* result, size_t result_length,
if (!file.ReadU16(&num_tables) || !num_tables) {
return OTS_FAILURE();
}
- // These reserved bits will be always zero in the final format, but they
- // temporarily indicate the use of brotli, so that we can evaluate gzip, lzma
- // and brotli side-by-side.
- uint16_t reserved;
- if (!file.ReadU16(&reserved)) {
- return OTS_FAILURE();
- }
// We don't care about these fields of the header:
+ // uint16_t reserved
// uint32_t total_sfnt_size
// uint16_t major_version, minor_version
// uint32_t meta_offset, meta_length, meta_orig_length
// uint32_t priv_offset, priv_length
- if (!file.Skip(28)) {
+ if (!file.Skip(30)) {
return OTS_FAILURE();
}
std::vector<Table> tables(num_tables);
@@ -996,9 +988,6 @@ bool ConvertWOFF2ToTTF(uint8_t* result, size_t result_length,
uint32_t flags = table->flags;
const uint8_t* src_buf = data + table->src_offset;
uint32_t compression_type = flags & kCompressionTypeMask;
- if (compression_type == kCompressionTypeLzma && reserved > 0) {
- compression_type = kCompressionTypeLzma + reserved;
- }
size_t transform_length = table->transform_length;
if ((flags & kWoff2FlagsContinueStream) != 0) {
if (!continue_valid) {
@@ -1090,7 +1079,7 @@ size_t TableEntrySize(const Table& table) {
}
if ((table.flags & kWoff2FlagsContinueStream) == 0 &&
((table.flags & 3) == kCompressionTypeGzip ||
- (table.flags & 3) == kCompressionTypeLzma)) {
+ (table.flags & 3) == kCompressionTypeBrotli)) {
size += Base128Size(table.dst_length);
}
return size;
@@ -1226,7 +1215,7 @@ bool ConvertTTFToWOFF2(const uint8_t *data, size_t length,
}
Table table;
table.tag = src_table.tag;
- table.flags = std::min(options.compression_type, kCompressionTypeLzma);
+ table.flags = options.compression_type;
table.src_length = src_table.length;
table.transform_length = src_table.length;
const uint8_t* transformed_data = src_table.data;
@@ -1278,16 +1267,13 @@ bool ConvertTTFToWOFF2(const uint8_t *data, size_t length,
return false;
}
*result_length = woff2_length;
- uint16_t reserved =
- (options.compression_type > kCompressionTypeLzma) ?
- options.compression_type - kCompressionTypeLzma : 0;
size_t offset = 0;
StoreU32(kWoff2Signature, &offset, result);
StoreU32(font.flavor, &offset, result);
StoreU32(woff2_length, &offset, result);
Store16(tables.size(), &offset, result);
- Store16(reserved, &offset, result);
+ Store16(0, &offset, result); // reserved
StoreU32(ComputeTTFLength(tables), &offset, result);
StoreBytes(head_table->data + 4, 4, &offset, result); // font revision
StoreU32(0, &offset, result); // metaOffset
diff --git a/woff2/woff2.h b/woff2/woff2.h
index aba5080..2e4ee25 100644
--- a/woff2/woff2.h
+++ b/woff2/woff2.h
@@ -14,8 +14,8 @@
//
// Library for converting WOFF2 format font files to their TTF versions.
-#ifndef BROTLI_WOFF2_WOFF2_H_
-#define BROTLI_WOFF2_WOFF2_H_
+#ifndef WOFF2_WOFF2_H_
+#define WOFF2_WOFF2_H_
#include <stddef.h>
#include <inttypes.h>
@@ -47,4 +47,4 @@ bool ConvertTTFToWOFF2(const uint8_t *data, size_t length,
} // namespace woff2
-#endif // BROTLI_WOFF2_WOFF2_H_
+#endif // WOFF2_WOFF2_H_