author    Zoltan Szabadka <szabadka@google.com>  2013-10-23 13:06:13 +0200
committer Zoltan Szabadka <szabadka@google.com>  2013-10-23 13:06:13 +0200
commit    79e99afe468407e9ff9f0820df7190cb069eabeb (patch)
tree      57dce6c1f464bb0905c90cc9778b9e136dc086d2
parent    b35077ca42301e3307aef363f426ec38fa1ad24c (diff)
download  src-79e99afe468407e9ff9f0820df7190cb069eabeb.tar.gz
Add brotli compressor
This commit adds the encoder for the brotli compression format. Brotli is a generic byte-level compression algorithm.
-rw-r--r--  brotli/enc/README                  |   3
-rw-r--r--  brotli/enc/backward_references.cc  | 137
-rw-r--r--  brotli/enc/backward_references.h   |  33
-rw-r--r--  brotli/enc/bit_cost.h              | 150
-rw-r--r--  brotli/enc/block_splitter.cc       | 411
-rw-r--r--  brotli/enc/block_splitter.h        |  77
-rw-r--r--  brotli/enc/cluster.h               | 288
-rw-r--r--  brotli/enc/command.h               |  45
-rw-r--r--  brotli/enc/context.h               | 130
-rw-r--r--  brotli/enc/encode.cc               | 778
-rw-r--r--  brotli/enc/encode.h                |  37
-rw-r--r--  brotli/enc/entropy_encode.cc       | 397
-rw-r--r--  brotli/enc/entropy_encode.h        | 112
-rw-r--r--  brotli/enc/fast_log.h              | 161
-rw-r--r--  brotli/enc/find_match_length.h     |  85
-rw-r--r--  brotli/enc/hash.h                  | 354
-rw-r--r--  brotli/enc/histogram.cc            |  72
-rw-r--r--  brotli/enc/histogram.h             |  97
-rw-r--r--  brotli/enc/literal_cost.cc         |  60
-rw-r--r--  brotli/enc/literal_cost.h          |  31
-rw-r--r--  brotli/enc/port.h                  | 138
-rw-r--r--  brotli/enc/prefix.cc               | 166
-rw-r--r--  brotli/enc/prefix.h                |  51
-rw-r--r--  brotli/enc/write_bits.h            |  91
24 files changed, 3904 insertions, 0 deletions
diff --git a/brotli/enc/README b/brotli/enc/README
new file mode 100644
index 0000000..c988ae7
--- /dev/null
+++ b/brotli/enc/README
@@ -0,0 +1,3 @@
+This directory holds the encoder for brotli compression format.
+
+Brotli is proposed to be used at the byte-compression level in WOFF 2.0 format.
diff --git a/brotli/enc/backward_references.cc b/brotli/enc/backward_references.cc
new file mode 100644
index 0000000..606fb99
--- /dev/null
+++ b/brotli/enc/backward_references.cc
@@ -0,0 +1,137 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Function to find backward reference copies.
+
+#include "./backward_references.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "./command.h"
+#include "./hash.h"
+#include "./literal_cost.h"
+
+namespace brotli {
+
+void CreateBackwardReferences(const uint8_t* data,
+ int length,
+ std::vector<Command>* commands) {
+ HashLongestMatch<13,11> *hasher = new HashLongestMatch<13,11>;
+ float *literal_cost = new float[length];
+ EstimateBitCostsForLiterals(length, data, literal_cost);
+ hasher->SetLiteralCost(literal_cost);
+
+ // Length heuristic that seems to help probably by better selection
+ // of lazy matches of similar lengths.
+ int insert_length = 0;
+ size_t i = 0;
+
+ double average_cost = 0.0;
+ for (int i = 0; i < length; ++i) {
+ average_cost += literal_cost[i];
+ }
+ average_cost /= length;
+ hasher->set_average_cost(average_cost);
+
+ while (i + 2 < length) {
+ size_t best_len = 0;
+ size_t best_dist = 0;
+ double best_score = 0;
+ const size_t max_distance = std::min(i, 1UL << 24);
+ hasher->set_insert_length(insert_length);
+ bool match_found = hasher->FindLongestMatch(
+ data, i, length - i, max_distance,
+ &best_len, &best_dist, &best_score);
+ if (match_found) {
+ // Found a match. Let's look for something even better ahead.
+ int delayed_backward_references_in_row = 0;
+ while (i + 4 < length &&
+ delayed_backward_references_in_row < 4) {
+ size_t best_len_2 = 0;
+ size_t best_dist_2 = 0;
+ double best_score_2 = 0;
+ hasher->Store(data + i, i);
+ match_found = hasher->FindLongestMatch(
+ data, i + 1, length - i - 1, max_distance,
+ &best_len_2, &best_dist_2, &best_score_2);
+ double cost_diff_lazy = 0;
+ if (best_len >= 4) {
+ cost_diff_lazy += hasher->literal_cost(i + 4) - average_cost;
+ }
+ {
+ const int tail_length = best_len_2 - best_len + 1;
+ for (int k = 0; k < tail_length; ++k) {
+ cost_diff_lazy -= hasher->literal_cost(i + best_len + k) -
+ average_cost;
+ }
+ }
+ // If we are not inserting any symbols, inserting one is more
+ // expensive than if we were inserting symbols anyways.
+ if (insert_length < 1) {
+ cost_diff_lazy += 1.0;
+ }
+ // Add bias to slightly avoid lazy matching.
+ cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2;
+ cost_diff_lazy += 0.04 * hasher->literal_cost(i);
+
+ if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
+ // Ok, let's just write one byte for now and start a match from the
+ // next byte.
+ ++insert_length;
+ ++delayed_backward_references_in_row;
+ best_len = best_len_2;
+ best_dist = best_dist_2;
+ best_score = best_score_2;
+ i++;
+ } else {
+ break;
+ }
+ }
+ Command cmd;
+ cmd.insert_length_ = insert_length;
+ cmd.copy_length_ = best_len;
+ cmd.copy_distance_ = best_dist;
+ commands->push_back(cmd);
+ hasher->set_last_distance(best_dist);
+
+ insert_length = 0;
+ ++i;
+ for (int j = 1; j < best_len; ++j) {
+ if (i + 2 < length) {
+ hasher->Store(data + i, i);
+ }
+ ++i;
+ }
+ } else {
+ ++insert_length;
+ hasher->Store(data + i, i);
+ ++i;
+ }
+ }
+ insert_length += (length - i);
+
+ if (insert_length > 0) {
+ Command cmd;
+ cmd.insert_length_ = insert_length;
+ cmd.copy_length_ = 0;
+ cmd.copy_distance_ = 0;
+ commands->push_back(cmd);
+ }
+
+ delete[] literal_cost;
+ delete hasher;
+}
+
+} // namespace brotli
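
Aside: the command stream built above is a list of (insert_length_, copy_length_, copy_distance_) triples; each command first emits a run of literals and then copies copy_length_ bytes from copy_distance_ positions back in the output produced so far. Below is a minimal sketch of a decoder for this in-memory representation, assuming the literals have been packed into one contiguous array (as CopyLiteralsToByteArray in block_splitter.cc does); SimpleCommand and ApplyCommands are illustrative names, not part of this commit.

#include <cstdint>
#include <vector>

// Illustrative stand-in for the subset of Command fields used here.
struct SimpleCommand {
  uint32_t insert_length_;
  uint32_t copy_length_;
  uint32_t copy_distance_;
};

// Rebuilds the original bytes from the packed literals plus the command stream.
std::vector<uint8_t> ApplyCommands(const std::vector<uint8_t>& literals,
                                   const std::vector<SimpleCommand>& cmds) {
  std::vector<uint8_t> out;
  size_t lit_pos = 0;
  for (const SimpleCommand& cmd : cmds) {
    // Emit the literal run that precedes the copy.
    for (uint32_t i = 0; i < cmd.insert_length_; ++i) {
      out.push_back(literals[lit_pos++]);
    }
    // Copy bytes from copy_distance_ positions back in the output so far.
    for (uint32_t i = 0; i < cmd.copy_length_; ++i) {
      out.push_back(out[out.size() - cmd.copy_distance_]);
    }
  }
  return out;
}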
diff --git a/brotli/enc/backward_references.h b/brotli/enc/backward_references.h
new file mode 100644
index 0000000..795f613
--- /dev/null
+++ b/brotli/enc/backward_references.h
@@ -0,0 +1,33 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Function to find backward reference copies.
+
+#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
+#define BROTLI_ENC_BACKWARD_REFERENCES_H_
+
+#include <stdint.h>
+#include <vector>
+
+#include "./command.h"
+
+namespace brotli {
+
+void CreateBackwardReferences(const uint8_t* data,
+ int length,
+ std::vector<Command>* commands);
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_BACKWARD_REFERENCES_H_
diff --git a/brotli/enc/bit_cost.h b/brotli/enc/bit_cost.h
new file mode 100644
index 0000000..fc5381b
--- /dev/null
+++ b/brotli/enc/bit_cost.h
@@ -0,0 +1,150 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions to estimate the bit cost of Huffman trees.
+
+#ifndef BROTLI_ENC_BIT_COST_H_
+#define BROTLI_ENC_BIT_COST_H_
+
+#include <stdint.h>
+
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+
+namespace brotli {
+
+static const int kHuffmanExtraBits[kCodeLengthCodes] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7,
+};
+
+static inline int HuffmanTreeBitCost(const int* counts, const uint8_t* depth) {
+ int nbits = 0;
+ for (int i = 0; i < kCodeLengthCodes; ++i) {
+ nbits += counts[i] * (depth[i] + kHuffmanExtraBits[i]);
+ }
+ return nbits;
+}
+
+static inline int HuffmanTreeBitCost(
+ const Histogram<kCodeLengthCodes>& histogram,
+ const EntropyCode<kCodeLengthCodes>& entropy) {
+ return HuffmanTreeBitCost(&histogram.data_[0], &entropy.depth_[0]);
+}
+
+static inline int HuffmanBitCost(const uint8_t* depth, int length) {
+ int max_depth = 1;
+ int histogram[kCodeLengthCodes] = { 0 };
+ int tail_start = 0;
+ // compute histogram of compacted huffman tree
+ for (int i = 0; i < length;) {
+ const int value = depth[i];
+ if (value > max_depth) {
+ max_depth = value;
+ }
+ int reps = 1;
+ for (int k = i + 1; k < length && depth[k] == value; ++k) {
+ ++reps;
+ }
+ i += reps;
+ if (value == 0) {
+ while (reps > 10) {
+ ++histogram[18];
+ reps -= 138;
+ }
+ if (reps > 2) {
+ ++histogram[17];
+ } else if (reps > 0) {
+ histogram[0] += reps;
+ }
+ } else {
+ tail_start = i;
+ ++histogram[value];
+ --reps;
+ while (reps > 2) {
+ ++histogram[16];
+ reps -= 6;
+ }
+ if (reps > 0) {
+ histogram[value] += reps;
+ }
+ }
+ }
+
+ // create huffman tree of huffman tree
+ uint8_t cost[kCodeLengthCodes] = { 0 };
+ CreateHuffmanTree(histogram, kCodeLengthCodes, 7, cost);
+ // account for rle extra bits
+ cost[16] += 2;
+ cost[17] += 3;
+ cost[18] += 7;
+
+ int tree_size = 0;
+ int bits = 6 + 3 * max_depth; // huffman tree of huffman tree cost
+ for (int i = 0; i < kCodeLengthCodes; ++i) {
+ bits += histogram[i] * cost[i]; // huffman tree bit cost
+ tree_size += histogram[i];
+ }
+ // bit cost adjustment for long trailing zero sequence
+ int tail_size = length - tail_start;
+ int tail_bits = 0;
+ while (tail_size >= 1) {
+ if (tail_size < 3) {
+ tail_bits += tail_size * cost[0];
+ tree_size -= tail_size;
+ break;
+ } else if (tail_size < 11) {
+ tail_bits += cost[17];
+ --tree_size;
+ break;
+ } else {
+ tail_bits += cost[18];
+ tail_size -= 138;
+ --tree_size;
+ }
+ }
+ if (tail_bits > 12) {
+ bits += ((Log2Ceiling(tree_size - 1) + 1) & ~1) + 3 - tail_bits;
+ }
+ return bits;
+}
+
+template<int kSize>
+double PopulationCost(const Histogram<kSize>& histogram) {
+ if (histogram.total_count_ == 0) {
+ return 4;
+ }
+ int symbols[2] = { 0 };
+ int count = 0;
+ for (int i = 0; i < kSize && count < 3; ++i) {
+ if (histogram.data_[i] > 0) {
+ if (count < 2) symbols[count] = i;
+ ++count;
+ }
+ }
+ if (count <= 2 && symbols[0] < 256 && symbols[1] < 256) {
+ return ((symbols[0] <= 1 ? 4 : 11) +
+ (count == 2 ? 8 + histogram.total_count_ : 0));
+ }
+ uint8_t depth[kSize] = { 0 };
+ CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth);
+ int bits = HuffmanBitCost(depth, kSize);
+ for (int i = 0; i < kSize; ++i) {
+ bits += histogram.data_[i] * depth[i];
+ }
+ return bits;
+}
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_BIT_COST_H_
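
Aside: PopulationCost above returns the cost of Huffman-coding the histogram (count * depth summed over symbols) plus the cost of storing the code lengths via HuffmanBitCost. That symbol-cost term is bounded below by the Shannon entropy of the histogram, which makes a handy sanity check; a minimal sketch of that lower bound follows (ShannonBitCost is an illustrative name, not part of this commit).

#include <cmath>
#include <vector>

// Ideal (fractional-bit) cost of entropy-coding a histogram:
// sum over symbols of count * (log2(total) - log2(count)).
double ShannonBitCost(const std::vector<int>& counts) {
  double total = 0;
  for (int c : counts) total += c;
  if (total == 0) return 0;
  double bits = 0;
  for (int c : counts) {
    if (c > 0) bits += c * (std::log2(total) - std::log2(c));
  }
  return bits;
}
// Example: ShannonBitCost({6, 2}) is about 6.5 bits, while any real prefix
// code needs at least 8 bits here (1 bit per symbol), before adding the
// bits needed to describe the code itself.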
diff --git a/brotli/enc/block_splitter.cc b/brotli/enc/block_splitter.cc
new file mode 100644
index 0000000..5552541
--- /dev/null
+++ b/brotli/enc/block_splitter.cc
@@ -0,0 +1,411 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Block split point selection utilities.
+
+#include "./block_splitter.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <map>
+
+#include "./cluster.h"
+#include "./command.h"
+#include "./fast_log.h"
+#include "./histogram.h"
+
+namespace brotli {
+
+static const int kMaxLiteralHistograms = 48;
+static const int kMaxCommandHistograms = 50;
+static const double kLiteralBlockSwitchCost = 26;
+static const double kCommandBlockSwitchCost = 13.5;
+static const double kDistanceBlockSwitchCost = 14.6;
+static const int kLiteralStrideLength = 70;
+static const int kCommandStrideLength = 40;
+static const int kSymbolsPerLiteralHistogram = 550;
+static const int kSymbolsPerCommandHistogram = 530;
+static const int kSymbolsPerDistanceHistogram = 550;
+static const int kMinLengthForBlockSplitting = 128;
+static const int kIterMulForRefining = 2;
+static const int kMinItersForRefining = 100;
+
+void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
+ const uint8_t* data,
+ std::vector<uint8_t>* literals) {
+ // Count how many we have.
+ size_t total_length = 0;
+ for (int i = 0; i < cmds.size(); ++i) {
+ total_length += cmds[i].insert_length_;
+ }
+ if (total_length == 0) {
+ return;
+ }
+
+ // Allocate.
+ literals->resize(total_length);
+
+ // Loop again, and copy this time.
+ size_t pos = 0;
+ size_t from_pos = 0;
+ for (int i = 0; i < cmds.size() && pos < total_length; ++i) {
+ memcpy(&(*literals)[pos], data + from_pos, cmds[i].insert_length_);
+ pos += cmds[i].insert_length_;
+ from_pos += cmds[i].insert_length_ + cmds[i].copy_length_;
+ }
+}
+
+void CopyCommandsToByteArray(const std::vector<Command>& cmds,
+ std::vector<uint16_t>* insert_and_copy_codes,
+ std::vector<uint8_t>* distance_prefixes) {
+ for (int i = 0; i < cmds.size(); ++i) {
+ const Command& cmd = cmds[i];
+ insert_and_copy_codes->push_back(cmd.command_prefix_);
+ if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
+ distance_prefixes->push_back(cmd.distance_prefix_);
+ }
+ }
+}
+
+template<int kSize>
+double HistogramAddEval(const Histogram<kSize>& a,
+ const Histogram<kSize>& b) {
+ int total = a.total_count_ + b.total_count_;
+ double retval = total * FastLog2(total);
+ for (int i = 0; i < kSize; ++i) {
+ int count = a.data_[i] + b.data_[i];
+ retval -= count * FastLog2(count);
+ }
+ return retval;
+}
+
+template<typename HistogramType, typename DataType>
+void InitialEntropyCodes(const DataType* data, size_t length,
+ int literals_per_histogram,
+ int max_histograms,
+ size_t stride,
+ std::vector<HistogramType>* vec) {
+ int total_histograms = length / literals_per_histogram + 1;
+ if (total_histograms > max_histograms) {
+ total_histograms = max_histograms;
+ }
+ unsigned int seed = 7;
+ int block_length = length / total_histograms;
+ for (int i = 0; i < total_histograms; ++i) {
+ int pos = length * i / total_histograms;
+ if (i != 0) {
+ pos += rand_r(&seed) % block_length;
+ }
+ if (pos + stride >= length) {
+ pos = length - stride - 1;
+ }
+ HistogramType histo;
+ histo.Add(data + pos, stride);
+ vec->push_back(histo);
+ }
+}
+
+template<typename HistogramType>
+int FindClosest(const HistogramType& sample,
+ const std::vector<HistogramType>& vec) {
+ double best_distance = 1e99;
+ int best_ix = 0;
+ for (int i = 0; i < vec.size(); ++i) {
+ double distance = HistogramAddEval(sample, vec[i]);
+ if (distance < best_distance) {
+ best_ix = i;
+ best_distance = distance;
+ }
+ }
+ return best_ix;
+}
+
+template<typename HistogramType, typename DataType>
+void RandomSample(unsigned int* seed,
+ const DataType* data,
+ size_t length,
+ size_t stride,
+ HistogramType* sample) {
+ size_t pos = rand_r(seed) % (length - stride);
+ sample->Add(data + pos, stride);
+}
+
+template<typename HistogramType, typename DataType>
+void RefineEntropyCodes(const DataType* data, size_t length,
+ size_t stride,
+ std::vector<HistogramType>* vec) {
+ const int iters =
+ kIterMulForRefining * length / stride + kMinItersForRefining;
+ unsigned int seed = 7;
+ for (int iter = 0; iter < iters; ++iter) {
+ HistogramType sample;
+ RandomSample(&seed, data, length, stride, &sample);
+ int ix = FindClosest(sample, *vec);
+ (*vec)[ix].AddHistogram(sample);
+ }
+}
+
+inline static float BitCost(int total, int count) {
+ return count == 0 ? FastLog2(total) + 2 : FastLog2(total) - FastLog2(count);
+}
+
+template<typename DataType, int kSize>
+void FindBlocks(const DataType* data, const size_t length,
+ const double block_switch_bitcost,
+ const std::vector<Histogram<kSize> > &vec,
+ uint8_t *block_id) {
+ if (vec.size() <= 1) {
+ for (int i = 0; i < length; ++i) {
+ block_id[i] = 0;
+ }
+ return;
+ }
+ int vecsize = vec.size();
+ double* insert_cost = new double[kSize * vecsize];
+ memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * vecsize);
+ for (int i = 0; i < kSize; ++i) {
+ for (int j = 0; j < vecsize; ++j) {
+ insert_cost[i * vecsize + j] =
+ BitCost(vec[j].total_count_, vec[j].data_[i]);
+ }
+ }
+ double *cost = new double[vecsize];
+ memset(cost, 0, sizeof(cost[0]) * vecsize);
+ bool* switch_signal = new bool[length * vecsize];
+ memset(switch_signal, 0, sizeof(switch_signal[0]) * length * vecsize);
+ // After each iteration of this loop, cost[k] will contain the difference
+ // between the minimum cost of arriving at the current byte position using
+ // entropy code k, and the minimum cost of arriving at the current byte
+ // position. This difference is capped at the block switch cost, and if it
+ // reaches block switch cost, it means that when we trace back from the last
+ // position, we need to switch here.
+ for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
+ int ix = byte_ix * vecsize;
+ int insert_cost_ix = data[byte_ix] * vecsize;
+ double min_cost = 1e99;
+ for (int k = 0; k < vecsize; ++k) {
+ // We are coding the symbol in data[byte_ix] with entropy code k.
+ cost[k] += insert_cost[insert_cost_ix + k];
+ if (cost[k] < min_cost) {
+ min_cost = cost[k];
+ block_id[byte_ix] = k;
+ }
+ }
+ double block_switch_cost = block_switch_bitcost;
+ // More blocks for the beginning.
+ if (byte_ix < 2000) {
+ block_switch_cost *= 0.77 + 0.07 * byte_ix / 2000;
+ }
+ for (int k = 0; k < vecsize; ++k) {
+ cost[k] -= min_cost;
+ if (cost[k] >= block_switch_cost) {
+ cost[k] = block_switch_cost;
+ switch_signal[ix + k] = true;
+ }
+ }
+ }
+ // Now trace back from the last position and switch at the marked places.
+ int byte_ix = length - 1;
+ int ix = byte_ix * vecsize;
+ int cur_id = block_id[byte_ix];
+ while (byte_ix > 0) {
+ --byte_ix;
+ ix -= vecsize;
+ if (switch_signal[ix + cur_id]) {
+ cur_id = block_id[byte_ix];
+ }
+ block_id[byte_ix] = cur_id;
+ }
+ delete[] insert_cost;
+ delete[] cost;
+ delete[] switch_signal;
+}
+
+int RemapBlockIds(uint8_t* block_ids, const size_t length) {
+ std::map<uint8_t, uint8_t> new_id;
+ int next_id = 0;
+ for (int i = 0; i < length; ++i) {
+ if (new_id.find(block_ids[i]) == new_id.end()) {
+ new_id[block_ids[i]] = next_id;
+ ++next_id;
+ }
+ }
+ for (int i = 0; i < length; ++i) {
+ block_ids[i] = new_id[block_ids[i]];
+ }
+ return next_id;
+}
+
+template<typename HistogramType, typename DataType>
+void BuildBlockHistograms(const DataType* data, const size_t length,
+ uint8_t* block_ids,
+ std::vector<HistogramType>* histograms) {
+ int num_types = RemapBlockIds(block_ids, length);
+ histograms->clear();
+ histograms->resize(num_types);
+ for (int i = 0; i < length; ++i) {
+ (*histograms)[block_ids[i]].Add(data[i]);
+ }
+}
+
+template<typename HistogramType, typename DataType>
+void ClusterBlocks(const DataType* data, const size_t length,
+ uint8_t* block_ids) {
+ std::vector<HistogramType> histograms;
+ std::vector<int> block_index(length);
+ int cur_idx = 0;
+ HistogramType cur_histogram;
+ for (int i = 0; i < length; ++i) {
+ bool block_boundary = (i + 1 == length || block_ids[i] != block_ids[i + 1]);
+ block_index[i] = cur_idx;
+ cur_histogram.Add(data[i]);
+ if (block_boundary) {
+ histograms.push_back(cur_histogram);
+ cur_histogram.Clear();
+ ++cur_idx;
+ }
+ }
+ std::vector<HistogramType> clustered_histograms;
+ std::vector<int> histogram_symbols;
+ // Block ids need to fit in one byte and there are two ids reserved for
+ // indicating 'same as last' and 'last plus one'.
+ static const int kMaxNumberOfBlockTypes = 254;
+ ClusterHistograms(histograms, 1, histograms.size(),
+ kMaxNumberOfBlockTypes,
+ &clustered_histograms,
+ &histogram_symbols);
+ for (int i = 0; i < length; ++i) {
+ block_ids[i] = histogram_symbols[block_index[i]];
+ }
+}
+
+void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) {
+ int cur_id = block_ids[0];
+ int cur_length = 1;
+ split->num_types_ = -1;
+ for (int i = 1; i < block_ids.size(); ++i) {
+ if (block_ids[i] != cur_id) {
+ split->types_.push_back(cur_id);
+ split->lengths_.push_back(cur_length);
+ split->num_types_ = std::max(split->num_types_, cur_id);
+ cur_id = block_ids[i];
+ cur_length = 0;
+ }
+ ++cur_length;
+ }
+ split->types_.push_back(cur_id);
+ split->lengths_.push_back(cur_length);
+ split->num_types_ = std::max(split->num_types_, cur_id);
+ ++split->num_types_;
+}
+
+template<typename HistogramType, typename DataType>
+void SplitByteVector(const std::vector<DataType>& data,
+ const int literals_per_histogram,
+ const int max_histograms,
+ const int sampling_stride_length,
+ const double block_switch_cost,
+ BlockSplit* split) {
+ if (data.empty()) {
+ split->num_types_ = 0;
+ return;
+ } else if (data.size() < kMinLengthForBlockSplitting) {
+ split->num_types_ = 1;
+ split->types_.push_back(0);
+ split->lengths_.push_back(data.size());
+ return;
+ }
+ std::vector<HistogramType> histograms;
+ // Find good entropy codes.
+ InitialEntropyCodes(data.data(), data.size(),
+ literals_per_histogram,
+ max_histograms,
+ sampling_stride_length,
+ &histograms);
+ RefineEntropyCodes(data.data(), data.size(),
+ sampling_stride_length,
+ &histograms);
+ // Find a good path through literals with the good entropy codes.
+ std::vector<uint8_t> block_ids(data.size());
+ for (int i = 0; i < 10; ++i) {
+ FindBlocks(data.data(), data.size(),
+ block_switch_cost,
+ histograms,
+ &block_ids[0]);
+ BuildBlockHistograms(data.data(), data.size(), &block_ids[0], &histograms);
+ }
+ ClusterBlocks<HistogramType>(data.data(), data.size(), &block_ids[0]);
+ BuildBlockSplit(block_ids, split);
+}
+
+void SplitBlock(const std::vector<Command>& cmds,
+ const uint8_t* data,
+ BlockSplit* literal_split,
+ BlockSplit* insert_and_copy_split,
+ BlockSplit* dist_split) {
+ // Create a continuous array of literals.
+ std::vector<uint8_t> literals;
+ CopyLiteralsToByteArray(cmds, data, &literals);
+
+ // Compute prefix codes for commands.
+ std::vector<uint16_t> insert_and_copy_codes;
+ std::vector<uint8_t> distance_prefixes;
+ CopyCommandsToByteArray(cmds,
+ &insert_and_copy_codes,
+ &distance_prefixes);
+
+ SplitByteVector<HistogramLiteral>(
+ literals,
+ kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
+ kLiteralStrideLength, kLiteralBlockSwitchCost,
+ literal_split);
+ SplitByteVector<HistogramCommand>(
+ insert_and_copy_codes,
+ kSymbolsPerCommandHistogram, kMaxCommandHistograms,
+ kCommandStrideLength, kCommandBlockSwitchCost,
+ insert_and_copy_split);
+ SplitByteVector<HistogramDistance>(
+ distance_prefixes,
+ kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
+ kCommandStrideLength, kDistanceBlockSwitchCost,
+ dist_split);
+}
+
+void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
+ int input_size,
+ int target_length,
+ std::vector<std::vector<Command> >* blocks) {
+ int num_blocks = input_size / target_length + 1;
+ int length_limit = input_size / num_blocks + 1;
+ int total_length = 0;
+ std::vector<Command> cur_block;
+ for (int i = 0; i < all_commands.size(); ++i) {
+ const Command& cmd = all_commands[i];
+ int cmd_length = cmd.insert_length_ + cmd.copy_length_;
+ if (total_length > length_limit) {
+ blocks->push_back(cur_block);
+ cur_block.clear();
+ total_length = 0;
+ }
+ cur_block.push_back(cmd);
+ total_length += cmd_length;
+ }
+ blocks->push_back(cur_block);
+}
+
+} // namespace brotli
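
Aside: the heart of this file is the capped-cost-difference dynamic program in FindBlocks, described in the comment inside that function. Below is a minimal sketch of the same idea, reduced to two fixed per-symbol cost models over a short string; the constants and the toy cost model are assumptions for illustration only, not part of this commit.

#include <cstdio>
#include <string>
#include <vector>

int main() {
  const std::string data = "aaaaaabbbbbbaaaaaa";
  // Model 0 is cheap for 'a', model 1 is cheap for 'b' (assumed bit costs).
  auto bit_cost = [](int model, char byte) {
    bool cheap = (model == 0) ? (byte == 'a') : (byte == 'b');
    return cheap ? 1.0 : 4.0;
  };
  const double block_switch_cost = 6.0;  // plays the role of kLiteralBlockSwitchCost
  const int n = static_cast<int>(data.size());
  std::vector<double> cost(2, 0.0);
  std::vector<int> block_id(n, 0);
  std::vector<bool> switch_signal(2 * n, false);
  for (int i = 0; i < n; ++i) {
    double min_cost = 1e99;
    for (int k = 0; k < 2; ++k) {
      cost[k] += bit_cost(k, data[i]);
      if (cost[k] < min_cost) { min_cost = cost[k]; block_id[i] = k; }
    }
    for (int k = 0; k < 2; ++k) {
      cost[k] -= min_cost;
      if (cost[k] >= block_switch_cost) {
        cost[k] = block_switch_cost;
        switch_signal[2 * i + k] = true;
      }
    }
  }
  // Trace back from the end, switching models where the signal was set.
  int cur_id = block_id[n - 1];
  for (int i = n - 1; i > 0;) {
    --i;
    if (switch_signal[2 * i + cur_id]) cur_id = block_id[i];
    block_id[i] = cur_id;
  }
  for (int i = 0; i < n; ++i) printf("%d", block_id[i]);
  printf("\n");  // prints 000000111111000000: one block per run of the input
}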
diff --git a/brotli/enc/block_splitter.h b/brotli/enc/block_splitter.h
new file mode 100644
index 0000000..272904c
--- /dev/null
+++ b/brotli/enc/block_splitter.h
@@ -0,0 +1,77 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Block split point selection utilities.
+
+#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
+#define BROTLI_ENC_BLOCK_SPLITTER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <vector>
+#include <utility>
+
+#include "./command.h"
+
+namespace brotli {
+
+struct BlockSplit {
+ int num_types_;
+ std::vector<uint8_t> types_;
+ std::vector<uint8_t> type_codes_;
+ std::vector<int> lengths_;
+};
+
+struct BlockSplitIterator {
+ explicit BlockSplitIterator(const BlockSplit& split)
+ : split_(split), idx_(0), type_(0), length_(0) {
+ if (!split.lengths_.empty()) {
+ length_ = split.lengths_[0];
+ }
+ }
+
+ void Next() {
+ if (length_ == 0) {
+ ++idx_;
+ type_ = split_.types_[idx_];
+ length_ = split_.lengths_[idx_];
+ }
+ --length_;
+ }
+
+ const BlockSplit& split_;
+ int idx_;
+ int type_;
+ int length_;
+};
+
+void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
+ const uint8_t* data,
+ std::vector<uint8_t>* literals);
+
+void SplitBlock(const std::vector<Command>& cmds,
+ const uint8_t* data,
+ BlockSplit* literal_split,
+ BlockSplit* insert_and_copy_split,
+ BlockSplit* dist_split);
+
+void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
+ int input_size,
+ int target_length,
+ std::vector<std::vector<Command> >* blocks);
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_BLOCK_SPLITTER_H_
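
Aside: a BlockSplit stores the block-type sequence in run-length form (types_ plus lengths_), and BlockSplitIterator expands it back to one type per position via Next(). A minimal usage sketch, assuming it is compiled next to block_splitter.h so the relative include resolves; the values are made up for illustration.

#include <cstdio>
#include "./block_splitter.h"

int main() {
  brotli::BlockSplit split;
  split.num_types_ = 2;
  split.types_ = {0, 1, 0};    // block types in order of appearance
  split.lengths_ = {3, 2, 4};  // lengths of the corresponding blocks
  brotli::BlockSplitIterator it(split);
  for (int i = 0; i < 3 + 2 + 4; ++i) {
    it.Next();
    printf("%d", it.type_);  // prints 000110000
  }
  printf("\n");
  return 0;
}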
diff --git a/brotli/enc/cluster.h b/brotli/enc/cluster.h
new file mode 100644
index 0000000..855a88d
--- /dev/null
+++ b/brotli/enc/cluster.h
@@ -0,0 +1,288 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions for clustering similar histograms together.
+
+#ifndef BROTLI_ENC_CLUSTER_H_
+#define BROTLI_ENC_CLUSTER_H_
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <complex>
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "./bit_cost.h"
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+#include "./histogram.h"
+
+namespace brotli {
+
+struct HistogramPair {
+ int idx1;
+ int idx2;
+ bool valid;
+ double cost_combo;
+ double cost_diff;
+};
+
+struct HistogramPairComparator {
+ bool operator()(const HistogramPair& p1, const HistogramPair& p2) {
+ if (p1.cost_diff != p2.cost_diff) {
+ return p1.cost_diff > p2.cost_diff;
+ }
+ return abs(p1.idx1 - p1.idx2) > abs(p2.idx1 - p2.idx2);
+ }
+};
+
+// Returns entropy reduction of the context map when we combine two clusters.
+inline double ClusterCostDiff(int size_a, int size_b) {
+ int size_c = size_a + size_b;
+ return size_a * FastLog2(size_a) + size_b * FastLog2(size_b) -
+ size_c * FastLog2(size_c);
+}
+
+// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
+// it is below a threshold, stores the pair (idx1, idx2) in the *pairs heap.
+template<int kSize>
+void CompareAndPushToHeap(const Histogram<kSize>* out,
+ const int* cluster_size,
+ int idx1, int idx2,
+ std::vector<HistogramPair>* pairs) {
+ if (idx1 == idx2) {
+ return;
+ }
+ if (idx2 < idx1) {
+ int t = idx2;
+ idx2 = idx1;
+ idx1 = t;
+ }
+ bool store_pair = false;
+ HistogramPair p;
+ p.idx1 = idx1;
+ p.idx2 = idx2;
+ p.valid = true;
+ p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
+ p.cost_diff -= out[idx1].bit_cost_;
+ p.cost_diff -= out[idx2].bit_cost_;
+
+ if (out[idx1].total_count_ == 0) {
+ p.cost_combo = out[idx2].bit_cost_;
+ store_pair = true;
+ } else if (out[idx2].total_count_ == 0) {
+ p.cost_combo = out[idx1].bit_cost_;
+ store_pair = true;
+ } else {
+ double threshold = pairs->empty() ? 1e99 :
+ std::max(0.0, (*pairs)[0].cost_diff);
+ Histogram<kSize> combo = out[idx1];
+ combo.AddHistogram(out[idx2]);
+ double cost_combo = PopulationCost(combo);
+ if (cost_combo < threshold - p.cost_diff) {
+ p.cost_combo = cost_combo;
+ store_pair = true;
+ }
+ }
+ if (store_pair) {
+ p.cost_diff += p.cost_combo;
+ pairs->push_back(p);
+ push_heap(pairs->begin(), pairs->end(), HistogramPairComparator());
+ }
+}
+
+template<int kSize>
+void HistogramCombine(Histogram<kSize>* out,
+ int* cluster_size,
+ int* symbols,
+ int symbols_size,
+ int max_clusters) {
+ double cost_diff_threshold = 0.0;
+ int min_cluster_size = 1;
+ std::set<int> all_symbols;
+ std::vector<int> clusters;
+ for (int i = 0; i < symbols_size; ++i) {
+ if (all_symbols.find(symbols[i]) == all_symbols.end()) {
+ all_symbols.insert(symbols[i]);
+ clusters.push_back(symbols[i]);
+ }
+ }
+
+ // We maintain a heap of histogram pairs, ordered by the bit cost reduction.
+ std::vector<HistogramPair> pairs;
+ for (int idx1 = 0; idx1 < clusters.size(); ++idx1) {
+ for (int idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
+ CompareAndPushToHeap(out, cluster_size, clusters[idx1], clusters[idx2],
+ &pairs);
+ }
+ }
+
+ while (clusters.size() > min_cluster_size) {
+ if (pairs[0].cost_diff >= cost_diff_threshold) {
+ cost_diff_threshold = 1e99;
+ min_cluster_size = max_clusters;
+ continue;
+ }
+ // Take the best pair from the top of heap.
+ int best_idx1 = pairs[0].idx1;
+ int best_idx2 = pairs[0].idx2;
+ out[best_idx1].AddHistogram(out[best_idx2]);
+ out[best_idx1].bit_cost_ = pairs[0].cost_combo;
+ cluster_size[best_idx1] += cluster_size[best_idx2];
+ for (int i = 0; i < symbols_size; ++i) {
+ if (symbols[i] == best_idx2) {
+ symbols[i] = best_idx1;
+ }
+ }
+ for (int i = 0; i + 1 < clusters.size(); ++i) {
+ if (clusters[i] >= best_idx2) {
+ clusters[i] = clusters[i + 1];
+ }
+ }
+ clusters.pop_back();
+ // Invalidate pairs intersecting the just combined best pair.
+ for (int i = 0; i < pairs.size(); ++i) {
+ HistogramPair& p = pairs[i];
+ if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
+ p.idx1 == best_idx2 || p.idx2 == best_idx2) {
+ p.valid = false;
+ }
+ }
+ // Pop invalid pairs from the top of the heap.
+ while (!pairs.empty() && !pairs[0].valid) {
+ pop_heap(pairs.begin(), pairs.end(), HistogramPairComparator());
+ pairs.pop_back();
+ }
+ // Push new pairs formed with the combined histogram to the heap.
+ for (int i = 0; i < clusters.size(); ++i) {
+ CompareAndPushToHeap(out, cluster_size, best_idx1, clusters[i], &pairs);
+ }
+ }
+}
+
+// -----------------------------------------------------------------------------
+// Histogram refinement
+
+// What is the bit cost of moving histogram from cur_symbol to candidate.
+template<int kSize>
+double HistogramBitCostDistance(const Histogram<kSize>& histogram,
+ const Histogram<kSize>& candidate) {
+ if (histogram.total_count_ == 0) {
+ return 0.0;
+ }
+ Histogram<kSize> tmp = histogram;
+ tmp.AddHistogram(candidate);
+ return PopulationCost(tmp) - candidate.bit_cost_;
+}
+
+// Find the best 'out' histogram for each of the 'in' histograms.
+// Note: we assume that out[]->bit_cost_ is already up-to-date.
+template<int kSize>
+void HistogramRemap(const Histogram<kSize>* in, int in_size,
+ Histogram<kSize>* out, int* symbols) {
+ std::set<int> all_symbols;
+ for (int i = 0; i < in_size; ++i) {
+ all_symbols.insert(symbols[i]);
+ }
+ for (int i = 0; i < in_size; ++i) {
+ int best_out = i == 0 ? symbols[0] : symbols[i - 1];
+ double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
+ for (std::set<int>::const_iterator k = all_symbols.begin();
+ k != all_symbols.end(); ++k) {
+ const double cur_bits = HistogramBitCostDistance(in[i], out[*k]);
+ if (cur_bits < best_bits) {
+ best_bits = cur_bits;
+ best_out = *k;
+ }
+ }
+ symbols[i] = best_out;
+ }
+
+ // Recompute each out based on raw and symbols.
+ for (std::set<int>::const_iterator k = all_symbols.begin();
+ k != all_symbols.end(); ++k) {
+ out[*k].Clear();
+ }
+ for (int i = 0; i < in_size; ++i) {
+ out[symbols[i]].AddHistogram(in[i]);
+ }
+}
+
+// Reorder histograms in *out so that the new symbols in *symbols come in
+// increasing order.
+template<int kSize>
+void HistogramReindex(std::vector<Histogram<kSize> >* out,
+ std::vector<int>* symbols) {
+ std::vector<Histogram<kSize> > tmp(*out);
+ std::map<int, int> new_index;
+ int next_index = 0;
+ for (int i = 0; i < symbols->size(); ++i) {
+ if (new_index.find((*symbols)[i]) == new_index.end()) {
+ new_index[(*symbols)[i]] = next_index;
+ (*out)[next_index] = tmp[(*symbols)[i]];
+ ++next_index;
+ }
+ }
+ out->resize(next_index);
+ for (int i = 0; i < symbols->size(); ++i) {
+ (*symbols)[i] = new_index[(*symbols)[i]];
+ }
+}
+
+// Clusters similar histograms in 'in' together, the selected histograms are
+// placed in 'out', and for each index in 'in', *histogram_symbols will
+// indicate which of the 'out' histograms is the best approximation.
+template<int kSize>
+void ClusterHistograms(const std::vector<Histogram<kSize> >& in,
+ int num_contexts, int num_blocks,
+ int max_histograms,
+ std::vector<Histogram<kSize> >* out,
+ std::vector<int>* histogram_symbols) {
+ const int in_size = num_contexts * num_blocks;
+ std::vector<int> cluster_size(in_size, 1);
+ out->resize(in_size);
+ histogram_symbols->resize(in_size);
+ for (int i = 0; i < in_size; ++i) {
+ (*out)[i] = in[i];
+ (*out)[i].bit_cost_ = PopulationCost(in[i]);
+ (*histogram_symbols)[i] = i;
+ }
+
+ // Collapse similar histograms within a block type.
+ if (num_contexts > 1) {
+ for (int i = 0; i < num_blocks; ++i) {
+ HistogramCombine(&(*out)[0], &cluster_size[0],
+ &(*histogram_symbols)[i * num_contexts], num_contexts,
+ max_histograms);
+ }
+ }
+
+ // Collapse similar histograms.
+ HistogramCombine(&(*out)[0], &cluster_size[0],
+ &(*histogram_symbols)[0], in_size,
+ max_histograms);
+
+ // Find the optimal map from original histograms to the final ones.
+ HistogramRemap(&in[0], in_size, &(*out)[0], &(*histogram_symbols)[0]);
+
+ // Convert the context map to a canonical form.
+ HistogramReindex(out, histogram_symbols);
+}
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_CLUSTER_H_
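
Aside: HistogramCombine above greedily merges the pair of histograms whose combined entropy code costs the fewest extra bits relative to keeping them separate (plus the ClusterCostDiff term for the context map). The core quantity is the merge penalty cost(A+B) - cost(A) - cost(B). Below is a minimal sketch of that penalty using an ideal entropy cost in place of PopulationCost; the histograms and names are made up for illustration.

#include <cmath>
#include <cstdio>
#include <vector>

static double EntropyBits(const std::vector<int>& h) {
  double total = 0, bits = 0;
  for (int c : h) total += c;
  for (int c : h) if (c > 0) bits += c * (std::log2(total) - std::log2(c));
  return bits;
}

int main() {
  std::vector<int> a = {90, 10, 0, 0};  // mostly symbol 0
  std::vector<int> b = {85, 15, 0, 0};  // similar shape to a
  std::vector<int> c = {0, 0, 50, 50};  // very different shape
  auto merge_penalty = [](const std::vector<int>& x, const std::vector<int>& y) {
    std::vector<int> combo(x.size());
    for (size_t i = 0; i < x.size(); ++i) combo[i] = x[i] + y[i];
    return EntropyBits(combo) - EntropyBits(x) - EntropyBits(y);
  };
  printf("penalty(a,b) = %.1f bits\n", merge_penalty(a, b));  // ~0.8 bits: merge
  printf("penalty(a,c) = %.1f bits\n", merge_penalty(a, c));  // ~200 bits: keep apart
  return 0;
}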
diff --git a/brotli/enc/command.h b/brotli/enc/command.h
new file mode 100644
index 0000000..8a539d0
--- /dev/null
+++ b/brotli/enc/command.h
@@ -0,0 +1,45 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This class models a sequence of literals and a backward reference copy.
+
+#ifndef BROTLI_ENC_COMMAND_H_
+#define BROTLI_ENC_COMMAND_H_
+
+#include <stdint.h>
+
+namespace brotli {
+
+// Command holds a sequence of literals and a backward reference copy.
+class Command {
+ public:
+ Command() : insert_length_(0), copy_length_(0),
+ copy_distance_(0), distance_code_(0),
+ distance_prefix_(0), command_prefix_(0),
+ distance_extra_bits_(0), distance_extra_bits_value_(0) {}
+
+ uint32_t insert_length_;
+ uint32_t copy_length_;
+ uint32_t copy_distance_;
+ // Values <= 16 are short codes, values > 16 are distances shifted by 16.
+ uint32_t distance_code_;
+ uint16_t distance_prefix_;
+ uint16_t command_prefix_;
+ int distance_extra_bits_;
+ uint32_t distance_extra_bits_value_;
+};
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_COMMAND_H_
diff --git a/brotli/enc/context.h b/brotli/enc/context.h
new file mode 100644
index 0000000..9bf9227
--- /dev/null
+++ b/brotli/enc/context.h
@@ -0,0 +1,130 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions to map previous bytes into a context id.
+
+#ifndef BROTLI_ENC_CONTEXT_H_
+#define BROTLI_ENC_CONTEXT_H_
+
+#include <stdint.h>
+
+namespace brotli {
+
+static const int kSigned2BitContextLookup[] = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
+};
+
+static const int kSigned3BitContextLookup[] = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+};
+
+static const int kSigned4BitContextLookup[] = {
+ 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 14, 15,
+};
+
+enum ContextType {
+ CONTEXT_NONE = 0,
+ CONTEXT_FULL = 1,
+ CONTEXT_MSB7 = 2,
+ CONTEXT_MSB6 = 3,
+ CONTEXT_MSB5 = 4,
+ CONTEXT_MSB4 = 5,
+ CONTEXT_MSB3 = 6,
+ CONTEXT_MSB2 = 7,
+ CONTEXT_MSB1 = 8,
+ CONTEXT_IS_ZERO = 9,
+ CONTEXT_SIGNED_2BIT = 10,
+ CONTEXT_SIGNED_3BIT = 11,
+ CONTEXT_SIGNED_4BIT = 12,
+ CONTEXT_SIGNED_MIXED_3BYTE = 13,
+};
+
+static const int kContextSize[] = {
+ 1, 256, 128, 64, 32, 16, 8, 4, 2, 2, 4, 8, 16, 64,
+};
+
+static inline int NumContexts(int mode) {
+ return kContextSize[mode];
+}
+
+static inline uint8_t Context(uint8_t prev_byte, uint8_t prev_byte2,
+ uint8_t prev_byte3, int mode) {
+ switch (mode) {
+ case CONTEXT_NONE:
+ return 0;
+ case CONTEXT_IS_ZERO:
+ return prev_byte == 0 ? 0 : 1;
+ case CONTEXT_SIGNED_2BIT:
+ return kSigned2BitContextLookup[prev_byte];
+ case CONTEXT_SIGNED_3BIT:
+ return kSigned3BitContextLookup[prev_byte];
+ case CONTEXT_SIGNED_4BIT:
+ return kSigned4BitContextLookup[prev_byte];
+ case CONTEXT_SIGNED_MIXED_3BYTE:
+ return ((kSigned3BitContextLookup[prev_byte] << 3) +
+ (kSigned2BitContextLookup[prev_byte2] << 1) +
+ (prev_byte3 == 0 ? 0 : 1));
+ default:
+ return prev_byte >> (mode - 1);
+ }
+}
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_CONTEXT_H_
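
Aside: Context() above reduces up to three previous bytes to a small context id according to the chosen mode; for the CONTEXT_MSB* modes this is simply the high bits of the previous byte. A minimal usage sketch, assuming it is compiled next to context.h so the relative include resolves.

#include <cstdio>
#include "./context.h"

int main() {
  using namespace brotli;
  // CONTEXT_MSB5 keeps the top 5 bits of the previous byte (32 contexts).
  printf("%d %d\n",
         Context('A', 0, 0, CONTEXT_MSB5),   // 0x41 >> 3 == 8
         Context('a', 0, 0, CONTEXT_MSB5));  // 0x61 >> 3 == 12
  return 0;
}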
diff --git a/brotli/enc/encode.cc b/brotli/enc/encode.cc
new file mode 100644
index 0000000..cefc7dc
--- /dev/null
+++ b/brotli/enc/encode.cc
@@ -0,0 +1,778 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Implementation of Brotli compressor.
+
+#include "./encode.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "./backward_references.h"
+#include "./bit_cost.h"
+#include "./block_splitter.h"
+#include "./cluster.h"
+#include "./context.h"
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+#include "./histogram.h"
+#include "./prefix.h"
+#include "./write_bits.h"
+
+namespace brotli {
+
+template<int kSize>
+double Entropy(const std::vector<Histogram<kSize> >& histograms) {
+ double retval = 0;
+ for (int i = 0; i < histograms.size(); ++i) {
+ retval += histograms[i].EntropyBitCost();
+ }
+ return retval;
+}
+
+void EncodeSize(size_t len, int* storage_ix, uint8_t* storage) {
+ std::vector<uint8_t> len_bytes;
+ while (len > 0) {
+ len_bytes.push_back(len & 0xff);
+ len >>= 8;
+ };
+ WriteBits(3, len_bytes.size(), storage_ix, storage);
+ for (int i = 0; i < len_bytes.size(); ++i) {
+ WriteBits(8, len_bytes[i], storage_ix, storage);
+ }
+}
+
+void EncodeMetaBlockLength(int input_size_bits,
+ size_t meta_block_size,
+ bool is_last_meta_block,
+ int* storage_ix, uint8_t* storage) {
+ WriteBits(1, is_last_meta_block, storage_ix, storage);
+ if (is_last_meta_block) return;
+ while (input_size_bits > 0) {
+ WriteBits(8, meta_block_size & 0xff, storage_ix, storage);
+ meta_block_size >>= 8;
+ input_size_bits -= 8;
+ }
+ if (input_size_bits > 0) {
+ WriteBits(input_size_bits, meta_block_size, storage_ix, storage);
+ }
+}
+
+template<int kSize>
+void EntropyEncode(int val, const EntropyCode<kSize>& code,
+ int* storage_ix, uint8_t* storage) {
+ if (code.count_ <= 1) {
+ return;
+ };
+ WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
+}
+
+void StoreHuffmanTreeOfHuffmanTreeToBitMask(
+ const uint8_t* code_length_bitdepth,
+ int* storage_ix, uint8_t* storage) {
+ static const uint8_t kStorageOrder[kCodeLengthCodes] = {
+ 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ };
+ // Throw away trailing zeros:
+ int codes_to_store = kCodeLengthCodes;
+ for (; codes_to_store > 4; --codes_to_store) {
+ if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
+ break;
+ }
+ }
+ WriteBits(4, codes_to_store - 4, storage_ix, storage);
+ for (int i = 0; i < codes_to_store; ++i) {
+ WriteBits(3, code_length_bitdepth[kStorageOrder[i]], storage_ix, storage);
+ }
+}
+
+void StoreHuffmanTreeToBitMask(
+ const uint8_t* huffman_tree,
+ const uint8_t* huffman_tree_extra_bits,
+ const int huffman_tree_size,
+ const EntropyCode<kCodeLengthCodes>& entropy,
+ int* storage_ix, uint8_t* storage) {
+ for (int i = 0; i < huffman_tree_size; ++i) {
+ const int ix = huffman_tree[i];
+ const int extra_bits = huffman_tree_extra_bits[i];
+ EntropyEncode(ix, entropy, storage_ix, storage);
+ switch (ix) {
+ case 16:
+ WriteBits(2, extra_bits, storage_ix, storage);
+ break;
+ case 17:
+ WriteBits(3, extra_bits, storage_ix, storage);
+ break;
+ case 18:
+ WriteBits(7, extra_bits, storage_ix, storage);
+ break;
+ }
+ }
+}
+
+template<int kSize>
+void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
+ int* storage_ix, uint8_t* storage) {
+ const int kMaxBits = 8;
+ const int kMaxSymbol = 1 << kMaxBits;
+
+ if (code.count_ == 0) { // emit minimal tree for empty cases
+ // bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0
+ WriteBits(4, 0x01, storage_ix, storage);
+ return;
+ }
+ if (code.count_ <= 2 &&
+ code.symbols_[0] < kMaxSymbol &&
+ code.symbols_[1] < kMaxSymbol) {
+ // Small tree marker to encode 1 or 2 symbols.
+ WriteBits(1, 1, storage_ix, storage);
+ WriteBits(1, code.count_ - 1, storage_ix, storage);
+ if (code.symbols_[0] <= 1) {
+ // Code bit for small (1 bit) symbol value.
+ WriteBits(1, 0, storage_ix, storage);
+ WriteBits(1, code.symbols_[0], storage_ix, storage);
+ } else {
+ WriteBits(1, 1, storage_ix, storage);
+ WriteBits(8, code.symbols_[0], storage_ix, storage);
+ }
+ if (code.count_ == 2) {
+ WriteBits(8, code.symbols_[1], storage_ix, storage);
+ }
+ return;
+ }
+ WriteBits(1, 0, storage_ix, storage);
+
+ uint8_t huffman_tree[kSize];
+ uint8_t huffman_tree_extra_bits[kSize];
+ int huffman_tree_size = 0;
+ WriteHuffmanTree(&code.depth_[0],
+ alphabet_size,
+ &huffman_tree[0],
+ &huffman_tree_extra_bits[0],
+ &huffman_tree_size);
+ Histogram<kCodeLengthCodes> huffman_tree_histogram;
+ memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
+ for (int i = 0; i < huffman_tree_size; ++i) {
+ huffman_tree_histogram.Add(huffman_tree[i]);
+ }
+ EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
+ BuildEntropyCode(huffman_tree_histogram, 7, kCodeLengthCodes,
+ &huffman_tree_entropy);
+ Histogram<kCodeLengthCodes> trimmed_histogram = huffman_tree_histogram;
+ uint8_t* last_code = &huffman_tree[huffman_tree_size - 1];
+ while (*last_code == 0 || *last_code >= 17) {
+ trimmed_histogram.Remove(*last_code--);
+ }
+ int trimmed_size = trimmed_histogram.total_count_;
+ bool write_length = false;
+ if (trimmed_size > 1 && trimmed_size < huffman_tree_size) {
+ EntropyCode<kCodeLengthCodes> trimmed_entropy;
+ BuildEntropyCode(trimmed_histogram, 7, kCodeLengthCodes, &trimmed_entropy);
+ int huffman_bit_cost = HuffmanTreeBitCost(huffman_tree_histogram,
+ huffman_tree_entropy);
+ int trimmed_bit_cost = HuffmanTreeBitCost(trimmed_histogram,
+ trimmed_entropy);;
+ const int nbits = Log2Ceiling(trimmed_size - 1);
+ const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
+ if (trimmed_bit_cost + 3 + 2 * nbitpairs < huffman_bit_cost) {
+ write_length = true;
+ huffman_tree_size = trimmed_size;
+ huffman_tree_entropy = trimmed_entropy;
+ }
+ }
+
+ StoreHuffmanTreeOfHuffmanTreeToBitMask(
+ &huffman_tree_entropy.depth_[0], storage_ix, storage);
+ WriteBits(1, write_length, storage_ix, storage);
+ if (write_length) {
+ const int nbits = Log2Ceiling(huffman_tree_size - 1);
+ const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
+ WriteBits(3, nbitpairs - 1, storage_ix, storage);
+ WriteBits(nbitpairs * 2, huffman_tree_size - 2, storage_ix, storage);
+ }
+ StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
+ huffman_tree_size, huffman_tree_entropy,
+ storage_ix, storage);
+}
+
+template<int kSize>
+void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
+ int alphabet_size,
+ int* storage_ix, uint8_t* storage) {
+ for (int i = 0; i < codes.size(); ++i) {
+ StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
+ }
+}
+
+void EncodeCommand(const Command& cmd,
+ const EntropyCodeCommand& entropy,
+ int* storage_ix, uint8_t* storage) {
+ int code = cmd.command_prefix_;
+ EntropyEncode(code, entropy, storage_ix, storage);
+ if (code >= 128) {
+ code -= 128;
+ }
+ int insert_extra_bits = InsertLengthExtraBits(code);
+ uint64_t insert_extra_bits_val =
+ cmd.insert_length_ - InsertLengthOffset(code);
+ int copy_extra_bits = CopyLengthExtraBits(code);
+ uint64_t copy_extra_bits_val = cmd.copy_length_ - CopyLengthOffset(code);
+ if (insert_extra_bits > 0) {
+ WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
+ }
+ if (copy_extra_bits > 0) {
+ WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
+ }
+}
+
+void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
+ int* storage_ix, uint8_t* storage) {
+ int code = cmd.distance_prefix_;
+ int extra_bits = cmd.distance_extra_bits_;
+ uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
+ EntropyEncode(code, entropy, storage_ix, storage);
+ if (extra_bits > 0) {
+ WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
+ }
+}
+
+
+void ComputeDistanceShortCodes(std::vector<Command>* cmds) {
+ static const int kIndexOffset[16] = {
+ 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
+ };
+ static const int kValueOffset[16] = {
+ 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
+ };
+ int dist_ringbuffer[4] = { 4, 11, 15, 16 };
+ int ringbuffer_idx = 0;
+ for (int i = 0; i < cmds->size(); ++i) {
+ int cur_dist = (*cmds)[i].copy_distance_;
+ if (cur_dist == 0) break;
+ int dist_code = cur_dist + 16;
+ for (int k = 0; k < 16; ++k) {
+ // Only accept more popular choices.
+ if (cur_dist < 11 && ((k >= 2 && k < 4) || k >= 6)) {
+ // Typically unpopular ranges, don't replace a short distance
+ // with them.
+ continue;
+ }
+ int comp = (dist_ringbuffer[(ringbuffer_idx + kIndexOffset[k]) & 3] +
+ kValueOffset[k]);
+ if (cur_dist == comp) {
+ dist_code = k + 1;
+ break;
+ }
+ }
+ if (dist_code > 1) {
+ dist_ringbuffer[ringbuffer_idx & 3] = cur_dist;
+ ++ringbuffer_idx;
+ }
+ (*cmds)[i].distance_code_ = dist_code;
+ }
+}
+
+void ComputeCommandPrefixes(std::vector<Command>* cmds,
+ int num_direct_distance_codes,
+ int distance_postfix_bits) {
+ for (int i = 0; i < cmds->size(); ++i) {
+ Command* cmd = &(*cmds)[i];
+ cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
+ cmd->copy_length_);
+ if (cmd->copy_length_ > 0) {
+ PrefixEncodeCopyDistance(cmd->distance_code_,
+ num_direct_distance_codes,
+ distance_postfix_bits,
+ &cmd->distance_prefix_,
+ &cmd->distance_extra_bits_,
+ &cmd->distance_extra_bits_value_);
+ }
+ if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
+ cmd->distance_prefix_ = 0xffff;
+ } else {
+ cmd->command_prefix_ += 128;
+ }
+ }
+}
+
+int IndexOf(const std::vector<int>& v, int value) {
+ for (int i = 0; i < v.size(); ++i) {
+ if (v[i] == value) return i;
+ }
+ return -1;
+}
+
+void MoveToFront(std::vector<int>* v, int index) {
+ int value = (*v)[index];
+ for (int i = index; i > 0; --i) {
+ (*v)[i] = (*v)[i - 1];
+ }
+ (*v)[0] = value;
+}
+
+std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
+ if (v.empty()) return v;
+ std::vector<int> mtf(*max_element(v.begin(), v.end()) + 1);
+ for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
+ std::vector<int> result(v.size());
+ for (int i = 0; i < v.size(); ++i) {
+ int index = IndexOf(mtf, v[i]);
+ result[i] = index;
+ MoveToFront(&mtf, index);
+ }
+ return result;
+}
+
+// Finds runs of zeros in v_in and replaces them with a prefix code of the run
+// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
+// shifted by *max_length_prefix. Will not create prefix codes bigger than the
+// initial value of *max_run_length_prefix. The prefix code of run length L is
+// simply Log2Floor(L) and the number of extra bits is the same as the prefix
+// code.
+void RunLengthCodeZeros(const std::vector<int>& v_in,
+ int* max_run_length_prefix,
+ std::vector<int>* v_out,
+ std::vector<int>* extra_bits) {
+ int max_reps = 0;
+ for (int i = 0; i < v_in.size();) {
+ for (; i < v_in.size() && v_in[i] != 0; ++i) ;
+ int reps = 0;
+ for (; i < v_in.size() && v_in[i] == 0; ++i) {
+ ++reps;
+ }
+ max_reps = std::max(reps, max_reps);
+ }
+ int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
+ *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
+ for (int i = 0; i < v_in.size();) {
+ if (v_in[i] != 0) {
+ v_out->push_back(v_in[i] + *max_run_length_prefix);
+ extra_bits->push_back(0);
+ ++i;
+ } else {
+ int reps = 1;
+ for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
+ ++reps;
+ }
+ i += reps;
+ while (reps) {
+ if (reps < (2 << *max_run_length_prefix)) {
+ int run_length_prefix = Log2Floor(reps);
+ v_out->push_back(run_length_prefix);
+ extra_bits->push_back(reps - (1 << run_length_prefix));
+ break;
+ } else {
+ v_out->push_back(*max_run_length_prefix);
+ extra_bits->push_back((1 << *max_run_length_prefix) - 1);
+ reps -= (2 << *max_run_length_prefix) - 1;
+ }
+ }
+ }
+ }
+}
+
+// Returns a maximum zero-run-length-prefix value such that run-length coding
+// zeros in v with this maximum prefix value and then encoding the resulting
+// histogram and entropy-coding v produces the least amount of bits.
+int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
+ int min_cost = std::numeric_limits<int>::max();
+ int best_max_prefix = 0;
+ for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
+ std::vector<int> rle_symbols;
+ std::vector<int> extra_bits;
+ int max_run_length_prefix = max_prefix;
+ RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
+ if (max_run_length_prefix < max_prefix) break;
+ HistogramLiteral histogram;
+ for (int i = 0; i < rle_symbols.size(); ++i) {
+ histogram.Add(rle_symbols[i]);
+ }
+ int bit_cost = PopulationCost(histogram);
+ if (max_prefix > 0) {
+ bit_cost += 4;
+ }
+ for (int i = 1; i <= max_prefix; ++i) {
+ bit_cost += histogram.data_[i] * i; // extra bits
+ }
+ if (bit_cost < min_cost) {
+ min_cost = bit_cost;
+ best_max_prefix = max_prefix;
+ }
+ }
+ return best_max_prefix;
+}
+
+void EncodeContextMap(const std::vector<int>& context_map,
+ int context_mode,
+ int context_mode_bits,
+ int num_clusters,
+ int* storage_ix, uint8_t* storage) {
+ if (context_mode == 0) {
+ WriteBits(1, 0, storage_ix, storage); // no context
+ return;
+ }
+
+ WriteBits(1, 1, storage_ix, storage); // have context
+ if (context_mode_bits > 0) {
+ WriteBits(context_mode_bits, context_mode - 1, storage_ix, storage);
+ }
+ WriteBits(8, num_clusters - 1, storage_ix, storage);
+
+ if (num_clusters == 1 || num_clusters == context_map.size()) {
+ return;
+ }
+
+ std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
+ std::vector<int> rle_symbols;
+ std::vector<int> extra_bits;
+ int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
+ RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
+ &rle_symbols, &extra_bits);
+ HistogramLiteral symbol_histogram;
+ for (int i = 0; i < rle_symbols.size(); ++i) {
+ symbol_histogram.Add(rle_symbols[i]);
+ }
+ EntropyCodeLiteral symbol_code;
+ BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
+ &symbol_code);
+ bool use_rle = max_run_length_prefix > 0;
+ WriteBits(1, use_rle, storage_ix, storage);
+ if (use_rle) {
+ WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
+ }
+ StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
+ storage_ix, storage);
+ for (int i = 0; i < rle_symbols.size(); ++i) {
+ EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
+ if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
+ WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
+ }
+ }
+ WriteBits(1, 1, storage_ix, storage); // use move-to-front
+}
+
+template<int kSize>
+void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
+ int alphabet_size,
+ std::vector<EntropyCode<kSize> >* entropy_codes) {
+ entropy_codes->resize(histograms.size());
+ for (int i = 0; i < histograms.size(); ++i) {
+ BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
+ }
+}
+
+struct BlockSplitCode {
+ EntropyCodeLiteral block_type_code;
+ EntropyCodeBlockLength block_len_code;
+};
+
+void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
+ int length,
+ int* storage_ix, uint8_t* storage) {
+ int len_code = BlockLengthPrefix(length);
+ int extra_bits = BlockLengthExtraBits(len_code);
+ int extra_bits_value = length - BlockLengthOffset(len_code);
+ EntropyEncode(len_code, entropy, storage_ix, storage);
+
+ if (extra_bits > 0) {
+ WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
+ }
+}
+
+void ComputeBlockTypeShortCodes(BlockSplit* split) {
+ if (split->num_types_ <= 1) {
+ split->num_types_ = 1;
+ return;
+ }
+ int ringbuffer[2] = { 0, 1 };
+ size_t index = 0;
+ for (int i = 0; i < split->types_.size(); ++i) {
+ int type = split->types_[i];
+ int type_code;
+ if (type == ringbuffer[index & 1]) {
+ type_code = 0;
+ } else if (type == ringbuffer[(index - 1) & 1] + 1) {
+ type_code = 1;
+ } else {
+ type_code = type + 2;
+ }
+ ringbuffer[index & 1] = type;
+ ++index;
+ split->type_codes_.push_back(type_code);
+ }
+}
+
+void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
+ BlockSplitCode* code,
+ int* storage_ix, uint8_t* storage) {
+ if (split.num_types_ <= 1) {
+ WriteBits(1, 0, storage_ix, storage);
+ return;
+ }
+ WriteBits(1, 1, storage_ix, storage);
+ HistogramLiteral type_histo;
+ for (int i = 0; i < split.type_codes_.size(); ++i) {
+ type_histo.Add(split.type_codes_[i]);
+ }
+ BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
+ &code->block_type_code);
+ HistogramBlockLength length_histo;
+ for (int i = 0; i < split.lengths_.size(); ++i) {
+ length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
+ }
+ BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
+ &code->block_len_code);
+ WriteBits(8, split.num_types_ - 1, storage_ix, storage);
+ StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
+ storage_ix, storage);
+ StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
+ storage_ix, storage);
+ EncodeBlockLength(code->block_len_code, split.lengths_[0],
+ storage_ix, storage);
+}
+
+void MoveAndEncode(const BlockSplitCode& code,
+ BlockSplitIterator* it,
+ int* storage_ix, uint8_t* storage) {
+ if (it->length_ == 0) {
+ ++it->idx_;
+ it->type_ = it->split_.types_[it->idx_];
+ it->length_ = it->split_.lengths_[it->idx_];
+ uint8_t type_code = it->split_.type_codes_[it->idx_];
+ EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
+ EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
+ }
+ --it->length_;
+}
+
+struct EncodingParams {
+ int num_direct_distance_codes;
+ int distance_postfix_bits;
+ int literal_context_mode;
+ int distance_context_mode;
+};
+
+struct MetaBlock {
+ std::vector<Command> cmds;
+ EncodingParams params;
+ BlockSplit literal_split;
+ BlockSplit command_split;
+ BlockSplit distance_split;
+ std::vector<int> literal_context_map;
+ std::vector<int> distance_context_map;
+ std::vector<HistogramLiteral> literal_histograms;
+ std::vector<HistogramCommand> command_histograms;
+ std::vector<HistogramDistance> distance_histograms;
+};
+
+void BuildMetaBlock(const EncodingParams& params,
+ const std::vector<Command>& cmds,
+ const uint8_t* input_buffer,
+ size_t pos,
+ MetaBlock* mb) {
+ mb->cmds = cmds;
+ mb->params = params;
+ ComputeCommandPrefixes(&mb->cmds,
+ mb->params.num_direct_distance_codes,
+ mb->params.distance_postfix_bits);
+ SplitBlock(mb->cmds,
+ input_buffer + pos,
+ &mb->literal_split,
+ &mb->command_split,
+ &mb->distance_split);
+ ComputeBlockTypeShortCodes(&mb->literal_split);
+ ComputeBlockTypeShortCodes(&mb->command_split);
+ ComputeBlockTypeShortCodes(&mb->distance_split);
+
+ int num_literal_contexts_per_block_type =
+ NumContexts(mb->params.literal_context_mode);
+ int num_literal_contexts =
+ mb->literal_split.num_types_ *
+ num_literal_contexts_per_block_type;
+ int num_distance_contexts_per_block_type =
+ (mb->params.distance_context_mode > 0 ? 4 : 1);
+ int num_distance_contexts =
+ mb->distance_split.num_types_ *
+ num_distance_contexts_per_block_type;
+ std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
+ mb->command_histograms.resize(mb->command_split.num_types_);
+ std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
+ BuildHistograms(mb->cmds,
+ mb->literal_split,
+ mb->command_split,
+ mb->distance_split,
+ input_buffer,
+ pos,
+ mb->params.literal_context_mode,
+ mb->params.distance_context_mode,
+ &literal_histograms,
+ &mb->command_histograms,
+ &distance_histograms);
+
+ // Histogram ids need to fit in one byte and there are 16 ids reserved for
+ // run length codes, which leaves a maximum number of 240 histograms.
+ static const int kMaxNumberOfHistograms = 240;
+
+ mb->literal_histograms = literal_histograms;
+ if (mb->params.literal_context_mode > 0) {
+ ClusterHistograms(literal_histograms,
+ num_literal_contexts_per_block_type,
+ mb->literal_split.num_types_,
+ kMaxNumberOfHistograms,
+ &mb->literal_histograms,
+ &mb->literal_context_map);
+ }
+
+ mb->distance_histograms = distance_histograms;
+ if (mb->params.distance_context_mode > 0) {
+ ClusterHistograms(distance_histograms,
+ num_distance_contexts_per_block_type,
+ mb->distance_split.num_types_,
+ kMaxNumberOfHistograms,
+ &mb->distance_histograms,
+ &mb->distance_context_map);
+ }
+}
+
+size_t MetaBlockLength(const std::vector<Command>& cmds) {
+ size_t length = 0;
+ for (int i = 0; i < cmds.size(); ++i) {
+ const Command& cmd = cmds[i];
+ length += cmd.insert_length_ + cmd.copy_length_;
+ }
+ return length;
+}
+
+void StoreMetaBlock(const MetaBlock& mb,
+ const uint8_t* input_buffer,
+ int input_size_bits,
+ bool is_last,
+ size_t* pos,
+ int* storage_ix, uint8_t* storage) {
+ size_t length = MetaBlockLength(mb.cmds);
+ const size_t end_pos = *pos + length;
+ EncodeMetaBlockLength(input_size_bits, length - 1, is_last,
+ storage_ix, storage);
+ BlockSplitCode literal_split_code;
+ BlockSplitCode command_split_code;
+ BlockSplitCode distance_split_code;
+ BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
+ storage_ix, storage);
+ BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
+ storage_ix, storage);
+ BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
+ storage_ix, storage);
+ WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
+ WriteBits(4,
+ mb.params.num_direct_distance_codes >>
+ mb.params.distance_postfix_bits, storage_ix, storage);
+ int num_distance_codes =
+ kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
+ (48 << mb.params.distance_postfix_bits);
+ EncodeContextMap(mb.literal_context_map, mb.params.literal_context_mode, 4,
+ mb.literal_histograms.size(), storage_ix, storage);
+ EncodeContextMap(mb.distance_context_map, mb.params.distance_context_mode, 0,
+ mb.distance_histograms.size(), storage_ix, storage);
+ std::vector<EntropyCodeLiteral> literal_codes;
+ std::vector<EntropyCodeCommand> command_codes;
+ std::vector<EntropyCodeDistance> distance_codes;
+ BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
+ BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
+ &command_codes);
+ BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
+ &distance_codes);
+ StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
+ StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
+ StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
+ BlockSplitIterator literal_it(mb.literal_split);
+ BlockSplitIterator command_it(mb.command_split);
+ BlockSplitIterator distance_it(mb.distance_split);
+ for (int i = 0; i < mb.cmds.size(); ++i) {
+ const Command& cmd = mb.cmds[i];
+ MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
+ EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
+ for (int j = 0; j < cmd.insert_length_; ++j) {
+ MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
+ int histogram_idx = literal_it.type_;
+ if (mb.params.literal_context_mode > 0) {
+ uint8_t prev_byte = *pos > 0 ? input_buffer[*pos - 1] : 0;
+ uint8_t prev_byte2 = *pos > 1 ? input_buffer[*pos - 2] : 0;
+ uint8_t prev_byte3 = *pos > 2 ? input_buffer[*pos - 3] : 0;
+ int context = (literal_it.type_ *
+ NumContexts(mb.params.literal_context_mode) +
+ Context(prev_byte, prev_byte2, prev_byte3,
+ mb.params.literal_context_mode));
+ histogram_idx = mb.literal_context_map[context];
+ }
+ EntropyEncode(input_buffer[(*pos)++],
+ literal_codes[histogram_idx], storage_ix, storage);
+ }
+ if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
+ MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
+ int histogram_index = distance_it.type_;
+ if (mb.params.distance_context_mode > 0) {
+ int context = distance_it.type_ << 2;
+ context += (cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2;
+ histogram_index = mb.distance_context_map[context];
+ }
+ EncodeCopyDistance(cmd, distance_codes[histogram_index],
+ storage_ix, storage);
+ }
+ *pos += cmd.copy_length_;
+ }
+}
+
+int BrotliCompressBuffer(size_t input_size,
+ const uint8_t* input_buffer,
+ size_t* encoded_size,
+ uint8_t* encoded_buffer) {
+ int storage_ix = 0;
+ uint8_t* storage = encoded_buffer;
+ WriteBitsPrepareStorage(storage_ix, storage);
+ EncodeSize(input_size, &storage_ix, storage);
+
+ if (input_size == 0) {
+ *encoded_size = (storage_ix + 7) >> 3;
+ return 1;
+ }
+ int input_size_bits = Log2Ceiling(input_size);
+
+ std::vector<Command> all_commands;
+ CreateBackwardReferences(input_buffer, input_size, &all_commands);
+ ComputeDistanceShortCodes(&all_commands);
+
+ std::vector<std::vector<Command> > meta_block_commands;
+ SplitBlockByTotalLength(all_commands, input_size, 2 << 20,
+ &meta_block_commands);
+
+ size_t pos = 0;
+ for (int block_idx = 0; block_idx < meta_block_commands.size(); ++block_idx) {
+ const std::vector<Command>& commands = meta_block_commands[block_idx];
+ bool is_last_meta_block = (block_idx + 1 == meta_block_commands.size());
+ EncodingParams params;
+ params.num_direct_distance_codes = 12;
+ params.distance_postfix_bits = 1;
+ params.literal_context_mode = CONTEXT_SIGNED_MIXED_3BYTE;
+ params.distance_context_mode = 1;
+ MetaBlock mb;
+ BuildMetaBlock(params, commands, input_buffer, pos, &mb);
+ StoreMetaBlock(mb, input_buffer, input_size_bits, is_last_meta_block,
+ &pos, &storage_ix, storage);
+ }
+
+ *encoded_size = (storage_ix + 7) >> 3;
+ return 1;
+}
+
+} // namespace brotli
diff --git a/brotli/enc/encode.h b/brotli/enc/encode.h
new file mode 100644
index 0000000..6e4044d
--- /dev/null
+++ b/brotli/enc/encode.h
@@ -0,0 +1,37 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// API for Brotli compression
+
+#ifndef BROTLI_ENC_ENCODE_H_
+#define BROTLI_ENC_ENCODE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+
+namespace brotli {
+
+// Compresses the data in input_buffer into encoded_buffer, and sets
+// *encoded_size to the compressed length.
+// Returns 0 if there was an error and 1 otherwise.
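+//
+// A minimal usage sketch (illustrative only; the output buffer sizing is an
+// assumption made here, since the API does not compute an upper bound for
+// the caller):
+//
+//   std::vector<uint8_t> encoded(input_size + 1024);
+//   size_t encoded_size = encoded.size();
+//   if (BrotliCompressBuffer(input_size, input_buffer,
+//                            &encoded_size, encoded.data()) == 1) {
+//     encoded.resize(encoded_size);  // encoded now holds the brotli stream.
+//   }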
+int BrotliCompressBuffer(size_t input_size,
+ const uint8_t* input_buffer,
+ size_t* encoded_size,
+ uint8_t* encoded_buffer);
+
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_ENCODE_H_
diff --git a/brotli/enc/entropy_encode.cc b/brotli/enc/entropy_encode.cc
new file mode 100644
index 0000000..9a4d3e4
--- /dev/null
+++ b/brotli/enc/entropy_encode.cc
@@ -0,0 +1,397 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Entropy encoding (Huffman) utilities.
+
+#include "./entropy_encode.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+#include "./histogram.h"
+
+namespace brotli {
+
+namespace {
+
+struct HuffmanTree {
+ HuffmanTree();
+ HuffmanTree(int count, int16_t left, int16_t right)
+ : total_count_(count),
+ index_left_(left),
+ index_right_or_value_(right) {
+ }
+ int total_count_;
+ int16_t index_left_;
+ int16_t index_right_or_value_;
+};
+
+HuffmanTree::HuffmanTree() {}
+
+// Sort the root nodes, least popular first.
+bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
+ return v0.total_count_ < v1.total_count_;
+}
+
+void SetDepth(const HuffmanTree &p,
+ HuffmanTree *pool,
+ uint8_t *depth,
+ int level) {
+ if (p.index_left_ >= 0) {
+ ++level;
+ SetDepth(pool[p.index_left_], pool, depth, level);
+ SetDepth(pool[p.index_right_or_value_], pool, depth, level);
+ } else {
+ depth[p.index_right_or_value_] = level;
+ }
+}
+
+} // namespace
+
+// This function will create a Huffman tree.
+//
+// The catch here is that the tree cannot be arbitrarily deep.
+// Brotli specifies a maximum depth of 15 bits for "code trees"
+// and 7 bits for "code length code trees."
+//
+// count_limit is the value that is faked as the minimum population count;
+// it is doubled until the resulting tree satisfies the maximum depth
+// requirement.
+//
+// This algorithm does not perform particularly well for very long data
+// blocks, especially when population counts are larger than 2**tree_limit,
+// but we are not planning to use it with extremely long blocks.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
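+//
+// Illustrative example: population counts {1, 1, 2, 4} give depths
+// {3, 3, 2, 1}. If tree_limit were 2, that tree would be too deep, so
+// count_limit is doubled to 2, the counts are treated as {2, 2, 2, 4},
+// and the resulting depths flatten to {2, 2, 2, 2}.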
+void CreateHuffmanTree(const int *data,
+ const int length,
+ const int tree_limit,
+ uint8_t *depth) {
+ // For block sizes below 64 kB, we never need to do a second iteration
+ // of this loop. Probably all of our block sizes will be smaller than
+ // that, so this loop is mostly of academic interest. If we actually
+  // needed this, we would be better off with the Katajainen algorithm.
+ for (int count_limit = 1; ; count_limit *= 2) {
+ std::vector<HuffmanTree> tree;
+ tree.reserve(2 * length + 1);
+
+ for (int i = 0; i < length; ++i) {
+ if (data[i]) {
+ const int count = std::max(data[i], count_limit);
+ tree.push_back(HuffmanTree(count, -1, i));
+ }
+ }
+
+ const int n = tree.size();
+ if (n == 1) {
+ depth[tree[0].index_right_or_value_] = 1; // Only one element.
+ break;
+ }
+
+ std::sort(tree.begin(), tree.end(), SortHuffmanTree);
+
+ // The nodes are:
+ // [0, n): the sorted leaf nodes that we start with.
+ // [n]: we add a sentinel here.
+ // [n + 1, 2n): new parent nodes are added here, starting from
+ // (n+1). These are naturally in ascending order.
+ // [2n]: we add a sentinel at the end as well.
+ // There will be (2n+1) elements at the end.
+ const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
+ tree.push_back(sentinel);
+ tree.push_back(sentinel);
+
+ int i = 0; // Points to the next leaf node.
+ int j = n + 1; // Points to the next non-leaf node.
+ for (int k = n - 1; k > 0; --k) {
+ int left, right;
+ if (tree[i].total_count_ <= tree[j].total_count_) {
+ left = i;
+ ++i;
+ } else {
+ left = j;
+ ++j;
+ }
+ if (tree[i].total_count_ <= tree[j].total_count_) {
+ right = i;
+ ++i;
+ } else {
+ right = j;
+ ++j;
+ }
+
+ // The sentinel node becomes the parent node.
+ int j_end = tree.size() - 1;
+ tree[j_end].total_count_ =
+ tree[left].total_count_ + tree[right].total_count_;
+ tree[j_end].index_left_ = left;
+ tree[j_end].index_right_or_value_ = right;
+
+ // Add back the last sentinel node.
+ tree.push_back(sentinel);
+ }
+ SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
+
+ // We need to pack the Huffman tree in tree_limit bits.
+    // If this was not successful, raise the faked minimum count
+    // (count_limit) and retry.
+ if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
+ break;
+ }
+ }
+}
+
+void WriteHuffmanTreeRepetitions(
+ const int previous_value,
+ const int value,
+ int repetitions,
+ uint8_t* tree,
+ uint8_t* extra_bits,
+ int* tree_size) {
+ if (previous_value != value) {
+ tree[*tree_size] = value;
+ extra_bits[*tree_size] = 0;
+ ++(*tree_size);
+ --repetitions;
+ }
+ while (repetitions >= 1) {
+ if (repetitions < 3) {
+ for (int i = 0; i < repetitions; ++i) {
+ tree[*tree_size] = value;
+ extra_bits[*tree_size] = 0;
+ ++(*tree_size);
+ }
+ return;
+ } else if (repetitions < 7) {
+ // 3 to 6 left.
+ tree[*tree_size] = 16;
+ extra_bits[*tree_size] = repetitions - 3;
+ ++(*tree_size);
+ return;
+ } else {
+ tree[*tree_size] = 16;
+ extra_bits[*tree_size] = 3;
+ ++(*tree_size);
+ repetitions -= 6;
+ }
+ }
+}
+
+void WriteHuffmanTreeRepetitionsZeros(
+ int repetitions,
+ uint8_t* tree,
+ uint8_t* extra_bits,
+ int* tree_size) {
+ while (repetitions >= 1) {
+ if (repetitions < 3) {
+ for (int i = 0; i < repetitions; ++i) {
+ tree[*tree_size] = 0;
+ extra_bits[*tree_size] = 0;
+ ++(*tree_size);
+ }
+ return;
+ } else if (repetitions < 11) {
+ tree[*tree_size] = 17;
+ extra_bits[*tree_size] = repetitions - 3;
+ ++(*tree_size);
+ return;
+ } else if (repetitions < 139) {
+ tree[*tree_size] = 18;
+ extra_bits[*tree_size] = repetitions - 11;
+ ++(*tree_size);
+ return;
+ } else {
+ tree[*tree_size] = 18;
+ extra_bits[*tree_size] = 0x7f; // 138 repeated 0s
+ ++(*tree_size);
+ repetitions -= 138;
+ }
+ }
+}
+
+
+// Heuristics for selecting the stride ranges to collapse.
+int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
+ return abs(a - b) < 4;
+}
+
+int OptimizeHuffmanCountsForRle(int length, int* counts) {
+ int stride;
+ int limit;
+ int sum;
+ uint8_t* good_for_rle;
+ // Let's make the Huffman code more compatible with rle encoding.
+ int i;
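+  // 1) Let's strip any trailing zeros from the population counts.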
+ for (; length >= 0; --length) {
+ if (length == 0) {
+ return 1; // All zeros.
+ }
+ if (counts[length - 1] != 0) {
+ // Now counts[0..length - 1] does not have trailing zeros.
+ break;
+ }
+ }
+ // 2) Let's mark all population counts that already can be encoded
+ // with an rle code.
+ good_for_rle = (uint8_t*)calloc(length, 1);
+ if (good_for_rle == NULL) {
+ return 0;
+ }
+ {
+ // Let's not spoil any of the existing good rle codes.
+    // Mark any sequence of 0's of length at least 5 as good_for_rle.
+    // Mark any sequence of non-0's of length at least 7 as good_for_rle.
+ int symbol = counts[0];
+ int stride = 0;
+ for (i = 0; i < length + 1; ++i) {
+ if (i == length || counts[i] != symbol) {
+ if ((symbol == 0 && stride >= 5) ||
+ (symbol != 0 && stride >= 7)) {
+ int k;
+ for (k = 0; k < stride; ++k) {
+ good_for_rle[i - k - 1] = 1;
+ }
+ }
+ stride = 1;
+ if (i != length) {
+ symbol = counts[i];
+ }
+ } else {
+ ++stride;
+ }
+ }
+ }
+ // 3) Let's replace those population counts that lead to more rle codes.
+ stride = 0;
+ limit = counts[0];
+ sum = 0;
+ for (i = 0; i < length + 1; ++i) {
+ if (i == length || good_for_rle[i] ||
+ (i != 0 && good_for_rle[i - 1]) ||
+ !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
+ if (stride >= 4 || (stride >= 3 && sum == 0)) {
+ int k;
+ // The stride must end, collapse what we have, if we have enough (4).
+ int count = (sum + stride / 2) / stride;
+ if (count < 1) {
+ count = 1;
+ }
+ if (sum == 0) {
+          // Don't let an all-zeros stride be upgraded to ones.
+ count = 0;
+ }
+ for (k = 0; k < stride; ++k) {
+          // We don't want to change the value at counts[i], which already
+          // belongs to the next stride. Hence the - 1.
+ counts[i - k - 1] = count;
+ }
+ }
+ stride = 0;
+ sum = 0;
+ if (i < length - 3) {
+ // All interesting strides have a count of at least 4,
+        // at least when they are non-zero.
+ limit = (counts[i] + counts[i + 1] +
+ counts[i + 2] + counts[i + 3] + 2) / 4;
+ } else if (i < length) {
+ limit = counts[i];
+ } else {
+ limit = 0;
+ }
+ }
+ ++stride;
+ if (i != length) {
+ sum += counts[i];
+ if (stride >= 4) {
+ limit = (sum + stride / 2) / stride;
+ }
+ }
+ }
+ free(good_for_rle);
+ return 1;
+}
+
+
+void WriteHuffmanTree(const uint8_t* depth, const int length,
+ uint8_t* tree,
+ uint8_t* extra_bits_data,
+ int* huffman_tree_size) {
+ int previous_value = 0;
+ for (uint32_t i = 0; i < length;) {
+ const int value = depth[i];
+ int reps = 1;
+ for (uint32_t k = i + 1; k < length && depth[k] == value; ++k) {
+ ++reps;
+ }
+ if (value == 0) {
+ WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data,
+ huffman_tree_size);
+ } else {
+ WriteHuffmanTreeRepetitions(previous_value, value, reps, tree,
+ extra_bits_data, huffman_tree_size);
+ previous_value = value;
+ }
+ i += reps;
+ }
+}
+
+namespace {
+
+uint16_t ReverseBits(int num_bits, uint16_t bits) {
+ static const size_t kLut[16] = { // Pre-reversed 4-bit values.
+ 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
+ 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
+ };
+ size_t retval = kLut[bits & 0xf];
+ for (int i = 4; i < num_bits; i += 4) {
+ retval <<= 4;
+ bits >>= 4;
+ retval |= kLut[bits & 0xf];
+ }
+ retval >>= (-num_bits & 0x3);
+ return retval;
+}
+
+} // namespace
+
+void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits) {
+ // In Brotli, all bit depths are [1..15]
+ // 0 bit depth means that the symbol does not exist.
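+  //
+  // Illustrative example: depths {1, 2, 3, 3} yield the canonical codes
+  // 0, 10, 110, 111, which are stored bit-reversed here (0, 01, 011, 111)
+  // to match the order in which the bits are later written out.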
+ const int kMaxBits = 16; // 0..15 are values for bits
+ uint16_t bl_count[kMaxBits] = { 0 };
+ {
+ for (int i = 0; i < len; ++i) {
+ ++bl_count[depth[i]];
+ }
+ bl_count[0] = 0;
+ }
+ uint16_t next_code[kMaxBits];
+ next_code[0] = 0;
+ {
+ int code = 0;
+ for (int bits = 1; bits < kMaxBits; ++bits) {
+ code = (code + bl_count[bits - 1]) << 1;
+ next_code[bits] = code;
+ }
+ }
+ for (int i = 0; i < len; ++i) {
+ if (depth[i]) {
+ bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
+ }
+ }
+}
+
+} // namespace brotli
diff --git a/brotli/enc/entropy_encode.h b/brotli/enc/entropy_encode.h
new file mode 100644
index 0000000..e5993e6
--- /dev/null
+++ b/brotli/enc/entropy_encode.h
@@ -0,0 +1,112 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Entropy encoding (Huffman) utilities.
+
+#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
+#define BROTLI_ENC_ENTROPY_ENCODE_H_
+
+#include <stdint.h>
+#include <string.h>
+#include "./histogram.h"
+#include "./prefix.h"
+
+namespace brotli {
+
+// This function will create a Huffman tree.
+//
+// The (data,length) contains the population counts.
+// The tree_limit is the maximum bit depth of the Huffman codes.
+//
+// The depth contains the tree, i.e., how many bits are used for
+// the symbol.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const int *data,
+ const int length,
+ const int tree_limit,
+ uint8_t *depth);
+
+// Change the population counts in a way that the subsequent Huffman tree
+// compression, especially its rle-part, will be more likely to compress this
+// data more efficiently.
+//
+// length contains the size of the histogram.
+// counts contains the population counts.
+int OptimizeHuffmanCountsForRle(int length, int* counts);
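+//
+// For example, a stride of near-equal counts such as {7, 8, 7, 8} may be
+// collapsed to its rounded average {8, 8, 8, 8}, which the rle-part of the
+// code length coding can then represent more compactly.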
+
+
+// Write a Huffman tree from bit depths into the bitstream representation
+// of a Huffman tree. The generated Huffman tree is to be compressed once
+// more using a Huffman tree.
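+//
+// For illustration: a run of 10 zero depths is emitted as code length code
+// 17 with extra-bits value 7 (code 17 covers runs of 3-10 zeros), a run of
+// 20 zero depths as code 18 with extra-bits value 9 (11-138 zeros), and runs
+// of a repeated non-zero depth are shortened with code 16 (3-6 repetitions
+// per code).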
+void WriteHuffmanTree(const uint8_t* depth, const int length,
+ uint8_t* tree,
+ uint8_t* extra_bits_data,
+ int* huffman_tree_size);
+
+// Get the actual bit values for a tree of bit depths.
+void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
+
+template<int kSize>
+struct EntropyCode {
+  // How many bits are used for each symbol.
+  uint8_t depth_[kSize];
+  // Actual bits used to represent each symbol.
+  uint16_t bits_[kSize];
+  // How many symbols have a non-zero depth.
+  int count_;
+  // First two symbols with non-zero depth.
+  int symbols_[2];
+};
+
+template<int kSize>
+void BuildEntropyCode(const Histogram<kSize>& histogram,
+ const int tree_limit,
+ const int alphabet_size,
+ EntropyCode<kSize>* code) {
+ memset(code->depth_, 0, sizeof(code->depth_));
+ memset(code->bits_, 0, sizeof(code->bits_));
+ memset(code->symbols_, 0, sizeof(code->symbols_));
+ code->count_ = 0;
+ if (histogram.total_count_ == 0) return;
+ for (int i = 0; i < kSize; ++i) {
+ if (histogram.data_[i] > 0) {
+ if (code->count_ < 2) code->symbols_[code->count_] = i;
+ ++code->count_;
+ }
+ }
+ if (code->count_ >= 64) {
+ int counts[kSize];
+ memcpy(counts, &histogram.data_[0], sizeof(counts[0]) * kSize);
+ OptimizeHuffmanCountsForRle(alphabet_size, counts);
+ CreateHuffmanTree(counts, alphabet_size, tree_limit, &code->depth_[0]);
+ } else {
+ CreateHuffmanTree(&histogram.data_[0], alphabet_size, tree_limit,
+ &code->depth_[0]);
+ }
+ ConvertBitDepthsToSymbols(&code->depth_[0], alphabet_size, &code->bits_[0]);
+}
+
+static const int kCodeLengthCodes = 19;
+
+// Literal entropy code.
+typedef EntropyCode<256> EntropyCodeLiteral;
+// Prefix entropy codes.
+typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
+typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
+typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_ENTROPY_ENCODE_H_
diff --git a/brotli/enc/fast_log.h b/brotli/enc/fast_log.h
new file mode 100644
index 0000000..0b09ea6
--- /dev/null
+++ b/brotli/enc/fast_log.h
@@ -0,0 +1,161 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Utilities for fast computation of logarithms.
+
+#ifndef BROTLI_ENC_FAST_LOG_H_
+#define BROTLI_ENC_FAST_LOG_H_
+
+#include <assert.h>
+#include <math.h>
+#include <stdint.h>
+
+namespace brotli {
+
+// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
+inline int Log2Floor(uint32_t n) {
+#if defined(__clang__) || \
+ (defined(__GNUC__) && \
+ ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4))
+ return n == 0 ? -1 : 31 ^ __builtin_clz(n);
+#else
+ if (n == 0)
+ return -1;
+ int log = 0;
+ uint32_t value = n;
+ for (int i = 4; i >= 0; --i) {
+ int shift = (1 << i);
+ uint32_t x = value >> shift;
+ if (x != 0) {
+ value = x;
+ log += shift;
+ }
+ }
+ assert(value == 1);
+ return log;
+#endif
+}
+
+// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
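+// For example, Log2Floor(5) == 2 and Log2Ceiling(5) == 3, while
+// Log2Floor(4) == Log2Ceiling(4) == 2.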
+inline int Log2Ceiling(uint32_t n) {
+ int floor = Log2Floor(n);
+  if (n == (n & ~(n - 1)))  // zero or a power of two
+ return floor;
+ else
+ return floor + 1;
+}
+
+// A lookup table for small values of log2(int) to be used in entropy
+// computation.
+//
+// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
+static const float kLog2Table[] = {
+ 0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
+ 1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
+ 2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
+ 3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
+ 3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
+ 3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
+ 4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
+ 4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
+ 4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
+ 4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
+ 4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
+ 5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
+ 5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
+ 5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
+ 5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
+ 5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
+ 5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
+ 5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
+ 5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
+ 5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
+ 5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
+ 5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
+ 6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
+ 6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
+ 6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
+ 6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
+ 6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
+ 6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
+ 6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
+ 6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
+ 6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
+ 6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
+ 6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
+ 6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
+ 6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
+ 6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
+ 6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
+ 6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
+ 6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
+ 6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
+ 6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
+ 6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
+ 6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
+ 7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
+ 7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
+ 7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
+ 7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
+ 7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
+ 7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
+ 7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
+ 7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
+ 7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
+ 7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
+ 7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
+ 7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
+ 7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
+ 7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
+ 7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
+ 7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
+ 7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
+ 7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
+ 7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
+ 7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
+ 7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
+ 7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
+ 7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
+ 7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
+ 7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
+ 7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
+ 7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
+ 7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
+ 7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
+ 7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
+ 7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
+ 7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
+ 7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
+ 7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
+ 7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
+ 7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
+ 7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
+ 7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
+ 7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
+ 7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
+ 7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
+ 7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
+ 7.9943534368588578f
+};
+
+// Faster logarithm for small integers, with the property of log2(0) == 0.
+static inline double FastLog2(int v) {
+ if (v < (int)(sizeof(kLog2Table) / sizeof(kLog2Table[0]))) {
+ return kLog2Table[v];
+ }
+ return log2(v);
+}
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_FAST_LOG_H_
diff --git a/brotli/enc/find_match_length.h b/brotli/enc/find_match_length.h
new file mode 100644
index 0000000..0994ac2
--- /dev/null
+++ b/brotli/enc/find_match_length.h
@@ -0,0 +1,85 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Function to find maximal matching prefixes of strings.
+
+#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
+#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
+
+#include <stdint.h>
+
+#include "./port.h"
+
+namespace brotli {
+
+// Separate implementation for x86_64, for speed.
+#if defined(__GNUC__) && defined(ARCH_K8)
+
+static inline int FindMatchLengthWithLimit(const uint8_t* s1,
+ const uint8_t* s2,
+ size_t limit) {
+ int matched = 0;
+ size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
+ while (PREDICT_TRUE(--limit2)) {
+ if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
+ BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
+ s2 += 8;
+ matched += 8;
+ } else {
+ uint64_t x =
+ BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
+ int matching_bits = __builtin_ctzll(x);
+ matched += matching_bits >> 3;
+ return matched;
+ }
+ }
+ limit = (limit & 7) + 1; // + 1 is for pre-decrement in while
+ while (--limit) {
+ if (PREDICT_TRUE(s1[matched] == *s2)) {
+ ++s2;
+ ++matched;
+ } else {
+ return matched;
+ }
+ }
+ return matched;
+}
+#else
+static inline int FindMatchLengthWithLimit(const uint8_t* s1,
+ const uint8_t* s2,
+ size_t limit) {
+ int matched = 0;
+ const uint8_t* s2_limit = s2 + limit;
+ const uint8_t* s2_ptr = s2;
+  // Find out how long the match is. We loop over the data 32 bits at a
+  // time until we find a 32-bit block that doesn't match; then we compare
+  // the remaining bytes one at a time to get the total length of the
+  // match.
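+  //
+  // For example (illustrative), s1 = "abcdefgh", s2 = "abcdefxy" and
+  // limit = 8 give a match length of 6.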
+ while (s2_ptr <= s2_limit - 4 &&
+ BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
+ BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
+ s2_ptr += 4;
+ matched += 4;
+ }
+ while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
+ ++s2_ptr;
+ ++matched;
+ }
+ return matched;
+}
+#endif
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_
diff --git a/brotli/enc/hash.h b/brotli/enc/hash.h
new file mode 100644
index 0000000..9cacf7e
--- /dev/null
+++ b/brotli/enc/hash.h
@@ -0,0 +1,354 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A (forgetful) hash table of the data seen by the compressor, used to
+// help create backward references to previous data.
+
+#ifndef BROTLI_ENC_HASH_H_
+#define BROTLI_ENC_HASH_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <algorithm>
+
+#include "./fast_log.h"
+#include "./find_match_length.h"
+#include "./port.h"
+
+namespace brotli {
+
+// kHashMul32 multiplier has these properties:
+// * The multiplier must be odd. Otherwise we may lose the highest bit.
+// * No long streaks of 1s or 0s.
+// * There is no effort to ensure that it is a prime; the oddity is enough
+// for this use.
+// * The number has been tuned heuristically against compression benchmarks.
+static const uint32_t kHashMul32 = 0x1e35a7bd;
+
+inline uint32_t Hash3Bytes(const uint8_t *data, const int bits) {
+ uint32_t h = (BROTLI_UNALIGNED_LOAD32(data) & 0xffffff) * kHashMul32;
+ // The higher bits contain more mixture from the multiplication,
+ // so we take our results from there.
+ return h >> (32 - bits);
+}
+
+// Usually we choose the longest backward reference. This function allows
+// for exceptions to that rule.
+//
+// If we choose a backward reference that is further away, it will
+// usually be coded with more bits. We approximate this by assuming
+// log2(distance). If the distance can be expressed in terms of the
+// last four distances, we use some heuristic constants to estimate
+// the bit cost. For the first (up to four) bytes of the copy we use the
+// bit cost of the literals they replace, taken from the literal cost model;
+// after that we use the average bit cost of the model.
+//
+// This function is used to sometimes discard a longer backward reference
+// when it is not much longer and the bit cost for encoding it is more
+// than the saved literals.
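+//
+// A rough reading of the formula below: a copy of length 6 at the most
+// recently used distance scores start_cost4 + 2 * average_cost + 0.6,
+// while the same copy at a fresh distance of 4096 (far from all of the
+// last four distances) scores
+// start_cost4 + 2 * average_cost - 1.20 * Log2Floor(4096), i.e. a score
+// 15 lower.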
+inline double BackwardReferenceScore(double average_cost,
+ double start_cost4,
+ double start_cost3,
+ double start_cost2,
+ int copy_length,
+ int backward_reference_offset,
+ int last_distance1,
+ int last_distance2,
+ int last_distance3,
+ int last_distance4) {
+ double retval = 0;
+ switch (copy_length) {
+ case 2: retval = start_cost2; break;
+ case 3: retval = start_cost3; break;
+ default: retval = start_cost4 + (copy_length - 4) * average_cost; break;
+ }
+ int diff_last1 = abs(backward_reference_offset - last_distance1);
+ int diff_last2 = abs(backward_reference_offset - last_distance2);
+ if (diff_last1 == 0) {
+ retval += 0.6;
+ } else if (diff_last1 < 4) {
+ retval -= 0.9 + 0.03 * diff_last1;
+ } else if (diff_last2 < 4) {
+ retval -= 0.95 + 0.1 * diff_last2;
+ } else if (backward_reference_offset == last_distance3) {
+ retval -= 1.17;
+ } else if (backward_reference_offset == last_distance4) {
+ retval -= 1.27;
+ } else {
+ retval -= 1.20 * Log2Floor(backward_reference_offset);
+ }
+ return retval;
+}
+
+// A (forgetful) hash table of the data seen by the compressor, used to
+// help create backward references to previous data.
+//
+// This is a hash map of fixed size (kBucketSize) to a ring buffer of
+// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
+// index positions of the given hash key in the compressed data.
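+//
+// For illustration (not necessarily the parameters used by the encoder):
+// kBucketBits = 13 and kBlockBits = 11 give 8192 buckets that each remember
+// the 2048 most recent positions of their hash key, so buckets_ alone takes
+// 8192 * 2048 * 4 bytes = 64 MiB.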
+template <int kBucketBits, int kBlockBits>
+class HashLongestMatch {
+ public:
+ HashLongestMatch()
+ : literal_cost_(NULL),
+ last_distance1_(4),
+ last_distance2_(11),
+ last_distance3_(15),
+ last_distance4_(16),
+ insert_length_(0),
+ average_cost_(5.4) {
+ Reset();
+ }
+ void Reset() {
+ std::fill(&num_[0], &num_[sizeof(num_) / sizeof(num_[0])], 0);
+ }
+ void SetLiteralCost(float *cost) {
+ literal_cost_ = cost;
+ }
+ double literal_cost(int i) const { return literal_cost_[i]; }
+
+ // Look at 3 bytes at data.
+ // Compute a hash from these, and store the value of ix at that position.
+ inline void Store(const uint8_t *data, const int ix) {
+ const uint32_t key = Hash3Bytes(data, kBucketBits);
+ const int minor_ix = num_[key] & kBlockMask;
+ buckets_[key][minor_ix] = ix;
+ ++num_[key];
+ }
+
+ // Store hashes for a range of data.
+ void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
+ for (int p = 0; p < len; ++p) {
+ Store(&data[p & mask], startix + p);
+ }
+ }
+
+ // Find a longest backward match of &data[cur_ix] up to the length of
+ // max_length.
+ //
+ // Does not look for matches longer than max_length.
+ // Does not look for matches further away than max_backward.
+ // Writes the best found match length into best_len_out.
+  // Writes the backward distance of the best match into best_distance_out.
+  // Writes the score of the best match into best_score_out.
+ bool FindLongestMatch(const uint8_t * __restrict data,
+ const uint32_t cur_ix,
+ uint32_t max_length,
+ const uint32_t max_backward,
+ size_t * __restrict best_len_out,
+ size_t * __restrict best_distance_out,
+ double * __restrict best_score_out) {
+ const double start_cost4 = literal_cost_ == NULL ? 20 :
+ literal_cost_[cur_ix] +
+ literal_cost_[cur_ix + 1] +
+ literal_cost_[cur_ix + 2] +
+ literal_cost_[cur_ix + 3];
+
+ const double start_cost3 = literal_cost_ == NULL ? 15 :
+ literal_cost_[cur_ix] +
+ literal_cost_[cur_ix + 1] +
+ literal_cost_[cur_ix + 2] + 0.3;
+ double start_cost2 = literal_cost_ == NULL ? 10 :
+ literal_cost_[cur_ix] +
+ literal_cost_[cur_ix + 1] + 1.2;
+ bool match_found = false;
+ // Don't accept a short copy from far away.
+ double best_score = 8.25;
+ if (insert_length_ < 4) {
+ double cost_diff[4] = { 0.20, 0.09, 0.05, 0.03 };
+ best_score += cost_diff[insert_length_];
+ }
+ size_t best_len = *best_len_out;
+ *best_len_out = 0;
+ size_t best_ix = 1;
+ // Try last distance first.
+ for (int i = 0; i < 16; ++i) {
+ int prev_ix = cur_ix;
+      switch (i) {
+ case 0: prev_ix -= last_distance1_; break;
+ case 1: prev_ix -= last_distance2_; break;
+ case 2: prev_ix -= last_distance3_; break;
+ case 3: prev_ix -= last_distance4_; break;
+
+ case 4: prev_ix -= last_distance1_ - 1; break;
+ case 5: prev_ix -= last_distance1_ + 1; break;
+ case 6: prev_ix -= last_distance1_ - 2; break;
+ case 7: prev_ix -= last_distance1_ + 2; break;
+ case 8: prev_ix -= last_distance1_ - 3; break;
+ case 9: prev_ix -= last_distance1_ + 3; break;
+
+ case 10: prev_ix -= last_distance2_ - 1; break;
+ case 11: prev_ix -= last_distance2_ + 1; break;
+ case 12: prev_ix -= last_distance2_ - 2; break;
+ case 13: prev_ix -= last_distance2_ + 2; break;
+ case 14: prev_ix -= last_distance2_ - 3; break;
+ case 15: prev_ix -= last_distance2_ + 3; break;
+ }
+ if (prev_ix >= cur_ix) {
+ continue;
+ }
+ const size_t backward = cur_ix - prev_ix;
+ if (PREDICT_FALSE(backward > max_backward)) {
+ continue;
+ }
+ if (data[cur_ix + best_len] != data[prev_ix + best_len]) {
+ continue;
+ }
+ const size_t len =
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix], max_length);
+ if (len >= 3 || (len == 2 && i < 2)) {
+      // Comparing for >= 2 does not change the semantics; it just avoids a
+      // few unnecessary binary logarithms in the backward reference score,
+      // since we are not interested in such short matches.
+ const double score = BackwardReferenceScore(average_cost_,
+ start_cost4,
+ start_cost3,
+ start_cost2,
+ len, backward,
+ last_distance1_,
+ last_distance2_,
+ last_distance3_,
+ last_distance4_);
+ if (best_score < score) {
+ best_score = score;
+ best_len = len;
+ best_ix = backward;
+ *best_len_out = best_len;
+ *best_distance_out = best_ix;
+ *best_score_out = best_score;
+ match_found = true;
+ }
+ }
+ }
+ const uint32_t key = Hash3Bytes(&data[cur_ix], kBucketBits);
+ const uint32_t * __restrict const bucket = &buckets_[key][0];
+ const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
+ int stop = int(cur_ix) - 64;
+ if (stop < 0) { stop = 0; }
+
+ start_cost2 -= 1.0;
+ for (int i = cur_ix - 1; i > stop; --i) {
+ size_t prev_ix = i;
+ const size_t backward = cur_ix - prev_ix;
+ if (PREDICT_FALSE(backward > max_backward)) {
+ break;
+ }
+ if (data[cur_ix] != data[prev_ix] ||
+ data[cur_ix + 1] != data[prev_ix + 1]) {
+ continue;
+ }
+ int len = 2;
+ const double score = start_cost2 - 1.70 * Log2Floor(backward);
+
+ if (best_score < score) {
+ best_score = score;
+ best_len = len;
+ best_ix = backward;
+ *best_len_out = best_len;
+ *best_distance_out = best_ix;
+ match_found = true;
+ }
+ }
+ for (int i = num_[key] - 1; i >= down; --i) {
+ size_t prev_ix = bucket[i & kBlockMask];
+ const size_t backward = cur_ix - prev_ix;
+ if (PREDICT_FALSE(backward > max_backward)) {
+ break;
+ }
+ if (data[cur_ix + best_len] != data[prev_ix + best_len]) {
+ continue;
+ }
+ const size_t len =
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix], max_length);
+ if (len >= 3) {
+      // Comparing for >= 3 does not change the semantics; it just avoids a
+      // few unnecessary binary logarithms in the backward reference score,
+      // since we are not interested in such short matches.
+ const double score = BackwardReferenceScore(average_cost_,
+ start_cost4,
+ start_cost3,
+ start_cost2,
+ len, backward,
+ last_distance1_,
+ last_distance2_,
+ last_distance3_,
+ last_distance4_);
+ if (best_score < score) {
+ best_score = score;
+ best_len = len;
+ best_ix = backward;
+ *best_len_out = best_len;
+ *best_distance_out = best_ix;
+ *best_score_out = best_score;
+ match_found = true;
+ }
+ }
+ }
+ return match_found;
+ }
+
+ void set_last_distance(int v) {
+ if (last_distance1_ != v) {
+ last_distance4_ = last_distance3_;
+ last_distance3_ = last_distance2_;
+ last_distance2_ = last_distance1_;
+ last_distance1_ = v;
+ }
+ }
+
+ int last_distance() const { return last_distance1_; }
+
+ void set_insert_length(int v) { insert_length_ = v; }
+
+ void set_average_cost(double v) { average_cost_ = v; }
+
+ private:
+ // Number of hash buckets.
+ static const uint32_t kBucketSize = 1 << kBucketBits;
+
+  // Only the kBlockSize newest backward references are kept;
+  // older ones are forgotten.
+ static const uint32_t kBlockSize = 1 << kBlockBits;
+
+ // Mask for accessing entries in a block (in a ringbuffer manner).
+ static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
+
+ // Number of entries in a particular bucket.
+ uint16_t num_[kBucketSize];
+
+ // Buckets containing kBlockSize of backward references.
+ uint32_t buckets_[kBucketSize][kBlockSize];
+
+ // Model of how much the ith literal costs to encode using
+ // the entropy model.
+ float *literal_cost_;
+
+ int last_distance1_;
+ int last_distance2_;
+ int last_distance3_;
+ int last_distance4_;
+
+ // Cost adjustment for how many literals we are planning to insert
+ // anyway.
+ int insert_length_;
+
+ double average_cost_;
+};
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_HASH_H_
diff --git a/brotli/enc/histogram.cc b/brotli/enc/histogram.cc
new file mode 100644
index 0000000..32d7730
--- /dev/null
+++ b/brotli/enc/histogram.cc
@@ -0,0 +1,72 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Build per-context histograms of literals, commands and distance codes.
+
+#include "./histogram.h"
+
+#include <stdint.h>
+#include <cmath>
+
+#include "./block_splitter.h"
+#include "./command.h"
+#include "./context.h"
+#include "./prefix.h"
+
+namespace brotli {
+
+void BuildHistograms(
+ const std::vector<Command>& cmds,
+ const BlockSplit& literal_split,
+ const BlockSplit& insert_and_copy_split,
+ const BlockSplit& dist_split,
+ const uint8_t* input_buffer,
+ size_t pos,
+ int context_mode,
+ int distance_context_mode,
+ std::vector<HistogramLiteral>* literal_histograms,
+ std::vector<HistogramCommand>* insert_and_copy_histograms,
+ std::vector<HistogramDistance>* copy_dist_histograms) {
+ BlockSplitIterator literal_it(literal_split);
+ BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
+ BlockSplitIterator dist_it(dist_split);
+ for (int i = 0; i < cmds.size(); ++i) {
+ const Command &cmd = cmds[i];
+ insert_and_copy_it.Next();
+ (*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
+ cmd.command_prefix_);
+ for (int j = 0; j < cmd.insert_length_; ++j) {
+ literal_it.Next();
+ uint8_t prev_byte = pos > 0 ? input_buffer[pos - 1] : 0;
+ uint8_t prev_byte2 = pos > 1 ? input_buffer[pos - 2] : 0;
+ uint8_t prev_byte3 = pos > 2 ? input_buffer[pos - 3] : 0;
+ int context = (literal_it.type_ * NumContexts(context_mode) +
+ Context(prev_byte, prev_byte2, prev_byte3, context_mode));
+ (*literal_histograms)[context].Add(input_buffer[pos]);
+ ++pos;
+ }
+ pos += cmd.copy_length_;
+ if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
+ dist_it.Next();
+ int context = dist_it.type_;
+ if (distance_context_mode > 0) {
+ context <<= 2;
+ context += (cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2;
+ }
+ (*copy_dist_histograms)[context].Add(cmd.distance_prefix_);
+ }
+ }
+}
+
+} // namespace brotli
diff --git a/brotli/enc/histogram.h b/brotli/enc/histogram.h
new file mode 100644
index 0000000..69f8b9a
--- /dev/null
+++ b/brotli/enc/histogram.h
@@ -0,0 +1,97 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Models the histograms of literals, commands and distance codes.
+
+#ifndef BROTLI_ENC_HISTOGRAM_H_
+#define BROTLI_ENC_HISTOGRAM_H_
+
+#include <stdint.h>
+#include <string.h>
+#include <vector>
+#include <utility>
+#include "./command.h"
+#include "./fast_log.h"
+#include "./prefix.h"
+
+namespace brotli {
+
+class BlockSplit;
+
+// A simple container for histograms of data in blocks.
+template<int kDataSize>
+struct Histogram {
+ Histogram() {
+ Clear();
+ }
+ void Clear() {
+ memset(data_, 0, sizeof(data_));
+ total_count_ = 0;
+ }
+ void Add(int val) {
+ ++data_[val];
+ ++total_count_;
+ }
+ void Remove(int val) {
+ --data_[val];
+ --total_count_;
+ }
+ template<typename DataType>
+ void Add(const DataType *p, size_t n) {
+ total_count_ += n;
+ n += 1;
+    while (--n) ++data_[*p++];
+ }
+ void AddHistogram(const Histogram& v) {
+ total_count_ += v.total_count_;
+ for (int i = 0; i < kDataSize; ++i) {
+ data_[i] += v.data_[i];
+ }
+ }
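+  // Shannon bit cost of the histogram: since total_count_ is the sum of
+  // data_[i], the expression below equals the sum over i of
+  // data_[i] * log2(total_count_ / data_[i]).  FastLog2(0) == 0, so empty
+  // bins contribute nothing.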
+ double EntropyBitCost() const {
+ double retval = total_count_ * FastLog2(total_count_);
+ for (int i = 0; i < kDataSize; ++i) {
+ retval -= data_[i] * FastLog2(data_[i]);
+ }
+ return retval;
+ }
+
+ int data_[kDataSize];
+ int total_count_;
+ double bit_cost_;
+};
+
+// Literal histogram.
+typedef Histogram<256> HistogramLiteral;
+// Prefix histograms.
+typedef Histogram<kNumCommandPrefixes> HistogramCommand;
+typedef Histogram<kNumDistancePrefixes> HistogramDistance;
+typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
+
+void BuildHistograms(
+ const std::vector<Command>& cmds,
+ const BlockSplit& literal_split,
+ const BlockSplit& insert_and_copy_split,
+ const BlockSplit& dist_split,
+ const uint8_t* input_buffer,
+ size_t pos,
+ int context_mode,
+ int distance_context_mode,
+ std::vector<HistogramLiteral>* literal_histograms,
+ std::vector<HistogramCommand>* insert_and_copy_histograms,
+ std::vector<HistogramDistance>* copy_dist_histograms);
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_HISTOGRAM_H_
diff --git a/brotli/enc/literal_cost.cc b/brotli/enc/literal_cost.cc
new file mode 100644
index 0000000..0dac1a6
--- /dev/null
+++ b/brotli/enc/literal_cost.cc
@@ -0,0 +1,60 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Literal cost model to allow backward reference replacement to be efficient.
+
+#include "./literal_cost.h"
+
+#include <math.h>
+#include <stdint.h>
+#include <algorithm>
+
+namespace brotli {
+
+void EstimateBitCostsForLiterals(size_t len, const uint8_t *data, float *cost) {
+ int histogram[256] = { 0 };
+ int window_half = 2000;
+ int in_window = std::min(static_cast<size_t>(window_half), len);
+
+ // Bootstrap histogram.
+ for (int i = 0; i < in_window; ++i) {
+ ++histogram[data[i]];
+ }
+
+ // Compute bit costs with sliding window.
+ for (int i = 0; i < len; ++i) {
+ if (i - window_half >= 0) {
+ // Remove a byte in the past.
+ --histogram[data[i - window_half]];
+ --in_window;
+ }
+ if (i + window_half < len) {
+ // Add a byte in the future.
+ ++histogram[data[i + window_half]];
+ ++in_window;
+ }
+ int histo = histogram[data[i]];
+ if (histo == 0) {
+ histo = 1;
+ }
+ cost[i] = log2(static_cast<double>(in_window) / histo);
+ cost[i] += 0.03;
+ if (cost[i] < 1.0) {
+ cost[i] *= 0.5;
+ cost[i] += 0.5;
+ }
+ }
+}
+
+} // namespace brotli
diff --git a/brotli/enc/literal_cost.h b/brotli/enc/literal_cost.h
new file mode 100644
index 0000000..0dbbc3f
--- /dev/null
+++ b/brotli/enc/literal_cost.h
@@ -0,0 +1,31 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Literal cost model to allow backward reference replacement to be efficient.
+
+#ifndef BROTLI_ENC_LITERAL_COST_H_
+#define BROTLI_ENC_LITERAL_COST_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace brotli {
+
+// Input: length of the data, and the bytes themselves.
+// Output: an estimate of how many bits each literal will take when entropy
+// coded, written as one entry of cost per input byte.
+void EstimateBitCostsForLiterals(size_t len, const uint8_t *data, float *cost);
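+//
+// Rough illustration: a byte value that occurs 250 times in the (up to
+// 4000-byte) sliding window around position i gets a cost of about
+// log2(4000 / 250) = 4 bits plus a small constant, and costs that would
+// fall below one bit are squashed into the range [0.5, 1).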
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_LITERAL_COST_H_
diff --git a/brotli/enc/port.h b/brotli/enc/port.h
new file mode 100644
index 0000000..36a365e
--- /dev/null
+++ b/brotli/enc/port.h
@@ -0,0 +1,138 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Macros for endianness, branch prediction and unaligned loads and stores.
+
+#ifndef BROTLI_ENC_PORT_H_
+#define BROTLI_ENC_PORT_H_
+
+// memcpy and the fixed-width integer types are used by the unaligned
+// load/store helpers below.
+#include <stdint.h>
+#include <string.h>
+
+#if defined OS_LINUX || defined OS_CYGWIN
+#include <endian.h>
+#elif defined OS_FREEBSD
+#include <machine/endian.h>
+#elif defined OS_MACOSX
+#include <machine/endian.h>
+/* Let's try and follow the Linux convention */
+#define __BYTE_ORDER BYTE_ORDER
+#define __LITTLE_ENDIAN LITTLE_ENDIAN
+#define __BIG_ENDIAN BIG_ENDIAN
+#endif
+
+// Define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN using the endian
+// definitions above, if endian.h was included.
+#ifdef __BYTE_ORDER
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define IS_LITTLE_ENDIAN
+#endif
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define IS_BIG_ENDIAN
+#endif
+
+#else
+
+#if defined(__LITTLE_ENDIAN__)
+#define IS_LITTLE_ENDIAN
+#elif defined(__BIG_ENDIAN__)
+#define IS_BIG_ENDIAN
+#endif
+#endif // __BYTE_ORDER
+
+#if defined(COMPILER_GCC3)
+#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
+#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#else
+#define PREDICT_FALSE(x) x
+#define PREDICT_TRUE(x) x
+#endif
+
+// Portable handling of unaligned loads, stores, and copies.
+// On some platforms, like ARM, the copy functions can be more efficient
+// than a load and a store.
+
+#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || \
+ defined(ARCH_K8) || defined(_ARCH_PPC)
+
+// x86 and x86-64 can perform unaligned loads/stores directly;
+// modern PowerPC hardware can also do unaligned integer loads and stores;
+// but note: the FPU still sends unaligned loads and stores to a trap handler!
+
+#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
+#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
+
+#define BROTLI_UNALIGNED_STORE32(_p, _val) \
+ (*reinterpret_cast<uint32_t *>(_p) = (_val))
+#define BROTLI_UNALIGNED_STORE64(_p, _val) \
+ (*reinterpret_cast<uint64_t *>(_p) = (_val))
+
+#elif defined(__arm__) && \
+ !defined(__ARM_ARCH_5__) && \
+ !defined(__ARM_ARCH_5T__) && \
+ !defined(__ARM_ARCH_5TE__) && \
+ !defined(__ARM_ARCH_5TEJ__) && \
+ !defined(__ARM_ARCH_6__) && \
+ !defined(__ARM_ARCH_6J__) && \
+ !defined(__ARM_ARCH_6K__) && \
+ !defined(__ARM_ARCH_6Z__) && \
+ !defined(__ARM_ARCH_6ZK__) && \
+ !defined(__ARM_ARCH_6T2__)
+
+// ARMv7 and newer support native unaligned accesses, but only of 16-bit
+// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
+// do an unaligned read and rotate the words around a bit, or do the reads very
+// slowly (trip through kernel mode).
+
+#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
+#define BROTLI_UNALIGNED_STORE32(_p, _val) \
+ (*reinterpret_cast<uint32_t *>(_p) = (_val))
+
+inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
+ uint64_t t;
+ memcpy(&t, p, sizeof t);
+ return t;
+}
+
+inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
+ memcpy(p, &v, sizeof v);
+}
+
+#else
+
+// These functions are provided for architectures that don't support
+// unaligned loads and stores.
+
+inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
+ uint32_t t;
+ memcpy(&t, p, sizeof t);
+ return t;
+}
+
+inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
+ uint64_t t;
+ memcpy(&t, p, sizeof t);
+ return t;
+}
+
+inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
+ memcpy(p, &v, sizeof v);
+}
+
+inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
+ memcpy(p, &v, sizeof v);
+}
+
+#endif
+
+#endif // BROTLI_ENC_PORT_H_
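
On targets without the fast paths above, the helpers fall back to a fixed-size memcpy, which compilers typically lower to a single load or store where the hardware allows it while remaining well defined on targets that trap on misaligned access. A small illustrative sketch of using the helpers on a deliberately misaligned address; the buffer and values are made up.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #include "./port.h"

    int main() {
      uint8_t buf[16] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};
      // &buf[1] is not 4-byte aligned; the helper keeps the access well
      // defined on targets that cannot dereference misaligned pointers.
      uint32_t v = BROTLI_UNALIGNED_LOAD32(&buf[1]);
      BROTLI_UNALIGNED_STORE32(&buf[9], v);
      printf("loaded 0x%08x\n", v);  // byte order depends on host endianness
      return 0;
    }
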
diff --git a/brotli/enc/prefix.cc b/brotli/enc/prefix.cc
new file mode 100644
index 0000000..3e43501
--- /dev/null
+++ b/brotli/enc/prefix.cc
@@ -0,0 +1,166 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions for encoding integers into prefix codes, together with the
+// number of extra bits and the actual values of those extra bits.
+
+#include "./prefix.h"
+
+#include "./fast_log.h"
+
+namespace brotli {
+
+// Represents the range of values belonging to a prefix code:
+// [offset, offset + 2^nbits)
+struct PrefixCodeRange {
+ int offset;
+ int nbits;
+};
+
+static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
+ { 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
+ { 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
+ { 49, 4}, { 65, 4}, { 81, 4}, { 97, 4},
+ { 113, 5}, { 145, 5}, { 177, 5}, { 209, 5},
+ { 241, 6}, { 305, 6}, { 369, 7}, { 497, 8},
+ { 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12},
+ {8433, 13}, {16625, 24}
+};
+
+static const PrefixCodeRange kInsertLengthPrefixCode[kNumInsertLenPrefixes] = {
+ { 0, 0}, { 1, 0}, { 2, 0}, { 3, 0},
+ { 4, 0}, { 5, 0}, { 6, 1}, { 8, 1},
+ { 10, 2}, { 14, 2}, { 18, 3}, { 26, 3},
+ { 34, 4}, { 50, 4}, { 66, 5}, { 98, 5},
+ { 130, 6}, { 194, 7}, { 322, 8}, { 578, 9},
+ {1090, 10}, {2114, 12}, {6210, 14}, {22594, 24},
+};
+
+static const PrefixCodeRange kCopyLengthPrefixCode[kNumCopyLenPrefixes] = {
+ { 2, 0}, { 3, 0}, { 4, 0}, { 5, 0},
+ { 6, 0}, { 7, 0}, { 8, 0}, { 9, 0},
+ { 10, 1}, { 12, 1}, { 14, 2}, { 18, 2},
+ { 22, 3}, { 30, 3}, { 38, 4}, { 54, 4},
+ { 70, 5}, { 102, 5}, { 134, 6}, { 198, 7},
+ {326, 8}, { 582, 9}, {1094, 10}, {2118, 24},
+};
+
+static const int kInsertAndCopyRangeLut[9] = {
+ 0, 1, 4, 2, 3, 6, 5, 7, 8,
+};
+
+static const int kInsertRangeLut[9] = {
+ 0, 0, 1, 1, 0, 2, 1, 2, 2,
+};
+
+static const int kCopyRangeLut[9] = {
+ 0, 1, 0, 1, 2, 0, 2, 1, 2,
+};
+
+int InsertLengthPrefix(int length) {
+ for (int i = 0; i < kNumInsertLenPrefixes; ++i) {
+ const PrefixCodeRange& range = kInsertLengthPrefixCode[i];
+ if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int CopyLengthPrefix(int length) {
+ for (int i = 0; i < kNumCopyLenPrefixes; ++i) {
+ const PrefixCodeRange& range = kCopyLengthPrefixCode[i];
+ if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int CommandPrefix(int insert_length, int copy_length) {
+ if (copy_length == 0) {
+ copy_length = 3;
+ }
+ int insert_prefix = InsertLengthPrefix(insert_length);
+ int copy_prefix = CopyLengthPrefix(copy_length);
+ int range_idx = 3 * (insert_prefix >> 3) + (copy_prefix >> 3);
+ return ((kInsertAndCopyRangeLut[range_idx] << 6) +
+ ((insert_prefix & 7) << 3) + (copy_prefix & 7));
+}
+
+int InsertLengthExtraBits(int code) {
+ int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
+ return kInsertLengthPrefixCode[insert_code].nbits;
+}
+
+int InsertLengthOffset(int code) {
+ int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
+ return kInsertLengthPrefixCode[insert_code].offset;
+}
+
+int CopyLengthExtraBits(int code) {
+ int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
+ return kCopyLengthPrefixCode[copy_code].nbits;
+}
+
+int CopyLengthOffset(int code) {
+ int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
+ return kCopyLengthPrefixCode[copy_code].offset;
+}
+
+void PrefixEncodeCopyDistance(int distance_code,
+ int num_direct_codes,
+ int postfix_bits,
+ uint16_t* code,
+ int* nbits,
+ uint32_t* extra_bits) {
+ distance_code -= 1;
+ if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
+ *code = distance_code;
+ *nbits = 0;
+ *extra_bits = 0;
+ return;
+ }
+ distance_code -= kNumDistanceShortCodes + num_direct_codes;
+ distance_code += (1 << (postfix_bits + 2));
+ int bucket = Log2Floor(distance_code) - 1;
+ int postfix_mask = (1 << postfix_bits) - 1;
+ int postfix = distance_code & postfix_mask;
+ int prefix = (distance_code >> bucket) & 1;
+ int offset = (2 + prefix) << bucket;
+ *nbits = bucket - postfix_bits;
+ *code = kNumDistanceShortCodes + num_direct_codes +
+ ((2 * (*nbits - 1) + prefix) << postfix_bits) + postfix;
+ *extra_bits = (distance_code - offset) >> postfix_bits;
+}
+
+int BlockLengthPrefix(int length) {
+ for (int i = 0; i < kNumBlockLenPrefixes; ++i) {
+ const PrefixCodeRange& range = kBlockLengthPrefixCode[i];
+ if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int BlockLengthExtraBits(int length_code) {
+ return kBlockLengthPrefixCode[length_code].nbits;
+}
+
+int BlockLengthOffset(int length_code) {
+ return kBlockLengthPrefixCode[length_code].offset;
+}
+
+} // namespace brotli
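
CommandPrefix packs the insert-length and copy-length prefix codes into a single command symbol: the low three bits of each prefix occupy the low six bits of the symbol, and the 3x3 range lookup supplies the upper bits. The functions that follow invert that packing to recover the extra-bit counts and offsets. A worked round trip under the tables above; the driver and the chosen lengths are illustrative only.

    #include <assert.h>
    #include <stdio.h>

    #include "./prefix.h"

    int main() {
      const int insert_length = 7;
      const int copy_length = 10;
      const int cmd = brotli::CommandPrefix(insert_length, copy_length);
      // Insert prefix 6 (range [6,8), 1 extra bit) and copy prefix 8
      // (range [10,12), 1 extra bit) pack into command symbol 112.
      assert(cmd == 112);
      assert(brotli::InsertLengthOffset(cmd) == 6);
      assert(brotli::InsertLengthExtraBits(cmd) == 1);
      assert(brotli::CopyLengthOffset(cmd) == 10);
      assert(brotli::CopyLengthExtraBits(cmd) == 1);
      // The encoder would emit the symbol plus the two extra-bit values:
      printf("symbol %d, insert extra %d, copy extra %d\n",
             cmd, insert_length - brotli::InsertLengthOffset(cmd),
             copy_length - brotli::CopyLengthOffset(cmd));
      return 0;
    }
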
diff --git a/brotli/enc/prefix.h b/brotli/enc/prefix.h
new file mode 100644
index 0000000..47974f8
--- /dev/null
+++ b/brotli/enc/prefix.h
@@ -0,0 +1,51 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions for encoding integers into prefix codes, together with the
+// number of extra bits and the actual values of those extra bits.
+
+#ifndef BROTLI_ENC_PREFIX_H_
+#define BROTLI_ENC_PREFIX_H_
+
+#include <stdint.h>
+
+namespace brotli {
+
+static const int kNumInsertLenPrefixes = 24;
+static const int kNumCopyLenPrefixes = 24;
+static const int kNumCommandPrefixes = 704;
+static const int kNumBlockLenPrefixes = 26;
+static const int kNumDistanceShortCodes = 16;
+static const int kNumDistancePrefixes = 520;
+
+int CommandPrefix(int insert_length, int copy_length);
+int InsertLengthExtraBits(int prefix);
+int InsertLengthOffset(int prefix);
+int CopyLengthExtraBits(int prefix);
+int CopyLengthOffset(int prefix);
+
+void PrefixEncodeCopyDistance(int distance_code,
+ int num_direct_codes,
+ int shift_bits,
+ uint16_t* prefix,
+ int* nbits,
+ uint32_t* extra_bits);
+
+int BlockLengthPrefix(int length);
+int BlockLengthExtraBits(int prefix);
+int BlockLengthOffset(int prefix);
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_PREFIX_H_
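
PrefixEncodeCopyDistance splits a distance code into the prefix symbol that gets entropy coded plus the raw extra bits that follow it in the bit stream; the second and third arguments select the block's distance-code parameters. A minimal illustrative call with the simplest configuration (no direct codes, no postfix/shift bits) and an arbitrary distance code; the driver only prints the result.

    #include <stdint.h>
    #include <stdio.h>

    #include "./prefix.h"

    int main() {
      uint16_t code;
      int nbits;
      uint32_t extra;
      // Split one distance code into the entropy-coded prefix symbol plus the
      // raw extra bits that follow it.
      brotli::PrefixEncodeCopyDistance(20 /* distance code */,
                                       0  /* num_direct_codes */,
                                       0  /* postfix (shift) bits */,
                                       &code, &nbits, &extra);
      printf("prefix symbol %d, %d extra bits, extra value %u\n",
             code, nbits, extra);
      return 0;
    }
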
diff --git a/brotli/enc/write_bits.h b/brotli/enc/write_bits.h
new file mode 100644
index 0000000..ce888e6
--- /dev/null
+++ b/brotli/enc/write_bits.h
@@ -0,0 +1,91 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Write bits into a byte array.
+
+#ifndef BROTLI_ENC_WRITE_BITS_H_
+#define BROTLI_ENC_WRITE_BITS_H_
+
+#include <assert.h>
+#include <endian.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "./port.h"
+
+namespace brotli {
+
+//#define BIT_WRITER_DEBUG
+
+// This function writes bits into bytes in increasing addresses, and within
+// a byte least-significant-bit first.
+//
+// The function can write up to 56 bits in one go with WriteBits.
+// Example: let's assume that 3 bits (Rs below) have been written already:
+//
+// BYTE-0 BYTE+1 BYTE+2
+//
+// 0000 0RRR 0000 0000 0000 0000
+//
+// Now, we could write 5 or fewer bits in MSB by just shifting by 3
+// and OR'ing into BYTE-0.
+//
+// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
+// and locate the rest in BYTE+1, BYTE+2, etc.
+inline void WriteBits(int n_bits,
+ uint64_t bits,
+ int * __restrict pos,
+ uint8_t * __restrict array) {
+#ifdef BIT_WRITER_DEBUG
+ printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
+#endif
+#ifdef IS_LITTLE_ENDIAN
+  // This branch of the code can write up to 56 bits at a time: up to 7 bits
+  // may already be occupied in *p, and at least 1 bit is needed to
+  // initialize the bit-stream ahead (i.e. if 7 bits are already in *p and we
+  // wrote 57 bits, the next write would access a byte that was never
+  // initialized).
+ uint8_t *p = &array[*pos >> 3];
+ uint64_t v = *p;
+ v |= bits << (*pos & 7);
+ BROTLI_UNALIGNED_STORE64(p, v); // Set some bits.
+ *pos += n_bits;
+#else
+  // An implicit "& 0xff" is assumed for uint8_t arithmetic.
+ uint8_t *array_pos = &array[*pos >> 3];
+ const int bits_reserved_in_first_byte = (*pos & 7);
+ bits <<= bits_reserved_in_first_byte;
+ *array_pos++ |= bits;
+ for (int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
+ bits_left_to_write >= 1;
+ bits_left_to_write -= 8) {
+ bits >>= 8;
+ *array_pos++ = bits;
+ }
+ *array_pos = 0;
+ *pos += n_bits;
+#endif
+}
+
+inline void WriteBitsPrepareStorage(int pos, uint8_t *array) {
+#ifdef BIT_WRITER_DEBUG
+ printf("WriteBitsPrepareStorage %10d\n", pos);
+#endif
+ assert((pos & 7) == 0);
+ array[pos >> 3] = 0;
+}
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_WRITE_BITS_H_
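
Because bits are packed least-significant-bit first, consecutive WriteBits calls fill each byte from its low bits upward and the caller only tracks a single bit position. A small sketch under the assumptions stated in the comments above (zero-initialized storage with at least eight spare bytes past the write position, since the fast path stores 64 bits at a time); the driver and values are illustrative.

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #include "./write_bits.h"

    int main() {
      uint8_t storage[16];
      memset(storage, 0, sizeof(storage));
      int pos = 0;  // bit position, not byte position
      brotli::WriteBitsPrepareStorage(pos, storage);
      brotli::WriteBits(3, 0x5, &pos, storage);   // bits 0..2 <- 101
      brotli::WriteBits(5, 0x1a, &pos, storage);  // bits 3..7 <- 11010
      // 101 in the low bits and 11010 above them give 0b11010101 = 0xd5.
      assert(pos == 8);
      assert(storage[0] == 0xd5);
      return 0;
    }
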