aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTorne (Richard Coles) <torne@google.com>2014-02-21 12:17:39 +0000
committerTorne (Richard Coles) <torne@google.com>2014-02-21 12:17:39 +0000
commit2237d30d6e68c2c67c5877094de2dce4f9441765 (patch)
tree10b9caaf1a19f4793e3a4e48900c053783e844e8
parent62030c64c7123356cce4fabcc12a921dc8aaabc5 (diff)
parent8cba613a0b71e71abcab87aa2478cdddf5bef1ef (diff)
downloadsrc-2237d30d6e68c2c67c5877094de2dce4f9441765.tar.gz
Merge from Chromium at DEPS revision 251904
This commit was generated by merge_to_master.py. Change-Id: Ib6407e6b02e637e5cd72c4ff15d21cf63542412e
-rw-r--r--LICENSE27
-rw-r--r--README160
-rw-r--r--brotli/LICENSE202
-rw-r--r--brotli/brotlispec.txt1264
-rw-r--r--brotli/dec/Makefile10
-rw-r--r--brotli/dec/README3
-rw-r--r--brotli/dec/bit_reader.c50
-rw-r--r--brotli/dec/bit_reader.h176
-rw-r--r--brotli/dec/context.h259
-rw-r--r--brotli/dec/decode.c1023
-rw-r--r--brotli/dec/decode.h52
-rw-r--r--brotli/dec/huffman.c257
-rw-r--r--brotli/dec/huffman.h75
-rw-r--r--brotli/dec/prefix.h65
-rw-r--r--brotli/dec/safe_malloc.c42
-rw-r--r--brotli/dec/safe_malloc.h45
-rw-r--r--brotli/dec/streams.c117
-rw-r--r--brotli/dec/streams.h103
-rw-r--r--brotli/dec/types.h42
-rw-r--r--brotli/enc/Makefile11
-rw-r--r--brotli/enc/README3
-rw-r--r--brotli/enc/backward_references.cc143
-rw-r--r--brotli/enc/backward_references.h39
-rw-r--r--brotli/enc/bit_cost.h139
-rw-r--r--brotli/enc/block_splitter.cc390
-rw-r--r--brotli/enc/block_splitter.h77
-rw-r--r--brotli/enc/cluster.h288
-rw-r--r--brotli/enc/command.h46
-rw-r--r--brotli/enc/context.h185
-rw-r--r--brotli/enc/encode.cc896
-rw-r--r--brotli/enc/encode.h75
-rw-r--r--brotli/enc/entropy_encode.cc409
-rw-r--r--brotli/enc/entropy_encode.h116
-rw-r--r--brotli/enc/fast_log.h161
-rw-r--r--brotli/enc/find_match_length.h85
-rw-r--r--brotli/enc/hash.h367
-rw-r--r--brotli/enc/histogram.cc94
-rw-r--r--brotli/enc/histogram.h114
-rw-r--r--brotli/enc/literal_cost.cc62
-rw-r--r--brotli/enc/literal_cost.h33
-rw-r--r--brotli/enc/port.h138
-rw-r--r--brotli/enc/prefix.cc166
-rw-r--r--brotli/enc/prefix.h51
-rw-r--r--brotli/enc/ringbuffer.h89
-rw-r--r--brotli/enc/write_bits.h95
-rw-r--r--build.xml32
-rw-r--r--docs/WOFFUltraCondensedfileformat.pdfbin0 -> 192108 bytes
-rw-r--r--ots-lzma.patch5500
-rw-r--r--shared.mk17
-rw-r--r--woff2/Makefile28
-rw-r--r--woff2/file.h40
-rw-r--r--woff2/font.cc176
-rw-r--r--woff2/font.h81
-rw-r--r--woff2/glyph.cc380
-rw-r--r--woff2/glyph.h71
-rw-r--r--woff2/normalize.cc194
-rw-r--r--woff2/normalize.h45
-rw-r--r--woff2/ots.h153
-rw-r--r--woff2/port.h46
-rw-r--r--woff2/round.h33
-rw-r--r--woff2/store_bytes.h61
-rw-r--r--woff2/transform.cc263
-rw-r--r--woff2/transform.h31
-rw-r--r--woff2/woff2.cc1313
-rw-r--r--woff2/woff2.h50
-rw-r--r--woff2/woff2_compress.cc52
-rw-r--r--woff2/woff2_decompress.cc54
-rw-r--r--woff2_header_dump.py38
68 files changed, 16902 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..e74c256
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,27 @@
+// Copyright (c) 2011 Google Inc. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README b/README
new file mode 100644
index 0000000..df97cc2
--- /dev/null
+++ b/README
@@ -0,0 +1,160 @@
+This is a README for the font compression reference code. There are several
+compression related modules in this repository.
+
+brotli/ contains reference code for the Brotli byte-level compression
+algorithm. Note that it is licensed under an Apache 2 license.
+
+src/ contains prototype Java code for compressing fonts.
+
+cpp/ contains prototype C++ code for decompressing fonts.
+
+docs/ contains documents describing the proposed compression format.
+
+= How to run the compression test tool =
+
+This document describes how to run the compression reference code. At this
+writing, while the code is intended to produce a bytestream that can be
+reconstructed into a working font, the reference decompression code is not
+done, and the exact format of that bytestream is subject to change.
+
+== Building the tool ==
+
+On a standard Unix-style environment, it should be as simple as running “ant”.
+
+The tool depends on sfntly for much of the font work. The lib/ directory
+contains a snapshot jar. If you want to use the latest sfntly sources, then cd
+to the java subdirectory, run “ant”, then copy these files dist/lib/sfntly.jar
+dist/tools/conversion/eot/eotconverter.jar and
+dist.tools/conversion/woff/woffconverter.jar to $(thisproject)/lib:
+
+dist/lib/sfntly.jar dist/tools/conversion/eot/eotconverter.jar
+dist.tools/conversion/woff/woffconverter.jar
+
+There’s also a dependency on guava (see references below).
+
+The dependencies are subject to their own licenses.
+
+== Setting up the test ==
+
+A run of the tool evaluates a “base” configuration plus one or more test
+configurations, for each font. It measures the file size of the test as a ratio
+over the base file size, then graphs the value of that ratio sorted across all
+files given on the command line.
+
+The test parameters are set by command line options (an improvement from the
+last snapshot). The base is set by the -b command line option, and the
+additional tests are specified by repeated -x command line options (see below).
+
+Each test is specified by a string description. It is a colon-separated list of
+stages. The final stage is entropy compression and can be one of “gzip”,
+“lzma”, “bzip2”, “woff”, “eot” (with actual wire-format MTX compression), or
+“uncomp” (for raw, uncompressed TTF’s). Also, the new wire-format draft
+WOFF2 spec is available as "woff2", and takes an entropy coding as an
+optional argument, as in "woff2/gzip" or "woff2/lzma".
+
+Other stages may optionally include subparameters (following a slash, and
+comma-separated). The stages are:
+
+glyf: performs glyf-table preprocessing based on MTX. There are subparameters:
+1. cbbox (composite bounding box). When specified, the bounding box for
+composite glyphs is included, otherwise stripped 2. sbbox (simple bounding
+box). When specified, the bounding box for simple glyphs is included 3. code:
+the bytecode is separated out into a separate stream 4. triplet: triplet coding
+(as in MTX) is used 5. push: push sequences are separated; if unset, pushes are
+kept inline in the bytecode 6. reslice: components of the glyf table are
+separated into individual streams, taking the MTX idea of separating the
+bytecodes further.
+
+hmtx: strips lsb’s from the hmtx table. Based on the idea that lsb’s can be
+reconstructed from bbox.
+
+hdmx: performs the delta coding on hdmx, essentially the same as MTX.
+
+cmap: compresses cmap table: wire format representation is inverse of cmap
+table plus exceptions (one glyph encoded by multiple character codes).
+
+kern: compresses kern table (not robust, intended just for rough testing).
+
+strip: the subparameters are a list of tables to be stripped entirely
+(comma-separated).
+
+The string roughly corresponding to MTX is:
+
+glyf/cbbox,code,triplet,push,hop:hdmx:gzip
+
+Meaning: glyph encoding is used, with simple glyph bboxes stripped (but
+composite glyph bboxes included), triplet coding, push sequences, and hop
+codes. The hdmx table is compressed. And finally, gzip is used as the entropy
+coder.
+
+This differs from MTX in a number of small ways: LZCOMP is not exactly the same
+as gzip. MTX uses three separate compression streams (the base font including
+triplet-coded glyph data), the bytecodes, and the push sequences, while this
+test uses a single stream. MTX also compresses the CVT table (an upper bound on
+the impact of this can be estimated by testing strip/cvt).
+
+Lastly, as a point of methodology, the code by default strips the “dsig” table,
+which would be invalidated by any non-bit-identical change to the font data. If
+it is desired to keep this table, add the “keepdsig” stage.
+
+The string representing the currently most aggressive optimization level is:
+
+glyf/triplet,code,push,reslice:hdmx:hmtx:cmap:kern:lzma
+
+In addition to the MTX one above, it strips the bboxes from composite glyphs,
+reslices the glyf table, compresses the hmtx, cmap, and kern tables, and uses
+lzma as the entropy coding.
+
+The string corresponding to the current WOFF Ultra Condensed draft spec
+document is:
+
+glyf/cbbox,triplet,code,reslice:woff2/lzma
+
+The current C++ codebase can roundtrip compressed files as long as no per-table
+entropy coding is specified, as below (this will be fixed soon).
+
+glyf/cbbox,triplet,code,reslice:woff2
+
+
+== Running the tool ==
+
+java -jar build/jar/compression.jar *.ttf > chart.html
+
+The tool takes a list of OpenType fonts on the commandline, and generates an
+HTML chart, which it simply outputs to stdout. This chart uses the Google Chart
+API for plotting.
+
+Options:
+
+-b <desc>
+
+Sets the baseline experiment description.
+
+[ -x <desc> ]...
+
+Sets an experiment description. Can be used multiple times.
+
+-o
+
+Outputs the actual compressed file, substituting ".wof2" for ".ttf" in
+the input file name. Only useful when a single -x parameter is specified.
+
+= Decompressing the fonts =
+
+See the cpp/ directory (including cpp/README) for the C++ implementation of
+decompression. This code is based on OTS, and successfully roundtrips the
+basic compression as described in the draft spec.
+
+= References =
+
+sfntly: http://code.google.com/p/sfntly/ Guava:
+http://code.google.com/p/guava-libraries/ MTX:
+http://www.w3.org/Submission/MTX/
+
+Also please refer to documents (currently Google Docs):
+
+WOFF Ultra Condensed file format: proposals and discussion of wire format
+issues (PDF is in docs/ directory)
+
+WOFF Ultra Condensed: more discussion of results and compression techniques.
+This tool was used to prepare the data in that document.
diff --git a/brotli/LICENSE b/brotli/LICENSE
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/brotli/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/brotli/brotlispec.txt b/brotli/brotlispec.txt
new file mode 100644
index 0000000..23de497
--- /dev/null
+++ b/brotli/brotlispec.txt
@@ -0,0 +1,1264 @@
+J. Alakuijala
+Z. Szabadka
+ ______ _______ _______ _______ _________
+ ( __ \ ( ____ )( ___ )( ____ \\__ __/
+ | ( \ )| ( )|| ( ) || ( \/ ) (
+ | | ) || (____)|| (___) || (__ | |
+ | | | || __)| ___ || __) | |
+ | | ) || (\ ( | ( ) || ( | |
+ | (__/ )| ) \ \__| ) ( || ) | |
+ (______/ |/ \__/|/ \||/ )_(
+
+
+ DRAFT of
+ Brotli Compression Algorithm Compressed Data Format Specification 1.0
+
+Status of This Memo
+
+ This memo provides information for the Internet community. This memo
+ does not specify an Internet standard of any kind. Distribution of
+ this memo is unlimited.
+
+Notices
+
+ Copyright (c) 2013 J. Alakuijala and Z. Szabadka
+
+ Permission is granted to copy and distribute this document for any
+ purpose and without charge, including translations into other
+ languages and incorporation into compilations, provided that the
+ copyright notice and this notice are preserved, and that any
+ substantive changes or deletions from the original are clearly
+ marked.
+
+Abstract
+
+ This specification defines a lossless compressed data format that
+ compresses data using a combination of the LZ77 algorithm and Huffman
+ coding, with efficiency comparable to the best currently available
+ general-purpose compression methods.
+
+1. Introduction
+
+ 1.1. Purpose
+
+ The purpose of this specification is to define a lossless
+ compressed data format that:
+ * Is independent of CPU type, operating system, file system,
+ and character set, and hence can be used for interchange;
+ * Can be produced or consumed, even for an arbitrarily long
+ sequentially presented input data stream, using only an a
+ priori bounded amount of intermediate storage, and hence
+ can be used in data communications or similar structures,
+ such as Unix filters;
+ * Compresses data with efficiency comparable to the best
+ currently available general-purpose compression methods,
+ and in particular considerably better than the gzip program;
+ * Decompresses much faster than the LZMA implementations.
+
+ The data format defined by this specification does not attempt to:
+ * Allow random access to compressed data;
+ * Compress specialized data (e.g., raster graphics) as well
+ as the best currently available specialized algorithms.
+
+ 1.2. Intended audience
+
+ This specification is intended for use by software implementers
+ to compress data into and/or decompress data from "brotli" format.
+
+ The text of the specification assumes a basic background in
+ programming at the level of bits and other primitive data
+ representations. Familiarity with the technique of Huffman coding
+ is helpful but not required.
+
+ This specification uses heavily the notations and terminology
+ introduced in the DEFLATE format specification (RFC 1951, see
+ reference [3] below). For the sake of completeness, we always
+ include the whole text of the relevant parts of RFC 1951,
+ therefore familiarity with the DEFLATE format is helpful but not
+ required.
+
+ 1.3. Scope
+
+ The specification specifies a method for representing a sequence
+ of bytes as a (usually shorter) sequence of bits, and a method for
+ packing the latter bit sequence into bytes.
+
+ 1.4. Compliance
+
+ Unless otherwise indicated below, a compliant decompressor must be
+ able to accept and decompress any data set that conforms to all
+ the specifications presented here. A compliant compressor must
+ produce data sets that conform to all the specifications presented
+ here.
+
+ 1.5. Definitions of terms and conventions used
+
+ Byte: 8 bits stored or transmitted as a unit (same as an octet).
+ For this specification, a byte is exactly 8 bits, even on machines
+ which store a character on a number of bits different from eight.
+ See below for the numbering of bits within a byte.
+
+ String: a sequence of arbitrary bytes.
+
+ Bytes stored within a computer do not have a "bit order", since
+ they are always treated as a unit. However, a byte considered as
+ an integer between 0 and 255 does have a most- and least-
+ significant bit, and since we write numbers with the most-
+ significant digit on the left, we also write bytes with the most-
+ significant bit on the left. In the diagrams below, we number the
+ bits of a byte so that bit 0 is the least-significant bit, i.e.,
+ the bits are numbered:
+
+ +--------+
+ |76543210|
+ +--------+
+
+ Within a computer, a number may occupy multiple bytes. All
+ multi-byte numbers in the format described here are stored with
+ the least-significant byte first (at the lower memory address).
+ For example, the decimal number 520 is stored as:
+
+ 0 1
+ +--------+--------+
+ |00001000|00000010|
+ +--------+--------+
+ ^ ^
+ | |
+ | + more significant byte = 2 x 256
+ + less significant byte = 8
+
+
+ 1.5.1. Packing into bytes
+
+ This document does not address the issue of the order in which
+ bits of a byte are transmitted on a bit-sequential medium,
+ since the final data format described here is byte- rather than
+ bit-oriented. However, we describe the compressed block format
+ below as a sequence of data elements of various bit
+ lengths, not a sequence of bytes. We must therefore specify
+ how to pack these data elements into bytes to form the final
+ compressed byte sequence:
+
+ * Data elements are packed into bytes in order of
+ increasing bit number within the byte, i.e., starting
+ with the least-significant bit of the byte.
+ * Data elements other than Huffman codes are packed
+ starting with the least-significant bit of the data
+ element.
+ * Huffman codes are packed starting with the most-
+ significant bit of the code.
+
+ In other words, if one were to print out the compressed data as
+ a sequence of bytes, starting with the first byte at the
+ *right* margin and proceeding to the *left*, with the most-
+ significant bit of each byte on the left as usual, one would be
+ able to parse the result from right to left, with fixed-width
+ elements in the correct MSB-to-LSB order and Huffman codes in
+ bit-reversed order (i.e., with the first bit of the code in the
+ relative LSB position).
+
+2. Compressed representation overview
+
+ A compressed data set consists of a header and a series of meta-
+ blocks corresponding to successive meta-blocks of input data. The
+ meta-block sizes are limited to bytes and the maximum meta-block size
+ is 268,435,456 bytes.
+
+ The header contains the size of a sliding window on the input data
+ that is sufficient to keep on the intermediate storage at any given
+ point during decoding the stream.
+
+ Each meta-block is compressed using a combination of the LZ77
+ algorithm (Lempel-Ziv 1977, see reference [2] below) and Huffman
+ coding. The Huffman trees for each block are independent of those for
+ previous or subsequent blocks; the LZ77 algorithm may use a
+ reference to a duplicated string occurring in a previous meta-block,
+ up to sliding window size input bytes before.
+
+ Each meta-block consists of two parts: a meta-block header that
+ describes the representation of the compressed data part, and a
+ compressed data part. The compressed data consists of a series of
+ commands. Each command consists of two parts: a sequence of literal
+ bytes (of strings that have not been detected as duplicated within
+ the sliding window), and a pointer to a duplicated string,
+ represented as a pair <length, backward distance>.
+
+ Each command in the compressed data is represented using three kinds
+ of Huffman codes: one kind of code tree for the literal sequence
+ lengths (also referred to as literal insertion lengths) and backward
+ copy lengths (that is, a single code word represents two lengths,
+ one of the literal sequence and one of the backward copy), a separate
+ kind of code tree for literals, and a third kind of code tree for
+ distances. The code trees for each meta-block appear in a compact
+ form just before the compressed data in the meta-block header.
+
+ The sequence of each type of value in the representation of a command
+ (insert-and-copy lengths, literals and distances) within a meta-
+ block is further divided into blocks. In the "brotli" format, blocks
+ are not contiguous chunks of compressed data, but rather the pieces
+ of compressed data belonging to a block are interleaved with pieces
+ of data belonging to other blocks. Each meta-block can be logically
+ decomposed into a series of insert-and-copy length blocks, a series
+ of literal blocks and a series of distance blocks. These are also
+ called the three block categories: a meta-block has a series of
+ blocks for each block category. Note that the physical structure of
+ the meta-block is a series of commands, while the three series of
+ blocks is the logical structure. Consider the following example:
+
+ (IaC0, L0, L1, L2, D0)(IaC1, D1)(IaC2, L3, L4, D2)(IaC3, L5, D3)
+
+ The meta-block here has 4 commands, and each three types of symbols
+ within these commands can be rearranged for example into the
+ following logical block structure:
+
+ [IaC0, IaC1][IaC2, IaC3] <-- block types 0 and 1
+
+ [L0, L1][L2, L3, L4][L5] <-- block types 0, 1, and 0
+
+ [D0][D1, D2, D3] <-- block types 0 and 1
+
+ The subsequent blocks within each block category must have different
+ block types, but blocks further away in the block sequence can have
+ the same types. The block types are numbered from 0 to the maximum
+ block type number of 255 and the first block of each block category
+ must have type 0. The block structure of a meta-block is represented
+ by the sequence of block-switch commands for each block category,
+ where a block-switch command is a pair <block type, block length>.
+ The block-switch commands are represented in the compressed data
+ before the start of each new block using a Huffman code tree for
+ block types and a separate Huffman code tree for block lengths for
+ each block category. In the above example the physical layout of the
+ meta-block is the following:
+
+ IaC0 L0 L1 LBlockSwitch(1, 3) L2 D0 IaC1 DBlockSwitch(1, 1) D1
+      IaCBlockSwitch(1, 2) IaC2 L3 L4 D2 IaC3 LBlockSwitch(0, 1) L5 D3
+
+ Note that the block switch commands for the first blocks are not part
+ of the meta-block compressed data part, they are encoded in the meta-
+ block header. The code trees for block types and lengths (total of
+ six Huffman code trees) appear in a compact form in the meta-block
+ header.
+
+ Each type of value (insert-and-copy lengths, literals and distances)
+ can be encoded with any Huffman tree from a collection of Huffman
+ trees of the same kind appearing in the meta-block header. The
+ particular Huffman tree used can depend on two factors: the block type
+ of the block the value appears in, and the context of the value. In
+ the case of the literals, the context is the previous two bytes in
+ the input data, and in the case of distances, the context is the copy
+ length from the same command. For insert-and-copy lengths, no context
+ is used and the Huffman tree depends only on the block type (in fact,
+ the index of the Huffman tree is the block type number). In the case
+ of literals and distances, the context is mapped to a context ID in
+   the range [0, 63] for literals and [0, 3] for distances and the matrix
+ of the Huffman tree indices for each block type and context ID,
+ called the context map, is encoded in a compact form in the meta-
+ block header.
+
+ In addition to the parts listed above (Huffman code trees for insert-
+ and-copy lengths, literals, distances, block types and block lengths
+ and the context map), the meta-block header contains the number of
+ input bytes in the meta-block and two additional parameters used in
+ the representation of copy distances (number of "postfix bits" and
+ number of direct distance codes).
+
+3. Compressed representation of Huffman codes
+
+ 3.1. Introduction to prefix and Huffman coding
+
+ Prefix coding represents symbols from an a priori known alphabet
+ by bit sequences (codes), one code for each symbol, in a manner
+ such that different symbols may be represented by bit sequences of
+ different lengths, but a parser can always parse an encoded string
+ unambiguously symbol-by-symbol.
+
+ We define a prefix code in terms of a binary tree in which the two
+ edges descending from each non-leaf node are labeled 0 and 1 and
+ in which the leaf nodes correspond one-for-one with (are labeled
+ with) the symbols of the alphabet; then the code for a symbol is
+ the sequence of 0's and 1's on the edges leading from the root to
+ the leaf labeled with that symbol. For example:
+
+ /\ Symbol Code
+ 0 1 ------ ----
+ / \ A 00
+ /\ B B 1
+ 0 1 C 011
+ / \ D 010
+ A /\
+ 0 1
+ / \
+ D C
+
+ A parser can decode the next symbol from an encoded input stream
+ by walking down the tree from the root, at each step choosing the
+ edge corresponding to the next input bit.
+
+ Given an alphabet with known symbol frequencies, the Huffman
+ algorithm allows the construction of an optimal prefix code (one
+ which represents strings with those symbol frequencies using the
+ fewest bits of any possible prefix codes for that alphabet). Such
+ a code is called a Huffman code. (See reference [1] in Chapter 5,
+ references for additional information on Huffman codes.)
+
+ Note that in the "brotli" format, the Huffman codes for the
+ various alphabets must not exceed certain maximum code lengths.
+ This constraint complicates the algorithm for computing code
+ lengths from symbol frequencies. Again, see Chapter 5, references
+ for details.
+
+ 3.2. Use of Huffman coding in the "brotli" format
+
+ The Huffman codes used for each alphabet in the "brotli" format
+ are canonical Huffman codes, which have two additional rules:
+
+ * All codes of a given bit length have lexicographically
+ consecutive values, in the same order as the symbols they
+ represent;
+
+ * Shorter codes lexicographically precede longer codes.
+
+ We could recode the example above to follow this rule as follows,
+ assuming that the order of the alphabet is ABCD:
+
+ Symbol Code
+ ------ ----
+ A 10
+ B 0
+ C 110
+ D 111
+
+ I.e., 0 precedes 10 which precedes 11x, and 110 and 111 are
+ lexicographically consecutive.
+
+ Given this rule, we can define the canonical Huffman code for an
+ alphabet just by giving the bit lengths of the codes for each
+ symbol of the alphabet in order; this is sufficient to determine
+ the actual codes. In our example, the code is completely defined
+ by the sequence of bit lengths (2, 1, 3, 3). The following
+ algorithm generates the codes as integers, intended to be read
+ from most- to least-significant bit. The code lengths are
+ initially in tree[I].Len; the codes are produced in tree[I].Code.
+
+ 1) Count the number of codes for each code length. Let
+ bl_count[N] be the number of codes of length N, N >= 1.
+
+ 2) Find the numerical value of the smallest code for each
+ code length:
+
+ code = 0;
+ bl_count[0] = 0;
+ for (bits = 1; bits <= MAX_BITS; bits++) {
+ code = (code + bl_count[bits-1]) << 1;
+ next_code[bits] = code;
+ }
+
+ 3) Assign numerical values to all codes, using consecutive
+ values for all codes of the same length with the base
+ values determined at step 2. Codes that are never used
+ (which have a bit length of zero) must not be assigned a
+ value.
+
+ for (n = 0; n <= max_code; n++) {
+ len = tree[n].Len;
+ if (len != 0) {
+ tree[n].Code = next_code[len];
+ next_code[len]++;
+ }
+ }
+
+ Example:
+
+ Consider the alphabet ABCDEFGH, with bit lengths (3, 3, 3, 3, 3,
+ 2, 4, 4). After step 1, we have:
+
+ N bl_count[N]
+ - -----------
+ 2 1
+ 3 5
+ 4 2
+
+ Step 2 computes the following next_code values:
+
+ N next_code[N]
+ - ------------
+ 1 0
+ 2 0
+ 3 2
+ 4 14
+
+ Step 3 produces the following code values:
+
+ Symbol Length Code
+ ------ ------ ----
+ A 3 010
+ B 3 011
+ C 3 100
+ D 3 101
+ E 3 110
+ F 2 00
+ G 4 1110
+ H 4 1111
+
+ 3.3. Alphabet sizes
+
+ Huffman codes are used for different purposes in the "brotli"
+ format, and each purpose has a different alphabet size. For
+ literal codes the alphabet size is 256. For insert-and-copy
+ length codes the alphabet size is 704. For block length codes,
+ the alphabet size is 26. For distance codes, block type codes and
+ the Huffman codes used in compressing the context map, the
+ alphabet size is dynamic and is based on other parameters.
+
+ 3.4. Simple Huffman codes
+
+ The first two bits of the compressed representation of each Huffman
+      code distinguish between simple and complex Huffman codes. If
+ this value is 1, then a simple Huffman code follows. Otherwise
+ the value indicates the number of leading zeros.
+
+ A simple Huffman code can have only up to four symbols with non-
+ zero code length. The format of the simple Huffman code is as
+ follows:
+
+ 2 bits: value of 1 indicates a simple Huffman code
+ 2 bits: NSYM - 1, where NSYM = # of symbols with non-zero
+ code length
+
+ NSYM symbols, each encoded using ALPHABET_BITS bits
+
+ 1 bit: tree-select, present only for NSYM = 4
+
+ The value of ALPHABET_BITS depends on the alphabet of the Huffman
+ code: it is the smallest number of bits that can represent all
+ symbols in the alphabet. E.g. for the alphabet of literal bytes,
+ ALPHABET_BITS is 8. The value of each of the NSYM symbols above is
+      the value of the ALPHABET_BITS width machine integer representing
+ the symbol modulo the alphabet size of the Huffman code.
+
+ The (non-zero) code lengths of the symbols can be reconstructed as
+ follows:
+
+ * if NSYM = 1, the code length for the one symbol is one at
+ this stage, but only to distinguish it from the other zero
+ code length symbols, when encoding this symbol in the
+ compressed data stream using this Huffman code later, no
+ actual bits are emitted. Similarly, when decoding a symbol
+ using this Huffman code, no bits are read and the one symbol
+ is returned.
+
+ * if NSYM = 2, both symbols have code length 1.
+
+ * if NSYM = 3, the code lengths for the symbols are 1, 2, 2 in
+ the order they appear in the representation of the simple
+ Huffman code.
+
+ * if NSYM = 4, the code lengths (in order of symbols decoded)
+ depend on the tree-select bit: 2, 2, 2, 2, (tree-select bit 0)
+ or 1, 2, 3, 3 (tree-select bit 1).
+
+ 3.5. Complex Huffman codes
+
+ A complex Huffman code is a canonical Huffman code, defined by the
+ sequence of code lengths, as discussed in Paragraph 3.2, above.
+ For even greater compactness, the code length sequences themselves
+ are compressed using a Huffman code. The alphabet for code lengths
+ is as follows:
+
+ 0 - 15: Represent code lengths of 0 - 15
+ 16: Copy the previous non-zero code length 3 - 6 times
+ The next 2 bits indicate repeat length
+ (0 = 3, ... , 3 = 6)
+ If this is the first code length, or all previous
+ code lengths are zero, a code length of 8 is
+ repeated 3 - 6 times
+ A repeated code length code of 16 modifies the
+ repeat count of the previous one as follows:
+ repeat count = (4 * (repeat count - 2)) +
+ (3 - 6 on the next 2 bits)
+ Example: Codes 7, 16 (+2 bits 11), 16 (+2 bits 10)
+ will expand to 22 code lengths of 7
+ (1 + 4 * (6 - 2) + 5)
+ 17: Repeat a code length of 0 for 3 - 10 times.
+ (3 bits of length)
+ A repeated code length code of 17 modifies the
+ repeat count of the previous one as follows:
+ repeat count = (8 * (repeat count - 2)) +
+ (3 - 10 on the next 3 bits)
+
+ A code length of 0 indicates that the corresponding symbol in the
+ alphabet will not occur in the compressed data, and should not
+ participate in the Huffman code construction algorithm given
+ earlier. A complex Huffman code must have at least two non-zero
+ code lengths.
+
+ The bit lengths of the Huffman code over the code length alphabet
+ are compressed with the following static Huffman code:
+
+ Symbol Code
+ ------ ----
+ 0 00
+ 1 1010
+ 2 100
+ 3 11
+ 4 01
+ 5 1011
+
+ We can now define the format of the complex Huffman code as
+ follows:
+
+ 2 bits: HSKIP, values of 0, 2 or 3 represent the respective
+ number of leading zeros. (Value of 1 indicates the
+ Simple Huffman code.)
+
+
+ Code lengths for symbols in the code length alphabet given
+ just above, in the order: 1, 2, 3, 4, 0, 17, 5, 6, 16, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15
+
+ The code lengths of code length symbols are between 0 and
+ 5 and they are represented with 2 - 5 bits according to
+ the static Huffman code above. A code length of 0 means
+ the corresponding code length symbol is not used.
+
+ If HSKIP is 2 or 3, a respective number of leading code
+ lengths are implicit zeros and are not present in the
+ code lengths sequence above. If there are at least two non-
+ zero code lengths, any trailing zero code lengths are
+ omitted, i.e. the last code length in the sequence must
+ be non-zero. In this case the sum of (32 >> code length)
+ over all the non-zero code lengths must equal to 32.
+
+ Sequence of code lengths symbols, encoded using the code
+ length Huffman code. Any trailing 0 or 17 must be
+ omitted, i.e. the last encoded code length symbol must be
+ between 1 and 16. The sum of (32768 >> code length) over
+ all the non-zero code lengths in the alphabet, including
+ those encoded using repeat code(s) of 16, must equal to
+ 32768.
+
+4. Encoding of distances
+
+ As described in Section 2, one component of a compressed meta-block
+ is a sequence of backward distances. In this section we provide the
+ details to the encoding of distances.
+
+ Each distance in the compressed data part of a meta-block is
+ represented with a pair <distance code, extra bits>. The distance
+ code and the extra bits are encoded back-to-back, the distance code
+ is encoded using a Huffman code over the distance code alphabet,
+ while the extra bits value is encoded as a fixed-width machine
+ integer. The number of extra bits can be 0 - 24, and it is dependent
+ on the distance code.
+
+ To convert a distance code and associated extra bits to a backward
+ distance, we need the sequence of past distances and two additional
+ parameters, the number of "postfix bits", denoted by NPOSTFIX, and
+ the number of direct distance codes, denoted by NDIRECT. Both of
+ these parameters are encoded in the meta-block header. We will also
+ use the following derived parameter:
+
+ POSTFIX_MASK = ((1 << NPOSTFIX) - 1)
+
+ The first 16 distance codes are special short codes that reference
+ past distances as follows:
+
+ 0: last distance
+ 1: second last distance
+ 2: third last distance
+ 3: fourth last distance
+ 4: last distance - 1
+ 5: last distance + 1
+ 6: last distance - 2
+ 7: last distance + 2
+ 8: last distance - 3
+         9: last distance + 3
+ 10: second last distance - 1
+ 11: second last distance + 1
+ 12: second last distance - 2
+ 13: second last distance + 2
+ 14: second last distance - 3
+ 15: second last distance + 3
+
+ The ring-buffer of four last distances is initialized by the values
+ 16, 15, 11 and 4 (i.e. the fourth last is set to 16, the third last
+ to 15, the second last to 11 and the last distance to 4) at the
+ beginning of the *stream* (as opposed to the beginning of the meta-
+ block) and it is not reset at meta-block boundaries. When a distance
+ code 0 appears, the distance it represents (i.e. the last distance
+ in the sequence of distances) is not pushed to the ring-buffer of
+ last distances, in other words, the expression "(second, third,
+ fourth) last distance" means the (second, third, fourth) last
+ distance that was not represented by a 0 distance code.
+
+ The next NDIRECT distance codes, from 16 to 15 + NDIRECT, represent
+ distances from 1 to NDIRECT. Neither the distance short codes, nor
+ the NDIRECT direct distance codes have any extra bits.
+
+ Distance codes 16 + NDIRECT and greater all have extra bits, the
+ number of extra bits for a distance code "dcode" is given by the
+ following formula:
+
+ ndistbits = 1 + ((dcode - NDIRECT - 16) >> (NPOSTFIX + 1))
+
+ The maximum number of extra bits is 24, therefore the size of the
+ distance code alphabet is (16 + NDIRECT + (48 << NPOSTFIX)).
+
+ Given a distance code "dcode" (>= 16 + NDIRECT), and extra bits
+ "dextra", the backward distance is given by the following formula:
+
+ hcode = (dcode - NDIRECT - 16) >> NPOSTFIX
+ lcode = (dcode - NDIRECT - 16) & POSTFIX_MASK
+ offset = ((2 + (hcode & 1)) << ndistbits) - 4;
+ distance = ((offset + dextra) << NPOSTFIX) + lcode + NDIRECT + 1
+
+5. Encoding of literal insertion lengths and copy lengths
+
+ As described in Section 2, the literal insertion lengths and backward
+ copy lengths are encoded using a single Huffman code. This section
+ provides the details to this encoding.
+
+ Each <insertion length, copy length> pair in the compressed data part
+ of a meta-block is represented with the following triplet:
+
+ <insert-and-copy length code, insert extra bits, copy extra bits>
+
+ The insert-and-copy length code, the insert extra bits and the copy
+ extra bits are encoded back-to-back, the insert-and-copy length code
+ is encoded using a Huffman code over the insert-and-copy length code
+ alphabet, while the extra bits values are encoded as fixed-width
+ machine integers. The number of insert and copy extra bits can be
+ 0 - 24, and they are dependent on the insert-and-copy length code.
+
+ Some of the insert-and-copy length codes also express the fact that
+ the distance code of the distance in the same command is 0, i.e. the
+ distance component of the command is the same as that of the previous
+ command. In this case, the distance code and extra bits for the
+ distance are omitted from the compressed data stream.
+
+ We describe the insert-and-copy length code alphabet in terms of the
+ (not directly used) insert length code and copy length code
+ alphabets. The symbols of the insert length code alphabet, along with
+ the number of insert extra bits and the range of the insert lengths
+ are as follows:
+
+ Extra Extra Extra
+ Code Bits Lengths Code Bits Lengths Code Bits Lengths
+ ---- ---- ------ ---- ---- ------- ---- ---- -------
+ 0 0 0 8 2 10-13 16 6 130-193
+ 1 0 1 9 2 14-17 17 7 194-321
+         2     0      2      10    3    18-25    18    8   322-577
+ 3 0 3 11 3 26-33 19 9 578-1089
+ 4 0 4 12 4 34-49 20 10 1090-2113
+ 5 0 5 13 4 50-65 21 12 2114-6209
+ 6 1 6,7 14 5 66-97 22 14 6210-22593
+ 7 1 8,9 15 5 98-129 23 24 22594-16799809
+
+ The symbols of the copy length code alphabet, along with the number
+ of copy extra bits and the range of copy lengths are as follows:
+
+ Extra Extra Extra
+ Code Bits Lengths Code Bits Lengths Code Bits Lengths
+ ---- ---- ------ ---- ---- ------- ---- ---- -------
+ 0 0 2 8 1 10,11 16 5 70-101
+ 1 0 3 9 1 12,13 17 5 102-133
+ 2 0 4 10 2 14-17 18 6 134-197
+ 3 0 5 11 2 18-21 19 7 198-325
+ 4 0 6 12 3 22-29 20 8 326-581
+ 5 0 7 13 3 30-37 21 9 582-1093
+ 6 0 8 14 4 38-53 22 10 1094-2117
+ 7 0 9 15 4 54-69 23 24 2118-16779333
+
+ To convert an insert-and-copy length code to an insert length code
+ and a copy length code, the following table can be used:
+
+ Insert
+ length Copy length code
+ code 0-7 8-15 16-23
+ +---------+---------+
+ | | |
+ 0-7 | 0-63 | 64-127 | <--- distance code 0
+ | | |
+ +---------+---------+---------+
+ | | | |
+      0-7  | 128-191 | 192-255 | 384-447 |
+ | | | |
+ +---------+---------+---------+
+ | | | |
+ 8-15 | 256-319 | 320-383 | 512-575 |
+ | | | |
+ +---------+---------+---------+
+ | | | |
+     16-23 | 448-511 | 576-639 | 640-703 |
+ | | | |
+ +---------+---------+---------+
+
+ First, look up the cell with the 64 value range containing the
+ insert-and-copy length code, this gives the insert length code and
+ the copy length code ranges, both 8 values long. The copy length
+ code within its range is determined by the lowest 3 bits of the
+ insert-and-copy length code, and the insert length code within its
+ range is determined by bits 3-5 (counted from the LSB) of the insert-
+ and-copy length code. Given the insert length and copy length codes,
+ the actual insert and copy lengths can be obtained by reading the
+ number of extra bits given by the tables above.
+
+ If the insert-and-copy length code is between 0 and 127, the distance
+ code of the command is set to zero (the last distance reused).
+
+6. Encoding of block switch commands
+
+ As described in Section 2, a block-switch command is a pair
+ <block type, block length>. These are encoded in the compressed data
+ part of the meta-block, right before the start of each new block of a
+ particular block category.
+
+ Each block type in the compressed data is represented with a block
+ type code, encoded using a Huffman code over the block type code
+ alphabet. A block type code 0 means that the block type is the same
+ as the type of the second last block from the same block category,
+ while a block type code 1 means that the block type equals the last
+ block type plus one. If the last block type is the maximal possible,
+ then a block type code 1 means block type 0. Block type codes 2 - 257
+ represent block types 0 - 255. The second last and last block types
+ are initialized with 0 and 1, respectively, at the beginning of each
+ meta-block.
+
+ The first block type of each block category must be 0 and the block
+ type of the first block switch command is therefore not encoded in
+ the compressed data.
+
+ The number of different block types in each block category, denoted
+ by NBLTYPESL, NBLTYPESI, and NBLTYPESD for literals, insert-and-copy
+ lengths and distances, respectively, is encoded in the meta-block
+ header, and it must equal to the largest block type plus one in that
+ block category. In other words, the set of literal, insert-and-copy
+ length and distance block types must be [0..NBLTYPESL-1],
+ [0..NBLTYPESI-1], and [0..NBLTYPESD-1], respectively. From this it
+ follows that the alphabet size of literal, insert-and-copy length and
+   distance block type codes is NBLTYPESL + 2, NBLTYPESI + 2 and
+ NBLTYPESD + 2, respectively.
+
+ Each block length in the compressed data is represented with a pair
+ <block length code, extra bits>. The block length code and the extra
+ bits are encoded back-to-back, the block length code is encoded using
+ a Huffman code over the block length code alphabet, while the extra
+ bits value is encoded as a fixed-width machine integer. The number of
+ extra bits can be 0 - 24, and it is dependent on the block length
+ code. The symbols of the block length code alphabet, along with the
+ number of extra bits and the range of block lengths are as follows:
+
+ Extra Extra Extra
+ Code Bits Lengths Code Bits Lengths Code Bits Lengths
+ ---- ---- ------ ---- ---- ------- ---- ---- -------
+ 0 2 1-4 9 4 65-80 18 7 369-496
+ 1 2 5-8 10 4 81-96 19 8 497-752
+ 2 2 9-12 11 4 97-112 20 9 753-1264
+ 3 2 13-16 12 5 113-144 21 10 1265-2288
+ 4 3 17-24 13 5 145-176 22 11 2289-4336
+ 5 3 25-32 14 5 177-208 23 12 4337-8432
+ 6 3 33-40 15 5 209-240 24 13 8433-16624
+ 7 3 41-48 16 6 241-304 25 24 16625-16793840
+ 8 4 49-64 17 6 305-368
+
+ The first block switch command of each block category is special in
+ the sense that it is encoded in the meta-block header, and as
+ described earlier the block type code is omitted, since it is an
+ implicit zero.
+
+7. Context modeling
+
+ As described in Section 2, the Huffman tree used to encode a literal
+ byte or a distance code depends on the context ID and the block type.
+ This section specifies how to compute the context ID for a particular
+ literal and distance code, and how to encode the context map that
+ maps a <context ID, block type> pair to the index of a Huffman
+ tree in the array of literal and distance Huffman trees.
+
+ 7.1. Context modes and context ID lookup for literals
+
+ The context for encoding the next literal is defined by the last
+ two bytes in the stream (p1, p2, where p1 is the most recent
+ byte), regardless if these bytes are produced by backward
+ references or by literal insertions.
+
+ There are four methods, called context modes, to compute the
+ Context ID:
+ * MSB6, where the Context ID is the value of six most
+ significant bits of p1,
+ * LSB6, where the Context ID is the value of six least
+ significant bits of p1,
+ * UTF8, where the Context ID is a complex function of p1, p2,
+ optimized for text compression, and
+ * Signed, where Context ID is a complex function of p1, p2,
+ optimized for compressing sequences of signed integers.
+
+ The Context ID for the UTF8 and Signed context modes is computed
+ using the following lookup tables Lut0, Lut1, and Lut2.
+
+ Lut0 :=
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
+ 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
+ 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
+ 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
+ 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3
+
+ Lut1 :=
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
+ 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+
+ Lut2 :=
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7
+
+ Given p1 is the last decoded byte and p2 is the second last
+ decoded byte the context IDs can be computed as follows:
+
+ For LSB6 : Context ID = p1 & 0x3f
+ For MSB6 : Context ID = p1 >> 2
+ For UTF8 : Context ID = Lut0[p1] | Lut1[p2]
+ For Signed: Context ID = (Lut2[p1] << 3) | Lut2[p2]
+
+ The context modes LSB6, MSB6, UTF8, and Signed are denoted by
+ integers 0, 1, 2, 3.
+
+ The context mode is defined for each literal block type and they
+ are stored in a consecutive array of bits in the meta-block
+ header, always two bits per block type.
+
+ 7.2. Context ID for distances
+
+ The context for encoding a distance code is defined by the copy
+ length corresponding to the distance. The context IDs are 0, 1, 2,
+ and 3 for copy lengths 2, 3, 4, and more than 4, respectively.
+
+ 7.3. Encoding of the context map
+
+ There are two kinds of context maps, for literals and for
+ distances. The size of the context map is 64 * NBLTYPESL for
+ literals, and 4 * NBLTYPESD for distances. Each value in the
+ context map is an integer between 0 and 255, indicating the index
+ of the Huffman tree to be used when encoding the next literal or
+ distance.
+
+ The context map is encoded as a one-dimensional array,
+ CMAPL[0..(64 * NBLTYPESL - 1)] and CMAPD[0..(4 * NBLTYPESD - 1)].
+
+ The index of the Huffman tree for encoding a literal or distance
+ code with context ID "cid" and block type "bltype" is
+
+ index of literal Huffman tree = CMAPL[bltype * 64 + cid]
+
+ index of distance Huffman tree = CMAPD[bltype * 4 + cid]
+
+ The values of the context map are encoded with the combination
+ of run length encoding for zero values and Huffman coding. Let
+ RLEMAX denote the number of run length codes and NTREES denote the
+ maximum value in the context map plus one. NTREES must equal the
+ number of different values in the context map, in other words,
+ the different values in the context map must be the [0..NTREES-1]
+ interval. The alphabet of the Huffman code has the following
+ RLEMAX + NTREES symbols:
+
+ 0: value zero
+ 1: repeat a zero 2-3 times, read 1 bit for repeat length
+ 2: repeat a zero 4-7 times, read 2 bits for repeat length
+ ...
+ RLEMAX: repeat a zero (2^RLEMAX)-(2^(RLEMAX+1) - 1) times,
+ read RLEMAX bits for repeat length
+ RLEMAX + 1: value 1
+ ...
+ RLEMAX + NTREES - 1: value NTREES - 1
+
+ If RLEMAX = 0, the run length coding is not used, and the symbols
+ of the alphabet are directly the values in the context map. We can
+ now define the format of the context map (the same format is used
+ for literal and distance context maps):
+
+ 1-5 bits: RLEMAX, 0 is encoded with one 0 bit, and values
+ 1 - 16 are encoded with bit pattern 1xxxx
+
+ Huffman code with alphabet size NTREES + RLEMAX
+
+ Context map size values encoded with the above Huffman code
+ and run length coding for zero values
+
+ 1 bit: IMTF bit, if set, we do an inverse move-to-front
+ transform on the values in the context map to get
+ the Huffman code indexes
+
+ For the encoding of NTREES see Section 9.2.
+
+8. Static dictionary
+
+ At any given point during decoding the compressed data, a reference
+ to a duplicated string in the output produced so far has a maximum
+ backward distance value, which is the minimum of the window size and
+ the number of output bytes produced. However, decoding a distance
+ from the input stream, as described in section 4, can produce
+ distances that are greater than this maximum allowed value. The
+ difference between these distances and the first invalid distance
+ value is treated as reference to a word in the static dictionary
+ given in Appendix A. The maximum valid copy length for a static
+ dictionary reference is 24. The static dictionary has three parts:
+
+ * DICT[0..DICTSIZE], an array of bytes
+ * DOFFSET[0..24], an array of byte offset values for each length
+ * NDBITS[0..24], an array of bit-depth values for each length
+
+ The number of static dictionary words for a given length is:
+
+ NWORDS[length] = 0 (if length < 3)
+      NWORDS[length] = (1 << NDBITS[length])   (if length >= 3)
+
+ DOFFSET and DICTSIZE are defined by the following recursion:
+
+ DOFFSET[0] = 0
+ DOFFSET[length + 1] = DOFFSET[length] + length * NWORDS[length]
+ DICTSIZE = DOFFSET[24] + 24 * NWORDS[24]
+
+ The offset of a word within the DICT array for a given length and
+ index is:
+
+ offset(length, index) = DOFFSET[length] + index * length
+
+ Each static dictionary word has 64 different forms, given by applying
+ a word transformation to a base word in the DICT array. The list of
+ word transformations is given in Appendix B. The static dictionary
+ word for a <length, distance> pair can be reconstructed as follows:
+
+ word_id = distance - (max allowed distance + 1)
+ index = word_id % NWORDS[length]
+ base_word = DICT[offset(length, index)..offset(length, index+1))
+      transform_id = word_id >> NDBITS[length]
+
+ The string copied to the output stream is computed by applying the
+ transformation to the base dictionary word. If transform_id is
+ greater than 63 or length is greater than 24, the compressed data set
+ is invalid and must be discarded.
+
+9. Compressed data format
+
+ In this section we describe the format of the compressed data set in
+ terms of the format of the individual data items described in the
+ previous sections.
+
+ 9.1. Format of the stream header
+
+ The stream header has only the following one field:
+
+ 1-4 bits: WBITS, a value in the range 16 - 24, value 16 is
+ encoded with one 0 bit, and values 17 - 24 are
+ encoded with bit pattern 1xxx
+
+ The size of the sliding window, which is the maximum value of any
+ non-dictionary reference backward distance, is given by the
+ following formula:
+
+ window size = (1 << WBITS) - 16
+
+ 9.2. Format of the meta-block header
+
+ A compliant compressed data set has at least one meta-block. Each
+ meta-block contains a header with information about the
+ uncompressed length of the meta-block, and a bit signaling if the
+ meta-block is the last one. The format of the meta-block header is
+ the following:
+
+ 1 bit: ISLAST, set to 1 if this is the last meta-block
+ 1 bit: ISEMPTY, set to 1 if the meta-block is empty, this
+ field is only present if ISLAST bit is set, since
+ only the last meta-block can be empty
+ 2 bits: MNIBBLES, (# of nibbles to represent the length) - 4
+
+ (MNIBBLES + 4) x 4 bits: MLEN - 1, where MLEN is the length
+ of the meta-block in the input data in bytes
+
+ 1 bit: ISUNCOMPRESSED, if set to 1, any bits of input up to
+ the next byte boundary are ignored, and the rest of
+ the meta-block contains MLEN bytes of literal data;
+ this field is only present if ISLAST bit is not set
+
+ 1-11 bits: NBLTYPESL, # of literal block types, encoded with
+ the following variable length code:
+
+ Value Bit Pattern
+ ----- -----------
+ 1 0
+ 2 1000
+ 3-4 1001x
+ 5-8 1010xx
+ 9-16 1011xxx
+ 17-32 1100xxxx
+ 33-64 1101xxxxx
+ 65-128 1110xxxxxx
+ 129-256 1111xxxxxxx
+
+ Huffman code over the block type code alphabet for literal
+ block types, appears only if NBLTYPESL >= 2
+
+ Huffman code over the block length code alphabet for literal
+ block lengths, appears only if NBLTYPESL >= 2
+
+ Block length code + Extra bits for first literal block
+ length, appears only if NBLTYPESL >= 2
+
+ 1-11 bits: NBLTYPESI, # of insert-and-copy block types, encoded
+ with the same variable length code as above
+
+ Huffman code over the block type code alphabet for insert-
+ and-copy block types, only if NBLTYPESI >= 2
+
+ Huffman code over the block length code alphabet for insert-
+ and-copy block lengths, only if NBLTYPESI >= 2
+
+ Block length code + Extra bits for first insert-and-copy
+ block length, only if NBLTYPESI >= 2
+
+ 1-11 bits: NBLTYPESD, # of distance block types, encoded with
+ the same variable length code as above
+
+ Huffman code over the block type code alphabet for distance
+ block types, appears only if NBLTYPESD >= 2
+
+ Huffman code over the block length code alphabet for
+ distance block lengths, only if NBLTYPESD >= 2
+
+ Block length code + Extra bits for first distance block
+ length, only if NBLTYPESD >= 2
+
+ 2 bits: NPOSTFIX, parameter used in the distance coding
+
+ 4 bits: four most significant bits of NDIRECT, to get the
+ actual value of the parameter NDIRECT, left-shift
+ this four bit number by NPOSTFIX bits
+
+ NBLTYPESL x 2 bits: context mode for each literal block type
+
+ 1-11 bits: NTREESL, # of literal Huffman trees, encoded with
+ the same variable length code as NBLTYPESL
+
+ Literal context map, encoded as described in Paragraph 7.3,
+ appears only if NTREESL >= 2, otherwise the context map
+ has only zero values
+
+ 1-11 bits: NTREESD, # of distance Huffman trees, encoded with
+ the same variable length code as NBLTYPESD
+
+ Distance context map, encoded as described in Paragraph 7.3,
+ appears only if NTREESD >= 2, otherwise the context map
+ has only zero values
+
+ NTREESL Huffman codes for literals
+
+ NBLTYPESI Huffman codes for insert-and-copy lengths
+
+ NTREESD Huffman codes for distances
+
+ 9.3. Format of the meta-block data
+
+ The compressed data part of a meta-block consists of a series of
+ commands. Each command has the following format:
+
+ Block type code for next insert-and-copy block type, appears
+ only if NBLTYPESI >= 2 and the previous insert-and-copy
+ block has ended
+
+        Block length code + Extra bits for next insert-and-copy
+           block length, appears only if NBLTYPESI >= 2 and the
+           previous insert-and-copy block has ended
+
+ Insert-and-copy length, encoded as in section 5, using the
+ insert-and-copy length Huffman code with the current
+ insert-and-copy block type index
+
+ Insert length number of literals, with the following format:
+
+ Block type code for next literal block type, appears
+ only if NBLTYPESL >= 2 and the previous literal
+ block has ended
+
+ Block length code + Extra bits for next literal block
+ length, appears only if NBLTYPESL >= 2 and the
+ previous literal block has ended
+
+           Next byte of the input data, encoded with the literal
+              Huffman code with the index determined by the
+              previous two bytes of the input data, the current
+              literal block type and the context map, as
+              described in Paragraph 7.3.
+
+ Block type code for next distance block type, appears only
+ if NBLTYPESD >= 2 and the previous distance block has
+ ended
+
+ Block length code + Extra bits for next distance block
+ length, appears only if NBLTYPESD >= 2 and the previous
+ distance block has ended
+
+ Distance code, encoded as in section 4, using the distance
+ Huffman code with the current distance block type index,
+ appears only if the distance code is not an implicit 0,
+ as indicated by the insert-and-copy length code
+
+ The number of commands in the meta-block is such that the sum of
+ insert lengths and copy lengths over all the commands gives the
+ uncompressed length, MLEN encoded in the meta-block header.
+
+10. Decoding algorithm
+
+ The decoding algorithm that produces the output data is as follows:
+
+ read window size
+ do
+ read ISLAST bit
+ if ISLAST
+ read ISEMPTY bit
+ if ISEMPTY
+ break from loop
+ read MLEN
+ if not ISLAST
+ read ISUNCOMPRESSED bit
+ if ISUNCOMPRESSED
+ skip any bits up to the next byte boundary
+ copy MLEN bytes of input to the output stream
+ continue to the next meta-block
+ loop for each three block categories (i = L, I, D)
+ read NBLTYPESi
+ if NBLTYPESi >= 2
+ read Huffman code for block types, HTREE_BTYPE_i
+ read Huffman code for block lengths, HTREE_BLEN_i
+ read block length, BLEN_i
+ set block type, BTYPE_i to 0
+ initialize second last and last block types to 0 and 1
+ else
+ set block type, BTYPE_i to 0
+ set block length, BLEN_i to 268435456
+ read NPOSTFIX and NDIRECT
+ read array of literal context modes, CMODE[]
+ read NTREESL
+ if NTREESL >= 2
+ read literal context map, CMAPL[]
+ else
+ fill CMAPL[] with zeros
+ read NTREESD
+ if NTREESD >= 2
+ read distance context map, CMAPD[]
+ else
+ fill CMAPD[] with zeros
+ read array of Huffman codes for literals, HTREEL[]
+ read array of Huffman codes for insert-and-copy, HTREEI[]
+ read array of Huffman codes for distances, HTREED[]
+ do
+ if BLEN_I is zero
+ read block type using HTREE_BTYPE_I and set BTYPE_I
+ read block length using HTREE_BLEN_I and set BLEN_I
+ decrement BLEN_I
+ read insert and copy length, ILEN, CLEN with HTREEI[BTYPE_I]
+ loop for ILEN
+ if BLEN_L is zero
+ read block type using HTREE_BTYPE_L and set BTYPE_L
+ read block length using HTREE_BLEN_L and set BLEN_L
+ decrement BLEN_L
+ look up context mode CMODE[BTYPE_L]
+ compute context ID, CIDL from last two bytes of output
+ read literal using HTREEL[CMAPL[64 * BTYPE_L + CIDL]]
+ copy literal to output stream
+ if number of output bytes produced in the loop is MLEN
+ break from loop
+ if distance code is implicit zero from insert-and-copy code
+ set backward distance to the last distance
+ else
+ if BLEN_D is zero
+ read block type using HTREE_BTYPE_D and set BTYPE_D
+ read block length using HTREE_BLEN_D and set BLEN_D
+ decrement BLEN_D
+ compute context ID, CIDD from CLEN
+ read distance code with HTREED[CMAPD[4 * BTYPE_D + CIDD]]
+ compute distance by distance short code substitution
+         move backwards distance bytes in the output stream, and
+            copy CLEN bytes from this position to the output stream,
+            or look up the static dictionary word and copy it to the
+            output stream
+ while number of output bytes produced in the loop < MLEN
+ while not ISLAST
+
+ Note that a duplicated string reference may refer to a string in a
+ previous meta-block, i.e. the backward distance may cross one or
+ more meta-block boundaries. However a backward copy distance
+ cannot refer past the beginning of the output stream and it can
+ not be greater than the window size; any such distance must be
+ interpreted as a reference to a static dictionary word. Also note
+ that the referenced string may overlap the current position, for
+ example, if the last 2 bytes decoded have values X and Y, a string
+ reference with <length = 5, distance = 2> adds X,Y,X,Y,X to the
+ output stream.
+
+11. References
+
+ [1] Huffman, D. A., "A Method for the Construction of Minimum
+ Redundancy Codes", Proceedings of the Institute of Radio
+ Engineers, September 1952, Volume 40, Number 9, pp. 1098-1101.
+
+ [2] Ziv J., Lempel A., "A Universal Algorithm for Sequential Data
+ Compression", IEEE Transactions on Information Theory, Vol. 23,
+ No. 3, pp. 337-343.
+
+ [3] Deutsch, P., "DEFLATE Compressed Data Format Specification
+ version 1.3", RFC 1951, Aladdin Enterprises, May 1996.
+ http://www.ietf.org/rfc/rfc1951.txt
+
+12. Source code
+
+ Source code for a C language implementation of a "brotli" compliant
+ decompressor and a C++ language implementation of a compressor is
+ available in the brotli/ directory within the font-compression-
+ reference open-source project:
+ https://code.google.com/p/font-compression-reference/source/browse/
+
+Appendix A. List of dictionary words
+
+ TO BE WRITTEN
+
+Appendix B. List of word transformations
+
+ TO BE WRITTEN
diff --git a/brotli/dec/Makefile b/brotli/dec/Makefile
new file mode 100644
index 0000000..f1e39b9
--- /dev/null
+++ b/brotli/dec/Makefile
@@ -0,0 +1,10 @@
+#brotli/dec
+
+include ../../shared.mk
+
+OBJS = bit_reader.o decode.o huffman.o safe_malloc.o streams.o
+
+all : $(OBJS)
+
+clean :
+ rm -f $(OBJS)
diff --git a/brotli/dec/README b/brotli/dec/README
new file mode 100644
index 0000000..933bdfd
--- /dev/null
+++ b/brotli/dec/README
@@ -0,0 +1,3 @@
+This directory holds the decoder for brotli compression format.
+
+Brotli is proposed to be used at the byte-compression level in WOFF 2.0 format.
diff --git a/brotli/dec/bit_reader.c b/brotli/dec/bit_reader.c
new file mode 100644
index 0000000..25e33e3
--- /dev/null
+++ b/brotli/dec/bit_reader.c
@@ -0,0 +1,50 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ Bit reading helpers
+*/
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "./bit_reader.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+int BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input) {
+ size_t i;
+ assert(br != NULL);
+
+ br->buf_ptr_ = br->buf_;
+ br->input_ = input;
+ br->val_ = 0;
+ br->pos_ = 0;
+ br->bit_pos_ = 0;
+ br->bits_left_ = 64;
+ br->eos_ = 0;
+ if (!BrotliReadMoreInput(br)) {
+ return 0;
+ }
+ for (i = 0; i < sizeof(br->val_); ++i) {
+ br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (8 * i);
+ ++br->pos_;
+ }
+ return (br->bits_left_ > 64);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/brotli/dec/bit_reader.h b/brotli/dec/bit_reader.h
new file mode 100644
index 0000000..551cc14
--- /dev/null
+++ b/brotli/dec/bit_reader.h
@@ -0,0 +1,176 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ Bit reading helpers
+*/
+
+#ifndef BROTLI_DEC_BIT_READER_H_
+#define BROTLI_DEC_BIT_READER_H_
+
+#include <string.h>
+#include "./streams.h"
+#include "./types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_MAX_NUM_BIT_READ 25
+#define BROTLI_READ_SIZE 4096
+#define BROTLI_IBUF_SIZE (2 * BROTLI_READ_SIZE + 32)
+#define BROTLI_IBUF_MASK (2 * BROTLI_READ_SIZE - 1)
+
+#define UNALIGNED_COPY64(dst, src) *(uint64_t*)(dst) = *(const uint64_t*)(src)
+
+static const uint32_t kBitMask[BROTLI_MAX_NUM_BIT_READ] = {
+ 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
+ 65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
+};
+
+typedef struct {
+ /* Input byte buffer, consist of a ringbuffer and a "slack" region where */
+ /* bytes from the start of the ringbuffer are copied. */
+ uint8_t buf_[BROTLI_IBUF_SIZE];
+ uint8_t* buf_ptr_; /* next input will write here */
+ BrotliInput input_; /* input callback */
+ uint64_t val_; /* pre-fetched bits */
+ uint32_t pos_; /* byte position in stream */
+ uint32_t bit_pos_; /* current bit-reading position in val_ */
+ uint32_t bits_left_; /* how many valid bits left */
+ int eos_; /* input stream is finished */
+} BrotliBitReader;
+
+int BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input);
+
+/* Return the prefetched bits, so they can be looked up. */
+static BROTLI_INLINE uint32_t BrotliPrefetchBits(BrotliBitReader* const br) {
+ return (uint32_t)(br->val_ >> br->bit_pos_);
+}
+
+/* For jumping over a number of bits in the bit stream when accessed with */
+/* BrotliPrefetchBits and BrotliFillBitWindow. */
+static BROTLI_INLINE void BrotliSetBitPos(BrotliBitReader* const br,
+ uint32_t val) {
+#ifdef BROTLI_DECODE_DEBUG
+ uint32_t n_bits = val - br->bit_pos_;
+ const uint32_t bval = (uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
+ printf("[BrotliReadBits] %010ld %2d val: %6x\n",
+ (br->pos_ << 3) + br->bit_pos_ - 64, n_bits, bval);
+#endif
+ br->bit_pos_ = val;
+}
+
+/* Reload up to 64 bits byte-by-byte */
+static BROTLI_INLINE void ShiftBytes(BrotliBitReader* const br) {
+ while (br->bit_pos_ >= 8) {
+ br->val_ >>= 8;
+ br->val_ |= ((uint64_t)br->buf_[br->pos_ & BROTLI_IBUF_MASK]) << 56;
+ ++br->pos_;
+ br->bit_pos_ -= 8;
+ br->bits_left_ -= 8;
+ }
+}
+
+/* Fills up the input ringbuffer by calling the input callback.
+
+ Does nothing if there are at least 32 bytes present after current position.
+
+ Returns 0 if either:
+ - the input callback returned an error, or
+ - there is no more input and the position is past the end of the stream.
+
+ After encountering the end of the input stream, 32 additional zero bytes are
+ copied to the ringbuffer, therefore it is safe to call this function after
+ every 32 bytes of input is read.
+*/
+static BROTLI_INLINE int BrotliReadMoreInput(BrotliBitReader* const br) {
+ if (br->bits_left_ > 320) {
+ return 1;
+ } else if (br->eos_) {
+ return br->bit_pos_ <= br->bits_left_;
+ } else {
+ uint8_t* dst = br->buf_ptr_;
+ int bytes_read = BrotliRead(br->input_, dst, BROTLI_READ_SIZE);
+ if (bytes_read < 0) {
+ return 0;
+ }
+ if (bytes_read < BROTLI_READ_SIZE) {
+ br->eos_ = 1;
+ /* Store 32 bytes of zero after the stream end. */
+#if (defined(__x86_64__) || defined(_M_X64))
+ *(uint64_t*)(dst + bytes_read) = 0;
+ *(uint64_t*)(dst + bytes_read + 8) = 0;
+ *(uint64_t*)(dst + bytes_read + 16) = 0;
+ *(uint64_t*)(dst + bytes_read + 24) = 0;
+#else
+ memset(dst + bytes_read, 0, 32);
+#endif
+ }
+ if (dst == br->buf_) {
+ /* Copy the head of the ringbuffer to the slack region. */
+#if (defined(__x86_64__) || defined(_M_X64))
+ UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 32, br->buf_);
+ UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 24, br->buf_ + 8);
+ UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 16, br->buf_ + 16);
+ UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 8, br->buf_ + 24);
+#else
+ memcpy(br->buf_ + (BROTLI_READ_SIZE << 1), br->buf_, 32);
+#endif
+ br->buf_ptr_ = br->buf_ + BROTLI_READ_SIZE;
+ } else {
+ br->buf_ptr_ = br->buf_;
+ }
+ br->bits_left_ += ((uint32_t)bytes_read << 3);
+ return 1;
+ }
+}
+
+/* Advances the Read buffer by 5 bytes to make room for reading next 24 bits. */
+static BROTLI_INLINE void BrotliFillBitWindow(BrotliBitReader* const br) {
+ if (br->bit_pos_ >= 40) {
+#if (defined(__x86_64__) || defined(_M_X64))
+ br->val_ >>= 40;
+ /* The expression below needs a little-endian arch to work correctly. */
+ /* This gives a large speedup for decoding speed. */
+ br->val_ |= *(const uint64_t*)(
+ br->buf_ + (br->pos_ & BROTLI_IBUF_MASK)) << 24;
+ br->pos_ += 5;
+ br->bit_pos_ -= 40;
+ br->bits_left_ -= 40;
+#else
+ ShiftBytes(br);
+#endif
+ }
+}
+
+/* Reads the specified number of bits from Read Buffer. */
+/* Requires that n_bits is positive. */
+static BROTLI_INLINE uint32_t BrotliReadBits(
+ BrotliBitReader* const br, int n_bits) {
+ uint32_t val;
+ BrotliFillBitWindow(br);
+ val = (uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
+#ifdef BROTLI_DECODE_DEBUG
+ printf("[BrotliReadBits] %010ld %2d val: %6x\n",
+ (br->pos_ << 3) + br->bit_pos_ - 64, n_bits, val);
+#endif
+ br->bit_pos_ += (uint32_t)n_bits;
+ return val;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif /* BROTLI_DEC_BIT_READER_H_ */
diff --git a/brotli/dec/context.h b/brotli/dec/context.h
new file mode 100644
index 0000000..dbc0c36
--- /dev/null
+++ b/brotli/dec/context.h
@@ -0,0 +1,259 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ Lookup table to map the previous two bytes to a context id.
+
+ There are four different context modeling modes defined here:
+ CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
+ CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
+ CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
+ CONTEXT_SIGNED: second-order context model tuned for signed integers.
+
+   The context id for the UTF8 context model is calculated as follows. If p1
+   and p2 are the previous two bytes, we calculate the context as
+
+ context = kContextLookup[p1] | kContextLookup[p2 + 256].
+
+ If the previous two bytes are ASCII characters (i.e. < 128), this will be
+ equivalent to
+
+ context = 4 * context1(p1) + context2(p2),
+
+ where context1 is based on the previous byte in the following way:
+
+ 0 : non-ASCII control
+ 1 : \t, \n, \r
+ 2 : space
+ 3 : other punctuation
+ 4 : " '
+ 5 : %
+ 6 : ( < [ {
+ 7 : ) > ] }
+ 8 : , ; :
+ 9 : .
+ 10 : =
+ 11 : number
+ 12 : upper-case vowel
+ 13 : upper-case consonant
+ 14 : lower-case vowel
+ 15 : lower-case consonant
+
+ and context2 is based on the second last byte:
+
+ 0 : control, space
+ 1 : punctuation
+ 2 : upper-case letter, number
+ 3 : lower-case letter
+
+ If the last byte is ASCII, and the second last byte is not (in a valid UTF8
+ stream it will be a continuation byte, value between 128 and 191), the
+ context is the same as if the second last byte was an ASCII control or space.
+
+ If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
+ be a continuation byte and the context id is 2 or 3 depending on the LSB of
+ the last byte and to a lesser extent on the second last byte if it is ASCII.
+
+ If the last byte is a UTF8 continuation byte, the second last byte can be:
+ - continuation byte: the next byte is probably ASCII or lead byte (assuming
+ 4-byte UTF8 characters are rare) and the context id is 0 or 1.
+ - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
+ - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
+
+ The possible value combinations of the previous two bytes, the range of
+ context ids and the type of the next byte is summarized in the table below:
+
+ |--------\-----------------------------------------------------------------|
+ | \ Last byte |
+ | Second \---------------------------------------------------------------|
+ | last byte \ ASCII | cont. byte | lead byte |
+ | \ (0-127) | (128-191) | (192-) |
+ |=============|===================|=====================|==================|
+ | ASCII | next: ASCII/lead | not valid | next: cont. |
+ | (0-127) | context: 4 - 63 | | context: 2 - 3 |
+ |-------------|-------------------|---------------------|------------------|
+ | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
+ | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
+ |-------------|-------------------|---------------------|------------------|
+ | lead byte | not valid | next: ASCII/lead | not valid |
+ | (192-207) | | context: 0 - 1 | |
+ |-------------|-------------------|---------------------|------------------|
+ | lead byte | not valid | next: cont. | not valid |
+ | (208-) | | context: 2 - 3 | |
+ |-------------|-------------------|---------------------|------------------|
+
+ The context id for the signed context mode is calculated as:
+
+ context = (kContextLookup[512 + p1] << 3) | kContextLookup[512 + p2].
+
+ For any context modeling modes, the context ids can be calculated by |-ing
+ together two lookups from one table using context model dependent offsets:
+
+ context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2].
+
+ where offset1 and offset2 are dependent on the context mode.
+*/
+
+#ifndef BROTLI_DEC_CONTEXT_H_
+#define BROTLI_DEC_CONTEXT_H_
+
+#include "./types.h"
+
+enum ContextType {
+ CONTEXT_LSB6 = 0,
+ CONTEXT_MSB6 = 1,
+ CONTEXT_UTF8 = 2,
+ CONTEXT_SIGNED = 3
+};
+
+/* Common context lookup table for all context modes. */
+static const uint8_t kContextLookup[1792] = {
+ /* CONTEXT_UTF8, last byte. */
+ /* ASCII range. */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
+ 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
+ 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
+ 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
+ 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
+ /* UTF8 continuation byte range. */
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ /* UTF8 lead byte range. */
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ /* CONTEXT_UTF8 second last byte. */
+ /* ASCII range. */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
+ 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
+ /* UTF8 continuation byte range. */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* UTF8 lead byte range. */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ /* CONTEXT_SIGNED, second last byte. */
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+ /* CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits. */
+ 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
+ /* CONTEXT_LSB6, last byte. */
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ /* CONTEXT_MSB6, last byte. */
+ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+ 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
+ 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
+ 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
+ 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
+ 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
+ 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
+ 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
+ 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
+ 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
+ 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
+ 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
+ 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
+ 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
+ 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
+ /* CONTEXT_{M,L}SB6, second last byte, */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const int kContextLookupOffsets[8] = {
+ /* CONTEXT_LSB6 */
+ 1024, 1536,
+ /* CONTEXT_MSB6 */
+ 1280, 1536,
+ /* CONTEXT_UTF8 */
+ 0, 256,
+ /* CONTEXT_SIGNED */
+ 768, 512,
+};
+
+#endif /* BROTLI_DEC_CONTEXT_H_ */
diff --git a/brotli/dec/decode.c b/brotli/dec/decode.c
new file mode 100644
index 0000000..a8e41ab
--- /dev/null
+++ b/brotli/dec/decode.c
@@ -0,0 +1,1023 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "./bit_reader.h"
+#include "./context.h"
+#include "./decode.h"
+#include "./huffman.h"
+#include "./prefix.h"
+#include "./safe_malloc.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#ifdef BROTLI_DECODE_DEBUG
+#define BROTLI_LOG_UINT(name) \
+ printf("[%s] %s = %lu\n", __func__, #name, (unsigned long)(name))
+#define BROTLI_LOG_ARRAY_INDEX(array_name, idx) \
+ printf("[%s] %s[%lu] = %lu\n", __func__, #array_name, \
+ (unsigned long)(idx), (unsigned long)array_name[idx])
+#else
+#define BROTLI_LOG_UINT(name)
+#define BROTLI_LOG_ARRAY_INDEX(array_name, idx)
+#endif
+
+static const uint8_t kDefaultCodeLength = 8;
+static const uint8_t kCodeLengthRepeatCode = 16;
+static const int kNumLiteralCodes = 256;
+static const int kNumInsertAndCopyCodes = 704;
+static const int kNumBlockLengthCodes = 26;
+static const int kLiteralContextBits = 6;
+static const int kDistanceContextBits = 2;
+
+#define CODE_LENGTH_CODES 18
+static const uint8_t kCodeLengthCodeOrder[CODE_LENGTH_CODES] = {
+ 1, 2, 3, 4, 0, 17, 5, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
+
+#define NUM_DISTANCE_SHORT_CODES 16
+static const int kDistanceShortCodeIndexOffset[NUM_DISTANCE_SHORT_CODES] = {
+ 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
+};
+
+static const int kDistanceShortCodeValueOffset[NUM_DISTANCE_SHORT_CODES] = {
+ 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
+};
+
+static BROTLI_INLINE int DecodeWindowBits(BrotliBitReader* br) {
+ if (BrotliReadBits(br, 1)) {
+ return 17 + (int)BrotliReadBits(br, 3);
+ } else {
+ return 16;
+ }
+}
+
+/* Decodes a number in the range [0..255], by reading 1 - 11 bits. */
+static BROTLI_INLINE int DecodeVarLenUint8(BrotliBitReader* br) {
+ if (BrotliReadBits(br, 1)) {
+ int nbits = (int)BrotliReadBits(br, 3);
+ if (nbits == 0) {
+ return 1;
+ } else {
+ return (int)BrotliReadBits(br, nbits) + (1 << nbits);
+ }
+ }
+ return 0;
+}
+
+static void DecodeMetaBlockLength(BrotliBitReader* br,
+ int* meta_block_length,
+ int* input_end,
+ int* is_uncompressed) {
+ int size_nibbles;
+ int i;
+ *input_end = (int)BrotliReadBits(br, 1);
+ *meta_block_length = 0;
+ *is_uncompressed = 0;
+ if (*input_end && BrotliReadBits(br, 1)) {
+ return;
+ }
+ size_nibbles = (int)BrotliReadBits(br, 2) + 4;
+ for (i = 0; i < size_nibbles; ++i) {
+ *meta_block_length |= (int)BrotliReadBits(br, 4) << (i * 4);
+ }
+ ++(*meta_block_length);
+ if (!*input_end) {
+ *is_uncompressed = (int)BrotliReadBits(br, 1);
+ }
+}
+
+/* Decodes the next Huffman code from bit-stream. */
+static BROTLI_INLINE int ReadSymbol(const HuffmanTree* tree,
+ BrotliBitReader* br) {
+ uint32_t bits;
+ uint32_t bitpos;
+ int lut_ix;
+ uint8_t lut_bits;
+ const HuffmanTreeNode* node = tree->root_;
+ BrotliFillBitWindow(br);
+ bits = BrotliPrefetchBits(br);
+ bitpos = br->bit_pos_;
+ /* Check if we find the bit combination from the Huffman lookup table. */
+ lut_ix = bits & (HUFF_LUT - 1);
+ lut_bits = tree->lut_bits_[lut_ix];
+ if (lut_bits <= HUFF_LUT_BITS) {
+ BrotliSetBitPos(br, bitpos + lut_bits);
+ return tree->lut_symbol_[lut_ix];
+ }
+ node += tree->lut_jump_[lut_ix];
+ bitpos += HUFF_LUT_BITS;
+ bits >>= HUFF_LUT_BITS;
+
+ /* Decode the value from a binary tree. */
+ assert(node != NULL);
+ do {
+ node = HuffmanTreeNextNode(node, bits & 1);
+ bits >>= 1;
+ ++bitpos;
+ } while (HuffmanTreeNodeIsNotLeaf(node));
+ BrotliSetBitPos(br, bitpos);
+ return node->symbol_;
+}
+
+static void PrintUcharVector(const uint8_t* v, int len) {
+ while (len-- > 0) printf(" %d", *v++);
+ printf("\n");
+}
+
+static int ReadHuffmanCodeLengths(
+ const uint8_t* code_length_code_lengths,
+ int num_symbols, uint8_t* code_lengths,
+ BrotliBitReader* br) {
+ int ok = 0;
+ int symbol;
+ uint8_t prev_code_len = kDefaultCodeLength;
+ int repeat = 0;
+ uint8_t repeat_length = 0;
+ int space = 32768;
+ HuffmanTree tree;
+
+ if (!BrotliHuffmanTreeBuildImplicit(&tree, code_length_code_lengths,
+ CODE_LENGTH_CODES)) {
+ printf("[ReadHuffmanCodeLengths] Building code length tree failed: ");
+ PrintUcharVector(code_length_code_lengths, CODE_LENGTH_CODES);
+ return 0;
+ }
+
+ if (!BrotliReadMoreInput(br)) {
+ printf("[ReadHuffmanCodeLengths] Unexpected end of input.\n");
+ return 0;
+ }
+
+ symbol = 0;
+ while (symbol + repeat < num_symbols && space > 0) {
+ uint8_t code_len;
+ if (!BrotliReadMoreInput(br)) {
+ printf("[ReadHuffmanCodeLengths] Unexpected end of input.\n");
+ goto End;
+ }
+ code_len = (uint8_t)ReadSymbol(&tree, br);
+ BROTLI_LOG_UINT(symbol);
+ BROTLI_LOG_UINT(repeat);
+ BROTLI_LOG_UINT(repeat_length);
+ BROTLI_LOG_UINT(code_len);
+ if ((code_len < kCodeLengthRepeatCode) ||
+ (code_len == kCodeLengthRepeatCode && repeat_length == 0) ||
+ (code_len > kCodeLengthRepeatCode && repeat_length > 0)) {
+ while (repeat > 0) {
+ code_lengths[symbol++] = repeat_length;
+ --repeat;
+ }
+ }
+ if (code_len < kCodeLengthRepeatCode) {
+ code_lengths[symbol++] = code_len;
+ if (code_len != 0) {
+ prev_code_len = code_len;
+ space -= 32768 >> code_len;
+ }
+ } else {
+ const int extra_bits = code_len - 14;
+ int i = repeat;
+ if (repeat > 0) {
+ repeat -= 2;
+ repeat <<= extra_bits;
+ }
+ repeat += (int)BrotliReadBits(br, extra_bits) + 3;
+ if (repeat + symbol > num_symbols) {
+ goto End;
+ }
+ if (code_len == kCodeLengthRepeatCode) {
+ repeat_length = prev_code_len;
+ for (; i < repeat; ++i) {
+ space -= 32768 >> repeat_length;
+ }
+ } else {
+ repeat_length = 0;
+ }
+ }
+ }
+ if (space != 0) {
+ printf("[ReadHuffmanCodeLengths] space = %d\n", space);
+ goto End;
+ }
+ if (symbol + repeat > num_symbols) {
+ printf("[ReadHuffmanCodeLengths] symbol + repeat > num_symbols "
+ "(%d + %d vs %d)\n", symbol, repeat, num_symbols);
+ goto End;
+ }
+ while (repeat-- > 0) code_lengths[symbol++] = repeat_length;
+ while (symbol < num_symbols) code_lengths[symbol++] = 0;
+ ok = 1;
+
+ End:
+ BrotliHuffmanTreeRelease(&tree);
+ return ok;
+}
+
+static int ReadHuffmanCode(int alphabet_size,
+ HuffmanTree* tree,
+ BrotliBitReader* br) {
+ int ok = 1;
+ int simple_code_or_skip;
+ uint8_t* code_lengths = NULL;
+
+ code_lengths =
+ (uint8_t*)BrotliSafeMalloc((uint64_t)alphabet_size,
+ sizeof(*code_lengths));
+ if (code_lengths == NULL) {
+ return 0;
+ }
+ if (!BrotliReadMoreInput(br)) {
+ printf("[ReadHuffmanCode] Unexpected end of input.\n");
+ return 0;
+ }
+ /* simple_code_or_skip is used as follows:
+ 1 for simple code;
+ 0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
+ simple_code_or_skip = (int)BrotliReadBits(br, 2);
+ BROTLI_LOG_UINT(simple_code_or_skip);
+ if (simple_code_or_skip == 1) {
+ /* Read symbols, codes & code lengths directly. */
+ int i;
+ int max_bits_counter = alphabet_size - 1;
+ int max_bits = 0;
+ int symbols[4] = { 0 };
+ const int num_symbols = (int)BrotliReadBits(br, 2) + 1;
+ while (max_bits_counter) {
+ max_bits_counter >>= 1;
+ ++max_bits;
+ }
+ memset(code_lengths, 0, (size_t)alphabet_size);
+ for (i = 0; i < num_symbols; ++i) {
+ symbols[i] = (int)BrotliReadBits(br, max_bits) % alphabet_size;
+ code_lengths[symbols[i]] = 2;
+ }
+ code_lengths[symbols[0]] = 1;
+ switch (num_symbols) {
+ case 1:
+ case 3:
+ break;
+ case 2:
+ code_lengths[symbols[1]] = 1;
+ break;
+ case 4:
+ if (BrotliReadBits(br, 1)) {
+ code_lengths[symbols[2]] = 3;
+ code_lengths[symbols[3]] = 3;
+ } else {
+ code_lengths[symbols[0]] = 2;
+ }
+ break;
+ }
+ BROTLI_LOG_UINT(num_symbols);
+ } else { /* Decode Huffman-coded code lengths. */
+ int i;
+ uint8_t code_length_code_lengths[CODE_LENGTH_CODES] = { 0 };
+ int space = 32;
+ for (i = simple_code_or_skip;
+ i < CODE_LENGTH_CODES && space > 0; ++i) {
+ int code_len_idx = kCodeLengthCodeOrder[i];
+ uint8_t v = (uint8_t)BrotliReadBits(br, 2);
+ if (v == 1) {
+ v = (uint8_t)BrotliReadBits(br, 1);
+ if (v == 0) {
+ v = 2;
+ } else {
+ v = (uint8_t)BrotliReadBits(br, 1);
+ if (v == 0) {
+ v = 1;
+ } else {
+ v = 5;
+ }
+ }
+ } else if (v == 2) {
+ v = 4;
+ }
+ code_length_code_lengths[code_len_idx] = v;
+ BROTLI_LOG_ARRAY_INDEX(code_length_code_lengths, code_len_idx);
+ if (v != 0) {
+ space -= (32 >> v);
+ }
+ }
+ ok = ReadHuffmanCodeLengths(code_length_code_lengths, alphabet_size,
+ code_lengths, br);
+ }
+ if (ok) {
+ ok = BrotliHuffmanTreeBuildImplicit(tree, code_lengths, alphabet_size);
+ if (!ok) {
+ printf("[ReadHuffmanCode] HuffmanTreeBuildImplicit failed: ");
+ PrintUcharVector(code_lengths, alphabet_size);
+ }
+ }
+ free(code_lengths);
+ return ok;
+}
+
+static int ReadCopyDistance(const HuffmanTree* tree,
+ int num_direct_codes,
+ int postfix_bits,
+ int postfix_mask,
+ BrotliBitReader* br) {
+ int code;
+ int nbits;
+ int postfix;
+ int offset;
+ code = ReadSymbol(tree, br);
+ if (code < num_direct_codes) {
+ return code;
+ }
+ code -= num_direct_codes;
+ postfix = code & postfix_mask;
+ code >>= postfix_bits;
+ nbits = (code >> 1) + 1;
+ offset = ((2 + (code & 1)) << nbits) - 4;
+ return (num_direct_codes +
+ ((offset + (int)BrotliReadBits(br, nbits)) << postfix_bits) +
+ postfix);
+}
+
+static int ReadBlockLength(const HuffmanTree* tree, BrotliBitReader* br) {
+ int code;
+ int nbits;
+ code = ReadSymbol(tree, br);
+ nbits = kBlockLengthPrefixCode[code].nbits;
+ return kBlockLengthPrefixCode[code].offset + (int)BrotliReadBits(br, nbits);
+}
+
+static void ReadInsertAndCopy(const HuffmanTree* tree,
+ int* insert_len,
+ int* copy_len,
+ int* copy_dist,
+ BrotliBitReader* br) {
+ int code;
+ int range_idx;
+ int insert_code;
+ int insert_extra_bits;
+ int copy_code;
+ int copy_extra_bits;
+ code = ReadSymbol(tree, br);
+ range_idx = code >> 6;
+ if (range_idx >= 2) {
+ range_idx -= 2;
+ *copy_dist = -1;
+ } else {
+ *copy_dist = 0;
+ }
+ insert_code = kInsertRangeLut[range_idx] + ((code >> 3) & 7);
+ copy_code = kCopyRangeLut[range_idx] + (code & 7);
+ *insert_len = kInsertLengthPrefixCode[insert_code].offset;
+ insert_extra_bits = kInsertLengthPrefixCode[insert_code].nbits;
+ if (insert_extra_bits > 0) {
+ *insert_len += (int)BrotliReadBits(br, insert_extra_bits);
+ }
+ *copy_len = kCopyLengthPrefixCode[copy_code].offset;
+ copy_extra_bits = kCopyLengthPrefixCode[copy_code].nbits;
+ if (copy_extra_bits > 0) {
+ *copy_len += (int)BrotliReadBits(br, copy_extra_bits);
+ }
+}
+
+static int TranslateShortCodes(int code, int* ringbuffer, int index) {
+ int val;
+ if (code < NUM_DISTANCE_SHORT_CODES) {
+ index += kDistanceShortCodeIndexOffset[code];
+ index &= 3;
+ val = ringbuffer[index] + kDistanceShortCodeValueOffset[code];
+ } else {
+ val = code - NUM_DISTANCE_SHORT_CODES + 1;
+ }
+ return val;
+}
+
+static void MoveToFront(uint8_t* v, uint8_t index) {
+ uint8_t value = v[index];
+ uint8_t i = index;
+ for (; i; --i) v[i] = v[i - 1];
+ v[0] = value;
+}
+
+static void InverseMoveToFrontTransform(uint8_t* v, int v_len) {
+ uint8_t mtf[256];
+ int i;
+ for (i = 0; i < 256; ++i) {
+ mtf[i] = (uint8_t)i;
+ }
+ for (i = 0; i < v_len; ++i) {
+ uint8_t index = v[i];
+ v[i] = mtf[index];
+ if (index) MoveToFront(mtf, index);
+ }
+}
+
+/* Contains a collection of huffman trees with the same alphabet size. */
+typedef struct {
+ int alphabet_size;
+ int num_htrees;
+ HuffmanTree* htrees;
+} HuffmanTreeGroup;
+
+static void HuffmanTreeGroupInit(HuffmanTreeGroup* group, int alphabet_size,
+ int ntrees) {
+ int i;
+ group->alphabet_size = alphabet_size;
+ group->num_htrees = ntrees;
+ group->htrees = (HuffmanTree*)malloc(sizeof(HuffmanTree) * (size_t)ntrees);
+ for (i = 0; i < ntrees; ++i) {
+ group->htrees[i].root_ = NULL;
+ }
+}
+
+static void HuffmanTreeGroupRelease(HuffmanTreeGroup* group) {
+ int i;
+ for (i = 0; i < group->num_htrees; ++i) {
+ BrotliHuffmanTreeRelease(&group->htrees[i]);
+ }
+ if (group->htrees) {
+ free(group->htrees);
+ }
+}
+
+static int HuffmanTreeGroupDecode(HuffmanTreeGroup* group,
+ BrotliBitReader* br) {
+ int i;
+ for (i = 0; i < group->num_htrees; ++i) {
+ if (!ReadHuffmanCode(group->alphabet_size, &group->htrees[i], br)) {
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static int DecodeContextMap(int context_map_size,
+ int* num_htrees,
+ uint8_t** context_map,
+ BrotliBitReader* br) {
+ int ok = 1;
+ if (!BrotliReadMoreInput(br)) {
+ printf("[DecodeContextMap] Unexpected end of input.\n");
+ return 0;
+ }
+ *num_htrees = DecodeVarLenUint8(br) + 1;
+
+ BROTLI_LOG_UINT(context_map_size);
+ BROTLI_LOG_UINT(*num_htrees);
+
+ *context_map = (uint8_t*)malloc((size_t)context_map_size);
+ if (*context_map == 0) {
+ return 0;
+ }
+ if (*num_htrees <= 1) {
+ memset(*context_map, 0, (size_t)context_map_size);
+ return 1;
+ }
+
+ {
+ HuffmanTree tree_index_htree;
+ int use_rle_for_zeros = (int)BrotliReadBits(br, 1);
+ int max_run_length_prefix = 0;
+ int i;
+ if (use_rle_for_zeros) {
+ max_run_length_prefix = (int)BrotliReadBits(br, 4) + 1;
+ }
+ if (!ReadHuffmanCode(*num_htrees + max_run_length_prefix,
+ &tree_index_htree, br)) {
+ return 0;
+ }
+ for (i = 0; i < context_map_size;) {
+ int code;
+ if (!BrotliReadMoreInput(br)) {
+ printf("[DecodeContextMap] Unexpected end of input.\n");
+ ok = 0;
+ goto End;
+ }
+ code = ReadSymbol(&tree_index_htree, br);
+ if (code == 0) {
+ (*context_map)[i] = 0;
+ ++i;
+ } else if (code <= max_run_length_prefix) {
+ int reps = 1 + (1 << code) + (int)BrotliReadBits(br, code);
+ while (--reps) {
+ if (i >= context_map_size) {
+ ok = 0;
+ goto End;
+ }
+ (*context_map)[i] = 0;
+ ++i;
+ }
+ } else {
+ (*context_map)[i] = (uint8_t)(code - max_run_length_prefix);
+ ++i;
+ }
+ }
+ End:
+ BrotliHuffmanTreeRelease(&tree_index_htree);
+ }
+ if (BrotliReadBits(br, 1)) {
+ InverseMoveToFrontTransform(*context_map, context_map_size);
+ }
+ return ok;
+}
+
+static BROTLI_INLINE void DecodeBlockType(const int max_block_type,
+ const HuffmanTree* trees,
+ int tree_type,
+ int* block_types,
+ int* ringbuffers,
+ int* indexes,
+ BrotliBitReader* br) {
+ int* ringbuffer = ringbuffers + tree_type * 2;
+ int* index = indexes + tree_type;
+ int type_code = ReadSymbol(trees + tree_type, br);
+ int block_type;
+ if (type_code == 0) {
+ block_type = ringbuffer[*index & 1];
+ } else if (type_code == 1) {
+ block_type = ringbuffer[(*index - 1) & 1] + 1;
+ } else {
+ block_type = type_code - 2;
+ }
+ if (block_type >= max_block_type) {
+ block_type -= max_block_type;
+ }
+ block_types[tree_type] = block_type;
+ ringbuffer[(*index) & 1] = block_type;
+ ++(*index);
+}
+
+/* Copy len bytes from src to dst. It can write up to ten extra bytes
+ after the end of the copy.
+
+ The main part of this loop is a simple copy of eight bytes at a time until
+ we've copied (at least) the requested amount of bytes. However, if dst and
+ src are less than eight bytes apart (indicating a repeating pattern of
+ length < 8), we first need to expand the pattern in order to get the correct
+ results. For instance, if the buffer looks like this, with the eight-byte
+ <src> and <dst> patterns marked as intervals:
+
+ abxxxxxxxxxxxx
+ [------] src
+ [------] dst
+
+ a single eight-byte copy from <src> to <dst> will repeat the pattern once,
+ after which we can move <dst> two bytes without moving <src>:
+
+ ababxxxxxxxxxx
+ [------] src
+ [------] dst
+
+ and repeat the exercise until the two no longer overlap.
+
+ This allows us to do very well in the special case of one single byte
+ repeated many times, without taking a big hit for more general cases.
+
+ The worst case of extra writing past the end of the match occurs when
+ dst - src == 1 and len == 1; the last copy will read from byte positions
+ [0..7] and write to [4..11], whereas it was only supposed to write to
+ position 1. Thus, ten excess bytes.
+*/
+static BROTLI_INLINE void IncrementalCopyFastPath(
+ uint8_t* dst, const uint8_t* src, int len) {
+ if (src < dst) {
+ while (dst - src < 8) {
+ UNALIGNED_COPY64(dst, src);
+ len -= (int)(dst - src);
+ dst += dst - src;
+ }
+ }
+ while (len > 0) {
+ UNALIGNED_COPY64(dst, src);
+ src += 8;
+ dst += 8;
+ len -= 8;
+ }
+}
+
+int BrotliDecompressedSize(size_t encoded_size,
+ const uint8_t* encoded_buffer,
+ size_t* decoded_size) {
+ BrotliMemInput memin;
+ BrotliInput input = BrotliInitMemInput(encoded_buffer, encoded_size, &memin);
+ BrotliBitReader br;
+ int meta_block_len;
+ int input_end;
+ int is_uncompressed;
+ if (!BrotliInitBitReader(&br, input)) {
+ return 0;
+ }
+ DecodeWindowBits(&br);
+ DecodeMetaBlockLength(&br, &meta_block_len, &input_end, &is_uncompressed);
+ if (!input_end) {
+ return 0;
+ }
+ *decoded_size = (size_t)meta_block_len;
+ return 1;
+}
+
+int BrotliDecompressBuffer(size_t encoded_size,
+ const uint8_t* encoded_buffer,
+ size_t* decoded_size,
+ uint8_t* decoded_buffer) {
+ BrotliMemInput memin;
+ BrotliInput in = BrotliInitMemInput(encoded_buffer, encoded_size, &memin);
+ BrotliMemOutput mout;
+ BrotliOutput out = BrotliInitMemOutput(decoded_buffer, *decoded_size, &mout);
+ int success = BrotliDecompress(in, out);
+ *decoded_size = mout.pos;
+ return success;
+}
+
+int BrotliDecompress(BrotliInput input, BrotliOutput output) {
+ int ok = 1;
+ int i;
+ int pos = 0;
+ int input_end = 0;
+ int window_bits = 0;
+ int max_backward_distance;
+ int max_distance = 0;
+ int ringbuffer_size;
+ int ringbuffer_mask;
+ uint8_t* ringbuffer;
+ uint8_t* ringbuffer_end;
+ /* This ring buffer holds a few past copy distances that will be used by */
+ /* some special distance codes. */
+ int dist_rb[4] = { 16, 15, 11, 4 };
+ int dist_rb_idx = 0;
+ /* The previous 2 bytes used for context. */
+ uint8_t prev_byte1 = 0;
+ uint8_t prev_byte2 = 0;
+ HuffmanTreeGroup hgroup[3];
+ BrotliBitReader br;
+
+ /* 16 bytes would be enough, but we add some more slack for transforms */
+ /* to work at the end of the ringbuffer. */
+ static const int kRingBufferWriteAheadSlack = 128;
+
+ static const int kMaxDictionaryWordLength = 0;
+
+ if (!BrotliInitBitReader(&br, input)) {
+ return 0;
+ }
+
+ /* Decode window size. */
+ window_bits = DecodeWindowBits(&br);
+ max_backward_distance = (1 << window_bits) - 16;
+
+ ringbuffer_size = 1 << window_bits;
+ ringbuffer_mask = ringbuffer_size - 1;
+ ringbuffer = (uint8_t*)malloc((size_t)(ringbuffer_size +
+ kRingBufferWriteAheadSlack +
+ kMaxDictionaryWordLength));
+ if (!ringbuffer) {
+ ok = 0;
+ }
+ ringbuffer_end = ringbuffer + ringbuffer_size;
+
+ while (!input_end && ok) {
+ int meta_block_remaining_len = 0;
+ int is_uncompressed;
+ int block_length[3] = { 1 << 28, 1 << 28, 1 << 28 };
+ int block_type[3] = { 0 };
+ int num_block_types[3] = { 1, 1, 1 };
+ int block_type_rb[6] = { 0, 1, 0, 1, 0, 1 };
+ int block_type_rb_index[3] = { 0 };
+ HuffmanTree block_type_trees[3];
+ HuffmanTree block_len_trees[3];
+ int distance_postfix_bits;
+ int num_direct_distance_codes;
+ int distance_postfix_mask;
+ int num_distance_codes;
+ uint8_t* context_map = NULL;
+ uint8_t* context_modes = NULL;
+ int num_literal_htrees;
+ uint8_t* dist_context_map = NULL;
+ int num_dist_htrees;
+ int context_offset = 0;
+ uint8_t* context_map_slice = NULL;
+ uint8_t literal_htree_index = 0;
+ int dist_context_offset = 0;
+ uint8_t* dist_context_map_slice = NULL;
+ uint8_t dist_htree_index = 0;
+ int context_lookup_offset1 = 0;
+ int context_lookup_offset2 = 0;
+ uint8_t context_mode;
+
+ for (i = 0; i < 3; ++i) {
+ hgroup[i].num_htrees = 0;
+ hgroup[i].htrees = NULL;
+ block_type_trees[i].root_ = NULL;
+ block_len_trees[i].root_ = NULL;
+ }
+
+ if (!BrotliReadMoreInput(&br)) {
+ printf("[BrotliDecompress] Unexpected end of input.\n");
+ ok = 0;
+ goto End;
+ }
+ BROTLI_LOG_UINT(pos);
+ DecodeMetaBlockLength(&br, &meta_block_remaining_len,
+ &input_end, &is_uncompressed);
+ BROTLI_LOG_UINT(meta_block_remaining_len);
+ if (meta_block_remaining_len == 0) {
+ goto End;
+ }
+ if (is_uncompressed) {
+ BrotliSetBitPos(&br, (br.bit_pos_ + 7) & (uint32_t)(~7UL));
+ while (meta_block_remaining_len) {
+ ringbuffer[pos & ringbuffer_mask] = (uint8_t)BrotliReadBits(&br, 8);
+ if ((pos & ringbuffer_mask) == ringbuffer_mask) {
+ if (BrotliWrite(output, ringbuffer, (size_t)ringbuffer_size) < 0) {
+ ok = 0;
+ goto End;
+ }
+ }
+ ++pos;
+ --meta_block_remaining_len;
+ }
+ goto End;
+ }
+ for (i = 0; i < 3; ++i) {
+ block_type_trees[i].root_ = NULL;
+ block_len_trees[i].root_ = NULL;
+ num_block_types[i] = DecodeVarLenUint8(&br) + 1;
+ if (num_block_types[i] >= 2) {
+ if (!ReadHuffmanCode(
+ num_block_types[i] + 2, &block_type_trees[i], &br) ||
+ !ReadHuffmanCode(kNumBlockLengthCodes, &block_len_trees[i], &br)) {
+ ok = 0;
+ goto End;
+ }
+ block_length[i] = ReadBlockLength(&block_len_trees[i], &br);
+ block_type_rb_index[i] = 1;
+ }
+ }
+
+ BROTLI_LOG_UINT(num_block_types[0]);
+ BROTLI_LOG_UINT(num_block_types[1]);
+ BROTLI_LOG_UINT(num_block_types[2]);
+ BROTLI_LOG_UINT(block_length[0]);
+ BROTLI_LOG_UINT(block_length[1]);
+ BROTLI_LOG_UINT(block_length[2]);
+
+ if (!BrotliReadMoreInput(&br)) {
+ printf("[BrotliDecompress] Unexpected end of input.\n");
+ ok = 0;
+ goto End;
+ }
+ distance_postfix_bits = (int)BrotliReadBits(&br, 2);
+ num_direct_distance_codes = NUM_DISTANCE_SHORT_CODES +
+ ((int)BrotliReadBits(&br, 4) << distance_postfix_bits);
+ distance_postfix_mask = (1 << distance_postfix_bits) - 1;
+ num_distance_codes = (num_direct_distance_codes +
+ (48 << distance_postfix_bits));
+ context_modes = (uint8_t*)malloc((size_t)num_block_types[0]);
+ if (context_modes == 0) {
+ ok = 0;
+ goto End;
+ }
+ for (i = 0; i < num_block_types[0]; ++i) {
+ context_modes[i] = (uint8_t)(BrotliReadBits(&br, 2) << 1);
+ BROTLI_LOG_ARRAY_INDEX(context_modes, i);
+ }
+ BROTLI_LOG_UINT(num_direct_distance_codes);
+ BROTLI_LOG_UINT(distance_postfix_bits);
+
+ if (!DecodeContextMap(num_block_types[0] << kLiteralContextBits,
+ &num_literal_htrees, &context_map, &br) ||
+ !DecodeContextMap(num_block_types[2] << kDistanceContextBits,
+ &num_dist_htrees, &dist_context_map, &br)) {
+ ok = 0;
+ goto End;
+ }
+
+ HuffmanTreeGroupInit(&hgroup[0], kNumLiteralCodes, num_literal_htrees);
+ HuffmanTreeGroupInit(&hgroup[1], kNumInsertAndCopyCodes,
+ num_block_types[1]);
+ HuffmanTreeGroupInit(&hgroup[2], num_distance_codes, num_dist_htrees);
+
+ for (i = 0; i < 3; ++i) {
+ if (!HuffmanTreeGroupDecode(&hgroup[i], &br)) {
+ ok = 0;
+ goto End;
+ }
+ }
+
+ context_map_slice = context_map;
+ dist_context_map_slice = dist_context_map;
+ context_mode = context_modes[block_type[0]];
+ context_lookup_offset1 = kContextLookupOffsets[context_mode];
+ context_lookup_offset2 = kContextLookupOffsets[context_mode + 1];
+
+ while (meta_block_remaining_len > 0) {
+ int insert_length;
+ int copy_length;
+ int distance_code;
+ int distance;
+ uint8_t context;
+ int j;
+ const uint8_t* copy_src;
+ uint8_t* copy_dst;
+ if (!BrotliReadMoreInput(&br)) {
+ printf("[BrotliDecompress] Unexpected end of input.\n");
+ ok = 0;
+ goto End;
+ }
+ if (block_length[1] == 0) {
+ DecodeBlockType(num_block_types[1],
+ block_type_trees, 1, block_type, block_type_rb,
+ block_type_rb_index, &br);
+ block_length[1] = ReadBlockLength(&block_len_trees[1], &br);
+ }
+ --block_length[1];
+ ReadInsertAndCopy(&hgroup[1].htrees[block_type[1]],
+ &insert_length, &copy_length, &distance_code, &br);
+ BROTLI_LOG_UINT(insert_length);
+ BROTLI_LOG_UINT(copy_length);
+ BROTLI_LOG_UINT(distance_code);
+ for (j = 0; j < insert_length; ++j) {
+ if (!BrotliReadMoreInput(&br)) {
+ printf("[BrotliDecompress] Unexpected end of input.\n");
+ ok = 0;
+ goto End;
+ }
+ if (block_length[0] == 0) {
+ DecodeBlockType(num_block_types[0],
+ block_type_trees, 0, block_type, block_type_rb,
+ block_type_rb_index, &br);
+ block_length[0] = ReadBlockLength(&block_len_trees[0], &br);
+ context_offset = block_type[0] << kLiteralContextBits;
+ context_map_slice = context_map + context_offset;
+ context_mode = context_modes[block_type[0]];
+ context_lookup_offset1 = kContextLookupOffsets[context_mode];
+ context_lookup_offset2 = kContextLookupOffsets[context_mode + 1];
+ }
+ context = (kContextLookup[context_lookup_offset1 + prev_byte1] |
+ kContextLookup[context_lookup_offset2 + prev_byte2]);
+ BROTLI_LOG_UINT(context);
+ literal_htree_index = context_map_slice[context];
+ --block_length[0];
+ prev_byte2 = prev_byte1;
+ prev_byte1 = (uint8_t)ReadSymbol(&hgroup[0].htrees[literal_htree_index],
+ &br);
+ ringbuffer[pos & ringbuffer_mask] = prev_byte1;
+ BROTLI_LOG_UINT(literal_htree_index);
+ BROTLI_LOG_ARRAY_INDEX(ringbuffer, pos & ringbuffer_mask);
+ if ((pos & ringbuffer_mask) == ringbuffer_mask) {
+ if (BrotliWrite(output, ringbuffer, (size_t)ringbuffer_size) < 0) {
+ ok = 0;
+ goto End;
+ }
+ }
+ ++pos;
+ }
+ meta_block_remaining_len -= insert_length;
+ if (meta_block_remaining_len <= 0) break;
+
+ if (distance_code < 0) {
+ uint8_t context;
+ if (!BrotliReadMoreInput(&br)) {
+ printf("[BrotliDecompress] Unexpected end of input.\n");
+ ok = 0;
+ goto End;
+ }
+ if (block_length[2] == 0) {
+ DecodeBlockType(num_block_types[2],
+ block_type_trees, 2, block_type, block_type_rb,
+ block_type_rb_index, &br);
+ block_length[2] = ReadBlockLength(&block_len_trees[2], &br);
+ dist_htree_index = (uint8_t)block_type[2];
+ dist_context_offset = block_type[2] << kDistanceContextBits;
+ dist_context_map_slice = dist_context_map + dist_context_offset;
+ }
+ --block_length[2];
+ context = (uint8_t)(copy_length > 4 ? 3 : copy_length - 2);
+ dist_htree_index = dist_context_map_slice[context];
+ distance_code = ReadCopyDistance(&hgroup[2].htrees[dist_htree_index],
+ num_direct_distance_codes,
+ distance_postfix_bits,
+ distance_postfix_mask,
+ &br);
+ }
+
+ /* Convert the distance code to the actual distance by possibly looking */
+ /* up past distnaces from the ringbuffer. */
+ distance = TranslateShortCodes(distance_code, dist_rb, dist_rb_idx);
+ if (distance < 0) {
+ ok = 0;
+ goto End;
+ }
+ if (distance_code > 0) {
+ dist_rb[dist_rb_idx & 3] = distance;
+ ++dist_rb_idx;
+ }
+ BROTLI_LOG_UINT(distance);
+
+ if (pos < max_backward_distance &&
+ max_distance != max_backward_distance) {
+ max_distance = pos;
+ } else {
+ max_distance = max_backward_distance;
+ }
+
+ copy_dst = &ringbuffer[pos & ringbuffer_mask];
+
+ if (distance > max_distance) {
+ printf("Invalid backward reference. pos: %d distance: %d "
+ "len: %d bytes left: %d\n", pos, distance, copy_length,
+ meta_block_remaining_len);
+ ok = 0;
+ goto End;
+ } else {
+ if (copy_length > meta_block_remaining_len) {
+ printf("Invalid backward reference. pos: %d distance: %d "
+ "len: %d bytes left: %d\n", pos, distance, copy_length,
+ meta_block_remaining_len);
+ ok = 0;
+ goto End;
+ }
+
+ copy_src = &ringbuffer[(pos - distance) & ringbuffer_mask];
+
+#if (defined(__x86_64__) || defined(_M_X64))
+ if (copy_src + copy_length <= ringbuffer_end &&
+ copy_dst + copy_length < ringbuffer_end) {
+ if (copy_length <= 16 && distance >= 8) {
+ UNALIGNED_COPY64(copy_dst, copy_src);
+ UNALIGNED_COPY64(copy_dst + 8, copy_src + 8);
+ } else {
+ IncrementalCopyFastPath(copy_dst, copy_src, copy_length);
+ }
+ pos += copy_length;
+ meta_block_remaining_len -= copy_length;
+ copy_length = 0;
+ }
+#endif
+
+ for (j = 0; j < copy_length; ++j) {
+ ringbuffer[pos & ringbuffer_mask] =
+ ringbuffer[(pos - distance) & ringbuffer_mask];
+ if ((pos & ringbuffer_mask) == ringbuffer_mask) {
+ if (BrotliWrite(output, ringbuffer, (size_t)ringbuffer_size) < 0) {
+ ok = 0;
+ goto End;
+ }
+ }
+ ++pos;
+ --meta_block_remaining_len;
+ }
+ }
+
+ /* When we get here, we must have inserted at least one literal and */
+ /* made a copy of at least length two, therefore accessing the last 2 */
+ /* bytes is valid. */
+ prev_byte1 = ringbuffer[(pos - 1) & ringbuffer_mask];
+ prev_byte2 = ringbuffer[(pos - 2) & ringbuffer_mask];
+ }
+
+ /* Protect pos from overflow, wrap it around at every GB of input data */
+ pos &= 0x3fffffff;
+
+ End:
+ if (context_modes != 0) {
+ free(context_modes);
+ }
+ if (context_map != 0) {
+ free(context_map);
+ }
+ if (dist_context_map != 0) {
+ free(dist_context_map);
+ }
+ for (i = 0; i < 3; ++i) {
+ HuffmanTreeGroupRelease(&hgroup[i]);
+ BrotliHuffmanTreeRelease(&block_type_trees[i]);
+ BrotliHuffmanTreeRelease(&block_len_trees[i]);
+ }
+ }
+
+ if (ringbuffer != 0) {
+ if (BrotliWrite(output, ringbuffer, (size_t)(pos & ringbuffer_mask)) < 0) {
+ ok = 0;
+ }
+ free(ringbuffer);
+ }
+ return ok;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/brotli/dec/decode.h b/brotli/dec/decode.h
new file mode 100644
index 0000000..9182438
--- /dev/null
+++ b/brotli/dec/decode.h
@@ -0,0 +1,52 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ API for Brotli decompression
+*/
+
+#ifndef BROTLI_DEC_DECODE_H_
+#define BROTLI_DEC_DECODE_H_
+
+#include "./streams.h"
+#include "./types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Sets *decoded_size to the decompressed size of the given encoded stream. */
+/* Returns 1 on success, 0 on failure. */
+int BrotliDecompressedSize(size_t encoded_size,
+ const uint8_t* encoded_buffer,
+ size_t* decoded_size);
+
+/* Decompresses the data in encoded_buffer into decoded_buffer, and sets */
+/* *decoded_size to the decompressed length. */
+/* Returns 0 if there was either a bit stream error or memory allocation */
+/* error, and 1 otherwise. */
+/* If decoded size is zero, returns 1 and keeps decoded_buffer unchanged. */
+int BrotliDecompressBuffer(size_t encoded_size,
+ const uint8_t* encoded_buffer,
+ size_t* decoded_size,
+ uint8_t* decoded_buffer);
+
+/* Same as above, but uses the specified input and output callbacks instead */
+/* of reading from and writing to pre-allocated memory buffers. */
+int BrotliDecompress(BrotliInput input, BrotliOutput output);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif /* BROTLI_DEC_DECODE_H_ */
diff --git a/brotli/dec/huffman.c b/brotli/dec/huffman.c
new file mode 100644
index 0000000..20e6223
--- /dev/null
+++ b/brotli/dec/huffman.c
@@ -0,0 +1,257 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ Utilities for building and looking up Huffman trees.
+*/
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "./huffman.h"
+#include "./safe_malloc.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
#define NON_EXISTENT_SYMBOL (-1)
#define MAX_ALLOWED_CODE_LENGTH 15

/* Marks the node as unassigned: children_ < 0 means it is neither a leaf
   nor an interior node yet. */
static void TreeNodeInit(HuffmanTreeNode* const node) {
  node->children_ = -1; /* means: 'unassigned so far' */
}

/* Returns true while the node has not been assigned (see TreeNodeInit). */
static int NodeIsEmpty(const HuffmanTreeNode* const node) {
  return (node->children_ < 0);
}

/* Returns true when every pre-allocated node of the tree is in use. */
static int IsFull(const HuffmanTree* const tree) {
  return (tree->num_nodes_ == tree->max_nodes_);
}

/* Takes the next two free nodes from the pre-allocated pool and makes them
   the (contiguous) children of 'node'.  children_ stores the *delta* from
   'node' to its first child; the assert verifies the delta fits in an int. */
static void AssignChildren(HuffmanTree* const tree,
                           HuffmanTreeNode* const node) {
  HuffmanTreeNode* const children = tree->root_ + tree->num_nodes_;
  node->children_ = (int)(children - node);
  assert(children - node == (int)(children - node));
  tree->num_nodes_ += 2;
  TreeNodeInit(children + 0);
  TreeNodeInit(children + 1);
}
+
+static int TreeInit(HuffmanTree* const tree, int num_leaves) {
+ assert(tree != NULL);
+ tree->root_ = NULL;
+ if (num_leaves == 0) return 0;
+ /* We allocate maximum possible nodes in the tree at once. */
+ /* Note that a Huffman tree is a full binary tree; and in a full binary */
+ /* tree with L leaves, the total number of nodes N = 2 * L - 1. */
+ tree->max_nodes_ = 2 * num_leaves - 1;
+ assert(tree->max_nodes_ < (1 << 16)); /* limit for the lut_jump_ table */
+ tree->root_ = (HuffmanTreeNode*)BrotliSafeMalloc((uint64_t)tree->max_nodes_,
+ sizeof(*tree->root_));
+ if (tree->root_ == NULL) return 0;
+ TreeNodeInit(tree->root_); /* Initialize root. */
+ tree->num_nodes_ = 1;
+ memset(tree->lut_bits_, 255, sizeof(tree->lut_bits_));
+ memset(tree->lut_jump_, 0, sizeof(tree->lut_jump_));
+ return 1;
+}
+
+void BrotliHuffmanTreeRelease(HuffmanTree* const tree) {
+ if (tree != NULL) {
+ if (tree->root_ != NULL) {
+ free(tree->root_);
+ }
+ tree->root_ = NULL;
+ tree->max_nodes_ = 0;
+ tree->num_nodes_ = 0;
+ }
+}
+
/* Utility: converts Huffman code lengths to corresponding Huffman codes. */
/* 'huff_codes' should be pre-allocated. */
/* Returns false in case of error (a code length exceeding */
/* MAX_ALLOWED_CODE_LENGTH); note that no memory is allocated here. */
/* This is the textbook canonical-code construction: codes of equal length */
/* are consecutive integers, and each length class starts right after the */
/* (left-shifted) end of the previous one. */
static int HuffmanCodeLengthsToCodes(const uint8_t* const code_lengths,
                                     int code_lengths_size,
                                     int* const huff_codes) {
  int symbol;
  int code_len;
  int code_length_hist[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
  int curr_code;
  int next_codes[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
  int max_code_length = 0;

  assert(code_lengths != NULL);
  assert(code_lengths_size > 0);
  assert(huff_codes != NULL);

  /* Calculate max code length. */
  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
    if (code_lengths[symbol] > max_code_length) {
      max_code_length = code_lengths[symbol];
    }
  }
  if (max_code_length > MAX_ALLOWED_CODE_LENGTH) return 0;

  /* Calculate code length histogram. */
  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
    ++code_length_hist[code_lengths[symbol]];
  }
  code_length_hist[0] = 0; /* length 0 means "symbol absent", not a code. */

  /* Calculate the initial values of 'next_codes' for each code length. */
  /* next_codes[code_len] denotes the code to be assigned to the next symbol */
  /* of code length 'code_len'. */
  curr_code = 0;
  next_codes[0] = -1; /* Unused, as code length = 0 implies */
                      /* code doesn't exist. */
  for (code_len = 1; code_len <= max_code_length; ++code_len) {
    curr_code = (curr_code + code_length_hist[code_len - 1]) << 1;
    next_codes[code_len] = curr_code;
  }

  /* Get symbols: within one length class, symbols in increasing order */
  /* receive consecutive codes. */
  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
    if (code_lengths[symbol] > 0) {
      huff_codes[symbol] = next_codes[code_lengths[symbol]]++;
    } else {
      huff_codes[symbol] = NON_EXISTENT_SYMBOL;
    }
  }
  return 1;
}
+
/* kReverse7[v] is the 7-bit value v with its bit order reversed. */
static const uint8_t kReverse7[128] = {
  0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120,
  4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124,
  2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122,
  6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126,
  1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121,
  5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125,
  3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123,
  7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127
};

/* Returns the low 'num_bits' bits of 'bits' in reversed order.
   Requires 0 <= bits < 128 and 0 <= num_bits <= 7. */
static int ReverseBitsShort(int bits, int num_bits) {
  const int reversed7 = kReverse7[bits]; /* full 7-bit reversal */
  return reversed7 >> (7 - num_bits);    /* keep only num_bits of it */
}
+
/* Inserts 'symbol', whose Huffman code is the low 'code_length' bits of
   'code', into the tree and the fast lookup tables:
   - codes of at most HUFF_LUT_BITS bits fill every LUT slot whose index has
     the bit-reversed code in its low bits (the high index bits enumerate all
     continuations of the short code);
   - longer codes record, at the slot of their first HUFF_LUT_BITS bits, a
     jump offset to the tree node reached after consuming those bits.
   The bit reversal suggests LUT indices are assembled from LSB-first input
   -- NOTE(review): confirm against bit_reader.
   Returns 0 on an inconsistent code assignment, 1 on success. */
static int TreeAddSymbol(HuffmanTree* const tree,
                         int symbol, int code, int code_length) {
  int step = HUFF_LUT_BITS;
  int base_code;
  HuffmanTreeNode* node = tree->root_;
  const HuffmanTreeNode* const max_node = tree->root_ + tree->max_nodes_;
  assert(symbol == (int16_t)symbol); /* must fit in lut_symbol_ */
  if (code_length <= HUFF_LUT_BITS) {
    /* Number of LUT slots sharing this short code as a suffix. */
    int i = 1 << (HUFF_LUT_BITS - code_length);
    base_code = ReverseBitsShort(code, code_length);
    do {
      int idx;
      --i;
      idx = base_code | (i << code_length);
      tree->lut_symbol_[idx] = (int16_t)symbol;
      tree->lut_bits_[idx] = (uint8_t)code_length;
    } while (i > 0);
  } else {
    base_code = ReverseBitsShort((code >> (code_length - HUFF_LUT_BITS)),
                                 HUFF_LUT_BITS);
  }
  /* Walk (creating as needed) the path from the root, MSB first. */
  while (code_length-- > 0) {
    if (node >= max_node) {
      return 0;
    }
    if (NodeIsEmpty(node)) {
      if (IsFull(tree)) return 0; /* error: too many symbols. */
      AssignChildren(tree, node);
    } else if (!HuffmanTreeNodeIsNotLeaf(node)) {
      return 0; /* leaf is already occupied. */
    }
    node += node->children_ + ((code >> code_length) & 1);
    if (--step == 0) {
      /* Exactly HUFF_LUT_BITS bits consumed: remember where we got to. */
      tree->lut_jump_[base_code] = (int16_t)(node - tree->root_);
    }
  }
  if (NodeIsEmpty(node)) {
    node->children_ = 0; /* turn newly created node into a leaf. */
  } else if (HuffmanTreeNodeIsNotLeaf(node)) {
    return 0; /* trying to assign a symbol to already used code. */
  }
  node->symbol_ = symbol; /* Add symbol in this node. */
  return 1;
}
+
/* Builds the Huffman tree (and its lookup tables) from 'code_lengths', where
   code_lengths[i] is the bit length of symbol i (0 = symbol not present).
   Returns 0 on error: no symbols, allocation failure, or a set of lengths
   that does not form a complete prefix code. */
int BrotliHuffmanTreeBuildImplicit(HuffmanTree* const tree,
                                   const uint8_t* const code_lengths,
                                   int code_lengths_size) {
  int symbol;
  int num_symbols = 0;  /* symbols with a non-zero code length */
  int root_symbol = 0;  /* last such symbol; the only one if num_symbols==1 */

  assert(tree != NULL);
  assert(code_lengths != NULL);

  /* Find out number of symbols and the root symbol. */
  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
    if (code_lengths[symbol] > 0) {
      /* Note: code length = 0 indicates non-existent symbol. */
      ++num_symbols;
      root_symbol = symbol;
    }
  }

  /* Initialize the tree. Will fail for num_symbols = 0 */
  if (!TreeInit(tree, num_symbols)) return 0;

  /* Build tree. */
  if (num_symbols == 1) { /* Trivial case. */
    const int max_symbol = code_lengths_size;
    if (root_symbol < 0 || root_symbol >= max_symbol) {
      BrotliHuffmanTreeRelease(tree);
      return 0;
    }
    /* The lone symbol is encoded with zero bits. */
    return TreeAddSymbol(tree, root_symbol, 0, 0);
  } else { /* Normal case. */
    int ok = 0;

    /* Get Huffman codes from the code lengths. */
    int* const codes =
        (int*)BrotliSafeMalloc((uint64_t)code_lengths_size, sizeof(*codes));
    if (codes == NULL) goto End;

    if (!HuffmanCodeLengthsToCodes(code_lengths, code_lengths_size, codes)) {
      goto End;
    }

    /* Add symbols one-by-one. */
    for (symbol = 0; symbol < code_lengths_size; ++symbol) {
      if (code_lengths[symbol] > 0) {
        if (!TreeAddSymbol(tree, symbol, codes[symbol], code_lengths[symbol])) {
          goto End;
        }
      }
    }
    ok = 1;
 End:
    free(codes); /* free(NULL) is harmless on the early-exit path. */
    /* A valid code must consume every pre-allocated node; anything less
       means the lengths were under-subscribed (incomplete code). */
    ok = ok && IsFull(tree);
    if (!ok) BrotliHuffmanTreeRelease(tree);
    return ok;
  }
}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/brotli/dec/huffman.h b/brotli/dec/huffman.h
new file mode 100644
index 0000000..fbd0744
--- /dev/null
+++ b/brotli/dec/huffman.h
@@ -0,0 +1,75 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ Utilities for building and looking up Huffman trees.
+*/
+
#ifndef BROTLI_DEC_HUFFMAN_H_
#define BROTLI_DEC_HUFFMAN_H_

#include <assert.h>
#include "./types.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

/* A node of a Huffman tree. */
typedef struct {
  int symbol_;
  int children_; /* delta offset to both children (contiguous) or 0 if leaf. */
} HuffmanTreeNode;

/* Huffman Tree. */
#define HUFF_LUT_BITS 7               /* leading bits resolved by the LUT */
#define HUFF_LUT (1U << HUFF_LUT_BITS)
typedef struct HuffmanTree HuffmanTree;
struct HuffmanTree {
  /* Fast lookup for short bit lengths. */
  uint8_t lut_bits_[HUFF_LUT];   /* code length per LUT slot */
  int16_t lut_symbol_[HUFF_LUT]; /* decoded symbol per LUT slot */
  int16_t lut_jump_[HUFF_LUT];   /* node offset for codes longer than LUT */
  /* Complete tree for lookups. */
  HuffmanTreeNode* root_; /* all the nodes, starting at root. */
  int max_nodes_;         /* max number of nodes */
  int num_nodes_;         /* number of currently occupied nodes */
};

/* Returns true if the given node is not a leaf of the Huffman tree. */
static BROTLI_INLINE int HuffmanTreeNodeIsNotLeaf(
    const HuffmanTreeNode* const node) {
  return node->children_;
}

/* Go down one level. Most critical function. 'right_child' must be 0 or 1. */
static BROTLI_INLINE const HuffmanTreeNode* HuffmanTreeNextNode(
    const HuffmanTreeNode* node, int right_child) {
  return node + node->children_ + right_child;
}

/* Releases the nodes of the Huffman tree. */
/* Note: It does NOT free 'tree' itself. */
void BrotliHuffmanTreeRelease(HuffmanTree* const tree);

/* Builds Huffman tree assuming code lengths are implicitly in symbol order. */
/* Returns false in case of error (invalid tree or memory error). */
int BrotliHuffmanTreeBuildImplicit(HuffmanTree* const tree,
                                   const uint8_t* const code_lengths,
                                   int code_lengths_size);

#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

#endif /* BROTLI_DEC_HUFFMAN_H_ */
diff --git a/brotli/dec/prefix.h b/brotli/dec/prefix.h
new file mode 100644
index 0000000..06afe4d
--- /dev/null
+++ b/brotli/dec/prefix.h
@@ -0,0 +1,65 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ Lookup tables to map prefix codes to value ranges. This is used during
+ decoding of the block lengths, literal insertion lengths and copy lengths.
+*/
+
#ifndef BROTLI_DEC_PREFIX_H_
#define BROTLI_DEC_PREFIX_H_

/* Represents the range of values belonging to a prefix code: */
/* [offset, offset + 2^nbits) */
struct PrefixCodeRange {
  int offset;
  int nbits;
};

/* Value ranges for the block length prefix codes. */
static const struct PrefixCodeRange kBlockLengthPrefixCode[] = {
  {   1,  2}, {    5,  2}, {   9,  2}, {  13,  2},
  {  17,  3}, {   25,  3}, {  33,  3}, {  41,  3},
  {  49,  4}, {   65,  4}, {  81,  4}, {  97,  4},
  { 113,  5}, {  145,  5}, { 177,  5}, { 209,  5},
  { 241,  6}, {  305,  6}, { 369,  7}, { 497,  8},
  { 753,  9}, { 1265, 10}, {2289, 11}, {4337, 12},
  {8433, 13}, {16625, 24}
};

/* Value ranges for the literal insertion length prefix codes. */
static const struct PrefixCodeRange kInsertLengthPrefixCode[] = {
  {   0,  0}, {   1,  0}, {   2,  0}, {    3,  0},
  {   4,  0}, {   5,  0}, {   6,  1}, {    8,  1},
  {  10,  2}, {  14,  2}, {  18,  3}, {   26,  3},
  {  34,  4}, {  50,  4}, {  66,  5}, {   98,  5},
  { 130,  6}, { 194,  7}, { 322,  8}, {  578,  9},
  {1090, 10}, {2114, 12}, {6210, 14}, {22594, 24},
};

/* Value ranges for the copy length prefix codes. */
static const struct PrefixCodeRange kCopyLengthPrefixCode[] = {
  {  2, 0}, {   3, 0}, {   4,  0}, {   5,  0},
  {  6, 0}, {   7, 0}, {   8,  0}, {   9,  0},
  { 10, 1}, {  12, 1}, {  14,  2}, {  18,  2},
  { 22, 3}, {  30, 3}, {  38,  4}, {  54,  4},
  { 70, 5}, { 102, 5}, { 134,  6}, { 198,  7},
  {326, 8}, { 582, 9}, {1094, 10}, {2118, 24},
};

/* Offsets (in multiples of 8) into the insert/copy length code tables,
   indexed by a 3x3 combined insert-and-copy cell -- NOTE(review): the exact
   indexing is defined by the decoder; confirm against decode.c. */
static const int kInsertRangeLut[9] = {
  0, 0, 8, 8, 0, 16, 8, 16, 16,
};

static const int kCopyRangeLut[9] = {
  0, 8, 0, 8, 16, 0, 16, 8, 16,
};

#endif /* BROTLI_DEC_PREFIX_H_ */
diff --git a/brotli/dec/safe_malloc.c b/brotli/dec/safe_malloc.c
new file mode 100644
index 0000000..ef1624c
--- /dev/null
+++ b/brotli/dec/safe_malloc.c
@@ -0,0 +1,42 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ Size-checked memory allocation.
+*/
+
+#include <stdlib.h>
+#include "./safe_malloc.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
/* Returns 1 if 'nmemb * size' neither overflows nor exceeds
   BROTLI_MAX_ALLOCABLE_MEMORY, 0 otherwise.  A zero-element request is
   always accepted.  Unlike the original, the product is computed only after
   the division-based guard has shown it cannot wrap, instead of computing a
   possibly-wrapped value up front and relying on it being unused. */
static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
  uint64_t total_size;
  if (nmemb == 0) return 1;
  if ((uint64_t)size > BROTLI_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
  total_size = nmemb * size;
  if (total_size != (size_t)total_size) return 0; /* must fit in size_t */
  return 1;
}
+
/* Size-checked malloc(): returns NULL when nmemb * size would overflow or
   exceed BROTLI_MAX_ALLOCABLE_MEMORY.  A zero-sized request also returns
   NULL explicitly: the overflow check deliberately accepts nmemb == 0, but
   the old 'assert(nmemb * size > 0)' then aborted debug builds on exactly
   that input, and release builds fell through to malloc(0), whose result is
   implementation-defined. */
void* BrotliSafeMalloc(uint64_t nmemb, size_t size) {
  if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
  if (nmemb == 0 || size == 0) return NULL;
  return malloc((size_t)(nmemb * size));
}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/brotli/dec/safe_malloc.h b/brotli/dec/safe_malloc.h
new file mode 100644
index 0000000..9a73b0e
--- /dev/null
+++ b/brotli/dec/safe_malloc.h
@@ -0,0 +1,45 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ Size-checked memory allocation.
+*/
+
+#ifndef BROTLI_UTILS_UTILS_H_
+#define BROTLI_UTILS_UTILS_H_
+
+#include <assert.h>
+
+#include "./types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* This is the maximum memory amount that we will ever try to allocate. */
+#define BROTLI_MAX_ALLOCABLE_MEMORY (1 << 30)
+
+/* size-checking safe malloc/calloc: verify that the requested size is not too
+ large, or return NULL. You don't need to call these for constructs like
+ malloc(sizeof(foo)), but only if there's font-dependent size involved
+ somewhere (like: malloc(decoded_size * sizeof(*something))). That's why this
+ safe malloc() borrows the signature from calloc(), pointing at the dangerous
+ underlying multiply involved.
+*/
+void* BrotliSafeMalloc(uint64_t nmemb, size_t size);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif /* BROTLI_UTILS_UTILS_H_ */
diff --git a/brotli/dec/streams.c b/brotli/dec/streams.c
new file mode 100644
index 0000000..ac0f07e
--- /dev/null
+++ b/brotli/dec/streams.c
@@ -0,0 +1,117 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ Functions for streaming input and output.
+*/
+
+#include <string.h>
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+#include "./streams.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+int BrotliMemInputFunction(void* data, uint8_t* buf, size_t count) {
+ BrotliMemInput* input = (BrotliMemInput*)data;
+ if (input->pos > input->length) {
+ return -1;
+ }
+ if (input->pos + count > input->length) {
+ count = input->length - input->pos;
+ }
+ memcpy(buf, input->buffer + input->pos, count);
+ input->pos += count;
+ return (int)count;
+}
+
+BrotliInput BrotliInitMemInput(const uint8_t* buffer, size_t length,
+ BrotliMemInput* mem_input) {
+ BrotliInput input;
+ mem_input->buffer = buffer;
+ mem_input->length = length;
+ mem_input->pos = 0;
+ input.cb_ = &BrotliMemInputFunction;
+ input.data_ = mem_input;
+ return input;
+}
+
+int BrotliMemOutputFunction(void* data, const uint8_t* buf, size_t count) {
+ BrotliMemOutput* output = (BrotliMemOutput*)data;
+ if (output->pos + count > output->length) {
+ return -1;
+ }
+ memcpy(output->buffer + output->pos, buf, count);
+ output->pos += count;
+ return (int)count;
+}
+
+BrotliOutput BrotliInitMemOutput(uint8_t* buffer, size_t length,
+ BrotliMemOutput* mem_output) {
+ BrotliOutput output;
+ mem_output->buffer = buffer;
+ mem_output->length = length;
+ mem_output->pos = 0;
+ output.cb_ = &BrotliMemOutputFunction;
+ output.data_ = mem_output;
+ return output;
+}
+
/* BrotliInputFunction that reads from stdin (POSIX only).  Returns the
   number of bytes read, or -1 on error and on Windows builds, where read()
   is not available here.  The (void) casts silence unused-parameter
   warnings the original emitted on every build. */
int BrotliStdinInputFunction(void* data, uint8_t* buf, size_t count) {
  (void)data; /* stdin needs no per-stream state */
#ifndef _WIN32
  return (int)read(STDIN_FILENO, buf, count);
#else
  (void)buf;
  (void)count;
  return -1;
#endif
}
+
+BrotliInput BrotliStdinInput() {
+ BrotliInput in;
+ in.cb_ = BrotliStdinInputFunction;
+ in.data_ = NULL;
+ return in;
+}
+
/* BrotliOutputFunction that writes to stdout (POSIX only).  Returns the
   number of bytes written, or -1 on error and on Windows builds.  The
   (void) casts silence unused-parameter warnings the original emitted. */
int BrotliStdoutOutputFunction(void* data, const uint8_t* buf, size_t count) {
  (void)data; /* stdout needs no per-stream state */
#ifndef _WIN32
  return (int)write(STDOUT_FILENO, buf, count);
#else
  (void)buf;
  (void)count;
  return -1;
#endif
}
+
+BrotliOutput BrotliStdoutOutput() {
+ BrotliOutput out;
+ out.cb_ = BrotliStdoutOutputFunction;
+ out.data_ = NULL;
+ return out;
+}
+
/* BrotliOutputFunction that appends to the FILE* passed as 'data'.  Returns
   the number of bytes fwrite() accepted; note that a short write is
   reported as a smaller count rather than as -1. */
int BrotliFileOutputFunction(void* data, const uint8_t* buf, size_t count) {
  return (int)fwrite(buf, 1, count, (FILE*)data);
}
+
+BrotliOutput BrotliFileOutput(FILE* f) {
+ BrotliOutput out;
+ out.cb_ = BrotliFileOutputFunction;
+ out.data_ = f;
+ return out;
+}
+
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/brotli/dec/streams.h b/brotli/dec/streams.h
new file mode 100644
index 0000000..1c8ef65
--- /dev/null
+++ b/brotli/dec/streams.h
@@ -0,0 +1,103 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ Functions for streaming input and output.
+*/
+
+#ifndef BROTLI_DEC_STREAMS_H_
+#define BROTLI_DEC_STREAMS_H_
+
+#include <stdio.h>
+#include "./types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
/* Function pointer type used to read len bytes into buf. Returns the */
/* number of bytes read or -1 on error. */
typedef int (*BrotliInputFunction)(void* data, uint8_t* buf, size_t len);

/* Input callback function with associated data. */
/* The struct does not own 'data_'; whoever created it keeps it alive. */
typedef struct {
  BrotliInputFunction cb_;
  void* data_;
} BrotliInput;

/* Reads len bytes into buf, using the in callback. */
static BROTLI_INLINE int BrotliRead(BrotliInput in, uint8_t* buf, size_t len) {
  return in.cb_(in.data_, buf, len);
}

/* Function pointer type used to write len bytes into buf. Returns the */
/* number of bytes written or -1 on error. */
typedef int (*BrotliOutputFunction)(void* data, const uint8_t* buf, size_t len);

/* Output callback function with associated data. */
typedef struct {
  BrotliOutputFunction cb_;
  void* data_;
} BrotliOutput;

/* Writes len bytes into buf, using the out callback. */
static BROTLI_INLINE int BrotliWrite(BrotliOutput out,
                                     const uint8_t* buf, size_t len) {
  return out.cb_(out.data_, buf, len);
}

/* Memory region with position. */
typedef struct {
  const uint8_t* buffer; /* not owned */
  size_t length;         /* total bytes in buffer */
  size_t pos;            /* read cursor */
} BrotliMemInput;

/* Input callback where *data is a BrotliMemInput struct. */
int BrotliMemInputFunction(void* data, uint8_t* buf, size_t count);

/* Returns an input callback that wraps the given memory region. */
BrotliInput BrotliInitMemInput(const uint8_t* buffer, size_t length,
                               BrotliMemInput* mem_input);

/* Output buffer with position. */
typedef struct {
  uint8_t* buffer; /* not owned */
  size_t length;   /* capacity of buffer */
  size_t pos;      /* write cursor */
} BrotliMemOutput;

/* Output callback where *data is a BrotliMemOutput struct. */
int BrotliMemOutputFunction(void* data, const uint8_t* buf, size_t count);

/* Returns an output callback that wraps the given memory region. */
BrotliOutput BrotliInitMemOutput(uint8_t* buffer, size_t length,
                                 BrotliMemOutput* mem_output);

/* Input callback that reads from standard input (POSIX only; returns -1 */
/* on Windows builds). */
int BrotliStdinInputFunction(void* data, uint8_t* buf, size_t count);
BrotliInput BrotliStdinInput();

/* Output callback that writes to standard output (POSIX only; returns -1 */
/* on Windows builds). */
int BrotliStdoutOutputFunction(void* data, const uint8_t* buf, size_t count);
BrotliOutput BrotliStdoutOutput();

/* Output callback that writes to a file. */
int BrotliFileOutputFunction(void* data, const uint8_t* buf, size_t count);
BrotliOutput BrotliFileOutput(FILE* f);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif /* BROTLI_DEC_STREAMS_H_ */
diff --git a/brotli/dec/types.h b/brotli/dec/types.h
new file mode 100644
index 0000000..bc09f8b
--- /dev/null
+++ b/brotli/dec/types.h
@@ -0,0 +1,42 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ Common types
+*/
+
#ifndef BROTLI_DEC_TYPES_H_
#define BROTLI_DEC_TYPES_H_

#include <stddef.h>  /* for size_t */

#ifndef _MSC_VER
#include <inttypes.h>
#ifdef __STRICT_ANSI__
/* Strict C89 has no 'inline'; helpers become plain static functions. */
#define BROTLI_INLINE
#else /* __STRICT_ANSI__ */
#define BROTLI_INLINE inline
#endif
#else
/* MSVC branch: fixed-width types spelled out by hand -- NOTE(review): this
   assumes an MSVC without <inttypes.h>/<stdint.h>; newer versions ship
   <stdint.h>, and these typedefs assume the usual LLP64 widths. */
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef signed short int16_t;
typedef unsigned short uint16_t;
typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef unsigned long long int uint64_t;
typedef long long int int64_t;
#define BROTLI_INLINE __forceinline
#endif  /* _MSC_VER */

#endif  /* BROTLI_DEC_TYPES_H_ */
diff --git a/brotli/enc/Makefile b/brotli/enc/Makefile
new file mode 100644
index 0000000..c7041dc
--- /dev/null
+++ b/brotli/enc/Makefile
@@ -0,0 +1,11 @@
+#brotli/enc
+
+include ../../shared.mk
+
+OBJS = backward_references.o block_splitter.o encode.o entropy_encode.o histogram.o literal_cost.o prefix.o
+
+all : $(OBJS)
+
+clean :
+ rm -f $(OBJS) $(SO)
+
diff --git a/brotli/enc/README b/brotli/enc/README
new file mode 100644
index 0000000..c988ae7
--- /dev/null
+++ b/brotli/enc/README
@@ -0,0 +1,3 @@
+This directory holds the encoder for brotli compression format.
+
+Brotli is proposed to be used at the byte-compression level in WOFF 2.0 format.
diff --git a/brotli/enc/backward_references.cc b/brotli/enc/backward_references.cc
new file mode 100644
index 0000000..0e7f89b
--- /dev/null
+++ b/brotli/enc/backward_references.cc
@@ -0,0 +1,143 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Function to find backward reference copies.
+
+#include "./backward_references.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "./command.h"
+
+namespace brotli {
+
+void CreateBackwardReferences(size_t num_bytes,
+ size_t position,
+ const uint8_t* ringbuffer,
+ const float* literal_cost,
+ size_t ringbuffer_mask,
+ const size_t max_backward_limit,
+ Hasher* hasher,
+ std::vector<Command>* commands) {
+ // Length heuristic that seems to help probably by better selection
+ // of lazy matches of similar lengths.
+ int insert_length = 0;
+ size_t i = position & ringbuffer_mask;
+ const int i_diff = position - i;
+ const size_t i_end = i + num_bytes;
+
+ double average_cost = 0.0;
+ for (int k = position; k < position + num_bytes; ++k) {
+ average_cost += literal_cost[k & ringbuffer_mask];
+ }
+ average_cost /= num_bytes;
+ hasher->set_average_cost(average_cost);
+
+ while (i + 2 < i_end) {
+ size_t best_len = 0;
+ size_t best_len_code = 0;
+ size_t best_dist = 0;
+ double best_score = 0;
+ size_t max_distance = std::min(i + i_diff, max_backward_limit);
+ hasher->set_insert_length(insert_length);
+ bool match_found = hasher->FindLongestMatch(
+ ringbuffer, literal_cost, ringbuffer_mask,
+ i + i_diff, i_end - i, max_distance,
+ &best_len, &best_len_code, &best_dist, &best_score);
+ if (match_found) {
+ // Found a match. Let's look for something even better ahead.
+ int delayed_backward_references_in_row = 0;
+ while (i + 4 < i_end &&
+ delayed_backward_references_in_row < 4) {
+ size_t best_len_2 = 0;
+ size_t best_len_code_2 = 0;
+ size_t best_dist_2 = 0;
+ double best_score_2 = 0;
+ max_distance = std::min(i + i_diff + 1, max_backward_limit);
+ hasher->Store(ringbuffer + i, i + i_diff);
+ match_found = hasher->FindLongestMatch(
+ ringbuffer, literal_cost, ringbuffer_mask,
+ i + i_diff + 1, i_end - i - 1, max_distance,
+ &best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
+ double cost_diff_lazy = 0;
+ if (best_len >= 4) {
+ cost_diff_lazy +=
+ literal_cost[(i + 4) & ringbuffer_mask] - average_cost;
+ }
+ {
+ const int tail_length = best_len_2 - best_len + 1;
+ for (int k = 0; k < tail_length; ++k) {
+ cost_diff_lazy -=
+ literal_cost[(i + best_len + k) & ringbuffer_mask] -
+ average_cost;
+ }
+ }
+ // If we are not inserting any symbols, inserting one is more
+ // expensive than if we were inserting symbols anyways.
+ if (insert_length < 1) {
+ cost_diff_lazy += 0.97;
+ }
+ // Add bias to slightly avoid lazy matching.
+ cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2;
+ cost_diff_lazy += 0.04 * literal_cost[i & ringbuffer_mask];
+
+ if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
+ // Ok, let's just write one byte for now and start a match from the
+ // next byte.
+ ++insert_length;
+ ++delayed_backward_references_in_row;
+ best_len = best_len_2;
+ best_len_code = best_len_code_2;
+ best_dist = best_dist_2;
+ best_score = best_score_2;
+ i++;
+ } else {
+ break;
+ }
+ }
+ Command cmd;
+ cmd.insert_length_ = insert_length;
+ cmd.copy_length_ = best_len;
+ cmd.copy_length_code_ = best_len_code;
+ cmd.copy_distance_ = best_dist;
+ commands->push_back(cmd);
+ hasher->set_last_distance(best_dist);
+
+ insert_length = 0;
+ ++i;
+ for (int j = 1; j < best_len; ++j) {
+ if (i + 2 < i_end) {
+ hasher->Store(ringbuffer + i, i + i_diff);
+ }
+ ++i;
+ }
+ } else {
+ ++insert_length;
+ hasher->Store(ringbuffer + i, i + i_diff);
+ ++i;
+ }
+ }
+ insert_length += (i_end - i);
+
+ if (insert_length > 0) {
+ Command cmd;
+ cmd.insert_length_ = insert_length;
+ cmd.copy_length_ = 0;
+ cmd.copy_distance_ = 0;
+ commands->push_back(cmd);
+ }
+}
+
+} // namespace brotli
diff --git a/brotli/enc/backward_references.h b/brotli/enc/backward_references.h
new file mode 100644
index 0000000..f666ef6
--- /dev/null
+++ b/brotli/enc/backward_references.h
@@ -0,0 +1,39 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Function to find backward reference copies.
+
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_

#include <stdint.h>
#include <vector>

#include "./hash.h"
#include "./command.h"

namespace brotli {

// Scans ringbuffer[position, position + num_bytes) for backward-reference
// copies and appends the resulting commands to *commands.
//   literal_cost: per-position literal coding cost, indexed like ringbuffer
//                 (both wrapped with ringbuffer_mask).
//   max_backward_limit: maximum allowed copy distance.
//   hasher: match-finding state; updated as positions are consumed.
void CreateBackwardReferences(size_t num_bytes,
                              size_t position,
                              const uint8_t* ringbuffer,
                              const float* literal_cost,
                              size_t ringbuffer_mask,
                              const size_t max_backward_limit,
                              Hasher* hasher,
                              std::vector<Command>* commands);

}  // namespace brotli

#endif  // BROTLI_ENC_BACKWARD_REFERENCES_H_
diff --git a/brotli/enc/bit_cost.h b/brotli/enc/bit_cost.h
new file mode 100644
index 0000000..c769455
--- /dev/null
+++ b/brotli/enc/bit_cost.h
@@ -0,0 +1,139 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions to estimate the bit cost of Huffman trees.
+
+#ifndef BROTLI_ENC_BIT_COST_H_
+#define BROTLI_ENC_BIT_COST_H_
+
+#include <stdint.h>
+
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+
+namespace brotli {
+
+static const int kHuffmanExtraBits[kCodeLengthCodes] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3,
+};
+
+static inline int HuffmanTreeBitCost(const int* counts, const uint8_t* depth) {
+ int nbits = 0;
+ for (int i = 0; i < kCodeLengthCodes; ++i) {
+ nbits += counts[i] * (depth[i] + kHuffmanExtraBits[i]);
+ }
+ return nbits;
+}
+
+static inline int HuffmanTreeBitCost(
+ const Histogram<kCodeLengthCodes>& histogram,
+ const EntropyCode<kCodeLengthCodes>& entropy) {
+ return HuffmanTreeBitCost(&histogram.data_[0], &entropy.depth_[0]);
+}
+
+static inline int HuffmanBitCost(const uint8_t* depth, int length) {
+ int max_depth = 1;
+ int histogram[kCodeLengthCodes] = { 0 };
+ int tail_start = 0;
+ // compute histogram of compacted huffman tree
+ for (int i = 0; i < length;) {
+ const int value = depth[i];
+ if (value > max_depth) {
+ max_depth = value;
+ }
+ int reps = 1;
+ for (int k = i + 1; k < length && depth[k] == value; ++k) {
+ ++reps;
+ }
+ i += reps;
+ if (value == 0) {
+ if (reps < 3) {
+ histogram[0] += reps;
+ } else {
+ reps -= 3;
+ while (reps >= 0) {
+ ++histogram[17];
+ reps >>= 3;
+ --reps;
+ }
+ }
+ } else {
+ tail_start = i;
+ ++histogram[value];
+ --reps;
+ if (reps < 3) {
+ histogram[value] += reps;
+ } else {
+ reps -= 3;
+ while (reps >= 0) {
+ ++histogram[16];
+ reps >>= 2;
+ --reps;
+ }
+ }
+ }
+ }
+
+ // create huffman tree of huffman tree
+ uint8_t cost[kCodeLengthCodes] = { 0 };
+ CreateHuffmanTree(histogram, kCodeLengthCodes, 7, cost);
+ // account for rle extra bits
+ cost[16] += 2;
+ cost[17] += 3;
+
+ int tree_size = 0;
+ int bits = 6 + 3 * max_depth; // huffman tree of huffman tree cost
+ for (int i = 0; i < kCodeLengthCodes; ++i) {
+ bits += histogram[i] * cost[i]; // huffman tree bit cost
+ tree_size += histogram[i];
+ }
+ return bits;
+}
+
+template<int kSize>
+double PopulationCost(const Histogram<kSize>& histogram) {
+ if (histogram.total_count_ == 0) {
+ return 12;
+ }
+ int count = 0;
+ for (int i = 0; i < kSize && count < 5; ++i) {
+ if (histogram.data_[i] > 0) {
+ ++count;
+ }
+ }
+ if (count == 1) {
+ return 12;
+ }
+ if (count == 2) {
+ return 20 + histogram.total_count_;
+ }
+ uint8_t depth[kSize] = { 0 };
+ CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth);
+ int bits = 0;
+ for (int i = 0; i < kSize; ++i) {
+ bits += histogram.data_[i] * depth[i];
+ }
+ if (count == 3) {
+ bits += 28;
+ } else if (count == 4) {
+ bits += 37;
+ } else {
+ bits += HuffmanBitCost(depth, kSize);
+ }
+ return bits;
+}
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_BIT_COST_H_
diff --git a/brotli/enc/block_splitter.cc b/brotli/enc/block_splitter.cc
new file mode 100644
index 0000000..34363c4
--- /dev/null
+++ b/brotli/enc/block_splitter.cc
@@ -0,0 +1,390 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Block split point selection utilities.
+
+#include "./block_splitter.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <map>
+
+#include "./cluster.h"
+#include "./command.h"
+#include "./fast_log.h"
+#include "./histogram.h"
+
+namespace brotli {
+
+static const int kMaxLiteralHistograms = 48;
+static const int kMaxCommandHistograms = 50;
+static const double kLiteralBlockSwitchCost = 26;
+static const double kCommandBlockSwitchCost = 13.5;
+static const double kDistanceBlockSwitchCost = 14.6;
+static const int kLiteralStrideLength = 70;
+static const int kCommandStrideLength = 40;
+static const int kSymbolsPerLiteralHistogram = 550;
+static const int kSymbolsPerCommandHistogram = 530;
+static const int kSymbolsPerDistanceHistogram = 550;
+static const int kMinLengthForBlockSplitting = 128;
+static const int kIterMulForRefining = 2;
+static const int kMinItersForRefining = 100;
+
+void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
+ const uint8_t* data,
+ std::vector<uint8_t>* literals) {
+ // Count how many we have.
+ size_t total_length = 0;
+ for (int i = 0; i < cmds.size(); ++i) {
+ total_length += cmds[i].insert_length_;
+ }
+ if (total_length == 0) {
+ return;
+ }
+
+ // Allocate.
+ literals->resize(total_length);
+
+ // Loop again, and copy this time.
+ size_t pos = 0;
+ size_t from_pos = 0;
+ for (int i = 0; i < cmds.size() && pos < total_length; ++i) {
+ memcpy(&(*literals)[pos], data + from_pos, cmds[i].insert_length_);
+ pos += cmds[i].insert_length_;
+ from_pos += cmds[i].insert_length_ + cmds[i].copy_length_;
+ }
+}
+
+void CopyCommandsToByteArray(const std::vector<Command>& cmds,
+ std::vector<uint16_t>* insert_and_copy_codes,
+ std::vector<uint8_t>* distance_prefixes) {
+ for (int i = 0; i < cmds.size(); ++i) {
+ const Command& cmd = cmds[i];
+ insert_and_copy_codes->push_back(cmd.command_prefix_);
+ if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
+ distance_prefixes->push_back(cmd.distance_prefix_);
+ }
+ }
+}
+
+template<typename HistogramType, typename DataType>
+void InitialEntropyCodes(const DataType* data, size_t length,
+ int literals_per_histogram,
+ int max_histograms,
+ size_t stride,
+ std::vector<HistogramType>* vec) {
+ int total_histograms = length / literals_per_histogram + 1;
+ if (total_histograms > max_histograms) {
+ total_histograms = max_histograms;
+ }
+ unsigned int seed = 7;
+ int block_length = length / total_histograms;
+ for (int i = 0; i < total_histograms; ++i) {
+ int pos = length * i / total_histograms;
+ if (i != 0) {
+ pos += rand_r(&seed) % block_length;
+ }
+ if (pos + stride >= length) {
+ pos = length - stride - 1;
+ }
+ HistogramType histo;
+ histo.Add(data + pos, stride);
+ vec->push_back(histo);
+ }
+}
+
+template<typename HistogramType, typename DataType>
+void RandomSample(unsigned int* seed,
+ const DataType* data,
+ size_t length,
+ size_t stride,
+ HistogramType* sample) {
+ size_t pos = 0;
+ if (stride >= length) {
+ pos = 0;
+ stride = length;
+ } else {
+ pos = rand_r(seed) % (length - stride + 1);
+ }
+ sample->Add(data + pos, stride);
+}
+
+template<typename HistogramType, typename DataType>
+void RefineEntropyCodes(const DataType* data, size_t length,
+ size_t stride,
+ std::vector<HistogramType>* vec) {
+ int iters =
+ kIterMulForRefining * length / stride + kMinItersForRefining;
+ unsigned int seed = 7;
+ iters = ((iters + vec->size() - 1) / vec->size()) * vec->size();
+ for (int iter = 0; iter < iters; ++iter) {
+ HistogramType sample;
+ RandomSample(&seed, data, length, stride, &sample);
+ int ix = iter % vec->size();
+ (*vec)[ix].AddHistogram(sample);
+ }
+}
+
+inline static float BitCost(int total, int count) {
+ return count == 0 ? FastLog2(total) + 2 : FastLog2(total) - FastLog2(count);
+}
+
+template<typename DataType, int kSize>
+void FindBlocks(const DataType* data, const size_t length,
+ const double block_switch_bitcost,
+ const std::vector<Histogram<kSize> > &vec,
+ uint8_t *block_id) {
+ if (vec.size() <= 1) {
+ for (int i = 0; i < length; ++i) {
+ block_id[i] = 0;
+ }
+ return;
+ }
+ int vecsize = vec.size();
+ double* insert_cost = new double[kSize * vecsize];
+ memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * vecsize);
+ for (int i = 0; i < kSize; ++i) {
+ for (int j = 0; j < vecsize; ++j) {
+ insert_cost[i * vecsize + j] =
+ BitCost(vec[j].total_count_, vec[j].data_[i]);
+ }
+ }
+ double *cost = new double[vecsize];
+ memset(cost, 0, sizeof(cost[0]) * vecsize);
+ bool* switch_signal = new bool[length * vecsize];
+ memset(switch_signal, 0, sizeof(switch_signal[0]) * length * vecsize);
+ // After each iteration of this loop, cost[k] will contain the difference
+ // between the minimum cost of arriving at the current byte position using
+ // entropy code k, and the minimum cost of arriving at the current byte
+ // position. This difference is capped at the block switch cost, and if it
+ // reaches block switch cost, it means that when we trace back from the last
+ // position, we need to switch here.
+ for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
+ int ix = byte_ix * vecsize;
+ int insert_cost_ix = data[byte_ix] * vecsize;
+ double min_cost = 1e99;
+ for (int k = 0; k < vecsize; ++k) {
+ // We are coding the symbol in data[byte_ix] with entropy code k.
+ cost[k] += insert_cost[insert_cost_ix + k];
+ if (cost[k] < min_cost) {
+ min_cost = cost[k];
+ block_id[byte_ix] = k;
+ }
+ }
+ double block_switch_cost = block_switch_bitcost;
+ // More blocks for the beginning.
+ if (byte_ix < 2000) {
+ block_switch_cost *= 0.77 + 0.07 * byte_ix / 2000;
+ }
+ for (int k = 0; k < vecsize; ++k) {
+ cost[k] -= min_cost;
+ if (cost[k] >= block_switch_cost) {
+ cost[k] = block_switch_cost;
+ switch_signal[ix + k] = true;
+ }
+ }
+ }
+ // Now trace back from the last position and switch at the marked places.
+ int byte_ix = length - 1;
+ int ix = byte_ix * vecsize;
+ int cur_id = block_id[byte_ix];
+ while (byte_ix > 0) {
+ --byte_ix;
+ ix -= vecsize;
+ if (switch_signal[ix + cur_id]) {
+ cur_id = block_id[byte_ix];
+ }
+ block_id[byte_ix] = cur_id;
+ }
+ delete[] insert_cost;
+ delete[] cost;
+ delete[] switch_signal;
+}
+
+int RemapBlockIds(uint8_t* block_ids, const size_t length) {
+ std::map<uint8_t, uint8_t> new_id;
+ int next_id = 0;
+ for (int i = 0; i < length; ++i) {
+ if (new_id.find(block_ids[i]) == new_id.end()) {
+ new_id[block_ids[i]] = next_id;
+ ++next_id;
+ }
+ }
+ for (int i = 0; i < length; ++i) {
+ block_ids[i] = new_id[block_ids[i]];
+ }
+ return next_id;
+}
+
+template<typename HistogramType, typename DataType>
+void BuildBlockHistograms(const DataType* data, const size_t length,
+ uint8_t* block_ids,
+ std::vector<HistogramType>* histograms) {
+ int num_types = RemapBlockIds(block_ids, length);
+ histograms->clear();
+ histograms->resize(num_types);
+ for (int i = 0; i < length; ++i) {
+ (*histograms)[block_ids[i]].Add(data[i]);
+ }
+}
+
+template<typename HistogramType, typename DataType>
+void ClusterBlocks(const DataType* data, const size_t length,
+ uint8_t* block_ids) {
+ std::vector<HistogramType> histograms;
+ std::vector<int> block_index(length);
+ int cur_idx = 0;
+ HistogramType cur_histogram;
+ for (int i = 0; i < length; ++i) {
+ bool block_boundary = (i + 1 == length || block_ids[i] != block_ids[i + 1]);
+ block_index[i] = cur_idx;
+ cur_histogram.Add(data[i]);
+ if (block_boundary) {
+ histograms.push_back(cur_histogram);
+ cur_histogram.Clear();
+ ++cur_idx;
+ }
+ }
+ std::vector<HistogramType> clustered_histograms;
+ std::vector<int> histogram_symbols;
+ // Block ids need to fit in one byte.
+ static const int kMaxNumberOfBlockTypes = 256;
+ ClusterHistograms(histograms, 1, histograms.size(),
+ kMaxNumberOfBlockTypes,
+ &clustered_histograms,
+ &histogram_symbols);
+ for (int i = 0; i < length; ++i) {
+ block_ids[i] = histogram_symbols[block_index[i]];
+ }
+}
+
+void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) {
+ int cur_id = block_ids[0];
+ int cur_length = 1;
+ split->num_types_ = -1;
+ for (int i = 1; i < block_ids.size(); ++i) {
+ if (block_ids[i] != cur_id) {
+ split->types_.push_back(cur_id);
+ split->lengths_.push_back(cur_length);
+ split->num_types_ = std::max(split->num_types_, cur_id);
+ cur_id = block_ids[i];
+ cur_length = 0;
+ }
+ ++cur_length;
+ }
+ split->types_.push_back(cur_id);
+ split->lengths_.push_back(cur_length);
+ split->num_types_ = std::max(split->num_types_, cur_id);
+ ++split->num_types_;
+}
+
+template<typename HistogramType, typename DataType>
+void SplitByteVector(const std::vector<DataType>& data,
+ const int literals_per_histogram,
+ const int max_histograms,
+ const int sampling_stride_length,
+ const double block_switch_cost,
+ BlockSplit* split) {
+ if (data.empty()) {
+ split->num_types_ = 0;
+ return;
+ } else if (data.size() < kMinLengthForBlockSplitting) {
+ split->num_types_ = 1;
+ split->types_.push_back(0);
+ split->lengths_.push_back(data.size());
+ return;
+ }
+ std::vector<HistogramType> histograms;
+ // Find good entropy codes.
+ InitialEntropyCodes(data.data(), data.size(),
+ literals_per_histogram,
+ max_histograms,
+ sampling_stride_length,
+ &histograms);
+ RefineEntropyCodes(data.data(), data.size(),
+ sampling_stride_length,
+ &histograms);
+ // Find a good path through literals with the good entropy codes.
+ std::vector<uint8_t> block_ids(data.size());
+ for (int i = 0; i < 10; ++i) {
+ FindBlocks(data.data(), data.size(),
+ block_switch_cost,
+ histograms,
+ &block_ids[0]);
+ BuildBlockHistograms(data.data(), data.size(), &block_ids[0], &histograms);
+ }
+ ClusterBlocks<HistogramType>(data.data(), data.size(), &block_ids[0]);
+ BuildBlockSplit(block_ids, split);
+}
+
+void SplitBlock(const std::vector<Command>& cmds,
+ const uint8_t* data,
+ BlockSplit* literal_split,
+ BlockSplit* insert_and_copy_split,
+ BlockSplit* dist_split) {
+ // Create a continuous array of literals.
+ std::vector<uint8_t> literals;
+ CopyLiteralsToByteArray(cmds, data, &literals);
+
+ // Compute prefix codes for commands.
+ std::vector<uint16_t> insert_and_copy_codes;
+ std::vector<uint8_t> distance_prefixes;
+ CopyCommandsToByteArray(cmds,
+ &insert_and_copy_codes,
+ &distance_prefixes);
+
+ SplitByteVector<HistogramLiteral>(
+ literals,
+ kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
+ kLiteralStrideLength, kLiteralBlockSwitchCost,
+ literal_split);
+ SplitByteVector<HistogramCommand>(
+ insert_and_copy_codes,
+ kSymbolsPerCommandHistogram, kMaxCommandHistograms,
+ kCommandStrideLength, kCommandBlockSwitchCost,
+ insert_and_copy_split);
+ SplitByteVector<HistogramDistance>(
+ distance_prefixes,
+ kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
+ kCommandStrideLength, kDistanceBlockSwitchCost,
+ dist_split);
+}
+
+void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
+ int input_size,
+ int target_length,
+ std::vector<std::vector<Command> >* blocks) {
+ int num_blocks = input_size / target_length + 1;
+ int length_limit = input_size / num_blocks + 1;
+ int total_length = 0;
+ std::vector<Command> cur_block;
+ for (int i = 0; i < all_commands.size(); ++i) {
+ const Command& cmd = all_commands[i];
+ int cmd_length = cmd.insert_length_ + cmd.copy_length_;
+ if (total_length > length_limit) {
+ blocks->push_back(cur_block);
+ cur_block.clear();
+ total_length = 0;
+ }
+ cur_block.push_back(cmd);
+ total_length += cmd_length;
+ }
+ blocks->push_back(cur_block);
+}
+
+} // namespace brotli
diff --git a/brotli/enc/block_splitter.h b/brotli/enc/block_splitter.h
new file mode 100644
index 0000000..2a491e3
--- /dev/null
+++ b/brotli/enc/block_splitter.h
@@ -0,0 +1,77 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Block split point selection utilities.
+
+#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
+#define BROTLI_ENC_BLOCK_SPLITTER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <vector>
+#include <utility>
+
+#include "./command.h"
+
+namespace brotli {
+
+struct BlockSplit {
+ int num_types_;
+ std::vector<uint8_t> types_;
+ std::vector<int> type_codes_;
+ std::vector<int> lengths_;
+};
+
+struct BlockSplitIterator {
+ explicit BlockSplitIterator(const BlockSplit& split)
+ : split_(split), idx_(0), type_(0), length_(0) {
+ if (!split.lengths_.empty()) {
+ length_ = split.lengths_[0];
+ }
+ }
+
+ void Next() {
+ if (length_ == 0) {
+ ++idx_;
+ type_ = split_.types_[idx_];
+ length_ = split_.lengths_[idx_];
+ }
+ --length_;
+ }
+
+ const BlockSplit& split_;
+ int idx_;
+ int type_;
+ int length_;
+};
+
+void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
+ const uint8_t* data,
+ std::vector<uint8_t>* literals);
+
+void SplitBlock(const std::vector<Command>& cmds,
+ const uint8_t* data,
+ BlockSplit* literal_split,
+ BlockSplit* insert_and_copy_split,
+ BlockSplit* dist_split);
+
+void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
+ int input_size,
+ int target_length,
+ std::vector<std::vector<Command> >* blocks);
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_BLOCK_SPLITTER_H_
diff --git a/brotli/enc/cluster.h b/brotli/enc/cluster.h
new file mode 100644
index 0000000..855a88d
--- /dev/null
+++ b/brotli/enc/cluster.h
@@ -0,0 +1,288 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions for clustering similar histograms together.
+
+#ifndef BROTLI_ENC_CLUSTER_H_
+#define BROTLI_ENC_CLUSTER_H_
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <complex>
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "./bit_cost.h"
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+#include "./histogram.h"
+
+namespace brotli {
+
+struct HistogramPair {
+ int idx1;
+ int idx2;
+ bool valid;
+ double cost_combo;
+ double cost_diff;
+};
+
+struct HistogramPairComparator {
+ bool operator()(const HistogramPair& p1, const HistogramPair& p2) {
+ if (p1.cost_diff != p2.cost_diff) {
+ return p1.cost_diff > p2.cost_diff;
+ }
+ return abs(p1.idx1 - p1.idx2) > abs(p2.idx1 - p2.idx2);
+ }
+};
+
+// Returns entropy reduction of the context map when we combine two clusters.
+inline double ClusterCostDiff(int size_a, int size_b) {
+ int size_c = size_a + size_b;
+ return size_a * FastLog2(size_a) + size_b * FastLog2(size_b) -
+ size_c * FastLog2(size_c);
+}
+
+// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
+// it is below a threshold, stores the pair (idx1, idx2) in the *pairs heap.
+template<int kSize>
+void CompareAndPushToHeap(const Histogram<kSize>* out,
+ const int* cluster_size,
+ int idx1, int idx2,
+ std::vector<HistogramPair>* pairs) {
+ if (idx1 == idx2) {
+ return;
+ }
+ if (idx2 < idx1) {
+ int t = idx2;
+ idx2 = idx1;
+ idx1 = t;
+ }
+ bool store_pair = false;
+ HistogramPair p;
+ p.idx1 = idx1;
+ p.idx2 = idx2;
+ p.valid = true;
+ p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
+ p.cost_diff -= out[idx1].bit_cost_;
+ p.cost_diff -= out[idx2].bit_cost_;
+
+ if (out[idx1].total_count_ == 0) {
+ p.cost_combo = out[idx2].bit_cost_;
+ store_pair = true;
+ } else if (out[idx2].total_count_ == 0) {
+ p.cost_combo = out[idx1].bit_cost_;
+ store_pair = true;
+ } else {
+ double threshold = pairs->empty() ? 1e99 :
+ std::max(0.0, (*pairs)[0].cost_diff);
+ Histogram<kSize> combo = out[idx1];
+ combo.AddHistogram(out[idx2]);
+ double cost_combo = PopulationCost(combo);
+ if (cost_combo < threshold - p.cost_diff) {
+ p.cost_combo = cost_combo;
+ store_pair = true;
+ }
+ }
+ if (store_pair) {
+ p.cost_diff += p.cost_combo;
+ pairs->push_back(p);
+ push_heap(pairs->begin(), pairs->end(), HistogramPairComparator());
+ }
+}
+
+template<int kSize>
+void HistogramCombine(Histogram<kSize>* out,
+ int* cluster_size,
+ int* symbols,
+ int symbols_size,
+ int max_clusters) {
+ double cost_diff_threshold = 0.0;
+ int min_cluster_size = 1;
+ std::set<int> all_symbols;
+ std::vector<int> clusters;
+ for (int i = 0; i < symbols_size; ++i) {
+ if (all_symbols.find(symbols[i]) == all_symbols.end()) {
+ all_symbols.insert(symbols[i]);
+ clusters.push_back(symbols[i]);
+ }
+ }
+
+ // We maintain a heap of histogram pairs, ordered by the bit cost reduction.
+ std::vector<HistogramPair> pairs;
+ for (int idx1 = 0; idx1 < clusters.size(); ++idx1) {
+ for (int idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
+ CompareAndPushToHeap(out, cluster_size, clusters[idx1], clusters[idx2],
+ &pairs);
+ }
+ }
+
+ while (clusters.size() > min_cluster_size) {
+ if (pairs[0].cost_diff >= cost_diff_threshold) {
+ cost_diff_threshold = 1e99;
+ min_cluster_size = max_clusters;
+ continue;
+ }
+ // Take the best pair from the top of heap.
+ int best_idx1 = pairs[0].idx1;
+ int best_idx2 = pairs[0].idx2;
+ out[best_idx1].AddHistogram(out[best_idx2]);
+ out[best_idx1].bit_cost_ = pairs[0].cost_combo;
+ cluster_size[best_idx1] += cluster_size[best_idx2];
+ for (int i = 0; i < symbols_size; ++i) {
+ if (symbols[i] == best_idx2) {
+ symbols[i] = best_idx1;
+ }
+ }
+ for (int i = 0; i + 1 < clusters.size(); ++i) {
+ if (clusters[i] >= best_idx2) {
+ clusters[i] = clusters[i + 1];
+ }
+ }
+ clusters.pop_back();
+ // Invalidate pairs intersecting the just combined best pair.
+ for (int i = 0; i < pairs.size(); ++i) {
+ HistogramPair& p = pairs[i];
+ if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
+ p.idx1 == best_idx2 || p.idx2 == best_idx2) {
+ p.valid = false;
+ }
+ }
+ // Pop invalid pairs from the top of the heap.
+ while (!pairs.empty() && !pairs[0].valid) {
+ pop_heap(pairs.begin(), pairs.end(), HistogramPairComparator());
+ pairs.pop_back();
+ }
+ // Push new pairs formed with the combined histogram to the heap.
+ for (int i = 0; i < clusters.size(); ++i) {
+ CompareAndPushToHeap(out, cluster_size, best_idx1, clusters[i], &pairs);
+ }
+ }
+}
+
+// -----------------------------------------------------------------------------
+// Histogram refinement
+
+// What is the bit cost of moving histogram from cur_symbol to candidate.
+template<int kSize>
+double HistogramBitCostDistance(const Histogram<kSize>& histogram,
+ const Histogram<kSize>& candidate) {
+ if (histogram.total_count_ == 0) {
+ return 0.0;
+ }
+ Histogram<kSize> tmp = histogram;
+ tmp.AddHistogram(candidate);
+ return PopulationCost(tmp) - candidate.bit_cost_;
+}
+
+// Find the best 'out' histogram for each of the 'in' histograms.
+// Note: we assume that out[]->bit_cost_ is already up-to-date.
+template<int kSize>
+void HistogramRemap(const Histogram<kSize>* in, int in_size,
+ Histogram<kSize>* out, int* symbols) {
+ std::set<int> all_symbols;
+ for (int i = 0; i < in_size; ++i) {
+ all_symbols.insert(symbols[i]);
+ }
+ for (int i = 0; i < in_size; ++i) {
+ int best_out = i == 0 ? symbols[0] : symbols[i - 1];
+ double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
+ for (std::set<int>::const_iterator k = all_symbols.begin();
+ k != all_symbols.end(); ++k) {
+ const double cur_bits = HistogramBitCostDistance(in[i], out[*k]);
+ if (cur_bits < best_bits) {
+ best_bits = cur_bits;
+ best_out = *k;
+ }
+ }
+ symbols[i] = best_out;
+ }
+
+ // Recompute each out based on raw and symbols.
+ for (std::set<int>::const_iterator k = all_symbols.begin();
+ k != all_symbols.end(); ++k) {
+ out[*k].Clear();
+ }
+ for (int i = 0; i < in_size; ++i) {
+ out[symbols[i]].AddHistogram(in[i]);
+ }
+}
+
+// Reorder histograms in *out so that the new symbols in *symbols come in
+// increasing order.
+template<int kSize>
+void HistogramReindex(std::vector<Histogram<kSize> >* out,
+ std::vector<int>* symbols) {
+ std::vector<Histogram<kSize> > tmp(*out);
+ std::map<int, int> new_index;
+ int next_index = 0;
+ for (int i = 0; i < symbols->size(); ++i) {
+ if (new_index.find((*symbols)[i]) == new_index.end()) {
+ new_index[(*symbols)[i]] = next_index;
+ (*out)[next_index] = tmp[(*symbols)[i]];
+ ++next_index;
+ }
+ }
+ out->resize(next_index);
+ for (int i = 0; i < symbols->size(); ++i) {
+ (*symbols)[i] = new_index[(*symbols)[i]];
+ }
+}
+
+// Clusters similar histograms in 'in' together, the selected histograms are
+// placed in 'out', and for each index in 'in', *histogram_symbols will
+// indicate which of the 'out' histograms is the best approximation.
+template<int kSize>
+void ClusterHistograms(const std::vector<Histogram<kSize> >& in,
+ int num_contexts, int num_blocks,
+ int max_histograms,
+ std::vector<Histogram<kSize> >* out,
+ std::vector<int>* histogram_symbols) {
+ const int in_size = num_contexts * num_blocks;
+ std::vector<int> cluster_size(in_size, 1);
+ out->resize(in_size);
+ histogram_symbols->resize(in_size);
+ for (int i = 0; i < in_size; ++i) {
+ (*out)[i] = in[i];
+ (*out)[i].bit_cost_ = PopulationCost(in[i]);
+ (*histogram_symbols)[i] = i;
+ }
+
+ // Collapse similar histograms within a block type.
+ if (num_contexts > 1) {
+ for (int i = 0; i < num_blocks; ++i) {
+ HistogramCombine(&(*out)[0], &cluster_size[0],
+ &(*histogram_symbols)[i * num_contexts], num_contexts,
+ max_histograms);
+ }
+ }
+
+ // Collapse similar histograms.
+ HistogramCombine(&(*out)[0], &cluster_size[0],
+ &(*histogram_symbols)[0], in_size,
+ max_histograms);
+
+ // Find the optimal map from original histograms to the final ones.
+ HistogramRemap(&in[0], in_size, &(*out)[0], &(*histogram_symbols)[0]);
+
+ // Convert the context map to a canonical form.
+ HistogramReindex(out, histogram_symbols);
+}
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_CLUSTER_H_
diff --git a/brotli/enc/command.h b/brotli/enc/command.h
new file mode 100644
index 0000000..7a9f481
--- /dev/null
+++ b/brotli/enc/command.h
@@ -0,0 +1,46 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This class models a sequence of literals and a backward reference copy.
+
+#ifndef BROTLI_ENC_COMMAND_H_
+#define BROTLI_ENC_COMMAND_H_
+
+#include <stdint.h>
+
+namespace brotli {
+
+// Command holds a sequence of literals and a backward reference copy.
+class Command {
+ public:
+ Command() : insert_length_(0), copy_length_(0), copy_length_code_(0),
+ copy_distance_(0), distance_code_(0),
+ distance_prefix_(0), command_prefix_(0),
+ distance_extra_bits_(0), distance_extra_bits_value_(0) {}
+
+ uint32_t insert_length_;
+ uint32_t copy_length_;
+ uint32_t copy_length_code_;
+ uint32_t copy_distance_;
+ // Values <= 16 are short codes, values > 16 are distances shifted by 16.
+ uint32_t distance_code_;
+ uint16_t distance_prefix_;
+ uint16_t command_prefix_;
+ int distance_extra_bits_;
+ uint32_t distance_extra_bits_value_;
+};
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_COMMAND_H_
diff --git a/brotli/enc/context.h b/brotli/enc/context.h
new file mode 100644
index 0000000..9b015d2
--- /dev/null
+++ b/brotli/enc/context.h
@@ -0,0 +1,185 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions to map previous bytes into a context id.
+
+#ifndef BROTLI_ENC_CONTEXT_H_
+#define BROTLI_ENC_CONTEXT_H_
+
+#include <stdint.h>
+
+namespace brotli {
+
+// Second-order context lookup table for UTF8 byte streams.
+//
+// If p1 and p2 are the previous two bytes, we calculate the context as
+//
+// context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
+//
+// If the previous two bytes are ASCII characters (i.e. < 128), this will be
+// equivalent to
+//
+// context = 4 * context1(p1) + context2(p2),
+//
+// where context1 is based on the previous byte in the following way:
+//
+// 0 : non-ASCII control
+// 1 : \t, \n, \r
+// 2 : space
+// 3 : other punctuation
+// 4 : " '
+// 5 : %
+// 6 : ( < [ {
+// 7 : ) > ] }
+// 8 : , ; :
+// 9 : .
+// 10 : =
+// 11 : number
+// 12 : upper-case vowel
+// 13 : upper-case consonant
+// 14 : lower-case vowel
+// 15 : lower-case consonant
+//
+// and context2 is based on the second last byte:
+//
+// 0 : control, space
+// 1 : punctuation
+// 2 : upper-case letter, number
+// 3 : lower-case letter
+//
+// If the last byte is ASCII, and the second last byte is not (in a valid UTF8
+// stream it will be a continuation byte, value between 128 and 191), the
+// context is the same as if the second last byte was an ASCII control or space.
+//
+// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
+// be a continuation byte and the context id is 2 or 3 depending on the LSB of
+// the last byte and to a lesser extent on the second last byte if it is ASCII.
+//
+// If the last byte is a UTF8 continuation byte, the second last byte can be:
+// - continuation byte: the next byte is probably ASCII or lead byte (assuming
+// 4-byte UTF8 characters are rare) and the context id is 0 or 1.
+// - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
+// - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
+//
+// The possible value combinations of the previous two bytes, the range of
+// context ids and the type of the next byte is summarized in the table below:
+//
+// |--------\-----------------------------------------------------------------|
+// | \ Last byte |
+// | Second \---------------------------------------------------------------|
+// | last byte \ ASCII | cont. byte | lead byte |
+// | \ (0-127) | (128-191) | (192-) |
+// |=============|===================|=====================|==================|
+// | ASCII | next: ASCII/lead | not valid | next: cont. |
+// | (0-127) | context: 4 - 63 | | context: 2 - 3 |
+// |-------------|-------------------|---------------------|------------------|
+// | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
+// | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
+// |-------------|-------------------|---------------------|------------------|
+// | lead byte | not valid | next: ASCII/lead | not valid |
+// | (192-207) | | context: 0 - 1 | |
+// |-------------|-------------------|---------------------|------------------|
+// | lead byte | not valid | next: cont. | not valid |
+// | (208-) | | context: 2 - 3 | |
+// |-------------|-------------------|---------------------|------------------|
+static const uint8_t kUTF8ContextLookup[512] = {
+ // Last byte.
+ //
+ // ASCII range.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
+ 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
+ 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
+ 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
+ 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
+ // UTF8 continuation byte range.
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ // UTF8 lead byte range.
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+ // Second last byte.
+ //
+ // ASCII range.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
+ 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
+ // UTF8 continuation byte range.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // UTF8 lead byte range.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+};
+
// Context lookup table for small signed integers.
// Maps a byte, viewed as a two's-complement signed value, to one of eight
// buckets by magnitude: 0 for zero, 1..3 for positive values of increasing
// magnitude, 4..6 for negative values of decreasing magnitude, 7 for -1.
static const int kSigned3BitContextLookup[] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
};
+
// Literal context modes; selects how Context() maps the two previous bytes
// to a context id.
enum ContextType {
  CONTEXT_LSB6 = 0,    // low 6 bits of the previous byte
  CONTEXT_MSB6 = 1,    // high 6 bits of the previous byte
  CONTEXT_UTF8 = 2,    // kUTF8ContextLookup on the previous two bytes
  CONTEXT_SIGNED = 3   // kSigned3BitContextLookup on the previous two bytes
};
+
+static inline uint8_t Context(uint8_t p1, uint8_t p2, int mode) {
+ switch (mode) {
+ case CONTEXT_LSB6:
+ return p1 & 0x3f;
+ case CONTEXT_MSB6:
+ return p1 >> 2;
+ case CONTEXT_UTF8:
+ return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
+ case CONTEXT_SIGNED:
+ return (kSigned3BitContextLookup[p1] << 3) + kSigned3BitContextLookup[p2];
+ default:
+ return 0;
+ }
+}
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_CONTEXT_H_
diff --git a/brotli/enc/encode.cc b/brotli/enc/encode.cc
new file mode 100644
index 0000000..b492421
--- /dev/null
+++ b/brotli/enc/encode.cc
@@ -0,0 +1,896 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Implementation of Brotli compressor.
+
+#include "./encode.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "./backward_references.h"
+#include "./bit_cost.h"
+#include "./block_splitter.h"
+#include "./cluster.h"
+#include "./context.h"
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+#include "./hash.h"
+#include "./histogram.h"
+#include "./literal_cost.h"
+#include "./prefix.h"
+#include "./write_bits.h"
+
+namespace brotli {
+
+static const int kWindowBits = 22;
+// To make decoding faster, we allow the decoder to write 16 bytes ahead in
+// its ringbuffer, therefore the encoder has to decrease max distance by this
+// amount.
+static const int kDecoderRingBufferWriteAheadSlack = 16;
+static const int kMaxBackwardDistance =
+ (1 << kWindowBits) - kDecoderRingBufferWriteAheadSlack;
+
+static const int kMetaBlockSizeBits = 21;
+static const int kRingBufferBits = 23;
+static const int kRingBufferMask = (1 << kRingBufferBits) - 1;
+
+template<int kSize>
+double Entropy(const std::vector<Histogram<kSize> >& histograms) {
+ double retval = 0;
+ for (int i = 0; i < histograms.size(); ++i) {
+ retval += histograms[i].EntropyBitCost();
+ }
+ return retval;
+}
+
+template<int kSize>
+double TotalBitCost(const std::vector<Histogram<kSize> >& histograms) {
+ double retval = 0;
+ for (int i = 0; i < histograms.size(); ++i) {
+ retval += PopulationCost(histograms[i]);
+ }
+ return retval;
+}
+
+void EncodeVarLenUint8(int n, int* storage_ix, uint8_t* storage) {
+ if (n == 0) {
+ WriteBits(1, 0, storage_ix, storage);
+ } else {
+ WriteBits(1, 1, storage_ix, storage);
+ int nbits = Log2Floor(n);
+ WriteBits(3, nbits, storage_ix, storage);
+ if (nbits > 0) {
+ WriteBits(nbits, n - (1 << nbits), storage_ix, storage);
+ }
+ }
+}
+
// Writes the meta-block header: the ISLAST bit, for last blocks an
// "is empty" bit, then the (length - 1) in 4-bit nibbles preceded by a
// 2-bit nibble-count class, and for non-last blocks an "is uncompressed"
// bit.
void EncodeMetaBlockLength(size_t meta_block_size,
                           bool is_last,
                           bool is_uncompressed,
                           int* storage_ix, uint8_t* storage) {
  WriteBits(1, is_last, storage_ix, storage);
  if (is_last) {
    // An empty last meta-block is signalled with a single extra bit.
    if (meta_block_size == 0) {
      WriteBits(1, 1, storage_ix, storage);
      return;
    }
    WriteBits(1, 0, storage_ix, storage);
  }
  // Length is stored minus one, in at least four nibbles (16 bits).
  --meta_block_size;
  int num_bits = Log2Floor(meta_block_size) + 1;
  if (num_bits < 16) {
    num_bits = 16;
  }
  // (num_bits - 13) >> 2 equals ceil(num_bits / 4) - 4, i.e. the number of
  // nibbles beyond the minimum of four.
  WriteBits(2, (num_bits - 13) >> 2, storage_ix, storage);
  // Emit ceil(num_bits / 4) nibbles, least significant first.
  while (num_bits > 0) {
    WriteBits(4, meta_block_size & 0xf, storage_ix, storage);
    meta_block_size >>= 4;
    num_bits -= 4;
  }
  if (!is_last) {
    WriteBits(1, is_uncompressed, storage_ix, storage);
  }
}
+
+template<int kSize>
+void EntropyEncode(int val, const EntropyCode<kSize>& code,
+ int* storage_ix, uint8_t* storage) {
+ if (code.count_ <= 1) {
+ return;
+ };
+ WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
+}
+
+void StoreHuffmanTreeOfHuffmanTreeToBitMask(
+ const uint8_t* code_length_bitdepth,
+ int* storage_ix, uint8_t* storage) {
+ static const uint8_t kStorageOrder[kCodeLengthCodes] = {
+ 1, 2, 3, 4, 0, 17, 5, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ };
+ // Throw away trailing zeros:
+ int codes_to_store = kCodeLengthCodes;
+ for (; codes_to_store > 0; --codes_to_store) {
+ if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
+ break;
+ }
+ }
+ int num_codes = 0;
+ for (int i = 0; i < codes_to_store; ++i) {
+ if (code_length_bitdepth[kStorageOrder[i]] != 0) {
+ ++num_codes;
+ }
+ }
+ if (num_codes == 1) {
+ codes_to_store = kCodeLengthCodes;
+ }
+ int skip_some = 0; // skips none.
+ if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
+ code_length_bitdepth[kStorageOrder[1]] == 0) {
+ skip_some = 2; // skips two.
+ if (code_length_bitdepth[kStorageOrder[2]] == 0) {
+ skip_some = 3; // skips three.
+ }
+ }
+ WriteBits(2, skip_some, storage_ix, storage);
+ for (int i = skip_some; i < codes_to_store; ++i) {
+ uint8_t len[] = { 2, 4, 3, 2, 2, 4 };
+ uint8_t bits[] = { 0, 5, 1, 3, 2, 13 };
+ int v = code_length_bitdepth[kStorageOrder[i]];
+ WriteBits(len[v], bits[v], storage_ix, storage);
+ }
+}
+
// Writes a serialized Huffman tree (a sequence of code-length symbols)
// using the given code-length entropy code. Repeat symbols 16 and 17 are
// followed by 2 and 3 extra bits respectively.
void StoreHuffmanTreeToBitMask(
    const uint8_t* huffman_tree,
    const uint8_t* huffman_tree_extra_bits,
    const int huffman_tree_size,
    const EntropyCode<kCodeLengthCodes>& entropy,
    int* storage_ix, uint8_t* storage) {
  for (int i = 0; i < huffman_tree_size; ++i) {
    const int ix = huffman_tree[i];
    const int extra_bits = huffman_tree_extra_bits[i];
    EntropyEncode(ix, entropy, storage_ix, storage);
    // Repeat codes carry their repeat count in extra bits.
    switch (ix) {
      case 16:
        WriteBits(2, extra_bits, storage_ix, storage);
        break;
      case 17:
        WriteBits(3, extra_bits, storage_ix, storage);
        break;
    }
  }
}
+
// Stores one Huffman code: a minimal tree for empty codes, a "simple"
// representation for codes with 1-4 used symbols, otherwise the general
// code-length-coded representation.
template<int kSize>
void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
                      int* storage_ix, uint8_t* storage) {
  const uint8_t *depth = &code.depth_[0];
  // max_bits = number of bits needed to store one symbol of this alphabet.
  int max_bits_counter = alphabet_size - 1;
  int max_bits = 0;
  while (max_bits_counter) {
    max_bits_counter >>= 1;
    ++max_bits;
  }
  if (code.count_ == 0) {   // emit minimal tree for empty cases
    // bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0
    WriteBits(4 + max_bits, 0x1, storage_ix, storage);
    return;
  }
  if (code.count_ <= 4) {
    int symbols[4];
    // Quadratic sort.
    int k, j;
    for (k = 0; k < code.count_; ++k) {
      symbols[k] = code.symbols_[k];
    }
    // Sort the used symbols by ascending code depth.
    for (k = 0; k < code.count_; ++k) {
      for (j = k + 1; j < code.count_; ++j) {
        if (depth[symbols[j]] < depth[symbols[k]]) {
          int t = symbols[k];
          symbols[k] = symbols[j];
          symbols[j] = t;
        }
      }
    }
    // Small tree marker to encode 1-4 symbols.
    WriteBits(2, 1, storage_ix, storage);
    WriteBits(2, code.count_ - 1, storage_ix, storage);
    for (int i = 0; i < code.count_; ++i) {
      WriteBits(max_bits, symbols[i], storage_ix, storage);
    }
    if (code.count_ == 4) {
      // One extra bit distinguishes the flat (all depths == 2) 4-symbol
      // shape from the skewed one.
      if (depth[symbols[0]] == 2 &&
          depth[symbols[1]] == 2 &&
          depth[symbols[2]] == 2 &&
          depth[symbols[3]] == 2) {
        WriteBits(1, 0, storage_ix, storage);
      } else {
        WriteBits(1, 1, storage_ix, storage);
      }
    }
    return;
  }
  // General case: serialize the depth array via WriteHuffmanTree, then
  // entropy-code that symbol sequence with its own code-length code.
  uint8_t huffman_tree[kSize];
  uint8_t huffman_tree_extra_bits[kSize];
  int huffman_tree_size = 0;
  WriteHuffmanTree(depth,
                   alphabet_size,
                   &huffman_tree[0],
                   &huffman_tree_extra_bits[0],
                   &huffman_tree_size);
  Histogram<kCodeLengthCodes> huffman_tree_histogram;
  memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
  for (int i = 0; i < huffman_tree_size; ++i) {
    huffman_tree_histogram.Add(huffman_tree[i]);
  }
  EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
  BuildEntropyCode(huffman_tree_histogram, 5, kCodeLengthCodes,
                   &huffman_tree_entropy);
  StoreHuffmanTreeOfHuffmanTreeToBitMask(
      &huffman_tree_entropy.depth_[0], storage_ix, storage);
  StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
                            huffman_tree_size, huffman_tree_entropy,
                            storage_ix, storage);
}
+
+template<int kSize>
+void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
+ int alphabet_size,
+ int* storage_ix, uint8_t* storage) {
+ for (int i = 0; i < codes.size(); ++i) {
+ StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
+ }
+}
+
// Writes one command prefix symbol followed by its insert-length and
// copy-length extra bits.
void EncodeCommand(const Command& cmd,
                   const EntropyCodeCommand& entropy,
                   int* storage_ix, uint8_t* storage) {
  int code = cmd.command_prefix_;
  EntropyEncode(code, entropy, storage_ix, storage);
  // ComputeCommandPrefixes adds 128 to prefixes of commands that store an
  // explicit distance; undo the offset before the length-table lookups.
  if (code >= 128) {
    code -= 128;
  }
  int insert_extra_bits = InsertLengthExtraBits(code);
  uint64_t insert_extra_bits_val =
      cmd.insert_length_ - InsertLengthOffset(code);
  int copy_extra_bits = CopyLengthExtraBits(code);
  uint64_t copy_extra_bits_val = cmd.copy_length_code_ - CopyLengthOffset(code);
  if (insert_extra_bits > 0) {
    WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
  }
  if (copy_extra_bits > 0) {
    WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
  }
}
+
// Writes the distance prefix symbol and its extra bits, both precomputed
// by PrefixEncodeCopyDistance (see ComputeCommandPrefixes).
void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
                        int* storage_ix, uint8_t* storage) {
  int code = cmd.distance_prefix_;
  int extra_bits = cmd.distance_extra_bits_;
  uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
  EntropyEncode(code, entropy, storage_ix, storage);
  if (extra_bits > 0) {
    WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
  }
}
+
+void ComputeDistanceShortCodes(std::vector<Command>* cmds,
+ int* dist_ringbuffer,
+ size_t* ringbuffer_idx) {
+ static const int kIndexOffset[16] = {
+ 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
+ };
+ static const int kValueOffset[16] = {
+ 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
+ };
+ for (int i = 0; i < cmds->size(); ++i) {
+ int cur_dist = (*cmds)[i].copy_distance_;
+ if (cur_dist == 0) break;
+ int dist_code = cur_dist + 16;
+ int limits[16] = { 0, 4, 10, 11,
+ 6, 6, 11, 11,
+ 11, 11, 11, 11,
+ 12, 12, 12, 12 };
+ for (int k = 0; k < 16; ++k) {
+ // Only accept more popular choices.
+ if (cur_dist < limits[k]) {
+ // Typically unpopular ranges, don't replace a short distance
+ // with them.
+ continue;
+ }
+ int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
+ kValueOffset[k]);
+ if (cur_dist == comp) {
+ dist_code = k + 1;
+ break;
+ }
+ }
+ if (dist_code > 1) {
+ dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
+ ++(*ringbuffer_idx);
+ }
+ (*cmds)[i].distance_code_ = dist_code;
+ }
+}
+
// Computes each command's command prefix symbol, and for copying commands
// the distance prefix plus extra bits. Commands with command prefix < 128
// and distance prefix 0 get the sentinel distance_prefix_ 0xffff (no
// distance is stored for them, see StoreMetaBlock); all other commands get
// 128 added to their command prefix to signal a stored distance.
void ComputeCommandPrefixes(std::vector<Command>* cmds,
                            int num_direct_distance_codes,
                            int distance_postfix_bits) {
  for (int i = 0; i < cmds->size(); ++i) {
    Command* cmd = &(*cmds)[i];
    cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
                                         cmd->copy_length_code_);
    if (cmd->copy_length_code_ > 0) {
      PrefixEncodeCopyDistance(cmd->distance_code_,
                               num_direct_distance_codes,
                               distance_postfix_bits,
                               &cmd->distance_prefix_,
                               &cmd->distance_extra_bits_,
                               &cmd->distance_extra_bits_value_);
    }
    if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
      cmd->distance_prefix_ = 0xffff;
    } else {
      cmd->command_prefix_ += 128;
    }
  }
}
+
// Returns the index of the first occurrence of value in v, or -1 if value
// is not present.
int IndexOf(const std::vector<int>& v, int value) {
  // std::find replaces the hand-rolled (and signed/unsigned-mixing) loop.
  std::vector<int>::const_iterator it = std::find(v.begin(), v.end(), value);
  if (it == v.end()) {
    return -1;
  }
  return static_cast<int>(it - v.begin());
}
+
// Moves the element at position index to the front of v, shifting the
// elements before it one slot back; the relative order of all other
// elements is preserved.
void MoveToFront(std::vector<int>* v, int index) {
  // std::rotate performs exactly the original shift-and-place loop.
  std::rotate(v->begin(), v->begin() + index, v->begin() + index + 1);
}
+
// Applies the move-to-front transform: each output element is the current
// position of v[i] in a list that starts as 0..max(v) and is reordered by
// moving each referenced value to the front after use.
std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
  if (v.empty()) return v;
  const int max_value = *std::max_element(v.begin(), v.end());
  std::vector<int> mtf(max_value + 1);
  for (size_t i = 0; i < mtf.size(); ++i) {
    mtf[i] = static_cast<int>(i);
  }
  std::vector<int> result(v.size());
  for (size_t i = 0; i < v.size(); ++i) {
    std::vector<int>::iterator where =
        std::find(mtf.begin(), mtf.end(), v[i]);
    result[i] = static_cast<int>(where - mtf.begin());
    // Bring the value just used to the front of the list.
    std::rotate(mtf.begin(), where, where + 1);
  }
  return result;
}
+
+// Finds runs of zeros in v_in and replaces them with a prefix code of the run
+// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
+// shifted by *max_length_prefix. Will not create prefix codes bigger than the
+// initial value of *max_run_length_prefix. The prefix code of run length L is
+// simply Log2Floor(L) and the number of extra bits is the same as the prefix
+// code.
void RunLengthCodeZeros(const std::vector<int>& v_in,
                        int* max_run_length_prefix,
                        std::vector<int>* v_out,
                        std::vector<int>* extra_bits) {
  // First pass: find the longest zero run to bound the needed prefix size.
  int max_reps = 0;
  for (int i = 0; i < v_in.size();) {
    for (; i < v_in.size() && v_in[i] != 0; ++i) ;
    int reps = 0;
    for (; i < v_in.size() && v_in[i] == 0; ++i) {
      ++reps;
    }
    max_reps = std::max(reps, max_reps);
  }
  // Never use a larger prefix than the caller allows.
  int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
  *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
  // Second pass: non-zero values are shifted up by the prefix count; zero
  // runs become run-length prefix symbols plus extra bits.
  for (int i = 0; i < v_in.size();) {
    if (v_in[i] != 0) {
      v_out->push_back(v_in[i] + *max_run_length_prefix);
      extra_bits->push_back(0);
      ++i;
    } else {
      int reps = 1;
      for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
        ++reps;
      }
      i += reps;
      // Emit run-length symbols until the run is covered; a symbol with
      // prefix p plus its extra bits covers up to 2^(p+1) - 1 zeros.
      while (reps) {
        if (reps < (2 << *max_run_length_prefix)) {
          int run_length_prefix = Log2Floor(reps);
          v_out->push_back(run_length_prefix);
          extra_bits->push_back(reps - (1 << run_length_prefix));
          break;
        } else {
          v_out->push_back(*max_run_length_prefix);
          extra_bits->push_back((1 << *max_run_length_prefix) - 1);
          reps -= (2 << *max_run_length_prefix) - 1;
        }
      }
    }
  }
}
+
+// Returns a maximum zero-run-length-prefix value such that run-length coding
+// zeros in v with this maximum prefix value and then encoding the resulting
+// histogram and entropy-coding v produces the least amount of bits.
int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
  int min_cost = std::numeric_limits<int>::max();
  int best_max_prefix = 0;
  // Try every allowed cap and keep the one with the smallest total cost.
  for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
    std::vector<int> rle_symbols;
    std::vector<int> extra_bits;
    int max_run_length_prefix = max_prefix;
    RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
    // If the data cannot use a prefix this large, larger caps cannot help.
    if (max_run_length_prefix < max_prefix) break;
    HistogramLiteral histogram;
    for (int i = 0; i < rle_symbols.size(); ++i) {
      histogram.Add(rle_symbols[i]);
    }
    int bit_cost = PopulationCost(histogram);
    // 4 bits are spent signalling the chosen cap (see EncodeContextMap).
    if (max_prefix > 0) {
      bit_cost += 4;
    }
    // A run-length symbol i carries i extra bits.
    for (int i = 1; i <= max_prefix; ++i) {
      bit_cost += histogram.data_[i] * i;  // extra bits
    }
    if (bit_cost < min_cost) {
      min_cost = bit_cost;
      best_max_prefix = max_prefix;
    }
  }
  return best_max_prefix;
}
+
// Encodes a context map: the cluster count, then (for more than one
// cluster) the move-to-front + zero-run-length transformed map under its
// own Huffman code.
void EncodeContextMap(const std::vector<int>& context_map,
                      int num_clusters,
                      int* storage_ix, uint8_t* storage) {
  EncodeVarLenUint8(num_clusters - 1, storage_ix, storage);

  // A single cluster needs no map: everything uses histogram 0.
  if (num_clusters == 1) {
    return;
  }

  std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
  std::vector<int> rle_symbols;
  std::vector<int> extra_bits;
  int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
  RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
                     &rle_symbols, &extra_bits);
  HistogramContextMap symbol_histogram;
  for (int i = 0; i < rle_symbols.size(); ++i) {
    symbol_histogram.Add(rle_symbols[i]);
  }
  EntropyCodeContextMap symbol_code;
  BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
                   &symbol_code);
  // Signal whether zero-run-length coding is in use, and its cap.
  bool use_rle = max_run_length_prefix > 0;
  WriteBits(1, use_rle, storage_ix, storage);
  if (use_rle) {
    WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
  }
  StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
                   storage_ix, storage);
  for (int i = 0; i < rle_symbols.size(); ++i) {
    EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
    // Run-length symbols carry their run length in extra bits.
    if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
      WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
    }
  }
  WriteBits(1, 1, storage_ix, storage);  // use move-to-front
}
+
+template<int kSize>
+void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
+ int alphabet_size,
+ std::vector<EntropyCode<kSize> >* entropy_codes) {
+ entropy_codes->resize(histograms.size());
+ for (int i = 0; i < histograms.size(); ++i) {
+ BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
+ }
+}
+
// Entropy codes for one block split: one over block type codes and one
// over block length prefixes.
struct BlockSplitCode {
  EntropyCodeBlockType block_type_code;
  EntropyCodeBlockLength block_len_code;
};
+
// Writes a block length as a length-prefix symbol plus its extra bits.
void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
                       int length,
                       int* storage_ix, uint8_t* storage) {
  int len_code = BlockLengthPrefix(length);
  int extra_bits = BlockLengthExtraBits(len_code);
  int extra_bits_value = length - BlockLengthOffset(len_code);
  EntropyEncode(len_code, entropy, storage_ix, storage);

  if (extra_bits > 0) {
    WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
  }
}
+
// Computes a short code for every block type in the split: 0 when the type
// equals the type seen two blocks earlier, 1 when it equals the previous
// type plus one, otherwise the raw type plus 2.
void ComputeBlockTypeShortCodes(BlockSplit* split) {
  if (split->num_types_ <= 1) {
    split->num_types_ = 1;
    return;
  }
  // Ring buffer of the two most recent block types, seeded with 0 and 1.
  int ringbuffer[2] = { 0, 1 };
  size_t index = 0;
  for (int i = 0; i < split->types_.size(); ++i) {
    int type = split->types_[i];
    int type_code;
    if (type == ringbuffer[index & 1]) {
      type_code = 0;
    } else if (type == ringbuffer[(index - 1) & 1] + 1) {
      type_code = 1;
    } else {
      type_code = type + 2;
    }
    ringbuffer[index & 1] = type;
    ++index;
    split->type_codes_.push_back(type_code);
  }
}
+
// Builds the entropy codes for one block split's type and length streams,
// stores them, and stores the first block's length.
void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
                                  BlockSplitCode* code,
                                  int* storage_ix, uint8_t* storage) {
  EncodeVarLenUint8(split.num_types_ - 1, storage_ix, storage);
  // A single block type needs no codes at all.
  if (split.num_types_ == 1) {
    return;
  }

  HistogramBlockType type_histo;
  for (int i = 0; i < split.type_codes_.size(); ++i) {
    type_histo.Add(split.type_codes_[i]);
  }
  // The type alphabet has two extra symbols for the short codes 0 and 1.
  BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
                   &code->block_type_code);
  HistogramBlockLength length_histo;
  for (int i = 0; i < split.lengths_.size(); ++i) {
    length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
  }
  BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
                   &code->block_len_code);
  StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
                   storage_ix, storage);
  StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
                   storage_ix, storage);
  EncodeBlockLength(code->block_len_code, split.lengths_[0],
                    storage_ix, storage);
}
+
// Advances the block-split iterator by one element; whenever the current
// block is exhausted, switches to the next block and emits its type code
// and length.
void MoveAndEncode(const BlockSplitCode& code,
                   BlockSplitIterator* it,
                   int* storage_ix, uint8_t* storage) {
  if (it->length_ == 0) {
    ++it->idx_;
    it->type_ = it->split_.types_[it->idx_];
    it->length_ = it->split_.lengths_[it->idx_];
    int type_code = it->split_.type_codes_[it->idx_];
    EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
    EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
  }
  --it->length_;
}
+
+struct EncodingParams {
+ int num_direct_distance_codes;
+ int distance_postfix_bits;
+ int literal_context_mode;
+};
+
+struct MetaBlock {
+ std::vector<Command> cmds;
+ EncodingParams params;
+ BlockSplit literal_split;
+ BlockSplit command_split;
+ BlockSplit distance_split;
+ std::vector<int> literal_context_modes;
+ std::vector<int> literal_context_map;
+ std::vector<int> distance_context_map;
+ std::vector<HistogramLiteral> literal_histograms;
+ std::vector<HistogramCommand> command_histograms;
+ std::vector<HistogramDistance> distance_histograms;
+};
+
+void BuildMetaBlock(const EncodingParams& params,
+ const std::vector<Command>& cmds,
+ const uint8_t* ringbuffer,
+ const size_t pos,
+ const size_t mask,
+ MetaBlock* mb) {
+ mb->cmds = cmds;
+ mb->params = params;
+ if (cmds.empty()) {
+ return;
+ }
+ ComputeCommandPrefixes(&mb->cmds,
+ mb->params.num_direct_distance_codes,
+ mb->params.distance_postfix_bits);
+ SplitBlock(mb->cmds,
+ &ringbuffer[pos & mask],
+ &mb->literal_split,
+ &mb->command_split,
+ &mb->distance_split);
+ ComputeBlockTypeShortCodes(&mb->literal_split);
+ ComputeBlockTypeShortCodes(&mb->command_split);
+ ComputeBlockTypeShortCodes(&mb->distance_split);
+
+ mb->literal_context_modes.resize(mb->literal_split.num_types_,
+ mb->params.literal_context_mode);
+
+
+ int num_literal_contexts =
+ mb->literal_split.num_types_ << kLiteralContextBits;
+ int num_distance_contexts =
+ mb->distance_split.num_types_ << kDistanceContextBits;
+ std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
+ mb->command_histograms.resize(mb->command_split.num_types_);
+ std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
+ BuildHistograms(mb->cmds,
+ mb->literal_split,
+ mb->command_split,
+ mb->distance_split,
+ ringbuffer,
+ pos,
+ mask,
+ mb->literal_context_modes,
+ &literal_histograms,
+ &mb->command_histograms,
+ &distance_histograms);
+
+ // Histogram ids need to fit in one byte.
+ static const int kMaxNumberOfHistograms = 256;
+
+ mb->literal_histograms = literal_histograms;
+ ClusterHistograms(literal_histograms,
+ 1 << kLiteralContextBits,
+ mb->literal_split.num_types_,
+ kMaxNumberOfHistograms,
+ &mb->literal_histograms,
+ &mb->literal_context_map);
+
+ mb->distance_histograms = distance_histograms;
+ ClusterHistograms(distance_histograms,
+ 1 << kDistanceContextBits,
+ mb->distance_split.num_types_,
+ kMaxNumberOfHistograms,
+ &mb->distance_histograms,
+ &mb->distance_context_map);
+}
+
+size_t MetaBlockLength(const std::vector<Command>& cmds) {
+ size_t length = 0;
+ for (int i = 0; i < cmds.size(); ++i) {
+ const Command& cmd = cmds[i];
+ length += cmd.insert_length_ + cmd.copy_length_;
+ }
+ return length;
+}
+
+void StoreMetaBlock(const MetaBlock& mb,
+ const bool is_last,
+ const uint8_t* ringbuffer,
+ const size_t mask,
+ size_t* pos,
+ int* storage_ix, uint8_t* storage) {
+ size_t length = MetaBlockLength(mb.cmds);
+ const size_t end_pos = *pos + length;
+ EncodeMetaBlockLength(length,
+ is_last,
+ false,
+ storage_ix, storage);
+ if (length == 0) {
+ return;
+ }
+ BlockSplitCode literal_split_code;
+ BlockSplitCode command_split_code;
+ BlockSplitCode distance_split_code;
+ BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
+ storage_ix, storage);
+ BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
+ storage_ix, storage);
+ BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
+ storage_ix, storage);
+ WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
+ WriteBits(4,
+ mb.params.num_direct_distance_codes >>
+ mb.params.distance_postfix_bits, storage_ix, storage);
+ int num_distance_codes =
+ kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
+ (48 << mb.params.distance_postfix_bits);
+ for (int i = 0; i < mb.literal_split.num_types_; ++i) {
+ WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
+ }
+ EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(), storage_ix, storage);
+ EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(), storage_ix, storage);
+ std::vector<EntropyCodeLiteral> literal_codes;
+ std::vector<EntropyCodeCommand> command_codes;
+ std::vector<EntropyCodeDistance> distance_codes;
+ BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
+ BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
+ &command_codes);
+ BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
+ &distance_codes);
+ StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
+ StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
+ StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
+ BlockSplitIterator literal_it(mb.literal_split);
+ BlockSplitIterator command_it(mb.command_split);
+ BlockSplitIterator distance_it(mb.distance_split);
+ for (int i = 0; i < mb.cmds.size(); ++i) {
+ const Command& cmd = mb.cmds[i];
+ MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
+ EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
+ for (int j = 0; j < cmd.insert_length_; ++j) {
+ MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
+ int histogram_idx = literal_it.type_;
+ uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0;
+ uint8_t prev_byte2 = *pos > 1 ? ringbuffer[(*pos - 2) & mask] : 0;
+ int context = ((literal_it.type_ << kLiteralContextBits) +
+ Context(prev_byte, prev_byte2,
+ mb.literal_context_modes[literal_it.type_]));
+ histogram_idx = mb.literal_context_map[context];
+ EntropyEncode(ringbuffer[*pos & mask],
+ literal_codes[histogram_idx], storage_ix, storage);
+ ++(*pos);
+ }
+ if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
+ MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
+ int context = (distance_it.type_ << 2) +
+ ((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
+ int histogram_index = mb.distance_context_map[context];
+ size_t max_distance = std::min(*pos, (size_t)kMaxBackwardDistance);
+ EncodeCopyDistance(cmd, distance_codes[histogram_index],
+ storage_ix, storage);
+ }
+ *pos += cmd.copy_length_;
+ }
+}
+
BrotliCompressor::BrotliCompressor()
    : window_bits_(kWindowBits),
      hasher_(new Hasher),
      dist_ringbuffer_idx_(0),
      input_pos_(0),
      ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
      literal_cost_(1 << kRingBufferBits),
      storage_ix_(0),
      storage_(new uint8_t[2 << kMetaBlockSizeBits]) {
  // Initial contents of the distance cache -- NOTE(review): these should
  // match the decoder's initial distance ring buffer; confirm against the
  // brotli format specification.
  dist_ringbuffer_[0] = 16;
  dist_ringbuffer_[1] = 15;
  dist_ringbuffer_[2] = 11;
  dist_ringbuffer_[3] = 4;
  // WriteBits ORs into partially-filled bytes, so the first byte must
  // start out zeroed.
  storage_[0] = 0;
}
+
BrotliCompressor::~BrotliCompressor() {
  // Owns hasher_ and storage_, both allocated in the constructor.
  delete hasher_;
  delete[] storage_;
}
+
void BrotliCompressor::WriteStreamHeader() {
  // Encode window size.
  if (window_bits_ == 16) {
    // A single zero bit selects the 16-bit window.
    WriteBits(1, 0, &storage_ix_, storage_);
  } else {
    // Otherwise a one bit followed by (window_bits - 17) in three bits.
    WriteBits(1, 1, &storage_ix_, storage_);
    WriteBits(3, window_bits_ - 17, &storage_ix_, storage_);
  }
}
+
+void BrotliCompressor::WriteMetaBlock(const size_t input_size,
+ const uint8_t* input_buffer,
+ const bool is_last,
+ size_t* encoded_size,
+ uint8_t* encoded_buffer) {
+ std::vector<Command> commands;
+ if (input_size > 0) {
+ ringbuffer_.Write(input_buffer, input_size);
+ EstimateBitCostsForLiterals(input_pos_, input_size,
+ kRingBufferMask, ringbuffer_.start(),
+ &literal_cost_[0]);
+ CreateBackwardReferences(input_size, input_pos_,
+ ringbuffer_.start(),
+ &literal_cost_[0],
+ kRingBufferMask, kMaxBackwardDistance,
+ hasher_,
+ &commands);
+ ComputeDistanceShortCodes(&commands, dist_ringbuffer_,
+ &dist_ringbuffer_idx_);
+ }
+ EncodingParams params;
+ params.num_direct_distance_codes = 12;
+ params.distance_postfix_bits = 1;
+ params.literal_context_mode = CONTEXT_SIGNED;
+ const int storage_ix0 = storage_ix_;
+ MetaBlock mb;
+ BuildMetaBlock(params, commands, ringbuffer_.start(), input_pos_,
+ kRingBufferMask, &mb);
+ StoreMetaBlock(mb, is_last, ringbuffer_.start(), kRingBufferMask,
+ &input_pos_, &storage_ix_, storage_);
+ size_t output_size = is_last ? ((storage_ix_ + 7) >> 3) : (storage_ix_ >> 3);
+ if (input_size + 4 < output_size) {
+ storage_ix_ = storage_ix0;
+ storage_[storage_ix_ >> 3] &= (1 << (storage_ix_ & 7)) - 1;
+ EncodeMetaBlockLength(input_size, false, true, &storage_ix_, storage_);
+ size_t hdr_size = (storage_ix_ + 7) >> 3;
+ memcpy(encoded_buffer, storage_, hdr_size);
+ memcpy(encoded_buffer + hdr_size, input_buffer, input_size);
+ *encoded_size = hdr_size + input_size;
+ if (is_last) {
+ encoded_buffer[*encoded_size] = 0x3; // ISLAST, ISEMPTY
+ ++(*encoded_size);
+ }
+ storage_ix_ = 0;
+ storage_[0] = 0;
+ } else {
+ memcpy(encoded_buffer, storage_, output_size);
+ *encoded_size = output_size;
+ if (is_last) {
+ storage_ix_ = 0;
+ storage_[0] = 0;
+ } else {
+ storage_ix_ -= output_size << 3;
+ storage_[storage_ix_ >> 3] = storage_[output_size];
+ }
+ }
+}
+
// Emits the final, empty meta-block with the last-block bit set and copies
// any pending output into encoded_buffer.
void BrotliCompressor::FinishStream(
    size_t* encoded_size, uint8_t* encoded_buffer) {
  WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
}
+
+
// One-shot compression of input_buffer into encoded_buffer. On entry
// *encoded_size holds the capacity of encoded_buffer; on exit it holds the
// number of bytes written. Always returns 1.
// NOTE(review): the remaining capacity is tracked (max_output_size) but is
// not enforced by WriteMetaBlock; confirm callers always provide enough
// output space.
int BrotliCompressBuffer(size_t input_size,
                         const uint8_t* input_buffer,
                         size_t* encoded_size,
                         uint8_t* encoded_buffer) {
  // Empty input: emit a single fixed byte (empty last meta-block).
  if (input_size == 0) {
    encoded_buffer[0] = 6;
    *encoded_size = 1;
    return 1;
  }

  BrotliCompressor compressor;
  compressor.WriteStreamHeader();

  const int max_block_size = 1 << kMetaBlockSizeBits;
  size_t max_output_size = *encoded_size;
  const uint8_t* input_end = input_buffer + input_size;
  *encoded_size = 0;

  // Compress one meta-block at a time; the final block carries the
  // last-block flag.
  while (input_buffer < input_end) {
    int block_size = max_block_size;
    bool is_last = false;
    if (block_size >= input_end - input_buffer) {
      block_size = input_end - input_buffer;
      is_last = true;
    }
    size_t output_size = max_output_size;
    compressor.WriteMetaBlock(block_size, input_buffer, is_last,
                              &output_size, &encoded_buffer[*encoded_size]);
    input_buffer += block_size;
    *encoded_size += output_size;
    max_output_size -= output_size;
  }

  return 1;
}
+
+} // namespace brotli
diff --git a/brotli/enc/encode.h b/brotli/enc/encode.h
new file mode 100644
index 0000000..0494b83
--- /dev/null
+++ b/brotli/enc/encode.h
@@ -0,0 +1,75 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// API for Brotli compression
+
+#ifndef BROTLI_ENC_ENCODE_H_
+#define BROTLI_ENC_ENCODE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+#include <vector>
+#include "./hash.h"
+#include "./ringbuffer.h"
+
+namespace brotli {
+
+// Streaming Brotli encoder: feed data one meta-block at a time via
+// WriteMetaBlock and terminate with FinishStream.
+class BrotliCompressor {
+ public:
+  BrotliCompressor();
+  ~BrotliCompressor();
+
+  // Writes the stream header into the internal output buffer.
+  void WriteStreamHeader();
+
+  // Encodes the data in input_buffer as a meta-block and writes it to
+  // encoded_buffer and sets *encoded_size to the number of bytes that was
+  // written.
+  void WriteMetaBlock(const size_t input_size,
+                      const uint8_t* input_buffer,
+                      const bool is_last,
+                      size_t* encoded_size,
+                      uint8_t* encoded_buffer);
+
+  // Writes a zero-length meta-block with end-of-input bit set to the
+  // internal output buffer and copies the output buffer to encoded_buffer and
+  // sets *encoded_size to the number of bytes written.
+  void FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
+
+
+ private:
+  int window_bits_;                 // Sliding-window size, log2.
+  Hasher* hasher_;                  // Match-finding hash table (see hash.h).
+  int dist_ringbuffer_[4];          // Last four backward distances used.
+  size_t dist_ringbuffer_idx_;      // Next write slot in dist_ringbuffer_.
+  size_t input_pos_;                // Total input bytes consumed so far.
+  RingBuffer ringbuffer_;           // Buffered input history for matching.
+  std::vector<float> literal_cost_; // Per-position literal cost estimates.
+  int storage_ix_;                  // Output bit position into storage_.
+  uint8_t* storage_;                // Internal output bit buffer.
+};
+
+// Compresses the data in input_buffer into encoded_buffer, and sets
+// *encoded_size to the compressed length.
+// Returns 0 if there was an error and 1 otherwise.
+int BrotliCompressBuffer(size_t input_size,
+ const uint8_t* input_buffer,
+ size_t* encoded_size,
+ uint8_t* encoded_buffer);
+
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_ENCODE_H_
diff --git a/brotli/enc/entropy_encode.cc b/brotli/enc/entropy_encode.cc
new file mode 100644
index 0000000..e4c6b20
--- /dev/null
+++ b/brotli/enc/entropy_encode.cc
@@ -0,0 +1,409 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Entropy encoding (Huffman) utilities.
+
+#include "./entropy_encode.h"
+
+#include <stdint.h>
+#include <algorithm>
+#include <limits>
+#include <vector>
+#include <cstdlib>
+
+#include "./histogram.h"
+
+namespace brotli {
+
+namespace {
+
+// A node of a Huffman tree under construction.  A leaf has
+// index_left_ == -1 and carries the symbol value in index_right_or_value_;
+// an internal node stores pool indexes of both children.
+struct HuffmanTree {
+  HuffmanTree();
+  HuffmanTree(int count, int16_t left, int16_t right)
+      : total_count_(count),
+        index_left_(left),
+        index_right_or_value_(right) {
+  }
+  int total_count_;             // Population count of this subtree.
+  int16_t index_left_;          // Left-child index, or -1 for a leaf.
+  int16_t index_right_or_value_;  // Right-child index, or leaf symbol.
+};
+
+// Default constructor leaves fields uninitialized (nodes are always
+// assigned before use in CreateHuffmanTree).
+HuffmanTree::HuffmanTree() {}
+
+// Sort the root nodes, least popular first.
+// Ties are broken on the symbol value (descending) so that the sort
+// order — and hence the resulting code — is deterministic.
+bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
+  if (v0.total_count_ == v1.total_count_) {
+    return v0.index_right_or_value_ > v1.index_right_or_value_;
+  }
+  return v0.total_count_ < v1.total_count_;
+}
+
+// Recursively walks the tree rooted at p and records each leaf's depth
+// (its code length in bits) into depth[symbol].  pool is the node array
+// that the child indexes refer to; level is the depth of p itself.
+void SetDepth(const HuffmanTree &p,
+              HuffmanTree *pool,
+              uint8_t *depth,
+              int level) {
+  if (p.index_left_ >= 0) {
+    // Internal node: both children sit one level deeper.
+    ++level;
+    SetDepth(pool[p.index_left_], pool, depth, level);
+    SetDepth(pool[p.index_right_or_value_], pool, depth, level);
+  } else {
+    depth[p.index_right_or_value_] = level;
+  }
+}
+
+} // namespace
+
+// This function will create a Huffman tree.
+//
+// The catch here is that the tree cannot be arbitrarily deep.
+// Brotli specifies a maximum depth of 15 bits for "code trees"
+// and 7 bits for "code length code trees."
+//
+// count_limit is the value that is to be faked as the minimum value
+// and this minimum value is raised until the tree matches the
+// maximum length requirement.
+//
+// This algorithm is not of excellent performance for very long data blocks,
+// especially when population counts are longer than 2**tree_limit, but
+// we are not planning to use this with extremely long blocks.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const int *data,
+                       const int length,
+                       const int tree_limit,
+                       uint8_t *depth) {
+  // For block sizes below 64 kB, we never need to do a second iteration
+  // of this loop. Probably all of our block sizes will be smaller than
+  // that, so this loop is mostly of academic interest. If we actually
+  // would need this, we would be better off with the Katajainen algorithm.
+  for (int count_limit = 1; ; count_limit *= 2) {
+    std::vector<HuffmanTree> tree;
+    tree.reserve(2 * length + 1);
+
+    // Collect leaves for symbols with non-zero counts, clamping each
+    // count up to count_limit to flatten the tree on retries.
+    for (int i = 0; i < length; ++i) {
+      if (data[i]) {
+        const int count = std::max(data[i], count_limit);
+        tree.push_back(HuffmanTree(count, -1, i));
+      }
+    }
+
+    const int n = tree.size();
+    if (n == 1) {
+      depth[tree[0].index_right_or_value_] = 1;      // Only one element.
+      break;
+    }
+
+    std::sort(tree.begin(), tree.end(), SortHuffmanTree);
+
+    // The nodes are:
+    // [0, n): the sorted leaf nodes that we start with.
+    // [n]: we add a sentinel here.
+    // [n + 1, 2n): new parent nodes are added here, starting from
+    //              (n+1). These are naturally in ascending order.
+    // [2n]: we add a sentinel at the end as well.
+    // There will be (2n+1) elements at the end.
+    const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
+    tree.push_back(sentinel);
+    tree.push_back(sentinel);
+
+    int i = 0;      // Points to the next leaf node.
+    int j = n + 1;  // Points to the next non-leaf node.
+    // Combine the two cheapest available nodes n - 1 times; the max-count
+    // sentinels guarantee tree[i] and tree[j] are always valid reads.
+    for (int k = n - 1; k > 0; --k) {
+      int left, right;
+      if (tree[i].total_count_ <= tree[j].total_count_) {
+        left = i;
+        ++i;
+      } else {
+        left = j;
+        ++j;
+      }
+      if (tree[i].total_count_ <= tree[j].total_count_) {
+        right = i;
+        ++i;
+      } else {
+        right = j;
+        ++j;
+      }
+
+      // The sentinel node becomes the parent node.
+      int j_end = tree.size() - 1;
+      tree[j_end].total_count_ =
+          tree[left].total_count_ + tree[right].total_count_;
+      tree[j_end].index_left_ = left;
+      tree[j_end].index_right_or_value_ = right;
+
+      // Add back the last sentinel node.
+      tree.push_back(sentinel);
+    }
+    // The root is the last parent created, at index 2n - 1.
+    SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
+
+    // We need to pack the Huffman tree in tree_limit bits.
+    // If this was not successful, add fake entities to the lowest values
+    // and retry.
+    if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
+      break;
+    }
+  }
+}
+
+// Reverses v[start, end) in place (end is exclusive).
+void Reverse(uint8_t* v, int start, int end) {
+  --end;
+  while (start < end) {
+    int tmp = v[start];
+    v[start] = v[end];
+    v[end] = tmp;
+    ++start;
+    --end;
+  }
+}
+
+// Appends a run of `repetitions` copies of a non-zero code length `value`
+// to the serialized tree.  Short runs are emitted literally; runs of 3 or
+// more use repeat code 16 with 2 extra bits per occurrence.  The repeat
+// count is emitted in base 4, most significant digit first (hence the
+// Reverse calls).  *tree_size is the current write position, updated here.
+void WriteHuffmanTreeRepetitions(
+    const int previous_value,
+    const int value,
+    int repetitions,
+    uint8_t* tree,
+    uint8_t* extra_bits,
+    int* tree_size) {
+  if (previous_value != value) {
+    // Code 16 repeats the *previous* value, so a changed value must be
+    // written out literally once first.
+    tree[*tree_size] = value;
+    extra_bits[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions < 3) {
+    for (int i = 0; i < repetitions; ++i) {
+      tree[*tree_size] = value;
+      extra_bits[*tree_size] = 0;
+      ++(*tree_size);
+    }
+  } else {
+    repetitions -= 3;
+    int start = *tree_size;
+    while (repetitions >= 0) {
+      tree[*tree_size] = 16;
+      extra_bits[*tree_size] = repetitions & 0x3;
+      ++(*tree_size);
+      repetitions >>= 2;
+      --repetitions;
+    }
+    Reverse(tree, start, *tree_size);
+    Reverse(extra_bits, start, *tree_size);
+  }
+}
+
+// Appends a run of `repetitions` zero code lengths to the serialized tree.
+// Short runs are emitted literally; runs of 3 or more use repeat code 17
+// with 3 extra bits per occurrence, the count emitted in base 8, most
+// significant digit first (hence the Reverse calls).
+void WriteHuffmanTreeRepetitionsZeros(
+    int repetitions,
+    uint8_t* tree,
+    uint8_t* extra_bits,
+    int* tree_size) {
+  if (repetitions < 3) {
+    for (int i = 0; i < repetitions; ++i) {
+      tree[*tree_size] = 0;
+      extra_bits[*tree_size] = 0;
+      ++(*tree_size);
+    }
+  } else {
+    repetitions -= 3;
+    int start = *tree_size;
+    while (repetitions >= 0) {
+      tree[*tree_size] = 17;
+      extra_bits[*tree_size] = repetitions & 0x7;
+      ++(*tree_size);
+      repetitions >>= 3;
+      --repetitions;
+    }
+    Reverse(tree, start, *tree_size);
+    Reverse(extra_bits, start, *tree_size);
+  }
+}
+
+
+// Heuristics for selecting the stride ranges to collapse.
+// Two counts are considered "close enough" to merge into one RLE stride
+// when they differ by less than 4.
+int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
+  return abs(a - b) < 4;
+}
+
+// Smooths the population counts in counts[0..length) so that the Huffman
+// tree built from them serializes with more run-length codes.  Returns 1
+// on success (including the all-zeros case) and 0 on allocation failure.
+int OptimizeHuffmanCountsForRle(int length, int* counts) {
+  int stride;
+  int limit;
+  int sum;
+  uint8_t* good_for_rle;
+  // Let's make the Huffman code more compatible with rle encoding.
+  int i;
+  // 1) Trim trailing zeros off the histogram.
+  for (; length >= 0; --length) {
+    if (length == 0) {
+      return 1; // All zeros.
+    }
+    if (counts[length - 1] != 0) {
+      // Now counts[0..length - 1] does not have trailing zeros.
+      break;
+    }
+  }
+  // 2) Let's mark all population counts that already can be encoded
+  // with an rle code.
+  good_for_rle = (uint8_t*)calloc(length, 1);
+  if (good_for_rle == NULL) {
+    return 0;
+  }
+  {
+    // Let's not spoil any of the existing good rle codes.
+    // Mark any seq of 0's that is longer as 5 as a good_for_rle.
+    // Mark any seq of non-0's that is longer as 7 as a good_for_rle.
+    int symbol = counts[0];
+    // NOTE(review): this inner `stride` intentionally shadows the outer
+    // declaration; only the inner one is used inside this scope.
+    int stride = 0;
+    for (i = 0; i < length + 1; ++i) {
+      if (i == length || counts[i] != symbol) {
+        if ((symbol == 0 && stride >= 5) ||
+            (symbol != 0 && stride >= 7)) {
+          int k;
+          for (k = 0; k < stride; ++k) {
+            good_for_rle[i - k - 1] = 1;
+          }
+        }
+        stride = 1;
+        if (i != length) {
+          symbol = counts[i];
+        }
+      } else {
+        ++stride;
+      }
+    }
+  }
+  // 3) Let's replace those population counts that lead to more rle codes.
+  stride = 0;
+  limit = (counts[0] + counts[1] + counts[2]) / 3 + 1;
+  sum = 0;
+  for (i = 0; i < length + 1; ++i) {
+    if (i == length || good_for_rle[i] ||
+        (i != 0 && good_for_rle[i - 1]) ||
+        !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
+      if (stride >= 4 || (stride >= 3 && sum == 0)) {
+        int k;
+        // The stride must end, collapse what we have, if we have enough (4).
+        int count = (sum + stride / 2) / stride;
+        if (count < 1) {
+          count = 1;
+        }
+        if (sum == 0) {
+          // Don't make an all zeros stride to be upgraded to ones.
+          count = 0;
+        }
+        for (k = 0; k < stride; ++k) {
+          // We don't want to change value at counts[i],
+          // that is already belonging to the next stride. Thus - 1.
+          counts[i - k - 1] = count;
+        }
+      }
+      stride = 0;
+      sum = 0;
+      if (i < length - 2) {
+        // All interesting strides have a count of at least 4,
+        // at least when non-zeros.
+        limit = (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 1;
+      } else if (i < length) {
+        limit = counts[i];
+      } else {
+        limit = 0;
+      }
+    }
+    ++stride;
+    if (i != length) {
+      sum += counts[i];
+      if (stride >= 4) {
+        // Track the running stride average as the collapse threshold.
+        limit = (sum + stride / 2) / stride;
+      }
+    }
+  }
+  free(good_for_rle);
+  return 1;
+}
+
+
+// Serializes the code-length array depth[0..length) into (tree,
+// extra_bits_data) pairs using the run-length codes 16 and 17, then trims
+// trailing zero-length entries.  *huffman_tree_size is the output length.
+void WriteHuffmanTree(const uint8_t* depth, const int length,
+                      uint8_t* tree,
+                      uint8_t* extra_bits_data,
+                      int* huffman_tree_size) {
+  // Presumably 8 is the implicit initial "previous code length" of the
+  // format, so a leading run of 8s can use code 16 — verify against spec.
+  int previous_value = 8;
+  // NOTE(review): i and k are uint32_t but length is int — a
+  // signed/unsigned comparison; harmless for non-negative lengths.
+  for (uint32_t i = 0; i < length;) {
+    const int value = depth[i];
+    int reps = 1;
+    // Count how many consecutive positions share this code length.
+    for (uint32_t k = i + 1; k < length && depth[k] == value; ++k) {
+      ++reps;
+    }
+    if (value == 0) {
+      WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data,
+                                       huffman_tree_size);
+    } else {
+      WriteHuffmanTreeRepetitions(previous_value, value, reps, tree,
+                                  extra_bits_data, huffman_tree_size);
+      previous_value = value;
+    }
+    i += reps;
+  }
+  // Throw away trailing zeros.
+  for (; *huffman_tree_size > 0; --(*huffman_tree_size)) {
+    if (tree[*huffman_tree_size - 1] > 0 && tree[*huffman_tree_size - 1] < 17) {
+      break;
+    }
+  }
+}
+
+namespace {
+
+// Reverses the low num_bits bits of `bits` using a 4-bit lookup table:
+// nibbles are reversed and re-assembled in opposite order, then the final
+// shift by (-num_bits & 0x3) discards the padding when num_bits is not a
+// multiple of 4.
+uint16_t ReverseBits(int num_bits, uint16_t bits) {
+  static const size_t kLut[16] = {  // Pre-reversed 4-bit values.
+    0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
+    0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
+  };
+  size_t retval = kLut[bits & 0xf];
+  for (int i = 4; i < num_bits; i += 4) {
+    retval <<= 4;
+    bits >>= 4;
+    retval |= kLut[bits & 0xf];
+  }
+  retval >>= (-num_bits & 0x3);
+  return retval;
+}
+
+} // namespace
+
+// Assigns canonical Huffman code words from the code lengths in
+// depth[0..len): codes of each length are numbered consecutively in symbol
+// order (the classic bl_count/next_code construction), then each code is
+// bit-reversed because the bit stream emits codes LSB-first.
+void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits) {
+  // In Brotli, all bit depths are [1..15]
+  // 0 bit depth means that the symbol does not exist.
+  const int kMaxBits = 16;  // 0..15 are values for bits
+  uint16_t bl_count[kMaxBits] = { 0 };
+  {
+    // Histogram of code lengths; length 0 (absent symbol) gets no code.
+    for (int i = 0; i < len; ++i) {
+      ++bl_count[depth[i]];
+    }
+    bl_count[0] = 0;
+  }
+  uint16_t next_code[kMaxBits];
+  next_code[0] = 0;
+  {
+    // First code word of each length, derived from the shorter lengths.
+    int code = 0;
+    for (int bits = 1; bits < kMaxBits; ++bits) {
+      code = (code + bl_count[bits - 1]) << 1;
+      next_code[bits] = code;
+    }
+  }
+  for (int i = 0; i < len; ++i) {
+    if (depth[i]) {
+      bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
+    }
+  }
+}
+
+} // namespace brotli
diff --git a/brotli/enc/entropy_encode.h b/brotli/enc/entropy_encode.h
new file mode 100644
index 0000000..89c3e1a
--- /dev/null
+++ b/brotli/enc/entropy_encode.h
@@ -0,0 +1,116 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Entropy encoding (Huffman) utilities.
+
+#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
+#define BROTLI_ENC_ENTROPY_ENCODE_H_
+
+#include <stdint.h>
+#include <string.h>
+#include "./histogram.h"
+#include "./prefix.h"
+
+namespace brotli {
+
+// This function will create a Huffman tree.
+//
+// The (data,length) contains the population counts.
+// The tree_limit is the maximum bit depth of the Huffman codes.
+//
+// The depth contains the tree, i.e., how many bits are used for
+// the symbol.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const int *data,
+ const int length,
+ const int tree_limit,
+ uint8_t *depth);
+
+// Change the population counts in a way that the consequent
+// Hufmann tree compression, especially its rle-part will be more
+// likely to compress this data more efficiently.
+//
+// length contains the size of the histogram.
+// counts contains the population counts.
+int OptimizeHuffmanCountsForRle(int length, int* counts);
+
+
+// Write a huffman tree from bit depths into the bitstream representation
+// of a Huffman tree. The generated Huffman tree is to be compressed once
+// more using a Huffman tree
+void WriteHuffmanTree(const uint8_t* depth, const int length,
+ uint8_t* tree,
+ uint8_t* extra_bits_data,
+ int* huffman_tree_size);
+
+// Get the actual bit values for a tree of bit depths.
+void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
+
+// A fully built Huffman code over an alphabet of kSize symbols, as
+// produced by BuildEntropyCode below.
+template<int kSize>
+struct EntropyCode {
+  // How many bits for symbol.
+  uint8_t depth_[kSize];
+  // Actual bits used to represent the symbol.
+  uint16_t bits_[kSize];
+  // How many non-zero depth.
+  int count_;
+  // First four symbols with non-zero depth.
+  int symbols_[4];
+};
+
+// Builds an EntropyCode from a histogram: computes code lengths capped at
+// tree_limit over the first alphabet_size symbols and converts them to
+// canonical code words.  Leaves the code all-zero for an empty histogram.
+template<int kSize>
+void BuildEntropyCode(const Histogram<kSize>& histogram,
+                      const int tree_limit,
+                      const int alphabet_size,
+                      EntropyCode<kSize>* code) {
+  memset(code->depth_, 0, sizeof(code->depth_));
+  memset(code->bits_, 0, sizeof(code->bits_));
+  memset(code->symbols_, 0, sizeof(code->symbols_));
+  code->count_ = 0;
+  if (histogram.total_count_ == 0) return;
+  // Record the number of used symbols and remember the first four.
+  for (int i = 0; i < kSize; ++i) {
+    if (histogram.data_[i] > 0) {
+      if (code->count_ < 4) code->symbols_[code->count_] = i;
+      ++code->count_;
+    }
+  }
+  if (code->count_ >= 64) {
+    // Large alphabets: smooth a copy of the counts first so the
+    // serialized code-length sequence RLE-compresses better.
+    int counts[kSize];
+    memcpy(counts, &histogram.data_[0], sizeof(counts[0]) * kSize);
+    OptimizeHuffmanCountsForRle(alphabet_size, counts);
+    CreateHuffmanTree(counts, alphabet_size, tree_limit, &code->depth_[0]);
+  } else {
+    CreateHuffmanTree(&histogram.data_[0], alphabet_size, tree_limit,
+                      &code->depth_[0]);
+  }
+  ConvertBitDepthsToSymbols(&code->depth_[0], alphabet_size, &code->bits_[0]);
+}
+
+static const int kCodeLengthCodes = 18;
+
+// Literal entropy code.
+typedef EntropyCode<256> EntropyCodeLiteral;
+// Prefix entropy codes.
+typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
+typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
+typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
+// Context map entropy code, 256 Huffman tree indexes + 16 run length codes.
+typedef EntropyCode<272> EntropyCodeContextMap;
+// Block type entropy code, 256 block types + 2 special symbols.
+typedef EntropyCode<258> EntropyCodeBlockType;
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_ENTROPY_ENCODE_H_
diff --git a/brotli/enc/fast_log.h b/brotli/enc/fast_log.h
new file mode 100644
index 0000000..0b09ea6
--- /dev/null
+++ b/brotli/enc/fast_log.h
@@ -0,0 +1,161 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Utilities for fast computation of logarithms.
+
+#ifndef BROTLI_ENC_FAST_LOG_H_
+#define BROTLI_ENC_FAST_LOG_H_
+
+#include <math.h>
+#include <stdint.h>
+
+namespace brotli {
+
+// Return floor(log2(n)) for positive integer n.  Returns -1 iff n == 0.
+inline int Log2Floor(uint32_t n) {
+#if defined(__clang__) || \
+    (defined(__GNUC__) && \
+     ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4))
+  // 31 ^ clz(n) == 31 - clz(n) for n != 0, i.e. the index of the top bit.
+  return n == 0 ? -1 : 31 ^ __builtin_clz(n);
+#else
+  if (n == 0)
+    return -1;
+  // Portable fallback: binary search for the highest set bit.
+  int log = 0;
+  uint32_t value = n;
+  for (int i = 4; i >= 0; --i) {
+    int shift = (1 << i);
+    uint32_t x = value >> shift;
+    if (x != 0) {
+      value = x;
+      log += shift;
+    }
+  }
+  // NOTE(review): this branch uses assert(), but this header only
+  // includes <math.h>/<stdint.h> — confirm <assert.h> reaches non-GNU
+  // builds through another include.
+  assert(value == 1);
+  return log;
+#endif
+}
+
+// Return ceiling(log2(n)) for positive integer n.  Returns -1 iff n == 0.
+inline int Log2Ceiling(uint32_t n) {
+  int floor = Log2Floor(n);
+  // n & ~(n - 1) isolates the lowest set bit; it equals n exactly for
+  // zero and powers of two, where floor == ceiling.
+  if (n == (n &~ (n - 1)))  // zero or a power of two
+    return floor;
+  else
+    return floor + 1;
+}
+
+// A lookup table for small values of log2(int) to be used in entropy
+// computation.
+//
+// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
+static const float kLog2Table[] = {
+ 0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
+ 1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
+ 2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
+ 3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
+ 3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
+ 3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
+ 4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
+ 4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
+ 4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
+ 4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
+ 4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
+ 5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
+ 5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
+ 5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
+ 5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
+ 5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
+ 5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
+ 5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
+ 5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
+ 5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
+ 5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
+ 5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
+ 6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
+ 6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
+ 6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
+ 6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
+ 6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
+ 6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
+ 6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
+ 6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
+ 6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
+ 6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
+ 6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
+ 6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
+ 6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
+ 6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
+ 6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
+ 6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
+ 6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
+ 6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
+ 6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
+ 6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
+ 6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
+ 7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
+ 7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
+ 7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
+ 7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
+ 7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
+ 7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
+ 7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
+ 7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
+ 7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
+ 7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
+ 7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
+ 7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
+ 7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
+ 7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
+ 7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
+ 7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
+ 7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
+ 7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
+ 7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
+ 7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
+ 7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
+ 7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
+ 7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
+ 7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
+ 7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
+ 7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
+ 7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
+ 7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
+ 7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
+ 7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
+ 7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
+ 7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
+ 7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
+ 7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
+ 7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
+ 7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
+ 7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
+ 7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
+ 7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
+ 7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
+ 7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
+ 7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
+ 7.9943534368588578f
+};
+
+// Faster logarithm for small integers, with the property of log2(0) == 0.
+// Values below 256 come from kLog2Table; larger values fall back to log2().
+// NOTE(review): a negative v would index the table out of bounds —
+// callers presumably only pass non-negative counts; confirm.
+static inline double FastLog2(int v) {
+  if (v < (int)(sizeof(kLog2Table) / sizeof(kLog2Table[0]))) {
+    return kLog2Table[v];
+  }
+  return log2(v);
+}
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_FAST_LOG_H_
diff --git a/brotli/enc/find_match_length.h b/brotli/enc/find_match_length.h
new file mode 100644
index 0000000..0994ac2
--- /dev/null
+++ b/brotli/enc/find_match_length.h
@@ -0,0 +1,85 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Function to find maximal matching prefixes of strings.
+
+#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
+#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
+
+#include <stdint.h>
+
+#include "./port.h"
+
+namespace brotli {
+
+// Separate implementation for x86_64, for speed.
+#if defined(__GNUC__) && defined(ARCH_K8)
+
+// Returns the length of the longest common prefix of s1 and s2, comparing
+// at most `limit` bytes.  x86_64 fast path: compares 8 bytes at a time via
+// unaligned 64-bit loads, then locates the first differing byte with a
+// count-trailing-zeros on the XOR of the mismatching words.
+static inline int FindMatchLengthWithLimit(const uint8_t* s1,
+                                           const uint8_t* s2,
+                                           size_t limit) {
+  int matched = 0;
+  size_t limit2 = (limit >> 3) + 1;  // + 1 is for pre-decrement in while
+  while (PREDICT_TRUE(--limit2)) {
+    if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
+                      BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
+      s2 += 8;
+      matched += 8;
+    } else {
+      // The lowest set bit of x marks the first mismatching bit; its byte
+      // index is matching_bits / 8 (little-endian byte order).
+      uint64_t x =
+          BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
+      int matching_bits = __builtin_ctzll(x);
+      matched += matching_bits >> 3;
+      return matched;
+    }
+  }
+  // Tail: compare the remaining 0..7 bytes one at a time.
+  limit = (limit & 7) + 1;  // + 1 is for pre-decrement in while
+  while (--limit) {
+    if (PREDICT_TRUE(s1[matched] == *s2)) {
+      ++s2;
+      ++matched;
+    } else {
+      return matched;
+    }
+  }
+  return matched;
+}
+#else
+// Portable variant: same contract as the x86_64 version above, but using
+// 32-bit word compares followed by a byte-at-a-time tail.
+// NOTE(review): `s2_limit - 4` forms a pointer before the buffer when
+// limit < 4 — works in practice but is technically out-of-range
+// pointer arithmetic; confirm callers always pass limit >= 4 or accept it.
+static inline int FindMatchLengthWithLimit(const uint8_t* s1,
+                                           const uint8_t* s2,
+                                           size_t limit) {
+  int matched = 0;
+  const uint8_t* s2_limit = s2 + limit;
+  const uint8_t* s2_ptr = s2;
+  // Find out how long the match is. We loop over the data 32 bits at a
+  // time until we find a 32-bit block that doesn't match; then we find
+  // the first non-matching bit and use that to calculate the total
+  // length of the match.
+  while (s2_ptr <= s2_limit - 4 &&
+         BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
+         BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
+    s2_ptr += 4;
+    matched += 4;
+  }
+  while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
+    ++s2_ptr;
+    ++matched;
+  }
+  return matched;
+}
+#endif
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_
diff --git a/brotli/enc/hash.h b/brotli/enc/hash.h
new file mode 100644
index 0000000..cb38e8f
--- /dev/null
+++ b/brotli/enc/hash.h
@@ -0,0 +1,367 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A (forgetful) hash table to the data seen by the compressor, to
+// help create backward references to previous data.
+
+#ifndef BROTLI_ENC_HASH_H_
+#define BROTLI_ENC_HASH_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/types.h>
+#include <algorithm>
+#include <cstdlib>
+
+#include "./fast_log.h"
+#include "./find_match_length.h"
+#include "./port.h"
+
+namespace brotli {
+
+// kHashMul32 multiplier has these properties:
+// * The multiplier must be odd. Otherwise we may lose the highest bit.
+// * No long streaks of 1s or 0s.
+// * There is no effort to ensure that it is a prime, the oddity is enough
+// for this use.
+// * The number has been tuned heuristically against compression benchmarks.
+static const uint32_t kHashMul32 = 0x1e35a7bd;
+
+inline uint32_t Hash3Bytes(const uint8_t *data, const int bits) {
+ uint32_t h = (BROTLI_UNALIGNED_LOAD32(data) & 0xffffff) * kHashMul32;
+ // The higher bits contain more mixture from the multiplication,
+ // so we take our results from there.
+ return h >> (32 - bits);
+}
+
+// Usually, we always choose the longest backward reference. This function
+// allows for the exception of that rule.
+//
+// If we choose a backward reference that is further away, it will
+// usually be coded with more bits. We approximate this by assuming
+// log2(distance). If the distance can be expressed in terms of the
+// last four distances, we use some heuristic constants to estimate
+// the bits cost. For the first up to four literals we use the bit
+// cost of the literals from the literal cost model, after that we
+// use the average bit cost of the cost model.
+//
+// This function is used to sometimes discard a longer backward reference
+// when it is not much longer and the bit cost for encoding it is more
+// than the saved literals.
+inline double BackwardReferenceScore(double average_cost,
+ double start_cost4,
+ double start_cost3,
+ double start_cost2,
+ int copy_length,
+ int backward_reference_offset,
+ int last_distance1,
+ int last_distance2,
+ int last_distance3,
+ int last_distance4) {
+ double retval = 0;
+ switch (copy_length) {
+ case 2: retval = start_cost2; break;
+ case 3: retval = start_cost3; break;
+ default: retval = start_cost4 + (copy_length - 4) * average_cost; break;
+ }
+ int diff_last1 = abs(backward_reference_offset - last_distance1);
+ int diff_last2 = abs(backward_reference_offset - last_distance2);
+ if (diff_last1 == 0) {
+ retval += 0.6;
+ } else if (diff_last1 < 4) {
+ retval -= 0.9 + 0.03 * diff_last1;
+ } else if (diff_last2 < 4) {
+ retval -= 0.95 + 0.1 * diff_last2;
+ } else if (backward_reference_offset == last_distance3) {
+ retval -= 1.17;
+ } else if (backward_reference_offset == last_distance4) {
+ retval -= 1.27;
+ } else {
+ retval -= 1.20 * Log2Floor(backward_reference_offset);
+ }
+ return retval;
+}
+
+// A (forgetful) hash table to the data seen by the compressor, to
+// help create backward references to previous data.
+//
+// This is a hash map of fixed size (kBucketSize) to a ring buffer of
+// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
+// index positions of the given hash key in the compressed data.
+template <int kBucketBits, int kBlockBits>
+class HashLongestMatch {
+ public:
+ HashLongestMatch()
+ : last_distance1_(4),
+ last_distance2_(11),
+ last_distance3_(15),
+ last_distance4_(16),
+ insert_length_(0),
+ average_cost_(5.4) {
+ Reset();
+ }
+ void Reset() {
+ std::fill(&num_[0], &num_[sizeof(num_) / sizeof(num_[0])], 0);
+ }
+
+ // Look at 3 bytes at data.
+ // Compute a hash from these, and store the value of ix at that position.
+ inline void Store(const uint8_t *data, const int ix) {
+ const uint32_t key = Hash3Bytes(data, kBucketBits);
+ const int minor_ix = num_[key] & kBlockMask;
+ buckets_[key][minor_ix] = ix;
+ ++num_[key];
+ }
+
+ // Store hashes for a range of data.
+ void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
+ for (int p = 0; p < len; ++p) {
+ Store(&data[p & mask], startix + p);
+ }
+ }
+
+ // Find a longest backward match of &data[cur_ix] up to the length of
+ // max_length.
+ //
+ // Does not look for matches longer than max_length.
+ // Does not look for matches further away than max_backward.
+ // Writes the best found match length into best_len_out.
+ // Writes the index (&data[index]) offset from the start of the best match
+ // into best_distance_out.
+ // Write the score of the best match into best_score_out.
+ bool FindLongestMatch(const uint8_t * __restrict data,
+ const float * __restrict literal_cost,
+ const size_t ring_buffer_mask,
+ const uint32_t cur_ix,
+ uint32_t max_length,
+ const uint32_t max_backward,
+ size_t * __restrict best_len_out,
+ size_t * __restrict best_len_code_out,
+ size_t * __restrict best_distance_out,
+ double * __restrict best_score_out) {
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+ const double start_cost4 = literal_cost == NULL ? 20 :
+ literal_cost[cur_ix_masked] +
+ literal_cost[(cur_ix + 1) & ring_buffer_mask] +
+ literal_cost[(cur_ix + 2) & ring_buffer_mask] +
+ literal_cost[(cur_ix + 3) & ring_buffer_mask];
+ const double start_cost3 = literal_cost == NULL ? 15 :
+ literal_cost[cur_ix_masked] +
+ literal_cost[(cur_ix + 1) & ring_buffer_mask] +
+ literal_cost[(cur_ix + 2) & ring_buffer_mask] + 0.3;
+ double start_cost2 = literal_cost == NULL ? 10 :
+ literal_cost[cur_ix_masked] +
+ literal_cost[(cur_ix + 1) & ring_buffer_mask] + 1.2;
+ bool match_found = false;
+ // Don't accept a short copy from far away.
+ double best_score = 8.25;
+ if (insert_length_ < 4) {
+ double cost_diff[4] = { 0.20, 0.09, 0.05, 0.03 };
+ best_score += cost_diff[insert_length_];
+ }
+ size_t best_len = *best_len_out;
+ *best_len_out = 0;
+ size_t best_ix = 1;
+ // Try last distance first.
+ for (int i = 0; i < 16; ++i) {
+ size_t prev_ix = cur_ix;
+ switch(i) {
+ case 0: prev_ix -= last_distance1_; break;
+ case 1: prev_ix -= last_distance2_; break;
+ case 2: prev_ix -= last_distance3_; break;
+ case 3: prev_ix -= last_distance4_; break;
+
+ case 4: prev_ix -= last_distance1_ - 1; break;
+ case 5: prev_ix -= last_distance1_ + 1; break;
+ case 6: prev_ix -= last_distance1_ - 2; break;
+ case 7: prev_ix -= last_distance1_ + 2; break;
+ case 8: prev_ix -= last_distance1_ - 3; break;
+ case 9: prev_ix -= last_distance1_ + 3; break;
+
+ case 10: prev_ix -= last_distance2_ - 1; break;
+ case 11: prev_ix -= last_distance2_ + 1; break;
+ case 12: prev_ix -= last_distance2_ - 2; break;
+ case 13: prev_ix -= last_distance2_ + 2; break;
+ case 14: prev_ix -= last_distance2_ - 3; break;
+ case 15: prev_ix -= last_distance2_ + 3; break;
+ }
+ if (prev_ix >= cur_ix) {
+ continue;
+ }
+ const size_t backward = cur_ix - prev_ix;
+ if (PREDICT_FALSE(backward > max_backward)) {
+ continue;
+ }
+ prev_ix &= ring_buffer_mask;
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
+ prev_ix + best_len > ring_buffer_mask ||
+ data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+ continue;
+ }
+ const size_t len =
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
+ max_length);
+ if (len >= 3 || (len == 2 && i < 2)) {
+ // Comparing for >= 2 does not change the semantics, but just saves for
+ // a few unnecessary binary logarithms in backward reference score,
+ // since we are not interested in such short matches.
+ const double score = BackwardReferenceScore(average_cost_,
+ start_cost4,
+ start_cost3,
+ start_cost2,
+ len, backward,
+ last_distance1_,
+ last_distance2_,
+ last_distance3_,
+ last_distance4_);
+ if (best_score < score) {
+ best_score = score;
+ best_len = len;
+ best_ix = backward;
+ *best_len_out = best_len;
+ *best_len_code_out = best_len;
+ *best_distance_out = best_ix;
+ *best_score_out = best_score;
+ match_found = true;
+ }
+ }
+ }
+ const uint32_t key = Hash3Bytes(&data[cur_ix_masked], kBucketBits);
+ const int * __restrict const bucket = &buckets_[key][0];
+ const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
+ int stop = int(cur_ix) - 64;
+ if (stop < 0) { stop = 0; }
+
+ start_cost2 -= 1.0;
+ for (int i = cur_ix - 1; i > stop; --i) {
+ size_t prev_ix = i;
+ const size_t backward = cur_ix - prev_ix;
+ if (PREDICT_FALSE(backward > max_backward)) {
+ break;
+ }
+ prev_ix &= ring_buffer_mask;
+ if (data[cur_ix_masked] != data[prev_ix] ||
+ data[cur_ix_masked + 1] != data[prev_ix + 1]) {
+ continue;
+ }
+ int len = 2;
+ const double score = start_cost2 - 1.70 * Log2Floor(backward);
+
+ if (best_score < score) {
+ best_score = score;
+ best_len = len;
+ best_ix = backward;
+ *best_len_out = best_len;
+ *best_len_code_out = best_len;
+ *best_distance_out = best_ix;
+ match_found = true;
+ }
+ }
+ for (int i = num_[key] - 1; i >= down; --i) {
+ int prev_ix = bucket[i & kBlockMask];
+ if (prev_ix < 0) {
+ continue;
+ } else {
+ const size_t backward = cur_ix - prev_ix;
+ if (PREDICT_FALSE(backward > max_backward)) {
+ break;
+ }
+ prev_ix &= ring_buffer_mask;
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
+ prev_ix + best_len > ring_buffer_mask ||
+ data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+ continue;
+ }
+ const size_t len =
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
+ max_length);
+ if (len >= 3) {
+ // Comparing for >= 3 does not change the semantics, but just saves
+ // for a few unnecessary binary logarithms in backward reference
+ // score, since we are not interested in such short matches.
+ const double score = BackwardReferenceScore(average_cost_,
+ start_cost4,
+ start_cost3,
+ start_cost2,
+ len, backward,
+ last_distance1_,
+ last_distance2_,
+ last_distance3_,
+ last_distance4_);
+ if (best_score < score) {
+ best_score = score;
+ best_len = len;
+ best_ix = backward;
+ *best_len_out = best_len;
+ *best_len_code_out = best_len;
+ *best_distance_out = best_ix;
+ *best_score_out = best_score;
+ match_found = true;
+ }
+ }
+ }
+ }
+ return match_found;
+ }
+
+ void set_last_distance(int v) {
+ if (last_distance1_ != v) {
+ last_distance4_ = last_distance3_;
+ last_distance3_ = last_distance2_;
+ last_distance2_ = last_distance1_;
+ last_distance1_ = v;
+ }
+ }
+
+ int last_distance() const { return last_distance1_; }
+
+ void set_insert_length(int v) { insert_length_ = v; }
+
+ void set_average_cost(double v) { average_cost_ = v; }
+
+ private:
+ // Number of hash buckets.
+ static const uint32_t kBucketSize = 1 << kBucketBits;
+
+ // Only kBlockSize newest backward references are kept,
+ // and the older are forgotten.
+ static const uint32_t kBlockSize = 1 << kBlockBits;
+
+ // Mask for accessing entries in a block (in a ringbuffer manner).
+ static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
+
+ // Number of entries in a particular bucket.
+ uint16_t num_[kBucketSize];
+
+ // Buckets containing kBlockSize of backward references.
+ int buckets_[kBucketSize][kBlockSize];
+
+ int last_distance1_;
+ int last_distance2_;
+ int last_distance3_;
+ int last_distance4_;
+
+ // Cost adjustment for how many literals we are planning to insert
+ // anyway.
+ int insert_length_;
+
+ double average_cost_;
+};
+
+typedef HashLongestMatch<13, 11> Hasher;
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_HASH_H_
diff --git a/brotli/enc/histogram.cc b/brotli/enc/histogram.cc
new file mode 100644
index 0000000..910b987
--- /dev/null
+++ b/brotli/enc/histogram.cc
@@ -0,0 +1,94 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Build per-context histograms of literals, commands and distance codes.
+
+#include "./histogram.h"
+
+#include <stdint.h>
+#include <cmath>
+
+#include "./block_splitter.h"
+#include "./command.h"
+#include "./context.h"
+#include "./prefix.h"
+
+namespace brotli {
+
+void BuildHistograms(
+ const std::vector<Command>& cmds,
+ const BlockSplit& literal_split,
+ const BlockSplit& insert_and_copy_split,
+ const BlockSplit& dist_split,
+ const uint8_t* ringbuffer,
+ size_t pos,
+ size_t mask,
+ const std::vector<int>& context_modes,
+ std::vector<HistogramLiteral>* literal_histograms,
+ std::vector<HistogramCommand>* insert_and_copy_histograms,
+ std::vector<HistogramDistance>* copy_dist_histograms) {
+ BlockSplitIterator literal_it(literal_split);
+ BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
+ BlockSplitIterator dist_it(dist_split);
+ for (int i = 0; i < cmds.size(); ++i) {
+ const Command &cmd = cmds[i];
+ insert_and_copy_it.Next();
+ (*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
+ cmd.command_prefix_);
+ for (int j = 0; j < cmd.insert_length_; ++j) {
+ literal_it.Next();
+ uint8_t prev_byte = pos > 0 ? ringbuffer[(pos - 1) & mask] : 0;
+ uint8_t prev_byte2 = pos > 1 ? ringbuffer[(pos - 2) & mask] : 0;
+ int context = (literal_it.type_ << kLiteralContextBits) +
+ Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
+ (*literal_histograms)[context].Add(ringbuffer[pos & mask]);
+ ++pos;
+ }
+ pos += cmd.copy_length_;
+ if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
+ dist_it.Next();
+ int context = (dist_it.type_ << kDistanceContextBits) +
+ ((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
+ (*copy_dist_histograms)[context].Add(cmd.distance_prefix_);
+ }
+ }
+}
+
+void BuildLiteralHistogramsForBlockType(
+ const std::vector<Command>& cmds,
+ const BlockSplit& literal_split,
+ const uint8_t* ringbuffer,
+ size_t pos,
+ size_t mask,
+ int block_type,
+ int context_mode,
+ std::vector<HistogramLiteral>* histograms) {
+ BlockSplitIterator literal_it(literal_split);
+ for (int i = 0; i < cmds.size(); ++i) {
+ const Command &cmd = cmds[i];
+ for (int j = 0; j < cmd.insert_length_; ++j) {
+ literal_it.Next();
+ if (literal_it.type_ == block_type) {
+ uint8_t prev_byte = pos > 0 ? ringbuffer[(pos - 1) & mask] : 0;
+ uint8_t prev_byte2 = pos > 1 ? ringbuffer[(pos - 2) & mask] : 0;
+ int context = Context(prev_byte, prev_byte2, context_mode);
+ (*histograms)[context].Add(ringbuffer[pos & mask]);
+ }
+ ++pos;
+ }
+ pos += cmd.copy_length_;
+ }
+}
+
+} // namespace brotli
diff --git a/brotli/enc/histogram.h b/brotli/enc/histogram.h
new file mode 100644
index 0000000..45726f5
--- /dev/null
+++ b/brotli/enc/histogram.h
@@ -0,0 +1,114 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Models the histograms of literals, commands and distance codes.
+
+#ifndef BROTLI_ENC_HISTOGRAM_H_
+#define BROTLI_ENC_HISTOGRAM_H_
+
+#include <stdint.h>
+#include <string.h>
+#include <vector>
+#include <utility>
+#include "./command.h"
+#include "./fast_log.h"
+#include "./prefix.h"
+
+namespace brotli {
+
+class BlockSplit;
+
+// A simple container for histograms of data in blocks.
+template<int kDataSize>
+struct Histogram {
+ Histogram() {
+ Clear();
+ }
+ void Clear() {
+ memset(data_, 0, sizeof(data_));
+ total_count_ = 0;
+ }
+ void Add(int val) {
+ ++data_[val];
+ ++total_count_;
+ }
+ void Remove(int val) {
+ --data_[val];
+ --total_count_;
+ }
+ template<typename DataType>
+ void Add(const DataType *p, size_t n) {
+ total_count_ += n;
+ n += 1;
+ while(--n) ++data_[*p++];
+ }
+ void AddHistogram(const Histogram& v) {
+ total_count_ += v.total_count_;
+ for (int i = 0; i < kDataSize; ++i) {
+ data_[i] += v.data_[i];
+ }
+ }
+ double EntropyBitCost() const {
+ double retval = total_count_ * FastLog2(total_count_);
+ for (int i = 0; i < kDataSize; ++i) {
+ retval -= data_[i] * FastLog2(data_[i]);
+ }
+ return retval;
+ }
+
+ int data_[kDataSize];
+ int total_count_;
+ double bit_cost_;
+};
+
+// Literal histogram.
+typedef Histogram<256> HistogramLiteral;
+// Prefix histograms.
+typedef Histogram<kNumCommandPrefixes> HistogramCommand;
+typedef Histogram<kNumDistancePrefixes> HistogramDistance;
+typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
+// Context map histogram, 256 Huffman tree indexes + 16 run length codes.
+typedef Histogram<272> HistogramContextMap;
+// Block type histogram, 256 block types + 2 special symbols.
+typedef Histogram<258> HistogramBlockType;
+
// Bits reserved for the per-byte literal context id below the block-type
// bits when indexing literal histograms (see BuildHistograms).
static const int kLiteralContextBits = 6;
// Bits reserved for the distance context id below the block-type bits.
static const int kDistanceContextBits = 2;
+
+void BuildHistograms(
+ const std::vector<Command>& cmds,
+ const BlockSplit& literal_split,
+ const BlockSplit& insert_and_copy_split,
+ const BlockSplit& dist_split,
+ const uint8_t* ringbuffer,
+ size_t pos,
+ size_t mask,
+ const std::vector<int>& context_modes,
+ std::vector<HistogramLiteral>* literal_histograms,
+ std::vector<HistogramCommand>* insert_and_copy_histograms,
+ std::vector<HistogramDistance>* copy_dist_histograms);
+
+void BuildLiteralHistogramsForBlockType(
+ const std::vector<Command>& cmds,
+ const BlockSplit& literal_split,
+ const uint8_t* ringbuffer,
+ size_t pos,
+ size_t mask,
+ int block_type,
+ int context_mode,
+ std::vector<HistogramLiteral>* histograms);
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_HISTOGRAM_H_
diff --git a/brotli/enc/literal_cost.cc b/brotli/enc/literal_cost.cc
new file mode 100644
index 0000000..bf05a98
--- /dev/null
+++ b/brotli/enc/literal_cost.cc
@@ -0,0 +1,62 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Literal cost model to allow backward reference replacement to be efficient.
+
+#include "./literal_cost.h"
+
+#include <math.h>
+#include <stdint.h>
+#include <algorithm>
+
+namespace brotli {
+
// Estimates, for each byte in [pos, pos + len) of the ring buffer
// (data, mask), how many bits the byte would take entropy coded, using a
// sliding-window (+-2000 bytes) histogram of byte frequencies. Results are
// written to the ring buffer (cost, mask). Uses size_t arithmetic throughout:
// the original int counters mixed signed/unsigned comparisons with `len`.
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
                                 const uint8_t *data, float *cost) {
  int histogram[256] = { 0 };
  const size_t window_half = 2000;
  size_t in_window = std::min(window_half, len);

  // Bootstrap histogram with the first in_window bytes.
  for (size_t i = 0; i < in_window; ++i) {
    ++histogram[data[(pos + i) & mask]];
  }

  // Compute bit costs with sliding window.
  for (size_t i = 0; i < len; ++i) {
    if (i >= window_half) {
      // Remove a byte in the past.
      --histogram[data[(pos + i - window_half) & mask]];
      --in_window;
    }
    if (i + window_half < len) {
      // Add a byte in the future.
      ++histogram[data[(pos + i + window_half) & mask]];
      ++in_window;
    }
    const size_t masked_pos = (pos + i) & mask;
    int histo = histogram[data[masked_pos]];
    if (histo == 0) {
      // Never divide by zero; treat unseen bytes as seen once.
      histo = 1;
    }
    // -log2(p) bits for a byte with empirical probability histo / in_window,
    // plus a small constant adjustment.
    cost[masked_pos] = log2(static_cast<double>(in_window) / histo);
    cost[masked_pos] += 0.029;
    // Compress costs below 1 bit towards 1: cheap literals are never quite
    // free in practice.
    if (cost[masked_pos] < 1.0) {
      cost[masked_pos] *= 0.5;
      cost[masked_pos] += 0.5;
    }
  }
}
+
+} // namespace brotli
diff --git a/brotli/enc/literal_cost.h b/brotli/enc/literal_cost.h
new file mode 100644
index 0000000..fd7f325
--- /dev/null
+++ b/brotli/enc/literal_cost.h
@@ -0,0 +1,33 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Literal cost model to allow backward reference replacement to be efficient.
+
+#ifndef BROTLI_ENC_LITERAL_COST_H_
+#define BROTLI_ENC_LITERAL_COST_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace brotli {
+
+// Estimates how many bits the literals in the interval [pos, pos + len) in the
+// ringbuffer (data, mask) will take entropy coded and writes these estimates
+// to the ringbuffer (cost, mask).
+void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
+ const uint8_t *data, float *cost);
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_LITERAL_COST_H_
diff --git a/brotli/enc/port.h b/brotli/enc/port.h
new file mode 100644
index 0000000..36a365e
--- /dev/null
+++ b/brotli/enc/port.h
@@ -0,0 +1,138 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Macros for endianness, branch prediction and unaligned loads and stores.
+
+#ifndef BROTLI_ENC_PORT_H_
+#define BROTLI_ENC_PORT_H_
+
+#if defined OS_LINUX || defined OS_CYGWIN
+#include <endian.h>
+#elif defined OS_FREEBSD
+#include <machine/endian.h>
+#elif defined OS_MACOSX
+#include <machine/endian.h>
+/* Let's try and follow the Linux convention */
+#define __BYTE_ORDER BYTE_ORDER
+#define __LITTLE_ENDIAN LITTLE_ENDIAN
+#define __BIG_ENDIAN BIG_ENDIAN
+#endif
+
+// define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN
+// using the above endian definitions from endian.h if
+// endian.h was included
+#ifdef __BYTE_ORDER
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define IS_LITTLE_ENDIAN
+#endif
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define IS_BIG_ENDIAN
+#endif
+
+#else
+
+#if defined(__LITTLE_ENDIAN__)
+#define IS_LITTLE_ENDIAN
+#elif defined(__BIG_ENDIAN__)
+#define IS_BIG_ENDIAN
+#endif
+#endif // __BYTE_ORDER
+
+#if defined(COMPILER_GCC3)
+#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
+#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#else
+#define PREDICT_FALSE(x) x
+#define PREDICT_TRUE(x) x
+#endif
+
+// Portable handling of unaligned loads, stores, and copies.
+// On some platforms, like ARM, the copy functions can be more efficient
+// then a load and a store.
+
+#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || \
+ defined(ARCH_K8) || defined(_ARCH_PPC)
+
+// x86 and x86-64 can perform unaligned loads/stores directly;
+// modern PowerPC hardware can also do unaligned integer loads and stores;
+// but note: the FPU still sends unaligned loads and stores to a trap handler!
+
// Direct dereference: these targets tolerate unaligned scalar loads/stores.
// NOTE(review): the reinterpret_cast dereferences technically violate strict
// aliasing; the memcpy-based fallbacks below are the standards-clean form.
// Confirm the build uses flags (e.g. -fno-strict-aliasing) that make this
// safe here.
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))

#define BROTLI_UNALIGNED_STORE32(_p, _val) \
  (*reinterpret_cast<uint32_t *>(_p) = (_val))
#define BROTLI_UNALIGNED_STORE64(_p, _val) \
  (*reinterpret_cast<uint64_t *>(_p) = (_val))
+
+#elif defined(__arm__) && \
+ !defined(__ARM_ARCH_5__) && \
+ !defined(__ARM_ARCH_5T__) && \
+ !defined(__ARM_ARCH_5TE__) && \
+ !defined(__ARM_ARCH_5TEJ__) && \
+ !defined(__ARM_ARCH_6__) && \
+ !defined(__ARM_ARCH_6J__) && \
+ !defined(__ARM_ARCH_6K__) && \
+ !defined(__ARM_ARCH_6Z__) && \
+ !defined(__ARM_ARCH_6ZK__) && \
+ !defined(__ARM_ARCH_6T2__)
+
+// ARMv7 and newer support native unaligned accesses, but only of 16-bit
+// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
+// do an unaligned read and rotate the words around a bit, or do the reads very
+// slowly (trip through kernel mode).
+
+#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
+#define BROTLI_UNALIGNED_STORE32(_p, _val) \
+ (*reinterpret_cast<uint32_t *>(_p) = (_val))
+
// Unaligned 64-bit load via memcpy: these ARM targets lack native 64-bit
// unaligned access, and the compiler lowers the memcpy to the best legal
// instruction sequence.
// NOTE(review): this file uses memcpy but does not include <string.h>
// itself; it relies on the including file having done so — consider adding
// the include to port.h.
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t t;
  memcpy(&t, p, sizeof t);
  return t;
}
+
// Unaligned 64-bit store via memcpy (see the load above for rationale).
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof v);
}
+
+#else
+
+// These functions are provided for architectures that don't support
+// unaligned loads and stores.
+
// Reads a 32-bit value from a possibly unaligned address.
inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
  uint32_t result;
  memcpy(&result, p, sizeof(result));
  return result;
}
+
// Reads a 64-bit value from a possibly unaligned address.
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t result;
  memcpy(&result, p, sizeof(result));
  return result;
}
+
// Writes a 32-bit value to a possibly unaligned address.
inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
  memcpy(p, &v, sizeof(v));
}
+
// Writes a 64-bit value to a possibly unaligned address.
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof(v));
}
+
+#endif
+
+#endif // BROTLI_ENC_PORT_H_
diff --git a/brotli/enc/prefix.cc b/brotli/enc/prefix.cc
new file mode 100644
index 0000000..3e43501
--- /dev/null
+++ b/brotli/enc/prefix.cc
@@ -0,0 +1,166 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions for encoding of integers into prefix codes the amount of extra
+// bits, and the actual values of the extra bits.
+
+#include "./prefix.h"
+
+#include "./fast_log.h"
+
+namespace brotli {
+
+// Represents the range of values belonging to a prefix code:
+// [offset, offset + 2^nbits)
struct PrefixCodeRange {
  int offset;  // first value of the range
  int nbits;   // number of extra bits; range covers 2^nbits values
};

// Value ranges per block-length prefix code. NOTE(review): presumably these
// must mirror the decoder's tables — confirm against the format spec.
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
  {   1,  2}, {    5,  2}, {  9,   2}, {  13,  2},
  {  17,  3}, {   25,  3}, {  33,  3}, {  41,  3},
  {  49,  4}, {   65,  4}, {  81,  4}, {  97,  4},
  { 113,  5}, {  145,  5}, { 177,  5}, { 209,  5},
  { 241,  6}, {  305,  6}, { 369,  7}, { 497,  8},
  { 753,  9}, { 1265, 10}, {2289, 11}, {4337, 12},
  {8433, 13}, {16625, 24}
};

// Value ranges per insert-length prefix code.
static const PrefixCodeRange kInsertLengthPrefixCode[kNumInsertLenPrefixes] = {
  {   0,  0}, {   1,  0}, {  2,   0}, {    3,  0},
  {   4,  0}, {   5,  0}, {  6,   1}, {    8,  1},
  {  10,  2}, {  14,  2}, {  18,  3}, {   26,  3},
  {  34,  4}, {  50,  4}, {  66,  5}, {   98,  5},
  { 130,  6}, { 194,  7}, { 322,  8}, {  578,  9},
  {1090, 10}, {2114, 12}, {6210, 14}, {22594, 24},
};

// Value ranges per copy-length prefix code.
static const PrefixCodeRange kCopyLengthPrefixCode[kNumCopyLenPrefixes] = {
  {  2, 0}, {   3,  0}, {   4,  0}, {   5,  0},
  {  6, 0}, {   7,  0}, {   8,  0}, {   9,  0},
  { 10, 1}, {  12,  1}, {  14,  2}, {  18,  2},
  { 22, 3}, {  30,  3}, {  38,  4}, {  54,  4},
  { 70, 5}, { 102,  5}, { 134,  6}, { 198,  7},
  {326, 8}, { 582,  9}, {1094, 10}, {2118, 24},
};

// Maps 3 * (insert_prefix >> 3) + (copy_prefix >> 3) to the combined range
// index stored in the top bits of a command prefix (see CommandPrefix).
static const int kInsertAndCopyRangeLut[9] = {
  0, 1, 4, 2, 3, 6, 5, 7, 8,
};

// Inverse of the LUT above: recovers the insert range from the combined
// range index (code >> 6).
static const int kInsertRangeLut[9] = {
  0, 0, 1, 1, 0, 2, 1, 2, 2,
};

// Inverse of the LUT above: recovers the copy range from the combined
// range index (code >> 6).
static const int kCopyRangeLut[9] = {
  0, 1, 0, 1, 2, 0, 2, 1, 2,
};
+
+int InsertLengthPrefix(int length) {
+ for (int i = 0; i < kNumInsertLenPrefixes; ++i) {
+ const PrefixCodeRange& range = kInsertLengthPrefixCode[i];
+ if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int CopyLengthPrefix(int length) {
+ for (int i = 0; i < kNumCopyLenPrefixes; ++i) {
+ const PrefixCodeRange& range = kCopyLengthPrefixCode[i];
+ if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
// Combines an insert-length prefix and a copy-length prefix into a single
// command prefix code. Layout of the returned code:
//   bits 6 and up: combined range index from kInsertAndCopyRangeLut
//   bits 3-5:      low three bits of the insert-length prefix
//   bits 0-2:      low three bits of the copy-length prefix
int CommandPrefix(int insert_length, int copy_length) {
  if (copy_length == 0) {
    // A zero copy length is coded as if it were 3. NOTE(review): presumably
    // the decoder distinguishes this case via the distance code — confirm
    // against the encoder/format spec.
    copy_length = 3;
  }
  int insert_prefix = InsertLengthPrefix(insert_length);
  int copy_prefix = CopyLengthPrefix(copy_length);
  // Each LUT row covers 8 prefix codes (the >> 3 groups), so the combined
  // range is chosen from the high bits and the low 3 bits of each prefix
  // are packed verbatim.
  int range_idx = 3 * (insert_prefix >> 3) + (copy_prefix >> 3);
  return ((kInsertAndCopyRangeLut[range_idx] << 6) +
          ((insert_prefix & 7) << 3) + (copy_prefix & 7));
}
+
+int InsertLengthExtraBits(int code) {
+ int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
+ return kInsertLengthPrefixCode[insert_code].nbits;
+}
+
+int InsertLengthOffset(int code) {
+ int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
+ return kInsertLengthPrefixCode[insert_code].offset;
+}
+
+int CopyLengthExtraBits(int code) {
+ int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
+ return kCopyLengthPrefixCode[copy_code].nbits;
+}
+
+int CopyLengthOffset(int code) {
+ int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
+ return kCopyLengthPrefixCode[copy_code].offset;
+}
+
// Splits a (1-based) distance code into a prefix code, a count of extra
// bits, and the extra-bit value itself.
//
// The first kNumDistanceShortCodes + num_direct_codes codes are emitted
// verbatim with no extra bits. Larger distances are bucketed by their
// binary magnitude: one bit of the magnitude becomes part of the prefix,
// `postfix_bits` low bits are packed into the prefix as well, and the rest
// of the value is returned as extra bits.
void PrefixEncodeCopyDistance(int distance_code,
                              int num_direct_codes,
                              int postfix_bits,
                              uint16_t* code,
                              int* nbits,
                              uint32_t* extra_bits) {
  distance_code -= 1;  // convert to 0-based
  if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
    // Short codes and direct codes map 1:1 to prefix codes.
    *code = distance_code;
    *nbits = 0;
    *extra_bits = 0;
    return;
  }
  // Rebase past the short/direct region; the added constant keeps the
  // smallest rebased value in the first bucket.
  distance_code -= kNumDistanceShortCodes + num_direct_codes;
  distance_code += (1 << (postfix_bits + 2));
  int bucket = Log2Floor(distance_code) - 1;
  int postfix_mask = (1 << postfix_bits) - 1;
  int postfix = distance_code & postfix_mask;
  int prefix = (distance_code >> bucket) & 1;  // second-highest bit
  int offset = (2 + prefix) << bucket;  // start of the (bucket, prefix) range
  *nbits = bucket - postfix_bits;
  *code = kNumDistanceShortCodes + num_direct_codes +
      ((2 * (*nbits - 1) + prefix) << postfix_bits) + postfix;
  *extra_bits = (distance_code - offset) >> postfix_bits;
}
+
+int BlockLengthPrefix(int length) {
+ for (int i = 0; i < kNumBlockLenPrefixes; ++i) {
+ const PrefixCodeRange& range = kBlockLengthPrefixCode[i];
+ if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int BlockLengthExtraBits(int length_code) {
+ return kBlockLengthPrefixCode[length_code].nbits;
+}
+
+int BlockLengthOffset(int length_code) {
+ return kBlockLengthPrefixCode[length_code].offset;
+}
+
+} // namespace brotli
diff --git a/brotli/enc/prefix.h b/brotli/enc/prefix.h
new file mode 100644
index 0000000..47974f8
--- /dev/null
+++ b/brotli/enc/prefix.h
@@ -0,0 +1,51 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions for encoding of integers into prefix codes the amount of extra
+// bits, and the actual values of the extra bits.
+
+#ifndef BROTLI_ENC_PREFIX_H_
+#define BROTLI_ENC_PREFIX_H_
+
+#include <stdint.h>
+
+namespace brotli {
+
+static const int kNumInsertLenPrefixes = 24;
+static const int kNumCopyLenPrefixes = 24;
+static const int kNumCommandPrefixes = 704;
+static const int kNumBlockLenPrefixes = 26;
+static const int kNumDistanceShortCodes = 16;
+static const int kNumDistancePrefixes = 520;
+
+int CommandPrefix(int insert_length, int copy_length);
+int InsertLengthExtraBits(int prefix);
+int InsertLengthOffset(int prefix);
+int CopyLengthExtraBits(int prefix);
+int CopyLengthOffset(int prefix);
+
+void PrefixEncodeCopyDistance(int distance_code,
+ int num_direct_codes,
+ int shift_bits,
+ uint16_t* prefix,
+ int* nbits,
+ uint32_t* extra_bits);
+
+int BlockLengthPrefix(int length);
+int BlockLengthExtraBits(int prefix);
+int BlockLengthOffset(int prefix);
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_PREFIX_H_
diff --git a/brotli/enc/ringbuffer.h b/brotli/enc/ringbuffer.h
new file mode 100644
index 0000000..d88f2ca
--- /dev/null
+++ b/brotli/enc/ringbuffer.h
@@ -0,0 +1,89 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Sliding window over the input data.
+
+#ifndef BROTLI_ENC_RINGBUFFER_H_
+#define BROTLI_ENC_RINGBUFFER_H_
+
+// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
+// data in a circular manner: writing a byte writes it to
+// `position() % (1 << window_bits)'. For convenience, the RingBuffer array
+// contains another copy of the first `1 << tail_bits' bytes:
+// buffer_[i] == buffer_[i + (1 << window_bits)] if i < (1 << tail_bits).
+class RingBuffer {
+ public:
+ RingBuffer(int window_bits, int tail_bits)
+ : window_bits_(window_bits), tail_bits_(tail_bits), pos_(0) {
+ static const int kSlackForThreeByteHashingEverywhere = 2;
+ const int buflen = (1 << window_bits_) + (1 << tail_bits_);
+ buffer_ = new uint8_t[buflen + kSlackForThreeByteHashingEverywhere];
+ for (int i = 0; i < kSlackForThreeByteHashingEverywhere; ++i) {
+ buffer_[buflen + i] = 0;
+ }
+ }
+ ~RingBuffer() {
+ delete [] buffer_;
+ }
+
+ // Push bytes into the ring buffer.
+ void Write(const uint8_t *bytes, size_t n) {
+ const size_t masked_pos = pos_ & ((1 << window_bits_) - 1);
+ // The length of the writes is limited so that we do not need to worry
+ // about a write
+ WriteTail(bytes, n);
+ if (masked_pos + n <= (1 << window_bits_)) {
+ // A single write fits.
+ memcpy(&buffer_[masked_pos], bytes, n);
+ } else {
+ // Split into two writes.
+ // Copy into the end of the buffer, including the tail buffer.
+ memcpy(&buffer_[masked_pos], bytes,
+ std::min(n,
+ ((1 << window_bits_) + (1 << tail_bits_)) - masked_pos));
+ // Copy into the begining of the buffer
+ memcpy(&buffer_[0], bytes + ((1 << window_bits_) - masked_pos),
+ n - ((1 << window_bits_) - masked_pos));
+ }
+ pos_ += n;
+ }
+
+ // Logical cursor position in the ring buffer.
+ size_t position() const { return pos_; }
+
+ uint8_t *start() { return &buffer_[0]; }
+ const uint8_t *start() const { return &buffer_[0]; }
+
+ private:
+ void WriteTail(const uint8_t *bytes, size_t n) {
+ const size_t masked_pos = pos_ & ((1 << window_bits_) - 1);
+ if (masked_pos < (1 << tail_bits_)) {
+ // Just fill the tail buffer with the beginning data.
+ const size_t p = (1 << window_bits_) + masked_pos;
+ memcpy(&buffer_[p], bytes, std::min(n, (1 << tail_bits_) - masked_pos));
+ }
+ }
+
+ // Size of the ringbuffer is (1 << window_bits) + (1 << tail_bits).
+ const int window_bits_;
+ const int tail_bits_;
+
+ // Position to write in the ring buffer.
+ size_t pos_;
+ // The actual ring buffer containing the data and the copy of the beginning
+ // as a tail.
+ uint8_t *buffer_;
+};
+
+#endif // BROTLI_ENC_RINGBUFFER_H_
diff --git a/brotli/enc/write_bits.h b/brotli/enc/write_bits.h
new file mode 100644
index 0000000..cf6f53e
--- /dev/null
+++ b/brotli/enc/write_bits.h
@@ -0,0 +1,95 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Write bits into a byte array.
+
+#ifndef BROTLI_ENC_WRITE_BITS_H_
+#define BROTLI_ENC_WRITE_BITS_H_
+
+#include <assert.h>
+#if defined(OS_MACOSX)
+ #include <machine/endian.h>
+#else
+ #include <endian.h>
+#endif
+#include <stdint.h>
+#include <stdio.h>
+
+#include "./port.h"
+
+namespace brotli {
+
+//#define BIT_WRITER_DEBUG
+
+// This function writes bits into bytes in increasing addresses, and within
+// a byte least-significant-bit first.
+//
+// The function can write up to 56 bits in one go with WriteBits
+// Example: let's assume that 3 bits (Rs below) have been written already:
+//
+// BYTE-0 BYTE+1 BYTE+2
+//
+// 0000 0RRR 0000 0000 0000 0000
+//
+// Now, we could write 5 or less bits in MSB by just sifting by 3
+// and OR'ing to BYTE-0.
+//
+// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
+// and locate the rest in BYTE+1, BYTE+2, etc.
+inline void WriteBits(int n_bits,
+ uint64_t bits,
+ int * __restrict pos,
+ uint8_t * __restrict array) {
+#ifdef BIT_WRITER_DEBUG
+ printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
+#endif
+#ifdef IS_LITTLE_ENDIAN
+ // This branch of the code can write up to 56 bits at a time,
+ // 7 bits are lost by being perhaps already in *p and at least
+ // 1 bit is needed to initialize the bit-stream ahead (i.e. if 7
+ // bits are in *p and we write 57 bits, then the next write will
+ // access a byte that was never initialized).
+ uint8_t *p = &array[*pos >> 3];
+ uint64_t v = *p;
+ v |= bits << (*pos & 7);
+ BROTLI_UNALIGNED_STORE64(p, v); // Set some bits.
+ *pos += n_bits;
+#else
+ // implicit & 0xff is assumed for uint8_t arithmetics
+ uint8_t *array_pos = &array[*pos >> 3];
+ const int bits_reserved_in_first_byte = (*pos & 7);
+ bits <<= bits_reserved_in_first_byte;
+ *array_pos++ |= bits;
+ for (int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
+ bits_left_to_write >= 1;
+ bits_left_to_write -= 8) {
+ bits >>= 8;
+ *array_pos++ = bits;
+ }
+ *array_pos = 0;
+ *pos += n_bits;
+#endif
+}
+
+inline void WriteBitsPrepareStorage(int pos, uint8_t *array) {
+#ifdef BIT_WRITER_DEBUG
+ printf("WriteBitsPrepareStorage %10d\n", pos);
+#endif
+ assert((pos & 7) == 0);
+ array[pos >> 3] = 0;
+}
+
+} // namespace brotli
+
+#endif // BROTLI_ENC_WRITE_BITS_H_
diff --git a/build.xml b/build.xml
new file mode 100644
index 0000000..c1ff87d
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,32 @@
+<project name="compression" default="jar">
+ <target name="clean">
+ <delete dir="build/classes" />
+ <delete dir="build/jar" />
+ </target>
+
+ <target name="compile">
+ <mkdir dir="build/classes" />
+ <javac srcdir="src" destdir="build/classes" includeantruntime="false" debug="true">
+ <compilerarg value="-Xlint" />
+ <classpath>
+ <fileset dir="lib" includes="*.jar" />
+ </classpath>
+ </javac>
+ </target>
+
+ <target name="jar" depends="compile">
+ <mkdir dir="build/jar" />
+ <jar destfile="build/jar/compression.jar" basedir="build/classes">
+ <zipfileset src="lib/eotconverter.jar" />
+ <zipfileset src="lib/guava-11.0.1.jar" />
+ <zipfileset src="lib/sfntly.jar" />
+ <zipfileset src="lib/woffconverter.jar" />
+ <zipfileset src="lib/lzma.jar" />
+ <manifest>
+ <attribute name="Main-Class" value="com.google.typography.font.compression.SimpleRunner" />
+ </manifest>
+ </jar>
+ </target>
+
+</project>
+
diff --git a/docs/WOFFUltraCondensedfileformat.pdf b/docs/WOFFUltraCondensedfileformat.pdf
new file mode 100644
index 0000000..24c1e04
--- /dev/null
+++ b/docs/WOFFUltraCondensedfileformat.pdf
Binary files differ
diff --git a/ots-lzma.patch b/ots-lzma.patch
new file mode 100644
index 0000000..0cf22a5
--- /dev/null
+++ b/ots-lzma.patch
@@ -0,0 +1,5500 @@
+Index: test/lzma.cc
+===================================================================
+--- test/lzma.cc (revision 0)
++++ test/lzma.cc (revision 0)
+@@ -0,0 +1,139 @@
++// Copyright (c) 2012 The Chromium Authors. All rights reserved.
++// Use of this source code is governed by a BSD-style license that can be
++// found in the LICENSE file.
++
++#include <arpa/inet.h>
++#include <fcntl.h>
++#include <sys/stat.h>
++#include <unistd.h>
++
++#include <cstdio>
++#include <string>
++#include <vector>
++
++#include "opentype-sanitiser.h"
++#include "ots-memory-stream.h"
++
++#include "third_party/lzma_sdk/LzmaLib.h"
++
++namespace {
++
++static const size_t kCompressedLengthFieldSize = 4;
++
++int Usage(const char *argv0) {
++ std::fprintf(stderr, "Usage: %s (compress|decompress) filename\n", argv0);
++ return 1;
++}
++
++bool ReadFile(const char *file_name, std::vector<uint8_t>* data) {
++ const int fd = open(file_name, O_RDONLY);
++ if (fd < 0) {
++ return false;
++ }
++
++ struct stat st;
++ fstat(fd, &st);
++
++ data->resize(st.st_size);
++ if (read(fd, &(*data)[0], st.st_size) != st.st_size) {
++ close(fd);
++ return false;
++ }
++ close(fd);
++ return true;
++}
++
++bool Compress(std::vector<uint8_t>* input, std::vector<uint8_t>* output) {
++ size_t props_size = LZMA_PROPS_SIZE;
++ size_t out_len = input->size() * 2;
++ output->resize(out_len + props_size + kCompressedLengthFieldSize);
++
++ uint8_t* output_start = &(*output)[kCompressedLengthFieldSize];
++
++ int result = LzmaCompress(output_start + LZMA_PROPS_SIZE, &out_len,
++ &(*input)[0], input->size(),
++ output_start, &props_size,
++ -1, 0, -1, -1, -1, -1, 1);
++ if (props_size != LZMA_PROPS_SIZE || result != SZ_OK)
++ return false;
++
++ output->resize(props_size + out_len + kCompressedLengthFieldSize);
++ // Store the uncompressed length at the beginning of buffer.
++ uint32_t uncompressed_length = htonl(input->size());
++ memcpy(&(*output)[0], &uncompressed_length, kCompressedLengthFieldSize);
++ return true;
++}
++
++bool Decompress(std::vector<uint8_t>* input, std::vector<uint8_t>* output) {
++ if (input->size() < kCompressedLengthFieldSize + LZMA_PROPS_SIZE)
++ return false;
++
++ // Assume the uncompressed length is stored at the beginning of the buffer
++ // in network byte order.
++ uint32_t uncompressed_length = 0;
++ memcpy(&uncompressed_length, &(*input)[0], kCompressedLengthFieldSize);
++ uncompressed_length = ntohl(uncompressed_length);
++
++ output->resize(uncompressed_length);
++ uint8_t* input_start = &(*input)[kCompressedLengthFieldSize];
++ size_t in_len = input->size() - LZMA_PROPS_SIZE;
++ size_t out_len = output->size();
++ int result = LzmaUncompress(&(*output)[0], &out_len,
++ input_start + LZMA_PROPS_SIZE,
++ &in_len, input_start, LZMA_PROPS_SIZE);
++
++ return result == SZ_OK;
++}
++
++bool DumpResult(std::vector<uint8_t>* result, const std::string* file_name) {
++ int fd = open(file_name->c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0600);
++ if (fd < 0) {
++ perror("opening output file");
++ return false;
++ }
++ if (write(fd, &(*result)[0], result->size()) < 0) {
++ perror("writing output file");
++ close(fd);
++ return false;
++ }
++ close(fd);
++ return true;
++}
++
++} // namespace
++
++int main(int argc, char** argv) {
++ if (argc != 3) return Usage(argv[0]);
++
++ std::vector<uint8_t> in_data;
++ if (!ReadFile(argv[2], &in_data)) {
++ std::fprintf(stderr, "Failed to read file!\n");
++ return 1;
++ }
++
++ std::vector<uint8_t> out_data;
++ std::string file_name;
++ if (std::strncmp("compress", argv[1], 8) == 0) {
++ if (!Compress(&in_data, &out_data)) {
++ std::fprintf(stderr, "Failed to compress file.\n");
++ return 1;
++ }
++ file_name = "compressed.dat";
++ } else if (std::strncmp("decompress", argv[1], 10) == 0) {
++ if (!Decompress(&in_data, &out_data)) {
++ std::fprintf(stderr, "Failed to decompress file.\n");
++ return 1;
++ }
++ file_name = "decompressed.dat";
++ } else {
++ std::fprintf(
++ stderr,
++ "The second argument must be either 'compress' or 'decompress'.");
++ return 1;
++ }
++
++ if (!DumpResult(&out_data, &file_name)) {
++ std::fprintf(stderr, "Failed to write the result.\n");
++ return 1;
++ }
++}
+
+Property changes on: test/lzma.cc
+___________________________________________________________________
+Added: svn:eol-style
+ + LF
+
+Index: ots-common.gypi
+===================================================================
+--- ots-common.gypi (revision 83)
++++ ots-common.gypi (working copy)
+@@ -68,6 +68,7 @@
+ ],
+ 'ots_include_dirs': [
+ 'include',
++ '.',
+ ],
+ },
+ }
+Index: ots-standalone.gyp
+===================================================================
+--- ots-standalone.gyp (revision 83)
++++ ots-standalone.gyp (working copy)
+@@ -96,6 +96,12 @@
+ '<@(ots_include_dirs)',
+ ],
+ },
++ 'dependencies': [
++ 'third_party/lzma_sdk/lzma_sdk.gyp:lzma_sdk',
++ ],
++ 'export_dependent_settings': [
++ 'third_party/lzma_sdk/lzma_sdk.gyp:lzma_sdk',
++ ],
+ },
+ {
+ 'target_name': 'idempotent',
+@@ -117,5 +123,15 @@
+ }],
+ ],
+ },
++ {
++ 'target_name': 'lzma',
++ 'type': 'executable',
++ 'sources': [
++ 'test/lzma.cc',
++ ],
++ 'dependencies': [
++ 'ots',
++ ],
++ },
+ ],
+ }
+Index: third_party/lzma_sdk/LzmaEnc.h
+===================================================================
+--- third_party/lzma_sdk/LzmaEnc.h (revision 0)
++++ third_party/lzma_sdk/LzmaEnc.h (revision 0)
+@@ -0,0 +1,80 @@
++/* LzmaEnc.h -- LZMA Encoder
++2009-02-07 : Igor Pavlov : Public domain */
++
++#ifndef __LZMA_ENC_H
++#define __LZMA_ENC_H
++
++#include "Types.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++#define LZMA_PROPS_SIZE 5
++
++typedef struct _CLzmaEncProps
++{
++ int level; /* 0 <= level <= 9 */
++ UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
++ (1 << 12) <= dictSize <= (1 << 30) for 64-bit version
++ default = (1 << 24) */
++ int lc; /* 0 <= lc <= 8, default = 3 */
++ int lp; /* 0 <= lp <= 4, default = 0 */
++ int pb; /* 0 <= pb <= 4, default = 2 */
++ int algo; /* 0 - fast, 1 - normal, default = 1 */
++ int fb; /* 5 <= fb <= 273, default = 32 */
++ int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
++ int numHashBytes; /* 2, 3 or 4, default = 4 */
++ UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
++ unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
++ int numThreads; /* 1 or 2, default = 2 */
++} CLzmaEncProps;
++
++void LzmaEncProps_Init(CLzmaEncProps *p);
++void LzmaEncProps_Normalize(CLzmaEncProps *p);
++UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
++
++
++/* ---------- CLzmaEncHandle Interface ---------- */
++
++/* LzmaEnc_* functions can return the following exit codes:
++Returns:
++ SZ_OK - OK
++ SZ_ERROR_MEM - Memory allocation error
++ SZ_ERROR_PARAM - Incorrect paramater in props
++ SZ_ERROR_WRITE - Write callback error.
++ SZ_ERROR_PROGRESS - some break from progress callback
++ SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
++*/
++
++typedef void * CLzmaEncHandle;
++
++CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc);
++void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig);
++SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
++SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
++SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
++ ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
++SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
++ int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
++
++/* ---------- One Call Interface ---------- */
++
++/* LzmaEncode
++Return code:
++ SZ_OK - OK
++ SZ_ERROR_MEM - Memory allocation error
++ SZ_ERROR_PARAM - Incorrect paramater
++ SZ_ERROR_OUTPUT_EOF - output buffer overflow
++ SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
++*/
++
++SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
++ const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
++ ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+
+Property changes on: third_party/lzma_sdk/LzmaEnc.h
+___________________________________________________________________
+Added: svn:eol-style
+ + LF
+
+Index: third_party/lzma_sdk/LzHash.h
+===================================================================
+--- third_party/lzma_sdk/LzHash.h (revision 0)
++++ third_party/lzma_sdk/LzHash.h (revision 0)
+@@ -0,0 +1,54 @@
++/* LzHash.h -- HASH functions for LZ algorithms
++2009-02-07 : Igor Pavlov : Public domain */
++
++#ifndef __LZ_HASH_H
++#define __LZ_HASH_H
++
++#define kHash2Size (1 << 10)
++#define kHash3Size (1 << 16)
++#define kHash4Size (1 << 20)
++
++#define kFix3HashSize (kHash2Size)
++#define kFix4HashSize (kHash2Size + kHash3Size)
++#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
++
++#define HASH2_CALC hashValue = cur[0] | ((UInt32)cur[1] << 8);
++
++#define HASH3_CALC { \
++ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
++ hash2Value = temp & (kHash2Size - 1); \
++ hashValue = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
++
++#define HASH4_CALC { \
++ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
++ hash2Value = temp & (kHash2Size - 1); \
++ hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
++ hashValue = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
++
++#define HASH5_CALC { \
++ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
++ hash2Value = temp & (kHash2Size - 1); \
++ hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
++ hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \
++ hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & p->hashMask; \
++ hash4Value &= (kHash4Size - 1); }
++
++/* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
++#define HASH_ZIP_CALC hashValue = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
++
++
++#define MT_HASH2_CALC \
++ hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
++
++#define MT_HASH3_CALC { \
++ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
++ hash2Value = temp & (kHash2Size - 1); \
++ hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
++
++#define MT_HASH4_CALC { \
++ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
++ hash2Value = temp & (kHash2Size - 1); \
++ hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
++ hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
++
++#endif
+
+Property changes on: third_party/lzma_sdk/LzHash.h
+___________________________________________________________________
+Added: svn:executable
+ + *
+Added: svn:eol-style
+ + LF
+
+Index: third_party/lzma_sdk/Alloc.h
+===================================================================
+--- third_party/lzma_sdk/Alloc.h (revision 0)
++++ third_party/lzma_sdk/Alloc.h (revision 0)
+@@ -0,0 +1,38 @@
++/* Alloc.h -- Memory allocation functions
++2009-02-07 : Igor Pavlov : Public domain */
++
++#ifndef __COMMON_ALLOC_H
++#define __COMMON_ALLOC_H
++
++#include <stddef.h>
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++void *MyAlloc(size_t size);
++void MyFree(void *address);
++
++#ifdef _WIN32
++
++void SetLargePageSize();
++
++void *MidAlloc(size_t size);
++void MidFree(void *address);
++void *BigAlloc(size_t size);
++void BigFree(void *address);
++
++#else
++
++#define MidAlloc(size) MyAlloc(size)
++#define MidFree(address) MyFree(address)
++#define BigAlloc(size) MyAlloc(size)
++#define BigFree(address) MyFree(address)
++
++#endif
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+
+Property changes on: third_party/lzma_sdk/Alloc.h
+___________________________________________________________________
+Added: svn:eol-style
+ + LF
+
+Index: third_party/lzma_sdk/LzmaLib.h
+===================================================================
+--- third_party/lzma_sdk/LzmaLib.h (revision 0)
++++ third_party/lzma_sdk/LzmaLib.h (revision 0)
+@@ -0,0 +1,135 @@
++/* LzmaLib.h -- LZMA library interface
++2009-04-07 : Igor Pavlov : Public domain */
++
++#ifndef __LZMA_LIB_H
++#define __LZMA_LIB_H
++
++#include "Types.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++#define MY_STDAPI int MY_STD_CALL
++
++#define LZMA_PROPS_SIZE 5
++
++/*
++RAM requirements for LZMA:
++ for compression: (dictSize * 11.5 + 6 MB) + state_size
++ for decompression: dictSize + state_size
++ state_size = (4 + (1.5 << (lc + lp))) KB
++ by default (lc=3, lp=0), state_size = 16 KB.
++
++LZMA properties (5 bytes) format
++ Offset Size Description
++ 0 1 lc, lp and pb in encoded form.
++ 1 4 dictSize (little endian).
++*/
++
++/*
++LzmaCompress
++------------
++
++outPropsSize -
++ In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
++ Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
++
++ LZMA Encoder will use defult values for any parameter, if it is
++ -1 for any from: level, loc, lp, pb, fb, numThreads
++ 0 for dictSize
++
++level - compression level: 0 <= level <= 9;
++
++ level dictSize algo fb
++ 0: 16 KB 0 32
++ 1: 64 KB 0 32
++ 2: 256 KB 0 32
++ 3: 1 MB 0 32
++ 4: 4 MB 0 32
++ 5: 16 MB 1 32
++ 6: 32 MB 1 32
++ 7+: 64 MB 1 64
++
++ The default value for "level" is 5.
++
++ algo = 0 means fast method
++ algo = 1 means normal method
++
++dictSize - The dictionary size in bytes. The maximum value is
++ 128 MB = (1 << 27) bytes for 32-bit version
++ 1 GB = (1 << 30) bytes for 64-bit version
++ The default value is 16 MB = (1 << 24) bytes.
++ It's recommended to use the dictionary that is larger than 4 KB and
++ that can be calculated as (1 << N) or (3 << N) sizes.
++
++lc - The number of literal context bits (high bits of previous literal).
++ It can be in the range from 0 to 8. The default value is 3.
++ Sometimes lc=4 gives the gain for big files.
++
++lp - The number of literal pos bits (low bits of current position for literals).
++ It can be in the range from 0 to 4. The default value is 0.
++ The lp switch is intended for periodical data when the period is equal to 2^lp.
++ For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's
++ better to set lc=0, if you change lp switch.
++
++pb - The number of pos bits (low bits of current position).
++ It can be in the range from 0 to 4. The default value is 2.
++ The pb switch is intended for periodical data when the period is equal 2^pb.
++
++fb - Word size (the number of fast bytes).
++ It can be in the range from 5 to 273. The default value is 32.
++ Usually, a big number gives a little bit better compression ratio and
++ slower compression process.
++
++numThreads - The number of thereads. 1 or 2. The default value is 2.
++ Fast mode (algo = 0) can use only 1 thread.
++
++Out:
++ destLen - processed output size
++Returns:
++ SZ_OK - OK
++ SZ_ERROR_MEM - Memory allocation error
++ SZ_ERROR_PARAM - Incorrect paramater
++ SZ_ERROR_OUTPUT_EOF - output buffer overflow
++ SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
++*/
++
++MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
++ unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
++ int level, /* 0 <= level <= 9, default = 5 */
++ unsigned dictSize, /* default = (1 << 24) */
++ int lc, /* 0 <= lc <= 8, default = 3 */
++ int lp, /* 0 <= lp <= 4, default = 0 */
++ int pb, /* 0 <= pb <= 4, default = 2 */
++ int fb, /* 5 <= fb <= 273, default = 32 */
++ int numThreads /* 1 or 2, default = 2 */
++ );
++
++/*
++LzmaUncompress
++--------------
++In:
++ dest - output data
++ destLen - output data size
++ src - input data
++ srcLen - input data size
++Out:
++ destLen - processed output size
++ srcLen - processed input size
++Returns:
++ SZ_OK - OK
++ SZ_ERROR_DATA - Data error
++ SZ_ERROR_MEM - Memory allocation arror
++ SZ_ERROR_UNSUPPORTED - Unsupported properties
++ SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)
++*/
++
++MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
++ const unsigned char *props, size_t propsSize);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+
+Property changes on: third_party/lzma_sdk/LzmaLib.h
+___________________________________________________________________
+Added: svn:eol-style
+ + LF
+
+Index: third_party/lzma_sdk/LICENSE
+===================================================================
+--- third_party/lzma_sdk/LICENSE (revision 0)
++++ third_party/lzma_sdk/LICENSE (revision 0)
+@@ -0,0 +1 @@
++LZMA SDK is placed in the public domain.
+Index: third_party/lzma_sdk/Types.h
+===================================================================
+--- third_party/lzma_sdk/Types.h (revision 0)
++++ third_party/lzma_sdk/Types.h (revision 0)
+@@ -0,0 +1,254 @@
++/* Types.h -- Basic types
++2010-10-09 : Igor Pavlov : Public domain */
++
++#ifndef __7Z_TYPES_H
++#define __7Z_TYPES_H
++
++#include <stddef.h>
++
++#ifdef _WIN32
++#include <windows.h>
++#endif
++
++#ifndef EXTERN_C_BEGIN
++#ifdef __cplusplus
++#define EXTERN_C_BEGIN extern "C" {
++#define EXTERN_C_END }
++#else
++#define EXTERN_C_BEGIN
++#define EXTERN_C_END
++#endif
++#endif
++
++EXTERN_C_BEGIN
++
++#define SZ_OK 0
++
++#define SZ_ERROR_DATA 1
++#define SZ_ERROR_MEM 2
++#define SZ_ERROR_CRC 3
++#define SZ_ERROR_UNSUPPORTED 4
++#define SZ_ERROR_PARAM 5
++#define SZ_ERROR_INPUT_EOF 6
++#define SZ_ERROR_OUTPUT_EOF 7
++#define SZ_ERROR_READ 8
++#define SZ_ERROR_WRITE 9
++#define SZ_ERROR_PROGRESS 10
++#define SZ_ERROR_FAIL 11
++#define SZ_ERROR_THREAD 12
++
++#define SZ_ERROR_ARCHIVE 16
++#define SZ_ERROR_NO_ARCHIVE 17
++
++typedef int SRes;
++
++#ifdef _WIN32
++typedef DWORD WRes;
++#else
++typedef int WRes;
++#endif
++
++#ifndef RINOK
++#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
++#endif
++
++typedef unsigned char Byte;
++typedef short Int16;
++typedef unsigned short UInt16;
++
++#ifdef _LZMA_UINT32_IS_ULONG
++typedef long Int32;
++typedef unsigned long UInt32;
++#else
++typedef int Int32;
++typedef unsigned int UInt32;
++#endif
++
++#ifdef _SZ_NO_INT_64
++
++/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
++ NOTES: Some code will work incorrectly in that case! */
++
++typedef long Int64;
++typedef unsigned long UInt64;
++
++#else
++
++#if defined(_MSC_VER) || defined(__BORLANDC__)
++typedef __int64 Int64;
++typedef unsigned __int64 UInt64;
++#define UINT64_CONST(n) n
++#else
++typedef long long int Int64;
++typedef unsigned long long int UInt64;
++#define UINT64_CONST(n) n ## ULL
++#endif
++
++#endif
++
++#ifdef _LZMA_NO_SYSTEM_SIZE_T
++typedef UInt32 SizeT;
++#else
++typedef size_t SizeT;
++#endif
++
++typedef int Bool;
++#define True 1
++#define False 0
++
++
++#ifdef _WIN32
++#define MY_STD_CALL __stdcall
++#else
++#define MY_STD_CALL
++#endif
++
++#ifdef _MSC_VER
++
++#if _MSC_VER >= 1300
++#define MY_NO_INLINE __declspec(noinline)
++#else
++#define MY_NO_INLINE
++#endif
++
++#define MY_CDECL __cdecl
++#define MY_FAST_CALL __fastcall
++
++#else
++
++#define MY_CDECL
++#define MY_FAST_CALL
++
++#endif
++
++
++/* The following interfaces use first parameter as pointer to structure */
++
++typedef struct
++{
++ Byte (*Read)(void *p); /* reads one byte, returns 0 in case of EOF or error */
++} IByteIn;
++
++typedef struct
++{
++ void (*Write)(void *p, Byte b);
++} IByteOut;
++
++typedef struct
++{
++ SRes (*Read)(void *p, void *buf, size_t *size);
++ /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
++ (output(*size) < input(*size)) is allowed */
++} ISeqInStream;
++
++/* it can return SZ_ERROR_INPUT_EOF */
++SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size);
++SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType);
++SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf);
++
++typedef struct
++{
++ size_t (*Write)(void *p, const void *buf, size_t size);
++ /* Returns: result - the number of actually written bytes.
++ (result < size) means error */
++} ISeqOutStream;
++
++typedef enum
++{
++ SZ_SEEK_SET = 0,
++ SZ_SEEK_CUR = 1,
++ SZ_SEEK_END = 2
++} ESzSeek;
++
++typedef struct
++{
++ SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */
++ SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
++} ISeekInStream;
++
++typedef struct
++{
++ SRes (*Look)(void *p, const void **buf, size_t *size);
++ /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
++ (output(*size) > input(*size)) is not allowed
++ (output(*size) < input(*size)) is allowed */
++ SRes (*Skip)(void *p, size_t offset);
++ /* offset must be <= output(*size) of Look */
++
++ SRes (*Read)(void *p, void *buf, size_t *size);
++ /* reads directly (without buffer). It's same as ISeqInStream::Read */
++ SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
++} ILookInStream;
++
++SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size);
++SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset);
++
++/* reads via ILookInStream::Read */
++SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType);
++SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size);
++
++#define LookToRead_BUF_SIZE (1 << 14)
++
++typedef struct
++{
++ ILookInStream s;
++ ISeekInStream *realStream;
++ size_t pos;
++ size_t size;
++ Byte buf[LookToRead_BUF_SIZE];
++} CLookToRead;
++
++void LookToRead_CreateVTable(CLookToRead *p, int lookahead);
++void LookToRead_Init(CLookToRead *p);
++
++typedef struct
++{
++ ISeqInStream s;
++ ILookInStream *realStream;
++} CSecToLook;
++
++void SecToLook_CreateVTable(CSecToLook *p);
++
++typedef struct
++{
++ ISeqInStream s;
++ ILookInStream *realStream;
++} CSecToRead;
++
++void SecToRead_CreateVTable(CSecToRead *p);
++
++typedef struct
++{
++ SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize);
++ /* Returns: result. (result != SZ_OK) means break.
++ Value (UInt64)(Int64)-1 for size means unknown value. */
++} ICompressProgress;
++
++typedef struct
++{
++ void *(*Alloc)(void *p, size_t size);
++ void (*Free)(void *p, void *address); /* address can be 0 */
++} ISzAlloc;
++
++#define IAlloc_Alloc(p, size) (p)->Alloc((p), size)
++#define IAlloc_Free(p, a) (p)->Free((p), a)
++
++#ifdef _WIN32
++
++#define CHAR_PATH_SEPARATOR '\\'
++#define WCHAR_PATH_SEPARATOR L'\\'
++#define STRING_PATH_SEPARATOR "\\"
++#define WSTRING_PATH_SEPARATOR L"\\"
++
++#else
++
++#define CHAR_PATH_SEPARATOR '/'
++#define WCHAR_PATH_SEPARATOR L'/'
++#define STRING_PATH_SEPARATOR "/"
++#define WSTRING_PATH_SEPARATOR L"/"
++
++#endif
++
++EXTERN_C_END
++
++#endif
+
+Property changes on: third_party/lzma_sdk/Types.h
+___________________________________________________________________
+Added: svn:eol-style
+ + LF
+
+Index: third_party/lzma_sdk/LzmaDec.c
+===================================================================
+--- third_party/lzma_sdk/LzmaDec.c (revision 0)
++++ third_party/lzma_sdk/LzmaDec.c (revision 0)
+@@ -0,0 +1,999 @@
++/* LzmaDec.c -- LZMA Decoder
++2009-09-20 : Igor Pavlov : Public domain */
++
++#include "LzmaDec.h"
++
++#include <string.h>
++
++#define kNumTopBits 24
++#define kTopValue ((UInt32)1 << kNumTopBits)
++
++#define kNumBitModelTotalBits 11
++#define kBitModelTotal (1 << kNumBitModelTotalBits)
++#define kNumMoveBits 5
++
++#define RC_INIT_SIZE 5
++
++#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
++
++#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
++#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
++#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
++#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
++ { UPDATE_0(p); i = (i + i); A0; } else \
++ { UPDATE_1(p); i = (i + i) + 1; A1; }
++#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;)
++
++#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); }
++#define TREE_DECODE(probs, limit, i) \
++ { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
++
++/* #define _LZMA_SIZE_OPT */
++
++#ifdef _LZMA_SIZE_OPT
++#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
++#else
++#define TREE_6_DECODE(probs, i) \
++ { i = 1; \
++ TREE_GET_BIT(probs, i); \
++ TREE_GET_BIT(probs, i); \
++ TREE_GET_BIT(probs, i); \
++ TREE_GET_BIT(probs, i); \
++ TREE_GET_BIT(probs, i); \
++ TREE_GET_BIT(probs, i); \
++ i -= 0x40; }
++#endif
++
++#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
++
++#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
++#define UPDATE_0_CHECK range = bound;
++#define UPDATE_1_CHECK range -= bound; code -= bound;
++#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
++ { UPDATE_0_CHECK; i = (i + i); A0; } else \
++ { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
++#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
++#define TREE_DECODE_CHECK(probs, limit, i) \
++ { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
++
++
++#define kNumPosBitsMax 4
++#define kNumPosStatesMax (1 << kNumPosBitsMax)
++
++#define kLenNumLowBits 3
++#define kLenNumLowSymbols (1 << kLenNumLowBits)
++#define kLenNumMidBits 3
++#define kLenNumMidSymbols (1 << kLenNumMidBits)
++#define kLenNumHighBits 8
++#define kLenNumHighSymbols (1 << kLenNumHighBits)
++
++#define LenChoice 0
++#define LenChoice2 (LenChoice + 1)
++#define LenLow (LenChoice2 + 1)
++#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
++#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
++#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
++
++
++#define kNumStates 12
++#define kNumLitStates 7
++
++#define kStartPosModelIndex 4
++#define kEndPosModelIndex 14
++#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
++
++#define kNumPosSlotBits 6
++#define kNumLenToPosStates 4
++
++#define kNumAlignBits 4
++#define kAlignTableSize (1 << kNumAlignBits)
++
++#define kMatchMinLen 2
++#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)
++
++#define IsMatch 0
++#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
++#define IsRepG0 (IsRep + kNumStates)
++#define IsRepG1 (IsRepG0 + kNumStates)
++#define IsRepG2 (IsRepG1 + kNumStates)
++#define IsRep0Long (IsRepG2 + kNumStates)
++#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
++#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
++#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
++#define LenCoder (Align + kAlignTableSize)
++#define RepLenCoder (LenCoder + kNumLenProbs)
++#define Literal (RepLenCoder + kNumLenProbs)
++
++#define LZMA_BASE_SIZE 1846
++#define LZMA_LIT_SIZE 768
++
++#define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
++
++#if Literal != LZMA_BASE_SIZE
++StopCompilingDueBUG
++#endif
++
++#define LZMA_DIC_MIN (1 << 12)
++
++/* First LZMA-symbol is always decoded.
++And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization
++Out:
++ Result:
++ SZ_OK - OK
++ SZ_ERROR_DATA - Error
++ p->remainLen:
++ < kMatchSpecLenStart : normal remain
++ = kMatchSpecLenStart : finished
++ = kMatchSpecLenStart + 1 : Flush marker
++ = kMatchSpecLenStart + 2 : State Init Marker
++*/
++
++static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
++{
++ CLzmaProb *probs = p->probs;
++
++ unsigned state = p->state;
++ UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
++ unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
++ unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1;
++ unsigned lc = p->prop.lc;
++
++ Byte *dic = p->dic;
++ SizeT dicBufSize = p->dicBufSize;
++ SizeT dicPos = p->dicPos;
++
++ UInt32 processedPos = p->processedPos;
++ UInt32 checkDicSize = p->checkDicSize;
++ unsigned len = 0;
++
++ const Byte *buf = p->buf;
++ UInt32 range = p->range;
++ UInt32 code = p->code;
++
++ do
++ {
++ CLzmaProb *prob;
++ UInt32 bound;
++ unsigned ttt;
++ unsigned posState = processedPos & pbMask;
++
++ prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
++ IF_BIT_0(prob)
++ {
++ unsigned symbol;
++ UPDATE_0(prob);
++ prob = probs + Literal;
++ if (checkDicSize != 0 || processedPos != 0)
++ prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) +
++ (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc))));
++
++ if (state < kNumLitStates)
++ {
++ state -= (state < 4) ? state : 3;
++ symbol = 1;
++ do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
++ }
++ else
++ {
++ unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
++ unsigned offs = 0x100;
++ state -= (state < 10) ? 3 : 6;
++ symbol = 1;
++ do
++ {
++ unsigned bit;
++ CLzmaProb *probLit;
++ matchByte <<= 1;
++ bit = (matchByte & offs);
++ probLit = prob + offs + bit + symbol;
++ GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
++ }
++ while (symbol < 0x100);
++ }
++ dic[dicPos++] = (Byte)symbol;
++ processedPos++;
++ continue;
++ }
++ else
++ {
++ UPDATE_1(prob);
++ prob = probs + IsRep + state;
++ IF_BIT_0(prob)
++ {
++ UPDATE_0(prob);
++ state += kNumStates;
++ prob = probs + LenCoder;
++ }
++ else
++ {
++ UPDATE_1(prob);
++ if (checkDicSize == 0 && processedPos == 0)
++ return SZ_ERROR_DATA;
++ prob = probs + IsRepG0 + state;
++ IF_BIT_0(prob)
++ {
++ UPDATE_0(prob);
++ prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
++ IF_BIT_0(prob)
++ {
++ UPDATE_0(prob);
++ dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
++ dicPos++;
++ processedPos++;
++ state = state < kNumLitStates ? 9 : 11;
++ continue;
++ }
++ UPDATE_1(prob);
++ }
++ else
++ {
++ UInt32 distance;
++ UPDATE_1(prob);
++ prob = probs + IsRepG1 + state;
++ IF_BIT_0(prob)
++ {
++ UPDATE_0(prob);
++ distance = rep1;
++ }
++ else
++ {
++ UPDATE_1(prob);
++ prob = probs + IsRepG2 + state;
++ IF_BIT_0(prob)
++ {
++ UPDATE_0(prob);
++ distance = rep2;
++ }
++ else
++ {
++ UPDATE_1(prob);
++ distance = rep3;
++ rep3 = rep2;
++ }
++ rep2 = rep1;
++ }
++ rep1 = rep0;
++ rep0 = distance;
++ }
++ state = state < kNumLitStates ? 8 : 11;
++ prob = probs + RepLenCoder;
++ }
++ {
++ unsigned limit, offset;
++ CLzmaProb *probLen = prob + LenChoice;
++ IF_BIT_0(probLen)
++ {
++ UPDATE_0(probLen);
++ probLen = prob + LenLow + (posState << kLenNumLowBits);
++ offset = 0;
++ limit = (1 << kLenNumLowBits);
++ }
++ else
++ {
++ UPDATE_1(probLen);
++ probLen = prob + LenChoice2;
++ IF_BIT_0(probLen)
++ {
++ UPDATE_0(probLen);
++ probLen = prob + LenMid + (posState << kLenNumMidBits);
++ offset = kLenNumLowSymbols;
++ limit = (1 << kLenNumMidBits);
++ }
++ else
++ {
++ UPDATE_1(probLen);
++ probLen = prob + LenHigh;
++ offset = kLenNumLowSymbols + kLenNumMidSymbols;
++ limit = (1 << kLenNumHighBits);
++ }
++ }
++ TREE_DECODE(probLen, limit, len);
++ len += offset;
++ }
++
++ if (state >= kNumStates)
++ {
++ UInt32 distance;
++ prob = probs + PosSlot +
++ ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
++ TREE_6_DECODE(prob, distance);
++ if (distance >= kStartPosModelIndex)
++ {
++ unsigned posSlot = (unsigned)distance;
++ int numDirectBits = (int)(((distance >> 1) - 1));
++ distance = (2 | (distance & 1));
++ if (posSlot < kEndPosModelIndex)
++ {
++ distance <<= numDirectBits;
++ prob = probs + SpecPos + distance - posSlot - 1;
++ {
++ UInt32 mask = 1;
++ unsigned i = 1;
++ do
++ {
++ GET_BIT2(prob + i, i, ; , distance |= mask);
++ mask <<= 1;
++ }
++ while (--numDirectBits != 0);
++ }
++ }
++ else
++ {
++ numDirectBits -= kNumAlignBits;
++ do
++ {
++ NORMALIZE
++ range >>= 1;
++
++ {
++ UInt32 t;
++ code -= range;
++ t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
++ distance = (distance << 1) + (t + 1);
++ code += range & t;
++ }
++ /*
++ distance <<= 1;
++ if (code >= range)
++ {
++ code -= range;
++ distance |= 1;
++ }
++ */
++ }
++ while (--numDirectBits != 0);
++ prob = probs + Align;
++ distance <<= kNumAlignBits;
++ {
++ unsigned i = 1;
++ GET_BIT2(prob + i, i, ; , distance |= 1);
++ GET_BIT2(prob + i, i, ; , distance |= 2);
++ GET_BIT2(prob + i, i, ; , distance |= 4);
++ GET_BIT2(prob + i, i, ; , distance |= 8);
++ }
++ if (distance == (UInt32)0xFFFFFFFF)
++ {
++ len += kMatchSpecLenStart;
++ state -= kNumStates;
++ break;
++ }
++ }
++ }
++ rep3 = rep2;
++ rep2 = rep1;
++ rep1 = rep0;
++ rep0 = distance + 1;
++ if (checkDicSize == 0)
++ {
++ if (distance >= processedPos)
++ return SZ_ERROR_DATA;
++ }
++ else if (distance >= checkDicSize)
++ return SZ_ERROR_DATA;
++ state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
++ }
++
++ len += kMatchMinLen;
++
++ if (limit == dicPos)
++ return SZ_ERROR_DATA;
++ {
++ SizeT rem = limit - dicPos;
++ unsigned curLen = ((rem < len) ? (unsigned)rem : len);
++ SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0);
++
++ processedPos += curLen;
++
++ len -= curLen;
++ if (pos + curLen <= dicBufSize)
++ {
++ Byte *dest = dic + dicPos;
++ ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
++ const Byte *lim = dest + curLen;
++ dicPos += curLen;
++ do
++ *(dest) = (Byte)*(dest + src);
++ while (++dest != lim);
++ }
++ else
++ {
++ do
++ {
++ dic[dicPos++] = dic[pos];
++ if (++pos == dicBufSize)
++ pos = 0;
++ }
++ while (--curLen != 0);
++ }
++ }
++ }
++ }
++ while (dicPos < limit && buf < bufLimit);
++ NORMALIZE;
++ p->buf = buf;
++ p->range = range;
++ p->code = code;
++ p->remainLen = len;
++ p->dicPos = dicPos;
++ p->processedPos = processedPos;
++ p->reps[0] = rep0;
++ p->reps[1] = rep1;
++ p->reps[2] = rep2;
++ p->reps[3] = rep3;
++ p->state = state;
++
++ return SZ_OK;
++}
++
++static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
++{
++ if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
++ {
++ Byte *dic = p->dic;
++ SizeT dicPos = p->dicPos;
++ SizeT dicBufSize = p->dicBufSize;
++ unsigned len = p->remainLen;
++ UInt32 rep0 = p->reps[0];
++ if (limit - dicPos < len)
++ len = (unsigned)(limit - dicPos);
++
++ if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
++ p->checkDicSize = p->prop.dicSize;
++
++ p->processedPos += len;
++ p->remainLen -= len;
++ while (len-- != 0)
++ {
++ dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
++ dicPos++;
++ }
++ p->dicPos = dicPos;
++ }
++}
++
++static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
++{
++ do
++ {
++ SizeT limit2 = limit;
++ if (p->checkDicSize == 0)
++ {
++ UInt32 rem = p->prop.dicSize - p->processedPos;
++ if (limit - p->dicPos > rem)
++ limit2 = p->dicPos + rem;
++ }
++ RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit));
++ if (p->processedPos >= p->prop.dicSize)
++ p->checkDicSize = p->prop.dicSize;
++ LzmaDec_WriteRem(p, limit);
++ }
++ while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
++
++ if (p->remainLen > kMatchSpecLenStart)
++ {
++ p->remainLen = kMatchSpecLenStart;
++ }
++ return 0;
++}
++
++typedef enum
++{
++ DUMMY_ERROR, /* unexpected end of input stream */
++ DUMMY_LIT,
++ DUMMY_MATCH,
++ DUMMY_REP
++} ELzmaDummy;
++
++static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
++{
++ UInt32 range = p->range;
++ UInt32 code = p->code;
++ const Byte *bufLimit = buf + inSize;
++ CLzmaProb *probs = p->probs;
++ unsigned state = p->state;
++ ELzmaDummy res;
++
++ {
++ CLzmaProb *prob;
++ UInt32 bound;
++ unsigned ttt;
++ unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1);
++
++ prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
++ IF_BIT_0_CHECK(prob)
++ {
++ UPDATE_0_CHECK
++
++ /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
++
++ prob = probs + Literal;
++ if (p->checkDicSize != 0 || p->processedPos != 0)
++ prob += (LZMA_LIT_SIZE *
++ ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
++ (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
++
++ if (state < kNumLitStates)
++ {
++ unsigned symbol = 1;
++ do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
++ }
++ else
++ {
++ unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
++ ((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)];
++ unsigned offs = 0x100;
++ unsigned symbol = 1;
++ do
++ {
++ unsigned bit;
++ CLzmaProb *probLit;
++ matchByte <<= 1;
++ bit = (matchByte & offs);
++ probLit = prob + offs + bit + symbol;
++ GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
++ }
++ while (symbol < 0x100);
++ }
++ res = DUMMY_LIT;
++ }
++ else
++ {
++ unsigned len;
++ UPDATE_1_CHECK;
++
++ prob = probs + IsRep + state;
++ IF_BIT_0_CHECK(prob)
++ {
++ UPDATE_0_CHECK;
++ state = 0;
++ prob = probs + LenCoder;
++ res = DUMMY_MATCH;
++ }
++ else
++ {
++ UPDATE_1_CHECK;
++ res = DUMMY_REP;
++ prob = probs + IsRepG0 + state;
++ IF_BIT_0_CHECK(prob)
++ {
++ UPDATE_0_CHECK;
++ prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
++ IF_BIT_0_CHECK(prob)
++ {
++ UPDATE_0_CHECK;
++ NORMALIZE_CHECK;
++ return DUMMY_REP;
++ }
++ else
++ {
++ UPDATE_1_CHECK;
++ }
++ }
++ else
++ {
++ UPDATE_1_CHECK;
++ prob = probs + IsRepG1 + state;
++ IF_BIT_0_CHECK(prob)
++ {
++ UPDATE_0_CHECK;
++ }
++ else
++ {
++ UPDATE_1_CHECK;
++ prob = probs + IsRepG2 + state;
++ IF_BIT_0_CHECK(prob)
++ {
++ UPDATE_0_CHECK;
++ }
++ else
++ {
++ UPDATE_1_CHECK;
++ }
++ }
++ }
++ state = kNumStates;
++ prob = probs + RepLenCoder;
++ }
++ {
++ unsigned limit, offset;
++ CLzmaProb *probLen = prob + LenChoice;
++ IF_BIT_0_CHECK(probLen)
++ {
++ UPDATE_0_CHECK;
++ probLen = prob + LenLow + (posState << kLenNumLowBits);
++ offset = 0;
++ limit = 1 << kLenNumLowBits;
++ }
++ else
++ {
++ UPDATE_1_CHECK;
++ probLen = prob + LenChoice2;
++ IF_BIT_0_CHECK(probLen)
++ {
++ UPDATE_0_CHECK;
++ probLen = prob + LenMid + (posState << kLenNumMidBits);
++ offset = kLenNumLowSymbols;
++ limit = 1 << kLenNumMidBits;
++ }
++ else
++ {
++ UPDATE_1_CHECK;
++ probLen = prob + LenHigh;
++ offset = kLenNumLowSymbols + kLenNumMidSymbols;
++ limit = 1 << kLenNumHighBits;
++ }
++ }
++ TREE_DECODE_CHECK(probLen, limit, len);
++ len += offset;
++ }
++
++ if (state < 4)
++ {
++ unsigned posSlot;
++ prob = probs + PosSlot +
++ ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) <<
++ kNumPosSlotBits);
++ TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
++ if (posSlot >= kStartPosModelIndex)
++ {
++ int numDirectBits = ((posSlot >> 1) - 1);
++
++ /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
++
++ if (posSlot < kEndPosModelIndex)
++ {
++ prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1;
++ }
++ else
++ {
++ numDirectBits -= kNumAlignBits;
++ do
++ {
++ NORMALIZE_CHECK
++ range >>= 1;
++ code -= range & (((code - range) >> 31) - 1);
++ /* if (code >= range) code -= range; */
++ }
++ while (--numDirectBits != 0);
++ prob = probs + Align;
++ numDirectBits = kNumAlignBits;
++ }
++ {
++ unsigned i = 1;
++ do
++ {
++ GET_BIT_CHECK(prob + i, i);
++ }
++ while (--numDirectBits != 0);
++ }
++ }
++ }
++ }
++ }
++ NORMALIZE_CHECK;
++ return res;
++}
++
++
++static void LzmaDec_InitRc(CLzmaDec *p, const Byte *data)
++{
++ p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]);
++ p->range = 0xFFFFFFFF;
++ p->needFlush = 0;
++}
++
++void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState)
++{
++ p->needFlush = 1;
++ p->remainLen = 0;
++ p->tempBufSize = 0;
++
++ if (initDic)
++ {
++ p->processedPos = 0;
++ p->checkDicSize = 0;
++ p->needInitState = 1;
++ }
++ if (initState)
++ p->needInitState = 1;
++}
++
++void LzmaDec_Init(CLzmaDec *p)
++{
++ p->dicPos = 0;
++ LzmaDec_InitDicAndState(p, True, True);
++}
++
++static void LzmaDec_InitStateReal(CLzmaDec *p)
++{
++ UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp));
++ UInt32 i;
++ CLzmaProb *probs = p->probs;
++ for (i = 0; i < numProbs; i++)
++ probs[i] = kBitModelTotal >> 1;
++ p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
++ p->state = 0;
++ p->needInitState = 0;
++}
++
++SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
++ ELzmaFinishMode finishMode, ELzmaStatus *status)
++{
++ SizeT inSize = *srcLen;
++ (*srcLen) = 0;
++ LzmaDec_WriteRem(p, dicLimit);
++
++ *status = LZMA_STATUS_NOT_SPECIFIED;
++
++ while (p->remainLen != kMatchSpecLenStart)
++ {
++ int checkEndMarkNow;
++
++ if (p->needFlush != 0)
++ {
++ for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
++ p->tempBuf[p->tempBufSize++] = *src++;
++ if (p->tempBufSize < RC_INIT_SIZE)
++ {
++ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
++ return SZ_OK;
++ }
++ if (p->tempBuf[0] != 0)
++ return SZ_ERROR_DATA;
++
++ LzmaDec_InitRc(p, p->tempBuf);
++ p->tempBufSize = 0;
++ }
++
++ checkEndMarkNow = 0;
++ if (p->dicPos >= dicLimit)
++ {
++ if (p->remainLen == 0 && p->code == 0)
++ {
++ *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
++ return SZ_OK;
++ }
++ if (finishMode == LZMA_FINISH_ANY)
++ {
++ *status = LZMA_STATUS_NOT_FINISHED;
++ return SZ_OK;
++ }
++ if (p->remainLen != 0)
++ {
++ *status = LZMA_STATUS_NOT_FINISHED;
++ return SZ_ERROR_DATA;
++ }
++ checkEndMarkNow = 1;
++ }
++
++ if (p->needInitState)
++ LzmaDec_InitStateReal(p);
++
++ if (p->tempBufSize == 0)
++ {
++ SizeT processed;
++ const Byte *bufLimit;
++ if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
++ {
++ int dummyRes = LzmaDec_TryDummy(p, src, inSize);
++ if (dummyRes == DUMMY_ERROR)
++ {
++ memcpy(p->tempBuf, src, inSize);
++ p->tempBufSize = (unsigned)inSize;
++ (*srcLen) += inSize;
++ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
++ return SZ_OK;
++ }
++ if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
++ {
++ *status = LZMA_STATUS_NOT_FINISHED;
++ return SZ_ERROR_DATA;
++ }
++ bufLimit = src;
++ }
++ else
++ bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
++ p->buf = src;
++ if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
++ return SZ_ERROR_DATA;
++ processed = (SizeT)(p->buf - src);
++ (*srcLen) += processed;
++ src += processed;
++ inSize -= processed;
++ }
++ else
++ {
++ unsigned rem = p->tempBufSize, lookAhead = 0;
++ while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
++ p->tempBuf[rem++] = src[lookAhead++];
++ p->tempBufSize = rem;
++ if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
++ {
++ int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem);
++ if (dummyRes == DUMMY_ERROR)
++ {
++ (*srcLen) += lookAhead;
++ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
++ return SZ_OK;
++ }
++ if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
++ {
++ *status = LZMA_STATUS_NOT_FINISHED;
++ return SZ_ERROR_DATA;
++ }
++ }
++ p->buf = p->tempBuf;
++ if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
++ return SZ_ERROR_DATA;
++ lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf));
++ (*srcLen) += lookAhead;
++ src += lookAhead;
++ inSize -= lookAhead;
++ p->tempBufSize = 0;
++ }
++ }
++ if (p->code == 0)
++ *status = LZMA_STATUS_FINISHED_WITH_MARK;
++ return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA;
++}
++
++SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
++{
++ SizeT outSize = *destLen;
++ SizeT inSize = *srcLen;
++ *srcLen = *destLen = 0;
++ for (;;)
++ {
++ SizeT inSizeCur = inSize, outSizeCur, dicPos;
++ ELzmaFinishMode curFinishMode;
++ SRes res;
++ if (p->dicPos == p->dicBufSize)
++ p->dicPos = 0;
++ dicPos = p->dicPos;
++ if (outSize > p->dicBufSize - dicPos)
++ {
++ outSizeCur = p->dicBufSize;
++ curFinishMode = LZMA_FINISH_ANY;
++ }
++ else
++ {
++ outSizeCur = dicPos + outSize;
++ curFinishMode = finishMode;
++ }
++
++ res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status);
++ src += inSizeCur;
++ inSize -= inSizeCur;
++ *srcLen += inSizeCur;
++ outSizeCur = p->dicPos - dicPos;
++ memcpy(dest, p->dic + dicPos, outSizeCur);
++ dest += outSizeCur;
++ outSize -= outSizeCur;
++ *destLen += outSizeCur;
++ if (res != 0)
++ return res;
++ if (outSizeCur == 0 || outSize == 0)
++ return SZ_OK;
++ }
++}
++
++void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc)
++{
++ alloc->Free(alloc, p->probs);
++ p->probs = 0;
++}
++
++static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc)
++{
++ alloc->Free(alloc, p->dic);
++ p->dic = 0;
++}
++
++void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc)
++{
++ LzmaDec_FreeProbs(p, alloc);
++ LzmaDec_FreeDict(p, alloc);
++}
++
++SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
++{
++ UInt32 dicSize;
++ Byte d;
++
++ if (size < LZMA_PROPS_SIZE)
++ return SZ_ERROR_UNSUPPORTED;
++ else
++ dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24);
++
++ if (dicSize < LZMA_DIC_MIN)
++ dicSize = LZMA_DIC_MIN;
++ p->dicSize = dicSize;
++
++ d = data[0];
++ if (d >= (9 * 5 * 5))
++ return SZ_ERROR_UNSUPPORTED;
++
++ p->lc = d % 9;
++ d /= 9;
++ p->pb = d / 5;
++ p->lp = d % 5;
++
++ return SZ_OK;
++}
++
++static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc)
++{
++ UInt32 numProbs = LzmaProps_GetNumProbs(propNew);
++ if (p->probs == 0 || numProbs != p->numProbs)
++ {
++ LzmaDec_FreeProbs(p, alloc);
++ p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb));
++ p->numProbs = numProbs;
++ if (p->probs == 0)
++ return SZ_ERROR_MEM;
++ }
++ return SZ_OK;
++}
++
++SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
++{
++ CLzmaProps propNew;
++ RINOK(LzmaProps_Decode(&propNew, props, propsSize));
++ RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
++ p->prop = propNew;
++ return SZ_OK;
++}
++
++SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
++{
++ CLzmaProps propNew;
++ SizeT dicBufSize;
++ RINOK(LzmaProps_Decode(&propNew, props, propsSize));
++ RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
++ dicBufSize = propNew.dicSize;
++ if (p->dic == 0 || dicBufSize != p->dicBufSize)
++ {
++ LzmaDec_FreeDict(p, alloc);
++ p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize);
++ if (p->dic == 0)
++ {
++ LzmaDec_FreeProbs(p, alloc);
++ return SZ_ERROR_MEM;
++ }
++ }
++ p->dicBufSize = dicBufSize;
++ p->prop = propNew;
++ return SZ_OK;
++}
++
++SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
++ const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
++ ELzmaStatus *status, ISzAlloc *alloc)
++{
++ CLzmaDec p;
++ SRes res;
++ SizeT inSize = *srcLen;
++ SizeT outSize = *destLen;
++ *srcLen = *destLen = 0;
++ if (inSize < RC_INIT_SIZE)
++ return SZ_ERROR_INPUT_EOF;
++
++ LzmaDec_Construct(&p);
++ res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc);
++ if (res != 0)
++ return res;
++ p.dic = dest;
++ p.dicBufSize = outSize;
++
++ LzmaDec_Init(&p);
++
++ *srcLen = inSize;
++ res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
++
++ if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
++ res = SZ_ERROR_INPUT_EOF;
++
++ (*destLen) = p.dicPos;
++ LzmaDec_FreeProbs(&p, alloc);
++ return res;
++}
+
+Property changes on: third_party/lzma_sdk/LzmaDec.c
+___________________________________________________________________
+Added: svn:eol-style
+ + LF
+
+Index: third_party/lzma_sdk/README.ots
+===================================================================
+--- third_party/lzma_sdk/README.ots (revision 0)
++++ third_party/lzma_sdk/README.ots (revision 0)
+@@ -0,0 +1,8 @@
++Name: LZMA SDK
++URL: http://www.7-zip.org/sdk.html
++Version: 9.20
++
++Description:
++The LZMA SDK provides the documentation, samples, header files, libraries, and tools you need to develop applications that use LZMA compression.
++
++This contains only the C code required to decompress LZMA.
+Index: third_party/lzma_sdk/lzma_sdk.gyp
+===================================================================
+--- third_party/lzma_sdk/lzma_sdk.gyp (revision 0)
++++ third_party/lzma_sdk/lzma_sdk.gyp (revision 0)
+@@ -0,0 +1,33 @@
++# Copyright (c) 2012 The Chromium Authors. All rights reserved.
++# Use of this source code is governed by a BSD-style license that can be
++# found in the LICENSE file.
++
++{
++ 'targets': [
++ {
++ 'target_name': 'lzma_sdk',
++ 'type': 'static_library',
++ 'defines': [
++ '_7ZIP_ST',
++ '_LZMA_PROB32',
++ ],
++ 'sources': [
++ 'Alloc.c',
++ 'Alloc.h',
++ 'LzFind.c',
++ 'LzFind.h',
++ 'LzHash.h',
++ 'LzmaEnc.c',
++ 'LzmaEnc.h',
++ 'LzmaDec.c',
++ 'LzmaDec.h',
++ 'LzmaLib.c',
++ 'LzmaLib.h',
++ 'Types.h',
++ ],
++ 'include_dirs': [
++ '.',
++ ],
++ },
++ ],
++}
+
+Property changes on: third_party/lzma_sdk/lzma_sdk.gyp
+___________________________________________________________________
+Added: svn:eol-style
+ + LF
+
+Index: third_party/lzma_sdk/LzmaDec.h
+===================================================================
+--- third_party/lzma_sdk/LzmaDec.h (revision 0)
++++ third_party/lzma_sdk/LzmaDec.h (revision 0)
+@@ -0,0 +1,231 @@
++/* LzmaDec.h -- LZMA Decoder
++2009-02-07 : Igor Pavlov : Public domain */
++
++#ifndef __LZMA_DEC_H
++#define __LZMA_DEC_H
++
++#include "Types.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++/* #define _LZMA_PROB32 */
++/* _LZMA_PROB32 can increase the speed on some CPUs,
++ but memory usage for CLzmaDec::probs will be doubled in that case */
++
++#ifdef _LZMA_PROB32
++#define CLzmaProb UInt32
++#else
++#define CLzmaProb UInt16
++#endif
++
++
++/* ---------- LZMA Properties ---------- */
++
++#define LZMA_PROPS_SIZE 5
++
++typedef struct _CLzmaProps
++{
++ unsigned lc, lp, pb;
++ UInt32 dicSize;
++} CLzmaProps;
++
++/* LzmaProps_Decode - decodes properties
++Returns:
++ SZ_OK
++ SZ_ERROR_UNSUPPORTED - Unsupported properties
++*/
++
++SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
++
++
++/* ---------- LZMA Decoder state ---------- */
++
++/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
++ Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
++
++#define LZMA_REQUIRED_INPUT_MAX 20
++
++typedef struct
++{
++ CLzmaProps prop;
++ CLzmaProb *probs;
++ Byte *dic;
++ const Byte *buf;
++ UInt32 range, code;
++ SizeT dicPos;
++ SizeT dicBufSize;
++ UInt32 processedPos;
++ UInt32 checkDicSize;
++ unsigned state;
++ UInt32 reps[4];
++ unsigned remainLen;
++ int needFlush;
++ int needInitState;
++ UInt32 numProbs;
++ unsigned tempBufSize;
++ Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
++} CLzmaDec;
++
++#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; }
++
++void LzmaDec_Init(CLzmaDec *p);
++
++/* There are two types of LZMA streams:
++ 0) Stream with end mark. That end mark adds about 6 bytes to compressed size.
++ 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */
++
++typedef enum
++{
++ LZMA_FINISH_ANY, /* finish at any point */
++ LZMA_FINISH_END /* block must be finished at the end */
++} ELzmaFinishMode;
++
++/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
++
++ You must use LZMA_FINISH_END, when you know that current output buffer
++ covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
++
++ If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
++ and output value of destLen will be less than output buffer size limit.
++ You can check status result also.
++
++ You can use multiple checks to test data integrity after full decompression:
++ 1) Check Result and "status" variable.
++ 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
++ 3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
++ You must use correct finish mode in that case. */
++
++typedef enum
++{
++ LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */
++ LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */
++ LZMA_STATUS_NOT_FINISHED, /* stream was not finished */
++ LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */
++ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */
++} ELzmaStatus;
++
++/* ELzmaStatus is used only as output value for function call */
++
++
++/* ---------- Interfaces ---------- */
++
++/* There are 3 levels of interfaces:
++ 1) Dictionary Interface
++ 2) Buffer Interface
++ 3) One Call Interface
++ You can select any of these interfaces, but don't mix functions from different
++ groups for same object. */
++
++
++/* There are two variants to allocate state for Dictionary Interface:
++ 1) LzmaDec_Allocate / LzmaDec_Free
++ 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
++ You can use variant 2, if you set dictionary buffer manually.
++ For Buffer Interface you must always use variant 1.
++
++LzmaDec_Allocate* can return:
++ SZ_OK
++ SZ_ERROR_MEM - Memory allocation error
++ SZ_ERROR_UNSUPPORTED - Unsupported properties
++*/
++
++SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc);
++void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc);
++
++SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc);
++void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc);
++
++/* ---------- Dictionary Interface ---------- */
++
++/* You can use it, if you want to eliminate the overhead for data copying from
++ dictionary to some other external buffer.
++ You must work with CLzmaDec variables directly in this interface.
++
++ STEPS:
++ LzmaDec_Constr()
++ LzmaDec_Allocate()
++ for (each new stream)
++ {
++ LzmaDec_Init()
++ while (it needs more decompression)
++ {
++ LzmaDec_DecodeToDic()
++ use data from CLzmaDec::dic and update CLzmaDec::dicPos
++ }
++ }
++ LzmaDec_Free()
++*/
++
++/* LzmaDec_DecodeToDic
++
++ The decoding to internal dictionary buffer (CLzmaDec::dic).
++ You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
++
++finishMode:
++ It has meaning only if the decoding reaches output limit (dicLimit).
++ LZMA_FINISH_ANY - Decode just dicLimit bytes.
++ LZMA_FINISH_END - Stream must be finished after dicLimit.
++
++Returns:
++ SZ_OK
++ status:
++ LZMA_STATUS_FINISHED_WITH_MARK
++ LZMA_STATUS_NOT_FINISHED
++ LZMA_STATUS_NEEDS_MORE_INPUT
++ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
++ SZ_ERROR_DATA - Data error
++*/
++
++SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
++ const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
++
++
++/* ---------- Buffer Interface ---------- */
++
++/* It's zlib-like interface.
++ See LzmaDec_DecodeToDic description for information about STEPS and return results,
++ but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
++ to work with CLzmaDec variables manually.
++
++finishMode:
++ It has meaning only if the decoding reaches output limit (*destLen).
++ LZMA_FINISH_ANY - Decode just destLen bytes.
++ LZMA_FINISH_END - Stream must be finished after (*destLen).
++*/
++
++SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
++ const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
++
++
++/* ---------- One Call Interface ---------- */
++
++/* LzmaDecode
++
++finishMode:
++ It has meaning only if the decoding reaches output limit (*destLen).
++ LZMA_FINISH_ANY - Decode just destLen bytes.
++ LZMA_FINISH_END - Stream must be finished after (*destLen).
++
++Returns:
++ SZ_OK
++ status:
++ LZMA_STATUS_FINISHED_WITH_MARK
++ LZMA_STATUS_NOT_FINISHED
++ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
++ SZ_ERROR_DATA - Data error
++ SZ_ERROR_MEM - Memory allocation error
++ SZ_ERROR_UNSUPPORTED - Unsupported properties
++ SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
++*/
++
++SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
++ const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
++ ELzmaStatus *status, ISzAlloc *alloc);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+
+Property changes on: third_party/lzma_sdk/LzmaDec.h
+___________________________________________________________________
+Added: svn:eol-style
+ + LF
+
+Index: third_party/lzma_sdk/LzFind.c
+===================================================================
+--- third_party/lzma_sdk/LzFind.c (revision 0)
++++ third_party/lzma_sdk/LzFind.c (revision 0)
+@@ -0,0 +1,761 @@
++/* LzFind.c -- Match finder for LZ algorithms
++2009-04-22 : Igor Pavlov : Public domain */
++
++#include <string.h>
++
++#include "LzFind.h"
++#include "LzHash.h"
++
++#define kEmptyHashValue 0
++#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
++#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
++#define kNormalizeMask (~(kNormalizeStepMin - 1))
++#define kMaxHistorySize ((UInt32)3 << 30)
++
++#define kStartMaxLen 3
++
++static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc)
++{
++ if (!p->directInput)
++ {
++ alloc->Free(alloc, p->bufferBase);
++ p->bufferBase = 0;
++ }
++}
++
++/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
++
++static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc)
++{
++ UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
++ if (p->directInput)
++ {
++ p->blockSize = blockSize;
++ return 1;
++ }
++ if (p->bufferBase == 0 || p->blockSize != blockSize)
++ {
++ LzInWindow_Free(p, alloc);
++ p->blockSize = blockSize;
++ p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize);
++ }
++ return (p->bufferBase != 0);
++}
++
++Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
++Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; }
++
++UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
++
++void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
++{
++ p->posLimit -= subValue;
++ p->pos -= subValue;
++ p->streamPos -= subValue;
++}
++
++static void MatchFinder_ReadBlock(CMatchFinder *p)
++{
++ if (p->streamEndWasReached || p->result != SZ_OK)
++ return;
++ if (p->directInput)
++ {
++ UInt32 curSize = 0xFFFFFFFF - p->streamPos;
++ if (curSize > p->directInputRem)
++ curSize = (UInt32)p->directInputRem;
++ p->directInputRem -= curSize;
++ p->streamPos += curSize;
++ if (p->directInputRem == 0)
++ p->streamEndWasReached = 1;
++ return;
++ }
++ for (;;)
++ {
++ Byte *dest = p->buffer + (p->streamPos - p->pos);
++ size_t size = (p->bufferBase + p->blockSize - dest);
++ if (size == 0)
++ return;
++ p->result = p->stream->Read(p->stream, dest, &size);
++ if (p->result != SZ_OK)
++ return;
++ if (size == 0)
++ {
++ p->streamEndWasReached = 1;
++ return;
++ }
++ p->streamPos += (UInt32)size;
++ if (p->streamPos - p->pos > p->keepSizeAfter)
++ return;
++ }
++}
++
++void MatchFinder_MoveBlock(CMatchFinder *p)
++{
++ memmove(p->bufferBase,
++ p->buffer - p->keepSizeBefore,
++ (size_t)(p->streamPos - p->pos + p->keepSizeBefore));
++ p->buffer = p->bufferBase + p->keepSizeBefore;
++}
++
++int MatchFinder_NeedMove(CMatchFinder *p)
++{
++ if (p->directInput)
++ return 0;
++ /* if (p->streamEndWasReached) return 0; */
++ return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
++}
++
++void MatchFinder_ReadIfRequired(CMatchFinder *p)
++{
++ if (p->streamEndWasReached)
++ return;
++ if (p->keepSizeAfter >= p->streamPos - p->pos)
++ MatchFinder_ReadBlock(p);
++}
++
++static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
++{
++ if (MatchFinder_NeedMove(p))
++ MatchFinder_MoveBlock(p);
++ MatchFinder_ReadBlock(p);
++}
++
++static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
++{
++ p->cutValue = 32;
++ p->btMode = 1;
++ p->numHashBytes = 4;
++ p->bigHash = 0;
++}
++
++#define kCrcPoly 0xEDB88320
++
++void MatchFinder_Construct(CMatchFinder *p)
++{
++ UInt32 i;
++ p->bufferBase = 0;
++ p->directInput = 0;
++ p->hash = 0;
++ MatchFinder_SetDefaultSettings(p);
++
++ for (i = 0; i < 256; i++)
++ {
++ UInt32 r = i;
++ int j;
++ for (j = 0; j < 8; j++)
++ r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1));
++ p->crc[i] = r;
++ }
++}
++
++static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc)
++{
++ alloc->Free(alloc, p->hash);
++ p->hash = 0;
++}
++
++void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc)
++{
++ MatchFinder_FreeThisClassMemory(p, alloc);
++ LzInWindow_Free(p, alloc);
++}
++
++static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc)
++{
++ size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
++ if (sizeInBytes / sizeof(CLzRef) != num)
++ return 0;
++ return (CLzRef *)alloc->Alloc(alloc, sizeInBytes);
++}
++
++int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
++ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
++ ISzAlloc *alloc)
++{
++ UInt32 sizeReserv;
++ if (historySize > kMaxHistorySize)
++ {
++ MatchFinder_Free(p, alloc);
++ return 0;
++ }
++ sizeReserv = historySize >> 1;
++ if (historySize > ((UInt32)2 << 30))
++ sizeReserv = historySize >> 2;
++ sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
++
++ p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
++ p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
++ /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
++ if (LzInWindow_Create(p, sizeReserv, alloc))
++ {
++ UInt32 newCyclicBufferSize = historySize + 1;
++ UInt32 hs;
++ p->matchMaxLen = matchMaxLen;
++ {
++ p->fixedHashSize = 0;
++ if (p->numHashBytes == 2)
++ hs = (1 << 16) - 1;
++ else
++ {
++ hs = historySize - 1;
++ hs |= (hs >> 1);
++ hs |= (hs >> 2);
++ hs |= (hs >> 4);
++ hs |= (hs >> 8);
++ hs >>= 1;
++ hs |= 0xFFFF; /* don't change it! It's required for Deflate */
++ if (hs > (1 << 24))
++ {
++ if (p->numHashBytes == 3)
++ hs = (1 << 24) - 1;
++ else
++ hs >>= 1;
++ }
++ }
++ p->hashMask = hs;
++ hs++;
++ if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
++ if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
++ if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
++ hs += p->fixedHashSize;
++ }
++
++ {
++ UInt32 prevSize = p->hashSizeSum + p->numSons;
++ UInt32 newSize;
++ p->historySize = historySize;
++ p->hashSizeSum = hs;
++ p->cyclicBufferSize = newCyclicBufferSize;
++ p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize);
++ newSize = p->hashSizeSum + p->numSons;
++ if (p->hash != 0 && prevSize == newSize)
++ return 1;
++ MatchFinder_FreeThisClassMemory(p, alloc);
++ p->hash = AllocRefs(newSize, alloc);
++ if (p->hash != 0)
++ {
++ p->son = p->hash + p->hashSizeSum;
++ return 1;
++ }
++ }
++ }
++ MatchFinder_Free(p, alloc);
++ return 0;
++}
++
++static void MatchFinder_SetLimits(CMatchFinder *p)
++{
++ UInt32 limit = kMaxValForNormalize - p->pos;
++ UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
++ if (limit2 < limit)
++ limit = limit2;
++ limit2 = p->streamPos - p->pos;
++ if (limit2 <= p->keepSizeAfter)
++ {
++ if (limit2 > 0)
++ limit2 = 1;
++ }
++ else
++ limit2 -= p->keepSizeAfter;
++ if (limit2 < limit)
++ limit = limit2;
++ {
++ UInt32 lenLimit = p->streamPos - p->pos;
++ if (lenLimit > p->matchMaxLen)
++ lenLimit = p->matchMaxLen;
++ p->lenLimit = lenLimit;
++ }
++ p->posLimit = p->pos + limit;
++}
++
++void MatchFinder_Init(CMatchFinder *p)
++{
++ UInt32 i;
++ for (i = 0; i < p->hashSizeSum; i++)
++ p->hash[i] = kEmptyHashValue;
++ p->cyclicBufferPos = 0;
++ p->buffer = p->bufferBase;
++ p->pos = p->streamPos = p->cyclicBufferSize;
++ p->result = SZ_OK;
++ p->streamEndWasReached = 0;
++ MatchFinder_ReadBlock(p);
++ MatchFinder_SetLimits(p);
++}
++
++static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
++{
++ return (p->pos - p->historySize - 1) & kNormalizeMask;
++}
++
++void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems)
++{
++ UInt32 i;
++ for (i = 0; i < numItems; i++)
++ {
++ UInt32 value = items[i];
++ if (value <= subValue)
++ value = kEmptyHashValue;
++ else
++ value -= subValue;
++ items[i] = value;
++ }
++}
++
++static void MatchFinder_Normalize(CMatchFinder *p)
++{
++ UInt32 subValue = MatchFinder_GetSubValue(p);
++ MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons);
++ MatchFinder_ReduceOffsets(p, subValue);
++}
++
++static void MatchFinder_CheckLimits(CMatchFinder *p)
++{
++ if (p->pos == kMaxValForNormalize)
++ MatchFinder_Normalize(p);
++ if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
++ MatchFinder_CheckAndMoveAndRead(p);
++ if (p->cyclicBufferPos == p->cyclicBufferSize)
++ p->cyclicBufferPos = 0;
++ MatchFinder_SetLimits(p);
++}
++
++static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
++ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
++ UInt32 *distances, UInt32 maxLen)
++{
++ son[_cyclicBufferPos] = curMatch;
++ for (;;)
++ {
++ UInt32 delta = pos - curMatch;
++ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
++ return distances;
++ {
++ const Byte *pb = cur - delta;
++ curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
++ if (pb[maxLen] == cur[maxLen] && *pb == *cur)
++ {
++ UInt32 len = 0;
++ while (++len != lenLimit)
++ if (pb[len] != cur[len])
++ break;
++ if (maxLen < len)
++ {
++ *distances++ = maxLen = len;
++ *distances++ = delta - 1;
++ if (len == lenLimit)
++ return distances;
++ }
++ }
++ }
++ }
++}
++
++UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
++ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
++ UInt32 *distances, UInt32 maxLen)
++{
++ CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
++ CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
++ UInt32 len0 = 0, len1 = 0;
++ for (;;)
++ {
++ UInt32 delta = pos - curMatch;
++ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
++ {
++ *ptr0 = *ptr1 = kEmptyHashValue;
++ return distances;
++ }
++ {
++ CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
++ const Byte *pb = cur - delta;
++ UInt32 len = (len0 < len1 ? len0 : len1);
++ if (pb[len] == cur[len])
++ {
++ if (++len != lenLimit && pb[len] == cur[len])
++ while (++len != lenLimit)
++ if (pb[len] != cur[len])
++ break;
++ if (maxLen < len)
++ {
++ *distances++ = maxLen = len;
++ *distances++ = delta - 1;
++ if (len == lenLimit)
++ {
++ *ptr1 = pair[0];
++ *ptr0 = pair[1];
++ return distances;
++ }
++ }
++ }
++ if (pb[len] < cur[len])
++ {
++ *ptr1 = curMatch;
++ ptr1 = pair + 1;
++ curMatch = *ptr1;
++ len1 = len;
++ }
++ else
++ {
++ *ptr0 = curMatch;
++ ptr0 = pair;
++ curMatch = *ptr0;
++ len0 = len;
++ }
++ }
++ }
++}
++
++static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
++ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
++{
++ CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
++ CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
++ UInt32 len0 = 0, len1 = 0;
++ for (;;)
++ {
++ UInt32 delta = pos - curMatch;
++ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
++ {
++ *ptr0 = *ptr1 = kEmptyHashValue;
++ return;
++ }
++ {
++ CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
++ const Byte *pb = cur - delta;
++ UInt32 len = (len0 < len1 ? len0 : len1);
++ if (pb[len] == cur[len])
++ {
++ while (++len != lenLimit)
++ if (pb[len] != cur[len])
++ break;
++ {
++ if (len == lenLimit)
++ {
++ *ptr1 = pair[0];
++ *ptr0 = pair[1];
++ return;
++ }
++ }
++ }
++ if (pb[len] < cur[len])
++ {
++ *ptr1 = curMatch;
++ ptr1 = pair + 1;
++ curMatch = *ptr1;
++ len1 = len;
++ }
++ else
++ {
++ *ptr0 = curMatch;
++ ptr0 = pair;
++ curMatch = *ptr0;
++ len0 = len;
++ }
++ }
++ }
++}
++
++#define MOVE_POS \
++ ++p->cyclicBufferPos; \
++ p->buffer++; \
++ if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
++
++#define MOVE_POS_RET MOVE_POS return offset;
++
++static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
++
++#define GET_MATCHES_HEADER2(minLen, ret_op) \
++ UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
++ lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
++ cur = p->buffer;
++
++#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
++#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
++
++#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
++
++#define GET_MATCHES_FOOTER(offset, maxLen) \
++ offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
++ distances + offset, maxLen) - distances); MOVE_POS_RET;
++
++#define SKIP_FOOTER \
++ SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
++
++static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++{
++ UInt32 offset;
++ GET_MATCHES_HEADER(2)
++ HASH2_CALC;
++ curMatch = p->hash[hashValue];
++ p->hash[hashValue] = p->pos;
++ offset = 0;
++ GET_MATCHES_FOOTER(offset, 1)
++}
++
++UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++{
++ UInt32 offset;
++ GET_MATCHES_HEADER(3)
++ HASH_ZIP_CALC;
++ curMatch = p->hash[hashValue];
++ p->hash[hashValue] = p->pos;
++ offset = 0;
++ GET_MATCHES_FOOTER(offset, 2)
++}
++
++static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++{
++ UInt32 hash2Value, delta2, maxLen, offset;
++ GET_MATCHES_HEADER(3)
++
++ HASH3_CALC;
++
++ delta2 = p->pos - p->hash[hash2Value];
++ curMatch = p->hash[kFix3HashSize + hashValue];
++
++ p->hash[hash2Value] =
++ p->hash[kFix3HashSize + hashValue] = p->pos;
++
++
++ maxLen = 2;
++ offset = 0;
++ if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
++ {
++ for (; maxLen != lenLimit; maxLen++)
++ if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
++ break;
++ distances[0] = maxLen;
++ distances[1] = delta2 - 1;
++ offset = 2;
++ if (maxLen == lenLimit)
++ {
++ SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
++ MOVE_POS_RET;
++ }
++ }
++ GET_MATCHES_FOOTER(offset, maxLen)
++}
++
++static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++{
++ UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
++ GET_MATCHES_HEADER(4)
++
++ HASH4_CALC;
++
++ delta2 = p->pos - p->hash[ hash2Value];
++ delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
++ curMatch = p->hash[kFix4HashSize + hashValue];
++
++ p->hash[ hash2Value] =
++ p->hash[kFix3HashSize + hash3Value] =
++ p->hash[kFix4HashSize + hashValue] = p->pos;
++
++ maxLen = 1;
++ offset = 0;
++ if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
++ {
++ distances[0] = maxLen = 2;
++ distances[1] = delta2 - 1;
++ offset = 2;
++ }
++ if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
++ {
++ maxLen = 3;
++ distances[offset + 1] = delta3 - 1;
++ offset += 2;
++ delta2 = delta3;
++ }
++ if (offset != 0)
++ {
++ for (; maxLen != lenLimit; maxLen++)
++ if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
++ break;
++ distances[offset - 2] = maxLen;
++ if (maxLen == lenLimit)
++ {
++ SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
++ MOVE_POS_RET;
++ }
++ }
++ if (maxLen < 3)
++ maxLen = 3;
++ GET_MATCHES_FOOTER(offset, maxLen)
++}
++
++static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++{
++ UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
++ GET_MATCHES_HEADER(4)
++
++ HASH4_CALC;
++
++ delta2 = p->pos - p->hash[ hash2Value];
++ delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
++ curMatch = p->hash[kFix4HashSize + hashValue];
++
++ p->hash[ hash2Value] =
++ p->hash[kFix3HashSize + hash3Value] =
++ p->hash[kFix4HashSize + hashValue] = p->pos;
++
++ maxLen = 1;
++ offset = 0;
++ if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
++ {
++ distances[0] = maxLen = 2;
++ distances[1] = delta2 - 1;
++ offset = 2;
++ }
++ if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
++ {
++ maxLen = 3;
++ distances[offset + 1] = delta3 - 1;
++ offset += 2;
++ delta2 = delta3;
++ }
++ if (offset != 0)
++ {
++ for (; maxLen != lenLimit; maxLen++)
++ if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
++ break;
++ distances[offset - 2] = maxLen;
++ if (maxLen == lenLimit)
++ {
++ p->son[p->cyclicBufferPos] = curMatch;
++ MOVE_POS_RET;
++ }
++ }
++ if (maxLen < 3)
++ maxLen = 3;
++ offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
++ distances + offset, maxLen) - (distances));
++ MOVE_POS_RET
++}
++
++UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++{
++ UInt32 offset;
++ GET_MATCHES_HEADER(3)
++ HASH_ZIP_CALC;
++ curMatch = p->hash[hashValue];
++ p->hash[hashValue] = p->pos;
++ offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
++ distances, 2) - (distances));
++ MOVE_POS_RET
++}
++
++static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++{
++ do
++ {
++ SKIP_HEADER(2)
++ HASH2_CALC;
++ curMatch = p->hash[hashValue];
++ p->hash[hashValue] = p->pos;
++ SKIP_FOOTER
++ }
++ while (--num != 0);
++}
++
++void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++{
++ do
++ {
++ SKIP_HEADER(3)
++ HASH_ZIP_CALC;
++ curMatch = p->hash[hashValue];
++ p->hash[hashValue] = p->pos;
++ SKIP_FOOTER
++ }
++ while (--num != 0);
++}
++
++static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++{
++ do
++ {
++ UInt32 hash2Value;
++ SKIP_HEADER(3)
++ HASH3_CALC;
++ curMatch = p->hash[kFix3HashSize + hashValue];
++ p->hash[hash2Value] =
++ p->hash[kFix3HashSize + hashValue] = p->pos;
++ SKIP_FOOTER
++ }
++ while (--num != 0);
++}
++
++static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++{
++ do
++ {
++ UInt32 hash2Value, hash3Value;
++ SKIP_HEADER(4)
++ HASH4_CALC;
++ curMatch = p->hash[kFix4HashSize + hashValue];
++ p->hash[ hash2Value] =
++ p->hash[kFix3HashSize + hash3Value] = p->pos;
++ p->hash[kFix4HashSize + hashValue] = p->pos;
++ SKIP_FOOTER
++ }
++ while (--num != 0);
++}
++
++static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++{
++ do
++ {
++ UInt32 hash2Value, hash3Value;
++ SKIP_HEADER(4)
++ HASH4_CALC;
++ curMatch = p->hash[kFix4HashSize + hashValue];
++ p->hash[ hash2Value] =
++ p->hash[kFix3HashSize + hash3Value] =
++ p->hash[kFix4HashSize + hashValue] = p->pos;
++ p->son[p->cyclicBufferPos] = curMatch;
++ MOVE_POS
++ }
++ while (--num != 0);
++}
++
++void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++{
++ do
++ {
++ SKIP_HEADER(3)
++ HASH_ZIP_CALC;
++ curMatch = p->hash[hashValue];
++ p->hash[hashValue] = p->pos;
++ p->son[p->cyclicBufferPos] = curMatch;
++ MOVE_POS
++ }
++ while (--num != 0);
++}
++
++void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
++{
++ vTable->Init = (Mf_Init_Func)MatchFinder_Init;
++ vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte;
++ vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
++ vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
++ if (!p->btMode)
++ {
++ vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
++ vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
++ }
++ else if (p->numHashBytes == 2)
++ {
++ vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
++ vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
++ }
++ else if (p->numHashBytes == 3)
++ {
++ vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
++ vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
++ }
++ else
++ {
++ vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
++ vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
++ }
++}
+
+Property changes on: third_party/lzma_sdk/LzFind.c
+___________________________________________________________________
+Added: svn:eol-style
+ + LF
+
+Index: third_party/lzma_sdk/LzmaEnc.c
+===================================================================
+--- third_party/lzma_sdk/LzmaEnc.c (revision 0)
++++ third_party/lzma_sdk/LzmaEnc.c (revision 0)
+@@ -0,0 +1,2268 @@
++/* LzmaEnc.c -- LZMA Encoder
++2010-04-16 : Igor Pavlov : Public domain */
++
++#include <string.h>
++
++/* #define SHOW_STAT */
++/* #define SHOW_STAT2 */
++
++#if defined(SHOW_STAT) || defined(SHOW_STAT2)
++#include <stdio.h>
++#endif
++
++#include "LzmaEnc.h"
++
++#include "LzFind.h"
++#ifndef _7ZIP_ST
++#include "LzFindMt.h"
++#endif
++
++#ifdef SHOW_STAT
++static int ttt = 0;
++#endif
++
++#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1)
++
++#define kBlockSize (9 << 10)
++#define kUnpackBlockSize (1 << 18)
++#define kMatchArraySize (1 << 21)
++#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX)
++
++#define kNumMaxDirectBits (31)
++
++#define kNumTopBits 24
++#define kTopValue ((UInt32)1 << kNumTopBits)
++
++#define kNumBitModelTotalBits 11
++#define kBitModelTotal (1 << kNumBitModelTotalBits)
++#define kNumMoveBits 5
++#define kProbInitValue (kBitModelTotal >> 1)
++
++#define kNumMoveReducingBits 4
++#define kNumBitPriceShiftBits 4
++#define kBitPrice (1 << kNumBitPriceShiftBits)
++
++void LzmaEncProps_Init(CLzmaEncProps *p)
++{
++ p->level = 5;
++ p->dictSize = p->mc = 0;
++ p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
++ p->writeEndMark = 0;
++}
++
++void LzmaEncProps_Normalize(CLzmaEncProps *p)
++{
++ int level = p->level;
++ if (level < 0) level = 5;
++ p->level = level;
++ if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26)));
++ if (p->lc < 0) p->lc = 3;
++ if (p->lp < 0) p->lp = 0;
++ if (p->pb < 0) p->pb = 2;
++ if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
++ if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
++ if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
++ if (p->numHashBytes < 0) p->numHashBytes = 4;
++ if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
++ if (p->numThreads < 0)
++ p->numThreads =
++ #ifndef _7ZIP_ST
++ ((p->btMode && p->algo) ? 2 : 1);
++ #else
++ 1;
++ #endif
++}
++
++UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
++{
++ CLzmaEncProps props = *props2;
++ LzmaEncProps_Normalize(&props);
++ return props.dictSize;
++}
++
++/* #define LZMA_LOG_BSR */
++/* Define it for Intel's CPU */
++
++
++#ifdef LZMA_LOG_BSR
++
++#define kDicLogSizeMaxCompress 30
++
++#define BSR2_RET(pos, res) { unsigned long i; _BitScanReverse(&i, (pos)); res = (i + i) + ((pos >> (i - 1)) & 1); }
++
++UInt32 GetPosSlot1(UInt32 pos)
++{
++ UInt32 res;
++ BSR2_RET(pos, res);
++ return res;
++}
++#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
++#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
++
++#else
++
++#define kNumLogBits (9 + (int)sizeof(size_t) / 2)
++#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
++
++void LzmaEnc_FastPosInit(Byte *g_FastPos)
++{
++ int c = 2, slotFast;
++ g_FastPos[0] = 0;
++ g_FastPos[1] = 1;
++
++ for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++)
++ {
++ UInt32 k = (1 << ((slotFast >> 1) - 1));
++ UInt32 j;
++ for (j = 0; j < k; j++, c++)
++ g_FastPos[c] = (Byte)slotFast;
++ }
++}
++
++#define BSR2_RET(pos, res) { UInt32 i = 6 + ((kNumLogBits - 1) & \
++ (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
++ res = p->g_FastPos[pos >> i] + (i * 2); }
++/*
++#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
++ p->g_FastPos[pos >> 6] + 12 : \
++ p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }
++*/
++
++#define GetPosSlot1(pos) p->g_FastPos[pos]
++#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
++#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos]; else BSR2_RET(pos, res); }
++
++#endif
++
++
++#define LZMA_NUM_REPS 4
++
++typedef unsigned CState;
++
++typedef struct
++{
++ UInt32 price;
++
++ CState state;
++ int prev1IsChar;
++ int prev2;
++
++ UInt32 posPrev2;
++ UInt32 backPrev2;
++
++ UInt32 posPrev;
++ UInt32 backPrev;
++ UInt32 backs[LZMA_NUM_REPS];
++} COptimal;
++
++#define kNumOpts (1 << 12)
++
++#define kNumLenToPosStates 4
++#define kNumPosSlotBits 6
++#define kDicLogSizeMin 0
++#define kDicLogSizeMax 32
++#define kDistTableSizeMax (kDicLogSizeMax * 2)
++
++
++#define kNumAlignBits 4
++#define kAlignTableSize (1 << kNumAlignBits)
++#define kAlignMask (kAlignTableSize - 1)
++
++#define kStartPosModelIndex 4
++#define kEndPosModelIndex 14
++#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex)
++
++#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
++
++#ifdef _LZMA_PROB32
++#define CLzmaProb UInt32
++#else
++#define CLzmaProb UInt16
++#endif
++
++#define LZMA_PB_MAX 4
++#define LZMA_LC_MAX 8
++#define LZMA_LP_MAX 4
++
++#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
++
++
++#define kLenNumLowBits 3
++#define kLenNumLowSymbols (1 << kLenNumLowBits)
++#define kLenNumMidBits 3
++#define kLenNumMidSymbols (1 << kLenNumMidBits)
++#define kLenNumHighBits 8
++#define kLenNumHighSymbols (1 << kLenNumHighBits)
++
++#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)
++
++#define LZMA_MATCH_LEN_MIN 2
++#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)
++
++#define kNumStates 12
++
++typedef struct
++{
++ CLzmaProb choice;
++ CLzmaProb choice2;
++ CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits];
++ CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits];
++ CLzmaProb high[kLenNumHighSymbols];
++} CLenEnc;
++
++typedef struct
++{
++ CLenEnc p;
++ UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
++ UInt32 tableSize;
++ UInt32 counters[LZMA_NUM_PB_STATES_MAX];
++} CLenPriceEnc;
++
++typedef struct
++{
++ UInt32 range;
++ Byte cache;
++ UInt64 low;
++ UInt64 cacheSize;
++ Byte *buf;
++ Byte *bufLim;
++ Byte *bufBase;
++ ISeqOutStream *outStream;
++ UInt64 processed;
++ SRes res;
++} CRangeEnc;
++
++typedef struct
++{
++ CLzmaProb *litProbs;
++
++ CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
++ CLzmaProb isRep[kNumStates];
++ CLzmaProb isRepG0[kNumStates];
++ CLzmaProb isRepG1[kNumStates];
++ CLzmaProb isRepG2[kNumStates];
++ CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
++
++ CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
++ CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
++ CLzmaProb posAlignEncoder[1 << kNumAlignBits];
++
++ CLenPriceEnc lenEnc;
++ CLenPriceEnc repLenEnc;
++
++ UInt32 reps[LZMA_NUM_REPS];
++ UInt32 state;
++} CSaveState;
++
++typedef struct
++{
++ IMatchFinder matchFinder;
++ void *matchFinderObj;
++
++ #ifndef _7ZIP_ST
++ Bool mtMode;
++ CMatchFinderMt matchFinderMt;
++ #endif
++
++ CMatchFinder matchFinderBase;
++
++ #ifndef _7ZIP_ST
++ Byte pad[128];
++ #endif
++
++ UInt32 optimumEndIndex;
++ UInt32 optimumCurrentIndex;
++
++ UInt32 longestMatchLength;
++ UInt32 numPairs;
++ UInt32 numAvail;
++ COptimal opt[kNumOpts];
++
++ #ifndef LZMA_LOG_BSR
++ Byte g_FastPos[1 << kNumLogBits];
++ #endif
++
++ UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
++ UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
++ UInt32 numFastBytes;
++ UInt32 additionalOffset;
++ UInt32 reps[LZMA_NUM_REPS];
++ UInt32 state;
++
++ UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
++ UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
++ UInt32 alignPrices[kAlignTableSize];
++ UInt32 alignPriceCount;
++
++ UInt32 distTableSize;
++
++ unsigned lc, lp, pb;
++ unsigned lpMask, pbMask;
++
++ CLzmaProb *litProbs;
++
++ CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
++ CLzmaProb isRep[kNumStates];
++ CLzmaProb isRepG0[kNumStates];
++ CLzmaProb isRepG1[kNumStates];
++ CLzmaProb isRepG2[kNumStates];
++ CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
++
++ CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
++ CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
++ CLzmaProb posAlignEncoder[1 << kNumAlignBits];
++
++ CLenPriceEnc lenEnc;
++ CLenPriceEnc repLenEnc;
++
++ unsigned lclp;
++
++ Bool fastMode;
++
++ CRangeEnc rc;
++
++ Bool writeEndMark;
++ UInt64 nowPos64;
++ UInt32 matchPriceCount;
++ Bool finished;
++ Bool multiThread;
++
++ SRes result;
++ UInt32 dictSize;
++ UInt32 matchFinderCycles;
++
++ int needInit;
++
++ CSaveState saveState;
++} CLzmaEnc;
++
++void LzmaEnc_SaveState(CLzmaEncHandle pp)
++{
++ CLzmaEnc *p = (CLzmaEnc *)pp;
++ CSaveState *dest = &p->saveState;
++ int i;
++ dest->lenEnc = p->lenEnc;
++ dest->repLenEnc = p->repLenEnc;
++ dest->state = p->state;
++
++ for (i = 0; i < kNumStates; i++)
++ {
++ memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
++ memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
++ }
++ for (i = 0; i < kNumLenToPosStates; i++)
++ memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
++ memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
++ memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
++ memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
++ memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
++ memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
++ memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
++ memcpy(dest->reps, p->reps, sizeof(p->reps));
++ memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb));
++}
++
++void LzmaEnc_RestoreState(CLzmaEncHandle pp)
++{
++ CLzmaEnc *dest = (CLzmaEnc *)pp;
++ const CSaveState *p = &dest->saveState;
++ int i;
++ dest->lenEnc = p->lenEnc;
++ dest->repLenEnc = p->repLenEnc;
++ dest->state = p->state;
++
++ for (i = 0; i < kNumStates; i++)
++ {
++ memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
++ memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
++ }
++ for (i = 0; i < kNumLenToPosStates; i++)
++ memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
++ memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
++ memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
++ memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
++ memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
++ memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
++ memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
++ memcpy(dest->reps, p->reps, sizeof(p->reps));
++ memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb));
++}
++
++SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
++{
++ CLzmaEnc *p = (CLzmaEnc *)pp;
++ CLzmaEncProps props = *props2;
++ LzmaEncProps_Normalize(&props);
++
++ if (props.lc > LZMA_LC_MAX || props.lp > LZMA_LP_MAX || props.pb > LZMA_PB_MAX ||
++ props.dictSize > ((UInt32)1 << kDicLogSizeMaxCompress) || props.dictSize > ((UInt32)1 << 30))
++ return SZ_ERROR_PARAM;
++ p->dictSize = props.dictSize;
++ p->matchFinderCycles = props.mc;
++ {
++ unsigned fb = props.fb;
++ if (fb < 5)
++ fb = 5;
++ if (fb > LZMA_MATCH_LEN_MAX)
++ fb = LZMA_MATCH_LEN_MAX;
++ p->numFastBytes = fb;
++ }
++ p->lc = props.lc;
++ p->lp = props.lp;
++ p->pb = props.pb;
++ p->fastMode = (props.algo == 0);
++ p->matchFinderBase.btMode = props.btMode;
++ {
++ UInt32 numHashBytes = 4;
++ if (props.btMode)
++ {
++ if (props.numHashBytes < 2)
++ numHashBytes = 2;
++ else if (props.numHashBytes < 4)
++ numHashBytes = props.numHashBytes;
++ }
++ p->matchFinderBase.numHashBytes = numHashBytes;
++ }
++
++ p->matchFinderBase.cutValue = props.mc;
++
++ p->writeEndMark = props.writeEndMark;
++
++ #ifndef _7ZIP_ST
++ /*
++ if (newMultiThread != _multiThread)
++ {
++ ReleaseMatchFinder();
++ _multiThread = newMultiThread;
++ }
++ */
++ p->multiThread = (props.numThreads > 1);
++ #endif
++
++ return SZ_OK;
++}
++
++static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};
++static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
++static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
++static const int kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
++
++#define IsCharState(s) ((s) < 7)
++
++#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)
++
++#define kInfinityPrice (1 << 30)
++
++static void RangeEnc_Construct(CRangeEnc *p)
++{
++ p->outStream = 0;
++ p->bufBase = 0;
++}
++
++#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
++
++#define RC_BUF_SIZE (1 << 16)
++static int RangeEnc_Alloc(CRangeEnc *p, ISzAlloc *alloc)
++{
++ if (p->bufBase == 0)
++ {
++ p->bufBase = (Byte *)alloc->Alloc(alloc, RC_BUF_SIZE);
++ if (p->bufBase == 0)
++ return 0;
++ p->bufLim = p->bufBase + RC_BUF_SIZE;
++ }
++ return 1;
++}
++
++static void RangeEnc_Free(CRangeEnc *p, ISzAlloc *alloc)
++{
++ alloc->Free(alloc, p->bufBase);
++ p->bufBase = 0;
++}
++
++static void RangeEnc_Init(CRangeEnc *p)
++{
++ /* Stream.Init(); */
++ p->low = 0;
++ p->range = 0xFFFFFFFF;
++ p->cacheSize = 1;
++ p->cache = 0;
++
++ p->buf = p->bufBase;
++
++ p->processed = 0;
++ p->res = SZ_OK;
++}
++
++static void RangeEnc_FlushStream(CRangeEnc *p)
++{
++ size_t num;
++ if (p->res != SZ_OK)
++ return;
++ num = p->buf - p->bufBase;
++ if (num != p->outStream->Write(p->outStream, p->bufBase, num))
++ p->res = SZ_ERROR_WRITE;
++ p->processed += num;
++ p->buf = p->bufBase;
++}
++
++static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
++{
++ if ((UInt32)p->low < (UInt32)0xFF000000 || (int)(p->low >> 32) != 0)
++ {
++ Byte temp = p->cache;
++ do
++ {
++ Byte *buf = p->buf;
++ *buf++ = (Byte)(temp + (Byte)(p->low >> 32));
++ p->buf = buf;
++ if (buf == p->bufLim)
++ RangeEnc_FlushStream(p);
++ temp = 0xFF;
++ }
++ while (--p->cacheSize != 0);
++ p->cache = (Byte)((UInt32)p->low >> 24);
++ }
++ p->cacheSize++;
++ p->low = (UInt32)p->low << 8;
++}
++
++static void RangeEnc_FlushData(CRangeEnc *p)
++{
++ int i;
++ for (i = 0; i < 5; i++)
++ RangeEnc_ShiftLow(p);
++}
++
++static void RangeEnc_EncodeDirectBits(CRangeEnc *p, UInt32 value, int numBits)
++{
++ do
++ {
++ p->range >>= 1;
++ p->low += p->range & (0 - ((value >> --numBits) & 1));
++ if (p->range < kTopValue)
++ {
++ p->range <<= 8;
++ RangeEnc_ShiftLow(p);
++ }
++ }
++ while (numBits != 0);
++}
++
++static void RangeEnc_EncodeBit(CRangeEnc *p, CLzmaProb *prob, UInt32 symbol)
++{
++ UInt32 ttt = *prob;
++ UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt;
++ if (symbol == 0)
++ {
++ p->range = newBound;
++ ttt += (kBitModelTotal - ttt) >> kNumMoveBits;
++ }
++ else
++ {
++ p->low += newBound;
++ p->range -= newBound;
++ ttt -= ttt >> kNumMoveBits;
++ }
++ *prob = (CLzmaProb)ttt;
++ if (p->range < kTopValue)
++ {
++ p->range <<= 8;
++ RangeEnc_ShiftLow(p);
++ }
++}
++
++static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol)
++{
++ symbol |= 0x100;
++ do
++ {
++ RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1);
++ symbol <<= 1;
++ }
++ while (symbol < 0x10000);
++}
++
++static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol, UInt32 matchByte)
++{
++ UInt32 offs = 0x100;
++ symbol |= 0x100;
++ do
++ {
++ matchByte <<= 1;
++ RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1);
++ symbol <<= 1;
++ offs &= ~(matchByte ^ symbol);
++ }
++ while (symbol < 0x10000);
++}
++
++void LzmaEnc_InitPriceTables(UInt32 *ProbPrices)
++{
++ UInt32 i;
++ for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits))
++ {
++ const int kCyclesBits = kNumBitPriceShiftBits;
++ UInt32 w = i;
++ UInt32 bitCount = 0;
++ int j;
++ for (j = 0; j < kCyclesBits; j++)
++ {
++ w = w * w;
++ bitCount <<= 1;
++ while (w >= ((UInt32)1 << 16))
++ {
++ w >>= 1;
++ bitCount++;
++ }
++ }
++ ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
++ }
++}
++
++
++#define GET_PRICE(prob, symbol) \
++ p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
++
++#define GET_PRICEa(prob, symbol) \
++ ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
++
++#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
++#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
++
++#define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
++#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
++
++static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 symbol, UInt32 *ProbPrices)
++{
++ UInt32 price = 0;
++ symbol |= 0x100;
++ do
++ {
++ price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1);
++ symbol <<= 1;
++ }
++ while (symbol < 0x10000);
++ return price;
++}
++
++static UInt32 LitEnc_GetPriceMatched(const CLzmaProb *probs, UInt32 symbol, UInt32 matchByte, UInt32 *ProbPrices)
++{
++ UInt32 price = 0;
++ UInt32 offs = 0x100;
++ symbol |= 0x100;
++ do
++ {
++ matchByte <<= 1;
++ price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1);
++ symbol <<= 1;
++ offs &= ~(matchByte ^ symbol);
++ }
++ while (symbol < 0x10000);
++ return price;
++}
++
++
++static void RcTree_Encode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol)
++{
++ UInt32 m = 1;
++ int i;
++ for (i = numBitLevels; i != 0;)
++ {
++ UInt32 bit;
++ i--;
++ bit = (symbol >> i) & 1;
++ RangeEnc_EncodeBit(rc, probs + m, bit);
++ m = (m << 1) | bit;
++ }
++}
++
++static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol)
++{
++ UInt32 m = 1;
++ int i;
++ for (i = 0; i < numBitLevels; i++)
++ {
++ UInt32 bit = symbol & 1;
++ RangeEnc_EncodeBit(rc, probs + m, bit);
++ m = (m << 1) | bit;
++ symbol >>= 1;
++ }
++}
++
++static UInt32 RcTree_GetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices)
++{
++ UInt32 price = 0;
++ symbol |= (1 << numBitLevels);
++ while (symbol != 1)
++ {
++ price += GET_PRICEa(probs[symbol >> 1], symbol & 1);
++ symbol >>= 1;
++ }
++ return price;
++}
++
++static UInt32 RcTree_ReverseGetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices)
++{
++ UInt32 price = 0;
++ UInt32 m = 1;
++ int i;
++ for (i = numBitLevels; i != 0; i--)
++ {
++ UInt32 bit = symbol & 1;
++ symbol >>= 1;
++ price += GET_PRICEa(probs[m], bit);
++ m = (m << 1) | bit;
++ }
++ return price;
++}
++
++
++static void LenEnc_Init(CLenEnc *p)
++{
++ unsigned i;
++ p->choice = p->choice2 = kProbInitValue;
++ for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++)
++ p->low[i] = kProbInitValue;
++ for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++)
++ p->mid[i] = kProbInitValue;
++ for (i = 0; i < kLenNumHighSymbols; i++)
++ p->high[i] = kProbInitValue;
++}
++
++static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState)
++{
++ if (symbol < kLenNumLowSymbols)
++ {
++ RangeEnc_EncodeBit(rc, &p->choice, 0);
++ RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol);
++ }
++ else
++ {
++ RangeEnc_EncodeBit(rc, &p->choice, 1);
++ if (symbol < kLenNumLowSymbols + kLenNumMidSymbols)
++ {
++ RangeEnc_EncodeBit(rc, &p->choice2, 0);
++ RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols);
++ }
++ else
++ {
++ RangeEnc_EncodeBit(rc, &p->choice2, 1);
++ RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols);
++ }
++ }
++}
++
++static void LenEnc_SetPrices(CLenEnc *p, UInt32 posState, UInt32 numSymbols, UInt32 *prices, UInt32 *ProbPrices)
++{
++ UInt32 a0 = GET_PRICE_0a(p->choice);
++ UInt32 a1 = GET_PRICE_1a(p->choice);
++ UInt32 b0 = a1 + GET_PRICE_0a(p->choice2);
++ UInt32 b1 = a1 + GET_PRICE_1a(p->choice2);
++ UInt32 i = 0;
++ for (i = 0; i < kLenNumLowSymbols; i++)
++ {
++ if (i >= numSymbols)
++ return;
++ prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices);
++ }
++ for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++)
++ {
++ if (i >= numSymbols)
++ return;
++ prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices);
++ }
++ for (; i < numSymbols; i++)
++ prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices);
++}
++
++static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc *p, UInt32 posState, UInt32 *ProbPrices)
++{
++ LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices);
++ p->counters[posState] = p->tableSize;
++}
++
++static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, UInt32 numPosStates, UInt32 *ProbPrices)
++{
++ UInt32 posState;
++ for (posState = 0; posState < numPosStates; posState++)
++ LenPriceEnc_UpdateTable(p, posState, ProbPrices);
++}
++
++static void LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState, Bool updatePrice, UInt32 *ProbPrices)
++{
++ LenEnc_Encode(&p->p, rc, symbol, posState);
++ if (updatePrice)
++ if (--p->counters[posState] == 0)
++ LenPriceEnc_UpdateTable(p, posState, ProbPrices);
++}
++
++
++
++
++static void MovePos(CLzmaEnc *p, UInt32 num)
++{
++ #ifdef SHOW_STAT
++ ttt += num;
++ printf("\n MovePos %d", num);
++ #endif
++ if (num != 0)
++ {
++ p->additionalOffset += num;
++ p->matchFinder.Skip(p->matchFinderObj, num);
++ }
++}
++
++static UInt32 ReadMatchDistances(CLzmaEnc *p, UInt32 *numDistancePairsRes)
++{
++ UInt32 lenRes = 0, numPairs;
++ p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
++ numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
++ #ifdef SHOW_STAT
++ printf("\n i = %d numPairs = %d ", ttt, numPairs / 2);
++ ttt++;
++ {
++ UInt32 i;
++ for (i = 0; i < numPairs; i += 2)
++ printf("%2d %6d | ", p->matches[i], p->matches[i + 1]);
++ }
++ #endif
++ if (numPairs > 0)
++ {
++ lenRes = p->matches[numPairs - 2];
++ if (lenRes == p->numFastBytes)
++ {
++ const Byte *pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
++ UInt32 distance = p->matches[numPairs - 1] + 1;
++ UInt32 numAvail = p->numAvail;
++ if (numAvail > LZMA_MATCH_LEN_MAX)
++ numAvail = LZMA_MATCH_LEN_MAX;
++ {
++ const Byte *pby2 = pby - distance;
++ for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++);
++ }
++ }
++ }
++ p->additionalOffset++;
++ *numDistancePairsRes = numPairs;
++ return lenRes;
++}
++
++
++#define MakeAsChar(p) (p)->backPrev = (UInt32)(-1); (p)->prev1IsChar = False;
++#define MakeAsShortRep(p) (p)->backPrev = 0; (p)->prev1IsChar = False;
++#define IsShortRep(p) ((p)->backPrev == 0)
++
++static UInt32 GetRepLen1Price(CLzmaEnc *p, UInt32 state, UInt32 posState)
++{
++ return
++ GET_PRICE_0(p->isRepG0[state]) +
++ GET_PRICE_0(p->isRep0Long[state][posState]);
++}
++
++static UInt32 GetPureRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 state, UInt32 posState)
++{
++ UInt32 price;
++ if (repIndex == 0)
++ {
++ price = GET_PRICE_0(p->isRepG0[state]);
++ price += GET_PRICE_1(p->isRep0Long[state][posState]);
++ }
++ else
++ {
++ price = GET_PRICE_1(p->isRepG0[state]);
++ if (repIndex == 1)
++ price += GET_PRICE_0(p->isRepG1[state]);
++ else
++ {
++ price += GET_PRICE_1(p->isRepG1[state]);
++ price += GET_PRICE(p->isRepG2[state], repIndex - 2);
++ }
++ }
++ return price;
++}
++
++static UInt32 GetRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState)
++{
++ return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] +
++ GetPureRepPrice(p, repIndex, state, posState);
++}
++
++static UInt32 Backward(CLzmaEnc *p, UInt32 *backRes, UInt32 cur)
++{
++ UInt32 posMem = p->opt[cur].posPrev;
++ UInt32 backMem = p->opt[cur].backPrev;
++ p->optimumEndIndex = cur;
++ do
++ {
++ if (p->opt[cur].prev1IsChar)
++ {
++ MakeAsChar(&p->opt[posMem])
++ p->opt[posMem].posPrev = posMem - 1;
++ if (p->opt[cur].prev2)
++ {
++ p->opt[posMem - 1].prev1IsChar = False;
++ p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2;
++ p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2;
++ }
++ }
++ {
++ UInt32 posPrev = posMem;
++ UInt32 backCur = backMem;
++
++ backMem = p->opt[posPrev].backPrev;
++ posMem = p->opt[posPrev].posPrev;
++
++ p->opt[posPrev].backPrev = backCur;
++ p->opt[posPrev].posPrev = cur;
++ cur = posPrev;
++ }
++ }
++ while (cur != 0);
++ *backRes = p->opt[0].backPrev;
++ p->optimumCurrentIndex = p->opt[0].posPrev;
++ return p->optimumCurrentIndex;
++}
++
++#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300)
++
++static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes)
++{
++ UInt32 numAvail, mainLen, numPairs, repMaxIndex, i, posState, lenEnd, len, cur;
++ UInt32 matchPrice, repMatchPrice, normalMatchPrice;
++ UInt32 reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS];
++ UInt32 *matches;
++ const Byte *data;
++ Byte curByte, matchByte;
++ if (p->optimumEndIndex != p->optimumCurrentIndex)
++ {
++ const COptimal *opt = &p->opt[p->optimumCurrentIndex];
++ UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex;
++ *backRes = opt->backPrev;
++ p->optimumCurrentIndex = opt->posPrev;
++ return lenRes;
++ }
++ p->optimumCurrentIndex = p->optimumEndIndex = 0;
++
++ if (p->additionalOffset == 0)
++ mainLen = ReadMatchDistances(p, &numPairs);
++ else
++ {
++ mainLen = p->longestMatchLength;
++ numPairs = p->numPairs;
++ }
++
++ numAvail = p->numAvail;
++ if (numAvail < 2)
++ {
++ *backRes = (UInt32)(-1);
++ return 1;
++ }
++ if (numAvail > LZMA_MATCH_LEN_MAX)
++ numAvail = LZMA_MATCH_LEN_MAX;
++
++ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
++ repMaxIndex = 0;
++ for (i = 0; i < LZMA_NUM_REPS; i++)
++ {
++ UInt32 lenTest;
++ const Byte *data2;
++ reps[i] = p->reps[i];
++ data2 = data - (reps[i] + 1);
++ if (data[0] != data2[0] || data[1] != data2[1])
++ {
++ repLens[i] = 0;
++ continue;
++ }
++ for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++);
++ repLens[i] = lenTest;
++ if (lenTest > repLens[repMaxIndex])
++ repMaxIndex = i;
++ }
++ if (repLens[repMaxIndex] >= p->numFastBytes)
++ {
++ UInt32 lenRes;
++ *backRes = repMaxIndex;
++ lenRes = repLens[repMaxIndex];
++ MovePos(p, lenRes - 1);
++ return lenRes;
++ }
++
++ matches = p->matches;
++ if (mainLen >= p->numFastBytes)
++ {
++ *backRes = matches[numPairs - 1] + LZMA_NUM_REPS;
++ MovePos(p, mainLen - 1);
++ return mainLen;
++ }
++ curByte = *data;
++ matchByte = *(data - (reps[0] + 1));
++
++ if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2)
++ {
++ *backRes = (UInt32)-1;
++ return 1;
++ }
++
++ p->opt[0].state = (CState)p->state;
++
++ posState = (position & p->pbMask);
++
++ {
++ const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
++ p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) +
++ (!IsCharState(p->state) ?
++ LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) :
++ LitEnc_GetPrice(probs, curByte, p->ProbPrices));
++ }
++
++ MakeAsChar(&p->opt[1]);
++
++ matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
++ repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
++
++ if (matchByte == curByte)
++ {
++ UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState);
++ if (shortRepPrice < p->opt[1].price)
++ {
++ p->opt[1].price = shortRepPrice;
++ MakeAsShortRep(&p->opt[1]);
++ }
++ }
++ lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]);
++
++ if (lenEnd < 2)
++ {
++ *backRes = p->opt[1].backPrev;
++ return 1;
++ }
++
++ p->opt[1].posPrev = 0;
++ for (i = 0; i < LZMA_NUM_REPS; i++)
++ p->opt[0].backs[i] = reps[i];
++
++ len = lenEnd;
++ do
++ p->opt[len--].price = kInfinityPrice;
++ while (len >= 2);
++
++ for (i = 0; i < LZMA_NUM_REPS; i++)
++ {
++ UInt32 repLen = repLens[i];
++ UInt32 price;
++ if (repLen < 2)
++ continue;
++ price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState);
++ do
++ {
++ UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2];
++ COptimal *opt = &p->opt[repLen];
++ if (curAndLenPrice < opt->price)
++ {
++ opt->price = curAndLenPrice;
++ opt->posPrev = 0;
++ opt->backPrev = i;
++ opt->prev1IsChar = False;
++ }
++ }
++ while (--repLen >= 2);
++ }
++
++ normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);
++
++ len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2);
++ if (len <= mainLen)
++ {
++ UInt32 offs = 0;
++ while (len > matches[offs])
++ offs += 2;
++ for (; ; len++)
++ {
++ COptimal *opt;
++ UInt32 distance = matches[offs + 1];
++
++ UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN];
++ UInt32 lenToPosState = GetLenToPosState(len);
++ if (distance < kNumFullDistances)
++ curAndLenPrice += p->distancesPrices[lenToPosState][distance];
++ else
++ {
++ UInt32 slot;
++ GetPosSlot2(distance, slot);
++ curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot];
++ }
++ opt = &p->opt[len];
++ if (curAndLenPrice < opt->price)
++ {
++ opt->price = curAndLenPrice;
++ opt->posPrev = 0;
++ opt->backPrev = distance + LZMA_NUM_REPS;
++ opt->prev1IsChar = False;
++ }
++ if (len == matches[offs])
++ {
++ offs += 2;
++ if (offs == numPairs)
++ break;
++ }
++ }
++ }
++
++ cur = 0;
++
++ #ifdef SHOW_STAT2
++ if (position >= 0)
++ {
++ unsigned i;
++ printf("\n pos = %4X", position);
++ for (i = cur; i <= lenEnd; i++)
++ printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price);
++ }
++ #endif
++
++ for (;;)
++ {
++ UInt32 numAvailFull, newLen, numPairs, posPrev, state, posState, startLen;
++ UInt32 curPrice, curAnd1Price, matchPrice, repMatchPrice;
++ Bool nextIsChar;
++ Byte curByte, matchByte;
++ const Byte *data;
++ COptimal *curOpt;
++ COptimal *nextOpt;
++
++ cur++;
++ if (cur == lenEnd)
++ return Backward(p, backRes, cur);
++
++ newLen = ReadMatchDistances(p, &numPairs);
++ if (newLen >= p->numFastBytes)
++ {
++ p->numPairs = numPairs;
++ p->longestMatchLength = newLen;
++ return Backward(p, backRes, cur);
++ }
++ position++;
++ curOpt = &p->opt[cur];
++ posPrev = curOpt->posPrev;
++ if (curOpt->prev1IsChar)
++ {
++ posPrev--;
++ if (curOpt->prev2)
++ {
++ state = p->opt[curOpt->posPrev2].state;
++ if (curOpt->backPrev2 < LZMA_NUM_REPS)
++ state = kRepNextStates[state];
++ else
++ state = kMatchNextStates[state];
++ }
++ else
++ state = p->opt[posPrev].state;
++ state = kLiteralNextStates[state];
++ }
++ else
++ state = p->opt[posPrev].state;
++ if (posPrev == cur - 1)
++ {
++ if (IsShortRep(curOpt))
++ state = kShortRepNextStates[state];
++ else
++ state = kLiteralNextStates[state];
++ }
++ else
++ {
++ UInt32 pos;
++ const COptimal *prevOpt;
++ if (curOpt->prev1IsChar && curOpt->prev2)
++ {
++ posPrev = curOpt->posPrev2;
++ pos = curOpt->backPrev2;
++ state = kRepNextStates[state];
++ }
++ else
++ {
++ pos = curOpt->backPrev;
++ if (pos < LZMA_NUM_REPS)
++ state = kRepNextStates[state];
++ else
++ state = kMatchNextStates[state];
++ }
++ prevOpt = &p->opt[posPrev];
++ if (pos < LZMA_NUM_REPS)
++ {
++ UInt32 i;
++ reps[0] = prevOpt->backs[pos];
++ for (i = 1; i <= pos; i++)
++ reps[i] = prevOpt->backs[i - 1];
++ for (; i < LZMA_NUM_REPS; i++)
++ reps[i] = prevOpt->backs[i];
++ }
++ else
++ {
++ UInt32 i;
++ reps[0] = (pos - LZMA_NUM_REPS);
++ for (i = 1; i < LZMA_NUM_REPS; i++)
++ reps[i] = prevOpt->backs[i - 1];
++ }
++ }
++ curOpt->state = (CState)state;
++
++ curOpt->backs[0] = reps[0];
++ curOpt->backs[1] = reps[1];
++ curOpt->backs[2] = reps[2];
++ curOpt->backs[3] = reps[3];
++
++ curPrice = curOpt->price;
++ nextIsChar = False;
++ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
++ curByte = *data;
++ matchByte = *(data - (reps[0] + 1));
++
++ posState = (position & p->pbMask);
++
++ curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]);
++ {
++ const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
++ curAnd1Price +=
++ (!IsCharState(state) ?
++ LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) :
++ LitEnc_GetPrice(probs, curByte, p->ProbPrices));
++ }
++
++ nextOpt = &p->opt[cur + 1];
++
++ if (curAnd1Price < nextOpt->price)
++ {
++ nextOpt->price = curAnd1Price;
++ nextOpt->posPrev = cur;
++ MakeAsChar(nextOpt);
++ nextIsChar = True;
++ }
++
++ matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]);
++ repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);
++
++ if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0))
++ {
++ UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState);
++ if (shortRepPrice <= nextOpt->price)
++ {
++ nextOpt->price = shortRepPrice;
++ nextOpt->posPrev = cur;
++ MakeAsShortRep(nextOpt);
++ nextIsChar = True;
++ }
++ }
++ numAvailFull = p->numAvail;
++ {
++ UInt32 temp = kNumOpts - 1 - cur;
++ if (temp < numAvailFull)
++ numAvailFull = temp;
++ }
++
++ if (numAvailFull < 2)
++ continue;
++ numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes);
++
++ if (!nextIsChar && matchByte != curByte) /* speed optimization */
++ {
++ /* try Literal + rep0 */
++ UInt32 temp;
++ UInt32 lenTest2;
++ const Byte *data2 = data - (reps[0] + 1);
++ UInt32 limit = p->numFastBytes + 1;
++ if (limit > numAvailFull)
++ limit = numAvailFull;
++
++ for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++);
++ lenTest2 = temp - 1;
++ if (lenTest2 >= 2)
++ {
++ UInt32 state2 = kLiteralNextStates[state];
++ UInt32 posStateNext = (position + 1) & p->pbMask;
++ UInt32 nextRepMatchPrice = curAnd1Price +
++ GET_PRICE_1(p->isMatch[state2][posStateNext]) +
++ GET_PRICE_1(p->isRep[state2]);
++ /* for (; lenTest2 >= 2; lenTest2--) */
++ {
++ UInt32 curAndLenPrice;
++ COptimal *opt;
++ UInt32 offset = cur + 1 + lenTest2;
++ while (lenEnd < offset)
++ p->opt[++lenEnd].price = kInfinityPrice;
++ curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
++ opt = &p->opt[offset];
++ if (curAndLenPrice < opt->price)
++ {
++ opt->price = curAndLenPrice;
++ opt->posPrev = cur + 1;
++ opt->backPrev = 0;
++ opt->prev1IsChar = True;
++ opt->prev2 = False;
++ }
++ }
++ }
++ }
++
++ startLen = 2; /* speed optimization */
++ {
++ UInt32 repIndex;
++ for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++)
++ {
++ UInt32 lenTest;
++ UInt32 lenTestTemp;
++ UInt32 price;
++ const Byte *data2 = data - (reps[repIndex] + 1);
++ if (data[0] != data2[0] || data[1] != data2[1])
++ continue;
++ for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++);
++ while (lenEnd < cur + lenTest)
++ p->opt[++lenEnd].price = kInfinityPrice;
++ lenTestTemp = lenTest;
++ price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState);
++ do
++ {
++ UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2];
++ COptimal *opt = &p->opt[cur + lenTest];
++ if (curAndLenPrice < opt->price)
++ {
++ opt->price = curAndLenPrice;
++ opt->posPrev = cur;
++ opt->backPrev = repIndex;
++ opt->prev1IsChar = False;
++ }
++ }
++ while (--lenTest >= 2);
++ lenTest = lenTestTemp;
++
++ if (repIndex == 0)
++ startLen = lenTest + 1;
++
++ /* if (_maxMode) */
++ {
++ UInt32 lenTest2 = lenTest + 1;
++ UInt32 limit = lenTest2 + p->numFastBytes;
++ UInt32 nextRepMatchPrice;
++ if (limit > numAvailFull)
++ limit = numAvailFull;
++ for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++);
++ lenTest2 -= lenTest + 1;
++ if (lenTest2 >= 2)
++ {
++ UInt32 state2 = kRepNextStates[state];
++ UInt32 posStateNext = (position + lenTest) & p->pbMask;
++ UInt32 curAndLenCharPrice =
++ price + p->repLenEnc.prices[posState][lenTest - 2] +
++ GET_PRICE_0(p->isMatch[state2][posStateNext]) +
++ LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
++ data[lenTest], data2[lenTest], p->ProbPrices);
++ state2 = kLiteralNextStates[state2];
++ posStateNext = (position + lenTest + 1) & p->pbMask;
++ nextRepMatchPrice = curAndLenCharPrice +
++ GET_PRICE_1(p->isMatch[state2][posStateNext]) +
++ GET_PRICE_1(p->isRep[state2]);
++
++ /* for (; lenTest2 >= 2; lenTest2--) */
++ {
++ UInt32 curAndLenPrice;
++ COptimal *opt;
++ UInt32 offset = cur + lenTest + 1 + lenTest2;
++ while (lenEnd < offset)
++ p->opt[++lenEnd].price = kInfinityPrice;
++ curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
++ opt = &p->opt[offset];
++ if (curAndLenPrice < opt->price)
++ {
++ opt->price = curAndLenPrice;
++ opt->posPrev = cur + lenTest + 1;
++ opt->backPrev = 0;
++ opt->prev1IsChar = True;
++ opt->prev2 = True;
++ opt->posPrev2 = cur;
++ opt->backPrev2 = repIndex;
++ }
++ }
++ }
++ }
++ }
++ }
++ /* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */
++ if (newLen > numAvail)
++ {
++ newLen = numAvail;
++ for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2);
++ matches[numPairs] = newLen;
++ numPairs += 2;
++ }
++ if (newLen >= startLen)
++ {
++ UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]);
++ UInt32 offs, curBack, posSlot;
++ UInt32 lenTest;
++ while (lenEnd < cur + newLen)
++ p->opt[++lenEnd].price = kInfinityPrice;
++
++ offs = 0;
++ while (startLen > matches[offs])
++ offs += 2;
++ curBack = matches[offs + 1];
++ GetPosSlot2(curBack, posSlot);
++ for (lenTest = /*2*/ startLen; ; lenTest++)
++ {
++ UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN];
++ UInt32 lenToPosState = GetLenToPosState(lenTest);
++ COptimal *opt;
++ if (curBack < kNumFullDistances)
++ curAndLenPrice += p->distancesPrices[lenToPosState][curBack];
++ else
++ curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask];
++
++ opt = &p->opt[cur + lenTest];
++ if (curAndLenPrice < opt->price)
++ {
++ opt->price = curAndLenPrice;
++ opt->posPrev = cur;
++ opt->backPrev = curBack + LZMA_NUM_REPS;
++ opt->prev1IsChar = False;
++ }
++
++ if (/*_maxMode && */lenTest == matches[offs])
++ {
++ /* Try Match + Literal + Rep0 */
++ const Byte *data2 = data - (curBack + 1);
++ UInt32 lenTest2 = lenTest + 1;
++ UInt32 limit = lenTest2 + p->numFastBytes;
++ UInt32 nextRepMatchPrice;
++ if (limit > numAvailFull)
++ limit = numAvailFull;
++ for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++);
++ lenTest2 -= lenTest + 1;
++ if (lenTest2 >= 2)
++ {
++ UInt32 state2 = kMatchNextStates[state];
++ UInt32 posStateNext = (position + lenTest) & p->pbMask;
++ UInt32 curAndLenCharPrice = curAndLenPrice +
++ GET_PRICE_0(p->isMatch[state2][posStateNext]) +
++ LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
++ data[lenTest], data2[lenTest], p->ProbPrices);
++ state2 = kLiteralNextStates[state2];
++ posStateNext = (posStateNext + 1) & p->pbMask;
++ nextRepMatchPrice = curAndLenCharPrice +
++ GET_PRICE_1(p->isMatch[state2][posStateNext]) +
++ GET_PRICE_1(p->isRep[state2]);
++
++ /* for (; lenTest2 >= 2; lenTest2--) */
++ {
++ UInt32 offset = cur + lenTest + 1 + lenTest2;
++ UInt32 curAndLenPrice;
++ COptimal *opt;
++ while (lenEnd < offset)
++ p->opt[++lenEnd].price = kInfinityPrice;
++ curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
++ opt = &p->opt[offset];
++ if (curAndLenPrice < opt->price)
++ {
++ opt->price = curAndLenPrice;
++ opt->posPrev = cur + lenTest + 1;
++ opt->backPrev = 0;
++ opt->prev1IsChar = True;
++ opt->prev2 = True;
++ opt->posPrev2 = cur;
++ opt->backPrev2 = curBack + LZMA_NUM_REPS;
++ }
++ }
++ }
++ offs += 2;
++ if (offs == numPairs)
++ break;
++ curBack = matches[offs + 1];
++ if (curBack >= kNumFullDistances)
++ GetPosSlot2(curBack, posSlot);
++ }
++ }
++ }
++ }
++}
++
++#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))
++
++static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes)
++{
++ UInt32 numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i;
++ const Byte *data;
++ const UInt32 *matches;
++
++ if (p->additionalOffset == 0)
++ mainLen = ReadMatchDistances(p, &numPairs);
++ else
++ {
++ mainLen = p->longestMatchLength;
++ numPairs = p->numPairs;
++ }
++
++ numAvail = p->numAvail;
++ *backRes = (UInt32)-1;
++ if (numAvail < 2)
++ return 1;
++ if (numAvail > LZMA_MATCH_LEN_MAX)
++ numAvail = LZMA_MATCH_LEN_MAX;
++ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
++
++ repLen = repIndex = 0;
++ for (i = 0; i < LZMA_NUM_REPS; i++)
++ {
++ UInt32 len;
++ const Byte *data2 = data - (p->reps[i] + 1);
++ if (data[0] != data2[0] || data[1] != data2[1])
++ continue;
++ for (len = 2; len < numAvail && data[len] == data2[len]; len++);
++ if (len >= p->numFastBytes)
++ {
++ *backRes = i;
++ MovePos(p, len - 1);
++ return len;
++ }
++ if (len > repLen)
++ {
++ repIndex = i;
++ repLen = len;
++ }
++ }
++
++ matches = p->matches;
++ if (mainLen >= p->numFastBytes)
++ {
++ *backRes = matches[numPairs - 1] + LZMA_NUM_REPS;
++ MovePos(p, mainLen - 1);
++ return mainLen;
++ }
++
++ mainDist = 0; /* for GCC */
++ if (mainLen >= 2)
++ {
++ mainDist = matches[numPairs - 1];
++ while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1)
++ {
++ if (!ChangePair(matches[numPairs - 3], mainDist))
++ break;
++ numPairs -= 2;
++ mainLen = matches[numPairs - 2];
++ mainDist = matches[numPairs - 1];
++ }
++ if (mainLen == 2 && mainDist >= 0x80)
++ mainLen = 1;
++ }
++
++ if (repLen >= 2 && (
++ (repLen + 1 >= mainLen) ||
++ (repLen + 2 >= mainLen && mainDist >= (1 << 9)) ||
++ (repLen + 3 >= mainLen && mainDist >= (1 << 15))))
++ {
++ *backRes = repIndex;
++ MovePos(p, repLen - 1);
++ return repLen;
++ }
++
++ if (mainLen < 2 || numAvail <= 2)
++ return 1;
++
++ p->longestMatchLength = ReadMatchDistances(p, &p->numPairs);
++ if (p->longestMatchLength >= 2)
++ {
++ UInt32 newDistance = matches[p->numPairs - 1];
++ if ((p->longestMatchLength >= mainLen && newDistance < mainDist) ||
++ (p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) ||
++ (p->longestMatchLength > mainLen + 1) ||
++ (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist)))
++ return 1;
++ }
++
++ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
++ for (i = 0; i < LZMA_NUM_REPS; i++)
++ {
++ UInt32 len, limit;
++ const Byte *data2 = data - (p->reps[i] + 1);
++ if (data[0] != data2[0] || data[1] != data2[1])
++ continue;
++ limit = mainLen - 1;
++ for (len = 2; len < limit && data[len] == data2[len]; len++);
++ if (len >= limit)
++ return 1;
++ }
++ *backRes = mainDist + LZMA_NUM_REPS;
++ MovePos(p, mainLen - 2);
++ return mainLen;
++}
++
++static void WriteEndMarker(CLzmaEnc *p, UInt32 posState)
++{
++ UInt32 len;
++ RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
++ RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
++ p->state = kMatchNextStates[p->state];
++ len = LZMA_MATCH_LEN_MIN;
++ LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
++ RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1);
++ RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits);
++ RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask);
++}
++
++static SRes CheckErrors(CLzmaEnc *p)
++{
++ if (p->result != SZ_OK)
++ return p->result;
++ if (p->rc.res != SZ_OK)
++ p->result = SZ_ERROR_WRITE;
++ if (p->matchFinderBase.result != SZ_OK)
++ p->result = SZ_ERROR_READ;
++ if (p->result != SZ_OK)
++ p->finished = True;
++ return p->result;
++}
++
++static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
++{
++ /* ReleaseMFStream(); */
++ p->finished = True;
++ if (p->writeEndMark)
++ WriteEndMarker(p, nowPos & p->pbMask);
++ RangeEnc_FlushData(&p->rc);
++ RangeEnc_FlushStream(&p->rc);
++ return CheckErrors(p);
++}
++
++static void FillAlignPrices(CLzmaEnc *p)
++{
++ UInt32 i;
++ for (i = 0; i < kAlignTableSize; i++)
++ p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
++ p->alignPriceCount = 0;
++}
++
++static void FillDistancesPrices(CLzmaEnc *p)
++{
++ UInt32 tempPrices[kNumFullDistances];
++ UInt32 i, lenToPosState;
++ for (i = kStartPosModelIndex; i < kNumFullDistances; i++)
++ {
++ UInt32 posSlot = GetPosSlot1(i);
++ UInt32 footerBits = ((posSlot >> 1) - 1);
++ UInt32 base = ((2 | (posSlot & 1)) << footerBits);
++ tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices);
++ }
++
++ for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++)
++ {
++ UInt32 posSlot;
++ const CLzmaProb *encoder = p->posSlotEncoder[lenToPosState];
++ UInt32 *posSlotPrices = p->posSlotPrices[lenToPosState];
++ for (posSlot = 0; posSlot < p->distTableSize; posSlot++)
++ posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices);
++ for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++)
++ posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
++
++ {
++ UInt32 *distancesPrices = p->distancesPrices[lenToPosState];
++ UInt32 i;
++ for (i = 0; i < kStartPosModelIndex; i++)
++ distancesPrices[i] = posSlotPrices[i];
++ for (; i < kNumFullDistances; i++)
++ distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i];
++ }
++ }
++ p->matchPriceCount = 0;
++}
++
++void LzmaEnc_Construct(CLzmaEnc *p)
++{
++ RangeEnc_Construct(&p->rc);
++ MatchFinder_Construct(&p->matchFinderBase);
++ #ifndef _7ZIP_ST
++ MatchFinderMt_Construct(&p->matchFinderMt);
++ p->matchFinderMt.MatchFinder = &p->matchFinderBase;
++ #endif
++
++ {
++ CLzmaEncProps props;
++ LzmaEncProps_Init(&props);
++ LzmaEnc_SetProps(p, &props);
++ }
++
++ #ifndef LZMA_LOG_BSR
++ LzmaEnc_FastPosInit(p->g_FastPos);
++ #endif
++
++ LzmaEnc_InitPriceTables(p->ProbPrices);
++ p->litProbs = 0;
++ p->saveState.litProbs = 0;
++}
++
++CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc)
++{
++ void *p;
++ p = alloc->Alloc(alloc, sizeof(CLzmaEnc));
++ if (p != 0)
++ LzmaEnc_Construct((CLzmaEnc *)p);
++ return p;
++}
++
++void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAlloc *alloc)
++{
++ alloc->Free(alloc, p->litProbs);
++ alloc->Free(alloc, p->saveState.litProbs);
++ p->litProbs = 0;
++ p->saveState.litProbs = 0;
++}
++
++void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++ #ifndef _7ZIP_ST
++ MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
++ #endif
++ MatchFinder_Free(&p->matchFinderBase, allocBig);
++ LzmaEnc_FreeLits(p, alloc);
++ RangeEnc_Free(&p->rc, alloc);
++}
++
++void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++ LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
++ alloc->Free(alloc, p);
++}
++
++static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize)
++{
++ UInt32 nowPos32, startPos32;
++ if (p->needInit)
++ {
++ p->matchFinder.Init(p->matchFinderObj);
++ p->needInit = 0;
++ }
++
++ if (p->finished)
++ return p->result;
++ RINOK(CheckErrors(p));
++
++ nowPos32 = (UInt32)p->nowPos64;
++ startPos32 = nowPos32;
++
++ if (p->nowPos64 == 0)
++ {
++ UInt32 numPairs;
++ Byte curByte;
++ if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
++ return Flush(p, nowPos32);
++ ReadMatchDistances(p, &numPairs);
++ RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0);
++ p->state = kLiteralNextStates[p->state];
++ curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset);
++ LitEnc_Encode(&p->rc, p->litProbs, curByte);
++ p->additionalOffset--;
++ nowPos32++;
++ }
++
++ if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0)
++ for (;;)
++ {
++ UInt32 pos, len, posState;
++
++ if (p->fastMode)
++ len = GetOptimumFast(p, &pos);
++ else
++ len = GetOptimum(p, nowPos32, &pos);
++
++ #ifdef SHOW_STAT2
++ printf("\n pos = %4X, len = %d pos = %d", nowPos32, len, pos);
++ #endif
++
++ posState = nowPos32 & p->pbMask;
++ if (len == 1 && pos == (UInt32)-1)
++ {
++ Byte curByte;
++ CLzmaProb *probs;
++ const Byte *data;
++
++ RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0);
++ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
++ curByte = *data;
++ probs = LIT_PROBS(nowPos32, *(data - 1));
++ if (IsCharState(p->state))
++ LitEnc_Encode(&p->rc, probs, curByte);
++ else
++ LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1));
++ p->state = kLiteralNextStates[p->state];
++ }
++ else
++ {
++ RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
++ if (pos < LZMA_NUM_REPS)
++ {
++ RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1);
++ if (pos == 0)
++ {
++ RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0);
++ RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1));
++ }
++ else
++ {
++ UInt32 distance = p->reps[pos];
++ RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1);
++ if (pos == 1)
++ RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0);
++ else
++ {
++ RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1);
++ RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2);
++ if (pos == 3)
++ p->reps[3] = p->reps[2];
++ p->reps[2] = p->reps[1];
++ }
++ p->reps[1] = p->reps[0];
++ p->reps[0] = distance;
++ }
++ if (len == 1)
++ p->state = kShortRepNextStates[p->state];
++ else
++ {
++ LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
++ p->state = kRepNextStates[p->state];
++ }
++ }
++ else
++ {
++ UInt32 posSlot;
++ RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
++ p->state = kMatchNextStates[p->state];
++ LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
++ pos -= LZMA_NUM_REPS;
++ GetPosSlot(pos, posSlot);
++ RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot);
++
++ if (posSlot >= kStartPosModelIndex)
++ {
++ UInt32 footerBits = ((posSlot >> 1) - 1);
++ UInt32 base = ((2 | (posSlot & 1)) << footerBits);
++ UInt32 posReduced = pos - base;
++
++ if (posSlot < kEndPosModelIndex)
++ RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced);
++ else
++ {
++ RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
++ RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask);
++ p->alignPriceCount++;
++ }
++ }
++ p->reps[3] = p->reps[2];
++ p->reps[2] = p->reps[1];
++ p->reps[1] = p->reps[0];
++ p->reps[0] = pos;
++ p->matchPriceCount++;
++ }
++ }
++ p->additionalOffset -= len;
++ nowPos32 += len;
++ if (p->additionalOffset == 0)
++ {
++ UInt32 processed;
++ if (!p->fastMode)
++ {
++ if (p->matchPriceCount >= (1 << 7))
++ FillDistancesPrices(p);
++ if (p->alignPriceCount >= kAlignTableSize)
++ FillAlignPrices(p);
++ }
++ if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
++ break;
++ processed = nowPos32 - startPos32;
++ if (useLimits)
++ {
++ if (processed + kNumOpts + 300 >= maxUnpackSize ||
++ RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize)
++ break;
++ }
++ else if (processed >= (1 << 15))
++ {
++ p->nowPos64 += nowPos32 - startPos32;
++ return CheckErrors(p);
++ }
++ }
++ }
++ p->nowPos64 += nowPos32 - startPos32;
++ return Flush(p, nowPos32);
++}
++
++#define kBigHashDicLimit ((UInt32)1 << 24)
++
++static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++ UInt32 beforeSize = kNumOpts;
++ Bool btMode;
++ if (!RangeEnc_Alloc(&p->rc, alloc))
++ return SZ_ERROR_MEM;
++ btMode = (p->matchFinderBase.btMode != 0);
++ #ifndef _7ZIP_ST
++ p->mtMode = (p->multiThread && !p->fastMode && btMode);
++ #endif
++
++ {
++ unsigned lclp = p->lc + p->lp;
++ if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp)
++ {
++ LzmaEnc_FreeLits(p, alloc);
++ p->litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
++ p->saveState.litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
++ if (p->litProbs == 0 || p->saveState.litProbs == 0)
++ {
++ LzmaEnc_FreeLits(p, alloc);
++ return SZ_ERROR_MEM;
++ }
++ p->lclp = lclp;
++ }
++ }
++
++ p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit);
++
++ if (beforeSize + p->dictSize < keepWindowSize)
++ beforeSize = keepWindowSize - p->dictSize;
++
++ #ifndef _7ZIP_ST
++ if (p->mtMode)
++ {
++ RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig));
++ p->matchFinderObj = &p->matchFinderMt;
++ MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
++ }
++ else
++ #endif
++ {
++ if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
++ return SZ_ERROR_MEM;
++ p->matchFinderObj = &p->matchFinderBase;
++ MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
++ }
++ return SZ_OK;
++}
++
++void LzmaEnc_Init(CLzmaEnc *p)
++{
++ UInt32 i;
++ p->state = 0;
++ for (i = 0 ; i < LZMA_NUM_REPS; i++)
++ p->reps[i] = 0;
++
++ RangeEnc_Init(&p->rc);
++
++
++ for (i = 0; i < kNumStates; i++)
++ {
++ UInt32 j;
++ for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)
++ {
++ p->isMatch[i][j] = kProbInitValue;
++ p->isRep0Long[i][j] = kProbInitValue;
++ }
++ p->isRep[i] = kProbInitValue;
++ p->isRepG0[i] = kProbInitValue;
++ p->isRepG1[i] = kProbInitValue;
++ p->isRepG2[i] = kProbInitValue;
++ }
++
++ {
++ UInt32 num = 0x300 << (p->lp + p->lc);
++ for (i = 0; i < num; i++)
++ p->litProbs[i] = kProbInitValue;
++ }
++
++ {
++ for (i = 0; i < kNumLenToPosStates; i++)
++ {
++ CLzmaProb *probs = p->posSlotEncoder[i];
++ UInt32 j;
++ for (j = 0; j < (1 << kNumPosSlotBits); j++)
++ probs[j] = kProbInitValue;
++ }
++ }
++ {
++ for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++)
++ p->posEncoders[i] = kProbInitValue;
++ }
++
++ LenEnc_Init(&p->lenEnc.p);
++ LenEnc_Init(&p->repLenEnc.p);
++
++ for (i = 0; i < (1 << kNumAlignBits); i++)
++ p->posAlignEncoder[i] = kProbInitValue;
++
++ p->optimumEndIndex = 0;
++ p->optimumCurrentIndex = 0;
++ p->additionalOffset = 0;
++
++ p->pbMask = (1 << p->pb) - 1;
++ p->lpMask = (1 << p->lp) - 1;
++}
++
++void LzmaEnc_InitPrices(CLzmaEnc *p)
++{
++ if (!p->fastMode)
++ {
++ FillDistancesPrices(p);
++ FillAlignPrices(p);
++ }
++
++ p->lenEnc.tableSize =
++ p->repLenEnc.tableSize =
++ p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
++ LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices);
++ LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices);
++}
++
++static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++ UInt32 i;
++ for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++)
++ if (p->dictSize <= ((UInt32)1 << i))
++ break;
++ p->distTableSize = i * 2;
++
++ p->finished = False;
++ p->result = SZ_OK;
++ RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig));
++ LzmaEnc_Init(p);
++ LzmaEnc_InitPrices(p);
++ p->nowPos64 = 0;
++ return SZ_OK;
++}
++
++static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream,
++ ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++ CLzmaEnc *p = (CLzmaEnc *)pp;
++ p->matchFinderBase.stream = inStream;
++ p->needInit = 1;
++ p->rc.outStream = outStream;
++ return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
++}
++
++SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
++ ISeqInStream *inStream, UInt32 keepWindowSize,
++ ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++ CLzmaEnc *p = (CLzmaEnc *)pp;
++ p->matchFinderBase.stream = inStream;
++ p->needInit = 1;
++ return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
++}
++
++static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
++{
++ p->matchFinderBase.directInput = 1;
++ p->matchFinderBase.bufferBase = (Byte *)src;
++ p->matchFinderBase.directInputRem = srcLen;
++}
++
++SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
++ UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++ CLzmaEnc *p = (CLzmaEnc *)pp;
++ LzmaEnc_SetInputBuf(p, src, srcLen);
++ p->needInit = 1;
++
++ return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
++}
++
++void LzmaEnc_Finish(CLzmaEncHandle pp)
++{
++ #ifndef _7ZIP_ST
++ CLzmaEnc *p = (CLzmaEnc *)pp;
++ if (p->mtMode)
++ MatchFinderMt_ReleaseStream(&p->matchFinderMt);
++ #else
++ pp = pp;
++ #endif
++}
++
++typedef struct
++{
++ ISeqOutStream funcTable;
++ Byte *data;
++ SizeT rem;
++ Bool overflow;
++} CSeqOutStreamBuf;
++
++static size_t MyWrite(void *pp, const void *data, size_t size)
++{
++ CSeqOutStreamBuf *p = (CSeqOutStreamBuf *)pp;
++ if (p->rem < size)
++ {
++ size = p->rem;
++ p->overflow = True;
++ }
++ memcpy(p->data, data, size);
++ p->rem -= size;
++ p->data += size;
++ return size;
++}
++
++
++UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
++{
++ const CLzmaEnc *p = (CLzmaEnc *)pp;
++ return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
++}
++
++const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
++{
++ const CLzmaEnc *p = (CLzmaEnc *)pp;
++ return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
++}
++
++SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit,
++ Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
++{
++ CLzmaEnc *p = (CLzmaEnc *)pp;
++ UInt64 nowPos64;
++ SRes res;
++ CSeqOutStreamBuf outStream;
++
++ outStream.funcTable.Write = MyWrite;
++ outStream.data = dest;
++ outStream.rem = *destLen;
++ outStream.overflow = False;
++
++ p->writeEndMark = False;
++ p->finished = False;
++ p->result = SZ_OK;
++
++ if (reInit)
++ LzmaEnc_Init(p);
++ LzmaEnc_InitPrices(p);
++ nowPos64 = p->nowPos64;
++ RangeEnc_Init(&p->rc);
++ p->rc.outStream = &outStream.funcTable;
++
++ res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize);
++
++ *unpackSize = (UInt32)(p->nowPos64 - nowPos64);
++ *destLen -= outStream.rem;
++ if (outStream.overflow)
++ return SZ_ERROR_OUTPUT_EOF;
++
++ return res;
++}
++
++static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
++{
++ SRes res = SZ_OK;
++
++ #ifndef _7ZIP_ST
++ Byte allocaDummy[0x300];
++ int i = 0;
++ for (i = 0; i < 16; i++)
++ allocaDummy[i] = (Byte)i;
++ #endif
++
++ for (;;)
++ {
++ res = LzmaEnc_CodeOneBlock(p, False, 0, 0);
++ if (res != SZ_OK || p->finished != 0)
++ break;
++ if (progress != 0)
++ {
++ res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));
++ if (res != SZ_OK)
++ {
++ res = SZ_ERROR_PROGRESS;
++ break;
++ }
++ }
++ }
++ LzmaEnc_Finish(p);
++ return res;
++}
++
++SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress,
++ ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++ RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig));
++ return LzmaEnc_Encode2((CLzmaEnc *)pp, progress);
++}
++
++SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
++{
++ CLzmaEnc *p = (CLzmaEnc *)pp;
++ int i;
++ UInt32 dictSize = p->dictSize;
++ if (*size < LZMA_PROPS_SIZE)
++ return SZ_ERROR_PARAM;
++ *size = LZMA_PROPS_SIZE;
++ props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
++
++ for (i = 11; i <= 30; i++)
++ {
++ if (dictSize <= ((UInt32)2 << i))
++ {
++ dictSize = (2 << i);
++ break;
++ }
++ if (dictSize <= ((UInt32)3 << i))
++ {
++ dictSize = (3 << i);
++ break;
++ }
++ }
++
++ for (i = 0; i < 4; i++)
++ props[1 + i] = (Byte)(dictSize >> (8 * i));
++ return SZ_OK;
++}
++
++SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
++ int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++ SRes res;
++ CLzmaEnc *p = (CLzmaEnc *)pp;
++
++ CSeqOutStreamBuf outStream;
++
++ LzmaEnc_SetInputBuf(p, src, srcLen);
++
++ outStream.funcTable.Write = MyWrite;
++ outStream.data = dest;
++ outStream.rem = *destLen;
++ outStream.overflow = False;
++
++ p->writeEndMark = writeEndMark;
++
++ p->rc.outStream = &outStream.funcTable;
++ res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig);
++ if (res == SZ_OK)
++ res = LzmaEnc_Encode2(p, progress);
++
++ *destLen -= outStream.rem;
++ if (outStream.overflow)
++ return SZ_ERROR_OUTPUT_EOF;
++ return res;
++}
++
++SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
++ const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
++ ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig)
++{
++ CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc);
++ SRes res;
++ if (p == 0)
++ return SZ_ERROR_MEM;
++
++ res = LzmaEnc_SetProps(p, props);
++ if (res == SZ_OK)
++ {
++ res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize);
++ if (res == SZ_OK)
++ res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen,
++ writeEndMark, progress, alloc, allocBig);
++ }
++
++ LzmaEnc_Destroy(p, alloc, allocBig);
++ return res;
++}
+
+Property changes on: third_party/lzma_sdk/LzmaEnc.c
+___________________________________________________________________
+Added: svn:eol-style
+ + LF
+
+Index: third_party/lzma_sdk/Alloc.c
+===================================================================
+--- third_party/lzma_sdk/Alloc.c (revision 0)
++++ third_party/lzma_sdk/Alloc.c (revision 0)
+@@ -0,0 +1,127 @@
++/* Alloc.c -- Memory allocation functions
++2008-09-24
++Igor Pavlov
++Public domain */
++
++#ifdef _WIN32
++#include <windows.h>
++#endif
++#include <stdlib.h>
++
++#include "Alloc.h"
++
++/* #define _SZ_ALLOC_DEBUG */
++
++/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
++#ifdef _SZ_ALLOC_DEBUG
++#include <stdio.h>
++int g_allocCount = 0;
++int g_allocCountMid = 0;
++int g_allocCountBig = 0;
++#endif
++
++void *MyAlloc(size_t size)
++{
++ if (size == 0)
++ return 0;
++ #ifdef _SZ_ALLOC_DEBUG
++ {
++ void *p = malloc(size);
++ fprintf(stderr, "\nAlloc %10d bytes, count = %10d, addr = %8X", size, g_allocCount++, (unsigned)p);
++ return p;
++ }
++ #else
++ return malloc(size);
++ #endif
++}
++
++void MyFree(void *address)
++{
++ #ifdef _SZ_ALLOC_DEBUG
++ if (address != 0)
++ fprintf(stderr, "\nFree; count = %10d, addr = %8X", --g_allocCount, (unsigned)address);
++ #endif
++ free(address);
++}
++
++#ifdef _WIN32
++
++void *MidAlloc(size_t size)
++{
++ if (size == 0)
++ return 0;
++ #ifdef _SZ_ALLOC_DEBUG
++ fprintf(stderr, "\nAlloc_Mid %10d bytes; count = %10d", size, g_allocCountMid++);
++ #endif
++ return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
++}
++
++void MidFree(void *address)
++{
++ #ifdef _SZ_ALLOC_DEBUG
++ if (address != 0)
++ fprintf(stderr, "\nFree_Mid; count = %10d", --g_allocCountMid);
++ #endif
++ if (address == 0)
++ return;
++ VirtualFree(address, 0, MEM_RELEASE);
++}
++
++#ifndef MEM_LARGE_PAGES
++#undef _7ZIP_LARGE_PAGES
++#endif
++
++#ifdef _7ZIP_LARGE_PAGES
++SIZE_T g_LargePageSize = 0;
++typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
++#endif
++
++void SetLargePageSize()
++{
++ #ifdef _7ZIP_LARGE_PAGES
++ SIZE_T size = 0;
++ GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)
++ GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
++ if (largePageMinimum == 0)
++ return;
++ size = largePageMinimum();
++ if (size == 0 || (size & (size - 1)) != 0)
++ return;
++ g_LargePageSize = size;
++ #endif
++}
++
++
++void *BigAlloc(size_t size)
++{
++ if (size == 0)
++ return 0;
++ #ifdef _SZ_ALLOC_DEBUG
++ fprintf(stderr, "\nAlloc_Big %10d bytes; count = %10d", size, g_allocCountBig++);
++ #endif
++
++ #ifdef _7ZIP_LARGE_PAGES
++ if (g_LargePageSize != 0 && g_LargePageSize <= (1 << 30) && size >= (1 << 18))
++ {
++ void *res = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)),
++ MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
++ if (res != 0)
++ return res;
++ }
++ #endif
++ return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
++}
++
++void BigFree(void *address)
++{
++ #ifdef _SZ_ALLOC_DEBUG
++ if (address != 0)
++ fprintf(stderr, "\nFree_Big; count = %10d", --g_allocCountBig);
++ #endif
++
++ if (address == 0)
++ return;
++ VirtualFree(address, 0, MEM_RELEASE);
++}
++
++#endif
+
+Property changes on: third_party/lzma_sdk/Alloc.c
+___________________________________________________________________
+Added: svn:eol-style
+ + LF
+
+Index: third_party/lzma_sdk/LzmaLib.c
+===================================================================
+--- third_party/lzma_sdk/LzmaLib.c (revision 0)
++++ third_party/lzma_sdk/LzmaLib.c (revision 0)
+@@ -0,0 +1,46 @@
++/* LzmaLib.c -- LZMA library wrapper
++2008-08-05
++Igor Pavlov
++Public domain */
++
++#include "LzmaEnc.h"
++#include "LzmaDec.h"
++#include "Alloc.h"
++#include "LzmaLib.h"
++
++static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); }
++static void SzFree(void *p, void *address) { p = p; MyFree(address); }
++static ISzAlloc g_Alloc = { SzAlloc, SzFree };
++
++MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
++ unsigned char *outProps, size_t *outPropsSize,
++ int level, /* 0 <= level <= 9, default = 5 */
++ unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
++ int lc, /* 0 <= lc <= 8, default = 3 */
++ int lp, /* 0 <= lp <= 4, default = 0 */
++ int pb, /* 0 <= pb <= 4, default = 2 */
++ int fb, /* 5 <= fb <= 273, default = 32 */
++ int numThreads /* 1 or 2, default = 2 */
++)
++{
++ CLzmaEncProps props;
++ LzmaEncProps_Init(&props);
++ props.level = level;
++ props.dictSize = dictSize;
++ props.lc = lc;
++ props.lp = lp;
++ props.pb = pb;
++ props.fb = fb;
++ props.numThreads = numThreads;
++
++ return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0,
++ NULL, &g_Alloc, &g_Alloc);
++}
++
++
++MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
++ const unsigned char *props, size_t propsSize)
++{
++ ELzmaStatus status;
++ return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc);
++}
+
+Property changes on: third_party/lzma_sdk/LzmaLib.c
+___________________________________________________________________
+Added: svn:eol-style
+ + LF
+
+Index: third_party/lzma_sdk/LzFind.h
+===================================================================
+--- third_party/lzma_sdk/LzFind.h (revision 0)
++++ third_party/lzma_sdk/LzFind.h (revision 0)
+@@ -0,0 +1,115 @@
++/* LzFind.h -- Match finder for LZ algorithms
++2009-04-22 : Igor Pavlov : Public domain */
++
++#ifndef __LZ_FIND_H
++#define __LZ_FIND_H
++
++#include "Types.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++typedef UInt32 CLzRef;
++
++typedef struct _CMatchFinder
++{
++ Byte *buffer;
++ UInt32 pos;
++ UInt32 posLimit;
++ UInt32 streamPos;
++ UInt32 lenLimit;
++
++ UInt32 cyclicBufferPos;
++ UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
++
++ UInt32 matchMaxLen;
++ CLzRef *hash;
++ CLzRef *son;
++ UInt32 hashMask;
++ UInt32 cutValue;
++
++ Byte *bufferBase;
++ ISeqInStream *stream;
++ int streamEndWasReached;
++
++ UInt32 blockSize;
++ UInt32 keepSizeBefore;
++ UInt32 keepSizeAfter;
++
++ UInt32 numHashBytes;
++ int directInput;
++ size_t directInputRem;
++ int btMode;
++ int bigHash;
++ UInt32 historySize;
++ UInt32 fixedHashSize;
++ UInt32 hashSizeSum;
++ UInt32 numSons;
++ SRes result;
++ UInt32 crc[256];
++} CMatchFinder;
++
++#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
++#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)])
++
++#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
++
++int MatchFinder_NeedMove(CMatchFinder *p);
++Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
++void MatchFinder_MoveBlock(CMatchFinder *p);
++void MatchFinder_ReadIfRequired(CMatchFinder *p);
++
++void MatchFinder_Construct(CMatchFinder *p);
++
++/* Conditions:
++ historySize <= 3 GB
++ keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
++*/
++int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
++ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
++ ISzAlloc *alloc);
++void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc);
++void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems);
++void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
++
++UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
++ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
++ UInt32 *distances, UInt32 maxLen);
++
++/*
++Conditions:
++ Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
++ Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
++*/
++
++typedef void (*Mf_Init_Func)(void *object);
++typedef Byte (*Mf_GetIndexByte_Func)(void *object, Int32 index);
++typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
++typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
++typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
++typedef void (*Mf_Skip_Func)(void *object, UInt32);
++
++typedef struct _IMatchFinder
++{
++ Mf_Init_Func Init;
++ Mf_GetIndexByte_Func GetIndexByte;
++ Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
++ Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
++ Mf_GetMatches_Func GetMatches;
++ Mf_Skip_Func Skip;
++} IMatchFinder;
++
++void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
++
++void MatchFinder_Init(CMatchFinder *p);
++UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
++UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
++void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
++void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+
+Property changes on: third_party/lzma_sdk/LzFind.h
+___________________________________________________________________
+Added: svn:eol-style
+ + LF
+
diff --git a/shared.mk b/shared.mk
new file mode 100644
index 0000000..15dca18
--- /dev/null
+++ b/shared.mk
@@ -0,0 +1,17 @@
+OS := $(shell uname)
+IDIRS=-I../brotli/dec/ -I../brotli/enc/ -I../
+
+GFLAGS=-no-canonical-prefixes -fno-omit-frame-pointer -m64
+
+CPP = g++
+LFLAGS =
+CPPFLAGS = -c $(IDIRS) -std=c++0x $(GFLAGS)
+
+ifeq ($(OS), Darwin)
+ CPPFLAGS += -DOS_MACOSX
+else
+ CPPFLAGS += -fno-tree-vrp
+endif
+
+%.o : %.c
+ $(CPP) $(CPPFLAGS) $< -o $@
diff --git a/woff2/Makefile b/woff2/Makefile
new file mode 100644
index 0000000..971feac
--- /dev/null
+++ b/woff2/Makefile
@@ -0,0 +1,28 @@
+#Converter makefile
+
+include ../shared.mk
+
+OUROBJ = font.o glyph.o normalize.o transform.o woff2.o
+
+BROTLI = ../brotli
+ENCOBJ = $(BROTLI)/enc/*.o
+DECOBJ = $(BROTLI)/dec/*.o
+
+OBJS = $(OUROBJ)
+EXECUTABLES=woff2_compress woff2_decompress
+
+EXE_OBJS=$(patsubst %, %.o, $(EXECUTABLES))
+
+all : $(OBJS) $(EXECUTABLES)
+
+$(EXECUTABLES) : $(EXE_OBJS) deps
+ $(CPP) $(LFLAGS) $(OBJS) $(ENCOBJ) $(DECOBJ) $@.o -o $@
+
+deps :
+ make -C $(BROTLI)/dec
+ make -C $(BROTLI)/enc
+
+clean :
+ rm -f $(OBJS) $(EXE_OBJS) $(EXECUTABLES)
+ make -C $(BROTLI)/dec clean
+ make -C $(BROTLI)/enc clean
diff --git a/woff2/file.h b/woff2/file.h
new file mode 100644
index 0000000..f93fdee
--- /dev/null
+++ b/woff2/file.h
@@ -0,0 +1,40 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// File IO helpers
+
+#ifndef BROTLI_WOFF2_FILE_H_
+#define BROTLI_WOFF2_FILE_H_
+
+#include <fstream>
+#include <iterator>
+
+namespace woff2 {
+
+inline std::string GetFileContent(std::string filename) {
+ std::ifstream ifs(filename.c_str(), std::ios::binary);
+ return std::string(
+ std::istreambuf_iterator<char>(ifs.rdbuf()),
+ std::istreambuf_iterator<char>());
+}
+
+inline void SetFileContents(std::string filename, std::string content) {
+ std::ofstream ofs(filename.c_str(), std::ios::binary);
+ std::copy(content.begin(),
+ content.end(),
+ std::ostream_iterator<char>(ofs));
+}
+
+} // namespace woff2
+#endif // BROTLI_WOFF2_FILE_H_
diff --git a/woff2/font.cc b/woff2/font.cc
new file mode 100644
index 0000000..2733708
--- /dev/null
+++ b/woff2/font.cc
@@ -0,0 +1,176 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Font management utilities
+
+#include "./font.h"
+
+#include <algorithm>
+
+#include "./ots.h"
+#include "./port.h"
+#include "./store_bytes.h"
+
+namespace woff2 {
+
+Font::Table* Font::FindTable(uint32_t tag) {
+ std::map<uint32_t, Font::Table>::iterator it = tables.find(tag);
+ return it == tables.end() ? 0 : &it->second;
+}
+
+const Font::Table* Font::FindTable(uint32_t tag) const {
+ std::map<uint32_t, Font::Table>::const_iterator it = tables.find(tag);
+ return it == tables.end() ? 0 : &it->second;
+}
+
+bool ReadFont(const uint8_t* data, size_t len, Font* font) {
+ ots::Buffer file(data, len);
+
+ // We don't care about the search_range, entry_selector and range_shift
+ // fields, they will always be computed upon writing the font.
+ if (!file.ReadU32(&font->flavor) ||
+ !file.ReadU16(&font->num_tables) ||
+ !file.Skip(6)) {
+ return OTS_FAILURE();
+ }
+
+ std::map<uint32_t, uint32_t> intervals;
+ for (uint16_t i = 0; i < font->num_tables; ++i) {
+ Font::Table table;
+ if (!file.ReadU32(&table.tag) ||
+ !file.ReadU32(&table.checksum) ||
+ !file.ReadU32(&table.offset) ||
+ !file.ReadU32(&table.length)) {
+ return OTS_FAILURE();
+ }
+ if ((table.offset & 3) != 0 ||
+ table.length > len ||
+ len - table.length < table.offset) {
+ return OTS_FAILURE();
+ }
+ intervals[table.offset] = table.length;
+ table.data = data + table.offset;
+ if (font->tables.find(table.tag) != font->tables.end()) {
+ return OTS_FAILURE();
+ }
+ font->tables[table.tag] = table;
+ }
+
+ // Check that tables are non-overlapping.
+ uint32_t last_offset = 12UL + 16UL * font->num_tables;
+ for (const auto& i : intervals) {
+ if (i.first < last_offset || i.first + i.second < i.first) {
+ return OTS_FAILURE();
+ }
+ last_offset = i.first + i.second;
+ }
+ return true;
+}
+
+size_t FontFileSize(const Font& font) {
+ size_t max_offset = 12ULL + 16ULL * font.num_tables;
+ for (const auto& i : font.tables) {
+ const Font::Table& table = i.second;
+ size_t padding_size = (4 - (table.length & 3)) & 3;
+ size_t end_offset = (padding_size + table.offset) + table.length;
+ max_offset = std::max(max_offset, end_offset);
+ }
+ return max_offset;
+}
+
+bool WriteFont(const Font& font, uint8_t* dst, size_t dst_size) {
+ if (dst_size < 12ULL + 16ULL * font.num_tables) {
+ return OTS_FAILURE();
+ }
+ size_t offset = 0;
+ StoreU32(font.flavor, &offset, dst);
+ Store16(font.num_tables, &offset, dst);
+ uint16_t max_pow2 = font.num_tables ? Log2Floor(font.num_tables) : 0;
+ uint16_t search_range = max_pow2 ? 1 << (max_pow2 + 4) : 0;
+ uint16_t range_shift = (font.num_tables << 4) - search_range;
+ Store16(search_range, &offset, dst);
+ Store16(max_pow2, &offset, dst);
+ Store16(range_shift, &offset, dst);
+ for (const auto& i : font.tables) {
+ const Font::Table& table = i.second;
+ StoreU32(table.tag, &offset, dst);
+ StoreU32(table.checksum, &offset, dst);
+ StoreU32(table.offset, &offset, dst);
+ StoreU32(table.length, &offset, dst);
+ if (table.offset + table.length < table.offset ||
+ dst_size < table.offset + table.length) {
+ return OTS_FAILURE();
+ }
+ memcpy(dst + table.offset, table.data, table.length);
+ size_t padding_size = (4 - (table.length & 3)) & 3;
+ if (table.offset + table.length + padding_size < padding_size ||
+ dst_size < table.offset + table.length + padding_size) {
+ return OTS_FAILURE();
+ }
+ memset(dst + table.offset + table.length, 0, padding_size);
+ }
+ return true;
+}
+
+int NumGlyphs(const Font& font) {
+ const Font::Table* head_table = font.FindTable(kHeadTableTag);
+ const Font::Table* loca_table = font.FindTable(kLocaTableTag);
+ if (head_table == NULL || loca_table == NULL || head_table->length < 52) {
+ return 0;
+ }
+ int index_fmt = head_table->data[51];
+ return (loca_table->length / (index_fmt == 0 ? 2 : 4)) - 1;
+}
+
+bool GetGlyphData(const Font& font, int glyph_index,
+ const uint8_t** glyph_data, size_t* glyph_size) {
+ if (glyph_index < 0) {
+ return OTS_FAILURE();
+ }
+ const Font::Table* head_table = font.FindTable(kHeadTableTag);
+ const Font::Table* loca_table = font.FindTable(kLocaTableTag);
+ const Font::Table* glyf_table = font.FindTable(kGlyfTableTag);
+ if (head_table == NULL || loca_table == NULL || glyf_table == NULL ||
+ head_table->length < 52) {
+ return OTS_FAILURE();
+ }
+ int index_fmt = head_table->data[51];
+ ots::Buffer loca_buf(loca_table->data, loca_table->length);
+ if (index_fmt == 0) {
+ uint16_t offset1, offset2;
+ if (!loca_buf.Skip(2 * glyph_index) ||
+ !loca_buf.ReadU16(&offset1) ||
+ !loca_buf.ReadU16(&offset2) ||
+ offset2 < offset1 ||
+ 2 * offset2 > glyf_table->length) {
+ return OTS_FAILURE();
+ }
+ *glyph_data = glyf_table->data + 2 * offset1;
+ *glyph_size = 2 * (offset2 - offset1);
+ } else {
+ uint32_t offset1, offset2;
+ if (!loca_buf.Skip(4 * glyph_index) ||
+ !loca_buf.ReadU32(&offset1) ||
+ !loca_buf.ReadU32(&offset2) ||
+ offset2 < offset1 ||
+ offset2 > glyf_table->length) {
+ return OTS_FAILURE();
+ }
+ *glyph_data = glyf_table->data + offset1;
+ *glyph_size = offset2 - offset1;
+ }
+ return true;
+}
+
+} // namespace woff2
diff --git a/woff2/font.h b/woff2/font.h
new file mode 100644
index 0000000..21fd634
--- /dev/null
+++ b/woff2/font.h
@@ -0,0 +1,81 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Data model for a font file in sfnt format, reading and writing functions and
+// accessors for the glyph data.
+
+#ifndef BROTLI_WOFF2_FONT_H_
+#define BROTLI_WOFF2_FONT_H_
+
+#include <stddef.h>
+#include <inttypes.h>
+#include <map>
+#include <vector>
+
+namespace woff2 {
+
+// Tags of popular tables.
+static const uint32_t kGlyfTableTag = 0x676c7966;
+static const uint32_t kHeadTableTag = 0x68656164;
+static const uint32_t kLocaTableTag = 0x6c6f6361;
+
+// Represents an sfnt font file. Only the table directory is parsed, for the
+// table data we only store a raw pointer, therefore a font object is valid only
+// as long as the data from which it was parsed is around.
+struct Font {
+ uint32_t flavor;
+ uint16_t num_tables;
+
+ struct Table {
+ uint32_t tag;
+ uint32_t checksum;
+ uint32_t offset;
+ uint32_t length;
+ const uint8_t* data;
+
+ // Buffer used to mutate the data before writing out.
+ std::vector<uint8_t> buffer;
+ };
+ std::map<uint32_t, Table> tables;
+
+ Table* FindTable(uint32_t tag);
+ const Table* FindTable(uint32_t tag) const;
+};
+
+// Parses the font from the given data. Returns false on parsing failure or
+// buffer overflow. The font is valid only so long as the input data pointer is
+// valid.
+bool ReadFont(const uint8_t* data, size_t len, Font* font);
+
+// Returns the file size of the font.
+size_t FontFileSize(const Font& font);
+
+// Writes the font into the specified dst buffer. The dst_size should be the
+// same as returned by FontFileSize(). Returns false upon buffer overflow (which
+// should not happen if dst_size was computed by FontFileSize()).
+bool WriteFont(const Font& font, uint8_t* dst, size_t dst_size);
+
+// Returns the number of glyphs in the font.
+// NOTE: Currently this works only for TrueType-flavored fonts, will return
+// zero for CFF-flavored fonts.
+int NumGlyphs(const Font& font);
+
+// Sets *glyph_data and *glyph_size to point to the location of the glyph data
+// with the given index. Returns false if the glyph is not found.
+bool GetGlyphData(const Font& font, int glyph_index,
+ const uint8_t** glyph_data, size_t* glyph_size);
+
+} // namespace woff2
+
+#endif // BROTLI_WOFF2_FONT_H_
diff --git a/woff2/glyph.cc b/woff2/glyph.cc
new file mode 100644
index 0000000..4752e09
--- /dev/null
+++ b/woff2/glyph.cc
@@ -0,0 +1,380 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Glyph manipulation
+
+#include "./glyph.h"
+
+#include <stdlib.h>
+#include <limits>
+#include "./ots.h"
+#include "./store_bytes.h"
+
+namespace woff2 {
+
+static const int32_t kFLAG_ONCURVE = 1;
+static const int32_t kFLAG_XSHORT = 1 << 1;
+static const int32_t kFLAG_YSHORT = 1 << 2;
+static const int32_t kFLAG_REPEAT = 1 << 3;
+static const int32_t kFLAG_XREPEATSIGN = 1 << 4;
+static const int32_t kFLAG_YREPEATSIGN = 1 << 5;
+static const int32_t kFLAG_ARG_1_AND_2_ARE_WORDS = 1 << 0;
+static const int32_t kFLAG_WE_HAVE_A_SCALE = 1 << 3;
+static const int32_t kFLAG_MORE_COMPONENTS = 1 << 5;
+static const int32_t kFLAG_WE_HAVE_AN_X_AND_Y_SCALE = 1 << 6;
+static const int32_t kFLAG_WE_HAVE_A_TWO_BY_TWO = 1 << 7;
+static const int32_t kFLAG_WE_HAVE_INSTRUCTIONS = 1 << 8;
+
+bool ReadCompositeGlyphData(ots::Buffer* buffer, Glyph* glyph) {
+ glyph->have_instructions = false;
+ glyph->composite_data = buffer->buffer() + buffer->offset();
+ size_t start_offset = buffer->offset();
+ uint16_t flags = kFLAG_MORE_COMPONENTS;
+ while (flags & kFLAG_MORE_COMPONENTS) {
+ if (!buffer->ReadU16(&flags)) {
+ return OTS_FAILURE();
+ }
+ glyph->have_instructions |= (flags & kFLAG_WE_HAVE_INSTRUCTIONS) != 0;
+ size_t arg_size = 2; // glyph index
+ if (flags & kFLAG_ARG_1_AND_2_ARE_WORDS) {
+ arg_size += 4;
+ } else {
+ arg_size += 2;
+ }
+ if (flags & kFLAG_WE_HAVE_A_SCALE) {
+ arg_size += 2;
+ } else if (flags & kFLAG_WE_HAVE_AN_X_AND_Y_SCALE) {
+ arg_size += 4;
+ } else if (flags & kFLAG_WE_HAVE_A_TWO_BY_TWO) {
+ arg_size += 8;
+ }
+ if (!buffer->Skip(arg_size)) {
+ return OTS_FAILURE();
+ }
+ }
+ if (buffer->offset() - start_offset > std::numeric_limits<uint32_t>::max()) {
+ return OTS_FAILURE();
+ }
+ glyph->composite_data_size = buffer->offset() - start_offset;
+ return true;
+}
+
+bool ReadGlyph(const uint8_t* data, size_t len, Glyph* glyph) {
+ ots::Buffer buffer(data, len);
+
+ int16_t num_contours;
+ if (!buffer.ReadS16(&num_contours)) {
+ return OTS_FAILURE();
+ }
+
+ if (num_contours == 0) {
+ // Empty glyph.
+ return true;
+ }
+
+ // Read the bounding box.
+ if (!buffer.ReadS16(&glyph->x_min) ||
+ !buffer.ReadS16(&glyph->y_min) ||
+ !buffer.ReadS16(&glyph->x_max) ||
+ !buffer.ReadS16(&glyph->y_max)) {
+ return OTS_FAILURE();
+ }
+
+ if (num_contours > 0) {
+ // Simple glyph.
+ glyph->contours.resize(num_contours);
+
+ // Read the number of points per contour.
+ uint16_t last_point_index = 0;
+ for (int i = 0; i < num_contours; ++i) {
+ uint16_t point_index;
+ if (!buffer.ReadU16(&point_index)) {
+ return OTS_FAILURE();
+ }
+ uint16_t num_points = point_index - last_point_index + (i == 0 ? 1 : 0);
+ glyph->contours[i].resize(num_points);
+ last_point_index = point_index;
+ }
+
+ // Read the instructions.
+ if (!buffer.ReadU16(&glyph->instructions_size)) {
+ return OTS_FAILURE();
+ }
+ glyph->instructions_data = data + buffer.offset();
+ if (!buffer.Skip(glyph->instructions_size)) {
+ return OTS_FAILURE();
+ }
+
+ // Read the run-length coded flags.
+ std::vector<std::vector<uint8_t> > flags(num_contours);
+ uint8_t flag = 0;
+ uint8_t flag_repeat = 0;
+ for (int i = 0; i < num_contours; ++i) {
+ flags[i].resize(glyph->contours[i].size());
+ for (int j = 0; j < glyph->contours[i].size(); ++j) {
+ if (flag_repeat == 0) {
+ if (!buffer.ReadU8(&flag)) {
+ return OTS_FAILURE();
+ }
+ if (flag & kFLAG_REPEAT) {
+ if (!buffer.ReadU8(&flag_repeat)) {
+ return OTS_FAILURE();
+ }
+ }
+ } else {
+ flag_repeat--;
+ }
+ flags[i][j] = flag;
+ glyph->contours[i][j].on_curve = flag & kFLAG_ONCURVE;
+ }
+ }
+
+ // Read the x coordinates.
+ int prev_x = 0;
+ for (int i = 0; i < num_contours; ++i) {
+ for (int j = 0; j < glyph->contours[i].size(); ++j) {
+ uint8_t flag = flags[i][j];
+ if (flag & kFLAG_XSHORT) {
+ // single byte x-delta coord value
+ uint8_t x_delta;
+ if (!buffer.ReadU8(&x_delta)) {
+ return OTS_FAILURE();
+ }
+ int sign = (flag & kFLAG_XREPEATSIGN) ? 1 : -1;
+ glyph->contours[i][j].x = prev_x + sign * x_delta;
+ } else {
+ // double byte x-delta coord value
+ int16_t x_delta = 0;
+ if (!(flag & kFLAG_XREPEATSIGN)) {
+ if (!buffer.ReadS16(&x_delta)) {
+ return OTS_FAILURE();
+ }
+ }
+ glyph->contours[i][j].x = prev_x + x_delta;
+ }
+ prev_x = glyph->contours[i][j].x;
+ }
+ }
+
+ // Read the y coordinates.
+ int prev_y = 0;
+ for (int i = 0; i < num_contours; ++i) {
+ for (int j = 0; j < glyph->contours[i].size(); ++j) {
+ uint8_t flag = flags[i][j];
+ if (flag & kFLAG_YSHORT) {
+ // single byte y-delta coord value
+ uint8_t y_delta;
+ if (!buffer.ReadU8(&y_delta)) {
+ return OTS_FAILURE();
+ }
+ int sign = (flag & kFLAG_YREPEATSIGN) ? 1 : -1;
+ glyph->contours[i][j].y = prev_y + sign * y_delta;
+ } else {
+ // double byte y-delta coord value
+ int16_t y_delta = 0;
+ if (!(flag & kFLAG_YREPEATSIGN)) {
+ if (!buffer.ReadS16(&y_delta)) {
+ return OTS_FAILURE();
+ }
+ }
+ glyph->contours[i][j].y = prev_y + y_delta;
+ }
+ prev_y = glyph->contours[i][j].y;
+ }
+ }
+ } else if (num_contours == -1) {
+ // Composite glyph.
+ if (!ReadCompositeGlyphData(&buffer, glyph)) {
+ return OTS_FAILURE();
+ }
+ // Read the instructions.
+ if (glyph->have_instructions) {
+ if (!buffer.ReadU16(&glyph->instructions_size)) {
+ return OTS_FAILURE();
+ }
+ glyph->instructions_data = data + buffer.offset();
+ if (!buffer.Skip(glyph->instructions_size)) {
+ return OTS_FAILURE();
+ }
+ } else {
+ glyph->instructions_size = 0;
+ }
+ } else {
+ return OTS_FAILURE();
+ }
+ return true;
+}
+
+namespace {
+
+void StoreBbox(const Glyph& glyph, size_t* offset, uint8_t* dst) {
+ Store16(glyph.x_min, offset, dst);
+ Store16(glyph.y_min, offset, dst);
+ Store16(glyph.x_max, offset, dst);
+ Store16(glyph.y_max, offset, dst);
+}
+
+void StoreInstructions(const Glyph& glyph, size_t* offset, uint8_t* dst) {
+ Store16(glyph.instructions_size, offset, dst);
+ StoreBytes(glyph.instructions_data, glyph.instructions_size, offset, dst);
+}
+
+bool StoreEndPtsOfContours(const Glyph& glyph, size_t* offset, uint8_t* dst) {
+ int end_point = -1;
+ for (const auto& contour : glyph.contours) {
+ end_point += contour.size();
+ if (contour.size() > std::numeric_limits<uint16_t>::max() ||
+ end_point > std::numeric_limits<uint16_t>::max()) {
+ return OTS_FAILURE();
+ }
+ Store16(end_point, offset, dst);
+ }
+ return true;
+}
+
+bool StorePoints(const Glyph& glyph, size_t* offset,
+ uint8_t* dst, size_t dst_size) {
+ int last_flag = -1;
+ int repeat_count = 0;
+ int last_x = 0;
+ int last_y = 0;
+ size_t x_bytes = 0;
+ size_t y_bytes = 0;
+
+ // Store the flags and calculate the total size of the x and y coordinates.
+ for (const auto& contour : glyph.contours) {
+ for (const auto& point : contour) {
+ int flag = point.on_curve ? kFLAG_ONCURVE : 0;
+ int dx = point.x - last_x;
+ int dy = point.y - last_y;
+ if (dx == 0) {
+ flag |= kFLAG_XREPEATSIGN;
+ } else if (dx > -256 && dx < 256) {
+ flag |= kFLAG_XSHORT | (dx > 0 ? kFLAG_XREPEATSIGN : 0);
+ x_bytes += 1;
+ } else {
+ x_bytes += 2;
+ }
+ if (dy == 0) {
+ flag |= kFLAG_YREPEATSIGN;
+ } else if (dy > -256 && dy < 256) {
+ flag |= kFLAG_YSHORT | (dy > 0 ? kFLAG_YREPEATSIGN : 0);
+ y_bytes += 1;
+ } else {
+ y_bytes += 2;
+ }
+ if (flag == last_flag && repeat_count != 255) {
+ dst[*offset - 1] |= kFLAG_REPEAT;
+ repeat_count++;
+ } else {
+ if (repeat_count != 0) {
+ if (*offset >= dst_size) {
+ return OTS_FAILURE();
+ }
+ dst[(*offset)++] = repeat_count;
+ }
+ if (*offset >= dst_size) {
+ return OTS_FAILURE();
+ }
+ dst[(*offset)++] = flag;
+ repeat_count = 0;
+ }
+ last_x = point.x;
+ last_y = point.y;
+ last_flag = flag;
+ }
+ }
+ if (repeat_count != 0) {
+ if (*offset >= dst_size) {
+ return OTS_FAILURE();
+ }
+ dst[(*offset)++] = repeat_count;
+ }
+
+ if (*offset + x_bytes + y_bytes > dst_size) {
+ return OTS_FAILURE();
+ }
+
+ // Store the x and y coordinates.
+ size_t x_offset = *offset;
+ size_t y_offset = *offset + x_bytes;
+ last_x = 0;
+ last_y = 0;
+ for (const auto& contour : glyph.contours) {
+ for (const auto& point : contour) {
+ int dx = point.x - last_x;
+ int dy = point.y - last_y;
+ if (dx == 0) {
+ // pass
+ } else if (dx > -256 && dx < 256) {
+ dst[x_offset++] = std::abs(dx);
+ } else {
+ Store16(dx, &x_offset, dst);
+ }
+ if (dy == 0) {
+ // pass
+ } else if (dy > -256 && dy < 256) {
+ dst[y_offset++] = std::abs(dy);
+ } else {
+ Store16(dy, &y_offset, dst);
+ }
+ last_x += dx;
+ last_y += dy;
+ }
+ }
+ *offset = y_offset;
+ return true;
+}
+
+} // namespace
+
+bool StoreGlyph(const Glyph& glyph, uint8_t* dst, size_t* dst_size) {
+ size_t offset = 0;
+ if (glyph.composite_data_size > 0) {
+ // Composite glyph.
+ if (*dst_size < ((10ULL + glyph.composite_data_size) +
+ ((glyph.have_instructions ? 2ULL : 0) +
+ glyph.instructions_size))) {
+ return OTS_FAILURE();
+ }
+ Store16(-1, &offset, dst);
+ StoreBbox(glyph, &offset, dst);
+ StoreBytes(glyph.composite_data, glyph.composite_data_size, &offset, dst);
+ if (glyph.have_instructions) {
+ StoreInstructions(glyph, &offset, dst);
+ }
+ } else if (glyph.contours.size() > 0) {
+ // Simple glyph.
+ if (glyph.contours.size() > std::numeric_limits<int16_t>::max()) {
+ return OTS_FAILURE();
+ }
+ if (*dst_size < ((12ULL + 2 * glyph.contours.size()) +
+ glyph.instructions_size)) {
+ return OTS_FAILURE();
+ }
+ Store16(glyph.contours.size(), &offset, dst);
+ StoreBbox(glyph, &offset, dst);
+ if (!StoreEndPtsOfContours(glyph, &offset, dst)) {
+ return OTS_FAILURE();
+ }
+ StoreInstructions(glyph, &offset, dst);
+ if (!StorePoints(glyph, &offset, dst, *dst_size)) {
+ return OTS_FAILURE();
+ }
+ }
+ *dst_size = offset;
+ return true;
+}
+
+} // namespace woff2
diff --git a/woff2/glyph.h b/woff2/glyph.h
new file mode 100644
index 0000000..2e249f6
--- /dev/null
+++ b/woff2/glyph.h
@@ -0,0 +1,71 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Data model and I/O for glyph data within sfnt format files for the purpose of
+// performing the preprocessing step of the WOFF 2.0 conversion.
+
+#ifndef BROTLI_WOFF2_GLYPH_H_
+#define BROTLI_WOFF2_GLYPH_H_
+
+#include <stddef.h>
+#include <inttypes.h>
+#include <vector>
+
+namespace woff2 {
+
+// Represents a parsed simple or composite glyph. The composite glyph data and
+// instructions are un-parsed and we keep only pointers to the raw data,
+// therefore the glyph is valid only so long as the data from which it was
+// is around.
+class Glyph {
+ public:
+ Glyph() : instructions_size(0), composite_data_size(0) {}
+
+ // Bounding box.
+ int16_t x_min;
+ int16_t x_max;
+ int16_t y_min;
+ int16_t y_max;
+
+ // Instructions.
+ uint16_t instructions_size;
+ const uint8_t* instructions_data;
+
+ // Data model for simple glyphs.
+ struct Point {
+ int x;
+ int y;
+ bool on_curve;
+ };
+ std::vector<std::vector<Point> > contours;
+
+ // Data for composite glyphs.
+ const uint8_t* composite_data;
+ uint32_t composite_data_size;
+ bool have_instructions;
+};
+
+// Parses the glyph from the given data. Returns false on parsing failure or
+// buffer overflow. The glyph is valid only so long as the input data pointer is
+// valid.
+bool ReadGlyph(const uint8_t* data, size_t len, Glyph* glyph);
+
+// Stores the glyph into the specified dst buffer. The *dst_size is the buffer
+// size on entry and is set to the actual (unpadded) stored size on exit.
+// Returns false on buffer overflow.
+bool StoreGlyph(const Glyph& glyph, uint8_t* dst, size_t* dst_size);
+
+} // namespace woff2
+
+#endif // BROTLI_WOFF2_GLYPH_H_
diff --git a/woff2/normalize.cc b/woff2/normalize.cc
new file mode 100644
index 0000000..ef9f158
--- /dev/null
+++ b/woff2/normalize.cc
@@ -0,0 +1,194 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Glyph normalization
+
+#include "./normalize.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+#include "./ots.h"
+#include "./port.h"
+#include "./font.h"
+#include "./glyph.h"
+#include "./round.h"
+#include "./store_bytes.h"
+
+namespace woff2 {
+
+namespace {
+
+void StoreLoca(int index_fmt, uint32_t value, size_t* offset, uint8_t* dst) {
+ if (index_fmt == 0) {
+ Store16(value >> 1, offset, dst);
+ } else {
+ StoreU32(value, offset, dst);
+ }
+}
+
+void NormalizeSimpleGlyphBoundingBox(Glyph* glyph) {
+ if (glyph->contours.empty() || glyph->contours[0].empty()) {
+ return;
+ }
+ int16_t x_min = glyph->contours[0][0].x;
+ int16_t y_min = glyph->contours[0][0].y;
+ int16_t x_max = x_min;
+ int16_t y_max = y_min;
+ for (const auto& contour : glyph->contours) {
+ for (const auto& point : contour) {
+ if (point.x < x_min) x_min = point.x;
+ if (point.x > x_max) x_max = point.x;
+ if (point.y < y_min) y_min = point.y;
+ if (point.y > y_max) y_max = point.y;
+ }
+ }
+ glyph->x_min = x_min;
+ glyph->y_min = y_min;
+ glyph->x_max = x_max;
+ glyph->y_max = y_max;
+}
+
+} // namespace
+
+bool NormalizeGlyphs(Font* font) {
+ Font::Table* head_table = font->FindTable(kHeadTableTag);
+ Font::Table* glyf_table = font->FindTable(kGlyfTableTag);
+ Font::Table* loca_table = font->FindTable(kLocaTableTag);
+ if (head_table == NULL || loca_table == NULL || glyf_table == NULL) {
+ return OTS_FAILURE();
+ }
+ int index_fmt = head_table->data[51];
+ int num_glyphs = NumGlyphs(*font);
+
+ // We need to allocate a bit more than its original length for the normalized
+ // glyf table, since it can happen that the glyphs in the original table are
+ // 2-byte aligned, while in the normalized table they are 4-byte aligned.
+ // That gives a maximum of 2 bytes increase per glyph. However, there is no
+ // theoretical guarantee that the total size of the flags plus the coordinates
+ // is the smallest possible in the normalized version, so we have to allow
+ // some general overhead.
+ // TODO(user) Figure out some more precise upper bound on the size of
+ // the overhead.
+ size_t max_normalized_glyf_size = 1.1 * glyf_table->length + 2 * num_glyphs;
+
+ glyf_table->buffer.resize(max_normalized_glyf_size);
+ loca_table->buffer.resize(Round4(loca_table->length));
+ uint8_t* glyf_dst = &glyf_table->buffer[0];
+ uint8_t* loca_dst = &loca_table->buffer[0];
+ uint32_t glyf_offset = 0;
+ size_t loca_offset = 0;
+
+ for (int i = 0; i < num_glyphs; ++i) {
+ StoreLoca(index_fmt, glyf_offset, &loca_offset, loca_dst);
+ Glyph glyph;
+ const uint8_t* glyph_data;
+ size_t glyph_size;
+ if (!GetGlyphData(*font, i, &glyph_data, &glyph_size) ||
+ (glyph_size > 0 && !ReadGlyph(glyph_data, glyph_size, &glyph))) {
+ return OTS_FAILURE();
+ }
+ NormalizeSimpleGlyphBoundingBox(&glyph);
+ size_t glyf_dst_size = glyf_table->buffer.size() - glyf_offset;
+ if (!StoreGlyph(glyph, glyf_dst + glyf_offset, &glyf_dst_size)) {
+ return OTS_FAILURE();
+ }
+ glyf_dst_size = Round4(glyf_dst_size);
+ if (glyf_dst_size > std::numeric_limits<uint32_t>::max() ||
+ glyf_offset + static_cast<uint32_t>(glyf_dst_size) < glyf_offset ||
+ (index_fmt == 0 && glyf_offset + glyf_dst_size >= (1UL << 17))) {
+ return OTS_FAILURE();
+ }
+ glyf_offset += glyf_dst_size;
+ }
+ StoreLoca(index_fmt, glyf_offset, &loca_offset, loca_dst);
+
+ glyf_table->buffer.resize(glyf_offset);
+ glyf_table->data = &glyf_table->buffer[0];
+ glyf_table->length = glyf_offset;
+ loca_table->data = &loca_table->buffer[0];
+
+ return true;
+}
+
+bool NormalizeOffsets(Font* font) {
+ uint32_t offset = 12 + 16 * font->num_tables;
+ for (auto& i : font->tables) {
+ i.second.offset = offset;
+ offset += Round4(i.second.length);
+ }
+ return true;
+}
+
+namespace {
+
+uint32_t ComputeChecksum(const uint8_t* buf, size_t size) {
+ uint32_t checksum = 0;
+ for (size_t i = 0; i < size; i += 4) {
+ checksum += ((buf[i] << 24) |
+ (buf[i + 1] << 16) |
+ (buf[i + 2] << 8) |
+ buf[i + 3]);
+ }
+ return checksum;
+}
+
+uint32_t ComputeHeaderChecksum(const Font& font) {
+ uint32_t checksum = font.flavor;
+ uint16_t max_pow2 = font.num_tables ? Log2Floor(font.num_tables) : 0;
+ uint16_t search_range = max_pow2 ? 1 << (max_pow2 + 4) : 0;
+ uint16_t range_shift = (font.num_tables << 4) - search_range;
+ checksum += (font.num_tables << 16 | search_range);
+ checksum += (max_pow2 << 16 | range_shift);
+ for (const auto& i : font.tables) {
+ checksum += i.second.tag;
+ checksum += i.second.checksum;
+ checksum += i.second.offset;
+ checksum += i.second.length;
+ }
+ return checksum;
+}
+
+} // namespace
+
+bool FixChecksums(Font* font) {
+ Font::Table* head_table = font->FindTable(kHeadTableTag);
+ if (head_table == NULL || head_table->length < 12) {
+ return OTS_FAILURE();
+ }
+ head_table->buffer.resize(Round4(head_table->length));
+ uint8_t* head_buf = &head_table->buffer[0];
+ memcpy(head_buf, head_table->data, Round4(head_table->length));
+ head_table->data = head_buf;
+ size_t offset = 8;
+ StoreU32(0, &offset, head_buf);
+ uint32_t file_checksum = 0;
+ for (auto& i : font->tables) {
+ Font::Table* table = &i.second;
+ table->checksum = ComputeChecksum(table->data, table->length);
+ file_checksum += table->checksum;
+ }
+ file_checksum += ComputeHeaderChecksum(*font);
+ offset = 8;
+ StoreU32(0xb1b0afba - file_checksum, &offset, head_buf);
+ return true;
+}
+
+bool NormalizeFont(Font* font) {
+ return (NormalizeGlyphs(font) &&
+ NormalizeOffsets(font) &&
+ FixChecksums(font));
+}
+
+} // namespace woff2
diff --git a/woff2/normalize.h b/woff2/normalize.h
new file mode 100644
index 0000000..b3d8331
--- /dev/null
+++ b/woff2/normalize.h
@@ -0,0 +1,45 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Functions for normalizing fonts. Since the WOFF 2.0 decoder creates font
+// files in normalized form, the WOFF 2.0 conversion is guaranteed to be
+// lossless (in a bitwise sense) only for normalized font files.
+
+#ifndef BROTLI_WOFF2_NORMALIZE_H_
+#define BROTLI_WOFF2_NORMALIZE_H_
+
+namespace woff2 {
+
+struct Font;
+
+// Changes the offset fields of the table headers so that the data for the
+// tables will be written in order of increasing tag values, without any gaps
+// other than the 4-byte padding.
+bool NormalizeOffsets(Font* font);
+
+// Changes the checksum fields of the table headers and the checksum field of
+// the head table so that it matches the current data.
+bool FixChecksums(Font* font);
+
+// Parses each of the glyphs in the font and writes them again to the glyf
+// table in normalized form, as defined by the StoreGlyph() function. Changes
+// the loca table accordingly.
+bool NormalizeGlyphs(Font* font);
+
+// Performs all of the normalization steps above.
+bool NormalizeFont(Font* font);
+
+} // namespace woff2
+
+#endif // BROTLI_WOFF2_NORMALIZE_H_
diff --git a/woff2/ots.h b/woff2/ots.h
new file mode 100644
index 0000000..4eac1cb
--- /dev/null
+++ b/woff2/ots.h
@@ -0,0 +1,153 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The parts of ots.h & opentype-sanitiser.h that we need, taken from the
+// https://code.google.com/p/ots/ project.
+
+#ifndef BROTLI_WOFF2_OTS_H_
+#define BROTLI_WOFF2_OTS_H_
+
+#include <stdint.h>
+#include <arpa/inet.h>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
+
+namespace ots {
+
+#if defined(_MSC_VER) || !defined(OTS_DEBUG)
+#define OTS_FAILURE() false
+#else
+#define OTS_FAILURE() ots::Failure(__FILE__, __LINE__, __PRETTY_FUNCTION__)
+inline bool Failure(const char *f, int l, const char *fn) {
+ std::fprintf(stderr, "ERROR at %s:%d (%s)\n", f, l, fn);
+ std::fflush(stderr);
+ return false;
+}
+#endif
+
+// -----------------------------------------------------------------------------
+// Buffer helper class
+//
+// This class performs some trivial buffer operations while checking for
+// out-of-bounds errors. As a family they return false if anything is amiss,
+// updating the current offset otherwise.
+// -----------------------------------------------------------------------------
+class Buffer {
+ public:
+ Buffer(const uint8_t *buffer, size_t len)
+ : buffer_(buffer),
+ length_(len),
+ offset_(0) { }
+
+ bool Skip(size_t n_bytes) {
+ return Read(NULL, n_bytes);
+ }
+
+ bool Read(uint8_t *buffer, size_t n_bytes) {
+ if (n_bytes > 1024 * 1024 * 1024) {
+ return OTS_FAILURE();
+ }
+ if ((offset_ + n_bytes > length_) ||
+ (offset_ > length_ - n_bytes)) {
+ return OTS_FAILURE();
+ }
+ if (buffer) {
+ std::memcpy(buffer, buffer_ + offset_, n_bytes);
+ }
+ offset_ += n_bytes;
+ return true;
+ }
+
+ inline bool ReadU8(uint8_t *value) {
+ if (offset_ + 1 > length_) {
+ return OTS_FAILURE();
+ }
+ *value = buffer_[offset_];
+ ++offset_;
+ return true;
+ }
+
+ bool ReadU16(uint16_t *value) {
+ if (offset_ + 2 > length_) {
+ return OTS_FAILURE();
+ }
+ std::memcpy(value, buffer_ + offset_, sizeof(uint16_t));
+ *value = ntohs(*value);
+ offset_ += 2;
+ return true;
+ }
+
+ bool ReadS16(int16_t *value) {
+ return ReadU16(reinterpret_cast<uint16_t*>(value));
+ }
+
+ bool ReadU24(uint32_t *value) {
+ if (offset_ + 3 > length_) {
+ return OTS_FAILURE();
+ }
+ *value = static_cast<uint32_t>(buffer_[offset_]) << 16 |
+ static_cast<uint32_t>(buffer_[offset_ + 1]) << 8 |
+ static_cast<uint32_t>(buffer_[offset_ + 2]);
+ offset_ += 3;
+ return true;
+ }
+
+ bool ReadU32(uint32_t *value) {
+ if (offset_ + 4 > length_) {
+ return OTS_FAILURE();
+ }
+ std::memcpy(value, buffer_ + offset_, sizeof(uint32_t));
+ *value = ntohl(*value);
+ offset_ += 4;
+ return true;
+ }
+
+ bool ReadS32(int32_t *value) {
+ return ReadU32(reinterpret_cast<uint32_t*>(value));
+ }
+
+ bool ReadTag(uint32_t *value) {
+ if (offset_ + 4 > length_) {
+ return OTS_FAILURE();
+ }
+ std::memcpy(value, buffer_ + offset_, sizeof(uint32_t));
+ offset_ += 4;
+ return true;
+ }
+
+ bool ReadR64(uint64_t *value) {
+ if (offset_ + 8 > length_) {
+ return OTS_FAILURE();
+ }
+ std::memcpy(value, buffer_ + offset_, sizeof(uint64_t));
+ offset_ += 8;
+ return true;
+ }
+
+ const uint8_t *buffer() const { return buffer_; }
+ size_t offset() const { return offset_; }
+ size_t length() const { return length_; }
+
+ void set_offset(size_t newoffset) { offset_ = newoffset; }
+
+ private:
+ const uint8_t * const buffer_;
+ const size_t length_;
+ size_t offset_;
+};
+
+} // namespace ots
+
+#endif // BROTLI_WOFF2_OTS_H_
diff --git a/woff2/port.h b/woff2/port.h
new file mode 100644
index 0000000..e7a2708
--- /dev/null
+++ b/woff2/port.h
@@ -0,0 +1,46 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Helper function for bit twiddling
+
+#ifndef BROTLI_WOFF2_PORT_H_
+#define BROTLI_WOFF2_PORT_H_
+
+namespace woff2 {
+
+typedef unsigned int uint32;
+
+// Returns floor(log2(n)), or -1 when n == 0.
+// NOTE(review): the portable branch uses assert; this header has no
+// visible #include <assert.h> / <cassert> — confirm an includer provides it.
+inline int Log2Floor(uint32 n) {
+#if defined(__GNUC__)
+  // __builtin_clz(n) is in [0, 31] for n != 0, so (31 ^ clz) == 31 - clz,
+  // i.e. the index of the highest set bit.
+  return n == 0 ? -1 : 31 ^ __builtin_clz(n);
+#else
+  if (n == 0)
+    return -1;
+  int log = 0;
+  uint32 value = n;
+  // Binary search on the bit position: probe shifts of 16, 8, 4, 2, 1.
+  for (int i = 4; i >= 0; --i) {
+    int shift = (1 << i);
+    uint32 x = value >> shift;
+    if (x != 0) {
+      value = x;
+      log += shift;
+    }
+  }
+  // After the search the surviving value must be exactly the top bit.
+  assert(value == 1);
+  return log;
+#endif
+}
+
+} // namespace woff2
+#endif // BROTLI_WOFF2_PORT_H_
diff --git a/woff2/round.h b/woff2/round.h
new file mode 100644
index 0000000..4d88862
--- /dev/null
+++ b/woff2/round.h
@@ -0,0 +1,33 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Helper for rounding
+
+#ifndef BROTLI_WOFF2_ROUND_H_
+#define BROTLI_WOFF2_ROUND_H_
+
+namespace woff2 {
+
+// Round a value up to the nearest multiple of 4. Don't round the value in the
+// case that rounding up overflows.
+// Round a value up to the nearest multiple of 4. Don't round the value in the
+// case that rounding up overflows.
+// NOTE(review): uses std::numeric_limits but this header shows no
+// #include <limits>; verify an includer supplies it before this point.
+template<typename T> T Round4(T value) {
+  if (std::numeric_limits<T>::max() - value < 3) {
+    return value;  // would overflow; return unrounded
+  }
+  return (value + 3) & ~3;
+}
+
+} // namespace woff2
+
+#endif // BROTLI_WOFF2_ROUND_H_
diff --git a/woff2/store_bytes.h b/woff2/store_bytes.h
new file mode 100644
index 0000000..37054b2
--- /dev/null
+++ b/woff2/store_bytes.h
@@ -0,0 +1,61 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Helper functions for storing integer values into byte streams.
+// No bounds checking is performed, that is the responsibility of the caller.
+
+#ifndef BROTLI_WOFF2_STORE_BYTES_H_
+#define BROTLI_WOFF2_STORE_BYTES_H_
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <string.h>
+
+namespace woff2 {
+
+// Writes x big-endian at dst[offset..offset+3]; returns the next offset.
+// No bounds checking (see file header comment).
+inline size_t StoreU32(uint8_t* dst, size_t offset, uint32_t x) {
+  dst[offset] = x >> 24;
+  dst[offset + 1] = x >> 16;
+  dst[offset + 2] = x >> 8;
+  dst[offset + 3] = x;
+  return offset + 4;
+}
+
+// Writes the low 16 bits of x big-endian at dst[offset..offset+1];
+// returns the next offset. No bounds checking.
+inline size_t Store16(uint8_t* dst, size_t offset, int x) {
+  dst[offset] = x >> 8;
+  dst[offset + 1] = x;
+  return offset + 2;
+}
+
+// Cursor-style overload: writes val big-endian at *offset and advances
+// *offset by 4. No bounds checking.
+inline void StoreU32(uint32_t val, size_t* offset, uint8_t* dst) {
+  dst[(*offset)++] = val >> 24;
+  dst[(*offset)++] = val >> 16;
+  dst[(*offset)++] = val >> 8;
+  dst[(*offset)++] = val;
+}
+
+// Cursor-style overload: writes the low 16 bits of val big-endian at
+// *offset and advances *offset by 2. No bounds checking.
+inline void Store16(int val, size_t* offset, uint8_t* dst) {
+  dst[(*offset)++] = val >> 8;
+  dst[(*offset)++] = val;
+}
+
+// Copies len raw bytes to dst at *offset and advances *offset by len.
+// No bounds checking.
+inline void StoreBytes(const uint8_t* data, size_t len,
+                       size_t* offset, uint8_t* dst) {
+  memcpy(&dst[*offset], data, len);
+  *offset += len;
+}
+
+} // namespace woff2
+
+#endif // BROTLI_WOFF2_STORE_BYTES_H_
diff --git a/woff2/transform.cc b/woff2/transform.cc
new file mode 100644
index 0000000..a218ed1
--- /dev/null
+++ b/woff2/transform.cc
@@ -0,0 +1,263 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Library for preprocessing fonts as part of the WOFF 2.0 conversion.
+
+#include "./transform.h"
+
+#include <complex> // for std::abs
+
+#include "./ots.h"
+#include "./font.h"
+#include "./glyph.h"
+
+namespace woff2 {
+
+namespace {
+
+const int FLAG_ARG_1_AND_2_ARE_WORDS = 1 << 0;
+const int FLAG_WE_HAVE_INSTRUCTIONS = 1 << 8;
+
+// Appends len raw bytes to *out (no-op when len == 0, so data may then
+// be NULL without invoking memcpy on a null pointer).
+void WriteBytes(std::vector<uint8_t>* out, const uint8_t* data, size_t len) {
+  if (len == 0) return;
+  size_t offset = out->size();
+  out->resize(offset + len);
+  memcpy(&(*out)[offset], data, len);
+}
+
+// Appends the whole of `in` to *out, element by element.
+// NOTE(review): `int i` vs `in.size()` is a signed/unsigned comparison;
+// harmless for realistic sizes but worth cleaning up.
+void WriteBytes(std::vector<uint8_t>* out, const std::vector<uint8_t>& in) {
+  for (int i = 0; i < in.size(); ++i) {
+    out->push_back(in[i]);
+  }
+}
+
+// Appends the low 16 bits of value, big-endian, to *out.
+void WriteUShort(std::vector<uint8_t>* out, int value) {
+  out->push_back(value >> 8);
+  out->push_back(value & 255);
+}
+
+// Appends the low 32 bits of value, big-endian, to *out.
+void WriteLong(std::vector<uint8_t>* out, int value) {
+  out->push_back((value >> 24) & 255);
+  out->push_back((value >> 16) & 255);
+  out->push_back((value >> 8) & 255);
+  out->push_back(value & 255);
+}
+
+// Appends value in the variable-length 255UInt16 encoding (MicroType
+// Express): [0,252] one byte; [253,505] prefix 255 + (value-253);
+// [506,761] prefix 254 + (value-506); otherwise prefix 253 + 2-byte word.
+// Mirrors the Read255UShort decoder in woff2.cc.
+void Write255UShort(std::vector<uint8_t>* out, int value) {
+  if (value < 253) {
+    out->push_back(value);
+  } else if (value < 506) {
+    out->push_back(255);
+    out->push_back(value - 253);
+  } else if (value < 762) {
+    out->push_back(254);
+    out->push_back(value - 506);
+  } else {
+    out->push_back(253);
+    out->push_back(value >> 8);
+    out->push_back(value & 0xff);
+  }
+}
+
+// Glyf table preprocessing, based on
+// GlyfEncoder.java
+// but only the "sbbox" and "cbbox" options are supported.
+class GlyfEncoder {
+ public:
+  // sbbox_ (simple-glyph bboxes) is off, cbbox_ (composite bboxes) is on;
+  // the bitmap has one bit per glyph, padded to a whole number of uint32s.
+  explicit GlyfEncoder(int num_glyphs)
+      : sbbox_(false), cbbox_(true), n_glyphs_(num_glyphs) {
+    bbox_bitmap_.resize(((num_glyphs + 31) >> 5) << 2);
+  }
+
+  // Routes one glyph into the per-kind encoders; an empty glyph is
+  // recorded as zero contours. Always returns true.
+  bool Encode(int glyph_id, const Glyph& glyph) {
+    if (glyph.composite_data_size > 0) {
+      WriteCompositeGlyph(glyph_id, glyph);
+    } else if (glyph.contours.size() > 0) {
+      WriteSimpleGlyph(glyph_id, glyph);
+    } else {
+      WriteUShort(&n_contour_stream_, 0);
+    }
+    return true;
+  }
+
+  // Serializes the transformed-glyf header (version, glyph count,
+  // index_format placeholder, substream sizes) followed by the seven
+  // substreams, in the order the decoder expects.
+  void GetTransformedGlyfBytes(std::vector<uint8_t>* result) {
+    WriteLong(result, 0);  // version
+    WriteUShort(result, n_glyphs_);
+    WriteUShort(result, 0);  // index_format, will be set later
+    WriteLong(result, n_contour_stream_.size());
+    WriteLong(result, n_points_stream_.size());
+    WriteLong(result, flag_byte_stream_.size());
+    WriteLong(result, glyph_stream_.size());
+    WriteLong(result, composite_stream_.size());
+    WriteLong(result, bbox_bitmap_.size() + bbox_stream_.size());
+    WriteLong(result, instruction_stream_.size());
+    WriteBytes(result, n_contour_stream_);
+    WriteBytes(result, n_points_stream_);
+    WriteBytes(result, flag_byte_stream_);
+    WriteBytes(result, glyph_stream_);
+    WriteBytes(result, composite_stream_);
+    WriteBytes(result, bbox_bitmap_);
+    WriteBytes(result, bbox_stream_);
+    WriteBytes(result, instruction_stream_);
+  }
+
+ private:
+  // Emits the instruction length into the glyph stream and the raw
+  // instruction bytes into the instruction stream.
+  void WriteInstructions(const Glyph& glyph) {
+    Write255UShort(&glyph_stream_, glyph.instructions_size);
+    WriteBytes(&instruction_stream_,
+               glyph.instructions_data, glyph.instructions_size);
+  }
+
+  // Encodes a simple glyph: contour count, optional bbox, per-contour
+  // point counts, then delta-encoded point triplets and instructions.
+  void WriteSimpleGlyph(int glyph_id, const Glyph& glyph) {
+    int num_contours = glyph.contours.size();
+    WriteUShort(&n_contour_stream_, num_contours);
+    if (sbbox_) {
+      WriteBbox(glyph_id, glyph);
+    }
+    // TODO: check that bbox matches, write bbox if not
+    for (int i = 0; i < num_contours; i++) {
+      Write255UShort(&n_points_stream_, glyph.contours[i].size());
+    }
+    // Points are stored as deltas from the previous point, carried
+    // across contour boundaries.
+    int lastX = 0;
+    int lastY = 0;
+    for (int i = 0; i < num_contours; i++) {
+      int num_points = glyph.contours[i].size();
+      for (int j = 0; j < num_points; j++) {
+        int x = glyph.contours[i][j].x;
+        int y = glyph.contours[i][j].y;
+        int dx = x - lastX;
+        int dy = y - lastY;
+        WriteTriplet(glyph.contours[i][j].on_curve, dx, dy);
+        lastX = x;
+        lastY = y;
+      }
+    }
+    if (num_contours > 0) {
+      WriteInstructions(glyph);
+    }
+  }
+
+  // Encodes a composite glyph: nContours = -1 marker, optional bbox, the
+  // raw component records, and instructions when the glyph has them.
+  void WriteCompositeGlyph(int glyph_id, const Glyph& glyph) {
+    WriteUShort(&n_contour_stream_, -1);
+    if (cbbox_) {
+      WriteBbox(glyph_id, glyph);
+    }
+    WriteBytes(&composite_stream_,
+               glyph.composite_data,
+               glyph.composite_data_size);
+    if (glyph.have_instructions) {
+      WriteInstructions(glyph);
+    }
+  }
+
+  // Marks glyph_id in the bbox bitmap (MSB-first within each byte) and
+  // appends the four bbox coordinates to the bbox stream.
+  void WriteBbox(int glyph_id, const Glyph& glyph) {
+    bbox_bitmap_[glyph_id >> 3] |= 0x80 >> (glyph_id & 7);
+    WriteUShort(&bbox_stream_, glyph.x_min);
+    WriteUShort(&bbox_stream_, glyph.y_min);
+    WriteUShort(&bbox_stream_, glyph.x_max);
+    WriteUShort(&bbox_stream_, glyph.y_max);
+  }
+
+  // Encodes one (on_curve, dx, dy) delta as a flag byte plus 1-4 data
+  // bytes, choosing the shortest of five magnitude classes. Flag-byte
+  // ranges match the TripletDecode cases in woff2.cc (flag < 10, < 20,
+  // < 84, < 120, < 124, else).
+  void WriteTriplet(bool on_curve, int x, int y) {
+    int abs_x = std::abs(x);
+    int abs_y = std::abs(y);
+    int on_curve_bit = on_curve ? 0 : 128;
+    int x_sign_bit = (x < 0) ? 0 : 1;
+    int y_sign_bit = (y < 0) ? 0 : 1;
+    int xy_sign_bits = x_sign_bit + 2 * y_sign_bit;
+    if (x == 0 && abs_y < 1280) {
+      flag_byte_stream_.push_back(on_curve_bit +
+                                  ((abs_y & 0xf00) >> 7) + y_sign_bit);
+      glyph_stream_.push_back(abs_y & 0xff);
+    } else if (y == 0 && abs_x < 1280) {
+      flag_byte_stream_.push_back(on_curve_bit + 10 +
+                                  ((abs_x & 0xf00) >> 7) + x_sign_bit);
+      glyph_stream_.push_back(abs_x & 0xff);
+    } else if (abs_x < 65 && abs_y < 65) {
+      flag_byte_stream_.push_back(on_curve_bit + 20 +
+                                  ((abs_x - 1) & 0x30) +
+                                  (((abs_y - 1) & 0x30) >> 2) +
+                                  xy_sign_bits);
+      glyph_stream_.push_back((((abs_x - 1) & 0xf) << 4) | ((abs_y - 1) & 0xf));
+    } else if (abs_x < 769 && abs_y < 769) {
+      flag_byte_stream_.push_back(on_curve_bit + 84 +
+                                  12 * (((abs_x - 1) & 0x300) >> 8) +
+                                  (((abs_y - 1) & 0x300) >> 6) + xy_sign_bits);
+      glyph_stream_.push_back((abs_x - 1) & 0xff);
+      glyph_stream_.push_back((abs_y - 1) & 0xff);
+    } else if (abs_x < 4096 && abs_y < 4096) {
+      flag_byte_stream_.push_back(on_curve_bit + 120 + xy_sign_bits);
+      glyph_stream_.push_back(abs_x >> 4);
+      glyph_stream_.push_back(((abs_x & 0xf) << 4) | (abs_y >> 8));
+      glyph_stream_.push_back(abs_y & 0xff);
+    } else {
+      flag_byte_stream_.push_back(on_curve_bit + 124 + xy_sign_bits);
+      glyph_stream_.push_back(abs_x >> 8);
+      glyph_stream_.push_back(abs_x & 0xff);
+      glyph_stream_.push_back(abs_y >> 8);
+      glyph_stream_.push_back(abs_y & 0xff);
+    }
+  }
+
+  // The seven substreams of the transformed glyf table, plus the
+  // per-glyph bbox presence bitmap.
+  std::vector<uint8_t> n_contour_stream_;
+  std::vector<uint8_t> n_points_stream_;
+  std::vector<uint8_t> flag_byte_stream_;
+  std::vector<uint8_t> composite_stream_;
+  std::vector<uint8_t> bbox_bitmap_;
+  std::vector<uint8_t> bbox_stream_;
+  std::vector<uint8_t> glyph_stream_;
+  std::vector<uint8_t> instruction_stream_;
+  bool sbbox_;   // emit bboxes for simple glyphs
+  bool cbbox_;   // emit bboxes for composite glyphs
+  int n_glyphs_;
+};
+
+} // namespace
+
+// Encodes every glyph into a transformed glyf table and registers it
+// (plus a zero-length transformed loca) under tags with every byte's MSB
+// flipped (tag ^ 0x80808080). Returns false on any glyph read failure or
+// if the head table is missing/too short to supply indexToLocFormat.
+bool TransformGlyfAndLocaTables(Font* font) {
+  Font::Table* transformed_glyf = &font->tables[kGlyfTableTag ^ 0x80808080];
+  Font::Table* transformed_loca = &font->tables[kLocaTableTag ^ 0x80808080];
+
+  int num_glyphs = NumGlyphs(*font);
+  GlyfEncoder encoder(num_glyphs);
+  for (int i = 0; i < num_glyphs; ++i) {
+    Glyph glyph;
+    const uint8_t* glyph_data;
+    size_t glyph_size;
+    // Zero-size glyphs are legal (empty glyph); only parse non-empty data.
+    if (!GetGlyphData(*font, i, &glyph_data, &glyph_size) ||
+        (glyph_size > 0 && !ReadGlyph(glyph_data, glyph_size, &glyph))) {
+      return OTS_FAILURE();
+    }
+    encoder.Encode(i, glyph);
+  }
+  encoder.GetTransformedGlyfBytes(&transformed_glyf->buffer);
+
+  const Font::Table* head_table = font->FindTable(kHeadTableTag);
+  if (head_table == NULL || head_table->length < 52) {
+    return OTS_FAILURE();
+  }
+  // Patch byte 7 of the header (the index_format placeholder) with the
+  // low byte of head.indexToLocFormat (head offset 50-51).
+  transformed_glyf->buffer[7] = head_table->data[51]; // index_format
+
+  transformed_glyf->tag = kGlyfTableTag ^ 0x80808080;
+  transformed_glyf->length = transformed_glyf->buffer.size();
+  transformed_glyf->data = transformed_glyf->buffer.data();
+
+  // The transformed loca is intentionally empty; it is reconstructed
+  // from the transformed glyf on decode.
+  transformed_loca->tag = kLocaTableTag ^ 0x80808080;
+  transformed_loca->length = 0;
+  transformed_loca->data = NULL;
+
+  return true;
+}
+
+} // namespace woff2
diff --git a/woff2/transform.h b/woff2/transform.h
new file mode 100644
index 0000000..dd63e73
--- /dev/null
+++ b/woff2/transform.h
@@ -0,0 +1,31 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Library for preprocessing fonts as part of the WOFF 2.0 conversion.
+
+#ifndef BROTLI_WOFF2_TRANSFORM_H_
+#define BROTLI_WOFF2_TRANSFORM_H_
+
+#include "./font.h"
+
+namespace woff2 {
+
+// Adds the transformed versions of the glyf and loca tables to the font. The
+// transformed loca table has zero length. The tag of the transformed tables is
+// derived from the original tag by flipping the MSBs of every byte.
+bool TransformGlyfAndLocaTables(Font* font);
+
+} // namespace woff2
+
+#endif // BROTLI_WOFF2_TRANSFORM_H_
diff --git a/woff2/woff2.cc b/woff2/woff2.cc
new file mode 100644
index 0000000..43e0861
--- /dev/null
+++ b/woff2/woff2.cc
@@ -0,0 +1,1313 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Library for converting WOFF2 format font files to their TTF versions.
+
+#include "./woff2.h"
+
+#include <stdlib.h>
+#include <complex>
+#include <cstring>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "./ots.h"
+#include "./decode.h"
+#include "./encode.h"
+#include "./font.h"
+#include "./normalize.h"
+#include "./round.h"
+#include "./store_bytes.h"
+#include "./transform.h"
+
+namespace woff2 {
+
+namespace {
+
+using std::string;
+using std::vector;
+
+
+
+// simple glyph flags
+const int kGlyfOnCurve = 1 << 0;
+const int kGlyfXShort = 1 << 1;
+const int kGlyfYShort = 1 << 2;
+const int kGlyfRepeat = 1 << 3;
+const int kGlyfThisXIsSame = 1 << 4;
+const int kGlyfThisYIsSame = 1 << 5;
+
+// composite glyph flags
+const int FLAG_ARG_1_AND_2_ARE_WORDS = 1 << 0;
+const int FLAG_ARGS_ARE_XY_VALUES = 1 << 1;
+const int FLAG_ROUND_XY_TO_GRID = 1 << 2;
+const int FLAG_WE_HAVE_A_SCALE = 1 << 3;
+const int FLAG_RESERVED = 1 << 4;
+const int FLAG_MORE_COMPONENTS = 1 << 5;
+const int FLAG_WE_HAVE_AN_X_AND_Y_SCALE = 1 << 6;
+const int FLAG_WE_HAVE_A_TWO_BY_TWO = 1 << 7;
+const int FLAG_WE_HAVE_INSTRUCTIONS = 1 << 8;
+const int FLAG_USE_MY_METRICS = 1 << 9;
+const int FLAG_OVERLAP_COMPOUND = 1 << 10;
+const int FLAG_SCALED_COMPONENT_OFFSET = 1 << 11;
+const int FLAG_UNSCALED_COMPONENT_OFFSET = 1 << 12;
+
+const size_t kSfntHeaderSize = 12;
+const size_t kSfntEntrySize = 16;
+const size_t kCheckSumAdjustmentOffset = 8;
+
+const size_t kEndPtsOfContoursOffset = 10;
+const size_t kCompositeGlyphBegin = 10;
+
+// Note that the byte order is big-endian, not the same as ots.cc
+#define TAG(a, b, c, d) ((a << 24) | (b << 16) | (c << 8) | d)
+
+const uint32_t kWoff2Signature = 0x774f4632; // "wOF2"
+
+const unsigned int kWoff2FlagsContinueStream = 1 << 4;
+const unsigned int kWoff2FlagsTransform = 1 << 5;
+
+const size_t kWoff2HeaderSize = 44;
+const size_t kWoff2EntrySize = 20;
+
+const size_t kLzmaHeaderSize = 13;
+
+// Compression type values common to both short and long formats
+const uint32_t kCompressionTypeMask = 0xf;
+const uint32_t kCompressionTypeNone = 0;
+const uint32_t kCompressionTypeGzip = 1;
+const uint32_t kCompressionTypeLzma = 2;
+const uint32_t kCompressionTypeBrotli = 3;
+const uint32_t kCompressionTypeLzham = 4;
+
+// This is a special value for the short format only, as described in
+// "Design for compressed header format" in draft doc.
+const uint32_t kShortFlagsContinue = 3;
+
+struct Point {
+ int x;
+ int y;
+ bool on_curve;
+};
+
+struct Table {
+ uint32_t tag;
+ uint32_t flags;
+ uint32_t src_offset;
+ uint32_t src_length;
+
+ uint32_t transform_length;
+
+ uint32_t dst_offset;
+ uint32_t dst_length;
+ const uint8_t* dst_data;
+};
+
+// Based on section 6.1.1 of MicroType Express draft spec
+// Based on section 6.1.1 of MicroType Express draft spec
+// Decodes one 255UInt16 value: code 253 = 2-byte word follows; 255 =
+// one byte + 253; 254 = one byte + 506; anything else is the value
+// itself. Inverse of Write255UShort in transform.cc.
+bool Read255UShort(ots::Buffer* buf, unsigned int* value) {
+  static const int kWordCode = 253;
+  static const int kOneMoreByteCode2 = 254;
+  static const int kOneMoreByteCode1 = 255;
+  static const int kLowestUCode = 253;
+  uint8_t code = 0;
+  if (!buf->ReadU8(&code)) {
+    return OTS_FAILURE();
+  }
+  if (code == kWordCode) {
+    uint16_t result = 0;
+    if (!buf->ReadU16(&result)) {
+      return OTS_FAILURE();
+    }
+    *value = result;
+    return true;
+  } else if (code == kOneMoreByteCode1) {
+    uint8_t result = 0;
+    if (!buf->ReadU8(&result)) {
+      return OTS_FAILURE();
+    }
+    *value = result + kLowestUCode;
+    return true;
+  } else if (code == kOneMoreByteCode2) {
+    uint8_t result = 0;
+    if (!buf->ReadU8(&result)) {
+      return OTS_FAILURE();
+    }
+    *value = result + kLowestUCode * 2;
+    return true;
+  } else {
+    *value = code;
+    return true;
+  }
+}
+
+// Decodes a UIntBase128: up to 5 bytes, 7 payload bits each, MSB of each
+// byte is the continuation flag. Fails on buffer underrun, on a value
+// that would overflow 32 bits, or if no terminating byte appears within
+// 5 bytes.
+bool ReadBase128(ots::Buffer* buf, uint32_t* value) {
+  uint32_t result = 0;
+  for (size_t i = 0; i < 5; ++i) {
+    uint8_t code = 0;
+    if (!buf->ReadU8(&code)) {
+      return OTS_FAILURE();
+    }
+    // If any of the top seven bits are set then we're about to overflow.
+    if (result & 0xe0000000) {
+      return OTS_FAILURE();
+    }
+    result = (result << 7) | (code & 0x7f);
+    if ((code & 0x80) == 0) {
+      *value = result;
+      return true;
+    }
+  }
+  // Make sure not to exceed the size bound
+  return OTS_FAILURE();
+}
+
+// Returns how many bytes StoreBase128 will emit for n (7 bits per byte).
+size_t Base128Size(size_t n) {
+  size_t size = 1;
+  for (; n >= 128; n >>= 7) ++size;
+  return size;
+}
+
+// Writes len as UIntBase128 at *offset (most significant 7-bit group
+// first; continuation bit 0x80 on all but the last byte) and advances
+// *offset. NOTE(review): `int i` vs `size_t size` mixes signedness in
+// the loop conditions; benign here since size <= 10.
+void StoreBase128(size_t len, size_t* offset, uint8_t* dst) {
+  size_t size = Base128Size(len);
+  for (int i = 0; i < size; ++i) {
+    int b = (int)(len >> (7 * (size - i - 1))) & 0x7f;
+    if (i < size - 1) {
+      b |= 0x80;
+    }
+    dst[(*offset)++] = b;
+  }
+}
+
+// Applies the sign encoded in bit 0 of flag: 1 = positive, 0 = negated.
+int WithSign(int flag, int baseval) {
+  // Precondition: 0 <= baseval < 65536 (to avoid integer overflow)
+  return (flag & 1) ? baseval : -baseval;
+}
+
+// Decodes n_points (flag, dx, dy) triplets: one flag byte per point from
+// flags_in, 1-4 data bytes per point from `in` (class chosen by flag
+// range, mirroring GlyfEncoder::WriteTriplet). Appends absolute points
+// to *result and reports how many data bytes were consumed. Bounds on
+// flags_in are the caller's responsibility; `in` is checked here.
+bool TripletDecode(const uint8_t* flags_in, const uint8_t* in, size_t in_size,
+    unsigned int n_points, std::vector<Point>* result,
+    size_t* in_bytes_consumed) {
+  int x = 0;
+  int y = 0;
+
+  // Every point needs at least one data byte; cheap early reject.
+  if (n_points > in_size) {
+    return OTS_FAILURE();
+  }
+  unsigned int triplet_index = 0;
+
+  for (unsigned int i = 0; i < n_points; ++i) {
+    uint8_t flag = flags_in[i];
+    // Top bit of the flag byte means "off-curve".
+    bool on_curve = !(flag >> 7);
+    flag &= 0x7f;
+    unsigned int n_data_bytes;
+    if (flag < 84) {
+      n_data_bytes = 1;
+    } else if (flag < 120) {
+      n_data_bytes = 2;
+    } else if (flag < 124) {
+      n_data_bytes = 3;
+    } else {
+      n_data_bytes = 4;
+    }
+    // Second clause guards against unsigned wraparound of the sum.
+    if (triplet_index + n_data_bytes > in_size ||
+        triplet_index + n_data_bytes < triplet_index) {
+      return OTS_FAILURE();
+    }
+    int dx, dy;
+    if (flag < 10) {
+      dx = 0;
+      dy = WithSign(flag, ((flag & 14) << 7) + in[triplet_index]);
+    } else if (flag < 20) {
+      dx = WithSign(flag, (((flag - 10) & 14) << 7) + in[triplet_index]);
+      dy = 0;
+    } else if (flag < 84) {
+      int b0 = flag - 20;
+      int b1 = in[triplet_index];
+      dx = WithSign(flag, 1 + (b0 & 0x30) + (b1 >> 4));
+      dy = WithSign(flag >> 1, 1 + ((b0 & 0x0c) << 2) + (b1 & 0x0f));
+    } else if (flag < 120) {
+      int b0 = flag - 84;
+      dx = WithSign(flag, 1 + ((b0 / 12) << 8) + in[triplet_index]);
+      dy = WithSign(flag >> 1,
+          1 + (((b0 % 12) >> 2) << 8) + in[triplet_index + 1]);
+    } else if (flag < 124) {
+      int b2 = in[triplet_index + 1];
+      dx = WithSign(flag, (in[triplet_index] << 4) + (b2 >> 4));
+      dy = WithSign(flag >> 1, ((b2 & 0x0f) << 8) + in[triplet_index + 2]);
+    } else {
+      dx = WithSign(flag, (in[triplet_index] << 8) + in[triplet_index + 1]);
+      dy = WithSign(flag >> 1,
+          (in[triplet_index + 2] << 8) + in[triplet_index + 3]);
+    }
+    triplet_index += n_data_bytes;
+    // Possible overflow but coordinate values are not security sensitive
+    x += dx;
+    y += dy;
+    result->push_back(Point());
+    Point& back = result->back();
+    back.x = x;
+    back.y = y;
+    back.on_curve = on_curve;
+  }
+  *in_bytes_consumed = triplet_index;
+  return true;
+}
+
+// This function stores just the point data. On entry, dst points to the
+// beginning of a simple glyph. Returns true on success.
+// This function stores just the point data. On entry, dst points to the
+// beginning of a simple glyph. Returns true on success.
+// Emits standard TrueType simple-glyph point data: flags (with repeat
+// compression) followed by x deltas then y deltas, sized per flag bits.
+// *glyph_size receives the total glyph length on success.
+bool StorePoints(const std::vector<Point>& points,
+    unsigned int n_contours, unsigned int instruction_length,
+    uint8_t* dst, size_t dst_size, size_t* glyph_size) {
+  // I believe that n_contours < 65536, in which case this is safe. However, a
+  // comment and/or an assert would be good.
+  unsigned int flag_offset = kEndPtsOfContoursOffset + 2 * n_contours + 2 +
+    instruction_length;
+  int last_flag = -1;
+  int repeat_count = 0;
+  int last_x = 0;
+  int last_y = 0;
+  unsigned int x_bytes = 0;
+  unsigned int y_bytes = 0;
+
+  // First pass: compute flag bytes (with kGlyfRepeat run-length
+  // compression) and tally how many coordinate bytes each axis needs.
+  for (unsigned int i = 0; i < points.size(); ++i) {
+    const Point& point = points[i];
+    int flag = point.on_curve ? kGlyfOnCurve : 0;
+    int dx = point.x - last_x;
+    int dy = point.y - last_y;
+    if (dx == 0) {
+      flag |= kGlyfThisXIsSame;
+    } else if (dx > -256 && dx < 256) {
+      // Short form: magnitude byte, sign carried in kGlyfThisXIsSame.
+      flag |= kGlyfXShort | (dx > 0 ? kGlyfThisXIsSame : 0);
+      x_bytes += 1;
+    } else {
+      x_bytes += 2;
+    }
+    if (dy == 0) {
+      flag |= kGlyfThisYIsSame;
+    } else if (dy > -256 && dy < 256) {
+      flag |= kGlyfYShort | (dy > 0 ? kGlyfThisYIsSame : 0);
+      y_bytes += 1;
+    } else {
+      y_bytes += 2;
+    }
+
+    if (flag == last_flag && repeat_count != 255) {
+      // Safe on first iteration: last_flag starts at -1, never equal.
+      dst[flag_offset - 1] |= kGlyfRepeat;
+      repeat_count++;
+    } else {
+      if (repeat_count != 0) {
+        if (flag_offset >= dst_size) {
+          return OTS_FAILURE();
+        }
+        dst[flag_offset++] = repeat_count;
+      }
+      if (flag_offset >= dst_size) {
+        return OTS_FAILURE();
+      }
+      dst[flag_offset++] = flag;
+      repeat_count = 0;
+    }
+    last_x = point.x;
+    last_y = point.y;
+    last_flag = flag;
+  }
+
+  // Flush a trailing repeat run.
+  if (repeat_count != 0) {
+    if (flag_offset >= dst_size) {
+      return OTS_FAILURE();
+    }
+    dst[flag_offset++] = repeat_count;
+  }
+  unsigned int xy_bytes = x_bytes + y_bytes;
+  // Guard both the sum and the final offset against unsigned wraparound.
+  if (xy_bytes < x_bytes ||
+      flag_offset + xy_bytes < flag_offset ||
+      flag_offset + xy_bytes > dst_size) {
+    return OTS_FAILURE();
+  }
+
+  // Second pass: write the x-delta block then the y-delta block.
+  int x_offset = flag_offset;
+  int y_offset = flag_offset + x_bytes;
+  last_x = 0;
+  last_y = 0;
+  for (unsigned int i = 0; i < points.size(); ++i) {
+    int dx = points[i].x - last_x;
+    if (dx == 0) {
+      // pass
+    } else if (dx > -256 && dx < 256) {
+      dst[x_offset++] = std::abs(dx);
+    } else {
+      // will always fit for valid input, but overflow is harmless
+      x_offset = Store16(dst, x_offset, dx);
+    }
+    last_x += dx;
+    int dy = points[i].y - last_y;
+    if (dy == 0) {
+      // pass
+    } else if (dy > -256 && dy < 256) {
+      dst[y_offset++] = std::abs(dy);
+    } else {
+      y_offset = Store16(dst, y_offset, dy);
+    }
+    last_y += dy;
+  }
+  *glyph_size = y_offset;
+  return true;
+}
+
+// Compute the bounding box of the coordinates, and store into a glyf buffer.
+// A precondition is that there are at least 10 bytes available.
+// Compute the bounding box of the coordinates, and store into a glyf buffer.
+// A precondition is that there are at least 10 bytes available.
+// The four int16 values are written at dst offsets 2..9 (after nContours);
+// an empty point list yields a (0,0,0,0) box.
+void ComputeBbox(const std::vector<Point>& points, uint8_t* dst) {
+  int x_min = 0;
+  int y_min = 0;
+  int x_max = 0;
+  int y_max = 0;
+
+  for (unsigned int i = 0; i < points.size(); ++i) {
+    int x = points[i].x;
+    int y = points[i].y;
+    // The first point seeds all four extremes.
+    if (i == 0 || x < x_min) x_min = x;
+    if (i == 0 || x > x_max) x_max = x;
+    if (i == 0 || y < y_min) y_min = y;
+    if (i == 0 || y > y_max) y_max = y;
+  }
+  size_t offset = 2;
+  offset = Store16(dst, offset, x_min);
+  offset = Store16(dst, offset, y_min);
+  offset = Store16(dst, offset, x_max);
+  offset = Store16(dst, offset, y_max);
+}
+
+// Process entire bbox stream. This is done as a separate pass to allow for
+// composite bbox computations (an optional more aggressive transform).
+// Process entire bbox stream. This is done as a separate pass to allow for
+// composite bbox computations (an optional more aggressive transform).
+// The stream is a presence bitmap (one bit per glyph, MSB-first) followed
+// by an 8-byte bbox for each set bit, copied into the glyph's bbox slot
+// at loca_offset + 2 in glyf_buf.
+bool ProcessBboxStream(ots::Buffer* bbox_stream, unsigned int n_glyphs,
+    const std::vector<uint32_t>& loca_values, uint8_t* glyf_buf,
+    size_t glyf_buf_length) {
+  const uint8_t* buf = bbox_stream->buffer();
+  if (n_glyphs >= 65536 || loca_values.size() != n_glyphs + 1) {
+    return OTS_FAILURE();
+  }
+  // Safe because n_glyphs is bounded
+  unsigned int bitmap_length = ((n_glyphs + 31) >> 5) << 2;
+  if (!bbox_stream->Skip(bitmap_length)) {
+    return OTS_FAILURE();
+  }
+  for (unsigned int i = 0; i < n_glyphs; ++i) {
+    if (buf[i >> 3] & (0x80 >> (i & 7))) {
+      uint32_t loca_offset = loca_values[i];
+      // The glyph must be long enough to hold a header with a bbox.
+      if (loca_values[i + 1] - loca_offset < kEndPtsOfContoursOffset) {
+        return OTS_FAILURE();
+      }
+      if (glyf_buf_length < 2 + 10 ||
+          loca_offset > glyf_buf_length - 2 - 10) {
+        return OTS_FAILURE();
+      }
+      if (!bbox_stream->Read(glyf_buf + loca_offset + 2, 8)) {
+        return OTS_FAILURE();
+      }
+    }
+  }
+  return true;
+}
+
+// Walks the component records of one composite glyph in composite_stream
+// (sized per the flag word of each component), then copies them verbatim
+// into dst after a 10-byte glyph header with nContours = -1. Reports the
+// resulting glyph size and whether any component carries instructions.
+bool ProcessComposite(ots::Buffer* composite_stream, uint8_t* dst,
+    size_t dst_size, size_t* glyph_size, bool* have_instructions) {
+  size_t start_offset = composite_stream->offset();
+  bool we_have_instructions = false;
+
+  // Seed with MORE_COMPONENTS so the loop reads at least one record.
+  uint16_t flags = FLAG_MORE_COMPONENTS;
+  while (flags & FLAG_MORE_COMPONENTS) {
+    if (!composite_stream->ReadU16(&flags)) {
+      return OTS_FAILURE();
+    }
+    we_have_instructions |= (flags & FLAG_WE_HAVE_INSTRUCTIONS) != 0;
+    size_t arg_size = 2;  // glyph index
+    if (flags & FLAG_ARG_1_AND_2_ARE_WORDS) {
+      arg_size += 4;
+    } else {
+      arg_size += 2;
+    }
+    if (flags & FLAG_WE_HAVE_A_SCALE) {
+      arg_size += 2;
+    } else if (flags & FLAG_WE_HAVE_AN_X_AND_Y_SCALE) {
+      arg_size += 4;
+    } else if (flags & FLAG_WE_HAVE_A_TWO_BY_TWO) {
+      arg_size += 8;
+    }
+    if (!composite_stream->Skip(arg_size)) {
+      return OTS_FAILURE();
+    }
+  }
+  size_t composite_glyph_size = composite_stream->offset() - start_offset;
+  if (composite_glyph_size + kCompositeGlyphBegin > dst_size) {
+    return OTS_FAILURE();
+  }
+  Store16(dst, 0, 0xffff);  // nContours = -1 for composite glyph
+  std::memcpy(dst + kCompositeGlyphBegin,
+      composite_stream->buffer() + start_offset,
+      composite_glyph_size);
+  *glyph_size = kCompositeGlyphBegin + composite_glyph_size;
+  *have_instructions = we_have_instructions;
+  return true;
+}
+
+// Build TrueType loca table
+// Build TrueType loca table
+// Writes loca_values as either uint32 entries (index_format != 0) or
+// uint16 half-offsets (value >> 1, short format). Fails if the table
+// would overflow size_t arithmetic or exceed dst_size.
+bool StoreLoca(const std::vector<uint32_t>& loca_values, int index_format,
+    uint8_t* dst, size_t dst_size) {
+  const uint64_t loca_size = loca_values.size();
+  const uint64_t offset_size = index_format ? 4 : 2;
+  // Reject counts whose *4 byte size would overflow.
+  if ((loca_size << 2) >> 2 != loca_size) {
+    return OTS_FAILURE();
+  }
+  if (offset_size * loca_size > dst_size) {
+    return OTS_FAILURE();
+  }
+  size_t offset = 0;
+  for (size_t i = 0; i < loca_values.size(); ++i) {
+    uint32_t value = loca_values[i];
+    if (index_format) {
+      offset = StoreU32(dst, offset, value);
+    } else {
+      offset = Store16(dst, offset, value >> 1);
+    }
+  }
+  return true;
+}
+
+// Reconstruct entire glyf table based on transformed original
+// Reconstruct entire glyf table based on transformed original
+// Parses the transformed-glyf header (version, num_glyphs, index_format,
+// seven substream lengths), then rebuilds each glyph into dst — composite
+// glyphs via ProcessComposite, simple glyphs via TripletDecode +
+// StorePoints — 4-byte-aligned, collecting loca offsets. Finally applies
+// the bbox stream and emits the loca table into loca_buf.
+bool ReconstructGlyf(const uint8_t* data, size_t data_size,
+    uint8_t* dst, size_t dst_size,
+    uint8_t* loca_buf, size_t loca_size) {
+  static const int kNumSubStreams = 7;
+  ots::Buffer file(data, data_size);
+  uint32_t version;
+  std::vector<std::pair<const uint8_t*, size_t> > substreams(kNumSubStreams);
+
+  if (!file.ReadU32(&version)) {
+    return OTS_FAILURE();
+  }
+  uint16_t num_glyphs;
+  uint16_t index_format;
+  if (!file.ReadU16(&num_glyphs) ||
+      !file.ReadU16(&index_format)) {
+    return OTS_FAILURE();
+  }
+  // Fixed header: version + num_glyphs/index_format + 7 substream sizes.
+  unsigned int offset = (2 + kNumSubStreams) * 4;
+  if (offset > data_size) {
+    return OTS_FAILURE();
+  }
+  // Invariant from here on: data_size >= offset
+  for (int i = 0; i < kNumSubStreams; ++i) {
+    uint32_t substream_size;
+    if (!file.ReadU32(&substream_size)) {
+      return OTS_FAILURE();
+    }
+    if (substream_size > data_size - offset) {
+      return OTS_FAILURE();
+    }
+    substreams[i] = std::make_pair(data + offset, substream_size);
+    offset += substream_size;
+  }
+  ots::Buffer n_contour_stream(substreams[0].first, substreams[0].second);
+  ots::Buffer n_points_stream(substreams[1].first, substreams[1].second);
+  ots::Buffer flag_stream(substreams[2].first, substreams[2].second);
+  ots::Buffer glyph_stream(substreams[3].first, substreams[3].second);
+  ots::Buffer composite_stream(substreams[4].first, substreams[4].second);
+  ots::Buffer bbox_stream(substreams[5].first, substreams[5].second);
+  ots::Buffer instruction_stream(substreams[6].first, substreams[6].second);
+
+  std::vector<uint32_t> loca_values(num_glyphs + 1);
+  std::vector<unsigned int> n_points_vec;
+  std::vector<Point> points;
+  uint32_t loca_offset = 0;
+  for (unsigned int i = 0; i < num_glyphs; ++i) {
+    size_t glyph_size = 0;
+    uint16_t n_contours = 0;
+    if (!n_contour_stream.ReadU16(&n_contours)) {
+      return OTS_FAILURE();
+    }
+    uint8_t* glyf_dst = dst + loca_offset;
+    size_t glyf_dst_size = dst_size - loca_offset;
+    if (n_contours == 0xffff) {
+      // composite glyph
+      bool have_instructions = false;
+      unsigned int instruction_size = 0;
+      if (!ProcessComposite(&composite_stream, glyf_dst, glyf_dst_size,
+            &glyph_size, &have_instructions)) {
+        return OTS_FAILURE();
+      }
+      if (have_instructions) {
+        if (!Read255UShort(&glyph_stream, &instruction_size)) {
+          return OTS_FAILURE();
+        }
+        if (instruction_size + 2 > glyf_dst_size - glyph_size) {
+          return OTS_FAILURE();
+        }
+        // Instruction length word, then the raw instruction bytes.
+        Store16(glyf_dst, glyph_size, instruction_size);
+        if (!instruction_stream.Read(glyf_dst + glyph_size + 2,
+              instruction_size)) {
+          return OTS_FAILURE();
+        }
+        glyph_size += instruction_size + 2;
+      }
+    } else if (n_contours > 0) {
+      // simple glyph
+      n_points_vec.clear();
+      points.clear();
+      unsigned int total_n_points = 0;
+      unsigned int n_points_contour;
+      for (unsigned int j = 0; j < n_contours; ++j) {
+        if (!Read255UShort(&n_points_stream, &n_points_contour)) {
+          return OTS_FAILURE();
+        }
+        n_points_vec.push_back(n_points_contour);
+        // Overflow check on the running total.
+        if (total_n_points + n_points_contour < total_n_points) {
+          return OTS_FAILURE();
+        }
+        total_n_points += n_points_contour;
+      }
+      unsigned int flag_size = total_n_points;
+      if (flag_size > flag_stream.length() - flag_stream.offset()) {
+        return OTS_FAILURE();
+      }
+      const uint8_t* flags_buf = flag_stream.buffer() + flag_stream.offset();
+      const uint8_t* triplet_buf = glyph_stream.buffer() +
+        glyph_stream.offset();
+      size_t triplet_size = glyph_stream.length() - glyph_stream.offset();
+      size_t triplet_bytes_consumed = 0;
+      if (!TripletDecode(flags_buf, triplet_buf, triplet_size, total_n_points,
+            &points, &triplet_bytes_consumed)) {
+        return OTS_FAILURE();
+      }
+      const uint32_t header_and_endpts_contours_size =
+        kEndPtsOfContoursOffset + 2 * n_contours;
+      if (glyf_dst_size < header_and_endpts_contours_size) {
+        return OTS_FAILURE();
+      }
+      Store16(glyf_dst, 0, n_contours);
+      ComputeBbox(points, glyf_dst);
+      size_t offset = kEndPtsOfContoursOffset;
+      // endPtsOfContours: cumulative last point index per contour.
+      int end_point = -1;
+      for (unsigned int contour_ix = 0; contour_ix < n_contours; ++contour_ix) {
+        end_point += n_points_vec[contour_ix];
+        if (end_point >= 65536) {
+          return OTS_FAILURE();
+        }
+        offset = Store16(glyf_dst, offset, end_point);
+      }
+      // Advance past what TripletDecode consumed (it read via raw
+      // pointers, not through the stream cursors).
+      if (!flag_stream.Skip(flag_size)) {
+        return OTS_FAILURE();
+      }
+      if (!glyph_stream.Skip(triplet_bytes_consumed)) {
+        return OTS_FAILURE();
+      }
+      unsigned int instruction_size;
+      if (!Read255UShort(&glyph_stream, &instruction_size)) {
+        return OTS_FAILURE();
+      }
+      if (glyf_dst_size - header_and_endpts_contours_size <
+          instruction_size + 2) {
+        return OTS_FAILURE();
+      }
+      uint8_t* instruction_dst = glyf_dst + header_and_endpts_contours_size;
+      Store16(instruction_dst, 0, instruction_size);
+      if (!instruction_stream.Read(instruction_dst + 2, instruction_size)) {
+        return OTS_FAILURE();
+      }
+      if (!StorePoints(points, n_contours, instruction_size,
+            glyf_dst, glyf_dst_size, &glyph_size)) {
+        return OTS_FAILURE();
+      }
+    } else {
+      // n_contours == 0: empty glyph occupies no bytes.
+      glyph_size = 0;
+    }
+    loca_values[i] = loca_offset;
+    if (glyph_size + 3 < glyph_size) {
+      return OTS_FAILURE();
+    }
+    glyph_size = Round4(glyph_size);
+    if (glyph_size > dst_size - loca_offset) {
+      // This shouldn't happen, but this test defensively maintains the
+      // invariant that loca_offset <= dst_size.
+      return OTS_FAILURE();
+    }
+    loca_offset += glyph_size;
+  }
+  loca_values[num_glyphs] = loca_offset;
+  if (!ProcessBboxStream(&bbox_stream, num_glyphs, loca_values,
+        dst, dst_size)) {
+    return OTS_FAILURE();
+  }
+  return StoreLoca(loca_values, index_format, loca_buf, loca_size);
+}
+
+// This is linear search, but could be changed to binary because we
+// do have a guarantee that the tables are sorted by tag. But the total
+// cpu time is expected to be very small in any case.
+const Table* FindTable(const std::vector<Table>& tables, uint32_t tag) {
+ size_t n_tables = tables.size();
+ for (size_t i = 0; i < n_tables; ++i) {
+ if (tables[i].tag == tag) {
+ return &tables[i];
+ }
+ }
+ return NULL;
+}
+
+bool ReconstructTransformed(const std::vector<Table>& tables, uint32_t tag,
+ const uint8_t* transformed_buf, size_t transformed_size,
+ uint8_t* dst, size_t dst_length) {
+ if (tag == TAG('g', 'l', 'y', 'f')) {
+ const Table* glyf_table = FindTable(tables, tag);
+ const Table* loca_table = FindTable(tables, TAG('l', 'o', 'c', 'a'));
+ if (glyf_table == NULL || loca_table == NULL) {
+ return OTS_FAILURE();
+ }
+ if (static_cast<uint64_t>(glyf_table->dst_offset + glyf_table->dst_length) >
+ dst_length) {
+ return OTS_FAILURE();
+ }
+ if (static_cast<uint64_t>(loca_table->dst_offset + loca_table->dst_length) >
+ dst_length) {
+ return OTS_FAILURE();
+ }
+ return ReconstructGlyf(transformed_buf, transformed_size,
+ dst + glyf_table->dst_offset, glyf_table->dst_length,
+ dst + loca_table->dst_offset, loca_table->dst_length);
+ } else if (tag == TAG('l', 'o', 'c', 'a')) {
+ // processing was already done by glyf table, but validate
+ if (!FindTable(tables, TAG('g', 'l', 'y', 'f'))) {
+ return OTS_FAILURE();
+ }
+ } else {
+ // transform for the tag is not known
+ return OTS_FAILURE();
+ }
+ return true;
+}
+
// Computes the OpenType table checksum: the sum, mod 2^32, of the buffer
// read as big-endian 32-bit words. Per the OpenType spec a table whose
// length is not a multiple of four is checksummed as if zero-padded to the
// next word boundary; the original code instead read up to three bytes
// past |size| in that case.
uint32_t ComputeChecksum(const uint8_t* buf, size_t size) {
  uint32_t checksum = 0;
  const size_t aligned_size = size & ~static_cast<size_t>(3);
  for (size_t i = 0; i < aligned_size; i += 4) {
    // Unsigned arithmetic makes the mod-2^32 sum well defined (and the
    // uint32_t casts avoid UB when shifting a byte >= 0x80 into bit 31).
    checksum += (static_cast<uint32_t>(buf[i]) << 24) |
                (static_cast<uint32_t>(buf[i + 1]) << 16) |
                (static_cast<uint32_t>(buf[i + 2]) << 8) |
                static_cast<uint32_t>(buf[i + 3]);
  }
  if (aligned_size < size) {
    // Trailing 1-3 bytes: treat the final word as zero-padded instead of
    // reading past the end of the buffer.
    uint32_t last_word = 0;
    for (size_t i = aligned_size; i < size; ++i) {
      last_word |= static_cast<uint32_t>(buf[i])
                   << (24 - 8 * (i - aligned_size));
    }
    checksum += last_word;
  }
  return checksum;
}
+
+bool FixChecksums(const std::vector<Table>& tables, uint8_t* dst) {
+ const Table* head_table = FindTable(tables, TAG('h', 'e', 'a', 'd'));
+ if (head_table == NULL ||
+ head_table->dst_length < kCheckSumAdjustmentOffset + 4) {
+ return OTS_FAILURE();
+ }
+ size_t adjustment_offset = head_table->dst_offset + kCheckSumAdjustmentOffset;
+ StoreU32(dst, adjustment_offset, 0);
+ size_t n_tables = tables.size();
+ uint32_t file_checksum = 0;
+ for (size_t i = 0; i < n_tables; ++i) {
+ const Table* table = &tables[i];
+ size_t table_length = table->dst_length;
+ uint8_t* table_data = dst + table->dst_offset;
+ uint32_t checksum = ComputeChecksum(table_data, table_length);
+ StoreU32(dst, kSfntHeaderSize + i * kSfntEntrySize + 4, checksum);
+ file_checksum += checksum;
+ }
+ file_checksum += ComputeChecksum(dst,
+ kSfntHeaderSize + kSfntEntrySize * n_tables);
+ uint32_t checksum_adjustment = 0xb1b0afba - file_checksum;
+ StoreU32(dst, adjustment_offset, checksum_adjustment);
+ return true;
+}
+
+bool Woff2Compress(const uint8_t* data, const size_t len,
+ uint32_t compression_type,
+ uint8_t* result, uint32_t* result_len) {
+ if (compression_type == kCompressionTypeBrotli) {
+ size_t compressed_len = *result_len;
+
+ brotli::BrotliCompressBuffer(len, data, &compressed_len, result);
+ *result_len = compressed_len;
+ return true;
+ }
+ return false;
+}
+
+bool Woff2Uncompress(uint8_t* dst_buf, size_t dst_size,
+ const uint8_t* src_buf, size_t src_size, uint32_t compression_type) {
+ if (compression_type == kCompressionTypeBrotli) {
+ size_t uncompressed_size = dst_size;
+ int ok = BrotliDecompressBuffer(src_size, src_buf,
+ &uncompressed_size, dst_buf);
+ if (!ok || uncompressed_size != dst_size) {
+ return OTS_FAILURE();
+ }
+ return true;
+ }
+ // Unknown compression type
+ return OTS_FAILURE();
+}
+
+bool ReadLongDirectory(ots::Buffer* file, std::vector<Table>* tables,
+ size_t num_tables) {
+ for (size_t i = 0; i < num_tables; ++i) {
+ Table* table = &(*tables)[i];
+ if (!file->ReadU32(&table->tag) ||
+ !file->ReadU32(&table->flags) ||
+ !file->ReadU32(&table->src_length) ||
+ !file->ReadU32(&table->transform_length) ||
+ !file->ReadU32(&table->dst_length)) {
+ return OTS_FAILURE();
+ }
+ }
+ return true;
+}
+
// Tags addressable by a 5-bit index in the short table directory format
// (flag byte values 0..28). The value 0x1f in those bits means the tag is
// stored explicitly instead (see ReadShortDirectory/StoreTableEntry).
const uint32_t known_tags[29] = {
  TAG('c', 'm', 'a', 'p'),  // 0
  TAG('h', 'e', 'a', 'd'),  // 1
  TAG('h', 'h', 'e', 'a'),  // 2
  TAG('h', 'm', 't', 'x'),  // 3
  TAG('m', 'a', 'x', 'p'),  // 4
  TAG('n', 'a', 'm', 'e'),  // 5
  TAG('O', 'S', '/', '2'),  // 6
  TAG('p', 'o', 's', 't'),  // 7
  TAG('c', 'v', 't', ' '),  // 8
  TAG('f', 'p', 'g', 'm'),  // 9
  TAG('g', 'l', 'y', 'f'),  // 10
  TAG('l', 'o', 'c', 'a'),  // 11
  TAG('p', 'r', 'e', 'p'),  // 12
  TAG('C', 'F', 'F', ' '),  // 13
  TAG('V', 'O', 'R', 'G'),  // 14
  TAG('E', 'B', 'D', 'T'),  // 15
  TAG('E', 'B', 'L', 'C'),  // 16
  TAG('g', 'a', 's', 'p'),  // 17
  TAG('h', 'd', 'm', 'x'),  // 18
  TAG('k', 'e', 'r', 'n'),  // 19
  TAG('L', 'T', 'S', 'H'),  // 20
  TAG('P', 'C', 'L', 'T'),  // 21
  TAG('V', 'D', 'M', 'X'),  // 22
  TAG('v', 'h', 'e', 'a'),  // 23
  TAG('v', 'm', 't', 'x'),  // 24
  TAG('B', 'A', 'S', 'E'),  // 25
  TAG('G', 'D', 'E', 'F'),  // 26
  TAG('G', 'P', 'O', 'S'),  // 27
  TAG('G', 'S', 'U', 'B'),  // 28
};
+
+int KnownTableIndex(uint32_t tag) {
+ for (int i = 0; i < 29; ++i) {
+ if (tag == known_tags[i]) return i;
+ }
+ return 31;
+}
+
// Parses the short-format table directory. Each entry starts with one flag
// byte: low 5 bits are a known-tag index (0x1f = explicit 4-byte tag
// follows), bit 5 marks a transformed table, and bits 6-7 are the
// compression type, with 3 (kShortFlagsContinue) meaning "data continues
// the previous table's compressed stream". Lengths are base-128 encoded.
bool ReadShortDirectory(ots::Buffer* file, std::vector<Table>* tables,
    size_t num_tables) {
  uint32_t last_compression_type = 0;
  for (size_t i = 0; i < num_tables; ++i) {
    Table* table = &(*tables)[i];
    uint8_t flag_byte;
    if (!file->ReadU8(&flag_byte)) {
      return OTS_FAILURE();
    }
    uint32_t tag;
    if ((flag_byte & 0x1f) == 0x1f) {
      // Tag did not fit the 5-bit index; it is stored explicitly.
      if (!file->ReadU32(&tag)) {
        return OTS_FAILURE();
      }
    } else {
      if ((flag_byte & 0x1f) >= (sizeof(known_tags) / sizeof(known_tags[0]))) {
        return OTS_FAILURE();
      }
      tag = known_tags[flag_byte & 0x1f];
    }
    uint32_t flags = flag_byte >> 6;
    if (flags == kShortFlagsContinue) {
      // A continuation table inherits the compression type of the stream
      // it shares.
      flags = last_compression_type | kWoff2FlagsContinueStream;
    } else {
      if (flags == kCompressionTypeNone ||
          flags == kCompressionTypeGzip ||
          flags == kCompressionTypeLzma) {
        last_compression_type = flags;
      } else {
        return OTS_FAILURE();
      }
    }
    if ((flag_byte & 0x20) != 0) {
      flags |= kWoff2FlagsTransform;
    }
    // First length field: the final (decompressed, untransformed) size.
    uint32_t dst_length;
    if (!ReadBase128(file, &dst_length)) {
      return OTS_FAILURE();
    }
    uint32_t transform_length = dst_length;
    if ((flags & kWoff2FlagsTransform) != 0) {
      if (!ReadBase128(file, &transform_length)) {
        return OTS_FAILURE();
      }
    }
    uint32_t src_length = transform_length;
    if ((flag_byte >> 6) == 1 || (flag_byte >> 6) == 2) {
      // gzip/lzma-compressed tables additionally store their compressed size.
      if (!ReadBase128(file, &src_length)) {
        return OTS_FAILURE();
      }
    } else if ((flag_byte >> 6) == kShortFlagsContinue) {
      // The compressed data for this table is in a previous table, so we set
      // the src_length to zero.
      src_length = 0;
    }
    table->tag = tag;
    table->flags = flags;
    table->src_length = src_length;
    table->transform_length = transform_length;
    table->dst_length = dst_length;
  }
  return true;
}
+
+} // namespace
+
+size_t ComputeWOFF2FinalSize(const uint8_t* data, size_t length) {
+ ots::Buffer file(data, length);
+ uint32_t total_length;
+
+ if (!file.Skip(16) ||
+ !file.ReadU32(&total_length)) {
+ return 0;
+ }
+ return total_length;
+}
+
// Reassembles an uncompressed TTF/OTF font from WOFF2 input. |result| must
// be at least |result_length| bytes, which callers obtain from
// ComputeWOFF2FinalSize(). Returns true on success.
bool ConvertWOFF2ToTTF(uint8_t* result, size_t result_length,
                       const uint8_t* data, size_t length) {
  ots::Buffer file(data, length);

  uint32_t signature;
  uint32_t flavor;
  if (!file.ReadU32(&signature) || signature != kWoff2Signature ||
      !file.ReadU32(&flavor)) {
    return OTS_FAILURE();
  }

  // TODO(user): Should call IsValidVersionTag() here.

  uint32_t reported_length;
  if (!file.ReadU32(&reported_length) || length != reported_length) {
    return OTS_FAILURE();
  }
  uint16_t num_tables;
  if (!file.ReadU16(&num_tables) || !num_tables) {
    return OTS_FAILURE();
  }
  // These reserved bits will be always zero in the final format, but they
  // temporarily indicate the use of brotli, so that we can evaluate gzip, lzma
  // and brotli side-by-side.
  uint16_t reserved;
  if (!file.ReadU16(&reserved)) {
    return OTS_FAILURE();
  }
  // We don't care about these fields of the header:
  //   uint32_t total_sfnt_size
  //   uint16_t major_version, minor_version
  //   uint32_t meta_offset, meta_length, meta_orig_length
  //   uint32_t priv_offset, priv_length
  if (!file.Skip(28)) {
    return OTS_FAILURE();
  }
  std::vector<Table> tables(num_tables);
  // Note: change below to ReadLongDirectory to enable long format.
  if (!ReadShortDirectory(&file, &tables, num_tables)) {
    return OTS_FAILURE();
  }
  // Lay out source (compressed input) and destination (sfnt output) offsets
  // for every table, checking for 32-bit overflow at each step.
  uint64_t src_offset = file.offset();
  uint64_t dst_offset = kSfntHeaderSize +
      kSfntEntrySize * static_cast<uint64_t>(num_tables);
  uint64_t uncompressed_sum = 0;
  for (uint16_t i = 0; i < num_tables; ++i) {
    Table* table = &tables[i];
    table->src_offset = src_offset;
    src_offset += table->src_length;
    if (src_offset > std::numeric_limits<uint32_t>::max()) {
      return OTS_FAILURE();
    }
    src_offset = Round4(src_offset);  // TODO: reconsider
    table->dst_offset = dst_offset;
    dst_offset += table->dst_length;
    if (dst_offset > std::numeric_limits<uint32_t>::max()) {
      return OTS_FAILURE();
    }
    dst_offset = Round4(dst_offset);
    if ((table->flags & kCompressionTypeMask) != kCompressionTypeNone) {
      uncompressed_sum += table->src_length;
      if (uncompressed_sum > std::numeric_limits<uint32_t>::max()) {
        return OTS_FAILURE();
      }
    }
  }
  // Enforce same 30M limit on uncompressed tables as OTS
  if (uncompressed_sum > 30 * 1024 * 1024) {
    return OTS_FAILURE();
  }
  if (src_offset > length || dst_offset > result_length) {
    return OTS_FAILURE();
  }

  const uint32_t sfnt_header_and_table_directory_size = 12 + 16 * num_tables;
  if (sfnt_header_and_table_directory_size > result_length) {
    return OTS_FAILURE();
  }

  // Start building the font: sfnt header (searchRange / entrySelector /
  // rangeShift are derived from the largest power of two <= num_tables),
  // followed by one 16-byte directory entry per table.
  size_t offset = 0;
  offset = StoreU32(result, offset, flavor);
  offset = Store16(result, offset, num_tables);
  unsigned max_pow2 = 0;
  while (1u << (max_pow2 + 1) <= num_tables) {
    max_pow2++;
  }
  const uint16_t output_search_range = (1u << max_pow2) << 4;
  offset = Store16(result, offset, output_search_range);
  offset = Store16(result, offset, max_pow2);
  offset = Store16(result, offset, (num_tables << 4) - output_search_range);
  for (uint16_t i = 0; i < num_tables; ++i) {
    const Table* table = &tables[i];
    offset = StoreU32(result, offset, table->tag);
    offset = StoreU32(result, offset, 0);  // checksum, to fill in later
    offset = StoreU32(result, offset, table->dst_offset);
    offset = StoreU32(result, offset, table->dst_length);
  }
  // Decompress and/or copy each table into place. Tables flagged with
  // kWoff2FlagsContinueStream share a single compressed stream with the
  // most recent non-continued table; transform_buf walks through that
  // stream slice by slice.
  std::vector<uint8_t> uncompressed_buf;
  bool continue_valid = false;
  const uint8_t* transform_buf = NULL;
  for (uint16_t i = 0; i < num_tables; ++i) {
    const Table* table = &tables[i];
    uint32_t flags = table->flags;
    const uint8_t* src_buf = data + table->src_offset;
    uint32_t compression_type = flags & kCompressionTypeMask;
    if (compression_type == kCompressionTypeLzma && reserved > 0) {
      compression_type = kCompressionTypeLzma + reserved;
    }
    size_t transform_length = table->transform_length;
    if ((flags & kWoff2FlagsContinueStream) != 0) {
      if (!continue_valid) {
        return OTS_FAILURE();
      }
    } else if (compression_type == kCompressionTypeNone) {
      if (transform_length != table->src_length) {
        return OTS_FAILURE();
      }
      transform_buf = src_buf;
      continue_valid = false;
    } else if ((flags & kWoff2FlagsContinueStream) == 0) {
      // First table of a compressed stream: size the buffer to hold this
      // table plus every continued table that follows it.
      uint64_t total_size = transform_length;
      for (uint16_t j = i + 1; j < num_tables; ++j) {
        if ((tables[j].flags & kWoff2FlagsContinueStream) == 0) {
          break;
        }
        total_size += tables[j].transform_length;
        if (total_size > std::numeric_limits<uint32_t>::max()) {
          return OTS_FAILURE();
        }
      }
      uncompressed_buf.resize(total_size);
      if (!Woff2Uncompress(&uncompressed_buf[0], total_size,
                           src_buf, table->src_length, compression_type)) {
        return OTS_FAILURE();
      }
      transform_buf = &uncompressed_buf[0];
      continue_valid = true;
    } else {
      return OTS_FAILURE();
    }

    if ((flags & kWoff2FlagsTransform) == 0) {
      // Untransformed table: copy the (decompressed) bytes verbatim.
      if (transform_length != table->dst_length) {
        return OTS_FAILURE();
      }
      if (static_cast<uint64_t>(table->dst_offset + transform_length) >
          result_length) {
        return OTS_FAILURE();
      }
      std::memcpy(result + table->dst_offset, transform_buf,
                  transform_length);
    } else {
      if (!ReconstructTransformed(tables, table->tag,
              transform_buf, transform_length, result, result_length)) {
        return OTS_FAILURE();
      }
    }
    if (continue_valid) {
      // Advance past this table's slice of the shared stream.
      transform_buf += transform_length;
      if (transform_buf > uncompressed_buf.data() + uncompressed_buf.size()) {
        return OTS_FAILURE();
      }
    }
  }

  return FixChecksums(tables, result);
}
+
// Writes one short-format directory entry for |table| at dst + *offset,
// advancing *offset. Flag byte layout: low 5 bits = known-tag index (0x1f
// if the 4-byte tag follows explicitly), bit 5 = transform applied,
// bits 6-7 = compression type (0x3 = continues the previous stream).
// NOTE(review): field naming here is from the writer's perspective --
// src_length is the original (uncompressed) table size, which the reading
// side (ReadShortDirectory) calls dst_length, and dst_length here is the
// compressed size.
void StoreTableEntry(const Table& table, size_t* offset, uint8_t* dst) {
  uint8_t flag_byte = KnownTableIndex(table.tag);
  if ((table.flags & kWoff2FlagsTransform) != 0) {
    flag_byte |= 0x20;
  }
  if ((table.flags & kWoff2FlagsContinueStream) != 0) {
    flag_byte |= 0xc0;
  } else {
    flag_byte |= ((table.flags & 3) << 6);
  }
  dst[(*offset)++] = flag_byte;
  if ((flag_byte & 0x1f) == 0x1f) {
    // Unknown tag: store it explicitly.
    StoreU32(table.tag, offset, dst);
  }
  StoreBase128(table.src_length, offset, dst);
  if ((flag_byte & 0x20) != 0) {
    StoreBase128(table.transform_length, offset, dst);
  }
  // Only gzip/lzma-compressed entries carry a separate compressed length.
  if ((flag_byte & 0xc0) == 0x40 || (flag_byte & 0xc0) == 0x80) {
    StoreBase128(table.dst_length, offset, dst);
  }
}
+
+size_t TableEntrySize(const Table& table) {
+ size_t size = KnownTableIndex(table.tag) < 31 ? 1 : 5;
+ size += Base128Size(table.src_length);
+ if ((table.flags & kWoff2FlagsTransform) != 0) {
+ size += Base128Size(table.transform_length);
+ }
+ if ((table.flags & kWoff2FlagsContinueStream) == 0 &&
+ ((table.flags & 3) == kCompressionTypeGzip ||
+ (table.flags & 3) == kCompressionTypeLzma)) {
+ size += Base128Size(table.dst_length);
+ }
+ return size;
+}
+
+size_t ComputeWoff2Length(const std::vector<Table>& tables) {
+ size_t size = 44; // header size
+ for (const auto& table : tables) {
+ size += TableEntrySize(table);
+ }
+ for (const auto& table : tables) {
+ size += table.dst_length;
+ size = Round4(size);
+ }
+ return size;
+}
+
+size_t ComputeTTFLength(const std::vector<Table>& tables) {
+ size_t size = 12 + 16 * tables.size(); // sfnt header
+ for (const auto& table : tables) {
+ size += Round4(table.src_length);
+ }
+ return size;
+}
+
+size_t ComputeTotalTransformLength(const Font& font) {
+ size_t total = 0;
+ for (const auto& i : font.tables) {
+ const Font::Table& table = i.second;
+ if (table.tag & 0x80808080 || !font.FindTable(table.tag ^ 0x80808080)) {
+ // Count transformed tables and non-transformed tables that do not have
+ // transformed versions.
+ total += table.length;
+ }
+ }
+ return total;
+}
+
+struct Woff2ConvertOptions {
+ uint32_t compression_type;
+ bool continue_streams;
+ bool keep_dsig;
+ bool transform_glyf;
+
+ Woff2ConvertOptions()
+ : compression_type(kCompressionTypeBrotli),
+ continue_streams(true),
+ keep_dsig(true),
+ transform_glyf(true) {}
+
+
+};
+
// Upper bound on the compressed output size. The woff2 header is 32 bytes
// larger than the sfnt header, while everything else (short directory,
// transforms, compression) should shrink; 1024 bytes of headroom is
// generous cover for that difference.
size_t MaxWOFF2CompressedSize(const uint8_t* data, size_t length) {
  const size_t kHeadroom = 1024;
  return length + kHeadroom;
}
+
// Converts an in-memory TTF/OTF font to WOFF2. On entry *result_length must
// be at least MaxWOFF2CompressedSize(data, length); on success it receives
// the actual output size. Returns true on success.
bool ConvertTTFToWOFF2(const uint8_t *data, size_t length,
                       uint8_t *result, size_t *result_length) {

  Woff2ConvertOptions options;

  Font font;
  if (!ReadFont(data, length, &font)) {
    fprintf(stderr, "Parsing of the input font failed.\n");
    return false;
  }

  if (!NormalizeFont(&font)) {
    fprintf(stderr, "Font normalization failed.\n");
    return false;
  }

  if (!options.keep_dsig) {
    font.tables.erase(TAG('D', 'S', 'I', 'G'));
  }

  if (options.transform_glyf &&
      !TransformGlyfAndLocaTables(&font)) {
    fprintf(stderr, "Font transformation failed.\n");
    return false;
  }

  const Font::Table* head_table = font.FindTable(kHeadTableTag);
  if (head_table == NULL) {
    fprintf(stderr, "Missing head table.\n");
    return false;
  }

  // Although the compressed size of each table in the final woff2 file won't
  // be larger than its transform_length, we have to allocate a large enough
  // buffer for the compressor, since the compressor can potentially increase
  // the size. If the compressor overflows this, it should return false and
  // then this function will also return false.
  size_t total_transform_length = ComputeTotalTransformLength(font);
  size_t compression_buffer_size = 1.2 * total_transform_length + 10240;
  std::vector<uint8_t> compression_buf(compression_buffer_size);
  size_t compression_buf_offset = 0;
  uint32_t total_compressed_length = compression_buffer_size;

  if (options.continue_streams) {
    // Collect all transformed data into one place.
    std::vector<uint8_t> transform_buf(total_transform_length);
    size_t transform_offset = 0;
    for (const auto& i : font.tables) {
      if (i.second.tag & 0x80808080) continue;
      // Prefer the transformed variant (high tag bits set) when one exists.
      const Font::Table* table = font.FindTable(i.second.tag ^ 0x80808080);
      if (table == NULL) table = &i.second;
      StoreBytes(table->data, table->length,
                 &transform_offset, &transform_buf[0]);
    }
    // Compress all transformed data in one stream.
    if (!Woff2Compress(transform_buf.data(), total_transform_length,
                       options.compression_type,
                       &compression_buf[0],
                       &total_compressed_length)) {
      fprintf(stderr, "Compression of combined table failed.\n");
      return false;
    }
  }

  // Build the output directory entries, compressing per table when streams
  // are not continued.
  std::vector<Table> tables;
  for (const auto& i : font.tables) {
    const Font::Table& src_table = i.second;
    if (src_table.tag & 0x80808080) {
      // This is a transformed table, we will write it together with the
      // original version.
      continue;
    }
    Table table;
    table.tag = src_table.tag;
    table.flags = std::min(options.compression_type, kCompressionTypeLzma);
    table.src_length = src_table.length;
    table.transform_length = src_table.length;
    const uint8_t* transformed_data = src_table.data;
    const Font::Table* transformed_table =
        font.FindTable(src_table.tag ^ 0x80808080);
    if (transformed_table != NULL) {
      table.flags |= kWoff2FlagsTransform;
      table.transform_length = transformed_table->length;
      transformed_data = transformed_table->data;
    }
    if (options.continue_streams) {
      // The single compressed stream is attached to the first table; the
      // rest are marked as continuations with no data of their own.
      if (tables.empty()) {
        table.dst_length = total_compressed_length;
        table.dst_data = &compression_buf[0];
      } else {
        table.dst_length = 0;
        table.dst_data = NULL;
        table.flags |= kWoff2FlagsContinueStream;
      }
    } else {
      table.dst_length = table.transform_length;
      table.dst_data = transformed_data;
      if (options.compression_type != kCompressionTypeNone) {
        uint32_t compressed_length =
            compression_buf.size() - compression_buf_offset;
        if (!Woff2Compress(transformed_data, table.transform_length,
                           options.compression_type,
                           &compression_buf[compression_buf_offset],
                           &compressed_length)) {
          fprintf(stderr, "Compression of table %x failed.\n", src_table.tag);
          return false;
        }
        // Store the table uncompressed if compression didn't actually help.
        if (compressed_length >= table.transform_length) {
          table.flags &= (~3);  // no compression
        } else {
          table.dst_length = compressed_length;
          table.dst_data = &compression_buf[compression_buf_offset];
          compression_buf_offset += table.dst_length;
        }
      }
    }
    tables.push_back(table);
  }

  size_t woff2_length = ComputeWoff2Length(tables);
  if (woff2_length > *result_length) {
    fprintf(stderr, "Result allocation was too small (%zd vs %zd bytes).\n",
            *result_length, woff2_length);
    return false;
  }
  *result_length = woff2_length;
  // The reserved header field temporarily encodes compression types beyond
  // lzma (i.e. brotli) while the formats are evaluated side by side.
  uint16_t reserved =
      (options.compression_type > kCompressionTypeLzma) ?
      options.compression_type - kCompressionTypeLzma : 0;

  // Emit the fixed 44-byte woff2 header.
  size_t offset = 0;
  StoreU32(kWoff2Signature, &offset, result);
  StoreU32(font.flavor, &offset, result);
  StoreU32(woff2_length, &offset, result);
  Store16(tables.size(), &offset, result);
  Store16(reserved, &offset, result);
  StoreU32(ComputeTTFLength(tables), &offset, result);
  StoreBytes(head_table->data + 4, 4, &offset, result);  // font revision
  StoreU32(0, &offset, result);  // metaOffset
  StoreU32(0, &offset, result);  // metaLength
  StoreU32(0, &offset, result);  // metaOrigLength
  StoreU32(0, &offset, result);  // privOffset
  StoreU32(0, &offset, result);  // privLength
  // Short-format table directory, then 4-byte-aligned table data.
  for (const auto& table : tables) {
    StoreTableEntry(table, &offset, result);
  }
  for (const auto& table : tables) {
    StoreBytes(table.dst_data, table.dst_length, &offset, result);
    offset = Round4(offset);
  }
  if (*result_length != offset) {
    fprintf(stderr, "Mismatch between computed and actual length "
            "(%zd vs %zd)\n", *result_length, offset);
    return false;
  }
  return true;
}
+
+} // namespace woff2
diff --git a/woff2/woff2.h b/woff2/woff2.h
new file mode 100644
index 0000000..aba5080
--- /dev/null
+++ b/woff2/woff2.h
@@ -0,0 +1,50 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Library for converting WOFF2 format font files to their TTF versions.
+
#ifndef BROTLI_WOFF2_WOFF2_H_
#define BROTLI_WOFF2_WOFF2_H_

#include <stddef.h>
#include <inttypes.h>
#include <string>

namespace woff2 {

using std::string;

// Compute the size of the final uncompressed font, or 0 on error.
size_t ComputeWOFF2FinalSize(const uint8_t *data, size_t length);

// Decompresses the font into the target buffer. The result_length should
// be the same as determined by ComputeWOFF2FinalSize(). Returns true on
// successful decompression.
bool ConvertWOFF2ToTTF(uint8_t *result, size_t result_length,
                       const uint8_t *data, size_t length);

// Returns an upper bound on the size of the compressed file.
size_t MaxWOFF2CompressedSize(const uint8_t* data, size_t length);

// Compresses the font into the target buffer. *result_length should be at
// least the value returned by MaxWOFF2CompressedSize(); upon return, it is
// set to the actual compressed size. Returns true on successful compression.
bool ConvertTTFToWOFF2(const uint8_t *data, size_t length,
                       uint8_t *result, size_t *result_length);



} // namespace woff2

#endif  // BROTLI_WOFF2_WOFF2_H_
diff --git a/woff2/woff2_compress.cc b/woff2/woff2_compress.cc
new file mode 100644
index 0000000..778369b
--- /dev/null
+++ b/woff2/woff2_compress.cc
@@ -0,0 +1,52 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A commandline tool for compressing ttf format files to woff2.
+
+#include <string>
+
+#include "file.h"
+#include "./woff2.h"
+
+
+int main(int argc, char **argv) {
+ using std::string;
+
+ if (argc != 2) {
+ fprintf(stderr, "One argument, the input filename, must be provided.\n");
+ return 1;
+ }
+
+ string filename(argv[1]);
+ string outfilename = filename.substr(0, filename.find_last_of(".")) + ".woff2";
+ fprintf(stdout, "Processing %s => %s\n",
+ filename.c_str(), outfilename.c_str());
+ string input = woff2::GetFileContent(filename);
+
+ const uint8_t* input_data = reinterpret_cast<const uint8_t*>(input.data());
+ size_t output_size = woff2::MaxWOFF2CompressedSize(input_data, input.size());
+ string output(output_size, 0);
+ uint8_t* output_data = reinterpret_cast<uint8_t*>(&output[0]);
+
+ if (!woff2::ConvertTTFToWOFF2(input_data, input.size(),
+ output_data, &output_size)) {
+ fprintf(stderr, "Compression failed.\n");
+ return 1;
+ }
+ output.resize(output_size);
+
+ woff2::SetFileContents(outfilename, output);
+
+ return 0;
+}
diff --git a/woff2/woff2_decompress.cc b/woff2/woff2_decompress.cc
new file mode 100644
index 0000000..c083793
--- /dev/null
+++ b/woff2/woff2_decompress.cc
@@ -0,0 +1,54 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A very simple commandline tool for decompressing woff2 format files to true
+// type font files.
+
+#include <string>
+
+
+#include "file.h"
+#include "./woff2.h"
+
+int main(int argc, char **argv) {
+ using std::string;
+
+ if (argc != 2) {
+ fprintf(stderr, "One argument, the input filename, must be provided.\n");
+ return 1;
+ }
+
+ string filename(argv[1]);
+ string outfilename = filename.substr(0, filename.find_last_of(".")) + ".ttf";
+ fprintf(stdout, "Processing %s => %s\n",
+ filename.c_str(), outfilename.c_str());
+ string input = woff2::GetFileContent(filename);
+
+ size_t decompressed_size = woff2::ComputeWOFF2FinalSize(
+ reinterpret_cast<const uint8_t*>(input.data()), input.size());
+ string output(decompressed_size, 0);
+ const bool ok = woff2::ConvertWOFF2ToTTF(
+ reinterpret_cast<uint8_t*>(&output[0]), decompressed_size,
+ reinterpret_cast<const uint8_t*>(input.data()), input.size());
+
+ if (!ok) {
+ fprintf(stderr, "Decompression failed\n");
+ return 1;
+ }
+
+ woff2::SetFileContents(outfilename, output);
+
+ return 0;
+}
+
diff --git a/woff2_header_dump.py b/woff2_header_dump.py
new file mode 100644
index 0000000..b352d50
--- /dev/null
+++ b/woff2_header_dump.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2012 Google Inc. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# This is a simple utility for dumping out the header of a compressed file, and
+# is suitable for doing spot checks of compressed files. However, this only
+# implements the "long" form of the table directory.
+
+import struct
+import sys
+
def dump_woff2_header(header):
  """Prints the fixed 44-byte WOFF2 header fields, then the long-format
  table directory entries (5 uint32s each) that follow it.

  Args:
    header: raw bytes of the woff2 file (at least the header + directory).
  """
  # 44-byte big-endian fixed header: 13 fields.
  header_values = struct.unpack('>IIIHHIHHIIIII', header[:44])
  field_names = [
      'signature',
      'flavor',
      'length',
      'numTables',
      'reserved',
      'totalSfntSize',
      'majorVersion',
      'minorVersion',
      'metaOffset',
      'metaLength',  # was missing: every label after metaOffset was shifted
      'metaOrigLength',
      'privOffset',
      'privLength']
  for name, value in zip(field_names, header_values):
    print('%s %s' % (name, value))
  numTables = header_values[3]
  for i in range(numTables):
    entry = struct.unpack('>IIIII', header[44 + 20 * i:44 + 20 * (i + 1)])
    print('%08x %d %d %d %d' % entry)
+
def main():
  # Open in binary mode: the header is raw bytes. The original used the
  # Python-2-only file() builtin in text mode, which corrupts the data on
  # platforms with newline translation.
  header = open(sys.argv[1], 'rb').read()
  dump_woff2_header(header)


if __name__ == '__main__':
  main()