diff options
author | Lode Vandevenne <lode@google.com> | 2013-01-16 10:30:56 +0100 |
---|---|---|
committer | Lode Vandevenne <lode@google.com> | 2013-01-16 10:30:56 +0100 |
commit | b50b7ef8f8150616d3e9a227ce2d722a8355b1da (patch) | |
tree | 33731fde3659eb06ebf5f90f06ad7a6723eea82c /util.h | |
download | zopfli-b50b7ef8f8150616d3e9a227ce2d722a8355b1da.tar.gz |
Initial commit
Diffstat (limited to 'util.h')
-rw-r--r-- | util.h | 214 |
1 files changed, 214 insertions, 0 deletions
@@ -0,0 +1,214 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +/* +Several utilities, including: #defines to try different compression results, +basic deflate specification values and generic program options. +*/ + +#ifndef ZOPFLI_UTIL_H_ +#define ZOPFLI_UTIL_H_ + +#include <string.h> +#include <stdlib.h> + +/* Minimum and maximum length that can be encoded in deflate. */ +#define MAX_MATCH 258 +#define MIN_MATCH 3 + +/* +The window size for deflate. Must be a power of two. This should be 32768, the +maximum possible by the deflate spec. Anything less hurts compression more than +speed. +*/ +#define WINDOW_SIZE 32768 + +/* +The window mask used to wrap indices into the window. This is why the +window size must be a power of two. +*/ +#define WINDOW_MASK (WINDOW_SIZE - 1) + +/* +A block structure of huge, non-smart, blocks to divide the input into, to allow +operating on huge files without exceeding memory, such as the 1GB wiki9 corpus. +The whole compression algorithm, including the smarter block splitting, will +be executed independently on each huge block. +Dividing into huge blocks hurts compression, but not much relative to the size. +Set this to, for example, 20MB (20000000). Set it to 0 to disable master blocks. +*/ +#define MASTER_BLOCK_SIZE 20000000 + +/* +Used to initialize costs for example +*/ +#define LARGE_FLOAT 1e30 + +/* +For longest match cache. max 256. Uses huge amounts of memory but makes it +faster. Uses this many times three bytes per single byte of the input data. +This is so because longest match finding has to find the exact distance +that belongs to each length for the best lz77 strategy. +Good values: e.g. 5, 8. +*/ +#define NUM_CACHED_LENGTHS 8 + +/* +limit the max hash chain hits for this hash value. This has an effect only +on files where the hash value is the same very often. On these files, this +gives worse compression (the value should ideally be 32768, which is the +WINDOW_SIZE, while zlib uses 4096 even for best level), but makes it faster on +some specific files. +Good value: e.g. 8192. +*/ +#define MAX_CHAIN_HITS 8192 + +/* +Whether to use the longest match cache for FindLongestMatch. This cache +consumes a lot of memory but speeds it up. No effect on compression size. +*/ +#define USE_LONGEST_MATCH_CACHE + +/* +Enable to remember amount of successive identical bytes in the hash chain for +finding longest match +required for USE_HASH_SAME_HASH and SHORTCUT_LONG_REPETITIONS +This has no effect on the compression result, and enabling it increases speed. +*/ +#define USE_HASH_SAME + +/* +Switch to a faster hash based on the info from USE_HASH_SAME once the +best length so far is long enough. This is way faster for files with lots of +identical bytes, on which the compressor is otherwise too slow. Regular files +are unaffected or maybe a tiny bit slower. +This has no effect on the compression result, only on speed. +*/ +#define USE_HASH_SAME_HASH + +/* +Enable this, to avoid slowness for files which are a repetition of the same +character more than a multiple of MAX_MATCH times. This should not affect the +compression result. +*/ +#define SHORTCUT_LONG_REPETITIONS + +/* +Whether to use lazy matching in the greedy LZ77 implementation. This gives a +better result of LZ77Greedy, but the effect this has on the optimal LZ77 +varies from file to file. +*/ +#define LAZY_MATCHING + +/* +Gets the symbol for the given length, cfr. the DEFLATE spec. +Returns the symbol in the range [257-285] (inclusive) +*/ +int GetLengthSymbol(int l); + +/* Gets the amount of extra bits for the given length, cfr. the DEFLATE spec. */ +int GetLengthExtraBits(int l); + +/* Gets value of the extra bits for the given length, cfr. the DEFLATE spec. */ +int GetLengthExtraBitsValue(int l); + +/* Gets the symbol for the given dist, cfr. the DEFLATE spec. */ +int GetDistSymbol(int dist); + +/* Gets the amount of extra bits for the given dist, cfr. the DEFLATE spec. */ +int GetDistExtraBits(int dist); + +/* Gets value of the extra bits for the given dist, cfr. the DEFLATE spec. */ +int GetDistExtraBitsValue(int dist); + +/* +Options used throughout the program. +*/ +typedef struct Options { + /* Whether to print output */ + int verbose; + + /* + Maximum amount of times to rerun forward and backward pass to optimize LZ77 + compression cost. Good values: 10, 15 for small files, 5 for files over + several MB in size or it will be too slow. + */ + int numiterations; + + /* + If true, splits the data in multiple deflate blocks with optimal choice + for the block boundaries. Block splitting gives better compression. Default: + true (1). + */ + int blocksplitting; + + /* + If true, chooses the optimal block split points only after doing the iterative + LZ77 compression. If false, chooses the block split points first, then does + iterative LZ77 on each individual block. Depending on the file, either first + or last gives the best compression. Default: false (0). + */ + int blocksplittinglast; + + /* + Maximum amount of blocks to split into (0 for unlimited, but this can give + extreme results that hurt compression on some files). Default value: 15. + */ + int blocksplittingmax; +} Options; + +/* Initializes options with default values. */ +void InitOptions(Options* options); + +/* +Appends value to dynamically allocated memory, doubling its allocation size +whenever needed. + +value: the value to append, type T +data: pointer to the dynamic array to append to, type T** +size: pointer to the size of the array to append to, type size_t*. This is the +size that you consider the array to be, not the internal allocation size. +Precondition: allocated size of data is at least a power of two greater than or +equal than *size. +*/ +#ifdef __cplusplus /* C++ cannot assign void* from malloc to *data */ +#define APPEND_DATA(/* T */ value, /* T** */ data, /* size_t* */ size) {\ + if (!((*size) & ((*size) - 1))) {\ + /*double alloc size if it's a power of two*/\ + void** data_void = reinterpret_cast<void**>(data);\ + *data_void = (*size) == 0 ? malloc(sizeof(**data))\ + : realloc((*data), (*size) * 2 * sizeof(**data));\ + }\ + (*data)[(*size)] = (value);\ + (*size)++;\ +} +#else /* C gives problems with strict-aliasing rules for (void**) cast */ +#define APPEND_DATA(/* T */ value, /* T** */ data, /* size_t* */ size) {\ + if (!((*size) & ((*size) - 1))) {\ + /*double alloc size if it's a power of two*/\ + (*data) = (*size) == 0 ? malloc(sizeof(**data))\ + : realloc((*data), (*size) * 2 * sizeof(**data));\ + }\ + (*data)[(*size)] = (value);\ + (*size)++;\ +} +#endif + + +#endif /* ZOPFLI_UTIL_H_ */ |