Initial commit

author: Lode Vandevenne <lode@google.com> 2013-01-16 10:30:56 +0100
committer: Lode Vandevenne <lode@google.com> 2013-01-16 10:30:56 +0100
commit: b50b7ef8f8150616d3e9a227ce2d722a8355b1da (patch)
tree: 33731fde3659eb06ebf5f90f06ad7a6723eea82c /util.h
download: zopfli-b50b7ef8f8150616d3e9a227ce2d722a8355b1da.tar.gz
1 files changed, 214 insertions, 0 deletions
diff --git a/util.h b/util.h
new file mode 100644
index 0000000..a1ac653
--- /dev/null
+++ b/util.h
@@ -0,0 +1,214 @@
+/*
+Copyright 2011 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: lode.vandevenne@gmail.com (Lode Vandevenne)
+Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
+*/
+
+/*
+Several utilities, including: #defines to try different compression results,
+basic deflate specification values and generic program options.
+*/
+
+#ifndef ZOPFLI_UTIL_H_
+#define ZOPFLI_UTIL_H_
+
+#include <string.h>
+#include <stdlib.h>
+
+/* Minimum and maximum length that can be encoded in deflate. */
+#define MAX_MATCH 258
+#define MIN_MATCH 3
+
+/*
+The window size for deflate. Must be a power of two. This should be 32768, the
+maximum possible by the deflate spec. Anything less hurts compression more than
+speed.
+*/
+#define WINDOW_SIZE 32768
+
+/*
+The window mask used to wrap indices into the window. This is why the
+window size must be a power of two.
+*/
+#define WINDOW_MASK (WINDOW_SIZE - 1)
+
+/*
+A block structure of huge, non-smart, blocks to divide the input into, to allow
+operating on huge files without exceeding memory, such as the 1GB wiki9 corpus.
+The whole compression algorithm, including the smarter block splitting, will
+be executed independently on each huge block.
+Dividing into huge blocks hurts compression, but not much relative to the size.
+Set this to, for example, 20MB (20000000). Set it to 0 to disable master blocks.
+*/
+#define MASTER_BLOCK_SIZE 20000000
+
+/*
+Used to initialize costs for example
+*/
+#define LARGE_FLOAT 1e30
+
+/*
+For longest match cache. max 256. Uses huge amounts of memory but makes it
+faster. Uses this many times three bytes per single byte of the input data.
+This is so because longest match finding has to find the exact distance
+that belongs to each length for the best lz77 strategy.
+Good values: e.g. 5, 8.
+*/
+#define NUM_CACHED_LENGTHS 8
+
+/*
+limit the max hash chain hits for this hash value. This has an effect only
+on files where the hash value is the same very often. On these files, this
+gives worse compression (the value should ideally be 32768, which is the
+WINDOW_SIZE, while zlib uses 4096 even for best level), but makes it faster on
+some specific files.
+Good value: e.g. 8192.
+*/
+#define MAX_CHAIN_HITS 8192
+
+/*
+Whether to use the longest match cache for FindLongestMatch. This cache
+consumes a lot of memory but speeds it up. No effect on compression size.
+*/
+#define USE_LONGEST_MATCH_CACHE
+
+/*
+Enable to remember amount of successive identical bytes in the hash chain for
+finding longest match
+required for USE_HASH_SAME_HASH and SHORTCUT_LONG_REPETITIONS
+This has no effect on the compression result, and enabling it increases speed.
+*/
+#define USE_HASH_SAME
+
+/*
+Switch to a faster hash based on the info from USE_HASH_SAME once the
+best length so far is long enough. This is way faster for files with lots of
+identical bytes, on which the compressor is otherwise too slow. Regular files
+are unaffected or maybe a tiny bit slower.
+This has no effect on the compression result, only on speed.
+*/
+#define USE_HASH_SAME_HASH
+
+/*
+Enable this, to avoid slowness for files which are a repetition of the same
+character more than a multiple of MAX_MATCH times. This should not affect the
+compression result.
+*/
+#define SHORTCUT_LONG_REPETITIONS
+
+/*
+Whether to use lazy matching in the greedy LZ77 implementation. This gives a
+better result of LZ77Greedy, but the effect this has on the optimal LZ77
+varies from file to file.
+*/
+#define LAZY_MATCHING
+
+/*
+Gets the symbol for the given length, cfr. the DEFLATE spec.
+Returns the symbol in the range [257-285] (inclusive)
+*/
+int GetLengthSymbol(int l);
+
+/* Gets the amount of extra bits for the given length, cfr. the DEFLATE spec. */
+int GetLengthExtraBits(int l);
+
+/* Gets value of the extra bits for the given length, cfr. the DEFLATE spec. */
+int GetLengthExtraBitsValue(int l);
+
+/* Gets the symbol for the given dist, cfr. the DEFLATE spec. */
+int GetDistSymbol(int dist);
+
+/* Gets the amount of extra bits for the given dist, cfr. the DEFLATE spec. */
+int GetDistExtraBits(int dist);
+
+/* Gets value of the extra bits for the given dist, cfr. the DEFLATE spec. */
+int GetDistExtraBitsValue(int dist);
+
+/*
+Options used throughout the program.
+*/
+typedef struct Options {
+  /* Whether to print output */
+  int verbose;
+
+  /*
+  Maximum amount of times to rerun forward and backward pass to optimize LZ77
+  compression cost. Good values: 10, 15 for small files, 5 for files over
+  several MB in size or it will be too slow.
+  */
+  int numiterations;
+
+  /*
+  If true, splits the data in multiple deflate blocks with optimal choice
+  for the block boundaries. Block splitting gives better compression. Default:
+  true (1).
+  */
+  int blocksplitting;
+
+  /*
+  If true, chooses the optimal block split points only after doing the iterative
+  LZ77 compression. If false, chooses the block split points first, then does
+  iterative LZ77 on each individual block. Depending on the file, either first
+  or last gives the best compression. Default: false (0).
+  */
+  int blocksplittinglast;
+
+  /*
+  Maximum amount of blocks to split into (0 for unlimited, but this can give
+  extreme results that hurt compression on some files). Default value: 15.
+  */
+  int blocksplittingmax;
+} Options;
+
+/* Initializes options with default values. */
+void InitOptions(Options* options);
+
+/*
+Appends value to dynamically allocated memory, doubling its allocation size
+whenever needed.
+
+value: the value to append, type T
+data: pointer to the dynamic array to append to, type T**
+size: pointer to the size of the array to append to, type size_t*. This is the
+size that you consider the array to be, not the internal allocation size.
+Precondition: allocated size of data is at least a power of two greater than or
+equal than *size.
+*/
+#ifdef __cplusplus /* C++ cannot assign void* from malloc to *data */
+#define APPEND_DATA(/* T */ value, /* T** */ data, /* size_t* */ size) {\
+  if (!((*size) & ((*size) - 1))) {\
+    /*double alloc size if it's a power of two*/\
+    void** data_void = reinterpret_cast<void**>(data);\
+    *data_void = (*size) == 0 ? malloc(sizeof(**data))\
+                              : realloc((*data), (*size) * 2 * sizeof(**data));\
+  }\
+  (*data)[(*size)] = (value);\
+  (*size)++;\
+}
+#else /* C gives problems with strict-aliasing rules for (void**) cast */
+#define APPEND_DATA(/* T */ value, /* T** */ data, /* size_t* */ size) {\
+  if (!((*size) & ((*size) - 1))) {\
+    /*double alloc size if it's a power of two*/\
+    (*data) = (*size) == 0 ? malloc(sizeof(**data))\
+                           : realloc((*data), (*size) * 2 * sizeof(**data));\
+  }\
+  (*data)[(*size)] = (value);\
+  (*size)++;\
+}
+#endif
+
+
+#endif  /* ZOPFLI_UTIL_H_ */
author	Lode Vandevenne <lode@google.com>	2013-01-16 10:30:56 +0100
committer	Lode Vandevenne <lode@google.com>	2013-01-16 10:30:56 +0100
commit	b50b7ef8f8150616d3e9a227ce2d722a8355b1da (patch)
tree	33731fde3659eb06ebf5f90f06ad7a6723eea82c /util.h
download	zopfli-b50b7ef8f8150616d3e9a227ce2d722a8355b1da.tar.gz