aboutsummaryrefslogtreecommitdiff
path: root/src/zopfli/blocksplitter.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/zopfli/blocksplitter.c')
-rw-r--r--src/zopfli/blocksplitter.c344
1 files changed, 344 insertions, 0 deletions
diff --git a/src/zopfli/blocksplitter.c b/src/zopfli/blocksplitter.c
new file mode 100644
index 0000000..9c2c40e
--- /dev/null
+++ b/src/zopfli/blocksplitter.c
@@ -0,0 +1,344 @@
+/*
+Copyright 2011 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: lode.vandevenne@gmail.com (Lode Vandevenne)
+Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
+*/
+
+#include "blocksplitter.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "deflate.h"
+#include "lz77.h"
+#include "squeeze.h"
+#include "tree.h"
+#include "util.h"
+
+/*
+The "f" for the FindMinimum function below.
+i: the current parameter of f(i)
+context: for your implementation
+*/
+typedef double FindMinimumFun(size_t i, void* context);
+
+/*
+Finds minimum of function f(i) where is is of type size_t, f(i) is of type
+double, i is in range start-end (excluding end).
+*/
+static size_t FindMinimum(FindMinimumFun f, void* context,
+ size_t start, size_t end) {
+ if (end - start < 1024) {
+ double best = ZOPFLI_LARGE_FLOAT;
+ size_t result = start;
+ size_t i;
+ for (i = start; i < end; i++) {
+ double v = f(i, context);
+ if (v < best) {
+ best = v;
+ result = i;
+ }
+ }
+ return result;
+ } else {
+ /* Try to find minimum faster by recursively checking multiple points. */
+#define NUM 9 /* Good value: 9. */
+ size_t i;
+ size_t p[NUM];
+ double vp[NUM];
+ size_t besti;
+ double best;
+ double lastbest = ZOPFLI_LARGE_FLOAT;
+ size_t pos = start;
+
+ for (;;) {
+ if (end - start <= NUM) break;
+
+ for (i = 0; i < NUM; i++) {
+ p[i] = start + (i + 1) * ((end - start) / (NUM + 1));
+ vp[i] = f(p[i], context);
+ }
+ besti = 0;
+ best = vp[0];
+ for (i = 1; i < NUM; i++) {
+ if (vp[i] < best) {
+ best = vp[i];
+ besti = i;
+ }
+ }
+ if (best > lastbest) break;
+
+ start = besti == 0 ? start : p[besti - 1];
+ end = besti == NUM - 1 ? end : p[besti + 1];
+
+ pos = p[besti];
+ lastbest = best;
+ }
+ return pos;
+#undef NUM
+ }
+}
+
+/*
+Returns estimated cost of a block in bits. It includes the size to encode the
+tree and the size to encode all literal, length and distance symbols and their
+extra bits.
+
+litlens: lz77 lit/lengths
+dists: ll77 distances
+lstart: start of block
+lend: end of block (not inclusive)
+*/
+static double EstimateCost(const unsigned short* litlens,
+ const unsigned short* dists,
+ size_t lstart, size_t lend) {
+ return ZopfliCalculateBlockSize(litlens, dists, lstart, lend, 2);
+}
+
+typedef struct SplitCostContext {
+ const unsigned short* litlens;
+ const unsigned short* dists;
+ size_t llsize;
+ size_t start;
+ size_t end;
+} SplitCostContext;
+
+
+/*
+Gets the cost which is the sum of the cost of the left and the right section
+of the data.
+type: FindMinimumFun
+*/
+static double SplitCost(size_t i, void* context) {
+ SplitCostContext* c = (SplitCostContext*)context;
+ return EstimateCost(c->litlens, c->dists, c->start, i) +
+ EstimateCost(c->litlens, c->dists, i, c->end);
+}
+
+static void AddSorted(size_t value, size_t** out, size_t* outsize) {
+ size_t i;
+ ZOPFLI_APPEND_DATA(value, out, outsize);
+ if (*outsize > 0) {
+ for (i = 0; i < *outsize - 1; i++) {
+ if ((*out)[i] > value) {
+ size_t j;
+ for (j = *outsize - 1; j > i; j--) {
+ (*out)[j] = (*out)[j - 1];
+ }
+ (*out)[i] = value;
+ break;
+ }
+ }
+ }
+}
+
+/*
+Prints the block split points as decimal and hex values in the terminal.
+*/
+static void PrintBlockSplitPoints(const unsigned short* litlens,
+ const unsigned short* dists,
+ size_t llsize, const size_t* lz77splitpoints,
+ size_t nlz77points) {
+ size_t* splitpoints = 0;
+ size_t npoints = 0;
+ size_t i;
+ /* The input is given as lz77 indices, but we want to see the uncompressed
+ index values. */
+ size_t pos = 0;
+ if (nlz77points > 0) {
+ for (i = 0; i < llsize; i++) {
+ size_t length = dists[i] == 0 ? 1 : litlens[i];
+ if (lz77splitpoints[npoints] == i) {
+ ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints);
+ if (npoints == nlz77points) break;
+ }
+ pos += length;
+ }
+ }
+ assert(npoints == nlz77points);
+
+ fprintf(stderr, "block split points: ");
+ for (i = 0; i < npoints; i++) {
+ fprintf(stderr, "%d ", (int)splitpoints[i]);
+ }
+ fprintf(stderr, "(hex:");
+ for (i = 0; i < npoints; i++) {
+ fprintf(stderr, " %x", (int)splitpoints[i]);
+ }
+ fprintf(stderr, ")\n");
+
+ free(splitpoints);
+}
+
+/*
+Finds next block to try to split, the largest of the available ones.
+The largest is chosen to make sure that if only a limited amount of blocks is
+requested, their sizes are spread evenly.
+llsize: the size of the LL77 data, which is the size of the done array here.
+done: array indicating which blocks starting at that position are no longer
+ splittable (splitting them increases rather than decreases cost).
+splitpoints: the splitpoints found so far.
+npoints: the amount of splitpoints found so far.
+lstart: output variable, giving start of block.
+lend: output variable, giving end of block.
+returns 1 if a block was found, 0 if no block found (all are done).
+*/
+static int FindLargestSplittableBlock(
+ size_t llsize, const unsigned char* done,
+ const size_t* splitpoints, size_t npoints,
+ size_t* lstart, size_t* lend) {
+ size_t longest = 0;
+ int found = 0;
+ size_t i;
+ for (i = 0; i <= npoints; i++) {
+ size_t start = i == 0 ? 0 : splitpoints[i - 1];
+ size_t end = i == npoints ? llsize - 1 : splitpoints[i];
+ if (!done[start] && end - start > longest) {
+ *lstart = start;
+ *lend = end;
+ found = 1;
+ longest = end - start;
+ }
+ }
+ return found;
+}
+
+void ZopfliBlockSplitLZ77(const ZopfliOptions* options,
+ const unsigned short* litlens,
+ const unsigned short* dists,
+ size_t llsize, size_t maxblocks,
+ size_t** splitpoints, size_t* npoints) {
+ size_t lstart, lend;
+ size_t i;
+ size_t llpos = 0;
+ size_t numblocks = 1;
+ unsigned char* done;
+ double splitcost, origcost;
+
+ if (llsize < 10) return; /* This code fails on tiny files. */
+
+ done = (unsigned char*)malloc(llsize);
+ if (!done) exit(-1); /* Allocation failed. */
+ for (i = 0; i < llsize; i++) done[i] = 0;
+
+ lstart = 0;
+ lend = llsize;
+ for (;;) {
+ SplitCostContext c;
+
+ if (maxblocks > 0 && numblocks >= maxblocks) {
+ break;
+ }
+
+ c.litlens = litlens;
+ c.dists = dists;
+ c.llsize = llsize;
+ c.start = lstart;
+ c.end = lend;
+ assert(lstart < lend);
+ llpos = FindMinimum(SplitCost, &c, lstart + 1, lend);
+
+ assert(llpos > lstart);
+ assert(llpos < lend);
+
+ splitcost = EstimateCost(litlens, dists, lstart, llpos) +
+ EstimateCost(litlens, dists, llpos, lend);
+ origcost = EstimateCost(litlens, dists, lstart, lend);
+
+ if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) {
+ done[lstart] = 1;
+ } else {
+ AddSorted(llpos, splitpoints, npoints);
+ numblocks++;
+ }
+
+ if (!FindLargestSplittableBlock(
+ llsize, done, *splitpoints, *npoints, &lstart, &lend)) {
+ break; /* No further split will probably reduce compression. */
+ }
+
+ if (lend - lstart < 10) {
+ break;
+ }
+ }
+
+ if (options->verbose) {
+ PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints);
+ }
+
+ free(done);
+}
+
+void ZopfliBlockSplit(const ZopfliOptions* options,
+ const unsigned char* in, size_t instart, size_t inend,
+ size_t maxblocks, size_t** splitpoints, size_t* npoints) {
+ size_t pos = 0;
+ size_t i;
+ ZopfliBlockState s;
+ size_t* lz77splitpoints = 0;
+ size_t nlz77points = 0;
+ ZopfliLZ77Store store;
+
+ ZopfliInitLZ77Store(&store);
+
+ s.options = options;
+ s.blockstart = instart;
+ s.blockend = inend;
+#ifdef ZOPFLI_LONGEST_MATCH_CACHE
+ s.lmc = 0;
+#endif
+
+ *npoints = 0;
+ *splitpoints = 0;
+
+ /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal
+ results in better blocks. */
+ ZopfliLZ77Greedy(&s, in, instart, inend, &store);
+
+ ZopfliBlockSplitLZ77(options,
+ store.litlens, store.dists, store.size, maxblocks,
+ &lz77splitpoints, &nlz77points);
+
+ /* Convert LZ77 positions to positions in the uncompressed input. */
+ pos = instart;
+ if (nlz77points > 0) {
+ for (i = 0; i < store.size; i++) {
+ size_t length = store.dists[i] == 0 ? 1 : store.litlens[i];
+ if (lz77splitpoints[*npoints] == i) {
+ ZOPFLI_APPEND_DATA(pos, splitpoints, npoints);
+ if (*npoints == nlz77points) break;
+ }
+ pos += length;
+ }
+ }
+ assert(*npoints == nlz77points);
+
+ free(lz77splitpoints);
+ ZopfliCleanLZ77Store(&store);
+}
+
+void ZopfliBlockSplitSimple(const unsigned char* in,
+ size_t instart, size_t inend,
+ size_t blocksize,
+ size_t** splitpoints, size_t* npoints) {
+ size_t i = instart;
+ while (i < inend) {
+ ZOPFLI_APPEND_DATA(i, splitpoints, npoints);
+ i += blocksize;
+ }
+ (void)in;
+}