aboutsummaryrefslogtreecommitdiff
path: root/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/org/tukaani/xz/lzma/LZMAEncoderNormal.java')
-rw-r--r--src/org/tukaani/xz/lzma/LZMAEncoderNormal.java568
1 files changed, 568 insertions, 0 deletions
diff --git a/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java b/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java
new file mode 100644
index 0000000..8079cd2
--- /dev/null
+++ b/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java
@@ -0,0 +1,568 @@
+/*
+ * LZMAEncoderNormal
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ * Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz.lzma;
+
+import org.tukaani.xz.ArrayCache;
+import org.tukaani.xz.lz.LZEncoder;
+import org.tukaani.xz.lz.Matches;
+import org.tukaani.xz.rangecoder.RangeEncoder;
+
+final class LZMAEncoderNormal extends LZMAEncoder {
+ private static final int OPTS = 4096;
+
+ private static final int EXTRA_SIZE_BEFORE = OPTS;
+ private static final int EXTRA_SIZE_AFTER = OPTS;
+
+ private final Optimum[] opts = new Optimum[OPTS];
+ private int optCur = 0;
+ private int optEnd = 0;
+
+ private Matches matches;
+
+ // These are fields solely to avoid allocating the objects again and
+ // again on each function call.
+ private final int[] repLens = new int[REPS];
+ private final State nextState = new State();
+
+ static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) {
+ return LZEncoder.getMemoryUsage(dictSize,
+ Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE),
+ EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf)
+ + OPTS * 64 / 1024;
+ }
+
+ LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb,
+ int dictSize, int extraSizeBefore,
+ int niceLen, int mf, int depthLimit,
+ ArrayCache arrayCache) {
+ super(rc, LZEncoder.getInstance(dictSize,
+ Math.max(extraSizeBefore,
+ EXTRA_SIZE_BEFORE),
+ EXTRA_SIZE_AFTER,
+ niceLen, MATCH_LEN_MAX,
+ mf, depthLimit, arrayCache),
+ lc, lp, pb, dictSize, niceLen);
+
+ for (int i = 0; i < OPTS; ++i)
+ opts[i] = new Optimum();
+ }
+
+ public void reset() {
+ optCur = 0;
+ optEnd = 0;
+ super.reset();
+ }
+
+ /**
+ * Converts the opts array from backward indexes to forward indexes.
+ * Then it will be simple to get the next symbol from the array
+ * in later calls to <code>getNextSymbol()</code>.
+ */
+ private int convertOpts() {
+ optEnd = optCur;
+
+ int optPrev = opts[optCur].optPrev;
+
+ do {
+ Optimum opt = opts[optCur];
+
+ if (opt.prev1IsLiteral) {
+ opts[optPrev].optPrev = optCur;
+ opts[optPrev].backPrev = -1;
+ optCur = optPrev--;
+
+ if (opt.hasPrev2) {
+ opts[optPrev].optPrev = optPrev + 1;
+ opts[optPrev].backPrev = opt.backPrev2;
+ optCur = optPrev;
+ optPrev = opt.optPrev2;
+ }
+ }
+
+ int temp = opts[optPrev].optPrev;
+ opts[optPrev].optPrev = optCur;
+ optCur = optPrev;
+ optPrev = temp;
+ } while (optCur > 0);
+
+ optCur = opts[0].optPrev;
+ back = opts[optCur].backPrev;
+ return optCur;
+ }
+
+ int getNextSymbol() {
+ // If there are pending symbols from an earlier call to this
+ // function, return those symbols first.
+ if (optCur < optEnd) {
+ int len = opts[optCur].optPrev - optCur;
+ optCur = opts[optCur].optPrev;
+ back = opts[optCur].backPrev;
+ return len;
+ }
+
+ assert optCur == optEnd;
+ optCur = 0;
+ optEnd = 0;
+ back = -1;
+
+ if (readAhead == -1)
+ matches = getMatches();
+
+ // Get the number of bytes available in the dictionary, but
+ // not more than the maximum match length. If there aren't
+ // enough bytes remaining to encode a match at all, return
+ // immediately to encode this byte as a literal.
+ int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX);
+ if (avail < MATCH_LEN_MIN)
+ return 1;
+
+ // Get the lengths of repeated matches.
+ int repBest = 0;
+ for (int rep = 0; rep < REPS; ++rep) {
+ repLens[rep] = lz.getMatchLen(reps[rep], avail);
+
+ if (repLens[rep] < MATCH_LEN_MIN) {
+ repLens[rep] = 0;
+ continue;
+ }
+
+ if (repLens[rep] > repLens[repBest])
+ repBest = rep;
+ }
+
+ // Return if the best repeated match is at least niceLen bytes long.
+ if (repLens[repBest] >= niceLen) {
+ back = repBest;
+ skip(repLens[repBest] - 1);
+ return repLens[repBest];
+ }
+
+ // Initialize mainLen and mainDist to the longest match found
+ // by the match finder.
+ int mainLen = 0;
+ int mainDist = 0;
+ if (matches.count > 0) {
+ mainLen = matches.len[matches.count - 1];
+ mainDist = matches.dist[matches.count - 1];
+
+ // Return if it is at least niceLen bytes long.
+ if (mainLen >= niceLen) {
+ back = mainDist + REPS;
+ skip(mainLen - 1);
+ return mainLen;
+ }
+ }
+
+ int curByte = lz.getByte(0);
+ int matchByte = lz.getByte(reps[0] + 1);
+
+ // If the match finder found no matches and this byte cannot be
+ // encoded as a repeated match (short or long), we must be return
+ // to have the byte encoded as a literal.
+ if (mainLen < MATCH_LEN_MIN && curByte != matchByte
+ && repLens[repBest] < MATCH_LEN_MIN)
+ return 1;
+
+
+ int pos = lz.getPos();
+ int posState = pos & posMask;
+
+ // Calculate the price of encoding the current byte as a literal.
+ {
+ int prevByte = lz.getByte(1);
+ int literalPrice = literalEncoder.getPrice(curByte, matchByte,
+ prevByte, pos, state);
+ opts[1].set1(literalPrice, 0, -1);
+ }
+
+ int anyMatchPrice = getAnyMatchPrice(state, posState);
+ int anyRepPrice = getAnyRepPrice(anyMatchPrice, state);
+
+ // If it is possible to encode this byte as a short rep, see if
+ // it is cheaper than encoding it as a literal.
+ if (matchByte == curByte) {
+ int shortRepPrice = getShortRepPrice(anyRepPrice,
+ state, posState);
+ if (shortRepPrice < opts[1].price)
+ opts[1].set1(shortRepPrice, 0, 0);
+ }
+
+ // Return if there is neither normal nor long repeated match. Use
+ // a short match instead of a literal if is is possible and cheaper.
+ optEnd = Math.max(mainLen, repLens[repBest]);
+ if (optEnd < MATCH_LEN_MIN) {
+ assert optEnd == 0 : optEnd;
+ back = opts[1].backPrev;
+ return 1;
+ }
+
+
+ // Update the lookup tables for distances and lengths before using
+ // those price calculation functions. (The price function above
+ // don't need these tables.)
+ updatePrices();
+
+ // Initialize the state and reps of this position in opts[].
+ // updateOptStateAndReps() will need these to get the new
+ // state and reps for the next byte.
+ opts[0].state.set(state);
+ System.arraycopy(reps, 0, opts[0].reps, 0, REPS);
+
+ // Initialize the prices for latter opts that will be used below.
+ for (int i = optEnd; i >= MATCH_LEN_MIN; --i)
+ opts[i].reset();
+
+ // Calculate the prices of repeated matches of all lengths.
+ for (int rep = 0; rep < REPS; ++rep) {
+ int repLen = repLens[rep];
+ if (repLen < MATCH_LEN_MIN)
+ continue;
+
+ int longRepPrice = getLongRepPrice(anyRepPrice, rep,
+ state, posState);
+ do {
+ int price = longRepPrice + repLenEncoder.getPrice(repLen,
+ posState);
+ if (price < opts[repLen].price)
+ opts[repLen].set1(price, 0, rep);
+ } while (--repLen >= MATCH_LEN_MIN);
+ }
+
+ // Calculate the prices of normal matches that are longer than rep0.
+ {
+ int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN);
+ if (len <= mainLen) {
+ int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
+ state);
+
+ // Set i to the index of the shortest match that is
+ // at least len bytes long.
+ int i = 0;
+ while (len > matches.len[i])
+ ++i;
+
+ while (true) {
+ int dist = matches.dist[i];
+ int price = getMatchAndLenPrice(normalMatchPrice,
+ dist, len, posState);
+ if (price < opts[len].price)
+ opts[len].set1(price, 0, dist + REPS);
+
+ if (len == matches.len[i])
+ if (++i == matches.count)
+ break;
+
+ ++len;
+ }
+ }
+ }
+
+
+ avail = Math.min(lz.getAvail(), OPTS - 1);
+
+ // Get matches for later bytes and optimize the use of LZMA symbols
+ // by calculating the prices and picking the cheapest symbol
+ // combinations.
+ while (++optCur < optEnd) {
+ matches = getMatches();
+ if (matches.count > 0
+ && matches.len[matches.count - 1] >= niceLen)
+ break;
+
+ --avail;
+ ++pos;
+ posState = pos & posMask;
+
+ updateOptStateAndReps();
+ anyMatchPrice = opts[optCur].price
+ + getAnyMatchPrice(opts[optCur].state, posState);
+ anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state);
+
+ calc1BytePrices(pos, posState, avail, anyRepPrice);
+
+ if (avail >= MATCH_LEN_MIN) {
+ int startLen = calcLongRepPrices(pos, posState,
+ avail, anyRepPrice);
+ if (matches.count > 0)
+ calcNormalMatchPrices(pos, posState, avail,
+ anyMatchPrice, startLen);
+ }
+ }
+
+ return convertOpts();
+ }
+
+ /**
+ * Updates the state and reps for the current byte in the opts array.
+ */
+ private void updateOptStateAndReps() {
+ int optPrev = opts[optCur].optPrev;
+ assert optPrev < optCur;
+
+ if (opts[optCur].prev1IsLiteral) {
+ --optPrev;
+
+ if (opts[optCur].hasPrev2) {
+ opts[optCur].state.set(opts[opts[optCur].optPrev2].state);
+ if (opts[optCur].backPrev2 < REPS)
+ opts[optCur].state.updateLongRep();
+ else
+ opts[optCur].state.updateMatch();
+ } else {
+ opts[optCur].state.set(opts[optPrev].state);
+ }
+
+ opts[optCur].state.updateLiteral();
+ } else {
+ opts[optCur].state.set(opts[optPrev].state);
+ }
+
+ if (optPrev == optCur - 1) {
+ // Must be either a short rep or a literal.
+ assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1;
+
+ if (opts[optCur].backPrev == 0)
+ opts[optCur].state.updateShortRep();
+ else
+ opts[optCur].state.updateLiteral();
+
+ System.arraycopy(opts[optPrev].reps, 0,
+ opts[optCur].reps, 0, REPS);
+ } else {
+ int back;
+ if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) {
+ optPrev = opts[optCur].optPrev2;
+ back = opts[optCur].backPrev2;
+ opts[optCur].state.updateLongRep();
+ } else {
+ back = opts[optCur].backPrev;
+ if (back < REPS)
+ opts[optCur].state.updateLongRep();
+ else
+ opts[optCur].state.updateMatch();
+ }
+
+ if (back < REPS) {
+ opts[optCur].reps[0] = opts[optPrev].reps[back];
+
+ int rep;
+ for (rep = 1; rep <= back; ++rep)
+ opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1];
+
+ for (; rep < REPS; ++rep)
+ opts[optCur].reps[rep] = opts[optPrev].reps[rep];
+ } else {
+ opts[optCur].reps[0] = back - REPS;
+ System.arraycopy(opts[optPrev].reps, 0,
+ opts[optCur].reps, 1, REPS - 1);
+ }
+ }
+ }
+
+ /**
+ * Calculates prices of a literal, a short rep, and literal + rep0.
+ */
+ private void calc1BytePrices(int pos, int posState,
+ int avail, int anyRepPrice) {
+ // This will be set to true if using a literal or a short rep.
+ boolean nextIsByte = false;
+
+ int curByte = lz.getByte(0);
+ int matchByte = lz.getByte(opts[optCur].reps[0] + 1);
+
+ // Try a literal.
+ int literalPrice = opts[optCur].price
+ + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1),
+ pos, opts[optCur].state);
+ if (literalPrice < opts[optCur + 1].price) {
+ opts[optCur + 1].set1(literalPrice, optCur, -1);
+ nextIsByte = true;
+ }
+
+ // Try a short rep.
+ if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur
+ || opts[optCur + 1].backPrev != 0)) {
+ int shortRepPrice = getShortRepPrice(anyRepPrice,
+ opts[optCur].state,
+ posState);
+ if (shortRepPrice <= opts[optCur + 1].price) {
+ opts[optCur + 1].set1(shortRepPrice, optCur, 0);
+ nextIsByte = true;
+ }
+ }
+
+ // If neither a literal nor a short rep was the cheapest choice,
+ // try literal + long rep0.
+ if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) {
+ int lenLimit = Math.min(niceLen, avail - 1);
+ int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit);
+
+ if (len >= MATCH_LEN_MIN) {
+ nextState.set(opts[optCur].state);
+ nextState.updateLiteral();
+ int nextPosState = (pos + 1) & posMask;
+ int price = literalPrice
+ + getLongRepAndLenPrice(0, len,
+ nextState, nextPosState);
+
+ int i = optCur + 1 + len;
+ while (optEnd < i)
+ opts[++optEnd].reset();
+
+ if (price < opts[i].price)
+ opts[i].set2(price, optCur, 0);
+ }
+ }
+ }
+
+ /**
+ * Calculates prices of long rep and long rep + literal + rep0.
+ */
+ private int calcLongRepPrices(int pos, int posState,
+ int avail, int anyRepPrice) {
+ int startLen = MATCH_LEN_MIN;
+ int lenLimit = Math.min(avail, niceLen);
+
+ for (int rep = 0; rep < REPS; ++rep) {
+ int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit);
+ if (len < MATCH_LEN_MIN)
+ continue;
+
+ while (optEnd < optCur + len)
+ opts[++optEnd].reset();
+
+ int longRepPrice = getLongRepPrice(anyRepPrice, rep,
+ opts[optCur].state, posState);
+
+ for (int i = len; i >= MATCH_LEN_MIN; --i) {
+ int price = longRepPrice
+ + repLenEncoder.getPrice(i, posState);
+ if (price < opts[optCur + i].price)
+ opts[optCur + i].set1(price, optCur, rep);
+ }
+
+ if (rep == 0)
+ startLen = len + 1;
+
+ int len2Limit = Math.min(niceLen, avail - len - 1);
+ int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep],
+ len2Limit);
+
+ if (len2 >= MATCH_LEN_MIN) {
+ // Rep
+ int price = longRepPrice
+ + repLenEncoder.getPrice(len, posState);
+ nextState.set(opts[optCur].state);
+ nextState.updateLongRep();
+
+ // Literal
+ int curByte = lz.getByte(len, 0);
+ int matchByte = lz.getByte(0); // lz.getByte(len, len)
+ int prevByte = lz.getByte(len, 1);
+ price += literalEncoder.getPrice(curByte, matchByte, prevByte,
+ pos + len, nextState);
+ nextState.updateLiteral();
+
+ // Rep0
+ int nextPosState = (pos + len + 1) & posMask;
+ price += getLongRepAndLenPrice(0, len2,
+ nextState, nextPosState);
+
+ int i = optCur + len + 1 + len2;
+ while (optEnd < i)
+ opts[++optEnd].reset();
+
+ if (price < opts[i].price)
+ opts[i].set3(price, optCur, rep, len, 0);
+ }
+ }
+
+ return startLen;
+ }
+
+ /**
+ * Calculates prices of a normal match and normal match + literal + rep0.
+ */
+ private void calcNormalMatchPrices(int pos, int posState, int avail,
+ int anyMatchPrice, int startLen) {
+ // If the longest match is so long that it would not fit into
+ // the opts array, shorten the matches.
+ if (matches.len[matches.count - 1] > avail) {
+ matches.count = 0;
+ while (matches.len[matches.count] < avail)
+ ++matches.count;
+
+ matches.len[matches.count++] = avail;
+ }
+
+ if (matches.len[matches.count - 1] < startLen)
+ return;
+
+ while (optEnd < optCur + matches.len[matches.count - 1])
+ opts[++optEnd].reset();
+
+ int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
+ opts[optCur].state);
+
+ int match = 0;
+ while (startLen > matches.len[match])
+ ++match;
+
+ for (int len = startLen; ; ++len) {
+ int dist = matches.dist[match];
+
+ // Calculate the price of a match of len bytes from the nearest
+ // possible distance.
+ int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice,
+ dist, len, posState);
+ if (matchAndLenPrice < opts[optCur + len].price)
+ opts[optCur + len].set1(matchAndLenPrice,
+ optCur, dist + REPS);
+
+ if (len != matches.len[match])
+ continue;
+
+ // Try match + literal + rep0. First get the length of the rep0.
+ int len2Limit = Math.min(niceLen, avail - len - 1);
+ int len2 = lz.getMatchLen(len + 1, dist, len2Limit);
+
+ if (len2 >= MATCH_LEN_MIN) {
+ nextState.set(opts[optCur].state);
+ nextState.updateMatch();
+
+ // Literal
+ int curByte = lz.getByte(len, 0);
+ int matchByte = lz.getByte(0); // lz.getByte(len, len)
+ int prevByte = lz.getByte(len, 1);
+ int price = matchAndLenPrice
+ + literalEncoder.getPrice(curByte, matchByte,
+ prevByte, pos + len,
+ nextState);
+ nextState.updateLiteral();
+
+ // Rep0
+ int nextPosState = (pos + len + 1) & posMask;
+ price += getLongRepAndLenPrice(0, len2,
+ nextState, nextPosState);
+
+ int i = optCur + len + 1 + len2;
+ while (optEnd < i)
+ opts[++optEnd].reset();
+
+ if (price < opts[i].price)
+ opts[i].set3(price, optCur, dist + REPS, len, 0);
+ }
+
+ if (++match == matches.count)
+ break;
+ }
+ }
+}