diff options
Diffstat (limited to 'src/org/tukaani/xz/lzma/LZMAEncoderNormal.java')
-rw-r--r-- | src/org/tukaani/xz/lzma/LZMAEncoderNormal.java | 568 |
1 files changed, 568 insertions, 0 deletions
diff --git a/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java b/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java new file mode 100644 index 0000000..8079cd2 --- /dev/null +++ b/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java @@ -0,0 +1,568 @@ +/* + * LZMAEncoderNormal + * + * Authors: Lasse Collin <lasse.collin@tukaani.org> + * Igor Pavlov <http://7-zip.org/> + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.lzma; + +import org.tukaani.xz.ArrayCache; +import org.tukaani.xz.lz.LZEncoder; +import org.tukaani.xz.lz.Matches; +import org.tukaani.xz.rangecoder.RangeEncoder; + +final class LZMAEncoderNormal extends LZMAEncoder { + private static final int OPTS = 4096; + + private static final int EXTRA_SIZE_BEFORE = OPTS; + private static final int EXTRA_SIZE_AFTER = OPTS; + + private final Optimum[] opts = new Optimum[OPTS]; + private int optCur = 0; + private int optEnd = 0; + + private Matches matches; + + // These are fields solely to avoid allocating the objects again and + // again on each function call. + private final int[] repLens = new int[REPS]; + private final State nextState = new State(); + + static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) { + return LZEncoder.getMemoryUsage(dictSize, + Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE), + EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf) + + OPTS * 64 / 1024; + } + + LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb, + int dictSize, int extraSizeBefore, + int niceLen, int mf, int depthLimit, + ArrayCache arrayCache) { + super(rc, LZEncoder.getInstance(dictSize, + Math.max(extraSizeBefore, + EXTRA_SIZE_BEFORE), + EXTRA_SIZE_AFTER, + niceLen, MATCH_LEN_MAX, + mf, depthLimit, arrayCache), + lc, lp, pb, dictSize, niceLen); + + for (int i = 0; i < OPTS; ++i) + opts[i] = new Optimum(); + } + + public void reset() { + optCur = 0; + optEnd = 0; + super.reset(); + } + + /** + * Converts the opts array from backward indexes to forward indexes. + * Then it will be simple to get the next symbol from the array + * in later calls to <code>getNextSymbol()</code>. + */ + private int convertOpts() { + optEnd = optCur; + + int optPrev = opts[optCur].optPrev; + + do { + Optimum opt = opts[optCur]; + + if (opt.prev1IsLiteral) { + opts[optPrev].optPrev = optCur; + opts[optPrev].backPrev = -1; + optCur = optPrev--; + + if (opt.hasPrev2) { + opts[optPrev].optPrev = optPrev + 1; + opts[optPrev].backPrev = opt.backPrev2; + optCur = optPrev; + optPrev = opt.optPrev2; + } + } + + int temp = opts[optPrev].optPrev; + opts[optPrev].optPrev = optCur; + optCur = optPrev; + optPrev = temp; + } while (optCur > 0); + + optCur = opts[0].optPrev; + back = opts[optCur].backPrev; + return optCur; + } + + int getNextSymbol() { + // If there are pending symbols from an earlier call to this + // function, return those symbols first. + if (optCur < optEnd) { + int len = opts[optCur].optPrev - optCur; + optCur = opts[optCur].optPrev; + back = opts[optCur].backPrev; + return len; + } + + assert optCur == optEnd; + optCur = 0; + optEnd = 0; + back = -1; + + if (readAhead == -1) + matches = getMatches(); + + // Get the number of bytes available in the dictionary, but + // not more than the maximum match length. If there aren't + // enough bytes remaining to encode a match at all, return + // immediately to encode this byte as a literal. + int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX); + if (avail < MATCH_LEN_MIN) + return 1; + + // Get the lengths of repeated matches. + int repBest = 0; + for (int rep = 0; rep < REPS; ++rep) { + repLens[rep] = lz.getMatchLen(reps[rep], avail); + + if (repLens[rep] < MATCH_LEN_MIN) { + repLens[rep] = 0; + continue; + } + + if (repLens[rep] > repLens[repBest]) + repBest = rep; + } + + // Return if the best repeated match is at least niceLen bytes long. + if (repLens[repBest] >= niceLen) { + back = repBest; + skip(repLens[repBest] - 1); + return repLens[repBest]; + } + + // Initialize mainLen and mainDist to the longest match found + // by the match finder. + int mainLen = 0; + int mainDist = 0; + if (matches.count > 0) { + mainLen = matches.len[matches.count - 1]; + mainDist = matches.dist[matches.count - 1]; + + // Return if it is at least niceLen bytes long. + if (mainLen >= niceLen) { + back = mainDist + REPS; + skip(mainLen - 1); + return mainLen; + } + } + + int curByte = lz.getByte(0); + int matchByte = lz.getByte(reps[0] + 1); + + // If the match finder found no matches and this byte cannot be + // encoded as a repeated match (short or long), we must be return + // to have the byte encoded as a literal. + if (mainLen < MATCH_LEN_MIN && curByte != matchByte + && repLens[repBest] < MATCH_LEN_MIN) + return 1; + + + int pos = lz.getPos(); + int posState = pos & posMask; + + // Calculate the price of encoding the current byte as a literal. + { + int prevByte = lz.getByte(1); + int literalPrice = literalEncoder.getPrice(curByte, matchByte, + prevByte, pos, state); + opts[1].set1(literalPrice, 0, -1); + } + + int anyMatchPrice = getAnyMatchPrice(state, posState); + int anyRepPrice = getAnyRepPrice(anyMatchPrice, state); + + // If it is possible to encode this byte as a short rep, see if + // it is cheaper than encoding it as a literal. + if (matchByte == curByte) { + int shortRepPrice = getShortRepPrice(anyRepPrice, + state, posState); + if (shortRepPrice < opts[1].price) + opts[1].set1(shortRepPrice, 0, 0); + } + + // Return if there is neither normal nor long repeated match. Use + // a short match instead of a literal if is is possible and cheaper. + optEnd = Math.max(mainLen, repLens[repBest]); + if (optEnd < MATCH_LEN_MIN) { + assert optEnd == 0 : optEnd; + back = opts[1].backPrev; + return 1; + } + + + // Update the lookup tables for distances and lengths before using + // those price calculation functions. (The price function above + // don't need these tables.) + updatePrices(); + + // Initialize the state and reps of this position in opts[]. + // updateOptStateAndReps() will need these to get the new + // state and reps for the next byte. + opts[0].state.set(state); + System.arraycopy(reps, 0, opts[0].reps, 0, REPS); + + // Initialize the prices for latter opts that will be used below. + for (int i = optEnd; i >= MATCH_LEN_MIN; --i) + opts[i].reset(); + + // Calculate the prices of repeated matches of all lengths. + for (int rep = 0; rep < REPS; ++rep) { + int repLen = repLens[rep]; + if (repLen < MATCH_LEN_MIN) + continue; + + int longRepPrice = getLongRepPrice(anyRepPrice, rep, + state, posState); + do { + int price = longRepPrice + repLenEncoder.getPrice(repLen, + posState); + if (price < opts[repLen].price) + opts[repLen].set1(price, 0, rep); + } while (--repLen >= MATCH_LEN_MIN); + } + + // Calculate the prices of normal matches that are longer than rep0. + { + int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN); + if (len <= mainLen) { + int normalMatchPrice = getNormalMatchPrice(anyMatchPrice, + state); + + // Set i to the index of the shortest match that is + // at least len bytes long. + int i = 0; + while (len > matches.len[i]) + ++i; + + while (true) { + int dist = matches.dist[i]; + int price = getMatchAndLenPrice(normalMatchPrice, + dist, len, posState); + if (price < opts[len].price) + opts[len].set1(price, 0, dist + REPS); + + if (len == matches.len[i]) + if (++i == matches.count) + break; + + ++len; + } + } + } + + + avail = Math.min(lz.getAvail(), OPTS - 1); + + // Get matches for later bytes and optimize the use of LZMA symbols + // by calculating the prices and picking the cheapest symbol + // combinations. + while (++optCur < optEnd) { + matches = getMatches(); + if (matches.count > 0 + && matches.len[matches.count - 1] >= niceLen) + break; + + --avail; + ++pos; + posState = pos & posMask; + + updateOptStateAndReps(); + anyMatchPrice = opts[optCur].price + + getAnyMatchPrice(opts[optCur].state, posState); + anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state); + + calc1BytePrices(pos, posState, avail, anyRepPrice); + + if (avail >= MATCH_LEN_MIN) { + int startLen = calcLongRepPrices(pos, posState, + avail, anyRepPrice); + if (matches.count > 0) + calcNormalMatchPrices(pos, posState, avail, + anyMatchPrice, startLen); + } + } + + return convertOpts(); + } + + /** + * Updates the state and reps for the current byte in the opts array. + */ + private void updateOptStateAndReps() { + int optPrev = opts[optCur].optPrev; + assert optPrev < optCur; + + if (opts[optCur].prev1IsLiteral) { + --optPrev; + + if (opts[optCur].hasPrev2) { + opts[optCur].state.set(opts[opts[optCur].optPrev2].state); + if (opts[optCur].backPrev2 < REPS) + opts[optCur].state.updateLongRep(); + else + opts[optCur].state.updateMatch(); + } else { + opts[optCur].state.set(opts[optPrev].state); + } + + opts[optCur].state.updateLiteral(); + } else { + opts[optCur].state.set(opts[optPrev].state); + } + + if (optPrev == optCur - 1) { + // Must be either a short rep or a literal. + assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1; + + if (opts[optCur].backPrev == 0) + opts[optCur].state.updateShortRep(); + else + opts[optCur].state.updateLiteral(); + + System.arraycopy(opts[optPrev].reps, 0, + opts[optCur].reps, 0, REPS); + } else { + int back; + if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) { + optPrev = opts[optCur].optPrev2; + back = opts[optCur].backPrev2; + opts[optCur].state.updateLongRep(); + } else { + back = opts[optCur].backPrev; + if (back < REPS) + opts[optCur].state.updateLongRep(); + else + opts[optCur].state.updateMatch(); + } + + if (back < REPS) { + opts[optCur].reps[0] = opts[optPrev].reps[back]; + + int rep; + for (rep = 1; rep <= back; ++rep) + opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1]; + + for (; rep < REPS; ++rep) + opts[optCur].reps[rep] = opts[optPrev].reps[rep]; + } else { + opts[optCur].reps[0] = back - REPS; + System.arraycopy(opts[optPrev].reps, 0, + opts[optCur].reps, 1, REPS - 1); + } + } + } + + /** + * Calculates prices of a literal, a short rep, and literal + rep0. + */ + private void calc1BytePrices(int pos, int posState, + int avail, int anyRepPrice) { + // This will be set to true if using a literal or a short rep. + boolean nextIsByte = false; + + int curByte = lz.getByte(0); + int matchByte = lz.getByte(opts[optCur].reps[0] + 1); + + // Try a literal. + int literalPrice = opts[optCur].price + + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1), + pos, opts[optCur].state); + if (literalPrice < opts[optCur + 1].price) { + opts[optCur + 1].set1(literalPrice, optCur, -1); + nextIsByte = true; + } + + // Try a short rep. + if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur + || opts[optCur + 1].backPrev != 0)) { + int shortRepPrice = getShortRepPrice(anyRepPrice, + opts[optCur].state, + posState); + if (shortRepPrice <= opts[optCur + 1].price) { + opts[optCur + 1].set1(shortRepPrice, optCur, 0); + nextIsByte = true; + } + } + + // If neither a literal nor a short rep was the cheapest choice, + // try literal + long rep0. + if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) { + int lenLimit = Math.min(niceLen, avail - 1); + int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit); + + if (len >= MATCH_LEN_MIN) { + nextState.set(opts[optCur].state); + nextState.updateLiteral(); + int nextPosState = (pos + 1) & posMask; + int price = literalPrice + + getLongRepAndLenPrice(0, len, + nextState, nextPosState); + + int i = optCur + 1 + len; + while (optEnd < i) + opts[++optEnd].reset(); + + if (price < opts[i].price) + opts[i].set2(price, optCur, 0); + } + } + } + + /** + * Calculates prices of long rep and long rep + literal + rep0. + */ + private int calcLongRepPrices(int pos, int posState, + int avail, int anyRepPrice) { + int startLen = MATCH_LEN_MIN; + int lenLimit = Math.min(avail, niceLen); + + for (int rep = 0; rep < REPS; ++rep) { + int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit); + if (len < MATCH_LEN_MIN) + continue; + + while (optEnd < optCur + len) + opts[++optEnd].reset(); + + int longRepPrice = getLongRepPrice(anyRepPrice, rep, + opts[optCur].state, posState); + + for (int i = len; i >= MATCH_LEN_MIN; --i) { + int price = longRepPrice + + repLenEncoder.getPrice(i, posState); + if (price < opts[optCur + i].price) + opts[optCur + i].set1(price, optCur, rep); + } + + if (rep == 0) + startLen = len + 1; + + int len2Limit = Math.min(niceLen, avail - len - 1); + int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep], + len2Limit); + + if (len2 >= MATCH_LEN_MIN) { + // Rep + int price = longRepPrice + + repLenEncoder.getPrice(len, posState); + nextState.set(opts[optCur].state); + nextState.updateLongRep(); + + // Literal + int curByte = lz.getByte(len, 0); + int matchByte = lz.getByte(0); // lz.getByte(len, len) + int prevByte = lz.getByte(len, 1); + price += literalEncoder.getPrice(curByte, matchByte, prevByte, + pos + len, nextState); + nextState.updateLiteral(); + + // Rep0 + int nextPosState = (pos + len + 1) & posMask; + price += getLongRepAndLenPrice(0, len2, + nextState, nextPosState); + + int i = optCur + len + 1 + len2; + while (optEnd < i) + opts[++optEnd].reset(); + + if (price < opts[i].price) + opts[i].set3(price, optCur, rep, len, 0); + } + } + + return startLen; + } + + /** + * Calculates prices of a normal match and normal match + literal + rep0. + */ + private void calcNormalMatchPrices(int pos, int posState, int avail, + int anyMatchPrice, int startLen) { + // If the longest match is so long that it would not fit into + // the opts array, shorten the matches. + if (matches.len[matches.count - 1] > avail) { + matches.count = 0; + while (matches.len[matches.count] < avail) + ++matches.count; + + matches.len[matches.count++] = avail; + } + + if (matches.len[matches.count - 1] < startLen) + return; + + while (optEnd < optCur + matches.len[matches.count - 1]) + opts[++optEnd].reset(); + + int normalMatchPrice = getNormalMatchPrice(anyMatchPrice, + opts[optCur].state); + + int match = 0; + while (startLen > matches.len[match]) + ++match; + + for (int len = startLen; ; ++len) { + int dist = matches.dist[match]; + + // Calculate the price of a match of len bytes from the nearest + // possible distance. + int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice, + dist, len, posState); + if (matchAndLenPrice < opts[optCur + len].price) + opts[optCur + len].set1(matchAndLenPrice, + optCur, dist + REPS); + + if (len != matches.len[match]) + continue; + + // Try match + literal + rep0. First get the length of the rep0. + int len2Limit = Math.min(niceLen, avail - len - 1); + int len2 = lz.getMatchLen(len + 1, dist, len2Limit); + + if (len2 >= MATCH_LEN_MIN) { + nextState.set(opts[optCur].state); + nextState.updateMatch(); + + // Literal + int curByte = lz.getByte(len, 0); + int matchByte = lz.getByte(0); // lz.getByte(len, len) + int prevByte = lz.getByte(len, 1); + int price = matchAndLenPrice + + literalEncoder.getPrice(curByte, matchByte, + prevByte, pos + len, + nextState); + nextState.updateLiteral(); + + // Rep0 + int nextPosState = (pos + len + 1) & posMask; + price += getLongRepAndLenPrice(0, len2, + nextState, nextPosState); + + int i = optCur + len + 1 + len2; + while (optEnd < i) + opts[++optEnd].reset(); + + if (price < opts[i].price) + opts[i].set3(price, optCur, dist + REPS, len, 0); + } + + if (++match == matches.count) + break; + } + } +} |