From 72e99c8939d7f005487403ab54fa65ac31ee4317 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 27 Apr 2018 11:44:47 -0700 Subject: lz4hc : minor editions for clarity --- lib/lz4hc.c | 75 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 37 insertions(+), 38 deletions(-) (limited to 'lib/lz4hc.c') diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 4126ef81..07785528 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -138,6 +138,7 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, { int back = 0; int const min = (int)MAX(iMin - ip, mMin - match); + assert(min <= 0); assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31)); assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31)); while ( (back > min) @@ -222,7 +223,7 @@ LZ4HC_InsertAndGetWiderMatch ( const U32 ipIndex = (U32)(ip - base); const U32 lowLimit = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - MAX_DISTANCE; const BYTE* const dictBase = hc4->dictBase; - int const delta = (int)(ip-iLowLimit); + int const lookBackLength = (int)(ip-iLowLimit); int nbAttempts = maxNbAttempts; U32 const pattern = LZ4_read32(ip); U32 matchIndex; @@ -242,34 +243,35 @@ LZ4HC_InsertAndGetWiderMatch ( nbAttempts--; if (matchIndex >= dictLimit) { const BYTE* const matchPtr = base + matchIndex; + assert(matchPtr >= lowPrefixPtr); + assert(matchPtr < ip); assert(longest >= 1); - if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - delta + longest - 1)) { + if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { if (LZ4_read32(matchPtr) == pattern) { int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); - int const back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0; + int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0; mlt -= back; - if (mlt > longest) { longest = mlt; *matchpos = matchPtr+back; *startpos = ip+back; - } } - } + } } } } else { /* matchIndex < dictLimit */ const BYTE* const matchPtr = dictBase + matchIndex; if (LZ4_read32(matchPtr) == pattern) { + const BYTE* const dictLowLimit = dictBase + hc4->lowLimit; int mlt; int back = 0; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iHighLimit)) - mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit); - back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictBase+lowLimit) : 0; + mlt += LZ4_count(ip+mlt, lowPrefixPtr, iHighLimit); + back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictLowLimit) : 0; mlt -= back; if (mlt > longest) { longest = mlt; - *matchpos = base + matchIndex + back; + *matchpos = base + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */ *startpos = ip + back; } } } @@ -317,7 +319,7 @@ LZ4HC_InsertAndGetWiderMatch ( const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; - back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0; + back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0; mlt -= back; if (mlt > longest) { longest = mlt; @@ -456,14 +458,14 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( BYTE* op = (BYTE*) dest; BYTE* oend = op + maxOutputSize; - int ml, ml2, ml3, ml0; + int ml0, ml, ml2, ml3; + const BYTE* start0; + const BYTE* ref0; const BYTE* ref = NULL; const BYTE* start2 = NULL; const BYTE* ref2 = NULL; const BYTE* start3 = NULL; const BYTE* ref3 = NULL; - const BYTE* start0; - const BYTE* ref0; /* init */ *srcSizePtr = 0; @@ -476,31 +478,27 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( if (ml encode ML1 */ optr = op; if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow; continue; } - if (start0 < ip) { - if (start2 < ip + ml0) { /* empirical */ - ip = start0; - ref = ref0; - ml = ml0; - } - } + if (start0 < ip) { /* first match was skipped at least once */ + if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */ + ip = start0; ref = ref0; ml = ml0; /* restore initial ML1 */ + } } /* Here, start0==ip */ if ((start2 - ip) < 3) { /* First Match too small : removed */ @@ -528,14 +526,15 @@ _Search3: } /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ - if (start2 + ml2 <= mflimit) + if (start2 + ml2 <= mflimit) { ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts, patternAnalysis, dict); - else + } else { ml3 = ml2; + } - if (ml3 == ml2) { /* No better match : 2 sequences to encode */ + if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */ /* ip & ref are known; Now for ml */ if (start2 < ip+ml) ml = (int)(start2 - ip); /* Now, encode 2 sequences */ @@ -580,11 +579,12 @@ _Search3: } /* - * OK, now we have 3 ascending matches; let's write at least the first one - * ip & ref are known; Now for ml + * OK, now we have 3 ascending matches; + * let's write the first one ML1. + * ip & ref are known; Now decide ml. */ if (start2 < ip+ml) { - if ((start2 - ip) < (int)ML_MASK) { + if ((start2 - ip) < OPTIMAL_ML) { int correction; if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; @@ -601,14 +601,13 @@ _Search3: optr = op; if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow; - ip = start2; - ref = ref2; - ml = ml2; + /* ML2 becomes ML1 */ + ip = start2; ref = ref2; ml = ml2; - start2 = start3; - ref2 = ref3; - ml2 = ml3; + /* ML3 becomes ML2 */ + start2 = start3; ref2 = ref3; ml2 = ml3; + /* let's find a new ML3 */ goto _Search3; } -- cgit v1.2.3