From 2b674bf02f698b565b4525a36cdf4651899b88d1 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sun, 11 Feb 2018 02:45:36 -0800
Subject: slightly improved hc compression speed (+~1-2%)

by removing bad candidates faster.
---
 lib/lz4hc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/lz4hc.c')

diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index cface811..9f59e80b 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -220,7 +220,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
         nbAttempts--;
         if (matchIndex >= dictLimit) {
             const BYTE* const matchPtr = base + matchIndex;
-            if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) {
+            if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - delta + longest - 1)) {
                 if (LZ4_read32(matchPtr) == pattern) {
                     int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
     #if 0
-- 
cgit v1.2.3


From d3a13397d921d9d2ebae56d0503d23c73fee39b1 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sun, 11 Feb 2018 21:03:39 -0800
Subject: slight hc speed benefit (~+1%)

by optimizing countback
---
 lib/lz4hc.c | 31 ++++++++++---------------------
 1 file changed, 10 insertions(+), 21 deletions(-)

(limited to 'lib/lz4hc.c')

diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index 9f59e80b..c888ee18 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -123,10 +123,12 @@ LZ4_FORCE_INLINE
 int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
                     const BYTE* const iMin, const BYTE* const mMin)
 {
-    int back=0;
-    while ( (ip+back > iMin)
-         && (match+back > mMin)
-         && (ip[back-1] == match[back-1]))
+    int back = 0;   /* result, always <= 0 : decremented once per matching byte found before ip / match */
+    int const min = (int)MAX(iMin - ip, mMin - match);   /* lowest value back may reach : keeps ip+back >= iMin and match+back >= mMin */
+    assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31));   /* both distances are cast to int above */
+    assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31));
+    while ( (back > min)
+         && (ip[back-1] == match[back-1]) )
             back--;
     return back;
 }
@@ -223,17 +225,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
             if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - delta + longest - 1)) {
                 if (LZ4_read32(matchPtr) == pattern) {
                     int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
-    #if 0
-                    /* more generic but unfortunately slower on clang */
-                    int const back = LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr);
-    #else
-                    int back = 0;
-                    while ( (ip+back > iLowLimit)
-                         && (matchPtr+back > lowPrefixPtr)
-                         && (ip[back-1] == matchPtr[back-1])) {
-                            back--;
-                    }
-    #endif
+                    int const back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0;
                     mlt -= back;
 
                     if (mlt > longest) {
@@ -252,10 +244,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
                 mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
                 if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
                     mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
-                while ( (ip+back > iLowLimit)
-                     && (matchIndex+back > lowLimit)
-                     && (ip[back-1] == matchPtr[back-1]))
-                        back--;
+                back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictBase+lowLimit) : 0;
                 mlt -= back;
                 if (mlt > longest) {
                     longest = mlt;
@@ -333,7 +322,7 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
     size_t length;
     BYTE* const token = (*op)++;
 
-#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2)
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
     static const BYTE* start = NULL;
     static U32 totalCost = 0;
     U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start);
@@ -343,7 +332,7 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
     U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
     if (start==NULL) start = *anchor;  /* only works for single segment */
     //g_debuglog_enable = (pos >= 2228) & (pos <= 2262);
-    DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
+    DEBUGLOG(6, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
                 pos,
                 (U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
                 cost, totalCost);
-- 
cgit v1.2.3


From 25b16e8a2e51c41f5864ce5cc94ce65ab6fd52b9 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Feb 2018 15:25:45 -0800
Subject: added one assert()

suggested by @terrelln
---
 lib/lz4hc.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'lib/lz4hc.c')

diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index c888ee18..f3631c59 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -135,7 +135,8 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
 
 /* LZ4HC_countPattern() :
  * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
-static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
+static unsigned
+LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
 {
     const BYTE* const iStart = ip;
     reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32;
@@ -167,7 +168,8 @@ static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 c
 /* LZ4HC_reverseCountPattern() :
  * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!)
  * read using natural platform endianess */
-static unsigned LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
+static unsigned
+LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
 {
     const BYTE* const iStart = ip;
 
@@ -185,7 +187,8 @@ static unsigned LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow
 
 typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
 
-LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
+LZ4_FORCE_INLINE int
+LZ4HC_InsertAndGetWiderMatch (
     LZ4HC_CCtx_internal* hc4,
     const BYTE* const ip,
     const BYTE* const iLowLimit,
@@ -222,6 +225,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
         nbAttempts--;
         if (matchIndex >= dictLimit) {
             const BYTE* const matchPtr = base + matchIndex;
+            assert(longest >= 1);
             if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - delta + longest - 1)) {
                 if (LZ4_read32(matchPtr) == pattern) {
                     int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
-- 
cgit v1.2.3


From 7173a631db61ab9535bd0d6e5e00e9dc081d4df3 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sat, 24 Feb 2018 11:47:53 -0800
Subject: edge case : compress up to end-mflimit (12 bytes)

The LZ4 block format specification
states that the last match must start
at a minimum distance of 12 bytes from the end of the block.

However, out of an abundance of caution,
the reference implementation would actually stop searching matches
at 13 bytes from the end of the block.

This patch fixes this small detail.
The new version is now able to properly compress a limit case
such as `aaaaaaaabaaa\n`
as reported by Gao Xiang (@hsiangkao).

Obviously, it doesn't change a lot of things.
This is just one additional match candidate per block, with a maximum match length of 7 (since last 5 bytes must remain literals).

With default policy, blocks are 4 MB long, so it doesn't happen too often.
Compressing silesia.tar at default level 1 saves 5 bytes (100930101 -> 100930096).
At max level 12, it saves a grand 16 bytes (77389871 -> 77389855).

The impact is a bit more visible when blocks are smaller, hence more numerous.
For example, compressing silesia with blocks of 64 KB (using -12 -B4D) saves 543 bytes (77304583 -> 77304040).
So the smaller the packet size, the more visible the impact.

And it happens we have a ton of scenarios with little blocks using LZ4 compression ...

And a useless "hooray" sidenote :
the patch improves the LZ4 compression record of silesia (using -12 -B7D --no-frame-crc) by 16 bytes (77270672 -> 77270656)
and the record on enwik9 by 44 bytes (371680396 -> 371680352) (previously claimed by [smallz4](http://create.stephan-brumme.com/smallz4/) ).
---
 lib/lz4hc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib/lz4hc.c')

diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index f3631c59..726cfaa5 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -425,7 +425,7 @@ static int LZ4HC_compress_hashChain (
     if (inputSize < LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
 
     /* Main Loop */
-    while (ip < mflimit) {
+    while (ip <= mflimit) {
         ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis);
         if (ml<MINMATCH) { ip++; continue; }
 
@@ -435,7 +435,7 @@ static int LZ4HC_compress_hashChain (
         ml0 = ml;
 
 _Search2:
-        if (ip+ml < mflimit)
+        if (ip+ml <= mflimit)
             ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
                             ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
                             maxNbAttempts, patternAnalysis);
@@ -482,7 +482,7 @@ _Search3:
         }
         /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
 
-        if (start2 + ml2 < mflimit)
+        if (start2 + ml2 <= mflimit)
             ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
                             start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
                             maxNbAttempts, patternAnalysis);
-- 
cgit v1.2.3


From 550b40849f6b77850453a1190b97430b121802bc Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sun, 25 Feb 2018 00:32:09 -0800
Subject: merge lz4opt.h into lz4hc.c

Having a dedicated file for the optimal parser
made sense during its creation:
it allowed Przemyslaw to work more freely on lz4opt, with less dependency on lz4hc.
Moreover, the optimal parser was more complex, with its own search functions.

Since the optimal parser was rewritten last year, it's now a lot lighter.
It makes more sense now to integrate it directly inside lz4hc.c,
making it easier to edit (editors are a bit "lost" inside a `*.h` dependent on its #include position).
It also reduces the number of files in the project,
which fits pretty well with lz4 objectives
(adding lz4hc requires "just" lz4hc.h and lz4hc.c).
---
 lib/lz4hc.c | 331 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 327 insertions(+), 4 deletions(-)

(limited to 'lib/lz4hc.c')

diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index 726cfaa5..2a6e080b 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -67,6 +67,7 @@
 
 /*===   Constants   ===*/
 #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+#define LZ4_OPT_NUM   (1<<12)
 
 
 /*===   Macros   ===*/
@@ -383,10 +384,6 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
     return 0;
 }
 
-/* btopt */
-#include "lz4opt.h"
-
-
 static int LZ4HC_compress_hashChain (
     LZ4HC_CCtx_internal* const ctx,
     const char* const source,
@@ -605,6 +602,12 @@ _dest_overflow:
     return 0;
 }
 
+static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,   /* forward declaration : the definition comes further down in this file */
+    const char* const source, char* dst,
+    int* srcSizePtr, int dstCapacity,
+    int const nbSearches, size_t sufficient_len,
+    limitedOutput_directive limit, int const fullUpdate);
+
 
 static int LZ4HC_compress_generic (
     LZ4HC_CCtx_internal* const ctx,
@@ -884,3 +887,323 @@ char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
     int const dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB);
     return (char*)(hc4->inputBuffer + dictSize);
 }
+
+
+/* ================================================
+ * LZ4 Optimal parser (levels 10-12)
+ * ===============================================*/
+typedef struct {   /* one entry per position (relative to ip) in the optimal parser's price table */
+    int price;     /* cost, in bytes, of the cheapest known way to reach this position */
+    int off;       /* offset of the match ending at this position ; 0 when reached by a literal */
+    int mlen;      /* length of the match ending at this position ; 1 means literal */
+    int litlen;    /* literal position : length of the literal run ending here ; match position : nb of literals preceding that match */
+} LZ4HC_optimal_t;
+
+/* LZ4HC_literalsPrice() : price in bytes of a run of `litlen` literals : one byte per literal, plus extra length bytes once litlen >= RUN_MASK (token byte not included) */
+LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
+{
+    int price = litlen;
+    if (litlen >= (int)RUN_MASK)
+        price += 1 + (litlen-RUN_MASK)/255;   /* one more length byte for each further 255 literals */
+    return price;
+}
+
+
+/* LZ4HC_sequencePrice() : price in bytes of one sequence made of `litlen` literals followed by a match of length `mlen` ; requires mlen >= MINMATCH */
+LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
+{
+    int price = 1 + 2 ; /* token + 16-bit offset */
+
+    price += LZ4HC_literalsPrice(litlen);
+
+    if (mlen >= (int)(ML_MASK+MINMATCH))
+        price += 1 + (mlen-(ML_MASK+MINMATCH))/255;   /* extra match-length bytes, same scheme as for literal runs */
+
+    return price;
+}
+
+
+typedef struct {   /* result of LZ4HC_FindLongerMatch() */
+    int off;       /* distance from the searched position back to the match (ip - matchPtr) */
+    int len;       /* match length ; 0 means no suitable match was found */
+} LZ4HC_match_t;
+
+LZ4_FORCE_INLINE LZ4HC_match_t
+LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,   /* searches, at position ip, for a match strictly longer than minLen */
+                      const BYTE* ip, const BYTE* const iHighLimit,
+                      int minLen, int nbSearches)
+{
+    LZ4HC_match_t match = { 0 , 0 };   /* returned unchanged (len==0) when nothing longer than minLen is found */
+    const BYTE* matchPtr = NULL;
+    /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
+     * but this won't be the case here, as we define iLowLimit==ip,
+     * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
+    int const matchLength = LZ4HC_InsertAndGetWiderMatch(ctx,
+                                ip, ip, iHighLimit, minLen, &matchPtr, &ip,
+                                nbSearches, 1 /* patternAnalysis */);
+    if (matchLength <= minLen) return match;
+    match.len = matchLength;
+    match.off = (int)(ip-matchPtr);   /* stored as a backward distance, not as a pointer */
+    return match;
+}
+
+
+static int LZ4HC_compress_optimal (   /* optimal parser : prices candidate sequences into opt[], walks the table backward, then encodes the selected sequences */
+    LZ4HC_CCtx_internal* ctx,
+    const char* const source,          /* input to compress */
+    char* dst,                         /* output buffer */
+    int* srcSizePtr,                   /* in : nb of input bytes ; out : nb of input bytes actually consumed */
+    int dstCapacity,                   /* size of dst, in bytes */
+    int const nbSearches,              /* forwarded to every LZ4HC_FindLongerMatch() call */
+    size_t sufficient_len,             /* a match longer than this is encoded immediately ; capped below to LZ4_OPT_NUM-1 */
+    limitedOutput_directive limit,     /* decides what happens when dst is too small, see _dest_overflow and _last_literals */
+    int const fullUpdate               /* non-zero : search every candidate position with minLen = MINMATCH-1 */
+    )                                  /* @return : nb of bytes written into dst, or 0 if dst is too small (unless limit == limitedDestSize) */
+{
+#define TRAILING_LITERALS 3   /* nb of literal positions priced right after the end of the furthest match */
+    LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS];   /* ~64 KB, which is a bit large for stack... */
+
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;   /* literals are pending since this position (llen = ip - anchor) */
+    const BYTE* const iend = ip + *srcSizePtr;
+    const BYTE* const mflimit = iend - MFLIMIT;   /* last position at which a match search is started */
+    const BYTE* const matchlimit = iend - LASTLITERALS;   /* upper bound (iHighLimit) given to match searches */
+    BYTE* op = (BYTE*) dst;
+    BYTE* opSaved = (BYTE*) dst;   /* value of op before the latest LZ4HC_encodeSequence() attempt, restored on overflow */
+    BYTE* oend = op + dstCapacity;
+
+    /* init */
+    DEBUGLOG(5, "LZ4HC_compress_optimal");
+    *srcSizePtr = 0;
+    if (limit == limitedDestSize) oend -= LASTLITERALS;   /* Hack for support LZ4 format restriction */
+    if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
+
+    /* Main Loop */
+    assert(ip - anchor < LZ4_MAX_INPUT_SIZE);
+    while (ip <= mflimit) {
+         int const llen = (int)(ip - anchor);
+         int best_mlen, best_off;
+         int cur, last_match_pos = 0;
+
+         LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches);
+         if (firstMatch.len==0) { ip++; continue; }   /* no match here : anchor is left untouched, so this byte joins the pending literals */
+
+         if ((size_t)firstMatch.len > sufficient_len) {
+             /* good enough solution : immediate encoding */
+             int const firstML = firstMatch.len;
+             const BYTE* const matchPos = ip - firstMatch.off;
+             opSaved = op;
+             if ( LZ4HC_encodeSequence(&ip, &op, &anchor, firstML, matchPos, limit, oend) )   /* updates ip, op and anchor */
+                 goto _dest_overflow;
+             continue;
+         }
+
+         /* set prices for first positions (literals) */
+         {   int rPos;
+             for (rPos = 0 ; rPos < MINMATCH ; rPos++) {
+                 int const cost = LZ4HC_literalsPrice(llen + rPos);
+                 opt[rPos].mlen = 1;
+                 opt[rPos].off = 0;
+                 opt[rPos].litlen = llen + rPos;
+                 opt[rPos].price = cost;
+                 DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+                             rPos, cost, opt[rPos].litlen);
+         }   }
+         /* set prices using initial match */
+         {   int mlen = MINMATCH;
+             int const matchML = firstMatch.len;   /* necessarily < sufficient_len < LZ4_OPT_NUM */
+             int const offset = firstMatch.off;
+             assert(matchML < LZ4_OPT_NUM);
+             for ( ; mlen <= matchML ; mlen++) {
+                 int const cost = LZ4HC_sequencePrice(llen, mlen);
+                 opt[mlen].mlen = mlen;
+                 opt[mlen].off = offset;
+                 opt[mlen].litlen = llen;
+                 opt[mlen].price = cost;
+                 DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
+                             mlen, cost, mlen);
+         }   }
+         last_match_pos = firstMatch.len;
+         {   int addLit;
+             for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+                 opt[last_match_pos+addLit].mlen = 1; /* literal */
+                 opt[last_match_pos+addLit].off = 0;
+                 opt[last_match_pos+addLit].litlen = addLit;
+                 opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+                 DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+                             last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+         }   }
+
+         /* check further positions */
+         for (cur = 1; cur < last_match_pos; cur++) {
+             const BYTE* const curPtr = ip + cur;
+             LZ4HC_match_t newMatch;
+
+             if (curPtr > mflimit) break;
+             DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
+                     cur, opt[cur].price, opt[cur+1].price, cur+1);
+             if (fullUpdate) {
+                 /* not useful to search here if next position has same (or lower) cost */
+                 if ( (opt[cur+1].price <= opt[cur].price)
+                   /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */
+                   && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) )
+                     continue;
+             } else {
+                 /* not useful to search here if next position has same (or lower) cost */
+                 if (opt[cur+1].price <= opt[cur].price) continue;
+             }
+
+             DEBUGLOG(7, "search at rPos:%u", cur);
+             if (fullUpdate)
+                 newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches);
+             else
+                 /* only test matches of minimum length; slightly faster, but misses a few bytes */
+                 newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches);
+             if (!newMatch.len) continue;
+
+             if ( ((size_t)newMatch.len > sufficient_len)
+               || (newMatch.len + cur >= LZ4_OPT_NUM) ) {
+                 /* immediate encoding */
+                 best_mlen = newMatch.len;
+                 best_off = newMatch.off;
+                 last_match_pos = cur + 1;   /* makes the encoding loop below stop right after the sequence starting at cur */
+                 goto encode;
+             }
+
+             /* before match : set price with literals at beginning */
+             {   int const baseLitlen = opt[cur].litlen;
+                 int litlen;
+                 for (litlen = 1; litlen < MINMATCH; litlen++) {
+                     int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen);
+                     int const pos = cur + litlen;
+                     if (price < opt[pos].price) {
+                         opt[pos].mlen = 1; /* literal */
+                         opt[pos].off = 0;
+                         opt[pos].litlen = baseLitlen+litlen;
+                         opt[pos].price = price;
+                         DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)",
+                                     pos, price, opt[pos].litlen);
+             }   }   }
+
+             /* set prices using match at position = cur */
+             {   int const matchML = newMatch.len;
+                 int ml = MINMATCH;
+
+                 assert(cur + newMatch.len < LZ4_OPT_NUM);
+                 for ( ; ml <= matchML ; ml++) {
+                     int const pos = cur + ml;
+                     int const offset = newMatch.off;
+                     int price;
+                     int ll;
+                     DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)",
+                                 pos, last_match_pos);
+                     if (opt[cur].mlen == 1) {   /* cur was reached by literals : they become the literals of the new sequence */
+                         ll = opt[cur].litlen;
+                         price = ((cur > ll) ? opt[cur - ll].price : 0)
+                               + LZ4HC_sequencePrice(ll, ml);
+                     } else {   /* cur was reached by a match : the new sequence carries no literal */
+                         ll = 0;
+                         price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
+                     }
+
+                     if (pos > last_match_pos+TRAILING_LITERALS || price <= opt[pos].price) {
+                         DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)",
+                                     pos, price, ml);
+                         assert(pos < LZ4_OPT_NUM);
+                         if ( (ml == matchML)  /* last pos of last match */
+                           && (last_match_pos < pos) )
+                             last_match_pos = pos;
+                         opt[pos].mlen = ml;
+                         opt[pos].off = offset;
+                         opt[pos].litlen = ll;
+                         opt[pos].price = price;
+             }   }   }
+             /* complete following positions with literals */
+             {   int addLit;
+                 for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+                     opt[last_match_pos+addLit].mlen = 1; /* literal */
+                     opt[last_match_pos+addLit].off = 0;
+                     opt[last_match_pos+addLit].litlen = addLit;
+                     opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+                     DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+             }   }
+         }  /* for (cur = 1; cur < last_match_pos; cur++) */
+
+         best_mlen = opt[last_match_pos].mlen;
+         best_off = opt[last_match_pos].off;
+         cur = last_match_pos - best_mlen;   /* start position of the sequence ending at last_match_pos */
+
+ encode: /* cur, last_match_pos, best_mlen, best_off must be set */
+         assert(cur < LZ4_OPT_NUM);
+         assert(last_match_pos >= 1);  /* == 1 when only one candidate */
+         DEBUGLOG(6, "reverse traversal, looking for shortest path")   /* NOTE(review): no trailing ';' - presumably DEBUGLOG() expands to a complete statement ; confirm */
+         DEBUGLOG(6, "last_match_pos = %i", last_match_pos);
+         {   int candidate_pos = cur;
+             int selected_matchLength = best_mlen;
+             int selected_offset = best_off;
+             while (1) {  /* from end to beginning */
+                 int const next_matchLength = opt[candidate_pos].mlen;  /* can be 1, means literal */
+                 int const next_offset = opt[candidate_pos].off;
+                 DEBUGLOG(6, "pos %i: sequence length %i", candidate_pos, selected_matchLength);
+                 opt[candidate_pos].mlen = selected_matchLength;   /* from now on, opt[pos] describes the sequence starting at pos */
+                 opt[candidate_pos].off = selected_offset;
+                 selected_matchLength = next_matchLength;
+                 selected_offset = next_offset;
+                 if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */
+                 assert(next_matchLength > 0);  /* can be 1, means literal */
+                 candidate_pos -= next_matchLength;
+         }   }
+
+         /* encode all recorded sequences in order */
+         {   int rPos = 0;  /* relative position (to ip) */
+             while (rPos < last_match_pos) {
+                 int const ml = opt[rPos].mlen;
+                 int const offset = opt[rPos].off;
+                 if (ml == 1) { ip++; rPos++; continue; }  /* literal; note: can end up with several literals, in which case, skip them */
+                 rPos += ml;
+                 assert(ml >= MINMATCH);
+                 assert((offset >= 1) && (offset <= MAX_DISTANCE));
+                 opSaved = op;
+                 if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) )   /* updates ip, op and anchor */
+                     goto _dest_overflow;
+         }   }
+     }  /* while (ip <= mflimit) */
+
+ _last_literals:
+     /* Encode Last Literals */
+     {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
+         size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;   /* nb of extra length bytes needed by this run */
+         size_t const totalSize = 1 + litLength + lastRunSize;   /* token + length bytes + literals */
+         if (limit == limitedDestSize) oend += LASTLITERALS;  /* restore correct value */
+         if (limit && (op + totalSize > oend)) {
+             if (limit == limitedOutput) return 0;  /* Check output limit */
+             /* adapt lastRunSize to fill 'dst' */
+             lastRunSize  = (size_t)(oend - op) - 1;
+             litLength = (lastRunSize + 255 - RUN_MASK) / 255;
+             lastRunSize -= litLength;
+         }
+         ip = anchor + lastRunSize;   /* end of the consumed input, reported through *srcSizePtr below */
+
+         if (lastRunSize >= RUN_MASK) {
+             size_t accumulator = lastRunSize - RUN_MASK;
+             *op++ = (RUN_MASK << ML_BITS);
+             for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+             *op++ = (BYTE) accumulator;
+         } else {
+             *op++ = (BYTE)(lastRunSize << ML_BITS);
+         }
+         memcpy(op, anchor, lastRunSize);
+         op += lastRunSize;
+     }
+
+     /* End */
+     *srcSizePtr = (int) (((const char*)ip) - source);
+     return (int) ((char*)op-dst);
+
+ _dest_overflow:
+     if (limit == limitedDestSize) {
+         op = opSaved;  /* restore correct out pointer */
+         goto _last_literals;
+     }
+     return 0;
+ }
-- 
cgit v1.2.3


From efc419a6d4448a5806715e967384ea85f880fe59 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Fri, 26 Jan 2018 17:29:50 -0500
Subject: Replace calloc() Calls With malloc() Where Possible

---
 lib/lz4hc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/lz4hc.c')

diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index 2a6e080b..0c1b20ae 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -858,7 +858,7 @@ int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
 
 void* LZ4_createHC (char* inputBuffer)
 {
-    LZ4_streamHC_t* hc4 = (LZ4_streamHC_t*)ALLOCATOR(1, sizeof(LZ4_streamHC_t));
+    LZ4_streamHC_t* hc4 = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
     if (hc4 == NULL) return NULL;   /* not enough memory */
     LZ4HC_init (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
     hc4->internal_donotuse.inputBuffer = (BYTE*)inputBuffer;
-- 
cgit v1.2.3