aboutsummaryrefslogtreecommitdiff
path: root/src/zlib-ng/match_tpl.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/zlib-ng/match_tpl.h')
-rw-r--r--src/zlib-ng/match_tpl.h182
1 files changed, 145 insertions, 37 deletions
diff --git a/src/zlib-ng/match_tpl.h b/src/zlib-ng/match_tpl.h
index b15ca17..3fc71c1 100644
--- a/src/zlib-ng/match_tpl.h
+++ b/src/zlib-ng/match_tpl.h
@@ -1,3 +1,12 @@
+/* match_tpl.h -- find longest match template for compare256 variants
+ *
+ * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Portions copyright (C) 2014-2021 Konstantin Nosov
+ * Fast-zlib optimized longest_match
+ * https://github.com/gildor2/fast_zlib
+ */
#include "zbuild.h"
#include "deflate.h"
@@ -6,16 +15,6 @@
#ifndef MATCH_TPL_H
#define MATCH_TPL_H
-#ifdef UNALIGNED_OK
-# ifdef UNALIGNED64_OK
-typedef uint64_t bestcmp_t;
-# else
-typedef uint32_t bestcmp_t;
-# endif
-#else
-typedef uint8_t bestcmp_t;
-#endif
-
#define EARLY_EXIT_TRIGGER_LEVEL 5
#endif
@@ -37,25 +36,28 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
Z_REGISTER unsigned char *mbase_end;
const Pos *prev = s->prev;
Pos limit;
+#ifdef LONGEST_MATCH_SLOW
+ Pos limit_base;
+#else
int32_t early_exit;
+#endif
uint32_t chain_length, nice_match, best_len, offset;
uint32_t lookahead = s->lookahead;
- bestcmp_t scan_end;
-#ifndef UNALIGNED_OK
- bestcmp_t scan_end0;
-#else
- bestcmp_t scan_start;
+ Pos match_offset = 0;
+#ifdef UNALIGNED_OK
+ uint8_t scan_start[8];
#endif
+ uint8_t scan_end[8];
#define GOTO_NEXT_CHAIN \
if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
continue; \
return best_len;
- /* The code is optimized for MAX_MATCH-2 multiple of 16. */
- Assert(MAX_MATCH == 258, "Code too clever");
+ /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
+ Assert(STD_MAX_MATCH == 258, "Code too clever");
- best_len = s->prev_length ? s->prev_length : 1;
+ best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
/* Calculate read offset which should only extend an extra byte
* to find the next best match length.
@@ -71,17 +73,20 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
}
#endif
- scan_end = *(bestcmp_t *)(scan+offset);
-#ifndef UNALIGNED_OK
- scan_end0 = *(bestcmp_t *)(scan+offset+1);
+#ifdef UNALIGNED64_OK
+ zmemcpy_8(scan_start, scan);
+ zmemcpy_8(scan_end, scan+offset);
+#elif defined(UNALIGNED_OK)
+ zmemcpy_4(scan_start, scan);
+ zmemcpy_4(scan_end, scan+offset);
#else
- scan_start = *(bestcmp_t *)(scan);
+ scan_end[0] = *(scan+offset);
+ scan_end[1] = *(scan+offset+1);
#endif
mbase_end = (mbase_start+offset);
/* Do not waste too much time if we already have a good match */
chain_length = s->max_chain_length;
- early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
if (best_len >= s->good_match)
chain_length >>= 2;
nice_match = (uint32_t)s->nice_match;
@@ -90,7 +95,41 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
* we prevent matches with the string of window index 0
*/
limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
+#ifdef LONGEST_MATCH_SLOW
+ limit_base = limit;
+ if (best_len >= STD_MIN_MATCH) {
+ /* We're continuing search (lazy evaluation). */
+ uint32_t i, hash;
+ Pos pos;
+
+ /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
+ * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
+ * these strings are not yet inserted into the hash table.
+ */
+ hash = s->update_hash(s, 0, scan[1]);
+ hash = s->update_hash(s, hash, scan[2]);
+ for (i = 3; i <= best_len; i++) {
+ hash = s->update_hash(s, hash, scan[i]);
+
+ /* If we're starting with best_len >= 3, we can use offset search. */
+ pos = s->head[hash];
+ if (pos < cur_match) {
+ match_offset = (Pos)(i - 2);
+ cur_match = pos;
+ }
+ }
+
+ /* Update offset-dependent variables */
+ limit = limit_base+match_offset;
+ if (cur_match <= limit)
+ goto break_matching;
+ mbase_start -= match_offset;
+ mbase_end -= match_offset;
+ }
+#else
+ early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
+#endif
Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
for (;;) {
if (cur_match >= strstart)
@@ -106,31 +145,31 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
#ifdef UNALIGNED_OK
if (best_len < sizeof(uint32_t)) {
for (;;) {
- if (*(uint16_t *)(mbase_end+cur_match) == (uint16_t)scan_end &&
- *(uint16_t *)(mbase_start+cur_match) == (uint16_t)scan_start)
+ if (zmemcmp_2(mbase_end+cur_match, scan_end) == 0 &&
+ zmemcmp_2(mbase_start+cur_match, scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
# ifdef UNALIGNED64_OK
} else if (best_len >= sizeof(uint64_t)) {
for (;;) {
- if (*(uint64_t *)(mbase_end+cur_match) == (uint64_t)scan_end &&
- *(uint64_t *)(mbase_start+cur_match) == (uint64_t)scan_start)
+ if (zmemcmp_8(mbase_end+cur_match, scan_end) == 0 &&
+ zmemcmp_8(mbase_start+cur_match, scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
# endif
} else {
for (;;) {
- if (*(uint32_t *)(mbase_end+cur_match) == (uint32_t)scan_end &&
- *(uint32_t *)(mbase_start+cur_match) == (uint32_t)scan_start)
+ if (zmemcmp_4(mbase_end+cur_match, scan_end) == 0 &&
+ zmemcmp_4(mbase_start+cur_match, scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
}
#else
for (;;) {
- if (mbase_end[cur_match] == scan_end && mbase_end[cur_match+1] == scan_end0 &&
+ if (mbase_end[cur_match] == scan_end[0] && mbase_end[cur_match+1] == scan_end[1] &&
mbase_start[cur_match] == scan[0] && mbase_start[cur_match+1] == scan[1])
break;
GOTO_NEXT_CHAIN;
@@ -140,7 +179,9 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
if (len > best_len) {
- s->match_start = cur_match;
+ uint32_t match_start = cur_match - match_offset;
+ s->match_start = match_start;
+
/* Do not look for matches beyond the end of the input. */
if (len > lookahead)
return lookahead;
@@ -158,23 +199,90 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
#endif
}
#endif
- scan_end = *(bestcmp_t *)(scan+offset);
-#ifndef UNALIGNED_OK
- scan_end0 = *(bestcmp_t *)(scan+offset+1);
+
+#ifdef UNALIGNED64_OK
+ zmemcpy_8(scan_end, scan+offset);
+#elif defined(UNALIGNED_OK)
+ zmemcpy_4(scan_end, scan+offset);
+#else
+ scan_end[0] = *(scan+offset);
+ scan_end[1] = *(scan+offset+1);
+#endif
+
+#ifdef LONGEST_MATCH_SLOW
+ /* Look for a better string offset */
+ if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
+ Pos pos, next_pos;
+ uint32_t i, hash;
+ unsigned char *scan_endstr;
+
+ /* Go back to offset 0 */
+ cur_match -= match_offset;
+ match_offset = 0;
+ next_pos = cur_match;
+ for (i = 0; i <= len - STD_MIN_MATCH; i++) {
+ pos = prev[(cur_match + i) & wmask];
+ if (pos < next_pos) {
+ /* Hash chain is more distant, use it */
+ if (pos <= limit_base + i)
+ goto break_matching;
+ next_pos = pos;
+ match_offset = (Pos)i;
+ }
+ }
+ /* Switch cur_match to next_pos chain */
+ cur_match = next_pos;
+
+ /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
+ * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
+ * us include one more byte into hash - the byte which will be checked
+ * in main loop now, and which allows to grow match by 1.
+ */
+ scan_endstr = scan + len - (STD_MIN_MATCH+1);
+
+ hash = s->update_hash(s, 0, scan_endstr[0]);
+ hash = s->update_hash(s, hash, scan_endstr[1]);
+ hash = s->update_hash(s, hash, scan_endstr[2]);
+
+ pos = s->head[hash];
+ if (pos < cur_match) {
+ match_offset = (Pos)(len - (STD_MIN_MATCH+1));
+ if (pos <= limit_base + match_offset)
+ goto break_matching;
+ cur_match = pos;
+ }
+
+ /* Update offset-dependent variables */
+ limit = limit_base+match_offset;
+ mbase_start = window-match_offset;
+ mbase_end = (mbase_start+offset);
+ continue;
+ }
#endif
mbase_end = (mbase_start+offset);
- } else if (UNLIKELY(early_exit)) {
+ }
+#ifndef LONGEST_MATCH_SLOW
+ else if (UNLIKELY(early_exit)) {
/* The probability of finding a match later if we here is pretty low, so for
* performance it's best to outright stop here for the lower compression levels
*/
break;
}
+#endif
GOTO_NEXT_CHAIN;
}
-
return best_len;
+
+#ifdef LONGEST_MATCH_SLOW
+break_matching:
+
+ if (best_len < s->lookahead)
+ return best_len;
+
+ return s->lookahead;
+#endif
}
+#undef LONGEST_MATCH_SLOW
#undef LONGEST_MATCH
#undef COMPARE256
-#undef COMPARE258