diff options
Diffstat (limited to 'src/zlib-ng/match_tpl.h')
-rw-r--r-- | src/zlib-ng/match_tpl.h | 182 |
1 files changed, 145 insertions, 37 deletions
diff --git a/src/zlib-ng/match_tpl.h b/src/zlib-ng/match_tpl.h index b15ca17..3fc71c1 100644 --- a/src/zlib-ng/match_tpl.h +++ b/src/zlib-ng/match_tpl.h @@ -1,3 +1,12 @@ +/* match_tpl.h -- find longest match template for compare256 variants + * + * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Portions copyright (C) 2014-2021 Konstantin Nosov + * Fast-zlib optimized longest_match + * https://github.com/gildor2/fast_zlib + */ #include "zbuild.h" #include "deflate.h" @@ -6,16 +15,6 @@ #ifndef MATCH_TPL_H #define MATCH_TPL_H -#ifdef UNALIGNED_OK -# ifdef UNALIGNED64_OK -typedef uint64_t bestcmp_t; -# else -typedef uint32_t bestcmp_t; -# endif -#else -typedef uint8_t bestcmp_t; -#endif - #define EARLY_EXIT_TRIGGER_LEVEL 5 #endif @@ -37,25 +36,28 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { Z_REGISTER unsigned char *mbase_end; const Pos *prev = s->prev; Pos limit; +#ifdef LONGEST_MATCH_SLOW + Pos limit_base; +#else int32_t early_exit; +#endif uint32_t chain_length, nice_match, best_len, offset; uint32_t lookahead = s->lookahead; - bestcmp_t scan_end; -#ifndef UNALIGNED_OK - bestcmp_t scan_end0; -#else - bestcmp_t scan_start; + Pos match_offset = 0; +#ifdef UNALIGNED_OK + uint8_t scan_start[8]; #endif + uint8_t scan_end[8]; #define GOTO_NEXT_CHAIN \ if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \ continue; \ return best_len; - /* The code is optimized for MAX_MATCH-2 multiple of 16. */ - Assert(MAX_MATCH == 258, "Code too clever"); + /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */ + Assert(STD_MAX_MATCH == 258, "Code too clever"); - best_len = s->prev_length ? s->prev_length : 1; + best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1; /* Calculate read offset which should only extend an extra byte * to find the next best match length. @@ -71,17 +73,20 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { } #endif - scan_end = *(bestcmp_t *)(scan+offset); -#ifndef UNALIGNED_OK - scan_end0 = *(bestcmp_t *)(scan+offset+1); +#ifdef UNALIGNED64_OK + zmemcpy_8(scan_start, scan); + zmemcpy_8(scan_end, scan+offset); +#elif defined(UNALIGNED_OK) + zmemcpy_4(scan_start, scan); + zmemcpy_4(scan_end, scan+offset); #else - scan_start = *(bestcmp_t *)(scan); + scan_end[0] = *(scan+offset); + scan_end[1] = *(scan+offset+1); #endif mbase_end = (mbase_start+offset); /* Do not waste too much time if we already have a good match */ chain_length = s->max_chain_length; - early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL; if (best_len >= s->good_match) chain_length >>= 2; nice_match = (uint32_t)s->nice_match; @@ -90,7 +95,41 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { * we prevent matches with the string of window index 0 */ limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0; +#ifdef LONGEST_MATCH_SLOW + limit_base = limit; + if (best_len >= STD_MIN_MATCH) { + /* We're continuing search (lazy evaluation). */ + uint32_t i, hash; + Pos pos; + + /* Find a most distant chain starting from scan with index=1 (index=0 corresponds + * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because + * these strings are not yet inserted into the hash table. + */ + hash = s->update_hash(s, 0, scan[1]); + hash = s->update_hash(s, hash, scan[2]); + for (i = 3; i <= best_len; i++) { + hash = s->update_hash(s, hash, scan[i]); + + /* If we're starting with best_len >= 3, we can use offset search. */ + pos = s->head[hash]; + if (pos < cur_match) { + match_offset = (Pos)(i - 2); + cur_match = pos; + } + } + + /* Update offset-dependent variables */ + limit = limit_base+match_offset; + if (cur_match <= limit) + goto break_matching; + mbase_start -= match_offset; + mbase_end -= match_offset; + } +#else + early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL; +#endif Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead"); for (;;) { if (cur_match >= strstart) @@ -106,31 +145,31 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { #ifdef UNALIGNED_OK if (best_len < sizeof(uint32_t)) { for (;;) { - if (*(uint16_t *)(mbase_end+cur_match) == (uint16_t)scan_end && - *(uint16_t *)(mbase_start+cur_match) == (uint16_t)scan_start) + if (zmemcmp_2(mbase_end+cur_match, scan_end) == 0 && + zmemcmp_2(mbase_start+cur_match, scan_start) == 0) break; GOTO_NEXT_CHAIN; } # ifdef UNALIGNED64_OK } else if (best_len >= sizeof(uint64_t)) { for (;;) { - if (*(uint64_t *)(mbase_end+cur_match) == (uint64_t)scan_end && - *(uint64_t *)(mbase_start+cur_match) == (uint64_t)scan_start) + if (zmemcmp_8(mbase_end+cur_match, scan_end) == 0 && + zmemcmp_8(mbase_start+cur_match, scan_start) == 0) break; GOTO_NEXT_CHAIN; } # endif } else { for (;;) { - if (*(uint32_t *)(mbase_end+cur_match) == (uint32_t)scan_end && - *(uint32_t *)(mbase_start+cur_match) == (uint32_t)scan_start) + if (zmemcmp_4(mbase_end+cur_match, scan_end) == 0 && + zmemcmp_4(mbase_start+cur_match, scan_start) == 0) break; GOTO_NEXT_CHAIN; } } #else for (;;) { - if (mbase_end[cur_match] == scan_end && mbase_end[cur_match+1] == scan_end0 && + if (mbase_end[cur_match] == scan_end[0] && mbase_end[cur_match+1] == scan_end[1] && mbase_start[cur_match] == scan[0] && mbase_start[cur_match+1] == scan[1]) break; GOTO_NEXT_CHAIN; @@ -140,7 +179,9 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan"); if (len > best_len) { - s->match_start = cur_match; + uint32_t match_start = cur_match - match_offset; + s->match_start = match_start; + /* Do not look for matches beyond the end of the input. */ if (len > lookahead) return lookahead; @@ -158,23 +199,90 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { #endif } #endif - scan_end = *(bestcmp_t *)(scan+offset); -#ifndef UNALIGNED_OK - scan_end0 = *(bestcmp_t *)(scan+offset+1); + +#ifdef UNALIGNED64_OK + zmemcpy_8(scan_end, scan+offset); +#elif defined(UNALIGNED_OK) + zmemcpy_4(scan_end, scan+offset); +#else + scan_end[0] = *(scan+offset); + scan_end[1] = *(scan+offset+1); +#endif + +#ifdef LONGEST_MATCH_SLOW + /* Look for a better string offset */ + if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) { + Pos pos, next_pos; + uint32_t i, hash; + unsigned char *scan_endstr; + + /* Go back to offset 0 */ + cur_match -= match_offset; + match_offset = 0; + next_pos = cur_match; + for (i = 0; i <= len - STD_MIN_MATCH; i++) { + pos = prev[(cur_match + i) & wmask]; + if (pos < next_pos) { + /* Hash chain is more distant, use it */ + if (pos <= limit_base + i) + goto break_matching; + next_pos = pos; + match_offset = (Pos)i; + } + } + /* Switch cur_match to next_pos chain */ + cur_match = next_pos; + + /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get + * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets + * us include one more byte into hash - the byte which will be checked + * in main loop now, and which allows to grow match by 1. + */ + scan_endstr = scan + len - (STD_MIN_MATCH+1); + + hash = s->update_hash(s, 0, scan_endstr[0]); + hash = s->update_hash(s, hash, scan_endstr[1]); + hash = s->update_hash(s, hash, scan_endstr[2]); + + pos = s->head[hash]; + if (pos < cur_match) { + match_offset = (Pos)(len - (STD_MIN_MATCH+1)); + if (pos <= limit_base + match_offset) + goto break_matching; + cur_match = pos; + } + + /* Update offset-dependent variables */ + limit = limit_base+match_offset; + mbase_start = window-match_offset; + mbase_end = (mbase_start+offset); + continue; + } #endif mbase_end = (mbase_start+offset); - } else if (UNLIKELY(early_exit)) { + } +#ifndef LONGEST_MATCH_SLOW + else if (UNLIKELY(early_exit)) { /* The probability of finding a match later if we here is pretty low, so for * performance it's best to outright stop here for the lower compression levels */ break; } +#endif GOTO_NEXT_CHAIN; } - return best_len; + +#ifdef LONGEST_MATCH_SLOW +break_matching: + + if (best_len < s->lookahead) + return best_len; + + return s->lookahead; +#endif } +#undef LONGEST_MATCH_SLOW #undef LONGEST_MATCH #undef COMPARE256 -#undef COMPARE258 |