diff options
Diffstat (limited to 'contrib/optimizations')
-rw-r--r-- | contrib/optimizations/chunkcopy.h | 20 | ||||
-rw-r--r-- | contrib/optimizations/inffast_chunk.c | 29 | ||||
-rw-r--r-- | contrib/optimizations/inflate.c | 77 | ||||
-rw-r--r-- | contrib/optimizations/insert_string.h | 62 | ||||
-rw-r--r-- | contrib/optimizations/slide_hash_neon.h | 65 |
5 files changed, 132 insertions, 121 deletions
diff --git a/contrib/optimizations/chunkcopy.h b/contrib/optimizations/chunkcopy.h index 9c0b7cb..f40546d 100644 --- a/contrib/optimizations/chunkcopy.h +++ b/contrib/optimizations/chunkcopy.h @@ -1,6 +1,6 @@ /* chunkcopy.h -- fast chunk copy and set operations * Copyright (C) 2017 ARM, Inc. - * Copyright 2017 The Chromium Authors. All rights reserved. + * Copyright 2017 The Chromium Authors * Use of this source code is governed by a BSD-style license that can be * found in the Chromium source repository LICENSE file. */ @@ -36,6 +36,17 @@ typedef __m128i z_vec128i_t; #endif /* + * Suppress MSan errors about copying uninitialized bytes (crbug.com/1376033). + */ +#define Z_DISABLE_MSAN +#if defined(__has_feature) + #if __has_feature(memory_sanitizer) + #undef Z_DISABLE_MSAN + #define Z_DISABLE_MSAN __attribute__((no_sanitize("memory"))) + #endif +#endif + +/* * chunk copy type: the z_vec128i_t type size should be exactly 128-bits * and equal to CHUNKCOPY_CHUNK_SIZE. */ @@ -49,7 +60,7 @@ Z_STATIC_ASSERT(vector_128_bits_wide, * instruction appropriate for the z_vec128i_t type. */ static inline z_vec128i_t loadchunk( - const unsigned char FAR* s) { + const unsigned char FAR* s) Z_DISABLE_MSAN { z_vec128i_t v; Z_BUILTIN_MEMCPY(&v, s, sizeof(v)); return v; @@ -82,7 +93,7 @@ static inline void storechunk( static inline unsigned char FAR* chunkcopy_core( unsigned char FAR* out, const unsigned char FAR* from, - unsigned len) { + unsigned len) Z_DISABLE_MSAN { const int bump = (--len % CHUNKCOPY_CHUNK_SIZE) + 1; storechunk(out, loadchunk(from)); out += bump; @@ -152,7 +163,7 @@ static inline unsigned char FAR* chunkcopy_core_safe( static inline unsigned char FAR* chunkunroll_relaxed( unsigned char FAR* out, unsigned FAR* dist, - unsigned FAR* len) { + unsigned FAR* len) Z_DISABLE_MSAN { const unsigned char FAR* from = out - *dist; while (*dist < *len && *dist < CHUNKCOPY_CHUNK_SIZE) { storechunk(out, loadchunk(from)); @@ -473,5 +484,6 @@ typedef unsigned long inflate_holder_t; #undef Z_STATIC_ASSERT #undef Z_RESTRICT #undef Z_BUILTIN_MEMCPY +#undef Z_DISABLE_MSAN #endif /* CHUNKCOPY_H */ diff --git a/contrib/optimizations/inffast_chunk.c b/contrib/optimizations/inffast_chunk.c index 4bacbc4..5b09487 100644 --- a/contrib/optimizations/inffast_chunk.c +++ b/contrib/optimizations/inffast_chunk.c @@ -95,7 +95,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ code const FAR *dcode; /* local strm->distcode */ unsigned lmask; /* mask for first level of length codes */ unsigned dmask; /* mask for first level of distance codes */ - code here; /* retrieved table entry */ + code const *here; /* retrieved table entry */ unsigned op; /* code bits, operation, extra bits, or */ /* window position, window bytes to copy */ unsigned len; /* match length, unused bytes */ @@ -139,20 +139,20 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ bits += 8; #endif } - here = lcode[hold & lmask]; + here = lcode + (hold & lmask); dolen: - op = (unsigned)(here.bits); + op = (unsigned)(here->bits); hold >>= op; bits -= op; - op = (unsigned)(here.op); + op = (unsigned)(here->op); if (op == 0) { /* literal */ - Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ? "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", here.val)); - *out++ = (unsigned char)(here.val); + "inflate: literal 0x%02x\n", here->val)); + *out++ = (unsigned char)(here->val); } else if (op & 16) { /* length base */ - len = (unsigned)(here.val); + len = (unsigned)(here->val); op &= 15; /* number of extra bits */ if (op) { if (bits < op) { @@ -182,14 +182,14 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ bits += 8; #endif } - here = dcode[hold & dmask]; + here = dcode + (hold & dmask); dodist: - op = (unsigned)(here.bits); + op = (unsigned)(here->bits); hold >>= op; bits -= op; - op = (unsigned)(here.op); + op = (unsigned)(here->op); if (op & 16) { /* distance base */ - dist = (unsigned)(here.val); + dist = (unsigned)(here->val); op &= 15; /* number of extra bits */ if (bits < op) { #ifdef INFLATE_CHUNK_READ_64LE @@ -295,7 +295,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } } else if ((op & 64) == 0) { /* 2nd level distance code */ - here = dcode[here.val + (hold & ((1U << op) - 1))]; + here = dcode + here->val + (hold & ((1U << op) - 1)); goto dodist; } else { @@ -305,7 +305,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } } else if ((op & 64) == 0) { /* 2nd level length code */ - here = lcode[here.val + (hold & ((1U << op) - 1))]; + here = lcode + here->val + (hold & ((1U << op) - 1)); goto dolen; } else if (op & 32) { /* end-of-block */ @@ -339,7 +339,6 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ state->bits = bits; Assert((state->hold >> state->bits) == 0, "invalid input data state"); - return; } /* diff --git a/contrib/optimizations/inflate.c b/contrib/optimizations/inflate.c index 81d558b..f3dfba8 100644 --- a/contrib/optimizations/inflate.c +++ b/contrib/optimizations/inflate.c @@ -1,5 +1,5 @@ /* inflate.c -- zlib decompression - * Copyright (C) 1995-2016 Mark Adler + * Copyright (C) 1995-2022 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -131,6 +131,7 @@ z_streamp strm; state->mode = HEAD; state->last = 0; state->havedict = 0; + state->flags = -1; state->dmax = 32768U; state->head = Z_NULL; state->hold = 0; @@ -168,6 +169,8 @@ int windowBits; /* extract wrap request from windowBits parameter */ if (windowBits < 0) { + if (windowBits < -15) + return Z_STREAM_ERROR; wrap = 0; windowBits = -windowBits; } @@ -459,10 +462,10 @@ unsigned copy; /* check function to use adler32() for zlib or crc32() for gzip */ #ifdef GUNZIP -# define UPDATE(check, buf, len) \ +# define UPDATE_CHECK(check, buf, len) \ (state->flags ? crc32(check, buf, len) : adler32(check, buf, len)) #else -# define UPDATE(check, buf, len) adler32(check, buf, len) +# define UPDATE_CHECK(check, buf, len) adler32(check, buf, len) #endif /* check macros for header crc */ @@ -682,7 +685,6 @@ int flush; state->mode = FLAGS; break; } - state->flags = 0; /* expect zlib header */ if (state->head != Z_NULL) state->head->done = -1; if (!(state->wrap & 1) || /* check if zlib header allowed */ @@ -709,6 +711,7 @@ int flush; break; } state->dmax = 1U << len; + state->flags = 0; /* indicate zlib header */ Tracev((stderr, "inflate: zlib header ok\n")); strm->adler = state->check = adler32(0L, Z_NULL, 0); state->mode = hold & 0x200 ? DICTID : TYPE; @@ -734,6 +737,7 @@ int flush; CRC2(state->check, hold); INITBITS(); state->mode = TIME; + /* fallthrough */ case TIME: NEEDBITS(32); if (state->head != Z_NULL) @@ -742,6 +746,7 @@ int flush; CRC4(state->check, hold); INITBITS(); state->mode = OS; + /* fallthrough */ case OS: NEEDBITS(16); if (state->head != Z_NULL) { @@ -752,6 +757,7 @@ int flush; CRC2(state->check, hold); INITBITS(); state->mode = EXLEN; + /* fallthrough */ case EXLEN: if (state->flags & 0x0400) { NEEDBITS(16); @@ -765,14 +771,16 @@ int flush; else if (state->head != Z_NULL) state->head->extra = Z_NULL; state->mode = EXTRA; + /* fallthrough */ case EXTRA: if (state->flags & 0x0400) { copy = state->length; if (copy > have) copy = have; if (copy) { if (state->head != Z_NULL && - state->head->extra != Z_NULL) { - len = state->head->extra_len - state->length; + state->head->extra != Z_NULL && + (len = state->head->extra_len - state->length) < + state->head->extra_max) { zmemcpy(state->head->extra + len, next, len + copy > state->head->extra_max ? state->head->extra_max - len : copy); @@ -787,6 +795,7 @@ int flush; } state->length = 0; state->mode = NAME; + /* fallthrough */ case NAME: if (state->flags & 0x0800) { if (have == 0) goto inf_leave; @@ -808,6 +817,7 @@ int flush; state->head->name = Z_NULL; state->length = 0; state->mode = COMMENT; + /* fallthrough */ case COMMENT: if (state->flags & 0x1000) { if (have == 0) goto inf_leave; @@ -828,6 +838,7 @@ int flush; else if (state->head != Z_NULL) state->head->comment = Z_NULL; state->mode = HCRC; + /* fallthrough */ case HCRC: if (state->flags & 0x0200) { NEEDBITS(16); @@ -851,6 +862,7 @@ int flush; strm->adler = state->check = ZSWAP32(hold); INITBITS(); state->mode = DICT; + /* fallthrough */ case DICT: if (state->havedict == 0) { RESTORE(); @@ -858,8 +870,10 @@ int flush; } strm->adler = state->check = adler32(0L, Z_NULL, 0); state->mode = TYPE; + /* fallthrough */ case TYPE: if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + /* fallthrough */ case TYPEDO: if (state->last) { BYTEBITS(); @@ -910,8 +924,10 @@ int flush; INITBITS(); state->mode = COPY_; if (flush == Z_TREES) goto inf_leave; + /* fallthrough */ case COPY_: state->mode = COPY; + /* fallthrough */ case COPY: copy = state->length; if (copy) { @@ -947,6 +963,7 @@ int flush; Tracev((stderr, "inflate: table sizes ok\n")); state->have = 0; state->mode = LENLENS; + /* fallthrough */ case LENLENS: while (state->have < state->ncode) { NEEDBITS(3); @@ -968,6 +985,7 @@ int flush; Tracev((stderr, "inflate: code lengths ok\n")); state->have = 0; state->mode = CODELENS; + /* fallthrough */ case CODELENS: while (state->have < state->nlen + state->ndist) { for (;;) { @@ -1027,11 +1045,11 @@ int flush; } /* build code tables -- note: do not change the lenbits or distbits - values here (9 and 6) without reading the comments in inftrees.h + values here (10 and 9) without reading the comments in inftrees.h concerning the ENOUGH constants, which depend on those values */ state->next = state->codes; state->lencode = (const code FAR *)(state->next); - state->lenbits = 9; + state->lenbits = 10; ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), &(state->lenbits), state->work); if (ret) { @@ -1040,7 +1058,7 @@ int flush; break; } state->distcode = (const code FAR *)(state->next); - state->distbits = 6; + state->distbits = 9; ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, &(state->next), &(state->distbits), state->work); if (ret) { @@ -1051,8 +1069,10 @@ int flush; Tracev((stderr, "inflate: codes ok\n")); state->mode = LEN_; if (flush == Z_TREES) goto inf_leave; + /* fallthrough */ case LEN_: state->mode = LEN; + /* fallthrough */ case LEN: if (have >= INFLATE_FAST_MIN_INPUT && left >= INFLATE_FAST_MIN_OUTPUT) { @@ -1103,6 +1123,7 @@ int flush; } state->extra = (unsigned)(here.op) & 15; state->mode = LENEXT; + /* fallthrough */ case LENEXT: if (state->extra) { NEEDBITS(state->extra); @@ -1113,6 +1134,7 @@ int flush; Tracevv((stderr, "inflate: length %u\n", state->length)); state->was = state->length; state->mode = DIST; + /* fallthrough */ case DIST: for (;;) { here = state->distcode[BITS(state->distbits)]; @@ -1140,6 +1162,7 @@ int flush; state->offset = (unsigned)here.val; state->extra = (unsigned)(here.op) & 15; state->mode = DISTEXT; + /* fallthrough */ case DISTEXT: if (state->extra) { NEEDBITS(state->extra); @@ -1156,6 +1179,7 @@ int flush; #endif Tracevv((stderr, "inflate: distance %u\n", state->offset)); state->mode = MATCH; + /* fallthrough */ case MATCH: if (left == 0) goto inf_leave; copy = out - left; @@ -1214,7 +1238,7 @@ int flush; state->total += out; if ((state->wrap & 4) && out) strm->adler = state->check = - UPDATE(state->check, put - out, out); + UPDATE_CHECK(state->check, put - out, out); out = left; if ((state->wrap & 4) && ( #ifdef GUNZIP @@ -1230,10 +1254,11 @@ int flush; } #ifdef GUNZIP state->mode = LENGTH; + /* fallthrough */ case LENGTH: if (state->wrap && state->flags) { NEEDBITS(32); - if (hold != (state->total & 0xffffffffUL)) { + if ((state->wrap & 4) && hold != (state->total & 0xffffffff)) { strm->msg = (char *)"incorrect length check"; state->mode = BAD; break; @@ -1243,6 +1268,7 @@ int flush; } #endif state->mode = DONE; + /* fallthrough */ case DONE: ret = Z_STREAM_END; goto inf_leave; @@ -1252,6 +1278,7 @@ int flush; case MEM: return Z_MEM_ERROR; case SYNC: + /* fallthrough */ default: return Z_STREAM_ERROR; } @@ -1263,16 +1290,29 @@ int flush; Note: a memory error from inflate() is non-recoverable. */ inf_leave: - /* We write a defined value in the unused space to help mark +#if defined(ZLIB_DEBUG) + /* XXX(cavalcantii): I put this in place back in 2017 to help debug faulty + * client code relying on undefined behavior when chunk_copy first landed. + * + * It is save to say after all these years that Chromium code is well + * behaved and works fine with the optimization, therefore we can enable + * this only for DEBUG builds. + * + * We write a defined value in the unused space to help mark * where the stream has ended. We don't use zeros as that can * mislead clients relying on undefined behavior (i.e. assuming * that the data is over when the buffer has a zero/null value). + * + * The basic idea is that if client code is not relying on the zlib context + * to inform the amount of decompressed data, but instead reads the output + * buffer until a zero/null is found, it will fail faster and harder + * when the remaining of the buffer is marked with a symbol (e.g. 0x55). */ if (left >= CHUNKCOPY_CHUNK_SIZE) memset(put, 0x55, CHUNKCOPY_CHUNK_SIZE); else memset(put, 0x55, left); - +#endif RESTORE(); if (state->wsize || (out != strm->avail_out && state->mode < BAD && (state->mode < CHECK || flush != Z_FINISH))) @@ -1287,7 +1327,7 @@ int flush; state->total += out; if ((state->wrap & 4) && out) strm->adler = state->check = - UPDATE(state->check, strm->next_out - out, out); + UPDATE_CHECK(state->check, strm->next_out - out, out); strm->data_type = (int)state->bits + (state->last ? 64 : 0) + (state->mode == TYPE ? 128 : 0) + (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); @@ -1423,6 +1463,7 @@ int ZEXPORT inflateSync(strm) z_streamp strm; { unsigned len; /* number of bytes to look at or looked at */ + int flags; /* temporary to save header status */ unsigned long in, out; /* temporary to save total_in and total_out */ unsigned char buf[4]; /* to restore bit buffer to byte string */ struct inflate_state FAR *state; @@ -1455,9 +1496,15 @@ z_streamp strm; /* return no joy or set up to restart inflate() on a new block */ if (state->have != 4) return Z_DATA_ERROR; + if (state->flags == -1) + state->wrap = 0; /* if no header yet, treat as raw */ + else + state->wrap &= ~4; /* no point in computing a check value now */ + flags = state->flags; in = strm->total_in; out = strm->total_out; inflateReset(strm); strm->total_in = in; strm->total_out = out; + state->flags = flags; state->mode = TYPE; return Z_OK; } @@ -1553,7 +1600,7 @@ int check; if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; - if (check) + if (check && state->wrap) state->wrap |= 4; else state->wrap &= ~4; diff --git a/contrib/optimizations/insert_string.h b/contrib/optimizations/insert_string.h index 9f634ae..c6a296a 100644 --- a/contrib/optimizations/insert_string.h +++ b/contrib/optimizations/insert_string.h @@ -1,15 +1,20 @@ /* insert_string.h * - * Copyright 2019 The Chromium Authors. All rights reserved. + * Copyright 2019 The Chromium Authors * Use of this source code is governed by a BSD-style license that can be * found in the Chromium source repository LICENSE file. */ -#if defined(_MSC_VER) +#ifndef INSERT_STRING_H +#define INSERT_STRING_H + +#ifndef INLINE +#if defined(_MSC_VER) && !defined(__clang__) #define INLINE __inline #else #define INLINE inline #endif +#endif #include "cpu_features.h" @@ -23,7 +28,8 @@ #define TARGET_CPU_WITH_CRC #endif - #define _cpu_crc32_u32 _mm_crc32_u32 + /* CRC32C uint32_t */ + #define _cpu_crc32c_hash_u32 _mm_crc32_u32 #elif defined(CRC32_ARMV8_CRC32) #if defined(__clang__) @@ -40,7 +46,8 @@ #define TARGET_CPU_WITH_CRC __attribute__((target("armv8-a,crc"))) #endif // defined(__aarch64__) - #define _cpu_crc32_u32 __crc32cw + /* CRC32C uint32_t */ + #define _cpu_crc32c_hash_u32 __crc32cw #endif // clang-format on @@ -50,20 +57,15 @@ TARGET_CPU_WITH_CRC local INLINE Pos insert_string_simd(deflate_state* const s, const Pos str) { Pos ret; - unsigned *ip, val, h = 0; + unsigned val, h = 0; - ip = (unsigned*)&s->window[str]; - val = *ip; + zmemcpy(&val, &s->window[str], sizeof(val)); if (s->level >= 6) val &= 0xFFFFFF; - /* Unlike the case of data integrity checks for GZIP format where the - * polynomial used is defined (https://tools.ietf.org/html/rfc1952#page-11), - * here it is just a hash function for the hash table used while - * performing compression. - */ - h = _cpu_crc32_u32(h, val); + /* Compute hash from the CRC32C of |val|. */ + h = _cpu_crc32c_hash_u32(h, val); ret = s->head[h & s->hash_mask]; s->head[h & s->hash_mask] = str; @@ -73,8 +75,22 @@ local INLINE Pos insert_string_simd(deflate_state* const s, const Pos str) { #endif // TARGET_CPU_WITH_CRC +/** + * Some applications need to match zlib DEFLATE output exactly [3]. Use the + * canonical zlib Rabin-Karp rolling hash [1,2] in that case. + * + * [1] For a description of the Rabin and Karp algorithm, see "Algorithms" + * book by R. Sedgewick, Addison-Wesley, p252. + * [2] https://www.euccas.me/zlib/#zlib_rabin_karp and also "rolling hash" + * https://en.wikipedia.org/wiki/Rolling_hash + * [3] crbug.com/1316541 AOSP incremental client APK package OTA upgrades. + */ +#ifdef CHROMIUM_ZLIB_NO_CASTAGNOLI +#define USE_ZLIB_RABIN_KARP_ROLLING_HASH +#endif + /* =========================================================================== - * Update a hash value with the given input byte + * Update a hash value with the given input byte (Rabin-Karp rolling hash). * IN assertion: all calls to UPDATE_HASH are made with consecutive input * characters, so that a running hash key can be computed from the previous * key instead of complete recalculation each time. @@ -106,16 +122,16 @@ local INLINE Pos insert_string_c(deflate_state* const s, const Pos str) { } local INLINE Pos insert_string(deflate_state* const s, const Pos str) { -/* insert_string_simd string dictionary insertion: this SIMD symbol hashing +/* insert_string_simd string dictionary insertion: SIMD crc32c symbol hasher * significantly improves data compression speed. * - * Note: the generated compressed output is a valid DEFLATE stream but will - * differ from vanilla zlib output ... + * Note: the generated compressed output is a valid DEFLATE stream, but will + * differ from canonical zlib output. */ -#if defined(CHROMIUM_ZLIB_NO_CASTAGNOLI) -/* ... so this build-time option can used to disable the SIMD symbol hasher - * if matching vanilla zlib DEFLATE output is required. - */ (;) /* FALLTHOUGH */ +#if defined(USE_ZLIB_RABIN_KARP_ROLLING_HASH) +/* So this build-time option can be used to disable the crc32c hash, and use + * the Rabin-Karp hash instead. + */ /* FALLTHROUGH Rabin-Karp */ #elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_SIMD_SSE42_PCLMUL) if (x86_cpu_enable_simd) return insert_string_simd(s, str); @@ -123,5 +139,7 @@ local INLINE Pos insert_string(deflate_state* const s, const Pos str) { if (arm_cpu_enable_crc32) return insert_string_simd(s, str); #endif - return insert_string_c(s, str); + return insert_string_c(s, str); /* Rabin-Karp */ } + +#endif /* INSERT_STRING_H */ diff --git a/contrib/optimizations/slide_hash_neon.h b/contrib/optimizations/slide_hash_neon.h deleted file mode 100644 index 26995d7..0000000 --- a/contrib/optimizations/slide_hash_neon.h +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright 2018 The Chromium Authors. All rights reserved. - * Use of this source code is governed by a BSD-style license that can be - * found in the Chromium source repository LICENSE file. - */ -#ifndef __SLIDE_HASH__NEON__ -#define __SLIDE_HASH__NEON__ - -#include "deflate.h" -#include <arm_neon.h> - -inline static void ZLIB_INTERNAL neon_slide_hash_update(Posf *hash, - const uInt hash_size, - const ush w_size) -{ - /* NEON 'Q' registers allow to store 128 bits, so we can load 8x16-bits - * values. For further details, check: - * ARM DHT 0002A, section 1.3.2 NEON Registers. - */ - const size_t chunk = sizeof(uint16x8_t) / sizeof(uint16_t); - /* Unrolling the operation yielded a compression performance boost in both - * ARMv7 (from 11.7% to 13.4%) and ARMv8 (from 3.7% to 7.5%) for HTML4 - * content. For full benchmarking data, check: http://crbug.com/863257. - */ - const size_t stride = 2*chunk; - const uint16x8_t v = vdupq_n_u16(w_size); - - for (Posf *end = hash + hash_size; hash != end; hash += stride) { - uint16x8_t m_low = vld1q_u16(hash); - uint16x8_t m_high = vld1q_u16(hash + chunk); - - /* The first 'q' in vqsubq_u16 makes these subtracts saturate to zero, - * replacing the ternary operator expression in the original code: - * (m >= wsize ? m - wsize : NIL). - */ - m_low = vqsubq_u16(m_low, v); - m_high = vqsubq_u16(m_high, v); - - vst1q_u16(hash, m_low); - vst1q_u16(hash + chunk, m_high); - } -} - - -inline static void ZLIB_INTERNAL neon_slide_hash(Posf *head, Posf *prev, - const unsigned short w_size, - const uInt hash_size) -{ - /* - * SIMD implementation for hash table rebase assumes: - * 1. hash chain offset (Pos) is 2 bytes. - * 2. hash table size is multiple of 32 bytes. - * #1 should be true as Pos is defined as "ush" - * #2 should be true as hash_bits are greater than 7 - */ - const size_t size = hash_size * sizeof(head[0]); - Assert(sizeof(Pos) == 2, "Wrong Pos size."); - Assert((size % sizeof(uint16x8_t) * 2) == 0, "Hash table size error."); - - neon_slide_hash_update(head, hash_size, w_size); -#ifndef FASTEST - neon_slide_hash_update(prev, w_size, w_size); -#endif -} - -#endif |