diff options
author | Elliott Hughes <enh@google.com> | 2020-01-09 09:00:48 -0800 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2020-01-09 09:00:48 -0800 |
commit | 9db9402ccd554cf0f23033d093fdf071583b4bb3 (patch) | |
tree | a913e49d27ee556972f2998051ab01e139969ba4 | |
parent | 889a077ed1a44499ec57c082029087b0d619073b (diff) | |
parent | 457c92b855a561252861559abc6bbc5fa08d4f4b (diff) | |
download | zlib-9db9402ccd554cf0f23033d093fdf071583b4bb3.tar.gz |
Merge "Upgrade zlib to 814da1f383b625955149c3845db62af3f29a4ffe"
am: 457c92b855
Change-Id: I4266d1bb73f697b525158bfefdfec3453ddf796c
-rw-r--r-- | BUILD.gn | 1 | ||||
-rw-r--r-- | METADATA | 8 | ||||
-rw-r--r-- | contrib/optimizations/insert_string.h | 122 | ||||
-rw-r--r-- | crc32_simd.c | 27 | ||||
-rw-r--r-- | crc32_simd.h | 5 | ||||
-rw-r--r-- | deflate.c | 97 |
6 files changed, 128 insertions, 132 deletions
@@ -247,6 +247,7 @@ component("zlib") { "adler32.c", "chromeconf.h", "compress.c", + "contrib/optimizations/insert_string.h", "crc32.c", "crc32.h", "deflate.c", @@ -5,11 +5,11 @@ third_party { type: GIT value: "https://chromium.googlesource.com/chromium/src/third_party/zlib/" } - version: "f262c1b3c4196a2fee98c113142faff525b8d884" + version: "814da1f383b625955149c3845db62af3f29a4ffe" license_type: NOTICE last_upgrade_date { - year: 2019 - month: 12 - day: 9 + year: 2020 + month: 1 + day: 8 } } diff --git a/contrib/optimizations/insert_string.h b/contrib/optimizations/insert_string.h new file mode 100644 index 0000000..69eee3d --- /dev/null +++ b/contrib/optimizations/insert_string.h @@ -0,0 +1,122 @@ +/* insert_string.h + * + * Copyright 2019 The Chromium Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the Chromium source repository LICENSE file. + */ +#ifdef _MSC_VER +#define INLINE __inline +#else +#define INLINE inline +#endif + +/* Optimized insert_string block */ +#if defined(CRC32_SIMD_SSE42_PCLMUL) || defined(CRC32_ARMV8_CRC32) +#define TARGET_CPU_WITH_CRC +// clang-format off +#if defined(CRC32_SIMD_SSE42_PCLMUL) + /* Required to make MSVC bot build pass. */ + #include <smmintrin.h> + #if defined(__GNUC__) || defined(__clang__) + #undef TARGET_CPU_WITH_CRC + #define TARGET_CPU_WITH_CRC __attribute__((target("sse4.2"))) + #endif + + #define _cpu_crc32_u32 _mm_crc32_u32 + +#elif defined(CRC32_ARMV8_CRC32) + #include "arm_features.h" + #if defined(__clang__) + #undef TARGET_CPU_WITH_CRC + #define __crc32cw __builtin_arm_crc32cw + #endif + + #define _cpu_crc32_u32 __crc32cw + + #if defined(__aarch64__) + #define TARGET_CPU_WITH_CRC __attribute__((target("crc"))) + #else // !defined(__aarch64__) + #define TARGET_CPU_WITH_CRC __attribute__((target("armv8-a,crc"))) + #endif // defined(__aarch64__) +#endif +// clang-format on +TARGET_CPU_WITH_CRC +local INLINE Pos insert_string_optimized(deflate_state* const s, + const Pos str) { + Pos ret; + unsigned *ip, val, h = 0; + + ip = (unsigned*)&s->window[str]; + val = *ip; + + if (s->level >= 6) + val &= 0xFFFFFF; + + /* Unlike the case of data integrity checks for GZIP format where the + * polynomial used is defined (https://tools.ietf.org/html/rfc1952#page-11), + * here it is just a hash function for the hash table used while + * performing compression. + */ + h = _cpu_crc32_u32(h, val); + + ret = s->head[h & s->hash_mask]; + s->head[h & s->hash_mask] = str; + s->prev[str & s->w_mask] = ret; + return ret; +} +#endif /* Optimized insert_string block */ + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to UPDATE_HASH are made with consecutive input + * characters, so that a running hash key can be computed from the previous + * key instead of complete recalculation each time. + */ +#define UPDATE_HASH(s, h, c) (h = (((h) << s->hash_shift) ^ (c)) & s->hash_mask) + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. + * IN assertion: all calls to INSERT_STRING are made with consecutive input + * characters and the first MIN_MATCH bytes of str are valid (except for + * the last MIN_MATCH-1 bytes of the input file). + */ +local INLINE Pos insert_string_c(deflate_state* const s, const Pos str) { + Pos ret; + + UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH - 1)]); +#ifdef FASTEST + ret = s->head[s->ins_h]; +#else + ret = s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = str; + + return ret; +} + +local INLINE Pos insert_string(deflate_state* const s, const Pos str) { +/* String dictionary insertion: faster symbol hashing has a positive impact + * on data compression speeds (around 20% on Intel and 36% on Arm Cortex big + * cores). + * A misfeature is that the generated compressed output will differ from + * vanilla zlib (even though it is still valid 'DEFLATE-d' content). + * + * We offer here a way to disable the optimization if there is the expectation + * that compressed content should match when compared to vanilla zlib. + */ +#if !defined(CHROMIUM_ZLIB_NO_CASTAGNOLI) + /* TODO(cavalcantii): unify CPU features code. */ +#if defined(CRC32_ARMV8_CRC32) + if (arm_cpu_enable_crc32) + return insert_string_optimized(s, str); +#elif defined(CRC32_SIMD_SSE42_PCLMUL) + if (x86_cpu_enable_simd) + return insert_string_optimized(s, str); +#endif +#endif + return insert_string_c(s, str); +} diff --git a/crc32_simd.c b/crc32_simd.c index 988f00b..c8e5592 100644 --- a/crc32_simd.c +++ b/crc32_simd.c @@ -240,31 +240,4 @@ uint32_t ZLIB_INTERNAL armv8_crc32_little(unsigned long crc, return ~c; } -TARGET_ARMV8_WITH_CRC -Pos ZLIB_INTERNAL insert_string_arm(deflate_state *const s, const Pos str) -{ - Pos ret; - unsigned *ip, val, h = 0; - - ip = (unsigned *)&s->window[str]; - val = *ip; - - if (s->level >= 6) - val &= 0xFFFFFF; - - /* We use CRC32C (Castagnoli) to ensure that the compressed output - * will match between Intel x ARM. - * Unlike the case of data integrity checks for GZIP format where the - * polynomial used is defined (https://tools.ietf.org/html/rfc1952#page-11), - * here it is just a hash function for the hash table used while - * performing compression. - */ - h = __crc32cw(h, val); - - ret = s->head[h & s->hash_mask]; - s->head[h & s->hash_mask] = str; - s->prev[str & s->w_mask] = ret; - return ret; -} - #endif diff --git a/crc32_simd.h b/crc32_simd.h index 08f1756..68bc235 100644 --- a/crc32_simd.h +++ b/crc32_simd.h @@ -34,8 +34,3 @@ uint32_t ZLIB_INTERNAL armv8_crc32_little(unsigned long crc, const unsigned char* buf, z_size_t len); -/* - * Insert hash string. - */ -Pos ZLIB_INTERNAL insert_string_arm(deflate_state *const s, const Pos str); - @@ -51,19 +51,12 @@ #include <assert.h> #include "deflate.h" #include "x86.h" - -#if defined(CRC32_SIMD_SSE42_PCLMUL) -#include <smmintrin.h> -#endif +#include "contrib/optimizations/insert_string.h" #if (defined(__ARM_NEON__) || defined(__ARM_NEON)) #include "contrib/optimizations/slide_hash_neon.h" #endif -/* We need crypto extension crc32 to implement optimized hash in - * insert_string. - */ #if defined(CRC32_ARMV8_CRC32) -#include "arm_features.h" #include "crc32_simd.h" #endif @@ -121,38 +114,6 @@ extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); -#ifdef _MSC_VER -#define INLINE __inline -#else -#define INLINE inline -#endif - -/* Intel optimized insert_string. */ -#if defined(CRC32_SIMD_SSE42_PCLMUL) - -#if defined(__GNUC__) || defined(__clang__) -__attribute__((target("sse4.2"))) -#endif -local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) -{ - Pos ret; - unsigned *ip, val, h = 0; - - ip = (unsigned *)&s->window[str]; - val = *ip; - - if (s->level >= 6) - val &= 0xFFFFFF; - - h = _mm_crc32_u32(h, val); - - ret = s->head[h & s->hash_mask]; - s->head[h & s->hash_mask] = str; - s->prev[str & s->w_mask] = ret; - return ret; -} -#endif - /* =========================================================================== * Local data */ @@ -208,62 +169,6 @@ local const config configuration_table[10] = { #define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0)) /* =========================================================================== - * Update a hash value with the given input byte - * IN assertion: all calls to UPDATE_HASH are made with consecutive input - * characters, so that a running hash key can be computed from the previous - * key instead of complete recalculation each time. - */ -#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) - -/* =========================================================================== - * Insert string str in the dictionary and set match_head to the previous head - * of the hash chain (the most recent string with same hash key). Return - * the previous length of the hash chain. - * If this file is compiled with -DFASTEST, the compression level is forced - * to 1, and no hash chains are maintained. - * IN assertion: all calls to INSERT_STRING are made with consecutive input - * characters and the first MIN_MATCH bytes of str are valid (except for - * the last MIN_MATCH-1 bytes of the input file). - */ -local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) -{ - Pos ret; - - UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); -#ifdef FASTEST - ret = s->head[s->ins_h]; -#else - ret = s->prev[str & s->w_mask] = s->head[s->ins_h]; -#endif - s->head[s->ins_h] = str; - - return ret; -} - -local INLINE Pos insert_string(deflate_state *const s, const Pos str) -{ -/* String dictionary insertion: faster symbol hashing has a positive impact - * on data compression speeds (around 20% on Intel and 36% on ARM Cortex big - * cores). - * A misfeature is that the generated compressed output will differ from - * vanilla zlib (even though it is still valid 'DEFLATE-d' content). - * - * We offer here a way to disable the optimization if there is the expectation - * that compressed content should match when compared to vanilla zlib. - */ -#if !defined(CHROMIUM_ZLIB_NO_CASTAGNOLI) -#if defined(CRC32_ARMV8_CRC32) - if (arm_cpu_enable_crc32) - return insert_string_arm(s, str); -#elif defined(CRC32_SIMD_SSE42_PCLMUL) - if (x86_cpu_enable_simd) - return insert_string_sse(s, str); -#endif -#endif - return insert_string_c(s, str); -} - -/* =========================================================================== * Initialize the hash table (avoiding 64K overflow for 16 bit systems). * prev[] will be initialized on the fly. */ |