From d7f3ca98b2b0d5f72656502961a59353791c4f8a Mon Sep 17 00:00:00 2001 From: Adenilson Cavalcanti Date: Thu, 12 Dec 2019 20:49:49 +0000 Subject: Unify optimized insert_string implementations This change will unify the x86 and Arm optimized implementations for insert_string (used for compression). The objective here is two-fold: a) Remove duplicated code. b) Better insulate deflate.c divergence when compared to vanilla zlib. Bug: 1032721 Change-Id: Id2f65398aeb5a6384708493f0f6ae1fcd14022c2 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1960893 Reviewed-by: Chris Blume Commit-Queue: Adenilson Cavalcanti Cr-Original-Commit-Position: refs/heads/master@{#724325} Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src Cr-Mirrored-Commit: 9d4ec9349a1bf609eedb917c44c69eb0df9ff6bb --- BUILD.gn | 1 + contrib/optimizations/insert_string.h | 122 ++++++++++++++++++++++++++++++++++ crc32_simd.c | 27 -------- crc32_simd.h | 5 -- deflate.c | 97 +-------------------------- 5 files changed, 124 insertions(+), 128 deletions(-) create mode 100644 contrib/optimizations/insert_string.h diff --git a/BUILD.gn b/BUILD.gn index 0f59c0a..5f88733 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -247,6 +247,7 @@ component("zlib") { "adler32.c", "chromeconf.h", "compress.c", + "contrib/optimizations/insert_string.h", "crc32.c", "crc32.h", "deflate.c", diff --git a/contrib/optimizations/insert_string.h b/contrib/optimizations/insert_string.h new file mode 100644 index 0000000..69eee3d --- /dev/null +++ b/contrib/optimizations/insert_string.h @@ -0,0 +1,122 @@ +/* insert_string.h + * + * Copyright 2019 The Chromium Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the Chromium source repository LICENSE file. 
+ */ +#ifdef _MSC_VER +#define INLINE __inline +#else +#define INLINE inline +#endif + +/* Optimized insert_string block */ +#if defined(CRC32_SIMD_SSE42_PCLMUL) || defined(CRC32_ARMV8_CRC32) +#define TARGET_CPU_WITH_CRC +// clang-format off +#if defined(CRC32_SIMD_SSE42_PCLMUL) + /* Required to make MSVC bot build pass. */ + #include + #if defined(__GNUC__) || defined(__clang__) + #undef TARGET_CPU_WITH_CRC + #define TARGET_CPU_WITH_CRC __attribute__((target("sse4.2"))) + #endif + + #define _cpu_crc32_u32 _mm_crc32_u32 + +#elif defined(CRC32_ARMV8_CRC32) + #include "arm_features.h" + #if defined(__clang__) + #undef TARGET_CPU_WITH_CRC + #define __crc32cw __builtin_arm_crc32cw + #endif + + #define _cpu_crc32_u32 __crc32cw + + #if defined(__aarch64__) + #define TARGET_CPU_WITH_CRC __attribute__((target("crc"))) + #else // !defined(__aarch64__) + #define TARGET_CPU_WITH_CRC __attribute__((target("armv8-a,crc"))) + #endif // defined(__aarch64__) +#endif +// clang-format on +TARGET_CPU_WITH_CRC +local INLINE Pos insert_string_optimized(deflate_state* const s, + const Pos str) { + Pos ret; + unsigned *ip, val, h = 0; + + ip = (unsigned*)&s->window[str]; + val = *ip; + + if (s->level >= 6) + val &= 0xFFFFFF; + + /* Unlike the case of data integrity checks for GZIP format where the + * polynomial used is defined (https://tools.ietf.org/html/rfc1952#page-11), + * here it is just a hash function for the hash table used while + * performing compression. 
+ */ + h = _cpu_crc32_u32(h, val); + + ret = s->head[h & s->hash_mask]; + s->head[h & s->hash_mask] = str; + s->prev[str & s->w_mask] = ret; + return ret; +} +#endif /* Optimized insert_string block */ + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to UPDATE_HASH are made with consecutive input + * characters, so that a running hash key can be computed from the previous + * key instead of complete recalculation each time. + */ +#define UPDATE_HASH(s, h, c) (h = (((h) << s->hash_shift) ^ (c)) & s->hash_mask) + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. + * IN assertion: all calls to INSERT_STRING are made with consecutive input + * characters and the first MIN_MATCH bytes of str are valid (except for + * the last MIN_MATCH-1 bytes of the input file). + */ +local INLINE Pos insert_string_c(deflate_state* const s, const Pos str) { + Pos ret; + + UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH - 1)]); +#ifdef FASTEST + ret = s->head[s->ins_h]; +#else + ret = s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = str; + + return ret; +} + +local INLINE Pos insert_string(deflate_state* const s, const Pos str) { +/* String dictionary insertion: faster symbol hashing has a positive impact + * on data compression speeds (around 20% on Intel and 36% on Arm Cortex big + * cores). + * A misfeature is that the generated compressed output will differ from + * vanilla zlib (even though it is still valid 'DEFLATE-d' content). 
+ * + * We offer here a way to disable the optimization if there is the expectation + * that compressed content should match when compared to vanilla zlib. + */ +#if !defined(CHROMIUM_ZLIB_NO_CASTAGNOLI) + /* TODO(cavalcantii): unify CPU features code. */ +#if defined(CRC32_ARMV8_CRC32) + if (arm_cpu_enable_crc32) + return insert_string_optimized(s, str); +#elif defined(CRC32_SIMD_SSE42_PCLMUL) + if (x86_cpu_enable_simd) + return insert_string_optimized(s, str); +#endif +#endif + return insert_string_c(s, str); +} diff --git a/crc32_simd.c b/crc32_simd.c index 988f00b..c8e5592 100644 --- a/crc32_simd.c +++ b/crc32_simd.c @@ -240,31 +240,4 @@ uint32_t ZLIB_INTERNAL armv8_crc32_little(unsigned long crc, return ~c; } -TARGET_ARMV8_WITH_CRC -Pos ZLIB_INTERNAL insert_string_arm(deflate_state *const s, const Pos str) -{ - Pos ret; - unsigned *ip, val, h = 0; - - ip = (unsigned *)&s->window[str]; - val = *ip; - - if (s->level >= 6) - val &= 0xFFFFFF; - - /* We use CRC32C (Castagnoli) to ensure that the compressed output - * will match between Intel x ARM. - * Unlike the case of data integrity checks for GZIP format where the - * polynomial used is defined (https://tools.ietf.org/html/rfc1952#page-11), - * here it is just a hash function for the hash table used while - * performing compression. - */ - h = __crc32cw(h, val); - - ret = s->head[h & s->hash_mask]; - s->head[h & s->hash_mask] = str; - s->prev[str & s->w_mask] = ret; - return ret; -} - #endif diff --git a/crc32_simd.h b/crc32_simd.h index 08f1756..68bc235 100644 --- a/crc32_simd.h +++ b/crc32_simd.h @@ -34,8 +34,3 @@ uint32_t ZLIB_INTERNAL armv8_crc32_little(unsigned long crc, const unsigned char* buf, z_size_t len); -/* - * Insert hash string. 
- */ -Pos ZLIB_INTERNAL insert_string_arm(deflate_state *const s, const Pos str); - diff --git a/deflate.c b/deflate.c index b21175b..201254a 100644 --- a/deflate.c +++ b/deflate.c @@ -51,19 +51,12 @@ #include #include "deflate.h" #include "x86.h" - -#if defined(CRC32_SIMD_SSE42_PCLMUL) -#include -#endif +#include "contrib/optimizations/insert_string.h" #if (defined(__ARM_NEON__) || defined(__ARM_NEON)) #include "contrib/optimizations/slide_hash_neon.h" #endif -/* We need crypto extension crc32 to implement optimized hash in - * insert_string. - */ #if defined(CRC32_ARMV8_CRC32) -#include "arm_features.h" #include "crc32_simd.h" #endif @@ -121,38 +114,6 @@ extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); -#ifdef _MSC_VER -#define INLINE __inline -#else -#define INLINE inline -#endif - -/* Intel optimized insert_string. */ -#if defined(CRC32_SIMD_SSE42_PCLMUL) - -#if defined(__GNUC__) || defined(__clang__) -__attribute__((target("sse4.2"))) -#endif -local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) -{ - Pos ret; - unsigned *ip, val, h = 0; - - ip = (unsigned *)&s->window[str]; - val = *ip; - - if (s->level >= 6) - val &= 0xFFFFFF; - - h = _mm_crc32_u32(h, val); - - ret = s->head[h & s->hash_mask]; - s->head[h & s->hash_mask] = str; - s->prev[str & s->w_mask] = ret; - return ret; -} -#endif - /* =========================================================================== * Local data */ @@ -207,62 +168,6 @@ local const config configuration_table[10] = { /* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */ #define RANK(f) (((f) * 2) - ((f) > 4 ? 
9 : 0)) -/* =========================================================================== - * Update a hash value with the given input byte - * IN assertion: all calls to UPDATE_HASH are made with consecutive input - * characters, so that a running hash key can be computed from the previous - * key instead of complete recalculation each time. - */ -#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) - -/* =========================================================================== - * Insert string str in the dictionary and set match_head to the previous head - * of the hash chain (the most recent string with same hash key). Return - * the previous length of the hash chain. - * If this file is compiled with -DFASTEST, the compression level is forced - * to 1, and no hash chains are maintained. - * IN assertion: all calls to INSERT_STRING are made with consecutive input - * characters and the first MIN_MATCH bytes of str are valid (except for - * the last MIN_MATCH-1 bytes of the input file). - */ -local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) -{ - Pos ret; - - UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); -#ifdef FASTEST - ret = s->head[s->ins_h]; -#else - ret = s->prev[str & s->w_mask] = s->head[s->ins_h]; -#endif - s->head[s->ins_h] = str; - - return ret; -} - -local INLINE Pos insert_string(deflate_state *const s, const Pos str) -{ -/* String dictionary insertion: faster symbol hashing has a positive impact - * on data compression speeds (around 20% on Intel and 36% on ARM Cortex big - * cores). - * A misfeature is that the generated compressed output will differ from - * vanilla zlib (even though it is still valid 'DEFLATE-d' content). - * - * We offer here a way to disable the optimization if there is the expectation - * that compressed content should match when compared to vanilla zlib.
- */ -#if !defined(CHROMIUM_ZLIB_NO_CASTAGNOLI) -#if defined(CRC32_ARMV8_CRC32) - if (arm_cpu_enable_crc32) - return insert_string_arm(s, str); -#elif defined(CRC32_SIMD_SSE42_PCLMUL) - if (x86_cpu_enable_simd) - return insert_string_sse(s, str); -#endif -#endif - return insert_string_c(s, str); -} - /* =========================================================================== * Initialize the hash table (avoiding 64K overflow for 16 bit systems). * prev[] will be initialized on the fly. -- cgit v1.2.3 From ee4f17204f61e39851f833199965e72a76e5437f Mon Sep 17 00:00:00 2001 From: Adenilson Cavalcanti Date: Sat, 21 Dec 2019 06:10:04 +0000 Subject: Unify CPU features detection code This will allow to remove some duplicated code (i.e. thread synchronization) while at same time removing unnecessary use of inline ASM for Intel features detection. A few other advantages: - remove some extra logic (e.g. no need to test the platform to include the correct CPU detection header). - simplifies the buildsystem (i.e. we always include cpu_features.c) - get rid of the simd_stub file. 
Bug: 1032721 Change-Id: Ic93472d3337bc2cbe092d4cf8fbe4b31b1ceca6d Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1976820 Reviewed-by: Chris Blume Reviewed-by: Adenilson Cavalcanti Commit-Queue: Adenilson Cavalcanti Cr-Original-Commit-Position: refs/heads/master@{#727038} Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src Cr-Mirrored-Commit: 6f7e5e79cefe982ad84a88927565a88db2e592be --- BUILD.gn | 44 ++++++----- adler32.c | 8 +- arm_features.c | 90 ----------------------- arm_features.h | 13 ---- contrib/optimizations/insert_string.h | 2 +- cpu_features.c | 134 ++++++++++++++++++++++++++++++++++ cpu_features.h | 17 +++++ crc32.c | 17 +++-- deflate.c | 11 ++- simd_stub.c | 35 --------- x86.c | 101 ------------------------- x86.h | 16 ---- 12 files changed, 195 insertions(+), 293 deletions(-) delete mode 100644 arm_features.c delete mode 100644 arm_features.h create mode 100644 cpu_features.c create mode 100644 cpu_features.h delete mode 100644 simd_stub.c delete mode 100644 x86.c delete mode 100644 x86.h diff --git a/BUILD.gn b/BUILD.gn index 5f88733..ed57899 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -26,9 +26,20 @@ if (current_cpu == "arm" || current_cpu == "arm64") { use_x86_x64_optimizations = (current_cpu == "x86" || current_cpu == "x64") && !is_ios +if (!use_x86_x64_optimizations && !use_arm_neon_optimizations) { + # Apparently android_cronet bot builds with NEON disabled and + # we also should disable optimizations for iOS@x86 (a.k.a. simulator). + defines = [ "CPU_NO_SIMD" ] +} + config("zlib_adler32_simd_config") { if (use_x86_x64_optimizations) { defines = [ "ADLER32_SIMD_SSSE3" ] + if (is_win) { + defines += [ "X86_WINDOWS" ] + } else { + defines += [ "X86_NOT_WINDOWS" ] + } } if (use_arm_neon_optimizations) { @@ -94,24 +105,13 @@ if (use_arm_neon_optimizations) { if (!is_ios) { include_dirs = [ "." 
] - if (is_android) { - import("//build/config/android/config.gni") - if (defined(android_ndk_root) && android_ndk_root != "") { - deps = [ - "//third_party/android_ndk:cpu_features", - ] - } else { - assert(false, "CPU detection requires the Android NDK") - } - } else if (!is_win && !is_clang) { + if (!is_win && !is_clang) { assert(!use_thin_lto, "ThinLTO fails mixing different module-level targets") cflags_c = [ "-march=armv8-a+crc" ] } sources = [ - "arm_features.c", - "arm_features.h", "crc32_simd.c", "crc32_simd.h", ] @@ -218,10 +218,6 @@ source_set("zlib_x86_simd") { "-mpclmul", ] } - } else { - sources = [ - "simd_stub.c", - ] } configs -= [ "//build/config/compiler:chromium_code" ] @@ -248,6 +244,8 @@ component("zlib") { "chromeconf.h", "compress.c", "contrib/optimizations/insert_string.h", + "cpu_features.c", + "cpu_features.h", "crc32.c", "crc32.h", "deflate.c", @@ -267,7 +265,6 @@ component("zlib") { "trees.c", "trees.h", "uncompr.c", - "x86.h", "zconf.h", "zlib.h", "zutil.c", @@ -284,7 +281,6 @@ component("zlib") { ] if (use_x86_x64_optimizations) { - sources += [ "x86.c" ] deps += [ ":zlib_crc32_simd" ] } else if (use_arm_neon_optimizations) { sources += [ "contrib/optimizations/slide_hash_neon.h" ] @@ -294,6 +290,15 @@ component("zlib") { sources += [ "inflate.c" ] } + if (is_android) { + import("//build/config/android/config.gni") + if (defined(android_ndk_root) && android_ndk_root != "") { + deps += [ "//third_party/android_ndk:cpu_features" ] + } else { + assert(false, "CPU detection requires the Android NDK") + } + } + configs -= [ "//build/config/compiler:chromium_code" ] configs += [ ":zlib_internal_config", @@ -319,6 +324,7 @@ config("minizip_warnings") { } static_library("minizip") { + defines = [] sources = [ "contrib/minizip/ioapi.c", "contrib/minizip/ioapi.h", @@ -340,7 +346,7 @@ static_library("minizip") { if (is_mac || is_ios || is_android || is_nacl) { # Mac, Android and the BSDs don't have fopen64, ftello64, or fseeko64. 
We # use fopen, ftell, and fseek instead on these systems. - defines = [ "USE_FILE32API" ] + defines += [ "USE_FILE32API" ] } deps = [ diff --git a/adler32.c b/adler32.c index a42f35f..696773a 100644 --- a/adler32.c +++ b/adler32.c @@ -59,10 +59,8 @@ local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2)); # define MOD63(a) a %= BASE #endif -#if defined(ADLER32_SIMD_SSSE3) -#include "adler32_simd.h" -#include "x86.h" -#elif defined(ADLER32_SIMD_NEON) +#include "cpu_features.h" +#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) #include "adler32_simd.h" #endif @@ -108,7 +106,7 @@ uLong ZEXPORT adler32_z(adler, buf, len) */ if (buf == Z_NULL) { if (!len) /* Assume user is calling adler32(0, NULL, 0); */ - x86_check_features(); + cpu_check_features(); return 1L; } #else diff --git a/arm_features.c b/arm_features.c deleted file mode 100644 index f5641c3..0000000 --- a/arm_features.c +++ /dev/null @@ -1,90 +0,0 @@ -/* arm_features.c -- ARM processor features detection. - * - * Copyright 2018 The Chromium Authors. All rights reserved. - * Use of this source code is governed by a BSD-style license that can be - * found in the Chromium source repository LICENSE file. 
- */ - -#include "arm_features.h" -#include "zutil.h" -#include - -int ZLIB_INTERNAL arm_cpu_enable_crc32 = 0; -int ZLIB_INTERNAL arm_cpu_enable_pmull = 0; - -#if defined(ARMV8_OS_ANDROID) || defined(ARMV8_OS_LINUX) || defined(ARMV8_OS_FUCHSIA) -#include -#endif - -#if defined(ARMV8_OS_ANDROID) -#include -#elif defined(ARMV8_OS_LINUX) -#include -#include -#elif defined(ARMV8_OS_FUCHSIA) -#include -#include -#include -#elif defined(ARMV8_OS_WINDOWS) -#include -#else -#error arm_features.c ARM feature detection in not defined for your platform -#endif - -static void _arm_check_features(void); - -#if defined(ARMV8_OS_ANDROID) || defined(ARMV8_OS_LINUX) || defined(ARMV8_OS_FUCHSIA) -static pthread_once_t cpu_check_inited_once = PTHREAD_ONCE_INIT; -void ZLIB_INTERNAL arm_check_features(void) -{ - pthread_once(&cpu_check_inited_once, _arm_check_features); -} -#elif defined(ARMV8_OS_WINDOWS) -static INIT_ONCE cpu_check_inited_once = INIT_ONCE_STATIC_INIT; -static BOOL CALLBACK _arm_check_features_forwarder(PINIT_ONCE once, PVOID param, PVOID* context) -{ - _arm_check_features(); - return TRUE; -} -void ZLIB_INTERNAL arm_check_features(void) -{ - InitOnceExecuteOnce(&cpu_check_inited_once, _arm_check_features_forwarder, - NULL, NULL); -} -#endif - -/* - * See http://bit.ly/2CcoEsr for run-time detection of ARM features and also - * crbug.com/931275 for android_getCpuFeatures() use in the Android sandbox. 
- */ -static void _arm_check_features(void) -{ -#if defined(ARMV8_OS_ANDROID) && defined(__aarch64__) - uint64_t features = android_getCpuFeatures(); - arm_cpu_enable_crc32 = !!(features & ANDROID_CPU_ARM64_FEATURE_CRC32); - arm_cpu_enable_pmull = !!(features & ANDROID_CPU_ARM64_FEATURE_PMULL); -#elif defined(ARMV8_OS_ANDROID) /* aarch32 */ - uint64_t features = android_getCpuFeatures(); - arm_cpu_enable_crc32 = !!(features & ANDROID_CPU_ARM_FEATURE_CRC32); - arm_cpu_enable_pmull = !!(features & ANDROID_CPU_ARM_FEATURE_PMULL); -#elif defined(ARMV8_OS_LINUX) && defined(__aarch64__) - unsigned long features = getauxval(AT_HWCAP); - arm_cpu_enable_crc32 = !!(features & HWCAP_CRC32); - arm_cpu_enable_pmull = !!(features & HWCAP_PMULL); -#elif defined(ARMV8_OS_LINUX) && (defined(__ARM_NEON) || defined(__ARM_NEON__)) - /* Query HWCAP2 for ARMV8-A SoCs running in aarch32 mode */ - unsigned long features = getauxval(AT_HWCAP2); - arm_cpu_enable_crc32 = !!(features & HWCAP2_CRC32); - arm_cpu_enable_pmull = !!(features & HWCAP2_PMULL); -#elif defined(ARMV8_OS_FUCHSIA) - uint32_t features; - zx_status_t rc = zx_system_get_features(ZX_FEATURE_KIND_CPU, &features); - if (rc != ZX_OK || (features & ZX_ARM64_FEATURE_ISA_ASIMD) == 0) - return; /* Report nothing if ASIMD(NEON) is missing */ - arm_cpu_enable_crc32 = !!(features & ZX_ARM64_FEATURE_ISA_CRC32); - arm_cpu_enable_pmull = !!(features & ZX_ARM64_FEATURE_ISA_PMULL); -#elif defined(ARMV8_OS_WINDOWS) - arm_cpu_enable_crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE); - arm_cpu_enable_pmull = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE); -#endif -} diff --git a/arm_features.h b/arm_features.h deleted file mode 100644 index 09fec25..0000000 --- a/arm_features.h +++ /dev/null @@ -1,13 +0,0 @@ -/* arm_features.h -- ARM processor features detection. - * - * Copyright 2018 The Chromium Authors. All rights reserved. 
- * Use of this source code is governed by a BSD-style license that can be - * found in the Chromium source repository LICENSE file. - */ - -#include "zlib.h" - -extern int arm_cpu_enable_crc32; -extern int arm_cpu_enable_pmull; - -void arm_check_features(void); diff --git a/contrib/optimizations/insert_string.h b/contrib/optimizations/insert_string.h index 69eee3d..1826601 100644 --- a/contrib/optimizations/insert_string.h +++ b/contrib/optimizations/insert_string.h @@ -10,6 +10,7 @@ #define INLINE inline #endif +#include "cpu_features.h" /* Optimized insert_string block */ #if defined(CRC32_SIMD_SSE42_PCLMUL) || defined(CRC32_ARMV8_CRC32) #define TARGET_CPU_WITH_CRC @@ -25,7 +26,6 @@ #define _cpu_crc32_u32 _mm_crc32_u32 #elif defined(CRC32_ARMV8_CRC32) - #include "arm_features.h" #if defined(__clang__) #undef TARGET_CPU_WITH_CRC #define __crc32cw __builtin_arm_crc32cw diff --git a/cpu_features.c b/cpu_features.c new file mode 100644 index 0000000..731126a --- /dev/null +++ b/cpu_features.c @@ -0,0 +1,134 @@ +/* cpu_features.c -- Processor features detection. + * + * Copyright 2018 The Chromium Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the Chromium source repository LICENSE file. + */ + +#include "cpu_features.h" +#include "zutil.h" + +#include +#if defined(_MSC_VER) +#include +#elif defined(ADLER32_SIMD_SSSE3) +#include +#endif + +/* TODO(cavalcantii): remove checks for x86_flags on deflate. 
+ */ +int ZLIB_INTERNAL arm_cpu_enable_crc32 = 0; +int ZLIB_INTERNAL arm_cpu_enable_pmull = 0; +int ZLIB_INTERNAL x86_cpu_enable_ssse3 = 0; +int ZLIB_INTERNAL x86_cpu_enable_simd = 0; + +#if defined(ARMV8_OS_ANDROID) || defined(ARMV8_OS_LINUX) || defined(ARMV8_OS_FUCHSIA) +#include +#endif + +#if defined(ARMV8_OS_ANDROID) +#include +#elif defined(ARMV8_OS_LINUX) +#include +#include +#elif defined(ARMV8_OS_FUCHSIA) +#include +#include +#include +#elif defined(ARMV8_OS_WINDOWS) || defined(X86_WINDOWS) +#include +#elif !defined(_MSC_VER) +#include +#else +#error cpu_features.c CPU feature detection in not defined for your platform +#endif + +#ifndef CPU_NO_SIMD +static void _cpu_check_features(void); +#endif + +#if defined(ARMV8_OS_ANDROID) || defined(ARMV8_OS_LINUX) || defined(ARMV8_OS_FUCHSIA) || defined(X86_NOT_WINDOWS) +static pthread_once_t cpu_check_inited_once = PTHREAD_ONCE_INIT; +void ZLIB_INTERNAL cpu_check_features(void) +{ + pthread_once(&cpu_check_inited_once, _cpu_check_features); +} +#elif defined(ARMV8_OS_WINDOWS) || defined(X86_WINDOWS) +static INIT_ONCE cpu_check_inited_once = INIT_ONCE_STATIC_INIT; +static BOOL CALLBACK _cpu_check_features_forwarder(PINIT_ONCE once, PVOID param, PVOID* context) +{ + _cpu_check_features(); + return TRUE; +} +void ZLIB_INTERNAL cpu_check_features(void) +{ + InitOnceExecuteOnce(&cpu_check_inited_once, _cpu_check_features_forwarder, + NULL, NULL); +} +#endif + +#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) +/* + * See http://bit.ly/2CcoEsr for run-time detection of ARM features and also + * crbug.com/931275 for android_getCpuFeatures() use in the Android sandbox. 
+ */ +static void _cpu_check_features(void) +{ +#if defined(ARMV8_OS_ANDROID) && defined(__aarch64__) + uint64_t features = android_getCpuFeatures(); + arm_cpu_enable_crc32 = !!(features & ANDROID_CPU_ARM64_FEATURE_CRC32); + arm_cpu_enable_pmull = !!(features & ANDROID_CPU_ARM64_FEATURE_PMULL); +#elif defined(ARMV8_OS_ANDROID) /* aarch32 */ + uint64_t features = android_getCpuFeatures(); + arm_cpu_enable_crc32 = !!(features & ANDROID_CPU_ARM_FEATURE_CRC32); + arm_cpu_enable_pmull = !!(features & ANDROID_CPU_ARM_FEATURE_PMULL); +#elif defined(ARMV8_OS_LINUX) && defined(__aarch64__) + unsigned long features = getauxval(AT_HWCAP); + arm_cpu_enable_crc32 = !!(features & HWCAP_CRC32); + arm_cpu_enable_pmull = !!(features & HWCAP_PMULL); +#elif defined(ARMV8_OS_LINUX) && (defined(__ARM_NEON) || defined(__ARM_NEON__)) + /* Query HWCAP2 for ARMV8-A SoCs running in aarch32 mode */ + unsigned long features = getauxval(AT_HWCAP2); + arm_cpu_enable_crc32 = !!(features & HWCAP2_CRC32); + arm_cpu_enable_pmull = !!(features & HWCAP2_PMULL); +#elif defined(ARMV8_OS_FUCHSIA) + uint32_t features; + zx_status_t rc = zx_system_get_features(ZX_FEATURE_KIND_CPU, &features); + if (rc != ZX_OK || (features & ZX_ARM64_FEATURE_ISA_ASIMD) == 0) + return; /* Report nothing if ASIMD(NEON) is missing */ + arm_cpu_enable_crc32 = !!(features & ZX_ARM64_FEATURE_ISA_CRC32); + arm_cpu_enable_pmull = !!(features & ZX_ARM64_FEATURE_ISA_PMULL); +#elif defined(ARMV8_OS_WINDOWS) + arm_cpu_enable_crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE); + arm_cpu_enable_pmull = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE); +#endif +} + +#elif defined(X86_NOT_WINDOWS) || defined(X86_WINDOWS) +/* On x86 we simply use a instruction to check the CPU features. + * (i.e. CPUID). 
+ */ +static void _cpu_check_features(void) +{ + int x86_cpu_has_sse2; + int x86_cpu_has_ssse3; + int x86_cpu_has_sse42; + int x86_cpu_has_pclmulqdq; + int abcd[4]; +#ifdef _MSC_VER + __cpuid(abcd, 1); +#else + __cpuid(1, abcd[0], abcd[1], abcd[2], abcd[3]); +#endif + x86_cpu_has_sse2 = abcd[3] & 0x4000000; + x86_cpu_has_ssse3 = abcd[2] & 0x000200; + x86_cpu_has_sse42 = abcd[2] & 0x100000; + x86_cpu_has_pclmulqdq = abcd[2] & 0x2; + + x86_cpu_enable_ssse3 = x86_cpu_has_ssse3; + + x86_cpu_enable_simd = x86_cpu_has_sse2 && + x86_cpu_has_sse42 && + x86_cpu_has_pclmulqdq; +} +#endif diff --git a/cpu_features.h b/cpu_features.h new file mode 100644 index 0000000..2a4a797 --- /dev/null +++ b/cpu_features.h @@ -0,0 +1,17 @@ +/* cpu_features.h -- Processor features detection. + * + * Copyright 2018 The Chromium Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the Chromium source repository LICENSE file. + */ + +#include "zlib.h" + +/* TODO(cavalcantii): remove checks for x86_flags on deflate. 
+ */ +extern int arm_cpu_enable_crc32; +extern int arm_cpu_enable_pmull; +extern int x86_cpu_enable_ssse3; +extern int x86_cpu_enable_simd; + +void cpu_check_features(void); diff --git a/crc32.c b/crc32.c index e95b908..bd69647 100644 --- a/crc32.c +++ b/crc32.c @@ -29,13 +29,10 @@ #endif /* MAKECRCH */ #include "deflate.h" -#include "x86.h" +#include "cpu_features.h" #include "zutil.h" /* for STDC and FAR definitions */ -#if defined(CRC32_SIMD_SSE42_PCLMUL) -#include "crc32_simd.h" -#elif defined(CRC32_ARMV8_CRC32) -#include "arm_features.h" +#if defined(CRC32_SIMD_SSE42_PCLMUL) || defined(CRC32_ARMV8_CRC32) #include "crc32_simd.h" #endif @@ -226,7 +223,7 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) */ if (buf == Z_NULL) { if (!len) /* Assume user is calling crc32(0, NULL, 0); */ - x86_check_features(); + cpu_check_features(); return 0UL; } @@ -289,7 +286,7 @@ unsigned long ZEXPORT crc32(crc, buf, len) */ if (buf == Z_NULL) { if (!len) /* Assume user is calling crc32(0, NULL, 0); */ - arm_check_features(); + cpu_check_features(); return 0UL; } @@ -500,25 +497,31 @@ uLong ZEXPORT crc32_combine64(crc1, crc2, len2) ZLIB_INTERNAL void crc_reset(deflate_state *const s) { +#ifdef ADLER32_SIMD_SSSE3 if (x86_cpu_enable_simd) { crc_fold_init(s); return; } +#endif s->strm->adler = crc32(0L, Z_NULL, 0); } ZLIB_INTERNAL void crc_finalize(deflate_state *const s) { +#ifdef ADLER32_SIMD_SSSE3 if (x86_cpu_enable_simd) s->strm->adler = crc_fold_512to32(s); +#endif } ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) { +#ifdef ADLER32_SIMD_SSSE3 if (x86_cpu_enable_simd) { crc_fold_copy(strm->state, dst, strm->next_in, size); return; } +#endif zmemcpy(dst, strm->next_in, size); strm->adler = crc32(strm->adler, dst, size); } diff --git a/deflate.c b/deflate.c index 201254a..a39e627 100644 --- a/deflate.c +++ b/deflate.c @@ -50,7 +50,7 @@ /* @(#) $Id$ */ #include #include "deflate.h" -#include "x86.h" +#include "cpu_features.h" #include 
"contrib/optimizations/insert_string.h" #if (defined(__ARM_NEON__) || defined(__ARM_NEON)) @@ -244,10 +244,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, // for all wrapper formats (e.g. RAW, ZLIB, GZIP). // Feature detection is not triggered while using RAW mode (i.e. we never // call crc32() with a NULL buffer). -#if defined(CRC32_ARMV8_CRC32) - arm_check_features(); -#elif defined(CRC32_SIMD_SSE42_PCLMUL) - x86_check_features(); +#if defined(CRC32_ARMV8_CRC32) || defined(CRC32_SIMD_SSE42_PCLMUL) + cpu_check_features(); #endif if (version == Z_NULL || version[0] != my_version[0] || @@ -1519,11 +1517,12 @@ local void fill_window_c(deflate_state *s); local void fill_window(deflate_state *s) { +#ifdef ADLER32_SIMD_SSSE3 if (x86_cpu_enable_simd) { fill_window_sse(s); return; } - +#endif fill_window_c(s); } diff --git a/simd_stub.c b/simd_stub.c deleted file mode 100644 index c6d4605..0000000 --- a/simd_stub.c +++ /dev/null @@ -1,35 +0,0 @@ -/* simd_stub.c -- stub implementations -* Copyright (C) 2014 Intel Corporation -* For conditions of distribution and use, see copyright notice in zlib.h -*/ -#include - -#include "deflate.h" -#include "x86.h" - -int ZLIB_INTERNAL x86_cpu_enable_simd = 0; - -void ZLIB_INTERNAL crc_fold_init(deflate_state *const s) { - assert(0); -} - -void ZLIB_INTERNAL crc_fold_copy(deflate_state *const s, - unsigned char *dst, - const unsigned char *src, - long len) { - assert(0); -} - -unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) { - assert(0); - return 0; -} - -void ZLIB_INTERNAL fill_window_sse(deflate_state *s) -{ - assert(0); -} - -void x86_check_features(void) -{ -} diff --git a/x86.c b/x86.c deleted file mode 100644 index 7488ad0..0000000 --- a/x86.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * x86 feature check - * - * Copyright (C) 2013 Intel Corporation. All rights reserved. 
- * Author: - * Jim Kukunas - * - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#include "x86.h" -#include "zutil.h" - -int ZLIB_INTERNAL x86_cpu_enable_ssse3 = 0; -int ZLIB_INTERNAL x86_cpu_enable_simd = 0; - -#ifndef _MSC_VER -#include - -pthread_once_t cpu_check_inited_once = PTHREAD_ONCE_INIT; -static void _x86_check_features(void); - -void x86_check_features(void) -{ - pthread_once(&cpu_check_inited_once, _x86_check_features); -} - -static void _x86_check_features(void) -{ - int x86_cpu_has_sse2; - int x86_cpu_has_ssse3; - int x86_cpu_has_sse42; - int x86_cpu_has_pclmulqdq; - unsigned eax, ebx, ecx, edx; - - eax = 1; -#ifdef __i386__ - __asm__ __volatile__ ( - "xchg %%ebx, %1\n\t" - "cpuid\n\t" - "xchg %1, %%ebx\n\t" - : "+a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) - ); -#else - __asm__ __volatile__ ( - "cpuid\n\t" - : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - ); -#endif /* (__i386__) */ - - x86_cpu_has_sse2 = edx & 0x4000000; - x86_cpu_has_ssse3 = ecx & 0x000200; - x86_cpu_has_sse42 = ecx & 0x100000; - x86_cpu_has_pclmulqdq = ecx & 0x2; - - x86_cpu_enable_ssse3 = x86_cpu_has_ssse3; - - x86_cpu_enable_simd = x86_cpu_has_sse2 && - x86_cpu_has_sse42 && - x86_cpu_has_pclmulqdq; -} -#else -#include -#include - -static BOOL CALLBACK _x86_check_features(PINIT_ONCE once, - PVOID param, - PVOID *context); -static INIT_ONCE cpu_check_inited_once = INIT_ONCE_STATIC_INIT; - -void x86_check_features(void) -{ - InitOnceExecuteOnce(&cpu_check_inited_once, _x86_check_features, - NULL, NULL); -} - -static BOOL CALLBACK _x86_check_features(PINIT_ONCE once, - PVOID param, - PVOID *context) -{ - int x86_cpu_has_sse2; - int x86_cpu_has_ssse3; - int x86_cpu_has_sse42; - int x86_cpu_has_pclmulqdq; - int regs[4]; - - __cpuid(regs, 1); - - x86_cpu_has_sse2 = regs[3] & 0x4000000; - x86_cpu_has_ssse3 = regs[2] & 0x000200; - x86_cpu_has_sse42 = regs[2] & 0x100000; - x86_cpu_has_pclmulqdq = regs[2] & 0x2; - - x86_cpu_enable_ssse3 = 
x86_cpu_has_ssse3; - - x86_cpu_enable_simd = x86_cpu_has_sse2 && - x86_cpu_has_sse42 && - x86_cpu_has_pclmulqdq; - return TRUE; -} -#endif /* _MSC_VER */ diff --git a/x86.h b/x86.h deleted file mode 100644 index 7205d50..0000000 --- a/x86.h +++ /dev/null @@ -1,16 +0,0 @@ -/* x86.h -- check for x86 CPU features -* Copyright (C) 2013 Intel Corporation Jim Kukunas -* For conditions of distribution and use, see copyright notice in zlib.h -*/ - -#ifndef X86_H -#define X86_H - -#include "zlib.h" - -extern int x86_cpu_enable_ssse3; -extern int x86_cpu_enable_simd; - -void x86_check_features(void); - -#endif /* X86_H */ -- cgit v1.2.3 From 814da1f383b625955149c3845db62af3f29a4ffe Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Sat, 21 Dec 2019 11:13:14 +0000 Subject: Revert "Unify CPU features detection code" This reverts commit 6f7e5e79cefe982ad84a88927565a88db2e592be. Reason for revert: This broke the build, e.g. https://ci.chromium.org/p/chromium/builders/ci/ios-device/144512 ../../third_party/zlib/cpu_features.c:75:13: error: unused function '_cpu_check_features' [-Werror,-Wunused-function] static void _cpu_check_features(void) ^ Original change's description: > Unify CPU features detection code > > This will allow to remove some duplicated code (i.e. thread synchronization) > while at same time removing unnecessary use of inline ASM for Intel features > detection. > > A few other advantages: > - remove some extra logic (e.g. no need to test the platform to include the > correct CPU detection header). > - simplifies the buildsystem (i.e. we always include cpu_features.c) > - get rid of the simd_stub file. 
> > Bug: 1032721 > Change-Id: Ic93472d3337bc2cbe092d4cf8fbe4b31b1ceca6d > Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1976820 > Reviewed-by: Chris Blume > Reviewed-by: Adenilson Cavalcanti > Commit-Queue: Adenilson Cavalcanti > Cr-Commit-Position: refs/heads/master@{#727038} TBR=cavalcantii@chromium.org,cblume@chromium.org,mtklein@chromium.org,adenilson.cavalcanti@arm.com Change-Id: I20c5dedb98ba8b5d304ff1339042bcf243505e88 No-Presubmit: true No-Tree-Checks: true No-Try: true Bug: 1032721 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1979790 Reviewed-by: Hans Wennborg Commit-Queue: Hans Wennborg Cr-Original-Commit-Position: refs/heads/master@{#727039} Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src Cr-Mirrored-Commit: 9d8f976414a7608c3361718462253104a761c6bb --- BUILD.gn | 44 +++++------ adler32.c | 8 +- arm_features.c | 90 +++++++++++++++++++++++ arm_features.h | 13 ++++ contrib/optimizations/insert_string.h | 2 +- cpu_features.c | 134 ---------------------------------- cpu_features.h | 17 ----- crc32.c | 17 ++--- deflate.c | 11 +-- simd_stub.c | 35 +++++++++ x86.c | 101 +++++++++++++++++++++++++ x86.h | 16 ++++ 12 files changed, 293 insertions(+), 195 deletions(-) create mode 100644 arm_features.c create mode 100644 arm_features.h delete mode 100644 cpu_features.c delete mode 100644 cpu_features.h create mode 100644 simd_stub.c create mode 100644 x86.c create mode 100644 x86.h diff --git a/BUILD.gn b/BUILD.gn index ed57899..5f88733 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -26,20 +26,9 @@ if (current_cpu == "arm" || current_cpu == "arm64") { use_x86_x64_optimizations = (current_cpu == "x86" || current_cpu == "x64") && !is_ios -if (!use_x86_x64_optimizations && !use_arm_neon_optimizations) { - # Apparently android_cronet bot builds with NEON disabled and - # we also should disable optimizations for iOS@x86 (a.k.a. simulator). 
- defines = [ "CPU_NO_SIMD" ] -} - config("zlib_adler32_simd_config") { if (use_x86_x64_optimizations) { defines = [ "ADLER32_SIMD_SSSE3" ] - if (is_win) { - defines += [ "X86_WINDOWS" ] - } else { - defines += [ "X86_NOT_WINDOWS" ] - } } if (use_arm_neon_optimizations) { @@ -105,13 +94,24 @@ if (use_arm_neon_optimizations) { if (!is_ios) { include_dirs = [ "." ] - if (!is_win && !is_clang) { + if (is_android) { + import("//build/config/android/config.gni") + if (defined(android_ndk_root) && android_ndk_root != "") { + deps = [ + "//third_party/android_ndk:cpu_features", + ] + } else { + assert(false, "CPU detection requires the Android NDK") + } + } else if (!is_win && !is_clang) { assert(!use_thin_lto, "ThinLTO fails mixing different module-level targets") cflags_c = [ "-march=armv8-a+crc" ] } sources = [ + "arm_features.c", + "arm_features.h", "crc32_simd.c", "crc32_simd.h", ] @@ -218,6 +218,10 @@ source_set("zlib_x86_simd") { "-mpclmul", ] } + } else { + sources = [ + "simd_stub.c", + ] } configs -= [ "//build/config/compiler:chromium_code" ] @@ -244,8 +248,6 @@ component("zlib") { "chromeconf.h", "compress.c", "contrib/optimizations/insert_string.h", - "cpu_features.c", - "cpu_features.h", "crc32.c", "crc32.h", "deflate.c", @@ -265,6 +267,7 @@ component("zlib") { "trees.c", "trees.h", "uncompr.c", + "x86.h", "zconf.h", "zlib.h", "zutil.c", @@ -281,6 +284,7 @@ component("zlib") { ] if (use_x86_x64_optimizations) { + sources += [ "x86.c" ] deps += [ ":zlib_crc32_simd" ] } else if (use_arm_neon_optimizations) { sources += [ "contrib/optimizations/slide_hash_neon.h" ] @@ -290,15 +294,6 @@ component("zlib") { sources += [ "inflate.c" ] } - if (is_android) { - import("//build/config/android/config.gni") - if (defined(android_ndk_root) && android_ndk_root != "") { - deps += [ "//third_party/android_ndk:cpu_features" ] - } else { - assert(false, "CPU detection requires the Android NDK") - } - } - configs -= [ "//build/config/compiler:chromium_code" ] configs += [ 
":zlib_internal_config", @@ -324,7 +319,6 @@ config("minizip_warnings") { } static_library("minizip") { - defines = [] sources = [ "contrib/minizip/ioapi.c", "contrib/minizip/ioapi.h", @@ -346,7 +340,7 @@ static_library("minizip") { if (is_mac || is_ios || is_android || is_nacl) { # Mac, Android and the BSDs don't have fopen64, ftello64, or fseeko64. We # use fopen, ftell, and fseek instead on these systems. - defines += [ "USE_FILE32API" ] + defines = [ "USE_FILE32API" ] } deps = [ diff --git a/adler32.c b/adler32.c index 696773a..a42f35f 100644 --- a/adler32.c +++ b/adler32.c @@ -59,8 +59,10 @@ local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2)); # define MOD63(a) a %= BASE #endif -#include "cpu_features.h" -#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) +#if defined(ADLER32_SIMD_SSSE3) +#include "adler32_simd.h" +#include "x86.h" +#elif defined(ADLER32_SIMD_NEON) #include "adler32_simd.h" #endif @@ -106,7 +108,7 @@ uLong ZEXPORT adler32_z(adler, buf, len) */ if (buf == Z_NULL) { if (!len) /* Assume user is calling adler32(0, NULL, 0); */ - cpu_check_features(); + x86_check_features(); return 1L; } #else diff --git a/arm_features.c b/arm_features.c new file mode 100644 index 0000000..f5641c3 --- /dev/null +++ b/arm_features.c @@ -0,0 +1,90 @@ +/* arm_features.c -- ARM processor features detection. + * + * Copyright 2018 The Chromium Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the Chromium source repository LICENSE file. 
+ */ + +#include "arm_features.h" +#include "zutil.h" +#include + +int ZLIB_INTERNAL arm_cpu_enable_crc32 = 0; +int ZLIB_INTERNAL arm_cpu_enable_pmull = 0; + +#if defined(ARMV8_OS_ANDROID) || defined(ARMV8_OS_LINUX) || defined(ARMV8_OS_FUCHSIA) +#include +#endif + +#if defined(ARMV8_OS_ANDROID) +#include +#elif defined(ARMV8_OS_LINUX) +#include +#include +#elif defined(ARMV8_OS_FUCHSIA) +#include +#include +#include +#elif defined(ARMV8_OS_WINDOWS) +#include +#else +#error arm_features.c ARM feature detection in not defined for your platform +#endif + +static void _arm_check_features(void); + +#if defined(ARMV8_OS_ANDROID) || defined(ARMV8_OS_LINUX) || defined(ARMV8_OS_FUCHSIA) +static pthread_once_t cpu_check_inited_once = PTHREAD_ONCE_INIT; +void ZLIB_INTERNAL arm_check_features(void) +{ + pthread_once(&cpu_check_inited_once, _arm_check_features); +} +#elif defined(ARMV8_OS_WINDOWS) +static INIT_ONCE cpu_check_inited_once = INIT_ONCE_STATIC_INIT; +static BOOL CALLBACK _arm_check_features_forwarder(PINIT_ONCE once, PVOID param, PVOID* context) +{ + _arm_check_features(); + return TRUE; +} +void ZLIB_INTERNAL arm_check_features(void) +{ + InitOnceExecuteOnce(&cpu_check_inited_once, _arm_check_features_forwarder, + NULL, NULL); +} +#endif + +/* + * See http://bit.ly/2CcoEsr for run-time detection of ARM features and also + * crbug.com/931275 for android_getCpuFeatures() use in the Android sandbox. 
+ */ +static void _arm_check_features(void) +{ +#if defined(ARMV8_OS_ANDROID) && defined(__aarch64__) + uint64_t features = android_getCpuFeatures(); + arm_cpu_enable_crc32 = !!(features & ANDROID_CPU_ARM64_FEATURE_CRC32); + arm_cpu_enable_pmull = !!(features & ANDROID_CPU_ARM64_FEATURE_PMULL); +#elif defined(ARMV8_OS_ANDROID) /* aarch32 */ + uint64_t features = android_getCpuFeatures(); + arm_cpu_enable_crc32 = !!(features & ANDROID_CPU_ARM_FEATURE_CRC32); + arm_cpu_enable_pmull = !!(features & ANDROID_CPU_ARM_FEATURE_PMULL); +#elif defined(ARMV8_OS_LINUX) && defined(__aarch64__) + unsigned long features = getauxval(AT_HWCAP); + arm_cpu_enable_crc32 = !!(features & HWCAP_CRC32); + arm_cpu_enable_pmull = !!(features & HWCAP_PMULL); +#elif defined(ARMV8_OS_LINUX) && (defined(__ARM_NEON) || defined(__ARM_NEON__)) + /* Query HWCAP2 for ARMV8-A SoCs running in aarch32 mode */ + unsigned long features = getauxval(AT_HWCAP2); + arm_cpu_enable_crc32 = !!(features & HWCAP2_CRC32); + arm_cpu_enable_pmull = !!(features & HWCAP2_PMULL); +#elif defined(ARMV8_OS_FUCHSIA) + uint32_t features; + zx_status_t rc = zx_system_get_features(ZX_FEATURE_KIND_CPU, &features); + if (rc != ZX_OK || (features & ZX_ARM64_FEATURE_ISA_ASIMD) == 0) + return; /* Report nothing if ASIMD(NEON) is missing */ + arm_cpu_enable_crc32 = !!(features & ZX_ARM64_FEATURE_ISA_CRC32); + arm_cpu_enable_pmull = !!(features & ZX_ARM64_FEATURE_ISA_PMULL); +#elif defined(ARMV8_OS_WINDOWS) + arm_cpu_enable_crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE); + arm_cpu_enable_pmull = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE); +#endif +} diff --git a/arm_features.h b/arm_features.h new file mode 100644 index 0000000..09fec25 --- /dev/null +++ b/arm_features.h @@ -0,0 +1,13 @@ +/* arm_features.h -- ARM processor features detection. + * + * Copyright 2018 The Chromium Authors. All rights reserved. 
+ * Use of this source code is governed by a BSD-style license that can be + * found in the Chromium source repository LICENSE file. + */ + +#include "zlib.h" + +extern int arm_cpu_enable_crc32; +extern int arm_cpu_enable_pmull; + +void arm_check_features(void); diff --git a/contrib/optimizations/insert_string.h b/contrib/optimizations/insert_string.h index 1826601..69eee3d 100644 --- a/contrib/optimizations/insert_string.h +++ b/contrib/optimizations/insert_string.h @@ -10,7 +10,6 @@ #define INLINE inline #endif -#include "cpu_features.h" /* Optimized insert_string block */ #if defined(CRC32_SIMD_SSE42_PCLMUL) || defined(CRC32_ARMV8_CRC32) #define TARGET_CPU_WITH_CRC @@ -26,6 +25,7 @@ #define _cpu_crc32_u32 _mm_crc32_u32 #elif defined(CRC32_ARMV8_CRC32) + #include "arm_features.h" #if defined(__clang__) #undef TARGET_CPU_WITH_CRC #define __crc32cw __builtin_arm_crc32cw diff --git a/cpu_features.c b/cpu_features.c deleted file mode 100644 index 731126a..0000000 --- a/cpu_features.c +++ /dev/null @@ -1,134 +0,0 @@ -/* cpu_features.c -- Processor features detection. - * - * Copyright 2018 The Chromium Authors. All rights reserved. - * Use of this source code is governed by a BSD-style license that can be - * found in the Chromium source repository LICENSE file. - */ - -#include "cpu_features.h" -#include "zutil.h" - -#include -#if defined(_MSC_VER) -#include -#elif defined(ADLER32_SIMD_SSSE3) -#include -#endif - -/* TODO(cavalcantii): remove checks for x86_flags on deflate. 
- */ -int ZLIB_INTERNAL arm_cpu_enable_crc32 = 0; -int ZLIB_INTERNAL arm_cpu_enable_pmull = 0; -int ZLIB_INTERNAL x86_cpu_enable_ssse3 = 0; -int ZLIB_INTERNAL x86_cpu_enable_simd = 0; - -#if defined(ARMV8_OS_ANDROID) || defined(ARMV8_OS_LINUX) || defined(ARMV8_OS_FUCHSIA) -#include -#endif - -#if defined(ARMV8_OS_ANDROID) -#include -#elif defined(ARMV8_OS_LINUX) -#include -#include -#elif defined(ARMV8_OS_FUCHSIA) -#include -#include -#include -#elif defined(ARMV8_OS_WINDOWS) || defined(X86_WINDOWS) -#include -#elif !defined(_MSC_VER) -#include -#else -#error cpu_features.c CPU feature detection in not defined for your platform -#endif - -#ifndef CPU_NO_SIMD -static void _cpu_check_features(void); -#endif - -#if defined(ARMV8_OS_ANDROID) || defined(ARMV8_OS_LINUX) || defined(ARMV8_OS_FUCHSIA) || defined(X86_NOT_WINDOWS) -static pthread_once_t cpu_check_inited_once = PTHREAD_ONCE_INIT; -void ZLIB_INTERNAL cpu_check_features(void) -{ - pthread_once(&cpu_check_inited_once, _cpu_check_features); -} -#elif defined(ARMV8_OS_WINDOWS) || defined(X86_WINDOWS) -static INIT_ONCE cpu_check_inited_once = INIT_ONCE_STATIC_INIT; -static BOOL CALLBACK _cpu_check_features_forwarder(PINIT_ONCE once, PVOID param, PVOID* context) -{ - _cpu_check_features(); - return TRUE; -} -void ZLIB_INTERNAL cpu_check_features(void) -{ - InitOnceExecuteOnce(&cpu_check_inited_once, _cpu_check_features_forwarder, - NULL, NULL); -} -#endif - -#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) -/* - * See http://bit.ly/2CcoEsr for run-time detection of ARM features and also - * crbug.com/931275 for android_getCpuFeatures() use in the Android sandbox. 
- */ -static void _cpu_check_features(void) -{ -#if defined(ARMV8_OS_ANDROID) && defined(__aarch64__) - uint64_t features = android_getCpuFeatures(); - arm_cpu_enable_crc32 = !!(features & ANDROID_CPU_ARM64_FEATURE_CRC32); - arm_cpu_enable_pmull = !!(features & ANDROID_CPU_ARM64_FEATURE_PMULL); -#elif defined(ARMV8_OS_ANDROID) /* aarch32 */ - uint64_t features = android_getCpuFeatures(); - arm_cpu_enable_crc32 = !!(features & ANDROID_CPU_ARM_FEATURE_CRC32); - arm_cpu_enable_pmull = !!(features & ANDROID_CPU_ARM_FEATURE_PMULL); -#elif defined(ARMV8_OS_LINUX) && defined(__aarch64__) - unsigned long features = getauxval(AT_HWCAP); - arm_cpu_enable_crc32 = !!(features & HWCAP_CRC32); - arm_cpu_enable_pmull = !!(features & HWCAP_PMULL); -#elif defined(ARMV8_OS_LINUX) && (defined(__ARM_NEON) || defined(__ARM_NEON__)) - /* Query HWCAP2 for ARMV8-A SoCs running in aarch32 mode */ - unsigned long features = getauxval(AT_HWCAP2); - arm_cpu_enable_crc32 = !!(features & HWCAP2_CRC32); - arm_cpu_enable_pmull = !!(features & HWCAP2_PMULL); -#elif defined(ARMV8_OS_FUCHSIA) - uint32_t features; - zx_status_t rc = zx_system_get_features(ZX_FEATURE_KIND_CPU, &features); - if (rc != ZX_OK || (features & ZX_ARM64_FEATURE_ISA_ASIMD) == 0) - return; /* Report nothing if ASIMD(NEON) is missing */ - arm_cpu_enable_crc32 = !!(features & ZX_ARM64_FEATURE_ISA_CRC32); - arm_cpu_enable_pmull = !!(features & ZX_ARM64_FEATURE_ISA_PMULL); -#elif defined(ARMV8_OS_WINDOWS) - arm_cpu_enable_crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE); - arm_cpu_enable_pmull = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE); -#endif -} - -#elif defined(X86_NOT_WINDOWS) || defined(X86_WINDOWS) -/* On x86 we simply use a instruction to check the CPU features. - * (i.e. CPUID). 
- */ -static void _cpu_check_features(void) -{ - int x86_cpu_has_sse2; - int x86_cpu_has_ssse3; - int x86_cpu_has_sse42; - int x86_cpu_has_pclmulqdq; - int abcd[4]; -#ifdef _MSC_VER - __cpuid(abcd, 1); -#else - __cpuid(1, abcd[0], abcd[1], abcd[2], abcd[3]); -#endif - x86_cpu_has_sse2 = abcd[3] & 0x4000000; - x86_cpu_has_ssse3 = abcd[2] & 0x000200; - x86_cpu_has_sse42 = abcd[2] & 0x100000; - x86_cpu_has_pclmulqdq = abcd[2] & 0x2; - - x86_cpu_enable_ssse3 = x86_cpu_has_ssse3; - - x86_cpu_enable_simd = x86_cpu_has_sse2 && - x86_cpu_has_sse42 && - x86_cpu_has_pclmulqdq; -} -#endif diff --git a/cpu_features.h b/cpu_features.h deleted file mode 100644 index 2a4a797..0000000 --- a/cpu_features.h +++ /dev/null @@ -1,17 +0,0 @@ -/* cpu_features.h -- Processor features detection. - * - * Copyright 2018 The Chromium Authors. All rights reserved. - * Use of this source code is governed by a BSD-style license that can be - * found in the Chromium source repository LICENSE file. - */ - -#include "zlib.h" - -/* TODO(cavalcantii): remove checks for x86_flags on deflate. 
- */ -extern int arm_cpu_enable_crc32; -extern int arm_cpu_enable_pmull; -extern int x86_cpu_enable_ssse3; -extern int x86_cpu_enable_simd; - -void cpu_check_features(void); diff --git a/crc32.c b/crc32.c index bd69647..e95b908 100644 --- a/crc32.c +++ b/crc32.c @@ -29,10 +29,13 @@ #endif /* MAKECRCH */ #include "deflate.h" -#include "cpu_features.h" +#include "x86.h" #include "zutil.h" /* for STDC and FAR definitions */ -#if defined(CRC32_SIMD_SSE42_PCLMUL) || defined(CRC32_ARMV8_CRC32) +#if defined(CRC32_SIMD_SSE42_PCLMUL) +#include "crc32_simd.h" +#elif defined(CRC32_ARMV8_CRC32) +#include "arm_features.h" #include "crc32_simd.h" #endif @@ -223,7 +226,7 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) */ if (buf == Z_NULL) { if (!len) /* Assume user is calling crc32(0, NULL, 0); */ - cpu_check_features(); + x86_check_features(); return 0UL; } @@ -286,7 +289,7 @@ unsigned long ZEXPORT crc32(crc, buf, len) */ if (buf == Z_NULL) { if (!len) /* Assume user is calling crc32(0, NULL, 0); */ - cpu_check_features(); + arm_check_features(); return 0UL; } @@ -497,31 +500,25 @@ uLong ZEXPORT crc32_combine64(crc1, crc2, len2) ZLIB_INTERNAL void crc_reset(deflate_state *const s) { -#ifdef ADLER32_SIMD_SSSE3 if (x86_cpu_enable_simd) { crc_fold_init(s); return; } -#endif s->strm->adler = crc32(0L, Z_NULL, 0); } ZLIB_INTERNAL void crc_finalize(deflate_state *const s) { -#ifdef ADLER32_SIMD_SSSE3 if (x86_cpu_enable_simd) s->strm->adler = crc_fold_512to32(s); -#endif } ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) { -#ifdef ADLER32_SIMD_SSSE3 if (x86_cpu_enable_simd) { crc_fold_copy(strm->state, dst, strm->next_in, size); return; } -#endif zmemcpy(dst, strm->next_in, size); strm->adler = crc32(strm->adler, dst, size); } diff --git a/deflate.c b/deflate.c index a39e627..201254a 100644 --- a/deflate.c +++ b/deflate.c @@ -50,7 +50,7 @@ /* @(#) $Id$ */ #include #include "deflate.h" -#include "cpu_features.h" +#include "x86.h" #include 
"contrib/optimizations/insert_string.h" #if (defined(__ARM_NEON__) || defined(__ARM_NEON)) @@ -244,8 +244,10 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, // for all wrapper formats (e.g. RAW, ZLIB, GZIP). // Feature detection is not triggered while using RAW mode (i.e. we never // call crc32() with a NULL buffer). -#if defined(CRC32_ARMV8_CRC32) || defined(CRC32_SIMD_SSE42_PCLMUL) - cpu_check_features(); +#if defined(CRC32_ARMV8_CRC32) + arm_check_features(); +#elif defined(CRC32_SIMD_SSE42_PCLMUL) + x86_check_features(); #endif if (version == Z_NULL || version[0] != my_version[0] || @@ -1517,12 +1519,11 @@ local void fill_window_c(deflate_state *s); local void fill_window(deflate_state *s) { -#ifdef ADLER32_SIMD_SSSE3 if (x86_cpu_enable_simd) { fill_window_sse(s); return; } -#endif + fill_window_c(s); } diff --git a/simd_stub.c b/simd_stub.c new file mode 100644 index 0000000..c6d4605 --- /dev/null +++ b/simd_stub.c @@ -0,0 +1,35 @@ +/* simd_stub.c -- stub implementations +* Copyright (C) 2014 Intel Corporation +* For conditions of distribution and use, see copyright notice in zlib.h +*/ +#include + +#include "deflate.h" +#include "x86.h" + +int ZLIB_INTERNAL x86_cpu_enable_simd = 0; + +void ZLIB_INTERNAL crc_fold_init(deflate_state *const s) { + assert(0); +} + +void ZLIB_INTERNAL crc_fold_copy(deflate_state *const s, + unsigned char *dst, + const unsigned char *src, + long len) { + assert(0); +} + +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) { + assert(0); + return 0; +} + +void ZLIB_INTERNAL fill_window_sse(deflate_state *s) +{ + assert(0); +} + +void x86_check_features(void) +{ +} diff --git a/x86.c b/x86.c new file mode 100644 index 0000000..7488ad0 --- /dev/null +++ b/x86.c @@ -0,0 +1,101 @@ +/* + * x86 feature check + * + * Copyright (C) 2013 Intel Corporation. All rights reserved. 
+ * Author: + * Jim Kukunas + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "x86.h" +#include "zutil.h" + +int ZLIB_INTERNAL x86_cpu_enable_ssse3 = 0; +int ZLIB_INTERNAL x86_cpu_enable_simd = 0; + +#ifndef _MSC_VER +#include + +pthread_once_t cpu_check_inited_once = PTHREAD_ONCE_INIT; +static void _x86_check_features(void); + +void x86_check_features(void) +{ + pthread_once(&cpu_check_inited_once, _x86_check_features); +} + +static void _x86_check_features(void) +{ + int x86_cpu_has_sse2; + int x86_cpu_has_ssse3; + int x86_cpu_has_sse42; + int x86_cpu_has_pclmulqdq; + unsigned eax, ebx, ecx, edx; + + eax = 1; +#ifdef __i386__ + __asm__ __volatile__ ( + "xchg %%ebx, %1\n\t" + "cpuid\n\t" + "xchg %1, %%ebx\n\t" + : "+a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) + ); +#else + __asm__ __volatile__ ( + "cpuid\n\t" + : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + ); +#endif /* (__i386__) */ + + x86_cpu_has_sse2 = edx & 0x4000000; + x86_cpu_has_ssse3 = ecx & 0x000200; + x86_cpu_has_sse42 = ecx & 0x100000; + x86_cpu_has_pclmulqdq = ecx & 0x2; + + x86_cpu_enable_ssse3 = x86_cpu_has_ssse3; + + x86_cpu_enable_simd = x86_cpu_has_sse2 && + x86_cpu_has_sse42 && + x86_cpu_has_pclmulqdq; +} +#else +#include +#include + +static BOOL CALLBACK _x86_check_features(PINIT_ONCE once, + PVOID param, + PVOID *context); +static INIT_ONCE cpu_check_inited_once = INIT_ONCE_STATIC_INIT; + +void x86_check_features(void) +{ + InitOnceExecuteOnce(&cpu_check_inited_once, _x86_check_features, + NULL, NULL); +} + +static BOOL CALLBACK _x86_check_features(PINIT_ONCE once, + PVOID param, + PVOID *context) +{ + int x86_cpu_has_sse2; + int x86_cpu_has_ssse3; + int x86_cpu_has_sse42; + int x86_cpu_has_pclmulqdq; + int regs[4]; + + __cpuid(regs, 1); + + x86_cpu_has_sse2 = regs[3] & 0x4000000; + x86_cpu_has_ssse3 = regs[2] & 0x000200; + x86_cpu_has_sse42 = regs[2] & 0x100000; + x86_cpu_has_pclmulqdq = regs[2] & 0x2; + + x86_cpu_enable_ssse3 = 
x86_cpu_has_ssse3; + + x86_cpu_enable_simd = x86_cpu_has_sse2 && + x86_cpu_has_sse42 && + x86_cpu_has_pclmulqdq; + return TRUE; +} +#endif /* _MSC_VER */ diff --git a/x86.h b/x86.h new file mode 100644 index 0000000..7205d50 --- /dev/null +++ b/x86.h @@ -0,0 +1,16 @@ +/* x86.h -- check for x86 CPU features +* Copyright (C) 2013 Intel Corporation Jim Kukunas +* For conditions of distribution and use, see copyright notice in zlib.h +*/ + +#ifndef X86_H +#define X86_H + +#include "zlib.h" + +extern int x86_cpu_enable_ssse3; +extern int x86_cpu_enable_simd; + +void x86_check_features(void); + +#endif /* X86_H */ -- cgit v1.2.3