summaryrefslogtreecommitdiff
path: root/deflate.c
diff options
context:
space:
mode:
authorChris Blume <cblume@chromium.org>2019-12-04 11:18:49 +0000
committerCommit Bot <commit-bot@chromium.org>2019-12-04 11:18:49 +0000
commitf262c1b3c4196a2fee98c113142faff525b8d884 (patch)
treef1a4a54b58d010618cbeab48dad0596ec1321dd1 /deflate.c
parente77e1c06c8881abff0c7418368d147ff4a474d08 (diff)
downloadzlib-f262c1b3c4196a2fee98c113142faff525b8d884.tar.gz
Remove use of inline ASM in insert_string_sse
It seems that some years ago clang@Windows didn't have the proper intrinsic required, which prompted the use of inline ASM. It has a side effect in that it will allow compilation of the optimized function within the same compilation unit while using regular compiler flags (i.e. 'crc32' instruction on x86 requires some special compiler flags). Main issue is that inline ASM is blocked on dependencies (e.g. 'base') that will be linked to NaCl. The main idea here is to allow the whole Chromium code base to use the highly optimized checksums in zlib (e.g. crc32 and Adler-32), exported through an interface (i.e. base::Crc32()). This patch fixes this issue by removing the use of inline ASM. The workaround is to use clang/gcc 'target attributes' to instruct the backend to use different code generation options for the optimized function, see: https://clang.llvm.org/docs/AttributeReference.html#target NOTE: While testing on my personal Windows PC, VS2019 including smmintrin.h was insufficient. I needed to explicitly include either immintrin.h or nmmintrin.h. I expected I would need that, but it seems to be working with just smmintrin.h. Bug: 902789 Change-Id: Id692fb839e20b26f9ba8b45538e652d5b140cd36 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1941688 Commit-Queue: Chris Blume <cblume@chromium.org> Reviewed-by: Adenilson Cavalcanti <cavalcantii@chromium.org> Cr-Original-Commit-Position: refs/heads/master@{#721425} Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src Cr-Mirrored-Commit: 1cebcd57bc3d09c39783395e6b173ff1f358a91b
Diffstat (limited to 'deflate.c')
-rw-r--r--deflate.c68
1 files changed, 31 insertions, 37 deletions
diff --git a/deflate.c b/deflate.c
index 1f0bc0e..b21175b 100644
--- a/deflate.c
+++ b/deflate.c
@@ -52,6 +52,10 @@
#include "deflate.h"
#include "x86.h"
+#if defined(CRC32_SIMD_SSE42_PCLMUL)
+#include <smmintrin.h>
+#endif
+
#if (defined(__ARM_NEON__) || defined(__ARM_NEON))
#include "contrib/optimizations/slide_hash_neon.h"
#endif
@@ -123,8 +127,31 @@ extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size);
#define INLINE inline
#endif
-/* Inline optimisation */
-local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str);
+/* Intel optimized insert_string. */
+#if defined(CRC32_SIMD_SSE42_PCLMUL)
+
+#if defined(__GNUC__) || defined(__clang__)
+__attribute__((target("sse4.2")))
+#endif
+local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str)
+{
+ Pos ret;
+ unsigned *ip, val, h = 0;
+
+ ip = (unsigned *)&s->window[str];
+ val = *ip;
+
+ if (s->level >= 6)
+ val &= 0xFFFFFF;
+
+ h = _mm_crc32_u32(h, val);
+
+ ret = s->head[h & s->hash_mask];
+ s->head[h & s->hash_mask] = str;
+ s->prev[str & s->w_mask] = ret;
+ return ret;
+}
+#endif
/* ===========================================================================
* Local data
@@ -228,10 +255,11 @@ local INLINE Pos insert_string(deflate_state *const s, const Pos str)
#if defined(CRC32_ARMV8_CRC32)
if (arm_cpu_enable_crc32)
return insert_string_arm(s, str);
-#endif
+#elif defined(CRC32_SIMD_SSE42_PCLMUL)
if (x86_cpu_enable_simd)
return insert_string_sse(s, str);
#endif
+#endif
return insert_string_c(s, str);
}
@@ -2276,37 +2304,3 @@ local block_state deflate_huff(s, flush)
FLUSH_BLOCK(s, 0);
return block_done;
}
-
-/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will
- * use intrinsic without extra params
- */
-local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str)
-{
- Pos ret;
- unsigned *ip, val, h = 0;
-
- ip = (unsigned *)&s->window[str];
- val = *ip;
-
- if (s->level >= 6)
- val &= 0xFFFFFF;
-
-/* Windows clang should use inline asm */
-#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
- h = _mm_crc32_u32(h, val);
-#elif defined(__i386__) || defined(__amd64__)
- __asm__ __volatile__ (
- "crc32 %1,%0\n\t"
- : "+r" (h)
- : "r" (val)
- );
-#else
- /* This should never happen */
- assert(0);
-#endif
-
- ret = s->head[h & s->hash_mask];
- s->head[h & s->hash_mask] = str;
- s->prev[str & s->w_mask] = ret;
- return ret;
-}