summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdenilson Cavalcanti <adenilson.cavalcanti@arm.com>2019-11-25 20:28:15 +0000
committerCommit Bot <commit-bot@chromium.org>2019-11-25 20:28:15 +0000
commitae16db5504a5c2fd4f074c128bb703fd4cdc36e1 (patch)
treedaa55e39e61a65de2ff579dc437644185feae0ff
parent7c4128a124a812d086478e5c5f9f2f5893ab8871 (diff)
downloadzlib-ae16db5504a5c2fd4f074c128bb703fd4cdc36e1.tar.gz
Remove use of inline ASM in insert_string_sse
It seems that some years ago clang@Windows didn't have the proper intrinsic required, which prompted the use of inline ASM. It has a side effect in that it will allow compilation of the optimized function within the same compilation unit while using regular compiler flags (i.e. 'crc32' instruction on x86 requires some special compiler flags). Main issue is that inline ASM is blocked on dependencies (e.g. 'base') that will be linked to NaCl. The main idea here is to allow the whole Chromium code base to use the highly optimized checksums in zlib (e.g. crc32 and Adler-32), exported through an interface (i.e. base::Crc32()). This patch fixes this issue by removing the use of inline ASM. The workaround is to use clang/gcc 'target attributes' to instruct the backend to use different code generation options for the optimized function, see: https://clang.llvm.org/docs/AttributeReference.html#target Bug: 902789 Change-Id: I0d139268aefb8335310c0e3f6533006be9af6470 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1931272 Reviewed-by: Adenilson Cavalcanti <cavalcantii@chromium.org> Commit-Queue: Adenilson Cavalcanti <cavalcantii@chromium.org> Cr-Original-Commit-Position: refs/heads/master@{#718788} Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src Cr-Mirrored-Commit: ea6b9281bbf3ca08ccef8f5266f88de6f56c5ff6
-rw-r--r--deflate.c63
1 files changed, 26 insertions, 37 deletions
diff --git a/deflate.c b/deflate.c
index 1f0bc0e..185514a 100644
--- a/deflate.c
+++ b/deflate.c
@@ -123,8 +123,30 @@ extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size);
#define INLINE inline
#endif
-/* Inline optimisation */
-local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str);
+/* Intel optimized insert_string. */
+#if defined(CRC32_SIMD_SSE42_PCLMUL)
+#define _mm_crc32_u32 __builtin_ia32_crc32si
+#define TARGET_INTEL_WITH_CRC __attribute__((target("sse4.2")))
+TARGET_INTEL_WITH_CRC
+local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str)
+{
+ Pos ret;
+ unsigned *ip, val, h = 0;
+
+ ip = (unsigned *)&s->window[str];
+ val = *ip;
+
+ if (s->level >= 6)
+ val &= 0xFFFFFF;
+
+ h = _mm_crc32_u32(h, val);
+
+ ret = s->head[h & s->hash_mask];
+ s->head[h & s->hash_mask] = str;
+ s->prev[str & s->w_mask] = ret;
+ return ret;
+}
+#endif
/* ===========================================================================
* Local data
@@ -228,10 +250,11 @@ local INLINE Pos insert_string(deflate_state *const s, const Pos str)
#if defined(CRC32_ARMV8_CRC32)
if (arm_cpu_enable_crc32)
return insert_string_arm(s, str);
-#endif
+#elif defined(CRC32_SIMD_SSE42_PCLMUL)
if (x86_cpu_enable_simd)
return insert_string_sse(s, str);
#endif
+#endif
return insert_string_c(s, str);
}
@@ -2276,37 +2299,3 @@ local block_state deflate_huff(s, flush)
FLUSH_BLOCK(s, 0);
return block_done;
}
-
-/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will
- * use intrinsic without extra params
- */
-local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str)
-{
- Pos ret;
- unsigned *ip, val, h = 0;
-
- ip = (unsigned *)&s->window[str];
- val = *ip;
-
- if (s->level >= 6)
- val &= 0xFFFFFF;
-
-/* Windows clang should use inline asm */
-#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
- h = _mm_crc32_u32(h, val);
-#elif defined(__i386__) || defined(__amd64__)
- __asm__ __volatile__ (
- "crc32 %1,%0\n\t"
- : "+r" (h)
- : "r" (val)
- );
-#else
- /* This should never happen */
- assert(0);
-#endif
-
- ret = s->head[h & s->hash_mask];
- s->head[h & s->hash_mask] = str;
- s->prev[str & s->w_mask] = ret;
- return ret;
-}