summaryrefslogtreecommitdiff
path: root/deflate.c
diff options
context:
space:
mode:
author Chris Blume <cblume@chromium.org> 2019-11-26 11:52:02 +0000
committer Commit Bot <commit-bot@chromium.org> 2019-11-26 11:52:02 +0000
commit e77e1c06c8881abff0c7418368d147ff4a474d08 (patch)
tree 9c903a3e86fec47ca8aebcd7741fe2aa29bc2d71 /deflate.c
parent ae16db5504a5c2fd4f074c128bb703fd4cdc36e1 (diff)
download zlib-e77e1c06c8881abff0c7418368d147ff4a474d08.tar.gz
Revert "Remove use of inline ASM in insert_string_sse"
This reverts commit ea6b9281bbf3ca08ccef8f5266f88de6f56c5ff6. Reason for revert: It turns out the V8 team needs the MSVC build. :) I tried installing the Clang compiler as part of Visual Studio 2017 and 2019 but neither of them became options in the toolchain. I'll need to spend more time figuring out how to get Clang on Windows. Original change's description: > Remove use of inline ASM in insert_string_sse > > It seems that some years ago clang@Windows didn't have the > proper intrinsic required, which prompted the use of inline > ASM. > > It has a side effect in that it will allow compilation of the > optimized function within the same compilation unit while using regular > compiler flags (i.e. 'crc32' instruction on x86 requires some special > compiler flags). > > Main issue is that inline ASM is blocked on dependencies (e.g. 'base') > that will be linked to NaCl. > > The main idea here is to allow the whole Chromium code base to use the > highly optimized checksums in zlib (e.g. crc32 and Adler-32), exported > through an interface (i.e. base::Crc32()). > > This patch fixes this issue by removing the use of inline ASM. 
> > The workaround is to use clang/gcc 'target attributes' to instruct the > backend to use different code generation options for the optimized > function, see: > https://clang.llvm.org/docs/AttributeReference.html#target > > Bug: 902789 > Change-Id: I0d139268aefb8335310c0e3f6533006be9af6470 > Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1931272 > Reviewed-by: Adenilson Cavalcanti <cavalcantii@chromium.org> > Commit-Queue: Adenilson Cavalcanti <cavalcantii@chromium.org> > Cr-Commit-Position: refs/heads/master@{#718788} TBR=cavalcantii@chromium.org,cblume@chromium.org,mtklein@chromium.org,adenilson.cavalcanti@arm.com Change-Id: I6b3fcce10197121b548300855710e99f7048f1ae No-Presubmit: true No-Tree-Checks: true No-Try: true Bug: 902789 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1936189 Reviewed-by: Chris Blume <cblume@chromium.org> Commit-Queue: Chris Blume <cblume@chromium.org> Cr-Original-Commit-Position: refs/heads/master@{#719105} Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src Cr-Mirrored-Commit: 80c2a793b4ba20d9638fbdd030a1687dc26242a3
Diffstat (limited to 'deflate.c')
-rw-r--r-- deflate.c 63
1 files changed, 37 insertions, 26 deletions
diff --git a/deflate.c b/deflate.c
index 185514a..1f0bc0e 100644
--- a/deflate.c
+++ b/deflate.c
@@ -123,30 +123,8 @@ extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size);
#define INLINE inline
#endif
-/* Intel optimized insert_string: crc32 hash via a target("sse4.2") function. */
-#if defined(CRC32_SIMD_SSE42_PCLMUL)
-#define _mm_crc32_u32 __builtin_ia32_crc32si
-#define TARGET_INTEL_WITH_CRC __attribute__((target("sse4.2")))
-TARGET_INTEL_WITH_CRC
-local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str)
-{
- Pos ret;
- unsigned *ip, val, h = 0;
-
- ip = (unsigned *)&s->window[str];
- val = *ip; /* loads sizeof(unsigned) bytes starting at window[str] */
-
- if (s->level >= 6)
- val &= 0xFFFFFF; /* keep only the low 24 bits of the loaded word */
-
- h = _mm_crc32_u32(h, val); /* hash = crc32 of val, starting from h == 0 */
-
- ret = s->head[h & s->hash_mask]; /* previous head of this hash bucket */
- s->head[h & s->hash_mask] = str; /* str becomes the new head */
- s->prev[str & s->w_mask] = ret; /* link str to the old head */
- return ret;
-}
-#endif
+/* Forward declaration: the crc32-based insert_string_sse is defined further down. */
+local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str);
/* ===========================================================================
* Local data
@@ -250,11 +228,10 @@ local INLINE Pos insert_string(deflate_state *const s, const Pos str)
#if defined(CRC32_ARMV8_CRC32)
if (arm_cpu_enable_crc32)
return insert_string_arm(s, str);
-#elif defined(CRC32_SIMD_SSE42_PCLMUL)
+#endif
if (x86_cpu_enable_simd)
return insert_string_sse(s, str);
#endif
-#endif
return insert_string_c(s, str);
}
@@ -2299,3 +2276,37 @@ local block_state deflate_huff(s, flush)
FLUSH_BLOCK(s, 0);
return block_done;
}
+
+/* Insert 'str' into the hash chains, hashing with the x86 crc32 instruction;
+ * returns the previous chain head. GCC/clang use inline asm and Visual Studio
+ * uses the intrinsic, so no extra compiler flags are needed. */
+local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str)
+{
+ Pos ret;
+ unsigned *ip, val, h = 0;
+
+ ip = (unsigned *)&s->window[str]; /* NOTE(review): type-punned, possibly unaligned read */
+ val = *ip; /* loads sizeof(unsigned) bytes starting at window[str] */
+
+ if (s->level >= 6)
+ val &= 0xFFFFFF; /* keep only the low 24 bits of the loaded word */
+
+/* MSVC proper uses the intrinsic; clang on Windows falls through to inline asm */
+#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
+ h = _mm_crc32_u32(h, val);
+#elif defined(__i386__) || defined(__amd64__)
+ __asm__ __volatile__ (
+ "crc32 %1,%0\n\t"
+ : "+r" (h)
+ : "r" (val)
+ );
+#else
+ /* Non-x86 build: presumably x86_cpu_enable_simd is never set, so unreachable */
+ assert(0);
+#endif
+
+ ret = s->head[h & s->hash_mask]; /* previous head of this hash bucket */
+ s->head[h & s->hash_mask] = str; /* str becomes the new head */
+ s->prev[str & s->w_mask] = ret; /* link str to the old head */
+ return ret;
+}