From e5c4d8c45ed18f84ea68f5029c9bceb1f67268b8 Mon Sep 17 00:00:00 2001 From: Sam Maier Date: Thu, 14 Nov 2019 22:34:40 +0000 Subject: zlib: Allowing compression level to be selected in utils Allow users to set the compression level while using CompressHelper(). There is a 2x to 4x compression speed difference between levels [1..9], where it is possible to trade speed for better compression ratios (and vice-versa). It is up the user to experiment with the data to find the sweet spot for its application (default compression level is 6). Bug: 833361 Change-Id: Ia710cc7322707ace242133b283610eaa48def31f Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1912791 Commit-Queue: Adenilson Cavalcanti Commit-Queue: Sam Maier Auto-Submit: Sam Maier Reviewed-by: Adenilson Cavalcanti Cr-Original-Commit-Position: refs/heads/master@{#715455} Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src Cr-Mirrored-Commit: 3857767142ed56b1ec44a8cdda52472e1b357cc3 --- google/compression_utils_portable.cc | 12 ++++++++++-- google/compression_utils_portable.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/google/compression_utils_portable.cc b/google/compression_utils_portable.cc index 21338b5..191e349 100644 --- a/google/compression_utils_portable.cc +++ b/google/compression_utils_portable.cc @@ -66,20 +66,28 @@ int GzipCompressHelper(Bytef* dest, void* (*malloc_fn)(size_t), void (*free_fn)(void*)) { return CompressHelper(GZIP, dest, dest_length, source, source_length, - malloc_fn, free_fn); + Z_DEFAULT_COMPRESSION, malloc_fn, free_fn); } // This code is taken almost verbatim from third_party/zlib/compress.c. The only // difference is deflateInit2() is called which allows different window bits to // be set. > 16 causes a gzip header to be emitted rather than a zlib header, // and negative causes no header to emitted. +// +// Compression level can be a number from 1-9, with 1 being the fastest, 9 being +// the best compression. The default, which the GZIP helper uses, is 6. int CompressHelper(WrapperType wrapper_type, Bytef* dest, uLongf* dest_length, const Bytef* source, uLong source_length, + int compression_level, void* (*malloc_fn)(size_t), void (*free_fn)(void*)) { + if (compression_level < 1 || compression_level > 9) { + compression_level = Z_DEFAULT_COMPRESSION; + } + z_stream stream; // FIXME(cavalcantii): z_const is not defined as 'const'. @@ -118,7 +126,7 @@ int CompressHelper(WrapperType wrapper_type, stream.opaque = static_cast(0); } - int err = deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, + int err = deflateInit2(&stream, compression_level, Z_DEFLATED, ZlibStreamWrapperType(wrapper_type), kZlibMemoryLevel, Z_DEFAULT_STRATEGY); if (err != Z_OK) diff --git a/google/compression_utils_portable.h b/google/compression_utils_portable.h index 7c3753b..cd004e8 100644 --- a/google/compression_utils_portable.h +++ b/google/compression_utils_portable.h @@ -39,6 +39,7 @@ int CompressHelper(WrapperType wrapper_type, uLongf* dest_length, const Bytef* source, uLong source_length, + int compression_level, void* (*malloc_fn)(size_t), void (*free_fn)(void*)); -- cgit v1.2.3 From 7c4128a124a812d086478e5c5f9f2f5893ab8871 Mon Sep 17 00:00:00 2001 From: Adenilson Cavalcanti Date: Thu, 21 Nov 2019 23:04:38 +0000 Subject: Fix performance issue in RAW mode While investigating the use of RAW mode, I noticed that it was *slower* for compression than GZIP or ZLIB wrapper formats, which didn't make sense (i.e. we don't calculate a data integrity check using RAW mode). It turns out that the code was falling back to the portable implementation of insert_string(), instead of using the optimized version that rely on 'crc32w' as a hash function. The reason is that CPU features detection is not triggered while using RAW mode (i.e. we never call crc32() with a NULL buffer). This patch fixes this issue ensuring that RAW mode is going to be faster for both compression/decompression than either ZLIB or GZIP formats. Bug: 833361 Change-Id: I285297f67ffc0114700ed03c2186ad21aab8b40e Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1929634 Reviewed-by: Chris Blume Reviewed-by: Adenilson Cavalcanti Commit-Queue: Adenilson Cavalcanti Cr-Original-Commit-Position: refs/heads/master@{#717877} Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src Cr-Mirrored-Commit: 171a0a69eb5d70f8a9f44000e26bc7dc65f1fd97 --- deflate.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/deflate.c b/deflate.c index c950d6f..1f0bc0e 100644 --- a/deflate.c +++ b/deflate.c @@ -307,7 +307,15 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, int wrap = 1; static const char my_version[] = ZLIB_VERSION; + // Needed to activate optimized insert_string() that helps compression + // for all wrapper formats (e.g. RAW, ZLIB, GZIP). + // Feature detection is not triggered while using RAW mode (i.e. we never + // call crc32() with a NULL buffer). +#if defined(CRC32_ARMV8_CRC32) + arm_check_features(); +#elif defined(CRC32_SIMD_SSE42_PCLMUL) x86_check_features(); +#endif if (version == Z_NULL || version[0] != my_version[0] || stream_size != sizeof(z_stream)) { -- cgit v1.2.3 From ae16db5504a5c2fd4f074c128bb703fd4cdc36e1 Mon Sep 17 00:00:00 2001 From: Adenilson Cavalcanti Date: Mon, 25 Nov 2019 20:28:15 +0000 Subject: Remove use of inline ASM in insert_string_sse It seems that some years ago clang@Windows didn't have the proper intrinsic required, which prompted the use of inline ASM. It has a side effect in that it will allow compilation of the optimized function within the same compilation unit while using regular compiler flags (i.e. 'crc32' instruction on x86 requires some special compiler flags). Main issue is that inline ASM is blocked on dependencies (e.g. 'base') that will be linked to NaCl. The main idea here is to allow the whole Chromium code base to use the highly optimized checksums in zlib (e.g. crc32 and Adler-32), exported through an interface (i.e. base::Crc32()). This patch fixes this issue by removing the use of inline ASM. The workaround is to use clang/gcc 'target attributes' to instruct the backend to use different code generation options for the optimized function, see: https://clang.llvm.org/docs/AttributeReference.html#target Bug: 902789 Change-Id: I0d139268aefb8335310c0e3f6533006be9af6470 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1931272 Reviewed-by: Adenilson Cavalcanti Commit-Queue: Adenilson Cavalcanti Cr-Original-Commit-Position: refs/heads/master@{#718788} Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src Cr-Mirrored-Commit: ea6b9281bbf3ca08ccef8f5266f88de6f56c5ff6 --- deflate.c | 63 ++++++++++++++++++++++++++------------------------------------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/deflate.c b/deflate.c index 1f0bc0e..185514a 100644 --- a/deflate.c +++ b/deflate.c @@ -123,8 +123,30 @@ extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); #define INLINE inline #endif -/* Inline optimisation */ -local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); +/* Intel optimized insert_string. */ +#if defined(CRC32_SIMD_SSE42_PCLMUL) +#define _mm_crc32_u32 __builtin_ia32_crc32si +#define TARGET_INTEL_WITH_CRC __attribute__((target("sse4.2"))) +TARGET_INTEL_WITH_CRC +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) +{ + Pos ret; + unsigned *ip, val, h = 0; + + ip = (unsigned *)&s->window[str]; + val = *ip; + + if (s->level >= 6) + val &= 0xFFFFFF; + + h = _mm_crc32_u32(h, val); + + ret = s->head[h & s->hash_mask]; + s->head[h & s->hash_mask] = str; + s->prev[str & s->w_mask] = ret; + return ret; +} +#endif /* =========================================================================== * Local data @@ -228,9 +250,10 @@ local INLINE Pos insert_string(deflate_state *const s, const Pos str) #if defined(CRC32_ARMV8_CRC32) if (arm_cpu_enable_crc32) return insert_string_arm(s, str); -#endif +#elif defined(CRC32_SIMD_SSE42_PCLMUL) if (x86_cpu_enable_simd) return insert_string_sse(s, str); +#endif #endif return insert_string_c(s, str); } @@ -2276,37 +2299,3 @@ local block_state deflate_huff(s, flush) FLUSH_BLOCK(s, 0); return block_done; } - -/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will - * use intrinsic without extra params - */ -local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) -{ - Pos ret; - unsigned *ip, val, h = 0; - - ip = (unsigned *)&s->window[str]; - val = *ip; - - if (s->level >= 6) - val &= 0xFFFFFF; - -/* Windows clang should use inline asm */ -#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64)) - h = _mm_crc32_u32(h, val); -#elif defined(__i386__) || defined(__amd64__) - __asm__ __volatile__ ( - "crc32 %1,%0\n\t" - : "+r" (h) - : "r" (val) - ); -#else - /* This should never happen */ - assert(0); -#endif - - ret = s->head[h & s->hash_mask]; - s->head[h & s->hash_mask] = str; - s->prev[str & s->w_mask] = ret; - return ret; -} -- cgit v1.2.3 From e77e1c06c8881abff0c7418368d147ff4a474d08 Mon Sep 17 00:00:00 2001 From: Chris Blume Date: Tue, 26 Nov 2019 11:52:02 +0000 Subject: Revert "Remove use of inline ASM in insert_string_sse" This reverts commit ea6b9281bbf3ca08ccef8f5266f88de6f56c5ff6. Reason for revert: It turns out the V8 team needs the MSVC build. :) I tried installing the Clang compiler as part of Visual Studio 2017 and 2019 but neither of them became options in the toolchain. I'll need to spend more time figuring out how to get Clang on Windows. Original change's description: > Remove use of inline ASM in insert_string_sse > > It seems that some years ago clang@Windows didn't have the > proper intrinsic required, which prompted the use of inline > ASM. > > It has a side effect in that it will allow compilation of the > optimized function within the same compilation unit while using regular > compiler flags (i.e. 'crc32' instruction on x86 requires some special > compiler flags). > > Main issue is that inline ASM is blocked on dependencies (e.g. 'base') > that will be linked to NaCl. > > The main idea here is to allow the whole Chromium code base to use the > highly optimized checksums in zlib (e.g. crc32 and Adler-32), exported > through an interface (i.e. base::Crc32()). > > This patch fixes this issue by removing the use of inline ASM. > > The workaround is to use clang/gcc 'target attributes' to instruct the > backend to use different code generation options for the optimized > function, see: > https://clang.llvm.org/docs/AttributeReference.html#target > > Bug: 902789 > Change-Id: I0d139268aefb8335310c0e3f6533006be9af6470 > Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1931272 > Reviewed-by: Adenilson Cavalcanti > Commit-Queue: Adenilson Cavalcanti > Cr-Commit-Position: refs/heads/master@{#718788} TBR=cavalcantii@chromium.org,cblume@chromium.org,mtklein@chromium.org,adenilson.cavalcanti@arm.com Change-Id: I6b3fcce10197121b548300855710e99f7048f1ae No-Presubmit: true No-Tree-Checks: true No-Try: true Bug: 902789 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1936189 Reviewed-by: Chris Blume Commit-Queue: Chris Blume Cr-Original-Commit-Position: refs/heads/master@{#719105} Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src Cr-Mirrored-Commit: 80c2a793b4ba20d9638fbdd030a1687dc26242a3 --- deflate.c | 63 +++++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/deflate.c b/deflate.c index 185514a..1f0bc0e 100644 --- a/deflate.c +++ b/deflate.c @@ -123,30 +123,8 @@ extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); #define INLINE inline #endif -/* Intel optimized insert_string. */ -#if defined(CRC32_SIMD_SSE42_PCLMUL) -#define _mm_crc32_u32 __builtin_ia32_crc32si -#define TARGET_INTEL_WITH_CRC __attribute__((target("sse4.2"))) -TARGET_INTEL_WITH_CRC -local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) -{ - Pos ret; - unsigned *ip, val, h = 0; - - ip = (unsigned *)&s->window[str]; - val = *ip; - - if (s->level >= 6) - val &= 0xFFFFFF; - - h = _mm_crc32_u32(h, val); - - ret = s->head[h & s->hash_mask]; - s->head[h & s->hash_mask] = str; - s->prev[str & s->w_mask] = ret; - return ret; -} -#endif +/* Inline optimisation */ +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); /* =========================================================================== * Local data @@ -250,10 +228,9 @@ local INLINE Pos insert_string(deflate_state *const s, const Pos str) #if defined(CRC32_ARMV8_CRC32) if (arm_cpu_enable_crc32) return insert_string_arm(s, str); -#elif defined(CRC32_SIMD_SSE42_PCLMUL) +#endif if (x86_cpu_enable_simd) return insert_string_sse(s, str); -#endif #endif return insert_string_c(s, str); } @@ -2299,3 +2276,37 @@ local block_state deflate_huff(s, flush) FLUSH_BLOCK(s, 0); return block_done; } + +/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will + * use intrinsic without extra params + */ +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) +{ + Pos ret; + unsigned *ip, val, h = 0; + + ip = (unsigned *)&s->window[str]; + val = *ip; + + if (s->level >= 6) + val &= 0xFFFFFF; + +/* Windows clang should use inline asm */ +#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64)) + h = _mm_crc32_u32(h, val); +#elif defined(__i386__) || defined(__amd64__) + __asm__ __volatile__ ( + "crc32 %1,%0\n\t" + : "+r" (h) + : "r" (val) + ); +#else + /* This should never happen */ + assert(0); +#endif + + ret = s->head[h & s->hash_mask]; + s->head[h & s->hash_mask] = str; + s->prev[str & s->w_mask] = ret; + return ret; +} -- cgit v1.2.3 From f262c1b3c4196a2fee98c113142faff525b8d884 Mon Sep 17 00:00:00 2001 From: Chris Blume Date: Wed, 4 Dec 2019 11:18:49 +0000 Subject: Remove use of inline ASM in insert_string_sse It seems that some years ago clang@Windows didn't have the proper intrinsic required, which prompted the use of inline ASM. It has a side effect in that it will allow compilation of the optimized function within the same compilation unit while using regular compiler flags (i.e. 'crc32' instruction on x86 requires some special compiler flags). Main issue is that inline ASM is blocked on dependencies (e.g. 'base') that will be linked to NaCl. The main idea here is to allow the whole Chromium code base to use the highly optimized checksums in zlib (e.g. crc32 and Adler-32), exported through an interface (i.e. base::Crc32()). This patch fixes this issue by removing the use of inline ASM. The workaround is to use clang/gcc 'target attributes' to instruct the backend to use different code generation options for the optimized function, see: https://clang.llvm.org/docs/AttributeReference.html#target NOTE: While testing on my personal Windows PC, VS2019 including smmintrin.h was insufficient. I needed to explicitly include either immintrin.h or nmmintrin.h. I expected I would need that, but it seems to be working with just smmintrin.h. Bug: 902789 Change-Id: Id692fb839e20b26f9ba8b45538e652d5b140cd36 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1941688 Commit-Queue: Chris Blume Reviewed-by: Adenilson Cavalcanti Cr-Original-Commit-Position: refs/heads/master@{#721425} Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src Cr-Mirrored-Commit: 1cebcd57bc3d09c39783395e6b173ff1f358a91b --- deflate.c | 68 +++++++++++++++++++++++++++++---------------------------------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/deflate.c b/deflate.c index 1f0bc0e..b21175b 100644 --- a/deflate.c +++ b/deflate.c @@ -52,6 +52,10 @@ #include "deflate.h" #include "x86.h" +#if defined(CRC32_SIMD_SSE42_PCLMUL) +#include +#endif + #if (defined(__ARM_NEON__) || defined(__ARM_NEON)) #include "contrib/optimizations/slide_hash_neon.h" #endif @@ -123,8 +127,31 @@ extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); #define INLINE inline #endif -/* Inline optimisation */ -local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); +/* Intel optimized insert_string. */ +#if defined(CRC32_SIMD_SSE42_PCLMUL) + +#if defined(__GNUC__) || defined(__clang__) +__attribute__((target("sse4.2"))) +#endif +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) +{ + Pos ret; + unsigned *ip, val, h = 0; + + ip = (unsigned *)&s->window[str]; + val = *ip; + + if (s->level >= 6) + val &= 0xFFFFFF; + + h = _mm_crc32_u32(h, val); + + ret = s->head[h & s->hash_mask]; + s->head[h & s->hash_mask] = str; + s->prev[str & s->w_mask] = ret; + return ret; +} +#endif /* =========================================================================== * Local data @@ -228,9 +255,10 @@ local INLINE Pos insert_string(deflate_state *const s, const Pos str) #if defined(CRC32_ARMV8_CRC32) if (arm_cpu_enable_crc32) return insert_string_arm(s, str); -#endif +#elif defined(CRC32_SIMD_SSE42_PCLMUL) if (x86_cpu_enable_simd) return insert_string_sse(s, str); +#endif #endif return insert_string_c(s, str); } @@ -2276,37 +2304,3 @@ local block_state deflate_huff(s, flush) FLUSH_BLOCK(s, 0); return block_done; } - -/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will - * use intrinsic without extra params - */ -local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) -{ - Pos ret; - unsigned *ip, val, h = 0; - - ip = (unsigned *)&s->window[str]; - val = *ip; - - if (s->level >= 6) - val &= 0xFFFFFF; - -/* Windows clang should use inline asm */ -#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64)) - h = _mm_crc32_u32(h, val); -#elif defined(__i386__) || defined(__amd64__) - __asm__ __volatile__ ( - "crc32 %1,%0\n\t" - : "+r" (h) - : "r" (val) - ); -#else - /* This should never happen */ - assert(0); -#endif - - ret = s->head[h & s->hash_mask]; - s->head[h & s->hash_mask] = str; - s->prev[str & s->w_mask] = ret; - return ret; -} -- cgit v1.2.3 From d457ebca6c670bc876f0d795fffc555756c207b1 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Tue, 10 Dec 2019 13:37:44 -0800 Subject: zlib: add OWNERS.android for the local janitors. Exempt-From-Owner-Approval: recursion: see recursion. Change-Id: I99474a2ba24492486fe5366d68a7e75437e0507d --- OWNERS.android | 1 + 1 file changed, 1 insertion(+) create mode 100644 OWNERS.android diff --git a/OWNERS.android b/OWNERS.android new file mode 100644 index 0000000..7529cb9 --- /dev/null +++ b/OWNERS.android @@ -0,0 +1 @@ +include platform/system/core:/janitors/OWNERS -- cgit v1.2.3