diff options
author | qyearsley <qyearsley@chromium.org> | 2014-10-24 16:09:39 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2014-10-24 23:10:06 +0000 |
commit | 3230118192e5332c934514c094f33581a355fb3d (patch) | |
tree | 88b3e24081d6ba79716f6ca55df76dbb5fc3e6b0 /deflate.c | |
parent | a8515195fb47947578abc944a48a95faad59bbcc (diff) | |
download | zlib-3230118192e5332c934514c094f33581a355fb3d.tar.gz |
Revert of Reland "Integrate SIMD optimisations for zlib" (patchset #2 id:40001 of https://codereview.chromium.org/677713002/)
Reason for revert:
Speculatively reverting because XP Tests (1) is having failures.
https://build.chromium.org/p/chromium.win/builders/XP%20Tests%20(1)
Original issue's description:
> Reland "Integrate SIMD optimisations for zlib"
>
> This reland adds an MSan suppression entry to work around gaps in MSan's
> support for some of the intrinsics this patch uses. This version also inlines
> the insert_string_sse function as it uses inline assembly and therefore does
> not need to be in the static library.
>
> Original CL: https://codereview.chromium.org/552123005
>
> These optimisations have been published on zlib mailing list and at
> https://github.com/jtkukunas/zlib/
>
> This change merges the following optimisation patches:
> - "For x86, add CPUID check."
> - "Adds SSE2 optimized hash shifting to fill_window."
> - "add SSE4.2 optimized hash function"
> - "add PCLMULQDQ optimized CRC folding"
>
> From Jim Kukunas <james.t.kukunas@linux.intel.com>, and adapts them to the
> current zlib version in Chromium.
>
> The optimisations are enabled at runtime if all the necessary CPU features are
> present. As the optimisations require extra cflags to enable the compiler to
> use the instructions, the optimisations are held in their own static library
> with a stub implementation to allow linking on other platforms.
>
> TEST=net_unittests(GZipUnitTest) passes, Chrome functions and performance
> improvement seen on RoboHornet benchmark on Linux Desktop
> BUG=401517
>
> Committed: https://crrev.com/a5022d5eab6f77889aceed6ab0ccaf44a657ffc4
> Cr-Commit-Position: refs/heads/master@{#301162}
TBR=agl@chromium.org,hans@chromium.org,robert.bradford@intel.com
NOTREECHECKS=true
NOTRY=true
BUG=401517
Review URL: https://codereview.chromium.org/665203006
Cr-Original-Commit-Position: refs/heads/master@{#301221}
Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src
Cr-Mirrored-Commit: 5d38e0bd32f9a7e4766b877711c710df986d74ed
Diffstat (limited to 'deflate.c')
-rw-r--r-- | deflate.c | 139 |
1 files changed, 26 insertions, 113 deletions
@@ -49,10 +49,7 @@ /* @(#) $Id$ */ -#include <assert.h> - #include "deflate.h" -#include "x86.h" const char deflate_copyright[] = " deflate 1.2.5 Copyright 1995-2010 Jean-loup Gailly and Mark Adler "; @@ -88,7 +85,7 @@ local block_state deflate_huff OF((deflate_state *s, int flush)); local void lm_init OF((deflate_state *s)); local void putShortMSB OF((deflate_state *s, uInt b)); local void flush_pending OF((z_streamp strm)); - +local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); #ifdef ASMV void match_init OF((void)); /* asm code initialization */ uInt longest_match OF((deflate_state *s, IPos cur_match, int clas)); @@ -101,23 +98,6 @@ local void check_match OF((deflate_state *s, IPos start, IPos match, int length)); #endif -/* For fill_window_sse.c to use */ -ZLIB_INTERNAL int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); - -/* From crc32.c */ -extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); -extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); -extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); - -#ifdef _MSC_VER -#define INLINE __inline -#else -#define INLINE inline -#endif - -/* Inline optimisation */ -local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); - /* =========================================================================== * Local data */ @@ -184,6 +164,7 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ */ #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) + /* =========================================================================== * Insert string str in the dictionary and set match_head to the previous head * of the hash chain (the most recent string with same hash key). Return @@ -194,28 +175,17 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ * input characters and the first MIN_MATCH bytes of str are valid * (except for the last MIN_MATCH-1 bytes of the input file). 
*/ -local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) -{ - Pos ret; - - UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); #ifdef FASTEST - ret = s->head[s->ins_h]; +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) #else - ret = s->prev[str & s->w_mask] = s->head[s->ins_h]; +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) #endif - s->head[s->ins_h] = str; - - return ret; -} - -local INLINE Pos insert_string(deflate_state *const s, const Pos str) -{ - if (x86_cpu_enable_simd) - return insert_string_sse(s, str); - return insert_string_c(s, str); -} - /* =========================================================================== * Initialize the hash table (avoiding 64K overflow for 16 bit systems). @@ -249,7 +219,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, const char *version; int stream_size; { - unsigned window_padding = 8; deflate_state *s; int wrap = 1; static const char my_version[] = ZLIB_VERSION; @@ -259,8 +228,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, * output size for (length,distance) codes is <= 24 bits. 
*/ - x86_check_features(); - if (version == Z_NULL || version[0] != my_version[0] || stream_size != sizeof(z_stream)) { return Z_VERSION_ERROR; @@ -307,17 +274,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, s->w_size = 1 << s->w_bits; s->w_mask = s->w_size - 1; - if (x86_cpu_enable_simd) { - s->hash_bits = 15; - } else { - s->hash_bits = memLevel + 7; - } - + s->hash_bits = memLevel + 7; s->hash_size = 1 << s->hash_bits; s->hash_mask = s->hash_size - 1; s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); - s->window = (Bytef *) ZALLOC(strm, s->w_size + window_padding, 2*sizeof(Byte)); + s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); s->class_bitmap = NULL; @@ -385,7 +347,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) s->ins_h = s->window[0]; UPDATE_HASH(s, s->ins_h, s->window[1]); for (n = 0; n <= length - MIN_MATCH; n++) { - insert_string(s, n); + INSERT_STRING(s, n, hash_head); } if (hash_head) hash_head = 0; /* to make compiler happy */ return Z_OK; @@ -651,7 +613,7 @@ int ZEXPORT deflate (strm, flush) if (s->status == INIT_STATE) { #ifdef GZIP if (s->wrap == 2) { - crc_reset(s); + strm->adler = crc32(0L, Z_NULL, 0); put_byte(s, 31); put_byte(s, 139); put_byte(s, 8); @@ -929,7 +891,6 @@ int ZEXPORT deflate (strm, flush) /* Write the trailer */ #ifdef GZIP if (s->wrap == 2) { - crc_finalize(s); put_byte(s, (Byte)(strm->adler & 0xff)); put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); @@ -1052,7 +1013,7 @@ int ZEXPORT deflateCopy (dest, source) * allocating a large strm->next_in buffer and copying from it. * (See also flush_pending()). 
*/ -ZLIB_INTERNAL int read_buf(strm, buf, size) +local int read_buf(strm, buf, size) z_streamp strm; Bytef *buf; unsigned size; @@ -1064,17 +1025,15 @@ ZLIB_INTERNAL int read_buf(strm, buf, size) strm->avail_in -= len; + if (strm->state->wrap == 1) { + strm->adler = adler32(strm->adler, strm->next_in, len); + } #ifdef GZIP - if (strm->state->wrap == 2) { - copy_with_crc(strm, buf, len); + else if (strm->state->wrap == 2) { + strm->adler = crc32(strm->adler, strm->next_in, len); } - else #endif - { - zmemcpy(buf, strm->next_in, len); - if (strm->state->wrap == 1) - strm->adler = adler32(strm->adler, buf, len); - } + zmemcpy(buf, strm->next_in, len); strm->next_in += len; strm->total_in += len; @@ -1486,19 +1445,7 @@ local void check_match(s, start, match, length) * performed for at least two bytes (required for the zip translate_eol * option -- not supported here). */ -local void fill_window_c(deflate_state *s); - -local void fill_window(deflate_state *s) -{ - if (x86_cpu_enable_simd) { - fill_window_sse(s); - return; - } - - fill_window_c(s); -} - -local void fill_window_c(s) +local void fill_window(s) deflate_state *s; { register unsigned n, m; @@ -1764,7 +1711,7 @@ local block_state deflate_fast(s, flush, clas) */ hash_head = NIL; if (s->lookahead >= MIN_MATCH) { - hash_head = insert_string(s, s->strstart); + INSERT_STRING(s, s->strstart, hash_head); } /* Find the longest match, discarding those <= prev_length. @@ -1795,7 +1742,7 @@ local block_state deflate_fast(s, flush, clas) s->match_length--; /* string at strstart already in table */ do { s->strstart++; - hash_head = insert_string(s, s->strstart); + INSERT_STRING(s, s->strstart, hash_head); /* strstart never exceeds WSIZE-MAX_MATCH, so there are * always MIN_MATCH bytes ahead. 
*/ @@ -1874,7 +1821,7 @@ local block_state deflate_slow(s, flush, clas) */ hash_head = NIL; if (s->lookahead >= MIN_MATCH) { - hash_head = insert_string(s, s->strstart); + INSERT_STRING(s, s->strstart, hash_head); } /* Find the longest match, discarding those <= prev_length. @@ -1943,7 +1890,7 @@ local block_state deflate_slow(s, flush, clas) s->prev_length -= 2; do { if (++s->strstart <= max_insert) { - hash_head = insert_string(s, s->strstart); + INSERT_STRING(s, s->strstart, hash_head); } } while (--s->prev_length != 0); s->match_available = 0; @@ -2084,37 +2031,3 @@ local block_state deflate_huff(s, flush) FLUSH_BLOCK(s, flush == Z_FINISH); return flush == Z_FINISH ? finish_done : block_done; } - -/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will - * use intrinsic without extra params - */ -local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) -{ - Pos ret; - unsigned *ip, val, h = 0; - - ip = (unsigned *)&s->window[str]; - val = *ip; - - if (s->level >= 6) - val &= 0xFFFFFF; - -/* Windows clang should use inline asm */ -#if defined(_MSC_VER) && !defined(__clang__) - h = _mm_crc32_u32(h, val); -#elif defined(__i386__) || defined(__amd64__) - __asm__ __volatile__ ( - "crc32 %1,%0\n\t" - : "+r" (h) - : "r" (val) - ); -#else - /* This should never happen */ - assert(0); -#endif - - ret = s->head[h & s->hash_mask]; - s->head[h & s->hash_mask] = str; - s->prev[str & s->w_mask] = ret; - return ret; -} |