summary | refs | log | tree | commit | diff
path: root/deflate.c
diff options
context:
space:
mode:
author: qyearsley <qyearsley@chromium.org> 2014-10-24 16:09:39 -0700
committer: Commit bot <commit-bot@chromium.org> 2014-10-24 23:10:06 +0000
commit: 3230118192e5332c934514c094f33581a355fb3d (patch)
tree: 88b3e24081d6ba79716f6ca55df76dbb5fc3e6b0 /deflate.c
parent: a8515195fb47947578abc944a48a95faad59bbcc (diff)
download: zlib-3230118192e5332c934514c094f33581a355fb3d.tar.gz
Revert of Reland "Integrate SIMD optimisations for zlib" (patchset #2 id:40001 of https://codereview.chromium.org/677713002/)
Reason for revert:
Speculatively reverting because XP Tests (1) is having failures.
https://build.chromium.org/p/chromium.win/builders/XP%20Tests%20(1)

Original issue's description:
> Reland "Integrate SIMD optimisations for zlib"
>
> This reland adds an MSan suppression entry to work around gaps in MSan's
> support for some of the intrinsics this patch uses. This version also inlines
> the insert_string_sse function as it uses inline assembly and therefore does
> not need to be in the static library.
>
> Original CL: https://codereview.chromium.org/552123005
>
> These optimisations have been published on zlib mailing list and at
> https://github.com/jtkukunas/zlib/
>
> This change merges the following optimisation patches:
> - "For x86, add CPUID check."
> - "Adds SSE2 optimized hash shifting to fill_window."
> - "add SSE4.2 optimized hash function"
> - "add PCLMULQDQ optimized CRC folding"
>
> From Jim Kukunas <james.t.kukunas@linux.intel.com>; and adapts them to the
> current zlib version in Chromium.
>
> The optimisations are enabled at runtime if all the necessary CPU features are
> present. As the optimisations require extra cflags to enable the compiler to
> use the instructions the optimisations are held in their own static library
> with a stub implementation to allow linking on other platforms.
>
> TEST=net_unittests(GZipUnitTest) passes, Chrome functions and performance
> improvement seen on RoboHornet benchmark on Linux Desktop
> BUG=401517
>
> Committed: https://crrev.com/a5022d5eab6f77889aceed6ab0ccaf44a657ffc4
> Cr-Commit-Position: refs/heads/master@{#301162}

TBR=agl@chromium.org,hans@chromium.org,robert.bradford@intel.com
NOTREECHECKS=true
NOTRY=true
BUG=401517

Review URL: https://codereview.chromium.org/665203006
Cr-Original-Commit-Position: refs/heads/master@{#301221}
Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src
Cr-Mirrored-Commit: 5d38e0bd32f9a7e4766b877711c710df986d74ed
Diffstat (limited to 'deflate.c')
-rw-r--r-- deflate.c 139
1 files changed, 26 insertions, 113 deletions
diff --git a/deflate.c b/deflate.c
index 55ec215..8043e5b 100644
--- a/deflate.c
+++ b/deflate.c
@@ -49,10 +49,7 @@
/* @(#) $Id$ */
-#include <assert.h>
-
#include "deflate.h"
-#include "x86.h"
const char deflate_copyright[] =
" deflate 1.2.5 Copyright 1995-2010 Jean-loup Gailly and Mark Adler ";
@@ -88,7 +85,7 @@ local block_state deflate_huff OF((deflate_state *s, int flush));
local void lm_init OF((deflate_state *s));
local void putShortMSB OF((deflate_state *s, uInt b));
local void flush_pending OF((z_streamp strm));
-
+local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size));
#ifdef ASMV
void match_init OF((void)); /* asm code initialization */
uInt longest_match OF((deflate_state *s, IPos cur_match, int clas));
@@ -101,23 +98,6 @@ local void check_match OF((deflate_state *s, IPos start, IPos match,
int length));
#endif
-/* For fill_window_sse.c to use */
-ZLIB_INTERNAL int read_buf OF((z_streamp strm, Bytef *buf, unsigned size));
-
-/* From crc32.c */
-extern void ZLIB_INTERNAL crc_reset(deflate_state *const s);
-extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s);
-extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size);
-
-#ifdef _MSC_VER
-#define INLINE __inline
-#else
-#define INLINE inline
-#endif
-
-/* Inline optimisation */
-local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str);
-
/* ===========================================================================
* Local data
*/
@@ -184,6 +164,7 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
*/
#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
+
/* ===========================================================================
* Insert string str in the dictionary and set match_head to the previous head
* of the hash chain (the most recent string with same hash key). Return
@@ -194,28 +175,17 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
* input characters and the first MIN_MATCH bytes of str are valid
* (except for the last MIN_MATCH-1 bytes of the input file).
*/
-local INLINE Pos insert_string_c(deflate_state *const s, const Pos str)
-{
- Pos ret;
-
- UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]);
#ifdef FASTEST
- ret = s->head[s->ins_h];
+#define INSERT_STRING(s, str, match_head) \
+ (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
+ match_head = s->head[s->ins_h], \
+ s->head[s->ins_h] = (Pos)(str))
#else
- ret = s->prev[str & s->w_mask] = s->head[s->ins_h];
+#define INSERT_STRING(s, str, match_head) \
+ (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
+ match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \
+ s->head[s->ins_h] = (Pos)(str))
#endif
- s->head[s->ins_h] = str;
-
- return ret;
-}
-
-local INLINE Pos insert_string(deflate_state *const s, const Pos str)
-{
- if (x86_cpu_enable_simd)
- return insert_string_sse(s, str);
- return insert_string_c(s, str);
-}
-
/* ===========================================================================
* Initialize the hash table (avoiding 64K overflow for 16 bit systems).
@@ -249,7 +219,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
const char *version;
int stream_size;
{
- unsigned window_padding = 8;
deflate_state *s;
int wrap = 1;
static const char my_version[] = ZLIB_VERSION;
@@ -259,8 +228,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
* output size for (length,distance) codes is <= 24 bits.
*/
- x86_check_features();
-
if (version == Z_NULL || version[0] != my_version[0] ||
stream_size != sizeof(z_stream)) {
return Z_VERSION_ERROR;
@@ -307,17 +274,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
s->w_size = 1 << s->w_bits;
s->w_mask = s->w_size - 1;
- if (x86_cpu_enable_simd) {
- s->hash_bits = 15;
- } else {
- s->hash_bits = memLevel + 7;
- }
-
+ s->hash_bits = memLevel + 7;
s->hash_size = 1 << s->hash_bits;
s->hash_mask = s->hash_size - 1;
s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
- s->window = (Bytef *) ZALLOC(strm, s->w_size + window_padding, 2*sizeof(Byte));
+ s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos));
s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos));
s->class_bitmap = NULL;
@@ -385,7 +347,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
s->ins_h = s->window[0];
UPDATE_HASH(s, s->ins_h, s->window[1]);
for (n = 0; n <= length - MIN_MATCH; n++) {
- insert_string(s, n);
+ INSERT_STRING(s, n, hash_head);
}
if (hash_head) hash_head = 0; /* to make compiler happy */
return Z_OK;
@@ -651,7 +613,7 @@ int ZEXPORT deflate (strm, flush)
if (s->status == INIT_STATE) {
#ifdef GZIP
if (s->wrap == 2) {
- crc_reset(s);
+ strm->adler = crc32(0L, Z_NULL, 0);
put_byte(s, 31);
put_byte(s, 139);
put_byte(s, 8);
@@ -929,7 +891,6 @@ int ZEXPORT deflate (strm, flush)
/* Write the trailer */
#ifdef GZIP
if (s->wrap == 2) {
- crc_finalize(s);
put_byte(s, (Byte)(strm->adler & 0xff));
put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
put_byte(s, (Byte)((strm->adler >> 16) & 0xff));
@@ -1052,7 +1013,7 @@ int ZEXPORT deflateCopy (dest, source)
* allocating a large strm->next_in buffer and copying from it.
* (See also flush_pending()).
*/
-ZLIB_INTERNAL int read_buf(strm, buf, size)
+local int read_buf(strm, buf, size)
z_streamp strm;
Bytef *buf;
unsigned size;
@@ -1064,17 +1025,15 @@ ZLIB_INTERNAL int read_buf(strm, buf, size)
strm->avail_in -= len;
+ if (strm->state->wrap == 1) {
+ strm->adler = adler32(strm->adler, strm->next_in, len);
+ }
#ifdef GZIP
- if (strm->state->wrap == 2) {
- copy_with_crc(strm, buf, len);
+ else if (strm->state->wrap == 2) {
+ strm->adler = crc32(strm->adler, strm->next_in, len);
}
- else
#endif
- {
- zmemcpy(buf, strm->next_in, len);
- if (strm->state->wrap == 1)
- strm->adler = adler32(strm->adler, buf, len);
- }
+ zmemcpy(buf, strm->next_in, len);
strm->next_in += len;
strm->total_in += len;
@@ -1486,19 +1445,7 @@ local void check_match(s, start, match, length)
* performed for at least two bytes (required for the zip translate_eol
* option -- not supported here).
*/
-local void fill_window_c(deflate_state *s);
-
-local void fill_window(deflate_state *s)
-{
- if (x86_cpu_enable_simd) {
- fill_window_sse(s);
- return;
- }
-
- fill_window_c(s);
-}
-
-local void fill_window_c(s)
+local void fill_window(s)
deflate_state *s;
{
register unsigned n, m;
@@ -1764,7 +1711,7 @@ local block_state deflate_fast(s, flush, clas)
*/
hash_head = NIL;
if (s->lookahead >= MIN_MATCH) {
- hash_head = insert_string(s, s->strstart);
+ INSERT_STRING(s, s->strstart, hash_head);
}
/* Find the longest match, discarding those <= prev_length.
@@ -1795,7 +1742,7 @@ local block_state deflate_fast(s, flush, clas)
s->match_length--; /* string at strstart already in table */
do {
s->strstart++;
- hash_head = insert_string(s, s->strstart);
+ INSERT_STRING(s, s->strstart, hash_head);
/* strstart never exceeds WSIZE-MAX_MATCH, so there are
* always MIN_MATCH bytes ahead.
*/
@@ -1874,7 +1821,7 @@ local block_state deflate_slow(s, flush, clas)
*/
hash_head = NIL;
if (s->lookahead >= MIN_MATCH) {
- hash_head = insert_string(s, s->strstart);
+ INSERT_STRING(s, s->strstart, hash_head);
}
/* Find the longest match, discarding those <= prev_length.
@@ -1943,7 +1890,7 @@ local block_state deflate_slow(s, flush, clas)
s->prev_length -= 2;
do {
if (++s->strstart <= max_insert) {
- hash_head = insert_string(s, s->strstart);
+ INSERT_STRING(s, s->strstart, hash_head);
}
} while (--s->prev_length != 0);
s->match_available = 0;
@@ -2084,37 +2031,3 @@ local block_state deflate_huff(s, flush)
FLUSH_BLOCK(s, flush == Z_FINISH);
return flush == Z_FINISH ? finish_done : block_done;
}
-
-/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will
- * use intrinsic without extra params
- */
-local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str)
-{
- Pos ret;
- unsigned *ip, val, h = 0;
-
- ip = (unsigned *)&s->window[str];
- val = *ip;
-
- if (s->level >= 6)
- val &= 0xFFFFFF;
-
-/* Windows clang should use inline asm */
-#if defined(_MSC_VER) && !defined(__clang__)
- h = _mm_crc32_u32(h, val);
-#elif defined(__i386__) || defined(__amd64__)
- __asm__ __volatile__ (
- "crc32 %1,%0\n\t"
- : "+r" (h)
- : "r" (val)
- );
-#else
- /* This should never happen */
- assert(0);
-#endif
-
- ret = s->head[h & s->hash_mask];
- s->head[h & s->hash_mask] = str;
- s->prev[str & s->w_mask] = ret;
- return ret;
-}