summaryrefslogtreecommitdiff
path: root/contrib/optimizations/slide_hash_neon.h
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/optimizations/slide_hash_neon.h')
-rw-r--r--contrib/optimizations/slide_hash_neon.h65
1 files changed, 0 insertions, 65 deletions
diff --git a/contrib/optimizations/slide_hash_neon.h b/contrib/optimizations/slide_hash_neon.h
deleted file mode 100644
index 26995d7..0000000
--- a/contrib/optimizations/slide_hash_neon.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Copyright 2018 The Chromium Authors. All rights reserved.
- * Use of this source code is governed by a BSD-style license that can be
- * found in the Chromium source repository LICENSE file.
- */
-#ifndef __SLIDE_HASH__NEON__
-#define __SLIDE_HASH__NEON__
-
-#include "deflate.h"
-#include <arm_neon.h>
-
-inline static void ZLIB_INTERNAL neon_slide_hash_update(Posf *hash,
- const uInt hash_size,
- const ush w_size)
-{
- /* NEON 'Q' registers allow to store 128 bits, so we can load 8x16-bits
- * values. For further details, check:
- * ARM DHT 0002A, section 1.3.2 NEON Registers.
- */
- const size_t chunk = sizeof(uint16x8_t) / sizeof(uint16_t);
- /* Unrolling the operation yielded a compression performance boost in both
- * ARMv7 (from 11.7% to 13.4%) and ARMv8 (from 3.7% to 7.5%) for HTML4
- * content. For full benchmarking data, check: http://crbug.com/863257.
- */
- const size_t stride = 2*chunk;
- const uint16x8_t v = vdupq_n_u16(w_size);
-
- for (Posf *end = hash + hash_size; hash != end; hash += stride) {
- uint16x8_t m_low = vld1q_u16(hash);
- uint16x8_t m_high = vld1q_u16(hash + chunk);
-
- /* The first 'q' in vqsubq_u16 makes these subtracts saturate to zero,
- * replacing the ternary operator expression in the original code:
- * (m >= wsize ? m - wsize : NIL).
- */
- m_low = vqsubq_u16(m_low, v);
- m_high = vqsubq_u16(m_high, v);
-
- vst1q_u16(hash, m_low);
- vst1q_u16(hash + chunk, m_high);
- }
-}
-
-
-inline static void ZLIB_INTERNAL neon_slide_hash(Posf *head, Posf *prev,
- const unsigned short w_size,
- const uInt hash_size)
-{
- /*
- * SIMD implementation for hash table rebase assumes:
- * 1. hash chain offset (Pos) is 2 bytes.
- * 2. hash table size is multiple of 32 bytes.
- * #1 should be true as Pos is defined as "ush"
- * #2 should be true as hash_bits are greater than 7
- */
- const size_t size = hash_size * sizeof(head[0]);
- Assert(sizeof(Pos) == 2, "Wrong Pos size.");
- Assert((size % sizeof(uint16x8_t) * 2) == 0, "Hash table size error.");
-
- neon_slide_hash_update(head, hash_size, w_size);
-#ifndef FASTEST
- neon_slide_hash_update(prev, w_size, w_size);
-#endif
-}
-
-#endif