summaryrefslogtreecommitdiff
path: root/crc/crc32c-arm64.c
diff options
context:
space:
mode:
authorElliott Hughes <enh@google.com>2017-05-20 03:46:16 +0000
committerandroid-build-merger <android-build-merger@google.com>2017-05-20 03:46:16 +0000
commit5198dc19ac049d99835e760026de33ed6972ab0a (patch)
tree7da7fbd89e536bc8e7465f86c0b738dae13ec124 /crc/crc32c-arm64.c
parent9fcb2b709187bc177f79162bee1969d197f8ec33 (diff)
parenteda3a60699e1d96bb68875ef2169ca819eb8f4f9 (diff)
downloadfio-5198dc19ac049d99835e760026de33ed6972ab0a.tar.gz
Update to fio-2.20.
am: eda3a60699 Change-Id: Iba672643113fd1b737633c915e7e68012067639b
Diffstat (limited to 'crc/crc32c-arm64.c')
-rw-r--r--crc/crc32c-arm64.c115
1 files changed, 115 insertions, 0 deletions
diff --git a/crc/crc32c-arm64.c b/crc/crc32c-arm64.c
new file mode 100644
index 00000000..c3f42c7c
--- /dev/null
+++ b/crc/crc32c-arm64.c
@@ -0,0 +1,115 @@
+#include "crc32c.h"
+
+#define CRC32C3X8(ITR) \
+ crc1 = __crc32cd(crc1, *((const uint64_t *)data + 42*1 + (ITR)));\
+ crc2 = __crc32cd(crc2, *((const uint64_t *)data + 42*2 + (ITR)));\
+ crc0 = __crc32cd(crc0, *((const uint64_t *)data + 42*0 + (ITR)));
+
+#define CRC32C7X3X8(ITR) do {\
+ CRC32C3X8((ITR)*7+0) \
+ CRC32C3X8((ITR)*7+1) \
+ CRC32C3X8((ITR)*7+2) \
+ CRC32C3X8((ITR)*7+3) \
+ CRC32C3X8((ITR)*7+4) \
+ CRC32C3X8((ITR)*7+5) \
+ CRC32C3X8((ITR)*7+6) \
+ } while(0)
+
+#ifndef HWCAP_CRC32
+#define HWCAP_CRC32 (1 << 7)
+#endif /* HWCAP_CRC32 */
+
+int crc32c_arm64_available = 0;
+
+#ifdef ARCH_HAVE_ARM64_CRC_CRYPTO
+
+#include <sys/auxv.h>
+#include <arm_acle.h>
+#include <arm_neon.h>
+
+static int crc32c_probed;
+
+/*
+ * Function to calculate reflected crc with PMULL Instruction
+ * crc done "by 3" for fixed input block size of 1024 bytes
+ */
+uint32_t crc32c_arm64(unsigned char const *data, unsigned long length)
+{
+ signed long len = length;
+ uint32_t crc = ~0;
+ uint32_t crc0, crc1, crc2;
+
+ /* Load two consts: K1 and K2 */
+ const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014;
+ uint64_t t0, t1;
+
+ while ((len -= 1024) >= 0) {
+ /* Do first 8 bytes here for better pipelining */
+ crc0 = __crc32cd(crc, *(const uint64_t *)data);
+ crc1 = 0;
+ crc2 = 0;
+ data += sizeof(uint64_t);
+
+ /* Process block inline
+ Process crc0 last to avoid dependency with above */
+ CRC32C7X3X8(0);
+ CRC32C7X3X8(1);
+ CRC32C7X3X8(2);
+ CRC32C7X3X8(3);
+ CRC32C7X3X8(4);
+ CRC32C7X3X8(5);
+
+ data += 42*3*sizeof(uint64_t);
+
+ /* Merge crc0 and crc1 into crc2
+ crc1 multiply by K2
+ crc0 multiply by K1 */
+
+ t1 = (uint64_t)vmull_p64(crc1, k2);
+ t0 = (uint64_t)vmull_p64(crc0, k1);
+ crc = __crc32cd(crc2, *(const uint64_t *)data);
+ crc1 = __crc32cd(0, t1);
+ crc ^= crc1;
+ crc0 = __crc32cd(0, t0);
+ crc ^= crc0;
+
+ data += sizeof(uint64_t);
+ }
+
+ if (!(len += 1024))
+ return crc;
+
+ while ((len -= sizeof(uint64_t)) >= 0) {
+ crc = __crc32cd(crc, *(const uint64_t *)data);
+ data += sizeof(uint64_t);
+ }
+
+ /* The following is more efficient than the straight loop */
+ if (len & sizeof(uint32_t)) {
+ crc = __crc32cw(crc, *(const uint32_t *)data);
+ data += sizeof(uint32_t);
+ }
+ if (len & sizeof(uint16_t)) {
+ crc = __crc32ch(crc, *(const uint16_t *)data);
+ data += sizeof(uint16_t);
+ }
+ if (len & sizeof(uint8_t)) {
+ crc = __crc32cb(crc, *(const uint8_t *)data);
+ }
+
+ return crc;
+}
+
+void crc32c_arm64_probe(void)
+{
+ unsigned long hwcap;
+
+ if (!crc32c_probed) {
+ hwcap = getauxval(AT_HWCAP);
+ if (hwcap & HWCAP_CRC32)
+ crc32c_arm64_available = 1;
+ crc32c_probed = 1;
+ }
+}
+
+#endif /* ARCH_HAVE_ARM64_CRC_CRYPTO */