diff options
author | Elliott Hughes <enh@google.com> | 2017-05-20 03:46:16 +0000 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2017-05-20 03:46:16 +0000 |
commit | 5198dc19ac049d99835e760026de33ed6972ab0a (patch) | |
tree | 7da7fbd89e536bc8e7465f86c0b738dae13ec124 /crc/crc32c-arm64.c | |
parent | 9fcb2b709187bc177f79162bee1969d197f8ec33 (diff) | |
parent | eda3a60699e1d96bb68875ef2169ca819eb8f4f9 (diff) | |
download | fio-5198dc19ac049d99835e760026de33ed6972ab0a.tar.gz |
Update to fio-2.20.
am: eda3a60699
Change-Id: Iba672643113fd1b737633c915e7e68012067639b
Diffstat (limited to 'crc/crc32c-arm64.c')
-rw-r--r-- | crc/crc32c-arm64.c | 115 |
1 files changed, 115 insertions, 0 deletions
diff --git a/crc/crc32c-arm64.c b/crc/crc32c-arm64.c new file mode 100644 index 00000000..c3f42c7c --- /dev/null +++ b/crc/crc32c-arm64.c @@ -0,0 +1,115 @@ +#include "crc32c.h" + +#define CRC32C3X8(ITR) \ + crc1 = __crc32cd(crc1, *((const uint64_t *)data + 42*1 + (ITR)));\ + crc2 = __crc32cd(crc2, *((const uint64_t *)data + 42*2 + (ITR)));\ + crc0 = __crc32cd(crc0, *((const uint64_t *)data + 42*0 + (ITR))); + +#define CRC32C7X3X8(ITR) do {\ + CRC32C3X8((ITR)*7+0) \ + CRC32C3X8((ITR)*7+1) \ + CRC32C3X8((ITR)*7+2) \ + CRC32C3X8((ITR)*7+3) \ + CRC32C3X8((ITR)*7+4) \ + CRC32C3X8((ITR)*7+5) \ + CRC32C3X8((ITR)*7+6) \ + } while(0) + +#ifndef HWCAP_CRC32 +#define HWCAP_CRC32 (1 << 7) +#endif /* HWCAP_CRC32 */ + +int crc32c_arm64_available = 0; + +#ifdef ARCH_HAVE_ARM64_CRC_CRYPTO + +#include <sys/auxv.h> +#include <arm_acle.h> +#include <arm_neon.h> + +static int crc32c_probed; + +/* + * Function to calculate reflected crc with PMULL Instruction + * crc done "by 3" for fixed input block size of 1024 bytes + */ +uint32_t crc32c_arm64(unsigned char const *data, unsigned long length) +{ + signed long len = length; + uint32_t crc = ~0; + uint32_t crc0, crc1, crc2; + + /* Load two consts: K1 and K2 */ + const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014; + uint64_t t0, t1; + + while ((len -= 1024) >= 0) { + /* Do first 8 bytes here for better pipelining */ + crc0 = __crc32cd(crc, *(const uint64_t *)data); + crc1 = 0; + crc2 = 0; + data += sizeof(uint64_t); + + /* Process block inline + Process crc0 last to avoid dependency with above */ + CRC32C7X3X8(0); + CRC32C7X3X8(1); + CRC32C7X3X8(2); + CRC32C7X3X8(3); + CRC32C7X3X8(4); + CRC32C7X3X8(5); + + data += 42*3*sizeof(uint64_t); + + /* Merge crc0 and crc1 into crc2 + crc1 multiply by K2 + crc0 multiply by K1 */ + + t1 = (uint64_t)vmull_p64(crc1, k2); + t0 = (uint64_t)vmull_p64(crc0, k1); + crc = __crc32cd(crc2, *(const uint64_t *)data); + crc1 = __crc32cd(0, t1); + crc ^= crc1; + crc0 = __crc32cd(0, t0); + crc ^= crc0; + + data += sizeof(uint64_t); + } + + if (!(len += 1024)) + return crc; + + while ((len -= sizeof(uint64_t)) >= 0) { + crc = __crc32cd(crc, *(const uint64_t *)data); + data += sizeof(uint64_t); + } + + /* The following is more efficient than the straight loop */ + if (len & sizeof(uint32_t)) { + crc = __crc32cw(crc, *(const uint32_t *)data); + data += sizeof(uint32_t); + } + if (len & sizeof(uint16_t)) { + crc = __crc32ch(crc, *(const uint16_t *)data); + data += sizeof(uint16_t); + } + if (len & sizeof(uint8_t)) { + crc = __crc32cb(crc, *(const uint8_t *)data); + } + + return crc; +} + +void crc32c_arm64_probe(void) +{ + unsigned long hwcap; + + if (!crc32c_probed) { + hwcap = getauxval(AT_HWCAP); + if (hwcap & HWCAP_CRC32) + crc32c_arm64_available = 1; + crc32c_probed = 1; + } +} + +#endif /* ARCH_HAVE_ARM64_CRC_CRYPTO */ |