diff options
Diffstat (limited to 'networking/test/chksum.c')
-rw-r--r-- | networking/test/chksum.c | 381 |
1 files changed, 381 insertions, 0 deletions
diff --git a/networking/test/chksum.c b/networking/test/chksum.c new file mode 100644 index 0000000..50722a4 --- /dev/null +++ b/networking/test/chksum.c @@ -0,0 +1,381 @@ +/* + * Ones' complement checksum test & benchmark + * + * Copyright 2016-2020 ARM Limited + * SPDX-License-Identifier: MIT + */ + +#define _GNU_SOURCE +#include <inttypes.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <time.h> +#include <unistd.h> +#include "../include/networking.h" + +#if WANT_ASSERT +#undef NDEBUG +#include <assert.h> +#define Assert(exp) assert(exp) +#else +#define Assert(exp) (void) (exp) +#endif + +#ifdef __GNUC__ +#define may_alias __attribute__((__may_alias__)) +#else +#define may_alias +#endif + +#define CACHE_LINE 64 +#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1)) + +/* Reference implementation - do not modify! */ +static uint16_t +checksum_simple(const void *ptr, uint32_t nbytes) +{ + const uint16_t *may_alias hptr = ptr; + uint64_t sum = 0;/* Need 64-bit accumulator when nbytes > 64K */ + + /* Sum all halfwords, assume misaligned accesses are handled in HW */ + for (uint32_t nhalfs = nbytes >> 1; nhalfs != 0; nhalfs--) + { + sum += *hptr++; + } + + /* Add any trailing odd byte */ + if ((nbytes & 0x01) != 0) + { + sum += *(uint8_t *) hptr; + } + + /* Fold 64-bit sum to 32 bits */ + sum = (sum & 0xffffffff) + (sum >> 32); + sum = (sum & 0xffffffff) + (sum >> 32); + Assert(sum == (uint32_t) sum); + + /* Fold 32-bit sum to 16 bits */ + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + Assert(sum == (uint16_t) sum); + + return (uint16_t) sum; +} + +static struct +{ + uint16_t (*cksum_fp)(const void *, uint32_t); + const char *name; +} implementations[] = +{ + { checksum_simple, "simple"}, + { __chksum, "scalar"}, +#if __arm__ + { __chksum_arm_simd, "simd" }, +#elif __aarch64__ + { __chksum_aarch64_simd, "simd" }, +#endif + { NULL, NULL} +}; + +static int +find_impl(const char *name) +{ + for (int i = 0; implementations[i].name != NULL; i++) + { + if (strcmp(implementations[i].name, name) == 0) + { + return i; + } + } + return -1; +} + +static uint16_t (*CKSUM_FP)(const void *, uint32_t); +static volatile uint16_t SINK; + +static bool +verify(const void *data, uint32_t offset, uint32_t size) +{ + + uint16_t csum_expected = checksum_simple(data, size); + uint16_t csum_actual = CKSUM_FP(data, size); + if (csum_actual != csum_expected) + { + fprintf(stderr, "\nInvalid checksum for offset %u size %u: " + "actual %04x expected %04x (valid)", + offset, size, csum_actual, csum_expected); + if (size < 65536) + { + /* Fatal error */ + exit(EXIT_FAILURE); + } + /* Else some implementations only support sizes up to 2^16 */ + return false; + } + return true; +} + +static uint64_t +clock_get_ns(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * (uint64_t) 1000000000 + ts.tv_nsec; +} + +static void +benchmark(const uint8_t *base, + size_t poolsize, + uint32_t blksize, + uint32_t numops, + uint64_t cpufreq) +{ + printf("%11u ", (unsigned int) blksize); fflush(stdout); + + uint64_t start = clock_get_ns(); + for (uint32_t i = 0; i < numops; i ++) + { + /* Read a random value from the pool */ + uint32_t random = ((uint32_t *) base)[i % (poolsize / 4)]; + /* Generate a random starting address */ + const void *data = &base[random % (poolsize - blksize)]; + SINK = CKSUM_FP(data, blksize); + } + uint64_t end = clock_get_ns(); + +#define MEGABYTE 1000000 /* Decimal megabyte (MB) */ + uint64_t elapsed_ns = end - start; + uint64_t elapsed_ms = elapsed_ns / 1000000; + uint32_t blks_per_s = (uint32_t) ((numops / elapsed_ms) * 1000); + uint64_t accbytes = (uint64_t) numops * blksize; + printf("%11ju ", (uintmax_t) ((accbytes / elapsed_ms) * 1000) / MEGABYTE); + unsigned int cyc_per_blk = cpufreq / blks_per_s; + printf("%11u ", cyc_per_blk); + if (blksize != 0) + { + unsigned int cyc_per_byte = 1000 * cyc_per_blk / blksize; + printf("%7u.%03u ", + cyc_per_byte / 1000, cyc_per_byte % 1000); + } + printf("\n"); +} + +int main(int argc, char *argv[]) +{ + int c; + bool DUMP = false; + uint32_t IMPL = 0;/* Simple implementation */ + uint64_t CPUFREQ = 0; + uint32_t BLKSIZE = 0; + uint32_t NUMOPS = 1000000; + uint32_t POOLSIZE = 512 * 1024;/* Typical ARM L2 cache size */ + + setvbuf(stdout, NULL, _IOLBF, 160); + while ((c = getopt(argc, argv, "b:df:i:n:p:")) != -1) + { + switch (c) + { + case 'b' : + { + int blksize = atoi(optarg); + if (blksize < 1 || blksize > POOLSIZE / 2) + { + fprintf(stderr, "Invalid block size %d\n", blksize); + exit(EXIT_FAILURE); + } + BLKSIZE = (unsigned) blksize; + break; + } + case 'd' : + DUMP = true; + break; + case 'f' : + { + int64_t cpufreq = atoll(optarg); + if (cpufreq < 1) + { + fprintf(stderr, "Invalid CPU frequency %"PRId64"\n", + cpufreq); + exit(EXIT_FAILURE); + } + CPUFREQ = cpufreq; + break; + } + case 'i' : + { + int impl = find_impl(optarg); + if (impl < 0) + { + fprintf(stderr, "Invalid implementation %s\n", optarg); + goto usage; + } + IMPL = (unsigned) impl; + break; + } + case 'n' : + { + int numops = atoi(optarg); + if (numops < 1) + { + fprintf(stderr, "Invalid number of operations %d\n", numops); + exit(EXIT_FAILURE); + } + NUMOPS = (unsigned) numops; + break; + } + case 'p' : + { + int poolsize = atoi(optarg); + if (poolsize < 4096) + { + fprintf(stderr, "Invalid pool size %d\n", poolsize); + exit(EXIT_FAILURE); + } + char c = optarg[strlen(optarg) - 1]; + if (c == 'M') + { + POOLSIZE = (unsigned) poolsize * 1024 * 1024; + } + else if (c == 'K') + { + POOLSIZE = (unsigned) poolsize * 1024; + } + else + { + POOLSIZE = (unsigned) poolsize; + } + break; + } + default : +usage : + fprintf(stderr, "Usage: checksum <options>\n" + "-b <blksize> Block size\n" + "-d Dump first 96 bytes of data\n" + "-f <cpufreq> CPU frequency (Hz)\n" + "-i <impl> Implementation\n" + "-n <numops> Number of operations\n" + "-p <poolsize> Pool size (K or M suffix)\n" + ); + printf("Implementations:"); + for (int i = 0; implementations[i].name != NULL; i++) + { + printf(" %s", implementations[i].name); + } + printf("\n"); + exit(EXIT_FAILURE); + } + } + if (optind > argc) + { + goto usage; + } + + CKSUM_FP = implementations[IMPL].cksum_fp; + POOLSIZE = ALIGN(POOLSIZE, CACHE_LINE); + uint8_t *base = mmap(0, POOLSIZE, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (base == MAP_FAILED) + { + perror("aligned_alloc"), exit(EXIT_FAILURE); + } + for (size_t i = 0; i < POOLSIZE / 4; i++) + { + ((uint32_t *) base)[i] = rand(); + } + + printf("Implementation: %s\n", implementations[IMPL].name); + printf("numops %u, poolsize ", NUMOPS); + if (POOLSIZE % (1024 * 1024) == 0) + { + printf("%uMiB", POOLSIZE / (1024 * 1024)); + } + else if (POOLSIZE % 1024 == 0) + { + printf("%uKiB", POOLSIZE / 1024); + } + else + { + printf("%uB", POOLSIZE); + } + printf(", blocksize %u, CPU frequency %juMHz\n", + BLKSIZE, (uintmax_t) (CPUFREQ / 1000000)); +#if WANT_ASSERT + printf("Warning: assertions are enabled\n"); +#endif + + if (DUMP) + { + /* Print out first 96 bytes of data for human debugging */ + for (int i = 0; i < 96; i++) + { + if (i % 8 == 0) + printf("%2u:", i); + printf(" %02x", base[i]); + if (i % 8 == 7) + printf("\n"); + } + } + + /* Verify that chosen algorithm handles all combinations of offsets and sizes */ + printf("Verifying..."); fflush(stdout); + bool success = true; + /* Check all (relevant) combinations of size and offset */ + for (int size = 0; size <= 256; size++) + { + for (int offset = 0; offset < 255; offset++) + { + /* Check at start of mapped memory */ + success &= verify(&base[offset], offset, size); + /* Check at end of mapped memory */ + uint8_t *p = base + POOLSIZE - (size + offset); + success &= verify(p, (uintptr_t) p % 64, size); + } + } + /* Check increasingly larger sizes */ + for (size_t size = 1; size < POOLSIZE; size *= 2) + { + success &= verify(base, 0, size); + } + /* Check the full size, this can detect accumulator overflows */ + success &= verify(base, 0, POOLSIZE); + printf("%s\n", success ? "OK" : "failure"); + + /* Print throughput in decimal megabyte (1000000B) per second */ + if (CPUFREQ != 0) + { + printf("%11s %11s %11s %11s\n", + "block size", "MB/s", "cycles/blk", "cycles/byte"); + } + else + { + printf("%11s %11s %11s %11s\n", + "block size", "MB/s", "ns/blk", "ns/byte"); + CPUFREQ = 1000000000; + } + if (BLKSIZE != 0) + { + benchmark(base, POOLSIZE, BLKSIZE, NUMOPS, CPUFREQ); + } + else + { + static const uint16_t sizes[] = + { 20, 42, 102, 250, 612, 1500, 3674, 9000, 0 }; + for (int i = 0; sizes[i] != 0; i++) + { + uint32_t numops = NUMOPS * 10000 / (40 + sizes[i]); + benchmark(base, POOLSIZE, sizes[i], numops, CPUFREQ); + } + } + + if (munmap(base, POOLSIZE) != 0) + { + perror("munmap"), exit(EXIT_FAILURE); + } + + return success ? EXIT_SUCCESS : EXIT_FAILURE; +} |