diff options
Diffstat (limited to 'networking/chksum.c')
-rw-r--r-- | networking/chksum.c | 81 |
1 files changed, 81 insertions, 0 deletions
diff --git a/networking/chksum.c b/networking/chksum.c new file mode 100644 index 0000000..95ce5ba --- /dev/null +++ b/networking/chksum.c @@ -0,0 +1,81 @@ +/* + * Compute 16-bit sum in ones' complement arithmetic (with end-around carry). + * This sum is often used as a simple checksum in networking. + * + * Copyright (c) 2020, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +#include "networking.h" +#include "chksum_common.h" + +always_inline +static inline uint32_t +slurp_head32(const void **pptr, uint32_t *nbytes) +{ + uint32_t sum = 0; + Assert(*nbytes >= 4); + uint32_t off = (uintptr_t) *pptr % 4; + if (likely(off != 0)) + { + /* Get rid of bytes 0..off-1 */ + const unsigned char *ptr32 = align_ptr(*pptr, 4); + uint32_t mask = ~0U << (CHAR_BIT * off); + sum = load32(ptr32) & mask; + *pptr = ptr32 + 4; + *nbytes -= 4 - off; + } + return sum; +} + +/* Additional loop unrolling would help when not auto-vectorizing */ +unsigned short +__chksum(const void *ptr, unsigned int nbytes) +{ + bool swap = false; + uint64_t sum = 0; + + if (nbytes > 300) + { + /* 4-byte align pointer */ + swap = (uintptr_t) ptr & 1; + sum = slurp_head32(&ptr, &nbytes); + } + /* Else benefit of aligning not worth the overhead */ + + /* Sum all 16-byte chunks */ + const char *cptr = ptr; + for (uint32_t nquads = nbytes / 16; nquads != 0; nquads--) + { + uint64_t h0 = load32(cptr + 0); + uint64_t h1 = load32(cptr + 4); + uint64_t h2 = load32(cptr + 8); + uint64_t h3 = load32(cptr + 12); + sum += h0 + h1 + h2 + h3; + cptr += 16; + } + nbytes %= 16; + Assert(nbytes < 16); + + /* Handle any trailing 4-byte chunks */ + while (nbytes >= 4) + { + sum += load32(cptr); + cptr += 4; + nbytes -= 4; + } + Assert(nbytes < 4); + + if (nbytes & 2) + { + sum += load16(cptr); + cptr += 2; + } + + if (nbytes & 1) + { + sum += *(uint8_t *)cptr; + } + + return fold_and_swap(sum, swap); +} |