1 /* 2 * Compute 16-bit sum in ones' complement arithmetic (with end-around carry). 3 * This sum is often used as a simple checksum in networking. 4 * 5 * Copyright (c) 2020, Arm Limited. 6 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 */ 8 9 #include "networking.h" 10 #include "chksum_common.h" 11 12 always_inline 13 static inline uint32_t 14 slurp_head32(const void **pptr, uint32_t *nbytes) 15 { 16 uint32_t sum = 0; 17 Assert(*nbytes >= 4); 18 uint32_t off = (uintptr_t) *pptr % 4; 19 if (likely(off != 0)) 20 { 21 /* Get rid of bytes 0..off-1 */ 22 const unsigned char *ptr32 = align_ptr(*pptr, 4); 23 uint32_t mask = ~0U << (CHAR_BIT * off); 24 sum = load32(ptr32) & mask; 25 *pptr = ptr32 + 4; 26 *nbytes -= 4 - off; 27 } 28 return sum; 29 } 30 31 /* Additional loop unrolling would help when not auto-vectorizing */ 32 unsigned short 33 __chksum(const void *ptr, unsigned int nbytes) 34 { 35 bool swap = false; 36 uint64_t sum = 0; 37 38 if (nbytes > 300) 39 { 40 /* 4-byte align pointer */ 41 swap = (uintptr_t) ptr & 1; 42 sum = slurp_head32(&ptr, &nbytes); 43 } 44 /* Else benefit of aligning not worth the overhead */ 45 46 /* Sum all 16-byte chunks */ 47 const char *cptr = ptr; 48 for (uint32_t nquads = nbytes / 16; nquads != 0; nquads--) 49 { 50 uint64_t h0 = load32(cptr + 0); 51 uint64_t h1 = load32(cptr + 4); 52 uint64_t h2 = load32(cptr + 8); 53 uint64_t h3 = load32(cptr + 12); 54 sum += h0 + h1 + h2 + h3; 55 cptr += 16; 56 } 57 nbytes %= 16; 58 Assert(nbytes < 16); 59 60 /* Handle any trailing 4-byte chunks */ 61 while (nbytes >= 4) 62 { 63 sum += load32(cptr); 64 cptr += 4; 65 nbytes -= 4; 66 } 67 Assert(nbytes < 4); 68 69 if (nbytes & 2) 70 { 71 sum += load16(cptr); 72 cptr += 2; 73 } 74 75 if (nbytes & 1) 76 { 77 sum += *(uint8_t *)cptr; 78 } 79 80 return fold_and_swap(sum, swap); 81 } 82