xref: /freebsd/contrib/arm-optimized-routines/networking/chksum_common.h (revision 35c0a8c449fd2b7f75029ebed5e10852240f0865)
1 /*
2  * Common code for checksum implementations
3  *
4  * Copyright (c) 2020, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 
8 #ifndef CHKSUM_COMMON_H
9 #define CHKSUM_COMMON_H
10 
11 #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
12 #error Only little endian supported
13 #endif
14 
15 #include <limits.h>
16 #include <stdbool.h>
17 #include <stdint.h>
18 #include <string.h>
19 
/*
 * Assert(exp): internal assertion macro.
 *
 * Assertions must be explicitly enabled by building with WANT_ASSERT set to
 * a non-zero value. In that case NDEBUG is undefined before <assert.h> is
 * included, so Assert() maps onto a live assert().
 *
 * When assertions are not enabled, the expression is still evaluated (any
 * side effects are kept) but its value is discarded.
 */
#if defined(WANT_ASSERT) && WANT_ASSERT
#undef NDEBUG
#include <assert.h>
#define Assert(exp) assert(exp)
#else
/* Whole expansion is parenthesized so it behaves as a single expression. */
#define Assert(exp) ((void) (exp))
#endif
28 
/*
 * Compiler-specific helpers.
 *
 *   likely(x) / unlikely(x)  branch-prediction hints around a condition
 *   may_alias                type attribute: may alias objects of other types
 *   always_inline            function attribute: force inlining
 *   no_unroll_loops          function attribute: disable loop unrolling
 *                            (defined empty for clang)
 *   bswap16(x)               swap the two bytes of a 16-bit value
 *
 * Without __GNUC__ the hints and attributes expand to nothing and bswap16()
 * falls back to portable shifts.
 */
#ifdef __GNUC__
#define likely(x)     __builtin_expect(!!(x), 1)
#define unlikely(x)   __builtin_expect(!!(x), 0)
#define may_alias     __attribute__((__may_alias__))
#define always_inline __attribute__((always_inline))
#ifdef __clang__
#define no_unroll_loops
#else
#define no_unroll_loops  __attribute__((optimize("no-unroll-loops")))
#endif
#define bswap16(x)    __builtin_bswap16((x))
#else
#define likely(x)     (x)
#define unlikely(x)   (x)
#define may_alias
#define always_inline
#define no_unroll_loops
/*
 * Portable fallback. The result is cast to uint16_t so that it has the same
 * type as __builtin_bswap16(). Note: the argument is evaluated twice, so it
 * must not have side effects.
 */
#define bswap16(x)    ((uint16_t) ((uint8_t) ((x) >> 8) | ((uint8_t) (x) << 8)))
#endif
48 
/* 64-bit unsigned constant with every bit set (parenthesized for safe use in expressions). */
#define ALL_ONES (~UINT64_C(0))
50 
/*
 * Read a 64-bit value from an address that need not be aligned.
 * The bytes are copied with memcpy(), which avoids alignment and aliasing
 * problems; compilers such as GCC turn this into a plain load instruction.
 */
static inline uint64_t
load64(const void *ptr)
{
    uint64_t value;

    memcpy(&value, ptr, sizeof(uint64_t));
    return value;
}
59 
/*
 * Read a 32-bit value from an address that need not be aligned.
 * Implemented with memcpy(); compilers such as GCC reduce it to a plain
 * load instruction.
 */
static inline uint32_t
load32(const void *ptr)
{
    uint32_t value;

    memcpy(&value, ptr, sizeof(uint32_t));
    return value;
}
68 
/*
 * Read a 16-bit value from an address that need not be aligned.
 * Implemented with memcpy(); compilers such as GCC reduce it to a plain
 * load instruction.
 */
static inline uint16_t
load16(const void *ptr)
{
    uint16_t value;

    memcpy(&value, ptr, sizeof(uint16_t));
    return value;
}
77 
78 /* slurp_small() is for small buffers, don't waste cycles on alignment */
79 no_unroll_loops
80 always_inline
81 static inline uint64_t
82 slurp_small(const void *ptr, uint32_t nbytes)
83 {
84     const unsigned char *cptr = ptr;
85     uint64_t sum = 0;
86     while (nbytes >= 4)
87     {
88 	sum += load32(cptr);
89 	cptr += 4;
90 	nbytes -= 4;
91     }
92     if (nbytes & 2)
93     {
94 	sum += load16(cptr);
95 	cptr += 2;
96     }
97     if (nbytes & 1)
98     {
99 	sum += (uint8_t) *cptr;
100     }
101     return sum;
102 }
103 
/*
 * Round a pointer down to the nearest multiple of 'bytes'.
 *
 * ptr    pointer to align
 * bytes  alignment; the mask -(uintptr_t) bytes only clears the low address
 *        bits correctly when this is a power of two
 *
 * Returns the aligned address, which is less than or equal to ptr.
 */
static inline const void *
align_ptr(const void *ptr, size_t bytes)
{
    const uintptr_t mask = -(uintptr_t) bytes;

    /* Keep the const qualifier through the integer round-trip */
    return (const void *) ((uintptr_t) ptr & mask);
}
109 
110 always_inline
111 static inline uint16_t
112 fold_and_swap(uint64_t sum, bool swap)
113 {
114     /* Fold 64-bit sum to 32 bits */
115     sum = (sum & 0xffffffff) + (sum >> 32);
116     sum = (sum & 0xffffffff) + (sum >> 32);
117     Assert(sum == (uint32_t) sum);
118 
119     /* Fold 32-bit sum to 16 bits */
120     sum = (sum & 0xffff) + (sum >> 16);
121     sum = (sum & 0xffff) + (sum >> 16);
122     Assert(sum == (uint16_t) sum);
123 
124     if (unlikely(swap)) /* Odd base pointer is unexpected */
125     {
126 	sum = bswap16(sum);
127     }
128 
129     return (uint16_t) sum;
130 }
131 
132 #endif
133