// SPDX-License-Identifier: GPL-2.0
/*
 * Checksum library
 *
 * Influenced by arch/arm64/lib/csum.c
 * Copyright (C) 2023-2024 Rivos Inc.
 */
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/jump_label.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>

#include <asm/cpufeature.h>

#include <net/checksum.h>

/* Default version is sufficient for 32 bit */
#ifndef CONFIG_32BIT
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
                        const struct in6_addr *daddr,
                        __u32 len, __u8 proto, __wsum csum)
{
        unsigned int ulen, uproto;
        unsigned long sum = (__force unsigned long)csum;

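        /*
         * Accumulate the source and destination addresses, the length and
         * the protocol as 32-bit words; ten 32-bit additions cannot overflow
         * the 64-bit accumulator, so no intermediate carry handling is
         * needed.
         */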
        sum += (__force unsigned long)saddr->s6_addr32[0];
        sum += (__force unsigned long)saddr->s6_addr32[1];
        sum += (__force unsigned long)saddr->s6_addr32[2];
        sum += (__force unsigned long)saddr->s6_addr32[3];

        sum += (__force unsigned long)daddr->s6_addr32[0];
        sum += (__force unsigned long)daddr->s6_addr32[1];
        sum += (__force unsigned long)daddr->s6_addr32[2];
        sum += (__force unsigned long)daddr->s6_addr32[3];

        ulen = (__force unsigned int)htonl((unsigned int)len);
        sum += ulen;

        uproto = (__force unsigned int)htonl(proto);
        sum += uproto;

        if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
            IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) &&
            riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {
                unsigned long fold_temp;

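                /*
                 * Zbb fold: add the upper and lower 32-bit halves of sum,
                 * then compute ~(sum + ror32(sum, 16)) via not/subw (since
                 * ~x - y == ~(x + y)); bits 16-31 then hold the final 16-bit
                 * checksum, matching what csum_fold() produces below.
                 */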
48 asm(".option push \n\
49 .option arch,+zbb \n\
50 rori %[fold_temp], %[sum], 32 \n\
51 add %[sum], %[fold_temp], %[sum] \n\
52 srli %[sum], %[sum], 32 \n\
53 not %[fold_temp], %[sum] \n\
54 roriw %[sum], %[sum], 16 \n\
55 subw %[sum], %[fold_temp], %[sum] \n\
56 .option pop"
57 : [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp));
58 return (__force __sum16)(sum >> 16);
59 }
60
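        /* Fold the 64-bit sum to 32 bits; csum_fold() finishes the job. */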
        sum += ror64(sum, 32);
        sum >>= 32;
        return csum_fold((__force __wsum)sum);
}
EXPORT_SYMBOL(csum_ipv6_magic);
#endif /* !CONFIG_32BIT */

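/* Low address bits that make a word (RV32) or doubleword (RV64) access misaligned. */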
#ifdef CONFIG_32BIT
#define OFFSET_MASK 3
#elif CONFIG_64BIT
#define OFFSET_MASK 7
#endif

static inline __no_sanitize_address unsigned long
do_csum_common(const unsigned long *ptr, const unsigned long *end,
               unsigned long data)
{
        unsigned int shift;
        unsigned long csum = 0, carry = 0;

        /*
         * Do 32-bit reads on RV32 and 64-bit reads otherwise. This should be
         * faster than doing 32-bit reads on architectures that support larger
         * reads.
         */
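        /*
         * Accumulate with a separate carry counter: "csum < data" is true
         * after an add exactly when it wrapped, and the carries are folded
         * back in after the loop (end-around carry).
         */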
        while (ptr < end) {
                csum += data;
                carry += csum < data;
                data = *(ptr++);
        }

        /*
         * The last word may extend past end; mask off the bytes that were
         * over-read beyond the tail, if any are left over.
         */
        shift = ((long)ptr - (long)end) * 8;
#ifdef __LITTLE_ENDIAN
        data = (data << shift) >> shift;
#else
        data = (data >> shift) << shift;
#endif
        csum += data;
        carry += csum < data;
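        /* Fold the accumulated carries back in, including any carry from this add. */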
        csum += carry;
        csum += csum < carry;

        return csum;
}

/*
 * This algorithm accounts for buff being misaligned.
 * If buff is not aligned, it will over-read bytes but not use the bytes that
 * it shouldn't. The same thing will occur on the tail end of the read.
 */
static inline __no_sanitize_address unsigned int
do_csum_with_alignment(const unsigned char *buff, int len)
{
        unsigned int offset, shift;
        unsigned long csum, data;
        const unsigned long *ptr, *end;

        /*
         * Align the address to the closest word (double word on rv64) that
         * comes before buff. This should always be in the same page and
         * cache line. Directly call KASAN with the alignment we will be using.
         */
        offset = (unsigned long)buff & OFFSET_MASK;
        kasan_check_read(buff, len);
        ptr = (const unsigned long *)(buff - offset);

        /*
         * Clear the bytes that were over-read before the start of buff, if
         * buff was not aligned.
         */
        shift = offset * 8;
        data = *(ptr++);
#ifdef __LITTLE_ENDIAN
        data = (data >> shift) << shift;
#else
        data = (data << shift) >> shift;
#endif
        end = (const unsigned long *)(buff + len);
        csum = do_csum_common(ptr, end, data);

#ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT
        if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
            IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) &&
            riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {
                unsigned long fold_temp;

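                /*
                 * Fold csum down to 16 bits. If buff started at an odd
                 * address, every byte landed in the opposite lane of its
                 * 16-bit word, so rev8 swaps the result back before it is
                 * extracted; otherwise the branch skips straight to end.
                 */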
#ifdef CONFIG_32BIT
                asm_goto_output(".option push \n\
                .option arch,+zbb \n\
                rori %[fold_temp], %[csum], 16 \n\
                andi %[offset], %[offset], 1 \n\
                add %[csum], %[fold_temp], %[csum] \n\
                beq %[offset], zero, %l[end] \n\
                rev8 %[csum], %[csum] \n\
                .option pop"
                        : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
                        : [offset] "r" (offset)
                        :
                        : end);

                return (unsigned short)csum;
#else /* !CONFIG_32BIT */
                asm_goto_output(".option push \n\
                .option arch,+zbb \n\
                rori %[fold_temp], %[csum], 32 \n\
                add %[csum], %[fold_temp], %[csum] \n\
                srli %[csum], %[csum], 32 \n\
                roriw %[fold_temp], %[csum], 16 \n\
                addw %[csum], %[fold_temp], %[csum] \n\
                andi %[offset], %[offset], 1 \n\
                beq %[offset], zero, %l[end] \n\
                rev8 %[csum], %[csum] \n\
                .option pop"
                        : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
                        : [offset] "r" (offset)
                        :
                        : end);

                return (csum << 16) >> 48;
#endif /* !CONFIG_32BIT */
end:
                return csum >> 16;
        }
#endif /* CC_HAS_ASM_GOTO_TIED_OUTPUT */
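        /*
         * Non-Zbb (or no tied-output asm goto) fallback: fold the sum to 32
         * bits on RV64, then to 16 bits, byte-swapping the result if buff
         * started at an odd address.
         */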
#ifndef CONFIG_32BIT
        csum += ror64(csum, 32);
        csum >>= 32;
#endif
        csum = (u32)csum + ror32((u32)csum, 16);
        if (offset & 1)
                return (u16)swab32(csum);
        return csum >> 16;
}

/*
 * Does not perform alignment; should only be used if the machine has fast
 * misaligned accesses, or when buff is known to be aligned.
 */
static inline __no_sanitize_address unsigned int
do_csum_no_alignment(const unsigned char *buff, int len)
{
        unsigned long csum, data;
        const unsigned long *ptr, *end;

        ptr = (const unsigned long *)(buff);
        data = *(ptr++);

        kasan_check_read(buff, len);

        end = (const unsigned long *)(buff + len);
        csum = do_csum_common(ptr, end, data);

        if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
            IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) &&
            riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {
                unsigned long fold_temp;

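                /*
                 * Zbb fold: on RV64 first fold the 64-bit sum to 32 bits,
                 * then add the upper and lower 16-bit halves so the folded
                 * checksum ends up in bits 16-31, which ">> 16" extracts.
                 */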
#ifdef CONFIG_32BIT
                asm(".option push \n\
                .option arch,+zbb \n\
                rori %[fold_temp], %[csum], 16 \n\
                add %[csum], %[fold_temp], %[csum] \n\
                .option pop"
                        : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
                        :
                        : );

#else /* !CONFIG_32BIT */
                asm(".option push \n\
                .option arch,+zbb \n\
                rori %[fold_temp], %[csum], 32 \n\
                add %[csum], %[fold_temp], %[csum] \n\
                srli %[csum], %[csum], 32 \n\
                roriw %[fold_temp], %[csum], 16 \n\
                addw %[csum], %[fold_temp], %[csum] \n\
                .option pop"
                        : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
                        :
                        : );
#endif /* !CONFIG_32BIT */
                return csum >> 16;
        }

#ifndef CONFIG_32BIT
        csum += ror64(csum, 32);
        csum >>= 32;
#endif
        csum = (u32)csum + ror32((u32)csum, 16);
        return csum >> 16;
}

/*
 * Perform a checksum on an arbitrary memory address.
 * Will do a light-weight address alignment if buff is misaligned, unless
 * the CPU supports fast misaligned accesses.
 */
unsigned int do_csum(const unsigned char *buff, int len)
{
        if (unlikely(len <= 0))
                return 0;

        /*
         * Significant performance gains can be seen by not doing alignment
         * on machines with fast misaligned accesses.
         *
         * There is some duplicate code between the "with_alignment" and
         * "no_alignment" implementations, but the overlap is too awkward to
         * be able to fit in one function without introducing multiple static
         * branches. The largest chunk of overlap was delegated into the
         * do_csum_common function.
         */
        if (has_fast_unaligned_accesses() || (((unsigned long)buff & OFFSET_MASK) == 0))
                return do_csum_no_alignment(buff, len);

        return do_csum_with_alignment(buff, len);
}