xref: /linux/arch/arm64/lib/csum.c (revision 24bce201d79807b668bf9d9e0aca801c5c0d5f78)
1 // SPDX-License-Identifier: GPL-2.0-only
2 // Copyright (C) 2019-2020 Arm Ltd.
3 
4 #include <linux/compiler.h>
5 #include <linux/kasan-checks.h>
6 #include <linux/kernel.h>
7 
8 #include <net/checksum.h>
9 
10 /* Looks dumb, but generates nice-ish code */
11 static u64 accumulate(u64 sum, u64 data)
12 {
13 	__uint128_t tmp = (__uint128_t)sum + data;
14 	return tmp + (tmp >> 64);
15 }
16 
17 /*
18  * We over-read the buffer and this makes KASAN unhappy. Instead, disable
19  * instrumentation and call kasan explicitly.
20  */
21 unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
22 {
23 	unsigned int offset, shift, sum;
24 	const u64 *ptr;
25 	u64 data, sum64 = 0;
26 
27 	if (unlikely(len == 0))
28 		return 0;
29 
30 	offset = (unsigned long)buff & 7;
31 	/*
32 	 * This is to all intents and purposes safe, since rounding down cannot
33 	 * result in a different page or cache line being accessed, and @buff
34 	 * should absolutely not be pointing to anything read-sensitive. We do,
35 	 * however, have to be careful not to piss off KASAN, which means using
36 	 * unchecked reads to accommodate the head and tail, for which we'll
37 	 * compensate with an explicit check up-front.
38 	 */
39 	kasan_check_read(buff, len);
40 	ptr = (u64 *)(buff - offset);
41 	len = len + offset - 8;
42 
43 	/*
44 	 * Head: zero out any excess leading bytes. Shifting back by the same
45 	 * amount should be at least as fast as any other way of handling the
46 	 * odd/even alignment, and means we can ignore it until the very end.
47 	 */
48 	shift = offset * 8;
49 	data = *ptr++;
50 #ifdef __LITTLE_ENDIAN
51 	data = (data >> shift) << shift;
52 #else
53 	data = (data << shift) >> shift;
54 #endif
55 
56 	/*
57 	 * Body: straightforward aligned loads from here on (the paired loads
58 	 * underlying the quadword type still only need dword alignment). The
59 	 * main loop strictly excludes the tail, so the second loop will always
60 	 * run at least once.
61 	 */
62 	while (unlikely(len > 64)) {
63 		__uint128_t tmp1, tmp2, tmp3, tmp4;
64 
65 		tmp1 = *(__uint128_t *)ptr;
66 		tmp2 = *(__uint128_t *)(ptr + 2);
67 		tmp3 = *(__uint128_t *)(ptr + 4);
68 		tmp4 = *(__uint128_t *)(ptr + 6);
69 
70 		len -= 64;
71 		ptr += 8;
72 
73 		/* This is the "don't dump the carry flag into a GPR" idiom */
74 		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
75 		tmp2 += (tmp2 >> 64) | (tmp2 << 64);
76 		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
77 		tmp4 += (tmp4 >> 64) | (tmp4 << 64);
78 		tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64);
79 		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
80 		tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64);
81 		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
82 		tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64);
83 		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
84 		tmp1 = ((tmp1 >> 64) << 64) | sum64;
85 		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
86 		sum64 = tmp1 >> 64;
87 	}
88 	while (len > 8) {
89 		__uint128_t tmp;
90 
91 		sum64 = accumulate(sum64, data);
92 		tmp = *(__uint128_t *)ptr;
93 
94 		len -= 16;
95 		ptr += 2;
96 
97 #ifdef __LITTLE_ENDIAN
98 		data = tmp >> 64;
99 		sum64 = accumulate(sum64, tmp);
100 #else
101 		data = tmp;
102 		sum64 = accumulate(sum64, tmp >> 64);
103 #endif
104 	}
105 	if (len > 0) {
106 		sum64 = accumulate(sum64, data);
107 		data = *ptr;
108 		len -= 8;
109 	}
110 	/*
111 	 * Tail: zero any over-read bytes similarly to the head, again
112 	 * preserving odd/even alignment.
113 	 */
114 	shift = len * -8;
115 #ifdef __LITTLE_ENDIAN
116 	data = (data << shift) >> shift;
117 #else
118 	data = (data >> shift) << shift;
119 #endif
120 	sum64 = accumulate(sum64, data);
121 
122 	/* Finally, folding */
123 	sum64 += (sum64 >> 32) | (sum64 << 32);
124 	sum = sum64 >> 32;
125 	sum += (sum >> 16) | (sum << 16);
126 	if (offset & 1)
127 		return (u16)swab32(sum);
128 
129 	return sum >> 16;
130 }
131 
132 __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
133 			const struct in6_addr *daddr,
134 			__u32 len, __u8 proto, __wsum csum)
135 {
136 	__uint128_t src, dst;
137 	u64 sum = (__force u64)csum;
138 
139 	src = *(const __uint128_t *)saddr->s6_addr;
140 	dst = *(const __uint128_t *)daddr->s6_addr;
141 
142 	sum += (__force u32)htonl(len);
143 #ifdef __LITTLE_ENDIAN
144 	sum += (u32)proto << 24;
145 #else
146 	sum += proto;
147 #endif
148 	src += (src >> 64) | (src << 64);
149 	dst += (dst >> 64) | (dst << 64);
150 
151 	sum = accumulate(sum, src >> 64);
152 	sum = accumulate(sum, dst >> 64);
153 
154 	sum += ((sum >> 32) | (sum << 32));
155 	return csum_fold((__force __wsum)(sum >> 32));
156 }
157 EXPORT_SYMBOL(csum_ipv6_magic);
158