// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2019-2020 Arm Ltd.

#include <linux/compiler.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>

#include <net/checksum.h>

/* Looks dumb, but generates nice-ish code */
static u64 accumulate(u64 sum, u64 data)
{
	__uint128_t tmp = (__uint128_t)sum + data;
	return tmp + (tmp >> 64);
}
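
/*
 * Illustrative note (not in the original source): accumulate() performs
 * 64-bit ones'-complement addition, i.e. addition with end-around carry.
 * Any carry out of bit 63 lands in bit 64 of the 128-bit intermediate,
 * and "tmp + (tmp >> 64)" adds it back into the low word, which is all
 * that survives the implicit truncation to u64 on return. For example,
 * accumulate(0xffffffffffffffff, 0x2) gives tmp = 0x1_0000000000000001,
 * so tmp >> 64 = 1 and the returned low 64 bits are 0x2 - the same
 * answer as propagating the carry around by hand.
 */
#if 0
/*
 * Sketch only, not built: a hypothetical equivalent formulation that makes
 * the end-around carry explicit instead of relying on __uint128_t.
 */
static u64 accumulate_sketch(u64 sum, u64 data)
{
	u64 res = sum + data;

	/* "res < sum" is 1 exactly when the 64-bit addition wrapped */
	return res + (res < sum);
}
#endif
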
/*
 * We over-read the buffer and this makes KASAN unhappy. Instead, disable
 * instrumentation and call kasan explicitly.
 */
unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
{
	unsigned int offset, shift, sum;
	const u64 *ptr;
	u64 data, sum64 = 0;

	if (unlikely(len <= 0))
		return 0;

	offset = (unsigned long)buff & 7;
	/*
	 * This is to all intents and purposes safe, since rounding down cannot
	 * result in a different page or cache line being accessed, and @buff
	 * should absolutely not be pointing to anything read-sensitive. We do,
	 * however, have to be careful not to piss off KASAN, which means using
	 * unchecked reads to accommodate the head and tail, for which we'll
	 * compensate with an explicit check up-front.
	 */
	kasan_check_read(buff, len);
	ptr = (u64 *)(buff - offset);
	len = len + offset - 8;

	/*
	 * Head: zero out any excess leading bytes. Shifting back by the same
	 * amount should be at least as fast as any other way of handling the
	 * odd/even alignment, and means we can ignore it until the very end.
	 */
	shift = offset * 8;
	data = *ptr++;
#ifdef __LITTLE_ENDIAN
	data = (data >> shift) << shift;
#else
	data = (data << shift) >> shift;
#endif

	/*
	 * Body: straightforward aligned loads from here on (the paired loads
	 * underlying the quadword type still only need dword alignment). The
	 * main loop strictly excludes the tail, so the second loop will always
	 * run at least once.
	 */
	while (unlikely(len > 64)) {
		__uint128_t tmp1, tmp2, tmp3, tmp4;

		tmp1 = *(__uint128_t *)ptr;
		tmp2 = *(__uint128_t *)(ptr + 2);
		tmp3 = *(__uint128_t *)(ptr + 4);
		tmp4 = *(__uint128_t *)(ptr + 6);

		len -= 64;
		ptr += 8;

		/* This is the "don't dump the carry flag into a GPR" idiom */
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		tmp2 += (tmp2 >> 64) | (tmp2 << 64);
		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
		tmp4 += (tmp4 >> 64) | (tmp4 << 64);
		tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64);
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64);
		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
		tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64);
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		tmp1 = ((tmp1 >> 64) << 64) | sum64;
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		sum64 = tmp1 >> 64;
	}
	while (len > 8) {
		__uint128_t tmp;

		sum64 = accumulate(sum64, data);
		tmp = *(__uint128_t *)ptr;

		len -= 16;
		ptr += 2;

#ifdef __LITTLE_ENDIAN
		data = tmp >> 64;
		sum64 = accumulate(sum64, tmp);
#else
		data = tmp;
		sum64 = accumulate(sum64, tmp >> 64);
#endif
	}
	if (len > 0) {
		sum64 = accumulate(sum64, data);
		data = *ptr;
		len -= 8;
	}
	/*
	 * Tail: zero any over-read bytes similarly to the head, again
	 * preserving odd/even alignment.
	 */
	shift = len * -8;
#ifdef __LITTLE_ENDIAN
	data = (data << shift) >> shift;
#else
	data = (data >> shift) << shift;
#endif
	sum64 = accumulate(sum64, data);

	/* Finally, folding */
	sum64 += (sum64 >> 32) | (sum64 << 32);
	sum = sum64 >> 32;
	sum += (sum >> 16) | (sum << 16);
	if (offset & 1)
		return (u16)swab32(sum);

	return sum >> 16;
}
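
/*
 * Illustrative sketch (not part of the original source, and not built): a
 * naive byte-at-a-time version of the 16-bit ones'-complement sum that
 * do_csum() computes, assuming a little-endian pairing of bytes into words
 * and up to the usual 0/0xffff equivalence of ones'-complement sums. It is
 * meant as a mental model for the optimised routine above, not as a
 * replacement for it.
 */
#if 0
static unsigned int do_csum_reference(const unsigned char *buff, int len)
{
	u64 sum = 0;
	int i;

	if (len <= 0)
		return 0;

	/*
	 * Sum 16-bit words, low byte first; an odd trailing byte is
	 * implicitly padded with zero.
	 */
	for (i = 0; i + 1 < len; i += 2)
		sum += buff[i] | ((u64)buff[i + 1] << 8);
	if (len & 1)
		sum += buff[len - 1];

	/* Fold the accumulator down to 16 bits with end-around carry */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);

	return sum;
}
#endif
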
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
			const struct in6_addr *daddr,
			__u32 len, __u8 proto, __wsum csum)
{
	__uint128_t src, dst;
	u64 sum = (__force u64)csum;

	src = *(const __uint128_t *)saddr->s6_addr;
	dst = *(const __uint128_t *)daddr->s6_addr;

	sum += (__force u32)htonl(len);
#ifdef __LITTLE_ENDIAN
	sum += (u32)proto << 24;
#else
	sum += proto;
#endif
	src += (src >> 64) | (src << 64);
	dst += (dst >> 64) | (dst << 64);

	sum = accumulate(sum, src >> 64);
	sum = accumulate(sum, dst >> 64);

	sum += ((sum >> 32) | (sum << 32));
	return csum_fold((__force __wsum)(sum >> 32));
}
EXPORT_SYMBOL(csum_ipv6_magic);
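
/*
 * Illustrative note (not part of the original source): csum_ipv6_magic()
 * folds the IPv6 pseudo-header - source address, destination address,
 * upper-layer length and next-header/protocol value - into the running
 * ones'-complement sum @csum, then reduces it to the final complemented
 * 16-bit checksum via csum_fold(). A typical caller checksums the
 * upper-layer payload with csum_partial() first and passes the partial sum
 * in; the sketch below (not built) shows that pattern for ICMPv6, with
 * example_icmpv6_csum() being a hypothetical name rather than anything in
 * the kernel.
 */
#if 0
static __sum16 example_icmpv6_csum(const struct in6_addr *saddr,
				   const struct in6_addr *daddr,
				   const void *payload, __u32 len)
{
	/* Ones'-complement sum of the ICMPv6 header and payload bytes */
	__wsum partial = csum_partial(payload, len, 0);

	/* Mix in the pseudo-header and fold to the on-wire 16-bit value */
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_ICMPV6, partial);
}
#endif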