// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated CRC32 implementation with Zbc extension.
 *
 * Copyright (C) 2024 Intel Corporation
 */

#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
#include <asm/byteorder.h>

#include <linux/types.h>
#include <linux/minmax.h>
#include <linux/crc32poly.h>
#include <linux/crc32.h>
#include <linux/byteorder/generic.h>

/*
 * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
 * a better understanding of how this math works.
 *
 * let "+" denote polynomial add (XOR)
 * let "-" denote polynomial sub (XOR)
 * let "*" denote polynomial multiplication
 * let "/" denote polynomial floor division
 * let "S" denote the source data, XLEN bits wide
 * let "P" denote the CRC32 polynomial
 * let "T" denote 2^(XLEN+32)
 * let "QT" denote the quotient of T/P, with the bit for 2^XLEN being implicit
 *
 * crc32(S, P)
 * => S * (2^32) - S * (2^32) / P * P
 * => lowest 32 bits of: S * (2^32) / P * P
 * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
 * => lowest 32 bits of: S * (2^32) * quotient / T * P
 * => lowest 32 bits of: S * quotient / 2^XLEN * P
 * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
 * => clmul_low_part(clmul_high_part(S, QT) + S, P)
 *
 * In terms of the implementations below, the BE case is more intuitive, since
 * the higher-order bit sits at the more significant position.  A sketch that
 * derives the quotient constants is appended at the end of this file.
 */

#if __riscv_xlen == 64
/* Slide by XLEN bits per iteration */
# define STEP_ORDER 3

/* Each polynomial quotient below has an implicit bit for 2^XLEN */

/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
# define CRC32_POLY_QT_LE 0x5a72d812fb808b20

/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8

/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format; it should be
 * the same as the bit-reversed version of CRC32_POLY_QT_LE
 */
# define CRC32_POLY_QT_BE 0x04d101df481b4e5a

static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
{
	return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
}

static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
{
	u32 crc;

	/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmul %0, %1, %2\n"
		      "slli %0, %0, 1\n"
		      "xor %0, %0, %1\n"
		      "clmulr %0, %0, %3\n"
		      "srli %0, %0, 32\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (poly_qt),
			"r" ((u64)poly << 32)
		      :);
	return crc;
}

static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
{
	return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
}

#elif __riscv_xlen == 32
# define STEP_ORDER 2
/* Each quotient should match the upper half of its analog in RV64 */
# define CRC32_POLY_QT_LE 0xfb808b20
# define CRC32C_POLY_QT_LE 0x6f5389f8
# define CRC32_POLY_QT_BE 0x04d101df

static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
{
	return crc ^ (__force u32)__cpu_to_le32(*ptr);
}

static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
{
	u32 crc;

	/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmul %0, %1, %2\n"
		      "slli %0, %0, 1\n"
		      "xor %0, %0, %1\n"
		      "clmulr %0, %0, %3\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (poly_qt),
			"r" (poly)
		      :);
	return crc;
}

static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
{
	return crc ^ (__force u32)__cpu_to_be32(*ptr);
}

#else
# error "Unexpected __riscv_xlen"
#endif

static inline u32 crc32_be_zbc(unsigned long s)
{
	u32 crc;

	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmulh %0, %1, %2\n"
		      "xor %0, %0, %1\n"
		      "clmul %0, %0, %3\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (CRC32_POLY_QT_BE),
			"r" (CRC32_POLY_BE)
		      :);
	return crc;
}

#define STEP (1 << STEP_ORDER)
#define OFFSET_MASK (STEP - 1)

typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);

static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
				     size_t len, u32 poly,
				     unsigned long poly_qt)
{
	size_t bits = len * 8;
	unsigned long s = 0;
	u32 crc_low = 0;

	for (int i = 0; i < len; i++)
		s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);

	s ^= (unsigned long)crc << (__riscv_xlen - bits);
	if (__riscv_xlen == 32 || len < sizeof(u32))
		crc_low = crc >> bits;

	crc = crc32_le_zbc(s, poly, poly_qt);
	crc ^= crc_low;

	return crc;
}

static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
					  size_t len, u32 poly,
					  unsigned long poly_qt,
					  fallback crc_fb)
{
	size_t offset, head_len, tail_len;
	unsigned long const *p_ul;
	unsigned long s;

	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
			     RISCV_ISA_EXT_ZBC, 1)
		 : : : : legacy);

	/* Handle the unaligned head. */
	offset = (unsigned long)p & OFFSET_MASK;
	if (offset && len) {
		head_len = min(STEP - offset, len);
		crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
		p += head_len;
		len -= head_len;
	}

	tail_len = len & OFFSET_MASK;
	len = len >> STEP_ORDER;
	p_ul = (unsigned long const *)p;

	for (int i = 0; i < len; i++) {
		s = crc32_le_prep(crc, p_ul);
		crc = crc32_le_zbc(s, poly, poly_qt);
		p_ul++;
	}

	/* Handle the tail bytes. */
	p = (unsigned char const *)p_ul;
	if (tail_len)
		crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);

	return crc;

legacy:
	return crc_fb(crc, p, len);
}

u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
{
	return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
				crc32_le_base);
}

u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
	return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
				CRC32C_POLY_QT_LE, __crc32c_le_base);
}

static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
				     size_t len)
{
	size_t bits = len * 8;
	unsigned long s = 0;
	u32 crc_low = 0;

	for (int i = 0; i < len; i++)
		s = *p++ | (s << 8);

	if (__riscv_xlen == 32 || len < sizeof(u32)) {
		s ^= crc >> (32 - bits);
		crc_low = crc << bits;
	} else {
		s ^= (unsigned long)crc << (bits - 32);
	}

	crc = crc32_be_zbc(s);
	crc ^= crc_low;

	return crc;
}

u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
{
	size_t offset, head_len, tail_len;
	unsigned long const *p_ul;
	unsigned long s;

	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
			     RISCV_ISA_EXT_ZBC, 1)
		 : : : : legacy);

	/* Handle the unaligned head. */
	offset = (unsigned long)p & OFFSET_MASK;
	if (offset && len) {
		head_len = min(STEP - offset, len);
		crc = crc32_be_unaligned(crc, p, head_len);
		p += head_len;
		len -= head_len;
	}

	tail_len = len & OFFSET_MASK;
	len = len >> STEP_ORDER;
	p_ul = (unsigned long const *)p;

	for (int i = 0; i < len; i++) {
		s = crc32_be_prep(crc, p_ul);
		crc = crc32_be_zbc(s);
		p_ul++;
	}

	/* Handle the tail bytes. */
	p = (unsigned char const *)p_ul;
	if (tail_len)
		crc = crc32_be_unaligned(crc, p, tail_len);

	return crc;

legacy:
	return crc32_be_base(crc, p, len);
}
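
/*
 * Illustrative sketch only, not used by the code above: one way the quotient
 * constants could be derived.  The helper name crc32_poly_qt_be_ref and its
 * placement here are assumptions made for illustration; it relies only on
 * CRC32_POLY_BE from <linux/crc32poly.h>.  It performs schoolbook long
 * division of T = x^96 (i.e. 2^(XLEN+32) for XLEN = 64) by the CRC32
 * polynomial over GF(2) and keeps the low 64 quotient bits, which should
 * match CRC32_POLY_QT_BE; the quotient bit for x^64 is the "implicit bit"
 * mentioned in the comment at the top of this file.  Running the same loop
 * from 64 instead of 96 gives the RV32 constant, and the LE constants are
 * the bit-reversed analogues.
 */
static inline u64 crc32_poly_qt_be_ref(void)
{
	/* Full divisor polynomial: x^32 + CRC32_POLY_BE. */
	const u64 poly = (1ULL << 32) | CRC32_POLY_BE;
	u64 quot = 0;
	u64 rem = 0;
	int i;

	/* Bring down one coefficient of x^96 per step, highest degree first. */
	for (i = 96; i >= 0; i--) {
		rem = (rem << 1) | (i == 96);
		/* Reduce whenever the partial remainder reaches degree 32. */
		if (rem & (1ULL << 32)) {
			rem ^= poly;
			if (i < 64)	/* the x^64 quotient bit stays implicit */
				quot |= 1ULL << i;
		}
	}

	/* Expected to return 0x04d101df481b4e5a, i.e. CRC32_POLY_QT_BE. */
	return quot;
}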