// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated CRC32 implementation with Zbc extension.
 *
 * Copyright (C) 2024 Intel Corporation
 */

#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
#include <asm/byteorder.h>

#include <linux/types.h>
#include <linux/minmax.h>
#include <linux/crc32poly.h>
#include <linux/crc32.h>
#include <linux/byteorder/generic.h>
#include <linux/module.h>

/*
 * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
 * a better understanding of how this math works.
 *
 * let "+" denote polynomial addition (XOR)
 * let "-" denote polynomial subtraction (XOR)
 * let "*" denote polynomial multiplication
 * let "/" denote polynomial floor division
 * let "S" denote the source data, XLEN bits wide
 * let "P" denote the CRC32 polynomial
 * let "T" denote 2^(XLEN+32)
 * let "QT" denote the quotient of T/P, with the bit for 2^XLEN being implicit
 *
 * crc32(S, P)
 * => S * (2^32) - S * (2^32) / P * P
 * => lowest 32 bits of: S * (2^32) / P * P
 * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
 * => lowest 32 bits of: S * (2^32) * quotient / T * P
 * => lowest 32 bits of: S * quotient / 2^XLEN * P
 * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
 * => clmul_low_part(clmul_high_part(S, QT) + S, P)
 *
 * In the implementations below, the BE case is the more intuitive one, since
 * the higher-order polynomial bits sit at the more significant bit positions.
 */

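/*
 * A minimal reference sketch of the reduction above in plain C, assuming
 * XLEN == 64 and the BE bit ordering.  The *_ref() helpers and the function
 * below are illustrative only: they emulate the Zbc clmul/clmulh instructions
 * bit by bit and are not used anywhere in this file.
 */

/* Low XLEN bits of the carry-less product, as the clmul insn would return. */
static inline u64 clmul_low_ref(u64 a, u64 b)
{
	u64 r = 0;

	for (int i = 0; i < 64; i++)
		if (b & (1ULL << i))
			r ^= a << i;
	return r;
}

/* High XLEN bits of the carry-less product, as the clmulh insn would return. */
static inline u64 clmul_high_ref(u64 a, u64 b)
{
	u64 r = 0;

	for (int i = 1; i < 64; i++)
		if (b & (1ULL << i))
			r ^= a >> (64 - i);
	return r;
}

/*
 * One BE folding step as derived above: "s" is an XLEN-bit chunk with the
 * running CRC already XORed into its top 32 bits, "poly" is the CRC32
 * polynomial with its 2^32 bit dropped, and "poly_qt" is QT with its 2^XLEN
 * bit dropped.  This mirrors crc32_be_zbc() further down.
 */
static inline u32 crc32_be_barrett_ref(u64 s, u32 poly, u64 poly_qt)
{
	/* clmul_high_part(S, QT) + S; XORing in S supplies QT's implicit 2^XLEN bit */
	u64 q = clmul_high_ref(s, poly_qt) ^ s;

	/* lowest 32 bits of quotient * P; P's implicit 2^32 bit cannot affect them */
	return (u32)clmul_low_ref(q, poly);
}
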
#if __riscv_xlen == 64
/* Slide by XLEN bits per iteration */
# define STEP_ORDER 3

/* Each polynomial quotient below has an implicit bit for 2^XLEN */

/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
# define CRC32_POLY_QT_LE	0x5a72d812fb808b20

/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
# define CRC32C_POLY_QT_LE	0xa434f61c6f5389f8

/*
 * Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format; it should be
 * the same as the bit-reversed version of CRC32_POLY_QT_LE.
 */
# define CRC32_POLY_QT_BE	0x04d101df481b4e5a

static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
{
	return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
}

static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
{
	u32 crc;

	/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmul	%0, %1, %2\n"
		      "slli	%0, %0, 1\n"
		      "xor	%0, %0, %1\n"
		      "clmulr	%0, %0, %3\n"
		      "srli	%0, %0, 32\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (poly_qt),
			"r" ((u64)poly << 32)
		      :);
	return crc;
}

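/*
 * A reference-only sketch of the instruction sequence in crc32_le_zbc()
 * above, reusing the illustrative bit-by-bit helpers and again assuming
 * XLEN == 64.  clmulr returns bits XLEN-1 .. 2*XLEN-2 of the carry-less
 * product, i.e. the full product shifted right by XLEN - 1.
 */
static inline u64 clmulr_ref(u64 a, u64 b)
{
	return (clmul_high_ref(a, b) << 1) | (clmul_low_ref(a, b) >> 63);
}

static inline u32 crc32_le_barrett_ref(u64 s, u32 poly, u64 poly_qt)
{
	u64 t;

	/* clmul + slli stands in for the missing "clmulrh" */
	t = clmul_low_ref(s, poly_qt) << 1;
	/* account for the quotient's implicit 2^XLEN bit */
	t ^= s;
	/* multiply by the reflected polynomial and keep the useful half */
	t = clmulr_ref(t, (u64)poly << 32);
	return (u32)(t >> 32);
}
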
static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
{
	return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
}

#elif __riscv_xlen == 32
# define STEP_ORDER 2
/*
 * Each quotient should match the most-significant half of its RV64 analog;
 * for the LE constants that half sits in the low-order bits of the stored
 * value because of the bit reversal.
 */
# define CRC32_POLY_QT_LE	0xfb808b20
# define CRC32C_POLY_QT_LE	0x6f5389f8
# define CRC32_POLY_QT_BE	0x04d101df

static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
{
	return crc ^ (__force u32)__cpu_to_le32(*ptr);
}

static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
{
	u32 crc;

	/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmul	%0, %1, %2\n"
		      "slli	%0, %0, 1\n"
		      "xor	%0, %0, %1\n"
		      "clmulr	%0, %0, %3\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (poly_qt),
			"r" (poly)
		      :);
	return crc;
}

static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
{
	return crc ^ (__force u32)__cpu_to_be32(*ptr);
}

#else
# error "Unexpected __riscv_xlen"
#endif

static inline u32 crc32_be_zbc(unsigned long s)
{
	u32 crc;

	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmulh	%0, %1, %2\n"
		      "xor	%0, %0, %1\n"
		      "clmul	%0, %0, %3\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (CRC32_POLY_QT_BE),
			"r" (CRC32_POLY_BE)
		      :);
	return crc;
}

#define STEP		(1 << STEP_ORDER)
#define OFFSET_MASK	(STEP - 1)

typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);

static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
				     size_t len, u32 poly,
				     unsigned long poly_qt)
{
	size_t bits = len * 8;
	unsigned long s = 0;
	u32 crc_low = 0;

	for (int i = 0; i < len; i++)
		s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);

	s ^= (unsigned long)crc << (__riscv_xlen - bits);
	if (__riscv_xlen == 32 || len < sizeof(u32))
		crc_low = crc >> bits;

	crc = crc32_le_zbc(s, poly, poly_qt);
	crc ^= crc_low;

	return crc;
}

static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
					  size_t len, u32 poly,
					  unsigned long poly_qt,
					  fallback crc_fb)
{
	size_t offset, head_len, tail_len;
	unsigned long const *p_ul;
	unsigned long s;

	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
			     RISCV_ISA_EXT_ZBC, 1)
		 : : : : legacy);

	/* Handle the unaligned head. */
	offset = (unsigned long)p & OFFSET_MASK;
	if (offset && len) {
		head_len = min(STEP - offset, len);
		crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
		p += head_len;
		len -= head_len;
	}

	tail_len = len & OFFSET_MASK;
	len = len >> STEP_ORDER;
	p_ul = (unsigned long const *)p;

	for (int i = 0; i < len; i++) {
		s = crc32_le_prep(crc, p_ul);
		crc = crc32_le_zbc(s, poly, poly_qt);
		p_ul++;
	}

	/* Handle the tail bytes. */
	p = (unsigned char const *)p_ul;
	if (tail_len)
		crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);

	return crc;

legacy:
	return crc_fb(crc, p, len);
}

u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
	return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
				crc32_le_base);
}
EXPORT_SYMBOL(crc32_le_arch);

u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
{
	return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
				CRC32C_POLY_QT_LE, crc32c_le_base);
}
EXPORT_SYMBOL(crc32c_le_arch);

static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
				     size_t len)
{
	size_t bits = len * 8;
	unsigned long s = 0;
	u32 crc_low = 0;

	for (int i = 0; i < len; i++)
		s = *p++ | (s << 8);

	if (__riscv_xlen == 32 || len < sizeof(u32)) {
		s ^= crc >> (32 - bits);
		crc_low = crc << bits;
	} else {
		s ^= (unsigned long)crc << (bits - 32);
	}

	crc = crc32_be_zbc(s);
	crc ^= crc_low;

	return crc;
}

u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
	size_t offset, head_len, tail_len;
	unsigned long const *p_ul;
	unsigned long s;

	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
			     RISCV_ISA_EXT_ZBC, 1)
		 : : : : legacy);

	/* Handle the unaligned head. */
	offset = (unsigned long)p & OFFSET_MASK;
	if (offset && len) {
		head_len = min(STEP - offset, len);
		crc = crc32_be_unaligned(crc, p, head_len);
		p += head_len;
		len -= head_len;
	}

	tail_len = len & OFFSET_MASK;
	len = len >> STEP_ORDER;
	p_ul = (unsigned long const *)p;

	for (int i = 0; i < len; i++) {
		s = crc32_be_prep(crc, p_ul);
		crc = crc32_be_zbc(s);
		p_ul++;
	}

	/* Handle the tail bytes. */
	p = (unsigned char const *)p_ul;
	if (tail_len)
		crc = crc32_be_unaligned(crc, p, tail_len);

	return crc;

legacy:
	return crc32_be_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_be_arch);

u32 crc32_optimizations(void)
{
	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
		return CRC32_LE_OPTIMIZATION |
		       CRC32_BE_OPTIMIZATION |
		       CRC32C_OPTIMIZATION;
	return 0;
}
EXPORT_SYMBOL(crc32_optimizations);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");