xref: /linux/arch/riscv/lib/crc32.c (revision 55d0969c451159cff86949b38c39171cab962069)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Accelerated CRC32 implementation with Zbc extension.
4  *
5  * Copyright (C) 2024 Intel Corporation
6  */
7 
8 #include <asm/hwcap.h>
9 #include <asm/alternative-macros.h>
10 #include <asm/byteorder.h>
11 
12 #include <linux/types.h>
13 #include <linux/minmax.h>
14 #include <linux/crc32poly.h>
15 #include <linux/crc32.h>
16 #include <linux/byteorder/generic.h>
17 
18 /*
19  * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
20  * a better understanding of how this math works.
21  *
22  * Let "+" denote polynomial addition (XOR)
23  * Let "-" denote polynomial subtraction (XOR)
24  * Let "*" denote polynomial multiplication
25  * Let "/" denote polynomial floor division
26  * Let "S" denote the source data, XLEN bits wide
27  * Let "P" denote the CRC32 polynomial
28  * Let "T" denote 2^(XLEN+32)
29  * Let "QT" denote the quotient of T/P, with the bit for 2^XLEN being implicit
30  *
31  * crc32(S, P)
32  * => S * (2^32) - S * (2^32) / P * P
33  * => lowest 32 bits of: S * (2^32) / P * P
34  * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
35  * => lowest 32 bits of: S * (2^32) * quotient / T * P
36  * => lowest 32 bits of: S * quotient / 2^XLEN * P
37  * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
38  * => clmul_low_part(clmul_high_part(S, QT) + S, P)
39  *
40  * Of the implementations below, the BE case is the more intuitive one, since
41  * the higher-order bit sits at the more significant position.
42  */
43 
44 #if __riscv_xlen == 64
45 /* Slide by XLEN bits per iteration: STEP is (1 << STEP_ORDER) = 8 bytes */
46 # define STEP_ORDER 3
47 
48 /* Each polynomial quotient below has an implicit bit for 2^XLEN */
49 
50 /* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
51 # define CRC32_POLY_QT_LE	0x5a72d812fb808b20
52 
53 /* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
54 # define CRC32C_POLY_QT_LE	0xa434f61c6f5389f8
55 
56 /* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format. It is
57  * the bit-reversed version of CRC32_POLY_QT_LE.
58  */
59 # define CRC32_POLY_QT_BE	0x04d101df481b4e5a
60 
61 static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
62 {
63 	return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
64 }
65 
66 static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
67 {
68 	u32 crc;
69 
70 	/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
71 	asm volatile (".option push\n"
72 		      ".option arch,+zbc\n"
73 		      "clmul	%0, %1, %2\n"
74 		      "slli	%0, %0, 1\n"
75 		      "xor	%0, %0, %1\n"
76 		      "clmulr	%0, %0, %3\n"
77 		      "srli	%0, %0, 32\n"
78 		      ".option pop\n"
79 		      : "=&r" (crc)
80 		      : "r" (s),
81 			"r" (poly_qt),
82 			"r" ((u64)poly << 32)
83 		      :);
84 	return crc;
85 }
86 
87 static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
88 {
89 	return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
90 }
91 
92 #elif __riscv_xlen == 32
93 # define STEP_ORDER 2	/* STEP is (1 << STEP_ORDER) = 4 bytes per iteration */
94 /* Each quotient matches the higher-order half of its RV64 analog: the
 * numerically low 32 bits of the LE-format constants, and the numerically
 * high 32 bits of the BE-format constant.
 */
95 # define CRC32_POLY_QT_LE	0xfb808b20
96 # define CRC32C_POLY_QT_LE	0x6f5389f8
97 # define CRC32_POLY_QT_BE	0x04d101df
98 
99 static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
100 {
101 	return crc ^ (__force u32)__cpu_to_le32(*ptr);
102 }
103 
104 static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
105 {
106 	u32 crc;
107 
108 	/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
109 	asm volatile (".option push\n"
110 		      ".option arch,+zbc\n"
111 		      "clmul	%0, %1, %2\n"
112 		      "slli	%0, %0, 1\n"
113 		      "xor	%0, %0, %1\n"
114 		      "clmulr	%0, %0, %3\n"
115 		      ".option pop\n"
116 		      : "=&r" (crc)
117 		      : "r" (s),
118 			"r" (poly_qt),
119 			"r" (poly)
120 		      :);
121 	return crc;
122 }
123 
124 static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
125 {
126 	return crc ^ (__force u32)__cpu_to_be32(*ptr);
127 }
128 
129 #else
130 # error "Unexpected __riscv_xlen"
131 #endif
132 
133 static inline u32 crc32_be_zbc(unsigned long s)
134 {
135 	u32 crc;
136 
137 	asm volatile (".option push\n"
138 		      ".option arch,+zbc\n"
139 		      "clmulh	%0, %1, %2\n"
140 		      "xor	%0, %0, %1\n"
141 		      "clmul	%0, %0, %3\n"
142 		      ".option pop\n"
143 		      : "=&r" (crc)
144 		      : "r" (s),
145 			"r" (CRC32_POLY_QT_BE),
146 			"r" (CRC32_POLY_BE)
147 		      :);
148 	return crc;
149 }
150 
151 #define STEP		(1 << STEP_ORDER)	/* bytes consumed per word iteration */
152 #define OFFSET_MASK	(STEP - 1)		/* extracts an address/length offset within a STEP */
153 
/* Signature of the CRC routine called when the Zbc code path is not taken. */
154 typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);
155 
156 static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
157 				     size_t len, u32 poly,
158 				     unsigned long poly_qt)
159 {
160 	size_t bits = len * 8;
161 	unsigned long s = 0;
162 	u32 crc_low = 0;
163 
164 	for (int i = 0; i < len; i++)
165 		s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);
166 
167 	s ^= (unsigned long)crc << (__riscv_xlen - bits);
168 	if (__riscv_xlen == 32 || len < sizeof(u32))
169 		crc_low = crc >> bits;
170 
171 	crc = crc32_le_zbc(s, poly, poly_qt);
172 	crc ^= crc_low;
173 
174 	return crc;
175 }
176 
177 static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
178 					  size_t len, u32 poly,
179 					  unsigned long poly_qt,
180 					  fallback crc_fb)
181 {
182 	size_t offset, head_len, tail_len;
183 	unsigned long const *p_ul;
184 	unsigned long s;
185 
186 	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
187 			     RISCV_ISA_EXT_ZBC, 1)
188 		 : : : : legacy);
189 
190 	/* Handle the unaligned head. */
191 	offset = (unsigned long)p & OFFSET_MASK;
192 	if (offset && len) {
193 		head_len = min(STEP - offset, len);
194 		crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
195 		p += head_len;
196 		len -= head_len;
197 	}
198 
199 	tail_len = len & OFFSET_MASK;
200 	len = len >> STEP_ORDER;
201 	p_ul = (unsigned long const *)p;
202 
203 	for (int i = 0; i < len; i++) {
204 		s = crc32_le_prep(crc, p_ul);
205 		crc = crc32_le_zbc(s, poly, poly_qt);
206 		p_ul++;
207 	}
208 
209 	/* Handle the tail bytes. */
210 	p = (unsigned char const *)p_ul;
211 	if (tail_len)
212 		crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);
213 
214 	return crc;
215 
216 legacy:
217 	return crc_fb(crc, p, len);
218 }
219 
220 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
221 {
222 	return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
223 				crc32_le_base);
224 }
225 
226 u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
227 {
228 	return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
229 				CRC32C_POLY_QT_LE, __crc32c_le_base);
230 }
231 
232 static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
233 				     size_t len)
234 {
235 	size_t bits = len * 8;
236 	unsigned long s = 0;
237 	u32 crc_low = 0;
238 
239 	s = 0;
240 	for (int i = 0; i < len; i++)
241 		s = *p++ | (s << 8);
242 
243 	if (__riscv_xlen == 32 || len < sizeof(u32)) {
244 		s ^= crc >> (32 - bits);
245 		crc_low = crc << bits;
246 	} else {
247 		s ^= (unsigned long)crc << (bits - 32);
248 	}
249 
250 	crc = crc32_be_zbc(s);
251 	crc ^= crc_low;
252 
253 	return crc;
254 }
255 
256 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
257 {
258 	size_t offset, head_len, tail_len;
259 	unsigned long const *p_ul;
260 	unsigned long s;
261 
262 	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
263 			     RISCV_ISA_EXT_ZBC, 1)
264 		 : : : : legacy);
265 
266 	/* Handle the unaligned head. */
267 	offset = (unsigned long)p & OFFSET_MASK;
268 	if (offset && len) {
269 		head_len = min(STEP - offset, len);
270 		crc = crc32_be_unaligned(crc, p, head_len);
271 		p += head_len;
272 		len -= head_len;
273 	}
274 
275 	tail_len = len & OFFSET_MASK;
276 	len = len >> STEP_ORDER;
277 	p_ul = (unsigned long const *)p;
278 
279 	for (int i = 0; i < len; i++) {
280 		s = crc32_be_prep(crc, p_ul);
281 		crc = crc32_be_zbc(s);
282 		p_ul++;
283 	}
284 
285 	/* Handle the tail bytes. */
286 	p = (unsigned char const *)p_ul;
287 	if (tail_len)
288 		crc = crc32_be_unaligned(crc, p, tail_len);
289 
290 	return crc;
291 
292 legacy:
293 	return crc32_be_base(crc, p, len);
294 }
295