/* xref: /linux/arch/arm64/lib/crc32.S (revision 79d2e1919a2728ef49d938eb20ebd5903c14dfb0) */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated CRC32(C) using AArch64 CRC and PMULL instructions
 *
 * Copyright (C) 2016 - 2018 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */
10
#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * Let the assembler accept the CRC32 and PMULL instructions used
	 * below, independent of the CPU selected on the command line.
	 */
	.cpu		generic+crc+crypto
15
/*
 * bitle - bit-order fixup of the CRC accumulator, le flavour.
 *
 * Invoked as bit<order> on w0 at entry and exit of the CRC routines.
 * For order=le it expands to nothing: the CRC value is used as passed in.
 */
	.macro		bitle, reg
	.endm
18
/*
 * bitbe - bit-order fixup of the CRC accumulator, be flavour.
 *
 * Invoked as bit<order> on w0 at entry and exit of the CRC routines.
 * Reverses the bit order of reg in place; applying it twice restores the
 * original value.
 */
	.macro		bitbe, reg
	rbit		\reg, \reg
	.endm
22
/*
 * bytele - fixup of a single input byte (loaded with ldrb), le flavour.
 *
 * Expands to nothing: the byte is fed to the CRC instruction unchanged.
 */
	.macro		bytele, reg
	.endm
25
/*
 * bytebe - fixup of a single input byte (loaded with ldrb), be flavour.
 *
 * reg is used as a 32-bit register holding the byte in bits [7:0].
 * rbit moves the bit-reversed byte to bits [31:24]; the shift brings it
 * back down to bits [7:0].
 */
	.macro		bytebe, reg
	rbit		\reg, \reg
	lsr		\reg, \reg, #24
	.endm
30
/*
 * hwordle - fixup of an input halfword (loaded with ldrh), le flavour.
 *
 * Swaps the two bytes of the halfword, but only inside CPU_BE(). CPU_BE()
 * is not defined in this file (presumably <asm/assembler.h>) and is
 * expected to emit its argument on big-endian builds only, which makes
 * this macro empty on little-endian builds.
 */
	.macro		hwordle, reg
CPU_BE(	rev16		\reg, \reg	)
	.endm
34
/*
 * hwordbe - fixup of an input halfword (loaded with ldrh), be flavour.
 *
 * reg is used as a 32-bit register holding the halfword in bits [15:0].
 * The unconditional rbit reverses all 32 bits. The surrounding steps
 * depend on the build: CPU_LE() adds a 32-bit byte swap before it and
 * CPU_BE() a shift by 16 after it. Both wrappers are defined outside this
 * file (presumably <asm/assembler.h>) and are expected to emit their
 * argument on little-endian resp. big-endian builds only. Either way the
 * result ends up in bits [15:0] with the bits of each byte reversed.
 */
	.macro		hwordbe, reg
CPU_LE(	rev		\reg, \reg	)
	rbit		\reg, \reg
CPU_BE(	lsr		\reg, \reg, #16	)
	.endm
40
/*
 * le - fixup of full-width input words, le flavour.
 *
 * Takes any number of registers and emits one CPU_BE()-wrapped byte swap
 * per register. CPU_BE() is defined outside this file (presumably
 * <asm/assembler.h>) and is expected to emit its argument on big-endian
 * builds only, so nothing is emitted on little-endian builds.
 */
	.macro		le, regs:vararg
	.irp		r, \regs
CPU_BE(	rev		\r, \r		)
	.endr
	.endm
46
/*
 * be - fixup of full-width input words, be flavour.
 *
 * Takes any number of registers. The first pass emits one CPU_LE()-wrapped
 * byte swap per register; the second pass bit-reverses every register.
 * The passes are kept separate, so all byte swaps are emitted before the
 * first rbit. CPU_LE() is defined outside this file (presumably
 * <asm/assembler.h>) and is expected to emit its argument on little-endian
 * builds only.
 */
	.macro		be, regs:vararg
	.irp		r, \regs
CPU_LE(	rev		\r, \r		)
	.endr
	.irp		r, \regs
	rbit		\r, \r
	.endr
	.endm
55
/*
 * __crc32 - body of the scalar CRC32/CRC32C routines.
 *
 * Macro arguments:
 *   c      empty selects the crc32{b,h,w,x} instructions, "c" selects the
 *          crc32c{b,h,w,x} ones
 *   order  "le" (default) or "be"; selects the bit<order>, byte<order>,
 *          hword<order> and <order> fixup macros
 *
 * Registers:
 *   w0     CRC accumulator: input CRC at entry, result at exit
 *   x1     pointer to the input bytes (advanced while processing)
 *   x2     number of input bytes (modified)
 *   x3-x8  scratch; the condition flags are clobbered as well
 *
 * Both exits of the expansion end in ret, so the macro has to be the
 * entire body of a function.
 *
 * Inputs shorter than 16 bytes take the bit-tested tail at label 8.
 * Longer inputs first consume the leading len % 32 bytes without any
 * branches and then run the loop at label 32, which handles 32 bytes per
 * iteration. The two 16-byte loads of the head cannot run past the buffer
 * because that path is only taken when len >= 16.
 */
	.macro		__crc32, c, order=le
	bit\order	w0
	cmp		x2, #16
	b.lt		8f			// less than 16 bytes

	and		x7, x2, #0x1f		// x7 := len % 32 (head)
	and		x2, x2, #~0x1f		// x2 := bytes for the loop
	cbz		x7, 32f			// multiple of 32 bytes

	/*
	 * Load 16 bytes from the start of the buffer into x3/x4 and 16
	 * bytes from offset len % 16 into x5/x6, then step x1 past the head.
	 */
	and		x8, x7, #0xf
	ldp		x3, x4, [x1]
	add		x8, x8, x1
	add		x1, x1, x7
	ldp		x5, x6, [x8]
	\order		x3, x4, x5, x6

	/*
	 * Bits 3..0 of the head length: consume 8, 4, 2 and 1 bytes from
	 * x3/x4. Each step computes its CRC into w8 unconditionally; the
	 * csel pairs keep the old data and the old CRC when the tested bit
	 * is clear (eq) and otherwise move on to the next bytes and adopt w8.
	 */
	tst		x7, #8
	crc32\c\()x	w8, w0, x3
	csel		x3, x3, x4, eq
	csel		w0, w0, w8, eq
	tst		x7, #4
	lsr		x4, x3, #32
	crc32\c\()w	w8, w0, w3
	csel		x3, x3, x4, eq
	csel		w0, w0, w8, eq
	tst		x7, #2
	lsr		w4, w3, #16
	crc32\c\()h	w8, w0, w3
	csel		w3, w3, w4, eq
	csel		w0, w0, w8, eq
	tst		x7, #1
	crc32\c\()b	w8, w0, w3
	csel		w0, w0, w8, eq

	/* Bit 4 of the head length: the 16 bytes in x5/x6 finish the head */
	tst		x7, #16
	crc32\c\()x	w8, w0, x5
	crc32\c\()x	w8, w8, x6
	csel		w0, w0, w8, eq
	cbz		x2, 0f			// no 32-byte block left

	/* Main loop: 32 bytes per iteration until x2 reaches zero */
32:	ldp		x3, x4, [x1], #32
	sub		x2, x2, #32
	ldp		x5, x6, [x1, #-16]
	\order		x3, x4, x5, x6
	crc32\c\()x	w0, w0, x3
	crc32\c\()x	w0, w0, x4
	crc32\c\()x	w0, w0, x5
	crc32\c\()x	w0, w0, x6
	cbnz		x2, 32b
0:	bit\order	w0
	ret

	/*
	 * Short input (len < 16): bits 3..0 of the length select an 8-, 4-,
	 * 2- and 1-byte step, each with the fixup macro for its width.
	 */
8:	tbz		x2, #3, 4f
	ldr		x3, [x1], #8
	\order		x3
	crc32\c\()x	w0, w0, x3
4:	tbz		x2, #2, 2f
	ldr		w3, [x1], #4
	\order		w3
	crc32\c\()w	w0, w0, w3
2:	tbz		x2, #1, 1f
	ldrh		w3, [x1], #2
	hword\order	w3
	crc32\c\()h	w0, w0, w3
1:	tbz		x2, #0, 0f
	ldrb		w3, [x1]
	byte\order	w3
	crc32\c\()b	w0, w0, w3
0:	bit\order	w0
	ret
	.endm
126
/*
 * crc32_le_arm64 - scalar CRC32 routine, le flavour.
 *
 * Expands __crc32 with its defaults: the crc32{b,h,w,x} instructions and
 * order=le. Registers: w0 = CRC in/out, x1 = input pointer, x2 = length
 * in bytes. The macro expansion provides the ret.
 */
	.align		5
SYM_FUNC_START(crc32_le_arm64)
	__crc32
SYM_FUNC_END(crc32_le_arm64)
131
/*
 * crc32c_le_arm64 - scalar CRC32C routine, le flavour.
 *
 * Expands __crc32 with c set, which selects the crc32c{b,h,w,x}
 * instructions, and the default order=le. Registers: w0 = CRC in/out,
 * x1 = input pointer, x2 = length in bytes. The macro expansion provides
 * the ret.
 */
	.align		5
SYM_FUNC_START(crc32c_le_arm64)
	__crc32		c
SYM_FUNC_END(crc32c_le_arm64)
136
/*
 * crc32_be_arm64 - scalar CRC32 routine, be flavour.
 *
 * Expands __crc32 with order=be and the crc32{b,h,w,x} instructions: the
 * CRC in w0 is bit-reversed at entry and exit and the input data goes
 * through the be fixup macros before it reaches the CRC instructions.
 * Registers: w0 = CRC in/out, x1 = input pointer, x2 = length in bytes.
 * The macro expansion provides the ret.
 */
	.align		5
SYM_FUNC_START(crc32_be_arm64)
	__crc32		order=be
SYM_FUNC_END(crc32_be_arm64)
141
/*
 * crc4way - body of the 4-way interleaved CRC32/CRC32C routines.
 *
 * Macro arguments:
 *   insn   64-bit CRC instruction to use (crc32x or crc32cx)
 *   table  lookup table with the folding coefficients that go with insn
 *   order  "le" (default) or "be"; selects the bit<order> and <order>
 *          fixup macros
 *
 * Registers:
 *   w0: input CRC at entry, output CRC at exit
 *   x1: pointer to input buffer
 *   x2: length of input in bytes
 *   x1-x17, v0-v5 and the condition flags are modified
 *
 * Only whole 64-byte blocks are consumed: the length is shifted right by
 * 6 and the trailing len % 64 bytes are never read. When len is below 64
 * nothing is read and the CRC in w0 is returned unchanged.
 *
 * The blocks are handled in chunks of at most 64. Every chunk is cut into
 * four contiguous streams of equal size. Their CRCs are computed side by
 * side in w0, w4, w5 and w6 and then merged, using three carryless
 * multiplications by the coefficients from the table row that belongs to
 * the chunk size.
 *
 * The expansion ends in ret, so the macro has to be the entire body of a
 * function.
 */
	in		.req	x1
	len		.req	x2

	.macro		crc4way, insn, table, order=le
	bit\order	w0
	lsr		len, len, #6		// len := # of 64-byte blocks

	/*
	 * Without a full block x3 would start at zero below and the
	 * decrement-then-test inner loop would wrap around. Leave instead.
	 */
	cbz		len, .Lc\@

	/* Process up to 64 blocks of 64 bytes at a time */
.La\@:	mov		x3, #64
	cmp		len, #64
	csel		x3, x3, len, hi		// x3 := min(len, 64)
	sub		len, len, x3

	/* Divide the input into 4 contiguous blocks */
	add		x4, x3, x3, lsl #1	// x4 :=  3 * x3
	add		x7, in, x3, lsl #4	// x7 := in + 16 * x3
	add		x8, in, x3, lsl #5	// x8 := in + 32 * x3
	add		x9, in, x4, lsl #4	// x9 := in + 16 * x4

	/* Load the folding coefficients from the lookup table */
	adr_l		x5, \table - 12		// entry 0 omitted
	add		x5, x5, x4, lsl #2	// x5 += 12 * x3
	ldp		s0, s1, [x5]
	ldr		s2, [x5, #8]

	/* Zero init partial CRCs for this iteration */
	mov		w4, wzr
	mov		w5, wzr
	mov		w6, wzr
	mov		x17, xzr

	/*
	 * Each round takes 16 bytes from every stream. The second word of
	 * the fourth stream (x17) is not applied in the round that loads
	 * it but at the top of the next one. After the last round it is
	 * still pending and gets merged in the combine step below.
	 */
.Lb\@:	sub		x3, x3, #1
	\insn		w6, w6, x17
	ldp		x10, x11, [in], #16
	ldp		x12, x13, [x7], #16
	ldp		x14, x15, [x8], #16
	ldp		x16, x17, [x9], #16

	\order		x10, x11, x12, x13, x14, x15, x16, x17

	/* Apply the CRC transform to 4 16-byte blocks in parallel */
	\insn		w0, w0, x10
	\insn		w4, w4, x12
	\insn		w5, w5, x14
	\insn		w6, w6, x16
	\insn		w0, w0, x11
	\insn		w4, w4, x13
	\insn		w5, w5, x15
	cbnz		x3, .Lb\@

	/*
	 * Combine the 4 partial results into w0: multiply the CRCs of the
	 * first three streams by their coefficients, XOR the products with
	 * the pending word of the fourth stream and run the sum through one
	 * more CRC instruction on top of w6.
	 */
	mov		v3.d[0], x0
	mov		v4.d[0], x4
	mov		v5.d[0], x5
	pmull		v0.1q, v0.1d, v3.1d
	pmull		v1.1q, v1.1d, v4.1d
	pmull		v2.1q, v2.1d, v5.1d
	eor		v0.8b, v0.8b, v1.8b
	eor		v0.8b, v0.8b, v2.8b
	mov		x5, v0.d[0]
	eor		x5, x5, x17
	\insn		w0, w6, x5

	/* x9 has walked to the end of the chunk: continue from there */
	mov		in, x9
	cbnz		len, .La\@

.Lc\@:	bit\order	w0
	ret
	.endm
216
/*
 * crc32c_le_arm64_4way - 4-way interleaved CRC32C routine, le flavour.
 *
 * Expands crc4way with the crc32cx instruction and the coefficient table
 * at .L0. Registers: w0 = CRC in/out, x1 = input pointer, x2 = length in
 * bytes; only whole 64-byte blocks of the input are consumed. The macro
 * expansion provides the ret.
 */
	.align		5
SYM_FUNC_START(crc32c_le_arm64_4way)
	crc4way		crc32cx, .L0
SYM_FUNC_END(crc32c_le_arm64_4way)
221
/*
 * crc32_le_arm64_4way - 4-way interleaved CRC32 routine, le flavour.
 *
 * Expands crc4way with the crc32x instruction and the coefficient table
 * at .L1. Registers: w0 = CRC in/out, x1 = input pointer, x2 = length in
 * bytes; only whole 64-byte blocks of the input are consumed. The macro
 * expansion provides the ret.
 */
	.align		5
SYM_FUNC_START(crc32_le_arm64_4way)
	crc4way		crc32x, .L1
SYM_FUNC_END(crc32_le_arm64_4way)
226
/*
 * crc32_be_arm64_4way - 4-way interleaved CRC32 routine, be flavour.
 *
 * Expands crc4way with the crc32x instruction, the coefficient table at
 * .L1 and order=be, so the CRC in w0 is bit-reversed at entry and exit
 * and the input words go through the be fixup macro. Registers: w0 = CRC
 * in/out, x1 = input pointer, x2 = length in bytes; only whole 64-byte
 * blocks of the input are consumed. The macro expansion provides the ret.
 */
	.align		5
SYM_FUNC_START(crc32_be_arm64_4way)
	crc4way		crc32x, .L1, be
SYM_FUNC_END(crc32_be_arm64_4way)
231
/*
 * .L0 - folding coefficients for crc4way when used with crc32cx (passed
 * as the table argument by crc32c_le_arm64_4way).
 *
 * 64 rows of three 32-bit values, 12 bytes per row. Row n - 1 belongs to
 * a chunk of n 64-byte blocks (1 <= n <= 64): crc4way computes the row
 * address as table - 12 + 12 * n, so no row exists for n = 0, and loads
 * the three values into s0, s1 and s2. The table start is aligned to
 * 64 bytes.
 */
	.section	.rodata, "a", %progbits
	.align		6
.L0:	.long		0xddc0152b, 0xba4fc28e, 0x493c7d27
	.long		0x0715ce53, 0x9e4addf8, 0xba4fc28e
	.long		0xc96cfdc0, 0x0715ce53, 0xddc0152b
	.long		0xab7aff2a, 0x0d3b6092, 0x9e4addf8
	.long		0x299847d5, 0x878a92a7, 0x39d3b296
	.long		0xb6dd949b, 0xab7aff2a, 0x0715ce53
	.long		0xa60ce07b, 0x83348832, 0x47db8317
	.long		0xd270f1a2, 0xb9e02b86, 0x0d3b6092
	.long		0x65863b64, 0xb6dd949b, 0xc96cfdc0
	.long		0xb3e32c28, 0xbac2fd7b, 0x878a92a7
	.long		0xf285651c, 0xce7f39f4, 0xdaece73e
	.long		0x271d9844, 0xd270f1a2, 0xab7aff2a
	.long		0x6cb08e5c, 0x2b3cac5d, 0x2162d385
	.long		0xcec3662e, 0x1b03397f, 0x83348832
	.long		0x8227bb8a, 0xb3e32c28, 0x299847d5
	.long		0xd7a4825c, 0xdd7e3b0c, 0xb9e02b86
	.long		0xf6076544, 0x10746f3c, 0x18b33a4e
	.long		0x98d8d9cb, 0x271d9844, 0xb6dd949b
	.long		0x57a3d037, 0x93a5f730, 0x78d9ccb7
	.long		0x3771e98f, 0x6b749fb2, 0xbac2fd7b
	.long		0xe0ac139e, 0xcec3662e, 0xa60ce07b
	.long		0x6f345e45, 0xe6fc4e6a, 0xce7f39f4
	.long		0xa2b73df1, 0xb0cd4768, 0x61d82e56
	.long		0x86d8e4d2, 0xd7a4825c, 0xd270f1a2
	.long		0xa90fd27a, 0x0167d312, 0xc619809d
	.long		0xca6ef3ac, 0x26f6a60a, 0x2b3cac5d
	.long		0x4597456a, 0x98d8d9cb, 0x65863b64
	.long		0xc9c8b782, 0x68bce87a, 0x1b03397f
	.long		0x62ec6c6d, 0x6956fc3b, 0xebb883bd
	.long		0x2342001e, 0x3771e98f, 0xb3e32c28
	.long		0xe8b6368b, 0x2178513a, 0x064f7f26
	.long		0x9ef68d35, 0x170076fa, 0xdd7e3b0c
	.long		0x0b0bf8ca, 0x6f345e45, 0xf285651c
	.long		0x02ee03b2, 0xff0dba97, 0x10746f3c
	.long		0x135c83fd, 0xf872e54c, 0xc7a68855
	.long		0x00bcf5f6, 0x86d8e4d2, 0x271d9844
	.long		0x58ca5f00, 0x5bb8f1bc, 0x8e766a0c
	.long		0xded288f8, 0xb3af077a, 0x93a5f730
	.long		0x37170390, 0xca6ef3ac, 0x6cb08e5c
	.long		0xf48642e9, 0xdd66cbbb, 0x6b749fb2
	.long		0xb25b29f2, 0xe9e28eb4, 0x1393e203
	.long		0x45cddf4e, 0xc9c8b782, 0xcec3662e
	.long		0xdfd94fb2, 0x93e106a4, 0x96c515bb
	.long		0x021ac5ef, 0xd813b325, 0xe6fc4e6a
	.long		0x8e1450f7, 0x2342001e, 0x8227bb8a
	.long		0xe0cdcf86, 0x6d9a4957, 0xb0cd4768
	.long		0x613eee91, 0xd2c3ed1a, 0x39c7ff35
	.long		0xbedc6ba1, 0x9ef68d35, 0xd7a4825c
	.long		0x0cd1526a, 0xf2271e60, 0x0ab3844b
	.long		0xd6c3a807, 0x2664fd8b, 0x0167d312
	.long		0x1d31175f, 0x02ee03b2, 0xf6076544
	.long		0x4be7fd90, 0x363bd6b3, 0x26f6a60a
	.long		0x6eeed1c9, 0x5fabe670, 0xa741c1bf
	.long		0xb3a6da94, 0x00bcf5f6, 0x98d8d9cb
	.long		0x2e7d11a7, 0x17f27698, 0x49c3cc9c
	.long		0x889774e1, 0xaa7c7ad5, 0x68bce87a
	.long		0x8a074012, 0xded288f8, 0x57a3d037
	.long		0xbd0bb25f, 0x6d390dec, 0x6956fc3b
	.long		0x3be3c09b, 0x6353c1cc, 0x42d98888
	.long		0x465a4eee, 0xf48642e9, 0x3771e98f
	.long		0x2e5f3c8c, 0xdd35bc8d, 0xb42ae3d9
	.long		0xa52f58ec, 0x9a5ede41, 0x2178513a
	.long		0x47972100, 0x45cddf4e, 0xe0ac139e
	.long		0x359674f7, 0xa51b6135, 0x170076fa
298
/*
 * .L1 - folding coefficients for crc4way when used with crc32x (passed as
 * the table argument by crc32_le_arm64_4way and crc32_be_arm64_4way).
 *
 * 64 rows of three 32-bit values, 12 bytes per row. Row n - 1 belongs to
 * a chunk of n 64-byte blocks (1 <= n <= 64): crc4way computes the row
 * address as table - 12 + 12 * n, so no row exists for n = 0, and loads
 * the three values into s0, s1 and s2.
 */
.L1:	.long		0xaf449247, 0x81256527, 0xccaa009e
	.long		0x57c54819, 0x1d9513d7, 0x81256527
	.long		0x3f41287a, 0x57c54819, 0xaf449247
	.long		0xf5e48c85, 0x910eeec1, 0x1d9513d7
	.long		0x1f0c2cdd, 0x9026d5b1, 0xae0b5394
	.long		0x71d54a59, 0xf5e48c85, 0x57c54819
	.long		0x1c63267b, 0xfe807bbd, 0x0cbec0ed
	.long		0xd31343ea, 0xe95c1271, 0x910eeec1
	.long		0xf9d9c7ee, 0x71d54a59, 0x3f41287a
	.long		0x9ee62949, 0xcec97417, 0x9026d5b1
	.long		0xa55d1514, 0xf183c71b, 0xd1df2327
	.long		0x21aa2b26, 0xd31343ea, 0xf5e48c85
	.long		0x9d842b80, 0xeea395c4, 0x3c656ced
	.long		0xd8110ff1, 0xcd669a40, 0xfe807bbd
	.long		0x3f9e9356, 0x9ee62949, 0x1f0c2cdd
	.long		0x1d6708a0, 0x0c30f51d, 0xe95c1271
	.long		0xef82aa68, 0xdb3935ea, 0xb918a347
	.long		0xd14bcc9b, 0x21aa2b26, 0x71d54a59
	.long		0x99cce860, 0x356d209f, 0xff6f2fc2
	.long		0xd8af8e46, 0xc352f6de, 0xcec97417
	.long		0xf1996890, 0xd8110ff1, 0x1c63267b
	.long		0x631bc508, 0xe95c7216, 0xf183c71b
	.long		0x8511c306, 0x8e031a19, 0x9b9bdbd0
	.long		0xdb3839f3, 0x1d6708a0, 0xd31343ea
	.long		0x7a92fffb, 0xf7003835, 0x4470ac44
	.long		0x6ce68f2a, 0x00eba0c8, 0xeea395c4
	.long		0x4caaa263, 0xd14bcc9b, 0xf9d9c7ee
	.long		0xb46f7cff, 0x9a1b53c8, 0xcd669a40
	.long		0x60290934, 0x81b6f443, 0x6d40f445
	.long		0x8e976a7d, 0xd8af8e46, 0x9ee62949
	.long		0xdcf5088a, 0x9dbdc100, 0x145575d5
	.long		0x1753ab84, 0xbbf2f6d6, 0x0c30f51d
	.long		0x255b139e, 0x631bc508, 0xa55d1514
	.long		0xd784eaa8, 0xce26786c, 0xdb3935ea
	.long		0x6d2c864a, 0x8068c345, 0x2586d334
	.long		0x02072e24, 0xdb3839f3, 0x21aa2b26
	.long		0x06689b0a, 0x5efd72f5, 0xe0575528
	.long		0x1e52f5ea, 0x4117915b, 0x356d209f
	.long		0x1d3d1db6, 0x6ce68f2a, 0x9d842b80
	.long		0x3796455c, 0xb8e0e4a8, 0xc352f6de
	.long		0xdf3a4eb3, 0xc55a2330, 0xb84ffa9c
	.long		0x28ae0976, 0xb46f7cff, 0xd8110ff1
	.long		0x9764bc8d, 0xd7e7a22c, 0x712510f0
	.long		0x13a13e18, 0x3e9a43cd, 0xe95c7216
	.long		0xb8ee242e, 0x8e976a7d, 0x3f9e9356
	.long		0x0c540e7b, 0x753c81ff, 0x8e031a19
	.long		0x9924c781, 0xb9220208, 0x3edcde65
	.long		0x3954de39, 0x1753ab84, 0x1d6708a0
	.long		0xf32238b5, 0xbec81497, 0x9e70b943
	.long		0xbbd2cd2c, 0x0925d861, 0xf7003835
	.long		0xcc401304, 0xd784eaa8, 0xef82aa68
	.long		0x4987e684, 0x6044fbb0, 0x00eba0c8
	.long		0x3aa11427, 0x18fe3b4a, 0x87441142
	.long		0x297aad60, 0x02072e24, 0xd14bcc9b
	.long		0xf60c5e51, 0x6ef6f487, 0x5b7fdd0a
	.long		0x632d78c5, 0x3fc33de4, 0x9a1b53c8
	.long		0x25b8822a, 0x1e52f5ea, 0x99cce860
	.long		0xd4fc84bc, 0x1af62fb8, 0x81b6f443
	.long		0x5690aa32, 0xa91fdefb, 0x688a110e
	.long		0x1357a093, 0x3796455c, 0xd8af8e46
	.long		0x798fdd33, 0xaaa18a37, 0x357b9517
	.long		0xc2815395, 0x54d42691, 0x9dbdc100
	.long		0x21cfc0f7, 0x28ae0976, 0xf1996890
	.long		0xa0decef3, 0x7b4aa8b7, 0xbbf2f6d6
363