/*
 * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions
 *
 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see http://www.gnu.org/licenses
 *
 * Please  visit http://www.xyratex.com/contact if you need additional
 * information or have any questions.
 *
 * GPL HEADER END
 */

/*
 * Copyright 2012 Xyratex Technology Limited
 *
 * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
 * calculation.
 * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
 * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
 * at:
 * https://www.intel.com/products/processor/manuals/
 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
 * Volume 2B: Instruction Set Reference, N-Z
 *
 * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
 *	      Alexander Boyko <Alexander_Boyko@xyratex.com>
 */
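
/*
 * Implementation note: on ARM the PCLMULQDQ role described above is played
 * by the 64x64 -> 128 bit carryless multiply vmull.p64 from the Crypto
 * Extensions; the message is folded four 128-bit registers at a time and the
 * remainder is reduced with a Barrett step (see the constants below).
 *
 * For reference only (not used by the build), both PMULL entry points are
 * meant to agree with a plain bit-reflected CRC over the bytes they consume;
 * no ~0 pre/post-conditioning happens here, that is left to callers.  An
 * illustrative C model (the helper name is ours, not an existing API):
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	// poly = 0xEDB88320 for CRC-32, 0x82F63B78 for CRC-32C
 *	static uint32_t crc32_bitwise_ref(uint32_t crc, const unsigned char *p,
 *					  size_t len, uint32_t poly)
 *	{
 *		while (len--) {
 *			crc ^= *p++;
 *			for (int i = 0; i < 8; i++)
 *				crc = (crc >> 1) ^ ((crc & 1) ? poly : 0);
 *		}
 *		return crc;
 *	}
 */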
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align		6
	.arch		armv8-a
	.arch_extension	crc
	.fpu		crypto-neon-fp-armv8

.Lcrc32_constants:
	/*
	 * [(x4*128+32 mod P(x) << 32)]'  << 1   = 0x154442bd4
	 * #define CONSTANT_R1  0x154442bd4LL
	 *
	 * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
	 * #define CONSTANT_R2  0x1c6e41596LL
	 */
	.quad		0x0000000154442bd4
	.quad		0x00000001c6e41596

	/*
	 * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
	 * #define CONSTANT_R3  0x1751997d0LL
	 *
	 * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
	 * #define CONSTANT_R4  0x0ccaa009eLL
	 */
	.quad		0x00000001751997d0
	.quad		0x00000000ccaa009e

	/*
	 * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
	 * #define CONSTANT_R5  0x163cd6124LL
	 */
	.quad		0x0000000163cd6124
	.quad		0x00000000FFFFFFFF

	/*
	 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
	 *
	 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
	 *                                                      = 0x1F7011641LL
	 * #define CONSTANT_RU  0x1F7011641LL
	 */
	.quad		0x00000001DB710641
	.quad		0x00000001F7011641

.Lcrc32c_constants:
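	/*
	 * Same layout as .Lcrc32_constants above (R1, R2, R3, R4, R5,
	 * 32-bit mask, P', RU), but with the fold and Barrett constants
	 * derived for the Castagnoli polynomial used by CRC-32C
	 * (0x1EDC6F41, reflected form 0x82F63B78).  The code below indexes
	 * both tables with the same offsets from r3.
	 */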
	.quad		0x00000000740eef02
	.quad		0x000000009e4addf8
	.quad		0x00000000f20c0dfe
	.quad		0x000000014cd00bd6
	.quad		0x00000000dd45aab8
	.quad		0x00000000FFFFFFFF
	.quad		0x0000000105ec76f0
	.quad		0x00000000dea713f1

	dCONSTANTl	.req	d0
	dCONSTANTh	.req	d1
	qCONSTANT	.req	q0

	BUF		.req	r0
	LEN		.req	r1
	CRC		.req	r2

	qzr		.req	q9

	/**
	 * Calculate CRC32(C) using 64x64 polynomial multiplication (vmull.p64)
	 * BUF - buffer, 16-byte aligned (the loads below use :128 alignment hints)
	 * LEN - length in bytes; rounded down to a multiple of 16, must be > 63
	 * CRC - initial crc32
	 * return the updated crc32, in r0
	 * uint crc32_pmull_le(unsigned char const *buffer,
	 *                     size_t len, uint crc32)
	 */
SYM_FUNC_START(crc32_pmull_le)
	adr		r3, .Lcrc32_constants
	b		0f
SYM_FUNC_END(crc32_pmull_le)

SYM_FUNC_START(crc32c_pmull_le)
	adr		r3, .Lcrc32c_constants

0:	bic		LEN, LEN, #15
	vld1.8		{q1-q2}, [BUF, :128]!
	vld1.8		{q3-q4}, [BUF, :128]!
	vmov.i8		qzr, #0
	vmov.i8		qCONSTANT, #0
	vmov.32		dCONSTANTl[0], CRC
	veor.8		d2, d2, dCONSTANTl
	sub		LEN, LEN, #0x40
	cmp		LEN, #0x40
	blt		less_64

	vld1.64		{qCONSTANT}, [r3]

loop_64:		/* fold 64 bytes (a full cache line) per iteration */
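	/*
	 * Each pass consumes 64 bytes.  For each of the four 128-bit
	 * accumulators q1-q4 the previous contents are folded forward as,
	 * roughly,
	 *
	 *	acc = clmul64(lo64(acc), R1) ^ clmul64(hi64(acc), R2) ^ data
	 *
	 * with R1/R2 held in dCONSTANTl/dCONSTANTh.
	 */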
	sub		LEN, LEN, #0x40

	vmull.p64	q5, d3, dCONSTANTh
	vmull.p64	q6, d5, dCONSTANTh
	vmull.p64	q7, d7, dCONSTANTh
	vmull.p64	q8, d9, dCONSTANTh

	vmull.p64	q1, d2, dCONSTANTl
	vmull.p64	q2, d4, dCONSTANTl
	vmull.p64	q3, d6, dCONSTANTl
	vmull.p64	q4, d8, dCONSTANTl

	veor.8		q1, q1, q5
	vld1.8		{q5}, [BUF, :128]!
	veor.8		q2, q2, q6
	vld1.8		{q6}, [BUF, :128]!
	veor.8		q3, q3, q7
	vld1.8		{q7}, [BUF, :128]!
	veor.8		q4, q4, q8
	vld1.8		{q8}, [BUF, :128]!

	veor.8		q1, q1, q5
	veor.8		q2, q2, q6
	veor.8		q3, q3, q7
	veor.8		q4, q4, q8

	cmp		LEN, #0x40
	bge		loop_64

less_64:		/* fold the 64-byte state down to 128 bits */
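	/*
	 * From here on R3/R4 (at [r3, #16] and [r3, #24]) replace R1/R2:
	 * the three fold steps below merge q2, q3 and q4 into q1 one at a
	 * time, each as clmul64(lo64(q1), R3) ^ clmul64(hi64(q1), R4) ^ qN,
	 * and loop_16 keeps using the same pair for any remaining blocks.
	 */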
	vldr		dCONSTANTl, [r3, #16]
	vldr		dCONSTANTh, [r3, #24]

	vmull.p64	q5, d3, dCONSTANTh
	vmull.p64	q1, d2, dCONSTANTl
	veor.8		q1, q1, q5
	veor.8		q1, q1, q2

	vmull.p64	q5, d3, dCONSTANTh
	vmull.p64	q1, d2, dCONSTANTl
	veor.8		q1, q1, q5
	veor.8		q1, q1, q3

	vmull.p64	q5, d3, dCONSTANTh
	vmull.p64	q1, d2, dCONSTANTl
	veor.8		q1, q1, q5
	veor.8		q1, q1, q4

	teq		LEN, #0
	beq		fold_64

loop_16:		/* fold the remaining 16-byte blocks into the 128-bit state */
	subs		LEN, LEN, #0x10

	vld1.8		{q2}, [BUF, :128]!
	vmull.p64	q5, d3, dCONSTANTh
	vmull.p64	q1, d2, dCONSTANTl
	veor.8		q1, q1, q5
	veor.8		q1, q1, q2

	bne		loop_16

fold_64:
	/* perform the last 64-bit fold, which also adds 32 zeroes
	 * to the input stream */
	vmull.p64	q2, d2, dCONSTANTh
	vext.8		q1, q1, qzr, #8
	veor.8		q1, q1, q2

	/* final 32-bit fold */
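	/*
	 * Using R5 (from [r3, #32]) and the 32-bit mask (from [r3, #40]),
	 * this step computes, roughly, x = clmul64(x & 0xffffffff, R5) ^
	 * (x >> 32), leaving a 64-bit value for the Barrett step below.
	 */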
	vldr		dCONSTANTl, [r3, #32]
	vldr		d6, [r3, #40]
	vmov.i8		d7, #0

	vext.8		q2, q1, qzr, #4
	vand.8		d2, d2, d6
	vmull.p64	q1, d2, dCONSTANTl
	veor.8		q1, q1, q2

	/* finish up with the bit-reversed Barrett reduction, 64 ==> 32 bits */
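	/*
	 * Bit-reflected Barrett reduction (cf. Intel's white paper on CRC
	 * computation with carryless multiplication), with P' loaded from
	 * [r3, #48] and RU from [r3, #56]; roughly:
	 *
	 *	t   = clmul64(x & 0xffffffff, RU);
	 *	t   = clmul64(t & 0xffffffff, P');
	 *	crc = (uint32_t)((x ^ t) >> 32);
	 */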
	vldr		dCONSTANTl, [r3, #48]
	vldr		dCONSTANTh, [r3, #56]

	vand.8		q2, q1, q3
	vext.8		q2, qzr, q2, #8
	vmull.p64	q2, d5, dCONSTANTh
	vand.8		q2, q2, q3
	vmull.p64	q2, d4, dCONSTANTl
	veor.8		q1, q1, q2
	vmov		r0, s5

	bx		lr
SYM_FUNC_END(crc32c_pmull_le)
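
/*
 * Usage note: both PMULL entry points above expect a 16-byte aligned buffer
 * (the loads use :128 alignment hints), round the length down to a multiple
 * of 16 and assume it is > 63, so any head/tail bytes have to be handled
 * elsewhere, e.g. by the scalar routines below.  A hypothetical caller
 * (illustration only, not this file's actual glue code; the scalar prototype
 * is inferred from the r0 = crc, r1 = buf, r2 = len convention used below):
 *
 *	uint32_t crc32_update(uint32_t crc, const unsigned char *p, size_t len)
 *	{
 *		if (len >= 64 && !((uintptr_t)p & 15)) {
 *			size_t n = len & ~(size_t)15;
 *
 *			crc = crc32_pmull_le(p, n, crc);
 *			p += n;
 *			len -= n;
 *		}
 *		return crc32_armv8_le(crc, p, len);
 *	}
 */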

	/*
	 * Scalar CRC using the ARMv8 CRC32 instructions.
	 * Register convention: r0 = current CRC, r1 = buffer, r2 = length in
	 * bytes.  The macro is expanded with \c empty for CRC-32 and with
	 * \c = "c" for CRC-32C.
	 */
	.macro		__crc32, c
	subs		ip, r2, #8
	bmi		.Ltail\c

	tst		r1, #3
	bne		.Lunaligned\c

	teq		ip, #0
.Laligned8\c:
	ldrd		r2, r3, [r1], #8
ARM_BE8(rev		r2, r2		)
ARM_BE8(rev		r3, r3		)
	crc32\c\()w	r0, r0, r2
	crc32\c\()w	r0, r0, r3
	bxeq		lr
	subs		ip, ip, #8
	bpl		.Laligned8\c

.Ltail\c:
	tst		ip, #4
	beq		2f
	ldr		r3, [r1], #4
ARM_BE8(rev		r3, r3		)
	crc32\c\()w	r0, r0, r3

2:	tst		ip, #2
	beq		1f
	ldrh		r3, [r1], #2
ARM_BE8(rev16		r3, r3		)
	crc32\c\()h	r0, r0, r3

1:	tst		ip, #1
	bxeq		lr
	ldrb		r3, [r1]
	crc32\c\()b	r0, r0, r3
	bx		lr

.Lunaligned\c:
	tst		r1, #1
	beq		2f
	ldrb		r3, [r1], #1
	subs		r2, r2, #1
	crc32\c\()b	r0, r0, r3

	tst		r1, #2
	beq		0f
2:	ldrh		r3, [r1], #2
	subs		r2, r2, #2
ARM_BE8(rev16		r3, r3		)
	crc32\c\()h	r0, r0, r3

0:	subs		ip, r2, #8
	bpl		.Laligned8\c
	b		.Ltail\c
	.endm
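
/*
 * For reference only (not used by the build): a C model of the __crc32 macro
 * above for the little-endian case, using the ACLE intrinsics from
 * <arm_acle.h> (built with the CRC extension enabled, e.g. -march=armv8-a+crc).
 * It is behaviourally equivalent rather than a line-by-line transcription;
 * the helper name and the byte-at-a-time alignment handling are ours.
 *
 *	#include <arm_acle.h>
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static uint32_t crc32_scalar_model(uint32_t crc,
 *					   const unsigned char *p, size_t len)
 *	{
 *		uint32_t w;
 *		uint16_t h;
 *
 *		// .Lunaligned: consume bytes until p is 4-byte aligned
 *		while (((uintptr_t)p & 3) && len) {
 *			crc = __crc32b(crc, *p++);
 *			len--;
 *		}
 *		// .Laligned8: two 32-bit words per iteration
 *		while (len >= 8) {
 *			memcpy(&w, p, 4);
 *			crc = __crc32w(crc, w);
 *			memcpy(&w, p + 4, 4);
 *			crc = __crc32w(crc, w);
 *			p += 8;
 *			len -= 8;
 *		}
 *		// .Ltail: 4-, 2- and 1-byte remainders
 *		if (len & 4) {
 *			memcpy(&w, p, 4);
 *			crc = __crc32w(crc, w);
 *			p += 4;
 *		}
 *		if (len & 2) {
 *			memcpy(&h, p, 2);
 *			crc = __crc32h(crc, h);
 *			p += 2;
 *		}
 *		if (len & 1)
 *			crc = __crc32b(crc, *p);
 *		return crc;
 *	}
 *
 * The CRC-32C flavour is identical with __crc32cb/__crc32ch/__crc32cw.
 */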

	.align		5
SYM_FUNC_START(crc32_armv8_le)
	__crc32
SYM_FUNC_END(crc32_armv8_le)

	.align		5
SYM_FUNC_START(crc32c_armv8_le)
	__crc32		c
SYM_FUNC_END(crc32c_armv8_le)