/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated GHASH implementation with ARMv8 ASIMD instructions.
 *
 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */
/* xref: /linux/lib/crypto/arm64/ghash-neon-core.S (revision 370c3883195566ee3e7d79e0146c3d735a406573) */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * Symbolic names for the NEON registers used in this file.
	 * Note that XH and IN1 are two names for the same register (v7).
	 */
	SHASH		.req	v0	// key, loaded from [x3]
	SHASH2		.req	v1	// SHASH ^ (SHASH with its halves swapped)
	T1		.req	v2	// scratch / current input block
	T2		.req	v3	// scratch
	XM		.req	v5	// (a1 + a0)(b1 + b0) product
	XL		.req	v6	// accumulator and a0 * b0 product
	XH		.req	v7	// a1 * b1 product
	IN1		.req	v7	// half-swapped input block (shares v7 with XH)

	/* constant masks, built in __pmull_pre_p8 */
	k00_16		.req	v8
	k32_48		.req	v9

	/* temporaries of __pmull_p8_tail */
	t3		.req	v10
	t4		.req	v11
	t5		.req	v12
	t6		.req	v13
	t7		.req	v14
	t8		.req	v15
	t9		.req	v16

	/* tbl index vectors, built in __pmull_pre_p8 */
	perm1		.req	v17
	perm2		.req	v18
	perm3		.req	v19

	/* SHASH permuted through perm1..perm3 and a fourth index vector */
	sh1		.req	v20
	sh2		.req	v21
	sh3		.req	v22
	sh4		.req	v23

	/* low 8 bytes of SHASH2 rotated by 1..4 bytes */
	ss1		.req	v24
	ss2		.req	v25
	ss3		.req	v26
	ss4		.req	v27

	.text

/*
 * __pmull_p8 rq, ad, bd
 *
 * Multiply using the low 64 bits of \ad.
 *   rq: destination register name (no arrangement suffix)
 *   ad: operand A register name (no arrangement suffix)
 *   bd: SHASH or SHASH2; selects the __pmull_p8_<bd> wrapper
 *
 * Loads t3/t5/t7 with the low 8 bytes of \ad rotated by 1, 2 and 3 bytes,
 * which is what __pmull_p8_tail expects to find there as A1..A3.
 */
	.macro		__pmull_p8, rq, ad, bd
	ext		t3.8b, \ad\().8b, \ad\().8b, #1		// A1
	ext		t5.8b, \ad\().8b, \ad\().8b, #2		// A2
	ext		t7.8b, \ad\().8b, \ad\().8b, #3		// A3

	__pmull_p8_\bd	\rq, \ad
	.endm

/*
 * __pmull2_p8 rq, ad, bd
 *
 * Counterpart of __pmull_p8 that dispatches to the __pmull2_p8_<bd> wrapper.
 *   rq: destination register name (no arrangement suffix)
 *   ad: operand A register name (no arrangement suffix)
 *   bd: selects the wrapper; only __pmull2_p8_SHASH is defined in this file
 *
 * A1..A3 are produced with tbl over all 16 bytes of \ad, using the index
 * vectors perm1..perm3, so those must already be set up.
 */
	.macro		__pmull2_p8, rq, ad, bd
	tbl		t3.16b, {\ad\().16b}, perm1.16b		// A1
	tbl		t5.16b, {\ad\().16b}, perm2.16b		// A2
	tbl		t7.16b, {\ad\().16b}, perm3.16b		// A3

	__pmull2_p8_\bd	\rq, \ad
	.endm

/*
 * __pmull_p8_SHASH rq, ad
 *
 * Invoke __pmull_p8_tail on the 8b views of \ad and SHASH: plain pmull
 * (empty \t argument), with sh1..sh4 as the B1..B4 operands.
 */
	.macro		__pmull_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
	.endm

/*
 * __pmull_p8_SHASH2 rq, ad
 *
 * Invoke __pmull_p8_tail on the 8b views of \ad and SHASH2: plain pmull
 * (empty \t argument), with ss1..ss4 as the B1..B4 operands.
 */
	.macro		__pmull_p8_SHASH2, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
	.endm

/*
 * __pmull2_p8_SHASH rq, ad
 *
 * Invoke __pmull_p8_tail on the 16b views of \ad and SHASH with \t = 2, so
 * every multiply in the tail is emitted as pmull2. sh1..sh4 serve as the
 * B1..B4 operands.
 */
	.macro		__pmull2_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
	.endm

/*
 * __pmull_p8_tail rq, ad, bd, nb, t, b1, b2, b3, b4
 *
 * Shared body of the __pmull_p8_* / __pmull2_p8_* wrappers.
 *   rq:     destination register name (no arrangement suffix)
 *   ad:     operand A, arrangement included (e.g. v6.8b)
 *   bd:     operand B, arrangement included
 *   nb:     source arrangement applied to t3/t5/t7 and b1..b4 (8b or 16b)
 *   t:      suffix appended to "pmull" (empty, or 2 for pmull2)
 *   b1..b4: register names holding the B1..B4 variants of B
 *
 * Expects t3, t5 and t7 to hold A1, A2 and A3 on entry, and k32_48 /
 * k00_16 to hold the masks built by __pmull_pre_p8.
 * Forms eight pmull products (D..K), masks and realigns the partial sums
 * L, M, N and K, and XORs them into D, leaving the result in \rq.
 * Clobbers t3..t9.
 */
	.macro		__pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
	pmull\t		t3.8h, t3.\nb, \bd			// F = A1*B
	pmull\t		t4.8h, \ad, \b1\().\nb			// E = A*B1
	pmull\t		t5.8h, t5.\nb, \bd			// H = A2*B
	pmull\t		t6.8h, \ad, \b2\().\nb			// G = A*B2
	pmull\t		t7.8h, t7.\nb, \bd			// J = A3*B
	pmull\t		t8.8h, \ad, \b3\().\nb			// I = A*B3
	pmull\t		t9.8h, \ad, \b4\().\nb			// K = A*B4
	pmull\t		\rq\().8h, \ad, \bd			// D = A*B

	eor		t3.16b, t3.16b, t4.16b			// L = E + F
	eor		t5.16b, t5.16b, t6.16b			// M = G + H
	eor		t7.16b, t7.16b, t8.16b			// N = I + J

	// gather the low halves of L/M (and N/K) in one register and the
	// high halves in another, so each pair is processed together
	uzp1		t4.2d, t3.2d, t5.2d
	uzp2		t3.2d, t3.2d, t5.2d
	uzp1		t6.2d, t7.2d, t9.2d
	uzp2		t7.2d, t7.2d, t9.2d

	// t3 = (L) (P0 + P1) << 8
	// t5 = (M) (P2 + P3) << 16
	eor		t4.16b, t4.16b, t3.16b
	and		t3.16b, t3.16b, k32_48.16b

	// t7 = (N) (P4 + P5) << 24
	// t9 = (K) (P6 + P7) << 32
	eor		t6.16b, t6.16b, t7.16b
	and		t7.16b, t7.16b, k00_16.16b

	eor		t4.16b, t4.16b, t3.16b
	eor		t6.16b, t6.16b, t7.16b

	// undo the uzp split: back to one register per partial sum
	zip2		t5.2d, t4.2d, t3.2d
	zip1		t3.2d, t4.2d, t3.2d
	zip2		t9.2d, t6.2d, t7.2d
	zip1		t7.2d, t6.2d, t7.2d

	// rotate each 128-bit value by 1, 2, 3 and 4 bytes respectively
	ext		t3.16b, t3.16b, t3.16b, #15
	ext		t5.16b, t5.16b, t5.16b, #14
	ext		t7.16b, t7.16b, t7.16b, #13
	ext		t9.16b, t9.16b, t9.16b, #12

	eor		t3.16b, t3.16b, t5.16b
	eor		t7.16b, t7.16b, t9.16b
	eor		\rq\().16b, \rq\().16b, t3.16b
	eor		\rq\().16b, \rq\().16b, t7.16b
	.endm

/*
 * __pmull_pre_p8
 *
 * One-time setup for the p8 multiply macros. Expects SHASH to be loaded.
 * Produces:
 *   SHASH2         SHASH XORed with itself with the two halves swapped
 *   k00_16, k32_48 the masks described below
 *   perm1..perm3   tbl index vectors rotating each 8-byte half by 1..3 bytes
 *   sh1..sh4       SHASH with each 8-byte half rotated by 1..4 bytes
 *   ss1..ss4       low 8 bytes of SHASH2 rotated by 1..4 bytes
 * Clobbers x5 and T1 (T1 is left holding the rotate-by-4 index vector).
 */
	.macro		__pmull_pre_p8
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

	// k00_16 := 0x0000000000000000_000000000000ffff
	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
	movi		k32_48.2d, #0xffffffff
	mov		k32_48.h[2], k32_48.h[0]
	ushr		k00_16.2d, k32_48.2d, #32

	// prepare the permutation vectors
	// (XOR with 8 in the low half turns indices 9..15,8 into 1..7,0)
	mov_q		x5, 0x080f0e0d0c0b0a09
	movi		T1.8b, #8
	dup		perm1.2d, x5
	eor		perm1.16b, perm1.16b, T1.16b
	// ushr + sli rotate each 64-bit lane of perm1 by 8, 16 and 24 bits
	ushr		perm2.2d, perm1.2d, #8
	ushr		perm3.2d, perm1.2d, #16
	ushr		T1.2d, perm1.2d, #24
	sli		perm2.2d, perm1.2d, #56
	sli		perm3.2d, perm1.2d, #48
	sli		T1.2d, perm1.2d, #40

	// precompute loop invariants
	tbl		sh1.16b, {SHASH.16b}, perm1.16b
	tbl		sh2.16b, {SHASH.16b}, perm2.16b
	tbl		sh3.16b, {SHASH.16b}, perm3.16b
	tbl		sh4.16b, {SHASH.16b}, T1.16b
	ext		ss1.8b, SHASH2.8b, SHASH2.8b, #1
	ext		ss2.8b, SHASH2.8b, SHASH2.8b, #2
	ext		ss3.8b, SHASH2.8b, SHASH2.8b, #3
	ext		ss4.8b, SHASH2.8b, SHASH2.8b, #4
	.endm

/*
 * __pmull_reduce_p8
 *
 * Reduction step applied after the three partial multiplies.
 * In:  XL, XM, XH hold the partial products; T1 holds the value to fold
 *      into XM.
 * Out: XL, XH and T2 are updated. The caller finishes the step by XORing
 *      XH into T2 and then T2 into XL.
 * Clobbers T1.
 * NOTE(review): the shift amounts (57/62/63 left, 1/6/1 right) presumably
 * encode the GHASH field polynomial; confirm against the algorithm reference.
 */
	.macro		__pmull_reduce_p8
	eor		XM.16b, XM.16b, T1.16b

	// spread the middle term over the high half of XL and low half of XH
	mov		XL.d[1], XM.d[0]
	mov		XH.d[0], XM.d[1]

	shl		T1.2d, XL.2d, #57
	shl		T2.2d, XL.2d, #62
	eor		T2.16b, T2.16b, T1.16b
	shl		T1.2d, XL.2d, #63
	eor		T2.16b, T2.16b, T1.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		T2.16b, T2.16b, T1.16b

	mov		XL.d[1], T2.d[0]
	mov		XH.d[0], T2.d[1]

	ushr		T2.2d, XL.2d, #1
	eor		XH.16b, XH.16b, XL.16b
	eor		XL.16b, XL.16b, T2.16b
	ushr		T2.2d, T2.2d, #6
	ushr		XL.2d, XL.2d, #1
	.endm

/*
 * void pmull_ghash_update_p8(size_t blocks, struct polyval_elem *dg,
 *			      const u8 *src,
 *			      const struct polyval_elem *h)
 *
 * In:	x0 = blocks	number of 16-byte blocks to consume from src
 *	x1 = dg		16-byte accumulator; loaded on entry, stored on exit
 *	x2 = src	input, read 16 bytes per iteration (post-incremented)
 *	x3 = h		16-byte key, loaded once into SHASH
 * Clobbers: x0, x2, x5, v0-v3, v5-v27
 *
 * The block count is only tested at the bottom of the loop, so the first
 * block is processed unconditionally and blocks == 0 would wrap the counter.
 * NOTE(review): callers presumably guarantee blocks >= 1; confirm.
 * NOTE(review): v8-v15 are written without being saved here; confirm that
 * the caller's NEON context handling makes that safe.
 */
SYM_FUNC_START(pmull_ghash_update_p8)
	ld1		{SHASH.2d}, [x3]
	ld1		{XL.2d}, [x1]

	__pmull_pre_p8

0:	ld1		{T1.2d}, [x2], #16
	sub		x0, x0, #1

	/* multiply XL by SHASH in GF(2^128) */
	rev64		T1.16b, T1.16b			// byte-reverse each 64-bit lane

	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8
	eor		T1.16b, T1.16b, T2.16b
	eor		XL.16b, XL.16b, IN1.16b		// fold the input block into XL

	// IN1 is dead from here on: the next macro overwrites v7 as XH
	__pmull2_p8	XH, XL, SHASH			// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	__pmull_p8 	XL, XL, SHASH			// a0 * b0
	__pmull_p8	XM, T1, SHASH2			// (a1 + a0)(b1 + b0)

	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_p8

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbnz		x0, 0b				// more blocks to process?

	st1		{XL.2d}, [x1]
	ret
SYM_FUNC_END(pmull_ghash_update_p8)