/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * Define one assembler symbol per vector register name, for both the
	 * .2d and the .16b arrangement, whose value is the register number:
	 * .Lv0.2d = .Lv0.16b = 0, ..., .Lv31.2d = .Lv31.16b = 31.
	 *
	 * The eor3/rax1/bcax/xar macros in this file prepend ".L" to their
	 * register operands to look these numbers up, so operands passed to
	 * them must be spelled exactly as vN.2d or vN.16b.
	 */
	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr

20*1e29a750SEric Biggers	/*
21*1e29a750SEric Biggers	 * ARMv8.2 Crypto Extensions instructions
22*1e29a750SEric Biggers	 */
23*1e29a750SEric Biggers	.macro	eor3, rd, rn, rm, ra
24*1e29a750SEric Biggers	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
25*1e29a750SEric Biggers	.endm
26*1e29a750SEric Biggers
27*1e29a750SEric Biggers	.macro	rax1, rd, rn, rm
28*1e29a750SEric Biggers	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
29*1e29a750SEric Biggers	.endm
30*1e29a750SEric Biggers
31*1e29a750SEric Biggers	.macro	bcax, rd, rn, rm, ra
32*1e29a750SEric Biggers	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
33*1e29a750SEric Biggers	.endm
34*1e29a750SEric Biggers
35*1e29a750SEric Biggers	.macro	xar, rd, rn, rm, imm6
36*1e29a750SEric Biggers	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
37*1e29a750SEric Biggers	.endm
38*1e29a750SEric Biggers
39*1e29a750SEric Biggers	/*
40*1e29a750SEric Biggers	 * size_t sha3_ce_transform(struct sha3_state *state, const u8 *data,
41*1e29a750SEric Biggers	 *			    size_t nblocks, size_t block_size)
42*1e29a750SEric Biggers	 *
43*1e29a750SEric Biggers	 * block_size is assumed to be one of 72 (SHA3-512), 104 (SHA3-384), 136
44*1e29a750SEric Biggers	 * (SHA3-256 and SHAKE256), 144 (SHA3-224), or 168 (SHAKE128).
45*1e29a750SEric Biggers	 */
46*1e29a750SEric Biggers	.text
47*1e29a750SEric BiggersSYM_FUNC_START(sha3_ce_transform)
48*1e29a750SEric Biggers	/* load state */
49*1e29a750SEric Biggers	add	x8, x0, #32
50*1e29a750SEric Biggers	ld1	{ v0.1d- v3.1d}, [x0]
51*1e29a750SEric Biggers	ld1	{ v4.1d- v7.1d}, [x8], #32
52*1e29a750SEric Biggers	ld1	{ v8.1d-v11.1d}, [x8], #32
53*1e29a750SEric Biggers	ld1	{v12.1d-v15.1d}, [x8], #32
54*1e29a750SEric Biggers	ld1	{v16.1d-v19.1d}, [x8], #32
55*1e29a750SEric Biggers	ld1	{v20.1d-v23.1d}, [x8], #32
56*1e29a750SEric Biggers	ld1	{v24.1d}, [x8]
57*1e29a750SEric Biggers
58*1e29a750SEric Biggers0:	sub	x2, x2, #1
59*1e29a750SEric Biggers	mov	w8, #24
60*1e29a750SEric Biggers	adr_l	x9, .Lsha3_rcon
61*1e29a750SEric Biggers
62*1e29a750SEric Biggers	/* load input */
63*1e29a750SEric Biggers	ld1	{v25.8b-v28.8b}, [x1], #32
64*1e29a750SEric Biggers	ld1	{v29.8b}, [x1], #8
65*1e29a750SEric Biggers	eor	v0.8b, v0.8b, v25.8b
66*1e29a750SEric Biggers	eor	v1.8b, v1.8b, v26.8b
67*1e29a750SEric Biggers	eor	v2.8b, v2.8b, v27.8b
68*1e29a750SEric Biggers	eor	v3.8b, v3.8b, v28.8b
69*1e29a750SEric Biggers	eor	v4.8b, v4.8b, v29.8b
70*1e29a750SEric Biggers
71*1e29a750SEric Biggers	ld1	{v25.8b-v28.8b}, [x1], #32
72*1e29a750SEric Biggers	eor	v5.8b, v5.8b, v25.8b
73*1e29a750SEric Biggers	eor	v6.8b, v6.8b, v26.8b
74*1e29a750SEric Biggers	eor	v7.8b, v7.8b, v27.8b
75*1e29a750SEric Biggers	eor	v8.8b, v8.8b, v28.8b
76*1e29a750SEric Biggers	cmp	x3, #72
77*1e29a750SEric Biggers	b.eq	3f	/* SHA3-512 (block_size=72)? */
78*1e29a750SEric Biggers
79*1e29a750SEric Biggers	ld1	{v25.8b-v28.8b}, [x1], #32
80*1e29a750SEric Biggers	eor	v9.8b, v9.8b, v25.8b
81*1e29a750SEric Biggers	eor	v10.8b, v10.8b, v26.8b
82*1e29a750SEric Biggers	eor	v11.8b, v11.8b, v27.8b
83*1e29a750SEric Biggers	eor	v12.8b, v12.8b, v28.8b
84*1e29a750SEric Biggers	cmp	x3, #104
85*1e29a750SEric Biggers	b.eq	3f	/* SHA3-384 (block_size=104)? */
86*1e29a750SEric Biggers
87*1e29a750SEric Biggers	ld1	{v25.8b-v28.8b}, [x1], #32
88*1e29a750SEric Biggers	eor	v13.8b, v13.8b, v25.8b
89*1e29a750SEric Biggers	eor	v14.8b, v14.8b, v26.8b
90*1e29a750SEric Biggers	eor	v15.8b, v15.8b, v27.8b
91*1e29a750SEric Biggers	eor	v16.8b, v16.8b, v28.8b
92*1e29a750SEric Biggers	cmp	x3, #144
93*1e29a750SEric Biggers	b.lt	3f	/* SHA3-256 or SHAKE256 (block_size=136)? */
94*1e29a750SEric Biggers	b.eq	2f	/* SHA3-224 (block_size=144)? */
95*1e29a750SEric Biggers
96*1e29a750SEric Biggers	/* SHAKE128 (block_size=168) */
97*1e29a750SEric Biggers	ld1	{v25.8b-v28.8b}, [x1], #32
98*1e29a750SEric Biggers	eor	v17.8b, v17.8b, v25.8b
99*1e29a750SEric Biggers	eor	v18.8b, v18.8b, v26.8b
100*1e29a750SEric Biggers	eor	v19.8b, v19.8b, v27.8b
101*1e29a750SEric Biggers	eor	v20.8b, v20.8b, v28.8b
102*1e29a750SEric Biggers	b	3f
103*1e29a750SEric Biggers2:
104*1e29a750SEric Biggers	/* SHA3-224 (block_size=144) */
105*1e29a750SEric Biggers	ld1	{v25.8b}, [x1], #8
106*1e29a750SEric Biggers	eor	v17.8b, v17.8b, v25.8b
107*1e29a750SEric Biggers
108*1e29a750SEric Biggers3:	sub	w8, w8, #1
109*1e29a750SEric Biggers
110*1e29a750SEric Biggers	eor3	v29.16b,  v4.16b,  v9.16b, v14.16b
111*1e29a750SEric Biggers	eor3	v26.16b,  v1.16b,  v6.16b, v11.16b
112*1e29a750SEric Biggers	eor3	v28.16b,  v3.16b,  v8.16b, v13.16b
113*1e29a750SEric Biggers	eor3	v25.16b,  v0.16b,  v5.16b, v10.16b
114*1e29a750SEric Biggers	eor3	v27.16b,  v2.16b,  v7.16b, v12.16b
115*1e29a750SEric Biggers	eor3	v29.16b, v29.16b, v19.16b, v24.16b
116*1e29a750SEric Biggers	eor3	v26.16b, v26.16b, v16.16b, v21.16b
117*1e29a750SEric Biggers	eor3	v28.16b, v28.16b, v18.16b, v23.16b
118*1e29a750SEric Biggers	eor3	v25.16b, v25.16b, v15.16b, v20.16b
119*1e29a750SEric Biggers	eor3	v27.16b, v27.16b, v17.16b, v22.16b
120*1e29a750SEric Biggers
121*1e29a750SEric Biggers	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
122*1e29a750SEric Biggers	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
123*1e29a750SEric Biggers	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
124*1e29a750SEric Biggers	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
125*1e29a750SEric Biggers	rax1	v27.2d, v27.2d, v29.2d	// bc[3]
126*1e29a750SEric Biggers
127*1e29a750SEric Biggers	eor	 v0.16b,  v0.16b, v30.16b
128*1e29a750SEric Biggers	xar	 v29.2d,   v1.2d,  v25.2d, (64 - 1)
129*1e29a750SEric Biggers	xar	  v1.2d,   v6.2d,  v25.2d, (64 - 44)
130*1e29a750SEric Biggers	xar	  v6.2d,   v9.2d,  v28.2d, (64 - 20)
131*1e29a750SEric Biggers	xar	  v9.2d,  v22.2d,  v26.2d, (64 - 61)
132*1e29a750SEric Biggers	xar	 v22.2d,  v14.2d,  v28.2d, (64 - 39)
133*1e29a750SEric Biggers	xar	 v14.2d,  v20.2d,  v30.2d, (64 - 18)
134*1e29a750SEric Biggers	xar	 v31.2d,   v2.2d,  v26.2d, (64 - 62)
135*1e29a750SEric Biggers	xar	  v2.2d,  v12.2d,  v26.2d, (64 - 43)
136*1e29a750SEric Biggers	xar	 v12.2d,  v13.2d,  v27.2d, (64 - 25)
137*1e29a750SEric Biggers	xar	 v13.2d,  v19.2d,  v28.2d, (64 - 8)
138*1e29a750SEric Biggers	xar	 v19.2d,  v23.2d,  v27.2d, (64 - 56)
139*1e29a750SEric Biggers	xar	 v23.2d,  v15.2d,  v30.2d, (64 - 41)
140*1e29a750SEric Biggers	xar	 v15.2d,   v4.2d,  v28.2d, (64 - 27)
141*1e29a750SEric Biggers	xar	 v28.2d,  v24.2d,  v28.2d, (64 - 14)
142*1e29a750SEric Biggers	xar	 v24.2d,  v21.2d,  v25.2d, (64 - 2)
143*1e29a750SEric Biggers	xar	  v8.2d,   v8.2d,  v27.2d, (64 - 55)
144*1e29a750SEric Biggers	xar	  v4.2d,  v16.2d,  v25.2d, (64 - 45)
145*1e29a750SEric Biggers	xar	 v16.2d,   v5.2d,  v30.2d, (64 - 36)
146*1e29a750SEric Biggers	xar	  v5.2d,   v3.2d,  v27.2d, (64 - 28)
147*1e29a750SEric Biggers	xar	 v27.2d,  v18.2d,  v27.2d, (64 - 21)
148*1e29a750SEric Biggers	xar	  v3.2d,  v17.2d,  v26.2d, (64 - 15)
149*1e29a750SEric Biggers	xar	 v25.2d,  v11.2d,  v25.2d, (64 - 10)
150*1e29a750SEric Biggers	xar	 v26.2d,   v7.2d,  v26.2d, (64 - 6)
151*1e29a750SEric Biggers	xar	 v30.2d,  v10.2d,  v30.2d, (64 - 3)
152*1e29a750SEric Biggers
153*1e29a750SEric Biggers	bcax	v20.16b, v31.16b, v22.16b,  v8.16b
154*1e29a750SEric Biggers	bcax	v21.16b,  v8.16b, v23.16b, v22.16b
155*1e29a750SEric Biggers	bcax	v22.16b, v22.16b, v24.16b, v23.16b
156*1e29a750SEric Biggers	bcax	v23.16b, v23.16b, v31.16b, v24.16b
157*1e29a750SEric Biggers	bcax	v24.16b, v24.16b,  v8.16b, v31.16b
158*1e29a750SEric Biggers
159*1e29a750SEric Biggers	ld1r	{v31.2d}, [x9], #8
160*1e29a750SEric Biggers
161*1e29a750SEric Biggers	bcax	v17.16b, v25.16b, v19.16b,  v3.16b
162*1e29a750SEric Biggers	bcax	v18.16b,  v3.16b, v15.16b, v19.16b
163*1e29a750SEric Biggers	bcax	v19.16b, v19.16b, v16.16b, v15.16b
164*1e29a750SEric Biggers	bcax	v15.16b, v15.16b, v25.16b, v16.16b
165*1e29a750SEric Biggers	bcax	v16.16b, v16.16b,  v3.16b, v25.16b
166*1e29a750SEric Biggers
167*1e29a750SEric Biggers	bcax	v10.16b, v29.16b, v12.16b, v26.16b
168*1e29a750SEric Biggers	bcax	v11.16b, v26.16b, v13.16b, v12.16b
169*1e29a750SEric Biggers	bcax	v12.16b, v12.16b, v14.16b, v13.16b
170*1e29a750SEric Biggers	bcax	v13.16b, v13.16b, v29.16b, v14.16b
171*1e29a750SEric Biggers	bcax	v14.16b, v14.16b, v26.16b, v29.16b
172*1e29a750SEric Biggers
173*1e29a750SEric Biggers	bcax	 v7.16b, v30.16b,  v9.16b,  v4.16b
174*1e29a750SEric Biggers	bcax	 v8.16b,  v4.16b,  v5.16b,  v9.16b
175*1e29a750SEric Biggers	bcax	 v9.16b,  v9.16b,  v6.16b,  v5.16b
176*1e29a750SEric Biggers	bcax	 v5.16b,  v5.16b, v30.16b,  v6.16b
177*1e29a750SEric Biggers	bcax	 v6.16b,  v6.16b,  v4.16b, v30.16b
178*1e29a750SEric Biggers
179*1e29a750SEric Biggers	bcax	 v3.16b, v27.16b,  v0.16b, v28.16b
180*1e29a750SEric Biggers	bcax	 v4.16b, v28.16b,  v1.16b,  v0.16b
181*1e29a750SEric Biggers	bcax	 v0.16b,  v0.16b,  v2.16b,  v1.16b
182*1e29a750SEric Biggers	bcax	 v1.16b,  v1.16b, v27.16b,  v2.16b
183*1e29a750SEric Biggers	bcax	 v2.16b,  v2.16b, v28.16b, v27.16b
184*1e29a750SEric Biggers
185*1e29a750SEric Biggers	eor	 v0.16b,  v0.16b, v31.16b
186*1e29a750SEric Biggers
187*1e29a750SEric Biggers	cbnz	w8, 3b
188*1e29a750SEric Biggers	cond_yield 4f, x8, x9
189*1e29a750SEric Biggers	cbnz	x2, 0b
190*1e29a750SEric Biggers
191*1e29a750SEric Biggers	/* save state */
192*1e29a750SEric Biggers4:	st1	{ v0.1d- v3.1d}, [x0], #32
193*1e29a750SEric Biggers	st1	{ v4.1d- v7.1d}, [x0], #32
194*1e29a750SEric Biggers	st1	{ v8.1d-v11.1d}, [x0], #32
195*1e29a750SEric Biggers	st1	{v12.1d-v15.1d}, [x0], #32
196*1e29a750SEric Biggers	st1	{v16.1d-v19.1d}, [x0], #32
197*1e29a750SEric Biggers	st1	{v20.1d-v23.1d}, [x0], #32
198*1e29a750SEric Biggers	st1	{v24.1d}, [x0]
199*1e29a750SEric Biggers	mov	x0, x2
200*1e29a750SEric Biggers	ret
201*1e29a750SEric BiggersSYM_FUNC_END(sha3_ce_transform)
202*1e29a750SEric Biggers
203*1e29a750SEric Biggers	.section	".rodata", "a"
204*1e29a750SEric Biggers	.align		8
205*1e29a750SEric Biggers.Lsha3_rcon:
206*1e29a750SEric Biggers	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
207*1e29a750SEric Biggers	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
208*1e29a750SEric Biggers	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
209*1e29a750SEric Biggers	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
210*1e29a750SEric Biggers	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
211*1e29a750SEric Biggers	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
212*1e29a750SEric Biggers	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
213*1e29a750SEric Biggers	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008
214