xref: /linux/arch/riscv/kernel/vdso/vgetrandom-chacha.S (revision d30c1683aaecb93d2ab95685dc4300a33d3cea7a)
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
4 *
5 * Based on arch/loongarch/vdso/vgetrandom-chacha.S.
6 */
7
8#include <asm/asm.h>
9#include <linux/linkage.h>
10#include <asm/assembler.h>
11
12.text
13
/*
 * ROTRI dst, src, bits
 *
 * Rotate the low 32 bits of \src right by \bits and place the result in \dst.
 * The left-shifted half is staged in t0, so t0 is clobbered.  \src is read
 * by both shifts before \dst is combined, which lets \dst and \src name the
 * same register.
 */
.macro	ROTRI	dst src bits
	slliw	t0, \src, 32 - \bits	/* bits that wrap around to the top */
	srliw	\dst, \src, \bits	/* bits that stay, moved down */
	or	\dst, \dst, t0
.endm
19
/*
 * OP_4REG insn, dst0..dst3, src0..src3
 *
 * Apply the three-operand \insn to four (destination, source) pairs in turn:
 * dstN = dstN \insn srcN.  \insn may be an instruction or a macro with the
 * same "rd, rs, operand" shape, such as ROTRI with an immediate as the source.
 */
.macro	OP_4REG	insn dst0 dst1 dst2 dst3 src0 src1 src2 src3
	\insn	\dst0, \dst0, \src0
	\insn	\dst1, \dst1, \src1
	\insn	\dst2, \dst2, \src2
	\insn	\dst3, \dst3, \src3
.endm
26
/*
 * __arch_chacha20_blocks_nostack - write ChaCha20 keystream blocks to a
 * buffer while keeping the whole cipher state in registers.
 *
 *	a0: output bytes
 *	a1: 32-byte key input
 *	a2: 8-byte counter input/output
 *	a3: number of 64-byte blocks to write to output
 *
 * a3 must be non-zero: the block loop decrements it before testing it, so a
 * count of zero would wrap around instead of terminating.
 *
 * Registers written by the code below: a0, a3-a7 and t0-t6.  s0-s11 are used
 * for the state but are saved on entry and restored before returning.
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)

#define output		a0
#define key		a1
#define counter		a2
#define nblocks		a3
#define i		a4
#define state0		s0
#define state1		s1
#define state2		s2
#define state3		s3
#define state4		s4
#define state5		s5
#define state6		s6
#define state7		s7
#define state8		s8
#define state9		s9
#define state10		s10
#define state11		s11
#define state12		a5
#define state13		a6
#define state14		a7
#define state15		t1
#define cnt		t2
#define copy0		t3
#define copy1		t4
#define copy2		t5
#define copy3		t6

/* Packs to be used with OP_4REG */
#define line0		state0, state1, state2, state3
#define line1		state4, state5, state6, state7
#define line2		state8, state9, state10, state11
#define line3		state12, state13, state14, state15

/* Rows 1-3 rotated by 1, 2 and 3 positions, for the even (diagonal) round */
#define line1_perm	state5, state6, state7, state4
#define line2_perm	state10, state11, state8, state9
#define line3_perm	state15, state12, state13, state14

#define copy		copy0, copy1, copy2, copy3

/* Rotate-right amounts, one per lane, for use as the ROTRI source pack */
#define _16		16, 16, 16, 16
#define _20		20, 20, 20, 20
#define _24		24, 24, 24, 24
#define _25		25, 25, 25, 25
	vdso_lpad
	/*
	 * The ABI requires s0-s11 saved.
	 * This does not violate the stack-less requirement: no sensitive data
	 * is spilled onto the stack.
	 */
	addi		sp, sp, -12*SZREG
	REG_S		s0,         (sp)
	REG_S		s1,    SZREG(sp)
	REG_S		s2,  2*SZREG(sp)
	REG_S		s3,  3*SZREG(sp)
	REG_S		s4,  4*SZREG(sp)
	REG_S		s5,  5*SZREG(sp)
	REG_S		s6,  6*SZREG(sp)
	REG_S		s7,  7*SZREG(sp)
	REG_S		s8,  8*SZREG(sp)
	REG_S		s9,  9*SZREG(sp)
	REG_S		s10, 10*SZREG(sp)
	REG_S		s11, 11*SZREG(sp)

	/* The 64-bit counter lives in one register for the whole call */
	ld		cnt, (counter)

	/* copy[0,1,2,3] = "expand 32-byte k", kept constant across blocks */
	li		copy0, 0x61707865
	li		copy1, 0x3320646e
	li		copy2, 0x79622d32
	li		copy3, 0x6b206574

.Lblock:
	/* state[0,1,2,3] = "expand 32-byte k" */
	mv		state0, copy0
	mv		state1, copy1
	mv		state2, copy2
	mv		state3, copy3

	/* state[4,5,..,11] = key */
	lw		state4,   (key)
	lw		state5,  4(key)
	lw		state6,  8(key)
	lw		state7,  12(key)
	lw		state8,  16(key)
	lw		state9,  20(key)
	lw		state10, 24(key)
	lw		state11, 28(key)

	/*
	 * state[12,13] = counter
	 * state12 keeps the high counter half in its upper 32 bits; that is
	 * harmless because only the low 32 bits of each state word are ever
	 * consumed (addw, slliw/srliw in ROTRI, and sw).
	 */
	mv		state12, cnt
	srli		state13, cnt, 32

	/* state[14,15] = 0 */
	mv		state14, zero
	mv		state15, zero

	/* 10 iterations of (odd round + even round) = 20 rounds */
	li		i, 10
.Lpermute:
	/* odd round */
	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _16

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _20

	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _24

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _25

	/* even round */
	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _16

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _20

	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _24

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _25

	addi		i, i, -1
	bnez		i, .Lpermute

	/* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
	OP_4REG	addw	line0, copy
	sw		state0,   (output)
	sw		state1,  4(output)
	sw		state2,  8(output)
	sw		state3, 12(output)

	/* from now on state[0,1,2,3] are scratch registers  */

	/* state[0,1,2,3] = lo(key) */
	lw		state0,   (key)
	lw		state1,  4(key)
	lw		state2,  8(key)
	lw		state3, 12(key)

	/* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
	OP_4REG	addw	line1, line0
	sw		state4, 16(output)
	sw		state5, 20(output)
	sw		state6, 24(output)
	sw		state7, 28(output)

	/* state[0,1,2,3] = hi(key) */
	lw		state0, 16(key)
	lw		state1, 20(key)
	lw		state2, 24(key)
	lw		state3, 28(key)

	/* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */
	OP_4REG	addw	line2, line0
	sw		state8,  32(output)
	sw		state9,  36(output)
	sw		state10, 40(output)
	sw		state11, 44(output)

	/* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */
	addw		state12, state12, cnt
	srli		state0, cnt, 32
	addw		state13, state13, state0
	sw		state12, 48(output)
	sw		state13, 52(output)
	sw		state14, 56(output)
	sw		state15, 60(output)

	/* ++counter */
	addi		cnt, cnt, 1

	/* output += 64 */
	addi		output, output, 64
	/* --nblocks */
	addi		nblocks, nblocks, -1
	bnez		nblocks, .Lblock

	/* counter = [cnt_lo, cnt_hi] */
	sd		cnt, (counter)

	/*
	 * Zero out the potentially sensitive regs, in case nothing uses these
	 * again.  As at now copy[0,1,2,3] just contains "expand 32-byte k" and
	 * state[0,...,11] are s0-s11 those we'll restore in the epilogue, so
	 * what is left to zero is state[12,...,15] and t0: t0 is the scratch
	 * register written by ROTRI and still holds shifted bits of a
	 * permuted state word from the last rotation.
	 */
	mv		state12, zero
	mv		state13, zero
	mv		state14, zero
	mv		state15, zero
	mv		t0, zero

	REG_L		s0,         (sp)
	REG_L		s1,    SZREG(sp)
	REG_L		s2,  2*SZREG(sp)
	REG_L		s3,  3*SZREG(sp)
	REG_L		s4,  4*SZREG(sp)
	REG_L		s5,  5*SZREG(sp)
	REG_L		s6,  6*SZREG(sp)
	REG_L		s7,  7*SZREG(sp)
	REG_L		s8,  8*SZREG(sp)
	REG_L		s9,  9*SZREG(sp)
	REG_L		s10, 10*SZREG(sp)
	REG_L		s11, 11*SZREG(sp)
	addi		sp, sp, 12*SZREG

	ret
SYM_FUNC_END(__arch_chacha20_blocks_nostack)
251
/*
 * NOTE(review): emit_riscv_feature_1_and is not defined in this file; it
 * presumably comes from one of the headers included above and, going by its
 * name, emits this object's RISC-V feature note -- confirm against its
 * definition.
 */
emit_riscv_feature_1_and
253