xref: /linux/arch/riscv/kernel/vdso/vgetrandom-chacha.S (revision 119b1e61a769aa98e68599f44721661a4d8c55f3)
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
4 *
5 * Based on arch/loongarch/vdso/vgetrandom-chacha.S.
6 */
7
8#include <asm/asm.h>
9#include <linux/linkage.h>
10
11.text
12
/*
 * ROTRI dst, src, amount
 *
 * Rotate the low 32 bits of src right by the assemble-time constant amount
 * and leave the result in dst.
 *
 * The left-shifted part is built in t0 before dst is written, so dst and
 * src may name the same register.  Clobbers t0.
 */
.macro	ROTRI	dst, src, amount
	slliw	t0, \src, 32 - \amount
	srliw	\dst, \src, \amount
	or	\dst, \dst, t0
.endm
18
/*
 * OP_4REG op, dst0, dst1, dst2, dst3, src0, src1, src2, src3
 *
 * Emit "op dstN, dstN, srcN" for N = 0..3, in that order.  op is anything
 * that accepts three operands in that form: an instruction such as addw or
 * xor, or a macro such as ROTRI (in which case srcN is the rotate amount).
 */
.macro	OP_4REG	op, dst0, dst1, dst2, dst3, src0, src1, src2, src3
	\op	\dst0, \dst0, \src0
	\op	\dst1, \dst1, \src1
	\op	\dst2, \dst2, \src2
	\op	\dst3, \dst3, \src3
.endm
25
/*
 * Generate ChaCha20 blocks without spilling any cipher state to the stack.
 *
 *	a0: output bytes
 *	a1: 32-byte key input
 *	a2: 8-byte counter input/output
 *	a3: number of 64-byte blocks to write to output; must be non-zero,
 *	    because the block loop only tests the count after a block has
 *	    been written
 *
 * On return a0 points past the last block written, a3 is zero and the
 * counter at (a2) has been advanced by the number of blocks generated.
 * Clobbers t0-t6 and a4-a7.  s0-s11 are used as working registers but are
 * saved on entry and restored before returning.
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)

#define output		a0
#define key		a1
#define counter		a2
#define nblocks		a3
#define i		a4
#define state0		s0
#define state1		s1
#define state2		s2
#define state3		s3
#define state4		s4
#define state5		s5
#define state6		s6
#define state7		s7
#define state8		s8
#define state9		s9
#define state10		s10
#define state11		s11
#define state12		a5
#define state13		a6
#define state14		a7
#define state15		t1
#define cnt		t2
#define copy0		t3
#define copy1		t4
#define copy2		t5
#define copy3		t6

/* Packs to be used with OP_4REG */
#define line0		state0, state1, state2, state3
#define line1		state4, state5, state6, state7
#define line2		state8, state9, state10, state11
#define line3		state12, state13, state14, state15

/* Rows rotated by 1, 2 and 3 words: the diagonals used by the even round */
#define line1_perm	state5, state6, state7, state4
#define line2_perm	state10, state11, state8, state9
#define line3_perm	state15, state12, state13, state14

#define copy		copy0, copy1, copy2, copy3

/* Right-rotate amounts, i.e. left rotations by 16, 12, 8 and 7 bits */
#define _16		16, 16, 16, 16
#define _20		20, 20, 20, 20
#define _24		24, 24, 24, 24
#define _25		25, 25, 25, 25

	/*
	 * The ABI requires s0-s11 saved.
	 * This does not violate the stack-less requirement: no sensitive data
	 * is spilled onto the stack, only the caller's register values.
	 */
	addi		sp, sp, -12*SZREG
	REG_S		s0,         (sp)
	REG_S		s1,    SZREG(sp)
	REG_S		s2,  2*SZREG(sp)
	REG_S		s3,  3*SZREG(sp)
	REG_S		s4,  4*SZREG(sp)
	REG_S		s5,  5*SZREG(sp)
	REG_S		s6,  6*SZREG(sp)
	REG_S		s7,  7*SZREG(sp)
	REG_S		s8,  8*SZREG(sp)
	REG_S		s9,  9*SZREG(sp)
	REG_S		s10, 10*SZREG(sp)
	REG_S		s11, 11*SZREG(sp)

	/* cnt holds the whole 64-bit block counter for the entire call */
	ld		cnt, (counter)

	/* copy[0,1,2,3] = "expand 32-byte k", kept live across all blocks */
	li		copy0, 0x61707865
	li		copy1, 0x3320646e
	li		copy2, 0x79622d32
	li		copy3, 0x6b206574

.Lblock:
	/* state[0,1,2,3] = "expand 32-byte k" */
	mv		state0, copy0
	mv		state1, copy1
	mv		state2, copy2
	mv		state3, copy3

	/* state[4,5,..,11] = key */
	lw		state4,   (key)
	lw		state5,  4(key)
	lw		state6,  8(key)
	lw		state7,  12(key)
	lw		state8,  16(key)
	lw		state9,  20(key)
	lw		state10, 24(key)
	lw		state11, 28(key)

	/*
	 * state[12,13] = counter
	 * state12 keeps the high half of cnt in its upper bits; that is
	 * harmless because addw, ROTRI and sw only consume the low 32 bits.
	 */
	mv		state12, cnt
	srli		state13, cnt, 32

	/* state[14,15] = 0 */
	mv		state14, zero
	mv		state15, zero

	/* 10 iterations of (odd round + even round) = 20 rounds */
	li		i, 10
.Lpermute:
	/* odd round */
	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _16

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _20

	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _24

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _25

	/* even round */
	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _16

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _20

	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _24

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _25

	addi		i, i, -1
	bnez		i, .Lpermute

	/* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
	OP_4REG	addw	line0, copy
	sw		state0,   (output)
	sw		state1,  4(output)
	sw		state2,  8(output)
	sw		state3, 12(output)

	/* from now on state[0,1,2,3] are scratch registers  */

	/* state[0,1,2,3] = lo(key) */
	lw		state0,   (key)
	lw		state1,  4(key)
	lw		state2,  8(key)
	lw		state3, 12(key)

	/* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
	OP_4REG	addw	line1, line0
	sw		state4, 16(output)
	sw		state5, 20(output)
	sw		state6, 24(output)
	sw		state7, 28(output)

	/* state[0,1,2,3] = hi(key) */
	lw		state0, 16(key)
	lw		state1, 20(key)
	lw		state2, 24(key)
	lw		state3, 28(key)

	/* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */
	OP_4REG	addw	line2, line0
	sw		state8,  32(output)
	sw		state9,  36(output)
	sw		state10, 40(output)
	sw		state11, 44(output)

	/* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */
	addw		state12, state12, cnt
	srli		state0, cnt, 32
	addw		state13, state13, state0
	sw		state12, 48(output)
	sw		state13, 52(output)
	sw		state14, 56(output)
	sw		state15, 60(output)

	/* ++counter */
	addi		cnt, cnt, 1

	/* output += 64 */
	addi		output, output, 64
	/* --nblocks */
	addi		nblocks, nblocks, -1
	bnez		nblocks, .Lblock

	/* counter = [cnt_lo, cnt_hi] */
	sd		cnt, (counter)

	/*
	 * Zero out the potentially sensitive regs, in case nothing uses these
	 * again.  At this point copy[0,1,2,3] just contain "expand 32-byte k",
	 * cnt holds the counter value that was just stored back, and
	 * state[0,...,11] are s0-s11, which are restored in the epilogue.
	 * That leaves state[12,...,15] and t0, the ROTRI scratch register,
	 * which still holds shifted bits of the last word rotated in the
	 * final round.
	 */
	mv		state12, zero
	mv		state13, zero
	mv		state14, zero
	mv		state15, zero
	mv		t0, zero

	REG_L		s0,         (sp)
	REG_L		s1,    SZREG(sp)
	REG_L		s2,  2*SZREG(sp)
	REG_L		s3,  3*SZREG(sp)
	REG_L		s4,  4*SZREG(sp)
	REG_L		s5,  5*SZREG(sp)
	REG_L		s6,  6*SZREG(sp)
	REG_L		s7,  7*SZREG(sp)
	REG_L		s8,  8*SZREG(sp)
	REG_L		s9,  9*SZREG(sp)
	REG_L		s10, 10*SZREG(sp)
	REG_L		s11, 11*SZREG(sp)
	addi		sp, sp, 12*SZREG

	ret
SYM_FUNC_END(__arch_chacha20_blocks_nostack)
250