xref: /linux/arch/s390/kernel/vdso/vgetrandom-chacha.S (revision 24f171c7e145f43b9f187578e89b0982ce87e54c)
1*c0087d80SHeiko Carstens/* SPDX-License-Identifier: GPL-2.0 */
2*c0087d80SHeiko Carstens
3*c0087d80SHeiko Carstens#include <linux/stringify.h>
4*c0087d80SHeiko Carstens#include <linux/linkage.h>
5*c0087d80SHeiko Carstens#include <asm/alternative.h>
6*c0087d80SHeiko Carstens#include <asm/dwarf.h>
7*c0087d80SHeiko Carstens#include <asm/fpu-insn.h>
8*c0087d80SHeiko Carstens
/*
 * Working state: one vector register per row of the ChaCha matrix.
 * Reloaded from COPY0-COPY3 at the start of every block.
 */
#define STATE0	%v0
#define STATE1	%v1
#define STATE2	%v2
#define STATE3	%v3

/*
 * Input rows kept for the whole call: constants (COPY0), key (COPY1,
 * COPY2) and counter plus zero nonce (COPY3).
 */
#define COPY0	%v4
#define COPY1	%v5
#define COPY2	%v6
#define COPY3	%v7

/* Byte selectors consumed by VPERM on the output path that uses it */
#define BEPERM	%v19

/* Byte-swapped output rows, built by VPERM and written out by VSTM */
#define TMP0	%v20
#define TMP1	%v21
#define TMP2	%v22
#define TMP3	%v23
22*c0087d80SHeiko Carstens
23*c0087d80SHeiko Carstens	.section .rodata
24*c0087d80SHeiko Carstens
25*c0087d80SHeiko Carstens	.balign 32
26*c0087d80SHeiko CarstensSYM_DATA_START_LOCAL(chacha20_constants)
27*c0087d80SHeiko Carstens	.long	0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
28*c0087d80SHeiko Carstens	.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
29*c0087d80SHeiko CarstensSYM_DATA_END(chacha20_constants)
30*c0087d80SHeiko Carstens
31*c0087d80SHeiko Carstens	.text
32*c0087d80SHeiko Carstens/*
33*c0087d80SHeiko Carstens * s390 ChaCha20 implementation meant for vDSO. Produces a given positive
34*c0087d80SHeiko Carstens * number of blocks of output with nonce 0, taking an input key and 8-bytes
35*c0087d80SHeiko Carstens * counter. Does not spill to the stack.
36*c0087d80SHeiko Carstens *
37*c0087d80SHeiko Carstens * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
38*c0087d80SHeiko Carstens *				       const uint8_t *key,
39*c0087d80SHeiko Carstens *				       uint32_t *counter,
40*c0087d80SHeiko Carstens *				       size_t nblocks)
41*c0087d80SHeiko Carstens */
42*c0087d80SHeiko CarstensSYM_FUNC_START(__arch_chacha20_blocks_nostack)
43*c0087d80SHeiko Carstens	CFI_STARTPROC
44*c0087d80SHeiko Carstens	larl	%r1,chacha20_constants
45*c0087d80SHeiko Carstens
46*c0087d80SHeiko Carstens	/* COPY0 = "expand 32-byte k" */
47*c0087d80SHeiko Carstens	VL	COPY0,0,,%r1
48*c0087d80SHeiko Carstens
49*c0087d80SHeiko Carstens	/* BEPERM = byte selectors for VPERM */
50*c0087d80SHeiko Carstens	ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)
51*c0087d80SHeiko Carstens
52*c0087d80SHeiko Carstens	/* COPY1,COPY2 = key */
53*c0087d80SHeiko Carstens	VLM	COPY1,COPY2,0,%r3
54*c0087d80SHeiko Carstens
55*c0087d80SHeiko Carstens	/* COPY3 = counter || zero nonce  */
56*c0087d80SHeiko Carstens	lg	%r3,0(%r4)
57*c0087d80SHeiko Carstens	VZERO	COPY3
58*c0087d80SHeiko Carstens	VLVGG	COPY3,%r3,0
59*c0087d80SHeiko Carstens
60*c0087d80SHeiko Carstens	lghi	%r1,0
61*c0087d80SHeiko Carstens.Lblock:
62*c0087d80SHeiko Carstens	VLR	STATE0,COPY0
63*c0087d80SHeiko Carstens	VLR	STATE1,COPY1
64*c0087d80SHeiko Carstens	VLR	STATE2,COPY2
65*c0087d80SHeiko Carstens	VLR	STATE3,COPY3
66*c0087d80SHeiko Carstens
67*c0087d80SHeiko Carstens	lghi	%r0,10
68*c0087d80SHeiko Carstens.Ldoubleround:
69*c0087d80SHeiko Carstens	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
70*c0087d80SHeiko Carstens	VAF	STATE0,STATE0,STATE1
71*c0087d80SHeiko Carstens	VX	STATE3,STATE3,STATE0
72*c0087d80SHeiko Carstens	VERLLF	STATE3,STATE3,16
73*c0087d80SHeiko Carstens
74*c0087d80SHeiko Carstens	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
75*c0087d80SHeiko Carstens	VAF	STATE2,STATE2,STATE3
76*c0087d80SHeiko Carstens	VX	STATE1,STATE1,STATE2
77*c0087d80SHeiko Carstens	VERLLF	STATE1,STATE1,12
78*c0087d80SHeiko Carstens
79*c0087d80SHeiko Carstens	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
80*c0087d80SHeiko Carstens	VAF	STATE0,STATE0,STATE1
81*c0087d80SHeiko Carstens	VX	STATE3,STATE3,STATE0
82*c0087d80SHeiko Carstens	VERLLF	STATE3,STATE3,8
83*c0087d80SHeiko Carstens
84*c0087d80SHeiko Carstens	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
85*c0087d80SHeiko Carstens	VAF	STATE2,STATE2,STATE3
86*c0087d80SHeiko Carstens	VX	STATE1,STATE1,STATE2
87*c0087d80SHeiko Carstens	VERLLF	STATE1,STATE1,7
88*c0087d80SHeiko Carstens
89*c0087d80SHeiko Carstens	/* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
90*c0087d80SHeiko Carstens	VSLDB	STATE1,STATE1,STATE1,4
91*c0087d80SHeiko Carstens	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
92*c0087d80SHeiko Carstens	VSLDB	STATE2,STATE2,STATE2,8
93*c0087d80SHeiko Carstens	/* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
94*c0087d80SHeiko Carstens	VSLDB	STATE3,STATE3,STATE3,12
95*c0087d80SHeiko Carstens
96*c0087d80SHeiko Carstens	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
97*c0087d80SHeiko Carstens	VAF	STATE0,STATE0,STATE1
98*c0087d80SHeiko Carstens	VX	STATE3,STATE3,STATE0
99*c0087d80SHeiko Carstens	VERLLF	STATE3,STATE3,16
100*c0087d80SHeiko Carstens
101*c0087d80SHeiko Carstens	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
102*c0087d80SHeiko Carstens	VAF	STATE2,STATE2,STATE3
103*c0087d80SHeiko Carstens	VX	STATE1,STATE1,STATE2
104*c0087d80SHeiko Carstens	VERLLF	STATE1,STATE1,12
105*c0087d80SHeiko Carstens
106*c0087d80SHeiko Carstens	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
107*c0087d80SHeiko Carstens	VAF	STATE0,STATE0,STATE1
108*c0087d80SHeiko Carstens	VX	STATE3,STATE3,STATE0
109*c0087d80SHeiko Carstens	VERLLF	STATE3,STATE3,8
110*c0087d80SHeiko Carstens
111*c0087d80SHeiko Carstens	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
112*c0087d80SHeiko Carstens	VAF	STATE2,STATE2,STATE3
113*c0087d80SHeiko Carstens	VX	STATE1,STATE1,STATE2
114*c0087d80SHeiko Carstens	VERLLF	STATE1,STATE1,7
115*c0087d80SHeiko Carstens
116*c0087d80SHeiko Carstens	/* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
117*c0087d80SHeiko Carstens	VSLDB	STATE1,STATE1,STATE1,12
118*c0087d80SHeiko Carstens	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
119*c0087d80SHeiko Carstens	VSLDB	STATE2,STATE2,STATE2,8
120*c0087d80SHeiko Carstens	/* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
121*c0087d80SHeiko Carstens	VSLDB	STATE3,STATE3,STATE3,4
122*c0087d80SHeiko Carstens	brctg	%r0,.Ldoubleround
123*c0087d80SHeiko Carstens
124*c0087d80SHeiko Carstens	/* OUTPUT0 = STATE0 + COPY0 */
125*c0087d80SHeiko Carstens	VAF	STATE0,STATE0,COPY0
126*c0087d80SHeiko Carstens	/* OUTPUT1 = STATE1 + COPY1 */
127*c0087d80SHeiko Carstens	VAF	STATE1,STATE1,COPY1
128*c0087d80SHeiko Carstens	/* OUTPUT2 = STATE2 + COPY2 */
129*c0087d80SHeiko Carstens	VAF	STATE2,STATE2,COPY2
130*c0087d80SHeiko Carstens	/* OUTPUT3 = STATE3 + COPY3 */
131*c0087d80SHeiko Carstens	VAF	STATE3,STATE3,COPY3
132*c0087d80SHeiko Carstens
133*c0087d80SHeiko Carstens	ALTERNATIVE							\
134*c0087d80SHeiko Carstens		__stringify(						\
135*c0087d80SHeiko Carstens		/* Convert STATE to little endian and store to OUTPUT */\
136*c0087d80SHeiko Carstens		VPERM	TMP0,STATE0,STATE0,BEPERM;			\
137*c0087d80SHeiko Carstens		VPERM	TMP1,STATE1,STATE1,BEPERM;			\
138*c0087d80SHeiko Carstens		VPERM	TMP2,STATE2,STATE2,BEPERM;			\
139*c0087d80SHeiko Carstens		VPERM	TMP3,STATE3,STATE3,BEPERM;			\
140*c0087d80SHeiko Carstens		VSTM	TMP0,TMP3,0,%r2),				\
141*c0087d80SHeiko Carstens		__stringify(						\
142*c0087d80SHeiko Carstens		/* 32 bit wise little endian store to OUTPUT */		\
143*c0087d80SHeiko Carstens		VSTBRF	STATE0,0,,%r2;					\
144*c0087d80SHeiko Carstens		VSTBRF	STATE1,16,,%r2;					\
145*c0087d80SHeiko Carstens		VSTBRF	STATE2,32,,%r2;					\
146*c0087d80SHeiko Carstens		VSTBRF	STATE3,48,,%r2;					\
147*c0087d80SHeiko Carstens		brcl	0,0),						\
148*c0087d80SHeiko Carstens		ALT_FACILITY(148)
149*c0087d80SHeiko Carstens
150*c0087d80SHeiko Carstens	/* ++COPY3.COUNTER */
151*c0087d80SHeiko Carstens	/* alsih %r3,1 */
152*c0087d80SHeiko Carstens	.insn	rilu,0xcc0a00000000,%r3,1
153*c0087d80SHeiko Carstens	alcr	%r3,%r1
154*c0087d80SHeiko Carstens	VLVGG	COPY3,%r3,0
155*c0087d80SHeiko Carstens
156*c0087d80SHeiko Carstens	/* OUTPUT += 64, --NBLOCKS */
157*c0087d80SHeiko Carstens	aghi	%r2,64
158*c0087d80SHeiko Carstens	brctg	%r5,.Lblock
159*c0087d80SHeiko Carstens
160*c0087d80SHeiko Carstens	/* COUNTER = COPY3.COUNTER */
161*c0087d80SHeiko Carstens	stg	%r3,0(%r4)
162*c0087d80SHeiko Carstens
163*c0087d80SHeiko Carstens	/* Zero out potentially sensitive regs */
164*c0087d80SHeiko Carstens	VZERO	STATE0
165*c0087d80SHeiko Carstens	VZERO	STATE1
166*c0087d80SHeiko Carstens	VZERO	STATE2
167*c0087d80SHeiko Carstens	VZERO	STATE3
168*c0087d80SHeiko Carstens	VZERO	COPY1
169*c0087d80SHeiko Carstens	VZERO	COPY2
170*c0087d80SHeiko Carstens
171*c0087d80SHeiko Carstens	/* Early exit if TMP0-TMP3 have not been used */
172*c0087d80SHeiko Carstens	ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148)
173*c0087d80SHeiko Carstens
174*c0087d80SHeiko Carstens	VZERO	TMP0
175*c0087d80SHeiko Carstens	VZERO	TMP1
176*c0087d80SHeiko Carstens	VZERO	TMP2
177*c0087d80SHeiko Carstens	VZERO	TMP3
178*c0087d80SHeiko Carstens
179*c0087d80SHeiko Carstens	br	%r14
180*c0087d80SHeiko Carstens	CFI_ENDPROC
181*c0087d80SHeiko CarstensSYM_FUNC_END(__arch_chacha20_blocks_nostack)
182