/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
 *
 * Based on arch/loongarch/vdso/vgetrandom-chacha.S.
 */

#include <asm/asm.h>
#include <linux/linkage.h>
#include <asm/assembler.h>

.text

/*
 * ROTRI rd, rs, imm
 *
 * Rotate the low 32 bits of \rs right by \imm bits and leave the result in
 * the low 32 bits of \rd.  \imm must be in 1..31 so that both shift amounts
 * are valid.  \rd may be the same register as \rs (the left-shifted half is
 * parked in t0 before \rd is written), but neither may be t0.
 *
 * Clobbers: t0.  Because t0 ends up holding a shifted copy of the rotated
 * word, callers handling secret data must wipe it when they are done.
 */
.macro	ROTRI	rd rs imm
	slliw	t0, \rs, 32 - \imm
	srliw	\rd, \rs, \imm
	or	\rd, \rd, t0
.endm

/*
 * OP_4REG op, d0, d1, d2, d3, s0, s1, s2, s3
 *
 * Apply the three-operand instruction or macro \op lane-wise to four
 * destination/source pairs: dN = dN \op sN.  The sources may be registers
 * or, when \op is ROTRI, immediates.
 */
.macro	OP_4REG	op d0 d1 d2 d3 s0 s1 s2 s3
	\op	\d0, \d0, \s0
	\op	\d1, \d1, \s1
	\op	\d2, \d2, \s2
	\op	\d3, \d3, \s3
.endm

/*
 * Generate nblocks 64-byte ChaCha20 blocks (10 double rounds each) keeping
 * the whole cipher state in registers.
 *
 *	a0: output bytes
 *	a1: 32-byte key input
 *	a2: 8-byte counter input/output
 *	a3: number of 64-byte blocks to write to output
 *
 * The 64-bit counter is loaded once, incremented after every block and
 * written back on exit.  The block loop tests nblocks only after the
 * decrement, so nblocks must be at least 1.
 *
 * Only the low 32 bits of each state register are significant: the
 * arithmetic uses addw/slliw/srliw and the results are stored with sw.
 *
 * Clobbers: a0, a3-a7, t0-t6.  s0-s11 are saved on the stack and restored.
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)

#define output		a0
#define key		a1
#define counter		a2
#define nblocks		a3
#define i		a4
#define state0		s0
#define state1		s1
#define state2		s2
#define state3		s3
#define state4		s4
#define state5		s5
#define state6		s6
#define state7		s7
#define state8		s8
#define state9		s9
#define state10		s10
#define state11		s11
#define state12		a5
#define state13		a6
#define state14		a7
#define state15		t1
#define cnt		t2
#define copy0		t3
#define copy1		t4
#define copy2		t5
#define copy3		t6

/* Packs to be used with OP_4REG */
#define line0		state0, state1, state2, state3
#define line1		state4, state5, state6, state7
#define line2		state8, state9, state10, state11
#define line3		state12, state13, state14, state15

/* Rows 1-3 rotated left by 1, 2 and 3 lanes: the diagonals of the state */
#define line1_perm	state5, state6, state7, state4
#define line2_perm	state10, state11, state8, state9
#define line3_perm	state15, state12, state13, state14

#define copy		copy0, copy1, copy2, copy3

/* Per-lane rotate-right amounts (rotl by 16, 12, 8, 7 == rotr by these) */
#define _16		16, 16, 16, 16
#define _20		20, 20, 20, 20
#define _24		24, 24, 24, 24
#define _25		25, 25, 25, 25
	vdso_lpad
	/*
	 * The ABI requires s0-s11 saved.
	 * This does not violate the stack-less requirement: no sensitive data
	 * is spilled onto the stack.
	 */
	addi		sp, sp, -12*SZREG
	REG_S		s0,         (sp)
	REG_S		s1,    SZREG(sp)
	REG_S		s2,  2*SZREG(sp)
	REG_S		s3,  3*SZREG(sp)
	REG_S		s4,  4*SZREG(sp)
	REG_S		s5,  5*SZREG(sp)
	REG_S		s6,  6*SZREG(sp)
	REG_S		s7,  7*SZREG(sp)
	REG_S		s8,  8*SZREG(sp)
	REG_S		s9,  9*SZREG(sp)
	REG_S		s10, 10*SZREG(sp)
	REG_S		s11, 11*SZREG(sp)

	ld		cnt, (counter)

	/* copy[0,1,2,3] = "expand 32-byte k", kept for the whole call */
	li		copy0, 0x61707865
	li		copy1, 0x3320646e
	li		copy2, 0x79622d32
	li		copy3, 0x6b206574

.Lblock:
	/* state[0,1,2,3] = "expand 32-byte k" */
	mv		state0, copy0
	mv		state1, copy1
	mv		state2, copy2
	mv		state3, copy3

	/* state[4,5,..,11] = key */
	lw		state4,   (key)
	lw		state5,  4(key)
	lw		state6,  8(key)
	lw		state7,  12(key)
	lw		state8,  16(key)
	lw		state9,  20(key)
	lw		state10, 24(key)
	lw		state11, 28(key)

	/* state[12,13] = counter (upper half of state12 is ignored) */
	mv		state12, cnt
	srli		state13, cnt, 32

	/* state[14,15] = 0 */
	mv		state14, zero
	mv		state15, zero

	/* 10 iterations of (odd round + even round) = 20 rounds */
	li		i, 10
.Lpermute:
	/* odd round */
	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _16

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _20

	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _24

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _25

	/* even round */
	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _16

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _20

	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _24

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _25

	addi		i, i, -1
	bnez		i, .Lpermute

	/* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
	OP_4REG	addw	line0, copy
	sw		state0,   (output)
	sw		state1,  4(output)
	sw		state2,  8(output)
	sw		state3, 12(output)

	/* from now on state[0,1,2,3] are scratch registers */

	/* state[0,1,2,3] = lo(key) */
	lw		state0,   (key)
	lw		state1,  4(key)
	lw		state2,  8(key)
	lw		state3, 12(key)

	/* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
	OP_4REG	addw	line1, line0
	sw		state4, 16(output)
	sw		state5, 20(output)
	sw		state6, 24(output)
	sw		state7, 28(output)

	/* state[0,1,2,3] = hi(key) */
	lw		state0, 16(key)
	lw		state1, 20(key)
	lw		state2, 24(key)
	lw		state3, 28(key)

	/* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */
	OP_4REG	addw	line2, line0
	sw		state8,  32(output)
	sw		state9,  36(output)
	sw		state10, 40(output)
	sw		state11, 44(output)

	/* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */
	addw		state12, state12, cnt
	srli		state0, cnt, 32
	addw		state13, state13, state0
	sw		state12, 48(output)
	sw		state13, 52(output)
	sw		state14, 56(output)
	sw		state15, 60(output)

	/* ++counter */
	addi		cnt, cnt, 1

	/* output += 64 */
	addi		output, output, 64
	/* --nblocks */
	addi		nblocks, nblocks, -1
	bnez		nblocks, .Lblock

	/* counter = [cnt_lo, cnt_hi] */
	sd		cnt, (counter)

	/*
	 * Zero out the potentially sensitive regs, in case nothing uses these
	 * again. As at now copy[0,1,2,3] just contains "expand 32-byte k" and
	 * state[0,...,11] are s0-s11 those we'll restore in the epilogue, we
	 * need to zero state[12,...,15], plus t0: ROTRI uses it as scratch, so
	 * it still holds a shifted copy of a state word from the last round.
	 */
	mv		state12, zero
	mv		state13, zero
	mv		state14, zero
	mv		state15, zero
	mv		t0, zero

	REG_L		s0,         (sp)
	REG_L		s1,    SZREG(sp)
	REG_L		s2,  2*SZREG(sp)
	REG_L		s3,  3*SZREG(sp)
	REG_L		s4,  4*SZREG(sp)
	REG_L		s5,  5*SZREG(sp)
	REG_L		s6,  6*SZREG(sp)
	REG_L		s7,  7*SZREG(sp)
	REG_L		s8,  8*SZREG(sp)
	REG_L		s9,  9*SZREG(sp)
	REG_L		s10, 10*SZREG(sp)
	REG_L		s11, 11*SZREG(sp)
	addi		sp, sp, 12*SZREG

	ret
SYM_FUNC_END(__arch_chacha20_blocks_nostack)

emit_riscv_feature_1_and