/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
 *
 * Based on arch/loongarch/vdso/vgetrandom-chacha.S.
 */

#include <asm/asm.h>
#include <linux/linkage.h>

.text

/*
 * ROTRI rd, rs, imm
 *
 * Rotate the low 32 bits of \rs right by \imm bits and put the result in
 * \rd (\rd may be the same register as \rs).
 *
 * Clobbers: t0 (holds \rs << (32 - \imm) afterwards).
 */
.macro	ROTRI	rd rs imm
	slliw	t0, \rs, 32 - \imm
	srliw	\rd, \rs, \imm
	or	\rd, \rd, t0
.endm

/*
 * OP_4REG op, d0, d1, d2, d3, s0, s1, s2, s3
 *
 * Apply the three-operand instruction or macro \op lane-wise to four
 * destination/source pairs: d[n] = d[n] \op s[n] for n = 0..3.
 */
.macro	OP_4REG	op d0 d1 d2 d3 s0 s1 s2 s3
	\op	\d0, \d0, \s0
	\op	\d1, \d1, \s1
	\op	\d2, \d2, \s2
	\op	\d3, \d3, \s3
.endm

/*
 * Generate ChaCha20 blocks while keeping the whole cipher state in
 * registers.
 *
 *	a0: output bytes
 *	a1: 32-byte key input
 *	a2: 8-byte counter input/output
 *	a3: number of 64-byte blocks to write to output
 *
 * The block loop tests a3 only after a block has been written, so the
 * caller must pass a3 >= 1.
 *
 * The code uses RV64 instructions (ld/sd, addw, slliw/srliw).
 *
 * Registers written besides the callee-saved s0-s11 (which are saved and
 * restored): a0, a3, a4, a5, a6, a7, t0-t6.
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)

#define output		a0
#define key		a1
#define counter		a2
#define nblocks		a3
#define i		a4
#define state0		s0
#define state1		s1
#define state2		s2
#define state3		s3
#define state4		s4
#define state5		s5
#define state6		s6
#define state7		s7
#define state8		s8
#define state9		s9
#define state10		s10
#define state11		s11
#define state12		a5
#define state13		a6
#define state14		a7
#define state15		t1
#define cnt		t2
#define copy0		t3
#define copy1		t4
#define copy2		t5
#define copy3		t6

/* Packs to be used with OP_4REG */
#define line0		state0, state1, state2, state3
#define line1		state4, state5, state6, state7
#define line2		state8, state9, state10, state11
#define line3		state12, state13, state14, state15

/* Rows rotated so that lane n of each pack forms the n-th diagonal */
#define line1_perm	state5, state6, state7, state4
#define line2_perm	state10, state11, state8, state9
#define line3_perm	state15, state12, state13, state14

#define copy		copy0, copy1, copy2, copy3

/* Rotate-right amounts, replicated for the four lanes of OP_4REG */
#define _16		16, 16, 16, 16
#define _20		20, 20, 20, 20
#define _24		24, 24, 24, 24
#define _25		25, 25, 25, 25

	/*
	 * s0-s11 hold state[0..11] below and are callee-saved, so all twelve
	 * of them are saved here and restored in the epilogue.
	 * This does not violate the stack-less requirement: no sensitive data
	 * is spilled onto the stack, only the caller's register values.
	 */
	addi		sp, sp, -12*SZREG
	REG_S		s0,         (sp)
	REG_S		s1,    SZREG(sp)
	REG_S		s2,  2*SZREG(sp)
	REG_S		s3,  3*SZREG(sp)
	REG_S		s4,  4*SZREG(sp)
	REG_S		s5,  5*SZREG(sp)
	REG_S		s6,  6*SZREG(sp)
	REG_S		s7,  7*SZREG(sp)
	REG_S		s8,  8*SZREG(sp)
	REG_S		s9,  9*SZREG(sp)
	REG_S		s10, 10*SZREG(sp)
	REG_S		s11, 11*SZREG(sp)

	/* cnt = 64-bit block counter, kept in one register across blocks */
	ld		cnt, (counter)

	/* copy[0,1,2,3] = "expand 32-byte k", loaded once for all blocks */
	li		copy0, 0x61707865
	li		copy1, 0x3320646e
	li		copy2, 0x79622d32
	li		copy3, 0x6b206574

.Lblock:
	/* state[0,1,2,3] = "expand 32-byte k" */
	mv		state0, copy0
	mv		state1, copy1
	mv		state2, copy2
	mv		state3, copy3

	/* state[4,5,..,11] = key */
	lw		state4,   (key)
	lw		state5,  4(key)
	lw		state6,  8(key)
	lw		state7,  12(key)
	lw		state8,  16(key)
	lw		state9,  20(key)
	lw		state10, 24(key)
	lw		state11, 28(key)

	/*
	 * state[12,13] = counter
	 * state12 keeps all 64 bits of cnt; only its low 32 bits take part
	 * in the addw/ROTRI/sw operations below.
	 */
	mv		state12, cnt
	srli		state13, cnt, 32

	/* state[14,15] = 0 */
	mv		state14, zero
	mv		state15, zero

	/* 10 iterations of (odd round + even round) = 20 rounds */
	li		i, 10
.Lpermute:
	/* odd round */
	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _16

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _20

	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _24

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _25

	/* even round */
	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _16

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _20

	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _24

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _25

	addi		i, i, -1
	bnez		i, .Lpermute

	/* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
	OP_4REG	addw	line0, copy
	sw		state0,   (output)
	sw		state1,  4(output)
	sw		state2,  8(output)
	sw		state3, 12(output)

	/* from now on state[0,1,2,3] are scratch registers */

	/* state[0,1,2,3] = lo(key) */
	lw		state0,   (key)
	lw		state1,  4(key)
	lw		state2,  8(key)
	lw		state3, 12(key)

	/* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
	OP_4REG	addw	line1, line0
	sw		state4, 16(output)
	sw		state5, 20(output)
	sw		state6, 24(output)
	sw		state7, 28(output)

	/* state[0,1,2,3] = hi(key) */
	lw		state0, 16(key)
	lw		state1, 20(key)
	lw		state2, 24(key)
	lw		state3, 28(key)

	/* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */
	OP_4REG	addw	line2, line0
	sw		state8,  32(output)
	sw		state9,  36(output)
	sw		state10, 40(output)
	sw		state11, 44(output)

	/* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */
	addw		state12, state12, cnt
	srli		state0, cnt, 32
	addw		state13, state13, state0
	sw		state12, 48(output)
	sw		state13, 52(output)
	sw		state14, 56(output)
	sw		state15, 60(output)

	/* ++counter */
	addi		cnt, cnt, 1

	/* output += 64 */
	addi		output, output, 64
	/* --nblocks */
	addi		nblocks, nblocks, -1
	bnez		nblocks, .Lblock

	/* counter = [cnt_lo, cnt_hi] */
	sd		cnt, (counter)

	/*
	 * Zero out the potentially sensitive regs, in case nothing uses these
	 * again. At this point copy[0,1,2,3] just contain "expand 32-byte k"
	 * and state[0,...,11] are s0-s11, which are restored in the epilogue,
	 * so of the state only state[12,...,15] need to be zeroed.
	 *
	 * t0 must be zeroed as well: it is the scratch register of ROTRI and
	 * still holds a shifted copy of the last word rotated in the final
	 * round, i.e. bits of the permuted state before the key was added.
	 */
	mv		state12, zero
	mv		state13, zero
	mv		state14, zero
	mv		state15, zero
	mv		t0, zero

	REG_L		s0,         (sp)
	REG_L		s1,    SZREG(sp)
	REG_L		s2,  2*SZREG(sp)
	REG_L		s3,  3*SZREG(sp)
	REG_L		s4,  4*SZREG(sp)
	REG_L		s5,  5*SZREG(sp)
	REG_L		s6,  6*SZREG(sp)
	REG_L		s7,  7*SZREG(sp)
	REG_L		s8,  8*SZREG(sp)
	REG_L		s9,  9*SZREG(sp)
	REG_L		s10, 10*SZREG(sp)
	REG_L		s11, 11*SZREG(sp)
	addi		sp, sp, 12*SZREG

	ret
SYM_FUNC_END(__arch_chacha20_blocks_nostack)