/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/stringify.h>
#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/dwarf.h>
#include <asm/fpu-insn.h>

/*
 * Vector register assignment:
 *   STATE0..STATE3  working ChaCha20 state matrix, one 4x32-bit row each
 *   COPY0..COPY3    pristine copy of the initial state; re-added to the
 *                   state after the 20 rounds and used to restart each block
 *   BEPERM          VPERM byte-selector table (byte-swap pattern) for the
 *                   fallback little-endian store path
 *   TMP0..TMP3      byte-swapped output rows (fallback store path only)
 */
#define STATE0	%v0
#define STATE1	%v1
#define STATE2	%v2
#define STATE3	%v3
#define COPY0	%v4
#define COPY1	%v5
#define COPY2	%v6
#define COPY3	%v7
#define BEPERM	%v19
#define TMP0	%v20
#define TMP1	%v21
#define TMP2	%v22
#define TMP3	%v23

	.section .rodata

	.balign 32
SYM_DATA_START_LOCAL(chacha20_constants)
	.long	0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
	.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
SYM_DATA_END(chacha20_constants)

	.text
/*
 * s390 ChaCha20 implementation meant for vDSO. Produces a given positive
 * number of blocks of output with nonce 0, taking an input key and 8-bytes
 * counter. Does not spill to the stack.
 *
 * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
 *				       const uint8_t *key,
 *				       uint32_t *counter,
 *				       size_t nblocks)
 *
 * GPR usage:
 *   %r2 = dst_bytes (advanced by 64 per block)
 *   %r3 = key pointer, then the 64-bit counter value
 *   %r4 = counter pointer (read at entry, written back at exit)
 *   %r5 = nblocks (loop counter for brctg)
 *   %r1 = address of chacha20_constants, then constant zero
 *   %r0 = double-round loop counter
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)
	CFI_STARTPROC
	larl	%r1,chacha20_constants

	/* COPY0 = "expand 32-byte k" */
	VL	COPY0,0,,%r1

	/*
	 * BEPERM = byte selectors for VPERM. Only needed on the fallback
	 * (no-VSTBRF) path; when facility 148 is installed the load is
	 * patched out with a same-length nop (brcl 0,0).
	 * NOTE(review): facility 148 appears to be the vector-enhancements
	 * facility providing VSTBRF — confirm against the PoP.
	 */
	ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)

	/* COPY1,COPY2 = key */
	VLM	COPY1,COPY2,0,%r3

	/* COPY3 = counter || zero nonce */
	lg	%r3,0(%r4)
	VZERO	COPY3
	VLVGG	COPY3,%r3,0

	/* %r1 = 0 from here on; alcr below uses it for carry propagation */
	lghi	%r1,0
.Lblock:
	/* Restart the working state from the pristine copy */
	VLR	STATE0,COPY0
	VLR	STATE1,COPY1
	VLR	STATE2,COPY2
	VLR	STATE3,COPY3

	/* 10 double rounds = 20 ChaCha20 rounds */
	lghi	%r0,10
.Ldoubleround:
	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
	VAF	STATE0,STATE0,STATE1
	VX	STATE3,STATE3,STATE0
	VERLLF	STATE3,STATE3,16

	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
	VAF	STATE2,STATE2,STATE3
	VX	STATE1,STATE1,STATE2
	VERLLF	STATE1,STATE1,12

	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
	VAF	STATE0,STATE0,STATE1
	VX	STATE3,STATE3,STATE0
	VERLLF	STATE3,STATE3,8

	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
	VAF	STATE2,STATE2,STATE3
	VX	STATE1,STATE1,STATE2
	VERLLF	STATE1,STATE1,7

	/* Diagonalize: rotate rows 1-3 left by 1, 2, 3 words */
	/* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
	VSLDB	STATE1,STATE1,STATE1,4
	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
	VSLDB	STATE2,STATE2,STATE2,8
	/* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
	VSLDB	STATE3,STATE3,STATE3,12

	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
	VAF	STATE0,STATE0,STATE1
	VX	STATE3,STATE3,STATE0
	VERLLF	STATE3,STATE3,16

	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
	VAF	STATE2,STATE2,STATE3
	VX	STATE1,STATE1,STATE2
	VERLLF	STATE1,STATE1,12

	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
	VAF	STATE0,STATE0,STATE1
	VX	STATE3,STATE3,STATE0
	VERLLF	STATE3,STATE3,8

	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
	VAF	STATE2,STATE2,STATE3
	VX	STATE1,STATE1,STATE2
	VERLLF	STATE1,STATE1,7

	/* Undo the diagonalization: rotate rows 1-3 back */
	/* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
	VSLDB	STATE1,STATE1,STATE1,12
	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
	VSLDB	STATE2,STATE2,STATE2,8
	/* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
	VSLDB	STATE3,STATE3,STATE3,4
	brctg	%r0,.Ldoubleround

	/* Feed-forward: add the initial state back in */
	/* OUTPUT0 = STATE0 + COPY0 */
	VAF	STATE0,STATE0,COPY0
	/* OUTPUT1 = STATE1 + COPY1 */
	VAF	STATE1,STATE1,COPY1
	/* OUTPUT2 = STATE2 + COPY2 */
	VAF	STATE2,STATE2,COPY2
	/* OUTPUT3 = STATE3 + COPY3 */
	VAF	STATE3,STATE3,COPY3

	/*
	 * Store the 64-byte block little-endian. With facility 148 use
	 * VSTBRF (byte-reversed 32-bit stores) directly; otherwise byte-swap
	 * via VPERM into TMP0-TMP3 first. The trailing brcl 0,0 pads the
	 * shorter variant so both alternatives have equal length.
	 */
	ALTERNATIVE							\
		__stringify(						\
		/* Convert STATE to little endian and store to OUTPUT */\
		VPERM	TMP0,STATE0,STATE0,BEPERM;			\
		VPERM	TMP1,STATE1,STATE1,BEPERM;			\
		VPERM	TMP2,STATE2,STATE2,BEPERM;			\
		VPERM	TMP3,STATE3,STATE3,BEPERM;			\
		VSTM	TMP0,TMP3,0,%r2),				\
		__stringify(						\
		/* 32 bit wise little endian store to OUTPUT */		\
		VSTBRF	STATE0,0,,%r2;					\
		VSTBRF	STATE1,16,,%r2;					\
		VSTBRF	STATE2,32,,%r2;					\
		VSTBRF	STATE3,48,,%r2;					\
		brcl	0,0),						\
		ALT_FACILITY(148)

	/*
	 * ++COPY3.COUNTER: the counter is two 32-bit little-endian words;
	 * loaded big-endian into %r3 the first word sits in the high half.
	 * alsih adds 1 to the high 32 bits and sets the carry; alcr then
	 * adds %r1 (zero) plus carry, propagating overflow into the second
	 * word. Hand-encoded via .insn — presumably for assemblers without
	 * alsih support; verify minimum binutils level.
	 */
	/* alsih %r3,1 */
	.insn	rilu,0xcc0a00000000,%r3,1
	alcr	%r3,%r1
	VLVGG	COPY3,%r3,0

	/* OUTPUT += 64, --NBLOCKS */
	aghi	%r2,64
	brctg	%r5,.Lblock

	/* COUNTER = COPY3.COUNTER */
	stg	%r3,0(%r4)

	/* Zero out potentially sensitive regs */
	VZERO	STATE0
	VZERO	STATE1
	VZERO	STATE2
	VZERO	STATE3
	VZERO	COPY1
	VZERO	COPY2

	/*
	 * Early exit if TMP0-TMP3 have not been used: on facility-148
	 * machines the VSTBRF path never touched them, so this is patched
	 * to return here instead of falling through to the extra zeroing.
	 */
	ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148)

	VZERO	TMP0
	VZERO	TMP1
	VZERO	TMP2
	VZERO	TMP3

	br	%r14
	CFI_ENDPROC
SYM_FUNC_END(__arch_chacha20_blocks_nostack)