/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/stringify.h>
#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/dwarf.h>
#include <asm/fpu-insn.h>

/* Working state: one vector register per row of four 32-bit words. */
#define STATE0	%v0
#define STATE1	%v1
#define STATE2	%v2
#define STATE3	%v3
/* Input block (constants, key, counter || nonce), kept for the final add. */
#define COPY0	%v4
#define COPY1	%v5
#define COPY2	%v6
#define COPY3	%v7
/* Byte selectors and scratch registers, used only by the VPERM store path. */
#define BEPERM	%v19
#define TMP0	%v20
#define TMP1	%v21
#define TMP2	%v22
#define TMP3	%v23

	.section .rodata

	.balign 32
/*
 * Offset  0: the four ChaCha20 constant words ("expand 32-byte k").
 * Offset 16: VPERM byte selectors that reverse the bytes of each 32-bit word.
 */
SYM_DATA_START_LOCAL(chacha20_constants)
	.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
	.long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
SYM_DATA_END(chacha20_constants)

	.text
/*
 * s390 ChaCha20 implementation meant for vDSO. Produces a given positive
 * number of blocks of output with nonce 0, taking an input key and 8-byte
 * counter. Does not spill to the stack.
 *
 * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
 *				       const uint8_t *key,
 *				       uint32_t *counter,
 *				       size_t nblocks)
 *
 * Register usage as seen in the code below:
 *   %r2 - dst_bytes; advanced by 64 after every block
 *   %r3 - key pointer on entry (32 bytes are read); reused afterwards to
 *	   hold the 8-byte counter value
 *   %r4 - counter pointer; 8 bytes are read on entry and the updated
 *	   value is written back on exit
 *   %r5 - nblocks; must be non-zero, since the block loop is closed by
 *	   brctg, which decrements before testing for zero
 *   %r0 - double-round loop counter
 *   %r1 - address of chacha20_constants, later zero for the carry add
 *
 * On return STATE0-STATE3, COPY1 and COPY2 (the key) are zeroed, as are
 * TMP0-TMP3 on the path that uses them.
 *
 * Each ALTERNATIVE below uses ALT_FACILITY(148): the first variant is the
 * default, the second one is presumably patched in when facility 148 is
 * installed (the variant that uses VSTBRF). "brcl 0,0" has a branch mask
 * of zero and therefore never branches; it acts as a no-op filler,
 * presumably so that both variants have the same length.
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)
	CFI_STARTPROC
	larl	%r1,chacha20_constants

	/* COPY0 = "expand 32-byte k" */
	VL	COPY0,0,,%r1

	/* BEPERM = byte selectors for VPERM */
	ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)

	/* COPY1,COPY2 = key */
	VLM	COPY1,COPY2,0,%r3

	/* COPY3 = counter || zero nonce */
	lg	%r3,0(%r4)
	VZERO	COPY3
	VLVGG	COPY3,%r3,0

	/* %r1 = 0, the addend for the carry propagation (alcr) below */
	lghi	%r1,0
.Lblock:
	/* Start every block from the unmodified input words */
	VLR	STATE0,COPY0
	VLR	STATE1,COPY1
	VLR	STATE2,COPY2
	VLR	STATE3,COPY3

	/* 10 iterations of the double round below */
	lghi	%r0,10
.Ldoubleround:
	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
	VAF	STATE0,STATE0,STATE1
	VX	STATE3,STATE3,STATE0
	VERLLF	STATE3,STATE3,16

	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
	VAF	STATE2,STATE2,STATE3
	VX	STATE1,STATE1,STATE2
	VERLLF	STATE1,STATE1,12

	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
	VAF	STATE0,STATE0,STATE1
	VX	STATE3,STATE3,STATE0
	VERLLF	STATE3,STATE3,8

	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
	VAF	STATE2,STATE2,STATE3
	VX	STATE1,STATE1,STATE2
	VERLLF	STATE1,STATE1,7

	/*
	 * Rotate rows 1-3 by one, two and three words, so that the same
	 * lane-wise sequence now operates on the diagonals.
	 */
	/* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
	VSLDB	STATE1,STATE1,STATE1,4
	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
	VSLDB	STATE2,STATE2,STATE2,8
	/* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
	VSLDB	STATE3,STATE3,STATE3,12

	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
	VAF	STATE0,STATE0,STATE1
	VX	STATE3,STATE3,STATE0
	VERLLF	STATE3,STATE3,16

	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
	VAF	STATE2,STATE2,STATE3
	VX	STATE1,STATE1,STATE2
	VERLLF	STATE1,STATE1,12

	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
	VAF	STATE0,STATE0,STATE1
	VX	STATE3,STATE3,STATE0
	VERLLF	STATE3,STATE3,8

	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
	VAF	STATE2,STATE2,STATE3
	VX	STATE1,STATE1,STATE2
	VERLLF	STATE1,STATE1,7

	/* Undo the row rotation applied before the second half */
	/* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
	VSLDB	STATE1,STATE1,STATE1,12
	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
	VSLDB	STATE2,STATE2,STATE2,8
	/* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
	VSLDB	STATE3,STATE3,STATE3,4
	brctg	%r0,.Ldoubleround

	/* OUTPUT0 = STATE0 + COPY0 */
	VAF	STATE0,STATE0,COPY0
	/* OUTPUT1 = STATE1 + COPY1 */
	VAF	STATE1,STATE1,COPY1
	/* OUTPUT2 = STATE2 + COPY2 */
	VAF	STATE2,STATE2,COPY2
	/* OUTPUT3 = STATE3 + COPY3 */
	VAF	STATE3,STATE3,COPY3

	ALTERNATIVE							\
		__stringify(						\
		/* Convert STATE to little endian and store to OUTPUT */\
		VPERM	TMP0,STATE0,STATE0,BEPERM;			\
		VPERM	TMP1,STATE1,STATE1,BEPERM;			\
		VPERM	TMP2,STATE2,STATE2,BEPERM;			\
		VPERM	TMP3,STATE3,STATE3,BEPERM;			\
		VSTM	TMP0,TMP3,0,%r2),				\
		__stringify(						\
		/* 32 bit wise little endian store to OUTPUT */		\
		VSTBRF	STATE0,0,,%r2;					\
		VSTBRF	STATE1,16,,%r2;					\
		VSTBRF	STATE2,32,,%r2;					\
		VSTBRF	STATE3,48,,%r2;					\
		brcl	0,0),						\
		ALT_FACILITY(148)

	/*
	 * ++COPY3.COUNTER
	 *
	 * The first counter word sits in the upper half of %r3 (big-endian
	 * lg above). alsih adds 1 to that upper half and records the carry
	 * in the condition code; alcr then adds %r1 (zero) plus that carry
	 * to the lower half. Nothing that changes the condition code may be
	 * placed between the two instructions.
	 */
	/* alsih %r3,1 */
	.insn	rilu,0xcc0a00000000,%r3,1
	alcr	%r3,%r1
	VLVGG	COPY3,%r3,0

	/* OUTPUT += 64, --NBLOCKS */
	aghi	%r2,64
	brctg	%r5,.Lblock

	/* COUNTER = COPY3.COUNTER */
	stg	%r3,0(%r4)

	/* Zero out potentially sensitive regs */
	VZERO	STATE0
	VZERO	STATE1
	VZERO	STATE2
	VZERO	STATE3
	VZERO	COPY1
	VZERO	COPY2

	/* Early exit if TMP0-TMP3 have not been used */
	ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148)

	VZERO	TMP0
	VZERO	TMP1
	VZERO	TMP2
	VZERO	TMP3

	br	%r14
	CFI_ENDPROC
SYM_FUNC_END(__arch_chacha20_blocks_nostack)