/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * Define one absolute symbol per vector operand spelling that the
	 * macros below accept (.Lv<N>.2d and .Lv<N>.16b), each equal to the
	 * register number N.  The macros paste ".L" in front of their operands
	 * to look these up and build the instruction word by hand.
	 */
	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr

	/*
	 * ARMv8.2 Crypto Extensions instructions
	 *
	 * Emitted as raw .inst words, so assembling this file does not depend
	 * on the assembler knowing the mnemonics.  Operand meaning:
	 *
	 *   eor3  rd, rn, rm, ra	rd = rn ^ rm ^ ra
	 *   rax1  rd, rn, rm		rd = rn ^ rol64(rm, 1)
	 *   bcax  rd, rn, rm, ra	rd = rn ^ (rm & ~ra)
	 *   xar   rd, rn, rm, imm6	rd = ror64(rn ^ rm, imm6)
	 *
	 * Only the operand spellings defined by the .irp block above (vN.2d and
	 * vN.16b) can be used with these macros.
	 */
	.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
	.endm

	/*
	 * size_t sha3_ce_transform(struct sha3_state *state, const u8 *data,
	 *			    size_t nblocks, size_t block_size)
	 *
	 * block_size is assumed to be one of 72 (SHA3-512), 104 (SHA3-384), 136
	 * (SHA3-256 and SHAKE256), 144 (SHA3-224), or 168 (SHAKE128).
	 *
	 * For each block: XOR block_size bytes of data into the leading lanes
	 * of the state, then run the 24-round Keccak-f[1600] permutation.
	 *
	 * In:	x0 = state, 25 consecutive 64-bit lanes (read, then written back)
	 *	x1 = data, advanced by block_size per absorbed block
	 *	x2 = nblocks
	 *	x3 = block_size
	 * Out:	x0 = number of blocks NOT processed.  This is 0 unless the
	 *	cond_yield check after a block branches to the exit early.
	 *	nblocks == 0 returns 0 immediately and leaves the state untouched.
	 *
	 * Register use inside the function:
	 *	v0-v24	 state lanes 0..24 (lane i lives in vi)
	 *	v25-v31	 temporaries (input words, column parities, round constant)
	 *	w8	 round counter (x8 is also the state load cursor and a
	 *		 cond_yield scratch register)
	 *	x9	 cursor into .Lsha3_rcon (also a cond_yield scratch register)
	 *
	 * NOTE(review): v8-v15 are overwritten and not restored here; presumably
	 * every caller brackets the call with kernel_neon_begin() and
	 * kernel_neon_end() - confirm against the C glue code.
	 */
	.text
SYM_FUNC_START(sha3_ce_transform)
	/*
	 * Nothing to absorb: return 0 without touching the state.  Without this
	 * check the "sub x2, x2, #1" below would wrap to 2^64 - 1 and the loop
	 * would read far beyond the caller's buffer.
	 */
	cbz	x2, .Lsha3_ce_ret

	/* load state */
	add	x8, x0, #32
	ld1	{ v0.1d- v3.1d}, [x0]
	ld1	{ v4.1d- v7.1d}, [x8], #32
	ld1	{ v8.1d-v11.1d}, [x8], #32
	ld1	{v12.1d-v15.1d}, [x8], #32
	ld1	{v16.1d-v19.1d}, [x8], #32
	ld1	{v20.1d-v23.1d}, [x8], #32
	ld1	{v24.1d}, [x8]

	/* per-block loop: x2 = blocks remaining after this one */
0:	sub	x2, x2, #1
	mov	w8, #24			// 24 rounds per permutation
	adr_l	x9, .Lsha3_rcon		// restart at the first round constant

	/* load input */
	/* lanes 0..4 (40 bytes) */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b}, [x1], #8
	eor	v0.8b, v0.8b, v25.8b
	eor	v1.8b, v1.8b, v26.8b
	eor	v2.8b, v2.8b, v27.8b
	eor	v3.8b, v3.8b, v28.8b
	eor	v4.8b, v4.8b, v29.8b

	/* lanes 5..8 (72 bytes absorbed so far) */
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v5.8b, v5.8b, v25.8b
	eor	v6.8b, v6.8b, v26.8b
	eor	v7.8b, v7.8b, v27.8b
	eor	v8.8b, v8.8b, v28.8b
	cmp	x3, #72
	b.eq	3f	/* SHA3-512 (block_size=72)? */

	/* lanes 9..12 (104 bytes absorbed so far) */
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v9.8b, v9.8b, v25.8b
	eor	v10.8b, v10.8b, v26.8b
	eor	v11.8b, v11.8b, v27.8b
	eor	v12.8b, v12.8b, v28.8b
	cmp	x3, #104
	b.eq	3f	/* SHA3-384 (block_size=104)? */

	/* lanes 13..16 (136 bytes absorbed so far) */
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	cmp	x3, #144
	/* block_size is a size_t, so use the unsigned "lower" condition */
	b.lo	3f	/* SHA3-256 or SHAKE256 (block_size=136)? */
	b.eq	2f	/* SHA3-224 (block_size=144)? */

	/* SHAKE128 (block_size=168) */
	/* lanes 17..20 */
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v17.8b, v17.8b, v25.8b
	eor	v18.8b, v18.8b, v26.8b
	eor	v19.8b, v19.8b, v27.8b
	eor	v20.8b, v20.8b, v28.8b
	b	3f
2:
	/* SHA3-224 (block_size=144) */
	/* lane 17 only */
	ld1	{v25.8b}, [x1], #8
	eor	v17.8b, v17.8b, v25.8b

	/* per-round loop: w8 = rounds remaining after this one */
3:	sub	w8, w8, #1

	/*
	 * Theta, part 1: column parities.  Column x is lanes x, x+5, x+10,
	 * x+15, x+20.  v25..v29 = parity of columns 0..4.
	 */
	eor3	v29.16b,  v4.16b,  v9.16b, v14.16b
	eor3	v26.16b,  v1.16b,  v6.16b, v11.16b
	eor3	v28.16b,  v3.16b,  v8.16b, v13.16b
	eor3	v25.16b,  v0.16b,  v5.16b, v10.16b
	eor3	v27.16b,  v2.16b,  v7.16b, v12.16b
	eor3	v29.16b, v29.16b, v19.16b, v24.16b
	eor3	v26.16b, v26.16b, v16.16b, v21.16b
	eor3	v28.16b, v28.16b, v18.16b, v23.16b
	eor3	v25.16b, v25.16b, v15.16b, v20.16b
	eor3	v27.16b, v27.16b, v17.16b, v22.16b

	/*
	 * Theta, part 2: bc[x] = parity[x - 1] ^ rol64(parity[x + 1], 1).
	 * The order matters: each parity register is overwritten only after
	 * its last use as an input.  bc[0] goes to v30 because v29 is still
	 * needed for bc[3].
	 */
	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
	rax1	v27.2d, v27.2d, v29.2d	// bc[3]

	/*
	 * Theta part 3 fused with rho and pi: each xar XORs bc[x] into one
	 * lane, rotates it left by the amount written as (64 - n), and writes
	 * the result to the register of the lane position it moves to.  Where
	 * that register is still live, a temporary is used instead:
	 *   v29 -> lane 10, v31 -> lane 20, v28 -> lane 4,  v8  -> lane 21,
	 *   v4  -> lane 8,  v27 -> lane 3,  v3  -> lane 18, v25 -> lane 17,
	 *   v26 -> lane 11, v30 -> lane 7
	 * Lane 0 is not rotated and does not move, so it gets a plain eor.
	 * Do not reorder: every source must be read before it is overwritten.
	 */
	eor	 v0.16b,  v0.16b, v30.16b
	xar	 v29.2d,   v1.2d,  v25.2d, (64 - 1)
	xar	  v1.2d,   v6.2d,  v25.2d, (64 - 44)
	xar	  v6.2d,   v9.2d,  v28.2d, (64 - 20)
	xar	  v9.2d,  v22.2d,  v26.2d, (64 - 61)
	xar	 v22.2d,  v14.2d,  v28.2d, (64 - 39)
	xar	 v14.2d,  v20.2d,  v30.2d, (64 - 18)
	xar	 v31.2d,   v2.2d,  v26.2d, (64 - 62)
	xar	  v2.2d,  v12.2d,  v26.2d, (64 - 43)
	xar	 v12.2d,  v13.2d,  v27.2d, (64 - 25)
	xar	 v13.2d,  v19.2d,  v28.2d, (64 - 8)
	xar	 v19.2d,  v23.2d,  v27.2d, (64 - 56)
	xar	 v23.2d,  v15.2d,  v30.2d, (64 - 41)
	xar	 v15.2d,   v4.2d,  v28.2d, (64 - 27)
	xar	 v28.2d,  v24.2d,  v28.2d, (64 - 14)
	xar	 v24.2d,  v21.2d,  v25.2d, (64 - 2)
	xar	  v8.2d,   v8.2d,  v27.2d, (64 - 55)
	xar	  v4.2d,  v16.2d,  v25.2d, (64 - 45)
	xar	 v16.2d,   v5.2d,  v30.2d, (64 - 36)
	xar	  v5.2d,   v3.2d,  v27.2d, (64 - 28)
	xar	 v27.2d,  v18.2d,  v27.2d, (64 - 21)
	xar	  v3.2d,  v17.2d,  v26.2d, (64 - 15)
	xar	 v25.2d,  v11.2d,  v25.2d, (64 - 10)
	xar	 v26.2d,   v7.2d,  v26.2d, (64 - 6)
	xar	 v30.2d,  v10.2d,  v30.2d, (64 - 3)

	/*
	 * Chi, one row of five lanes at a time:
	 *   lane[x] = b[x] ^ (b[x + 2] & ~b[x + 1])
	 * Within each row the lanes held in temporaries are written first, so
	 * no input is overwritten before its last use.
	 */
	/* row 4: lanes 20..24 (lane 20 input in v31, lane 21 input in v8) */
	bcax	v20.16b, v31.16b, v22.16b,  v8.16b
	bcax	v21.16b,  v8.16b, v23.16b, v22.16b
	bcax	v22.16b, v22.16b, v24.16b, v23.16b
	bcax	v23.16b, v23.16b, v31.16b, v24.16b
	bcax	v24.16b, v24.16b,  v8.16b, v31.16b

	/* v31 is free now: fetch this round's constant, advance the cursor */
	ld1r	{v31.2d}, [x9], #8

	/* row 3: lanes 15..19 (lane 17 input in v25, lane 18 input in v3) */
	bcax	v17.16b, v25.16b, v19.16b,  v3.16b
	bcax	v18.16b,  v3.16b, v15.16b, v19.16b
	bcax	v19.16b, v19.16b, v16.16b, v15.16b
	bcax	v15.16b, v15.16b, v25.16b, v16.16b
	bcax	v16.16b, v16.16b,  v3.16b, v25.16b

	/* row 2: lanes 10..14 (lane 10 input in v29, lane 11 input in v26) */
	bcax	v10.16b, v29.16b, v12.16b, v26.16b
	bcax	v11.16b, v26.16b, v13.16b, v12.16b
	bcax	v12.16b, v12.16b, v14.16b, v13.16b
	bcax	v13.16b, v13.16b, v29.16b, v14.16b
	bcax	v14.16b, v14.16b, v26.16b, v29.16b

	/* row 1: lanes 5..9 (lane 7 input in v30, lane 8 input in v4) */
	bcax	 v7.16b, v30.16b,  v9.16b,  v4.16b
	bcax	 v8.16b,  v4.16b,  v5.16b,  v9.16b
	bcax	 v9.16b,  v9.16b,  v6.16b,  v5.16b
	bcax	 v5.16b,  v5.16b, v30.16b,  v6.16b
	bcax	 v6.16b,  v6.16b,  v4.16b, v30.16b

	/* row 0: lanes 0..4 (lane 3 input in v27, lane 4 input in v28) */
	bcax	 v3.16b, v27.16b,  v0.16b, v28.16b
	bcax	 v4.16b, v28.16b,  v1.16b,  v0.16b
	bcax	 v0.16b,  v0.16b,  v2.16b,  v1.16b
	bcax	 v1.16b,  v1.16b, v27.16b,  v2.16b
	bcax	 v2.16b,  v2.16b, v28.16b, v27.16b

	/* Iota: fold the round constant into lane 0 */
	eor	 v0.16b,  v0.16b, v31.16b

	cbnz	w8, 3b			// more rounds for this block?
	/*
	 * cond_yield comes from <asm/assembler.h>; presumably it branches to 4f
	 * when the task should give up the CPU, using x8 and x9 as scratch.
	 * Both are reinitialised at 0: before their next use.
	 */
	cond_yield 4f, x8, x9
	cbnz	x2, 0b			// more blocks to absorb?

	/* save state */
	/* x0 is post-incremented here, so the state pointer is consumed */
4:	st1	{ v0.1d- v3.1d}, [x0], #32
	st1	{ v4.1d- v7.1d}, [x0], #32
	st1	{ v8.1d-v11.1d}, [x0], #32
	st1	{v12.1d-v15.1d}, [x0], #32
	st1	{v16.1d-v19.1d}, [x0], #32
	st1	{v20.1d-v23.1d}, [x0], #32
	st1	{v24.1d}, [x0]
.Lsha3_ce_ret:
	mov	x0, x2			// return the number of blocks left over
	ret
SYM_FUNC_END(sha3_ce_transform)

	/*
	 * The 24 Keccak-f[1600] round constants, in round order.  The round
	 * loop reads them sequentially with a post-incremented ld1r.
	 *
	 * NOTE(review): on AArch64 gas the .align operand is a power-of-two
	 * exponent, so ".align 8" requests 256-byte alignment.  8-byte alignment
	 * would be enough for the 64-bit loads; confirm whether the larger
	 * alignment is intentional before changing it.
	 */
	.section	".rodata", "a"
	.align		8
.Lsha3_rcon:
	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008