/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * Define one assembler symbol per vector register operand spelling
	 * (.Lv0.2d ... .Lv31.2d and .Lv0.16b ... .Lv31.16b), each equal to the
	 * register number.  The macros below prepend ".L" to their operands, so
	 * an operand such as "v29.16b" resolves to the 5-bit register number
	 * that is OR-ed into a hand-encoded instruction word.
	 */
	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr

	/*
	 * ARMv8.2 Crypto Extensions instructions
	 *
	 * The four SHA3 instructions are emitted as raw opcodes with .inst
	 * (presumably so that the file also assembles with toolchains that do
	 * not know these mnemonics - confirm against the supported toolchain
	 * list).  Field layout shared by all four macros:
	 *
	 *	bits [4:0]	rd
	 *	bits [9:5]	rn
	 *	bits [14:10]	ra (eor3, bcax), or bits [15:10] imm6 (xar)
	 *	bits [20:16]	rm
	 *
	 * Semantics, as specified by the Arm Architecture Reference Manual:
	 *
	 *	eor3	rd = rn ^ rm ^ ra
	 *	rax1	rd = rn ^ rol64(rm, 1)
	 *	bcax	rd = rn ^ (rm & ~ra)
	 *	xar	rd = ror64(rn ^ rm, imm6)
	 *
	 * xar rotates right, so a left rotation by n is written as (64 - n).
	 */
	.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
	.endm

	/*
	 * size_t sha3_ce_transform(struct sha3_state *state, const u8 *data,
	 *			    size_t nblocks, size_t block_size)
	 *
	 * Absorb blocks of block_size bytes from data into the state: each
	 * block is XOR-ed into the leading lanes of the state, after which 24
	 * rounds of the permutation are applied.
	 *
	 * block_size is assumed to be one of 72 (SHA3-512), 104 (SHA3-384), 136
	 * (SHA3-256 and SHAKE256), 144 (SHA3-224), or 168 (SHAKE128).
	 *
	 * In:	x0 = state, 25 64-bit lanes, updated in place
	 *	x1 = data, advanced by block_size for every block absorbed
	 *	x2 = nblocks; 0 returns immediately without touching the state
	 *	x3 = block_size
	 *
	 * Out:	x0 = number of blocks that were NOT processed.  This is 0 when
	 *	all input was consumed, and non-zero when cond_yield branched
	 *	out of the block loop early; the caller is then expected to call
	 *	again for the remaining blocks.
	 *
	 * Register use:
	 *	v0-v24	state lanes (only the low 64 bits of each register are
	 *		loaded from and stored to memory)
	 *	v25-v31	temporaries; v31 also holds the round constant
	 *	x8/w8	scratch pointer while loading, then round counter
	 *	x9	pointer into .Lsha3_rcon
	 *
	 * Clobbers: x1, x2, x8, x9, v0-v31, condition flags.
	 *
	 * NOTE(review): v8-v15 are overwritten without being saved; callers
	 * presumably invoke this with kernel-mode SIMD enabled so that the whole
	 * vector register file is preserved for them - confirm at the call site.
	 */
	.text
SYM_FUNC_START(sha3_ce_transform)
	/*
	 * The block loop decrements nblocks before it is ever tested, so a
	 * count of zero would wrap around and run far past the end of data.
	 * Bail out early instead; x2 is 0, so 0 is returned.
	 */
	cbz	x2, .Lout

	/* load state */
	add	x8, x0, #32
	ld1	{ v0.1d- v3.1d}, [x0]
	ld1	{ v4.1d- v7.1d}, [x8], #32
	ld1	{ v8.1d-v11.1d}, [x8], #32
	ld1	{v12.1d-v15.1d}, [x8], #32
	ld1	{v16.1d-v19.1d}, [x8], #32
	ld1	{v20.1d-v23.1d}, [x8], #32
	ld1	{v24.1d}, [x8]

	/*
	 * Per-block setup.  x8 and x9 are re-initialised for every block, so
	 * it is fine that cond_yield below uses them as temporaries.
	 */
.Lnext_block:
	sub	x2, x2, #1		// x2 = blocks left after this one
	mov	w8, #24			// w8 = rounds left for this block
	adr_l	x9, .Lsha3_rcon		// x9 = first round constant

	/* load input: lanes 0-8 are absorbed for every block size */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b}, [x1], #8
	eor	v0.8b, v0.8b, v25.8b
	eor	v1.8b, v1.8b, v26.8b
	eor	v2.8b, v2.8b, v27.8b
	eor	v3.8b, v3.8b, v28.8b
	eor	v4.8b, v4.8b, v29.8b

	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v5.8b, v5.8b, v25.8b
	eor	v6.8b, v6.8b, v26.8b
	eor	v7.8b, v7.8b, v27.8b
	eor	v8.8b, v8.8b, v28.8b
	cmp	x3, #72
	b.eq	.Lround			/* SHA3-512 (block_size=72)? */

	/* lanes 9-12 */
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v9.8b, v9.8b, v25.8b
	eor	v10.8b, v10.8b, v26.8b
	eor	v11.8b, v11.8b, v27.8b
	eor	v12.8b, v12.8b, v28.8b
	cmp	x3, #104
	b.eq	.Lround			/* SHA3-384 (block_size=104)? */

	/* lanes 13-16 */
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	cmp	x3, #144
	/* block_size is unsigned, hence the unsigned "lower" condition */
	b.lo	.Lround			/* SHA3-256 or SHAKE256 (block_size=136)? */
	b.eq	.Labsorb_sha3_224	/* SHA3-224 (block_size=144)? */

	/* SHAKE128 (block_size=168): lanes 17-20 */
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v17.8b, v17.8b, v25.8b
	eor	v18.8b, v18.8b, v26.8b
	eor	v19.8b, v19.8b, v27.8b
	eor	v20.8b, v20.8b, v28.8b
	b	.Lround

.Labsorb_sha3_224:
	/* SHA3-224 (block_size=144): lane 17 only */
	ld1	{v25.8b}, [x1], #8
	eor	v17.8b, v17.8b, v25.8b

	/*
	 * One round of the permutation per iteration.  The instruction order
	 * inside the round is significant: lanes are permuted in place, and a
	 * register is only overwritten after its previous value has been
	 * consumed.
	 */
.Lround:
	sub	w8, w8, #1

	/* theta: XOR of lanes 4,9,14 / 1,6,11 / 3,8,13 / 0,5,10 / 2,7,12 ... */
	eor3	v29.16b, v4.16b, v9.16b, v14.16b
	eor3	v26.16b, v1.16b, v6.16b, v11.16b
	eor3	v28.16b, v3.16b, v8.16b, v13.16b
	eor3	v25.16b, v0.16b, v5.16b, v10.16b
	eor3	v27.16b, v2.16b, v7.16b, v12.16b
	/* ... completed with the remaining two lanes of each column */
	eor3	v29.16b, v29.16b, v19.16b, v24.16b
	eor3	v26.16b, v26.16b, v16.16b, v21.16b
	eor3	v28.16b, v28.16b, v18.16b, v23.16b
	eor3	v25.16b, v25.16b, v15.16b, v20.16b
	eor3	v27.16b, v27.16b, v17.16b, v22.16b

	/*
	 * theta: combine each column parity with the neighbouring one rotated
	 * left by one.  Each result replaces a parity register only after the
	 * last read of its old value.
	 */
	rax1	v30.2d, v29.2d, v26.2d			// bc[0]
	rax1	v26.2d, v26.2d, v28.2d			// bc[2]
	rax1	v28.2d, v28.2d, v25.2d			// bc[4]
	rax1	v25.2d, v25.2d, v27.2d			// bc[1]
	rax1	v27.2d, v27.2d, v29.2d			// bc[3]

	/*
	 * theta + rho + pi: XOR bc[] into each lane, rotate it left by its
	 * rho offset, and write the result to the register that holds the
	 * lane after the pi permutation.  Lane 0 has rotation 0 and does not
	 * move, so a plain eor is enough.  v29, v31 and the bc[] registers
	 * v25-v28, v30 end up holding lanes that chi consumes below.
	 */
	eor	v0.16b, v0.16b, v30.16b
	xar	v29.2d, v1.2d, v25.2d, (64 - 1)
	xar	v1.2d, v6.2d, v25.2d, (64 - 44)
	xar	v6.2d, v9.2d, v28.2d, (64 - 20)
	xar	v9.2d, v22.2d, v26.2d, (64 - 61)
	xar	v22.2d, v14.2d, v28.2d, (64 - 39)
	xar	v14.2d, v20.2d, v30.2d, (64 - 18)
	xar	v31.2d, v2.2d, v26.2d, (64 - 62)
	xar	v2.2d, v12.2d, v26.2d, (64 - 43)
	xar	v12.2d, v13.2d, v27.2d, (64 - 25)
	xar	v13.2d, v19.2d, v28.2d, (64 - 8)
	xar	v19.2d, v23.2d, v27.2d, (64 - 56)
	xar	v23.2d, v15.2d, v30.2d, (64 - 41)
	xar	v15.2d, v4.2d, v28.2d, (64 - 27)
	xar	v28.2d, v24.2d, v28.2d, (64 - 14)
	xar	v24.2d, v21.2d, v25.2d, (64 - 2)
	xar	v8.2d, v8.2d, v27.2d, (64 - 55)
	xar	v4.2d, v16.2d, v25.2d, (64 - 45)
	xar	v16.2d, v5.2d, v30.2d, (64 - 36)
	xar	v5.2d, v3.2d, v27.2d, (64 - 28)
	xar	v27.2d, v18.2d, v27.2d, (64 - 21)
	xar	v3.2d, v17.2d, v26.2d, (64 - 15)
	xar	v25.2d, v11.2d, v25.2d, (64 - 10)
	xar	v26.2d, v7.2d, v26.2d, (64 - 6)
	xar	v30.2d, v10.2d, v30.2d, (64 - 3)

	/* chi, one row of five lanes per group: lanes 20-24 */
	bcax	v20.16b, v31.16b, v22.16b, v8.16b
	bcax	v21.16b, v8.16b, v23.16b, v22.16b
	bcax	v22.16b, v22.16b, v24.16b, v23.16b
	bcax	v23.16b, v23.16b, v31.16b, v24.16b
	bcax	v24.16b, v24.16b, v8.16b, v31.16b

	/*
	 * v31 is free from here on: fetch the constant for this round into
	 * both 64-bit elements of v31 and step x9 to the next constant.
	 */
	ld1r	{v31.2d}, [x9], #8

	/* chi: lanes 15-19 */
	bcax	v17.16b, v25.16b, v19.16b, v3.16b
	bcax	v18.16b, v3.16b, v15.16b, v19.16b
	bcax	v19.16b, v19.16b, v16.16b, v15.16b
	bcax	v15.16b, v15.16b, v25.16b, v16.16b
	bcax	v16.16b, v16.16b, v3.16b, v25.16b

	/* chi: lanes 10-14 */
	bcax	v10.16b, v29.16b, v12.16b, v26.16b
	bcax	v11.16b, v26.16b, v13.16b, v12.16b
	bcax	v12.16b, v12.16b, v14.16b, v13.16b
	bcax	v13.16b, v13.16b, v29.16b, v14.16b
	bcax	v14.16b, v14.16b, v26.16b, v29.16b

	/* chi: lanes 5-9 */
	bcax	v7.16b, v30.16b, v9.16b, v4.16b
	bcax	v8.16b, v4.16b, v5.16b, v9.16b
	bcax	v9.16b, v9.16b, v6.16b, v5.16b
	bcax	v5.16b, v5.16b, v30.16b, v6.16b
	bcax	v6.16b, v6.16b, v4.16b, v30.16b

	/* chi: lanes 0-4 */
	bcax	v3.16b, v27.16b, v0.16b, v28.16b
	bcax	v4.16b, v28.16b, v1.16b, v0.16b
	bcax	v0.16b, v0.16b, v2.16b, v1.16b
	bcax	v1.16b, v1.16b, v27.16b, v2.16b
	bcax	v2.16b, v2.16b, v28.16b, v27.16b

	/* iota: fold the round constant into lane 0 */
	eor	v0.16b, v0.16b, v31.16b

	cbnz	w8, .Lround

	/*
	 * Block finished.  cond_yield comes from <asm/assembler.h>; it is
	 * expected to branch to the given label when the caller should give
	 * up the CPU, using x8 and x9 as temporaries (see its definition).
	 * In that case the state is saved and the remaining count returned.
	 */
	cond_yield .Lsave_state, x8, x9
	cbnz	x2, .Lnext_block

	/* save state */
.Lsave_state:
	st1	{ v0.1d- v3.1d}, [x0], #32
	st1	{ v4.1d- v7.1d}, [x0], #32
	st1	{ v8.1d-v11.1d}, [x0], #32
	st1	{v12.1d-v15.1d}, [x0], #32
	st1	{v16.1d-v19.1d}, [x0], #32
	st1	{v20.1d-v23.1d}, [x0], #32
	st1	{v24.1d}, [x0]
.Lout:
	mov	x0, x2			// return the number of blocks left
	ret
SYM_FUNC_END(sha3_ce_transform)

	/*
	 * Round constants, one 64-bit word per round (24 in total), read in
	 * order through the post-incremented x9 in the round loop above.
	 */
	.section	".rodata", "a"
	.align	8
.Lsha3_rcon:
	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008