/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * Define the assembler-local symbols .Lv<N>.2d and .Lv<N>.16b, each
	 * equal to N, for N = 0..31.  The macros below prepend ".L" to a
	 * register operand such as "v5.16b" to obtain its register number
	 * for the hand-assembled encoding.
	 */
	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr

	/*
	 * ARMv8.2 Crypto Extensions instructions
	 *
	 * Each macro emits the raw instruction word with .inst: a fixed
	 * opcode ORed with Rd at bit 0, Rn at bit 5, Ra (or imm6) at bit 10
	 * and Rm at bit 16.  Operands must be written exactly as vN.2d or
	 * vN.16b so that they match one of the symbols defined above.
	 * (Presumably encoded by hand so that the file still builds with
	 * assemblers that lack the SHA-3 mnemonics -- TODO confirm.)
	 */

	/* eor3: rd = rn ^ rm ^ ra */
	.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	/* rax1: rd = rn ^ rol64(rm, 1), per 64-bit element */
	.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	/* bcax: rd = rn ^ (rm & ~ra) */
	.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	/* xar: rd = ror64(rn ^ rm, imm6), per 64-bit element */
	.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
	.endm

/*
 * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
 *
 * Absorb 'blocks' input blocks into the SHA-3 state and run the 24-round
 * permutation after each one.
 *
 * In:	x0 = st       state, 25 64-bit lanes (200 bytes), read and written
 *	x1 = data     input, consumed one rate-sized block at a time
 *	w2 = blocks   number of blocks to process; it is decremented before
 *	              it is tested, so at least one block is always consumed
 *	              (callers presumably pass blocks >= 1 -- TODO confirm)
 *	w3 = dg_size  digest size in bytes; bits 6, 4 and 2 select how many
 *	              lanes are absorbed per block (values 28/32/48/64)
 * Out:	w0 = number of blocks left unprocessed: 0 when all input was
 *	     consumed, non-zero when the loop was left early via cond_yield
 *
 * Register use:
 *	v0-v24   state lanes (only the low 64 bits are loaded and stored)
 *	v25-v31  temporaries: input data, column parities, round constant
 *	x8       scratch pointer for the state load, then w8 = rounds left
 *	x9       pointer into .Lsha3_rcon
 */
	.text
SYM_FUNC_START(sha3_ce_transform)
	/* load state */
	add	x8, x0, #32
	ld1	{ v0.1d- v3.1d}, [x0]
	ld1	{ v4.1d- v7.1d}, [x8], #32
	ld1	{ v8.1d-v11.1d}, [x8], #32
	ld1	{v12.1d-v15.1d}, [x8], #32
	ld1	{v16.1d-v19.1d}, [x8], #32
	ld1	{v20.1d-v23.1d}, [x8], #32
	ld1	{v24.1d}, [x8]

	/* per-block loop: count the block, reset round counter and rcon ptr */
0:	sub	w2, w2, #1
	mov	w8, #24
	adr_l	x9, .Lsha3_rcon

	/* load input: lanes 0-6 are absorbed for every digest size */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v31.8b}, [x1], #24
	eor	v0.8b, v0.8b, v25.8b
	eor	v1.8b, v1.8b, v26.8b
	eor	v2.8b, v2.8b, v27.8b
	eor	v3.8b, v3.8b, v28.8b
	eor	v4.8b, v4.8b, v29.8b
	eor	v5.8b, v5.8b, v30.8b
	eor	v6.8b, v6.8b, v31.8b

	tbnz	x3, #6, 2f	// SHA3-512

	/* lanes 7-12: 13 lanes (104 bytes) absorbed so far */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v30.8b}, [x1], #16
	eor	 v7.8b,  v7.8b, v25.8b
	eor	 v8.8b,  v8.8b, v26.8b
	eor	 v9.8b,  v9.8b, v27.8b
	eor	v10.8b, v10.8b, v28.8b
	eor	v11.8b, v11.8b, v29.8b
	eor	v12.8b, v12.8b, v30.8b

	tbnz	x3, #4, 1f	// SHA3-384 or SHA3-224

	// SHA3-256: lanes 13-16, 17 lanes (136 bytes) in total
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	b	3f

1:	tbz	x3, #2, 3f	// bit 2 cleared? SHA3-384

	// SHA3-224: lanes 13-17, 18 lanes (144 bytes) in total
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b}, [x1], #8
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	eor	v17.8b, v17.8b, v29.8b
	b	3f

	// SHA3-512: lanes 7-8, 9 lanes (72 bytes) in total
2:	ld1	{v25.8b-v26.8b}, [x1], #16
	eor	 v7.8b,  v7.8b, v25.8b
	eor	 v8.8b,  v8.8b, v26.8b

	/* round loop: w8 counts the remaining rounds down from 24 */
3:	sub	w8, w8, #1

	/* theta: v25-v29 = XOR of the five lanes in each column */
	eor3	v29.16b,  v4.16b,  v9.16b, v14.16b
	eor3	v26.16b,  v1.16b,  v6.16b, v11.16b
	eor3	v28.16b,  v3.16b,  v8.16b, v13.16b
	eor3	v25.16b,  v0.16b,  v5.16b, v10.16b
	eor3	v27.16b,  v2.16b,  v7.16b, v12.16b
	eor3	v29.16b, v29.16b, v19.16b, v24.16b
	eor3	v26.16b, v26.16b, v16.16b, v21.16b
	eor3	v28.16b, v28.16b, v18.16b, v23.16b
	eor3	v25.16b, v25.16b, v15.16b, v20.16b
	eor3	v27.16b, v27.16b, v17.16b, v22.16b

	/* combine each column parity with a neighbour rotated left by one */
	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
	rax1	v27.2d, v27.2d, v29.2d	// bc[3]

	/*
	 * Apply bc[] to every lane and, in the same instruction, rotate the
	 * lane and move it to its new position (rho and pi).  xar rotates
	 * right, so a left rotation by n is written as (64 - n).  Lane 0 is
	 * not rotated and therefore uses a plain eor.
	 */
	eor	 v0.16b,  v0.16b, v30.16b
	xar	 v29.2d,   v1.2d,  v25.2d, (64 - 1)
	xar	  v1.2d,   v6.2d,  v25.2d, (64 - 44)
	xar	  v6.2d,   v9.2d,  v28.2d, (64 - 20)
	xar	  v9.2d,  v22.2d,  v26.2d, (64 - 61)
	xar	 v22.2d,  v14.2d,  v28.2d, (64 - 39)
	xar	 v14.2d,  v20.2d,  v30.2d, (64 - 18)
	xar	 v31.2d,   v2.2d,  v26.2d, (64 - 62)
	xar	  v2.2d,  v12.2d,  v26.2d, (64 - 43)
	xar	 v12.2d,  v13.2d,  v27.2d, (64 - 25)
	xar	 v13.2d,  v19.2d,  v28.2d, (64 - 8)
	xar	 v19.2d,  v23.2d,  v27.2d, (64 - 56)
	xar	 v23.2d,  v15.2d,  v30.2d, (64 - 41)
	xar	 v15.2d,   v4.2d,  v28.2d, (64 - 27)
	xar	 v28.2d,  v24.2d,  v28.2d, (64 - 14)
	xar	 v24.2d,  v21.2d,  v25.2d, (64 - 2)
	xar	  v8.2d,   v8.2d,  v27.2d, (64 - 55)
	xar	  v4.2d,  v16.2d,  v25.2d, (64 - 45)
	xar	 v16.2d,   v5.2d,  v30.2d, (64 - 36)
	xar	  v5.2d,   v3.2d,  v27.2d, (64 - 28)
	xar	 v27.2d,  v18.2d,  v27.2d, (64 - 21)
	xar	  v3.2d,  v17.2d,  v26.2d, (64 - 15)
	xar	 v25.2d,  v11.2d,  v25.2d, (64 - 10)
	xar	 v26.2d,   v7.2d,  v26.2d, (64 - 6)
	xar	 v30.2d,  v10.2d,  v30.2d, (64 - 3)

	/* chi, one row of five lanes at a time: a ^= b & ~c */
	bcax	v20.16b, v31.16b, v22.16b,  v8.16b
	bcax	v21.16b,  v8.16b, v23.16b, v22.16b
	bcax	v22.16b, v22.16b, v24.16b, v23.16b
	bcax	v23.16b, v23.16b, v31.16b, v24.16b
	bcax	v24.16b, v24.16b,  v8.16b, v31.16b

	/* v31 is free again: fetch this round's constant and advance x9 */
	ld1r	{v31.2d}, [x9], #8

	bcax	v17.16b, v25.16b, v19.16b,  v3.16b
	bcax	v18.16b,  v3.16b, v15.16b, v19.16b
	bcax	v19.16b, v19.16b, v16.16b, v15.16b
	bcax	v15.16b, v15.16b, v25.16b, v16.16b
	bcax	v16.16b, v16.16b,  v3.16b, v25.16b

	bcax	v10.16b, v29.16b, v12.16b, v26.16b
	bcax	v11.16b, v26.16b, v13.16b, v12.16b
	bcax	v12.16b, v12.16b, v14.16b, v13.16b
	bcax	v13.16b, v13.16b, v29.16b, v14.16b
	bcax	v14.16b, v14.16b, v26.16b, v29.16b

	bcax	 v7.16b, v30.16b,  v9.16b,  v4.16b
	bcax	 v8.16b,  v4.16b,  v5.16b,  v9.16b
	bcax	 v9.16b,  v9.16b,  v6.16b,  v5.16b
	bcax	 v5.16b,  v5.16b, v30.16b,  v6.16b
	bcax	 v6.16b,  v6.16b,  v4.16b, v30.16b

	bcax	 v3.16b, v27.16b,  v0.16b, v28.16b
	bcax	 v4.16b, v28.16b,  v1.16b,  v0.16b
	bcax	 v0.16b,  v0.16b,  v2.16b,  v1.16b
	bcax	 v1.16b,  v1.16b, v27.16b,  v2.16b
	bcax	 v2.16b,  v2.16b, v28.16b, v27.16b

	/* iota: fold the round constant into lane 0 */
	eor	 v0.16b,  v0.16b, v31.16b

	cbnz	w8, 3b
	/*
	 * cond_yield comes from <asm/assembler.h> and is not visible here;
	 * presumably it branches to 4f when the task should yield, using x8
	 * and x9 as scratch -- TODO confirm.  Both are reinitialised at 0b.
	 */
	cond_yield 4f, x8, x9
	cbnz	w2, 0b

	/* save state */
4:	st1	{ v0.1d- v3.1d}, [x0], #32
	st1	{ v4.1d- v7.1d}, [x0], #32
	st1	{ v8.1d-v11.1d}, [x0], #32
	st1	{v12.1d-v15.1d}, [x0], #32
	st1	{v16.1d-v19.1d}, [x0], #32
	st1	{v20.1d-v23.1d}, [x0], #32
	st1	{v24.1d}, [x0]
	mov	w0, w2			// return the number of blocks left
	ret
SYM_FUNC_END(sha3_ce_transform)

	.section	".rodata", "a"
	/*
	 * NOTE(review): GNU as treats the .align operand on AArch64 as a
	 * power-of-two exponent, so this requests 256-byte alignment, while
	 * the 64-bit loads from this table would only need 8 -- confirm
	 * whether the larger value is intentional before changing it.
	 */
	.align	8
	/* the 24 round constants, read in order, one per round */
.Lsha3_rcon:
	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008