xref: /linux/arch/arm64/crypto/sha3-ce-core.S (revision 8be98d2f2a0a262f8bf8a0bc1fdf522b3c7aab17)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
1115d5910eSArd Biesheuvel
1215d5910eSArd Biesheuvel#include <linux/linkage.h>
1315d5910eSArd Biesheuvel#include <asm/assembler.h>
1415d5910eSArd Biesheuvel
1515d5910eSArd Biesheuvel	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
1615d5910eSArd Biesheuvel	.set	.Lv\b\().2d, \b
1715d5910eSArd Biesheuvel	.set	.Lv\b\().16b, \b
1815d5910eSArd Biesheuvel	.endr
1915d5910eSArd Biesheuvel
	/*
	 * ARMv8.2 Crypto Extensions instructions, hand-encoded with .inst
	 */
2315d5910eSArd Biesheuvel	.macro	eor3, rd, rn, rm, ra
2415d5910eSArd Biesheuvel	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
2515d5910eSArd Biesheuvel	.endm
2615d5910eSArd Biesheuvel
2715d5910eSArd Biesheuvel	.macro	rax1, rd, rn, rm
2815d5910eSArd Biesheuvel	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
2915d5910eSArd Biesheuvel	.endm
3015d5910eSArd Biesheuvel
3115d5910eSArd Biesheuvel	.macro	bcax, rd, rn, rm, ra
3215d5910eSArd Biesheuvel	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
3315d5910eSArd Biesheuvel	.endm
3415d5910eSArd Biesheuvel
3515d5910eSArd Biesheuvel	.macro	xar, rd, rn, rm, imm6
3615d5910eSArd Biesheuvel	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
3715d5910eSArd Biesheuvel	.endm
3815d5910eSArd Biesheuvel
3915d5910eSArd Biesheuvel	/*
409ecc9f31SArd Biesheuvel	 * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
4115d5910eSArd Biesheuvel	 */
4215d5910eSArd Biesheuvel	.text
430e89640bSMark BrownSYM_FUNC_START(sha3_ce_transform)
449ecc9f31SArd Biesheuvel	/* load state */
459ecc9f31SArd Biesheuvel	add	x8, x0, #32
469ecc9f31SArd Biesheuvel	ld1	{ v0.1d- v3.1d}, [x0]
4715d5910eSArd Biesheuvel	ld1	{ v4.1d- v7.1d}, [x8], #32
4815d5910eSArd Biesheuvel	ld1	{ v8.1d-v11.1d}, [x8], #32
4915d5910eSArd Biesheuvel	ld1	{v12.1d-v15.1d}, [x8], #32
5015d5910eSArd Biesheuvel	ld1	{v16.1d-v19.1d}, [x8], #32
5115d5910eSArd Biesheuvel	ld1	{v20.1d-v23.1d}, [x8], #32
5215d5910eSArd Biesheuvel	ld1	{v24.1d}, [x8]
5315d5910eSArd Biesheuvel
549ecc9f31SArd Biesheuvel0:	sub	w2, w2, #1
5515d5910eSArd Biesheuvel	mov	w8, #24
5615d5910eSArd Biesheuvel	adr_l	x9, .Lsha3_rcon
5715d5910eSArd Biesheuvel
5815d5910eSArd Biesheuvel	/* load input */
599ecc9f31SArd Biesheuvel	ld1	{v25.8b-v28.8b}, [x1], #32
609ecc9f31SArd Biesheuvel	ld1	{v29.8b-v31.8b}, [x1], #24
6115d5910eSArd Biesheuvel	eor	v0.8b, v0.8b, v25.8b
6215d5910eSArd Biesheuvel	eor	v1.8b, v1.8b, v26.8b
6315d5910eSArd Biesheuvel	eor	v2.8b, v2.8b, v27.8b
6415d5910eSArd Biesheuvel	eor	v3.8b, v3.8b, v28.8b
6515d5910eSArd Biesheuvel	eor	v4.8b, v4.8b, v29.8b
6615d5910eSArd Biesheuvel	eor	v5.8b, v5.8b, v30.8b
6715d5910eSArd Biesheuvel	eor	v6.8b, v6.8b, v31.8b
6815d5910eSArd Biesheuvel
699ecc9f31SArd Biesheuvel	tbnz	x3, #6, 2f		// SHA3-512
7015d5910eSArd Biesheuvel
719ecc9f31SArd Biesheuvel	ld1	{v25.8b-v28.8b}, [x1], #32
729ecc9f31SArd Biesheuvel	ld1	{v29.8b-v30.8b}, [x1], #16
7315d5910eSArd Biesheuvel	eor	 v7.8b,  v7.8b, v25.8b
7415d5910eSArd Biesheuvel	eor	 v8.8b,  v8.8b, v26.8b
7515d5910eSArd Biesheuvel	eor	 v9.8b,  v9.8b, v27.8b
7615d5910eSArd Biesheuvel	eor	v10.8b, v10.8b, v28.8b
7715d5910eSArd Biesheuvel	eor	v11.8b, v11.8b, v29.8b
7815d5910eSArd Biesheuvel	eor	v12.8b, v12.8b, v30.8b
7915d5910eSArd Biesheuvel
809ecc9f31SArd Biesheuvel	tbnz	x3, #4, 1f		// SHA3-384 or SHA3-224
8115d5910eSArd Biesheuvel
8215d5910eSArd Biesheuvel	// SHA3-256
839ecc9f31SArd Biesheuvel	ld1	{v25.8b-v28.8b}, [x1], #32
8415d5910eSArd Biesheuvel	eor	v13.8b, v13.8b, v25.8b
8515d5910eSArd Biesheuvel	eor	v14.8b, v14.8b, v26.8b
8615d5910eSArd Biesheuvel	eor	v15.8b, v15.8b, v27.8b
8715d5910eSArd Biesheuvel	eor	v16.8b, v16.8b, v28.8b
889ecc9f31SArd Biesheuvel	b	3f
8915d5910eSArd Biesheuvel
909ecc9f31SArd Biesheuvel1:	tbz	x3, #2, 3f		// bit 2 cleared? SHA-384
9115d5910eSArd Biesheuvel
9215d5910eSArd Biesheuvel	// SHA3-224
939ecc9f31SArd Biesheuvel	ld1	{v25.8b-v28.8b}, [x1], #32
949ecc9f31SArd Biesheuvel	ld1	{v29.8b}, [x1], #8
9515d5910eSArd Biesheuvel	eor	v13.8b, v13.8b, v25.8b
9615d5910eSArd Biesheuvel	eor	v14.8b, v14.8b, v26.8b
9715d5910eSArd Biesheuvel	eor	v15.8b, v15.8b, v27.8b
9815d5910eSArd Biesheuvel	eor	v16.8b, v16.8b, v28.8b
9915d5910eSArd Biesheuvel	eor	v17.8b, v17.8b, v29.8b
1009ecc9f31SArd Biesheuvel	b	3f
10115d5910eSArd Biesheuvel
10215d5910eSArd Biesheuvel	// SHA3-512
1039ecc9f31SArd Biesheuvel2:	ld1	{v25.8b-v26.8b}, [x1], #16
10415d5910eSArd Biesheuvel	eor	 v7.8b,  v7.8b, v25.8b
10515d5910eSArd Biesheuvel	eor	 v8.8b,  v8.8b, v26.8b
10615d5910eSArd Biesheuvel
1079ecc9f31SArd Biesheuvel3:	sub	w8, w8, #1
10815d5910eSArd Biesheuvel
10915d5910eSArd Biesheuvel	eor3	v29.16b,  v4.16b,  v9.16b, v14.16b
11015d5910eSArd Biesheuvel	eor3	v26.16b,  v1.16b,  v6.16b, v11.16b
11115d5910eSArd Biesheuvel	eor3	v28.16b,  v3.16b,  v8.16b, v13.16b
11215d5910eSArd Biesheuvel	eor3	v25.16b,  v0.16b,  v5.16b, v10.16b
11315d5910eSArd Biesheuvel	eor3	v27.16b,  v2.16b,  v7.16b, v12.16b
11415d5910eSArd Biesheuvel	eor3	v29.16b, v29.16b, v19.16b, v24.16b
11515d5910eSArd Biesheuvel	eor3	v26.16b, v26.16b, v16.16b, v21.16b
11615d5910eSArd Biesheuvel	eor3	v28.16b, v28.16b, v18.16b, v23.16b
11715d5910eSArd Biesheuvel	eor3	v25.16b, v25.16b, v15.16b, v20.16b
11815d5910eSArd Biesheuvel	eor3	v27.16b, v27.16b, v17.16b, v22.16b
11915d5910eSArd Biesheuvel
12015d5910eSArd Biesheuvel	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
12115d5910eSArd Biesheuvel	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
12215d5910eSArd Biesheuvel	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
12315d5910eSArd Biesheuvel	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
12415d5910eSArd Biesheuvel	rax1	v27.2d, v27.2d, v29.2d	// bc[3]
12515d5910eSArd Biesheuvel
12615d5910eSArd Biesheuvel	eor	 v0.16b,  v0.16b, v30.16b
12715d5910eSArd Biesheuvel	xar	 v29.2d,   v1.2d,  v25.2d, (64 - 1)
12815d5910eSArd Biesheuvel	xar	  v1.2d,   v6.2d,  v25.2d, (64 - 44)
12915d5910eSArd Biesheuvel	xar	  v6.2d,   v9.2d,  v28.2d, (64 - 20)
13015d5910eSArd Biesheuvel	xar	  v9.2d,  v22.2d,  v26.2d, (64 - 61)
13115d5910eSArd Biesheuvel	xar	 v22.2d,  v14.2d,  v28.2d, (64 - 39)
13215d5910eSArd Biesheuvel	xar	 v14.2d,  v20.2d,  v30.2d, (64 - 18)
13315d5910eSArd Biesheuvel	xar	 v31.2d,   v2.2d,  v26.2d, (64 - 62)
13415d5910eSArd Biesheuvel	xar	  v2.2d,  v12.2d,  v26.2d, (64 - 43)
13515d5910eSArd Biesheuvel	xar	 v12.2d,  v13.2d,  v27.2d, (64 - 25)
13615d5910eSArd Biesheuvel	xar	 v13.2d,  v19.2d,  v28.2d, (64 - 8)
13715d5910eSArd Biesheuvel	xar	 v19.2d,  v23.2d,  v27.2d, (64 - 56)
13815d5910eSArd Biesheuvel	xar	 v23.2d,  v15.2d,  v30.2d, (64 - 41)
13915d5910eSArd Biesheuvel	xar	 v15.2d,   v4.2d,  v28.2d, (64 - 27)
14015d5910eSArd Biesheuvel	xar	 v28.2d,  v24.2d,  v28.2d, (64 - 14)
14115d5910eSArd Biesheuvel	xar	 v24.2d,  v21.2d,  v25.2d, (64 - 2)
14215d5910eSArd Biesheuvel	xar	  v8.2d,   v8.2d,  v27.2d, (64 - 55)
14315d5910eSArd Biesheuvel	xar	  v4.2d,  v16.2d,  v25.2d, (64 - 45)
14415d5910eSArd Biesheuvel	xar	 v16.2d,   v5.2d,  v30.2d, (64 - 36)
14515d5910eSArd Biesheuvel	xar	  v5.2d,   v3.2d,  v27.2d, (64 - 28)
14615d5910eSArd Biesheuvel	xar	 v27.2d,  v18.2d,  v27.2d, (64 - 21)
14715d5910eSArd Biesheuvel	xar	  v3.2d,  v17.2d,  v26.2d, (64 - 15)
14815d5910eSArd Biesheuvel	xar	 v25.2d,  v11.2d,  v25.2d, (64 - 10)
14915d5910eSArd Biesheuvel	xar	 v26.2d,   v7.2d,  v26.2d, (64 - 6)
15015d5910eSArd Biesheuvel	xar	 v30.2d,  v10.2d,  v30.2d, (64 - 3)
15115d5910eSArd Biesheuvel
15215d5910eSArd Biesheuvel	bcax	v20.16b, v31.16b, v22.16b,  v8.16b
15315d5910eSArd Biesheuvel	bcax	v21.16b,  v8.16b, v23.16b, v22.16b
15415d5910eSArd Biesheuvel	bcax	v22.16b, v22.16b, v24.16b, v23.16b
15515d5910eSArd Biesheuvel	bcax	v23.16b, v23.16b, v31.16b, v24.16b
15615d5910eSArd Biesheuvel	bcax	v24.16b, v24.16b,  v8.16b, v31.16b
15715d5910eSArd Biesheuvel
15815d5910eSArd Biesheuvel	ld1r	{v31.2d}, [x9], #8
15915d5910eSArd Biesheuvel
16015d5910eSArd Biesheuvel	bcax	v17.16b, v25.16b, v19.16b,  v3.16b
16115d5910eSArd Biesheuvel	bcax	v18.16b,  v3.16b, v15.16b, v19.16b
16215d5910eSArd Biesheuvel	bcax	v19.16b, v19.16b, v16.16b, v15.16b
16315d5910eSArd Biesheuvel	bcax	v15.16b, v15.16b, v25.16b, v16.16b
16415d5910eSArd Biesheuvel	bcax	v16.16b, v16.16b,  v3.16b, v25.16b
16515d5910eSArd Biesheuvel
16615d5910eSArd Biesheuvel	bcax	v10.16b, v29.16b, v12.16b, v26.16b
16715d5910eSArd Biesheuvel	bcax	v11.16b, v26.16b, v13.16b, v12.16b
16815d5910eSArd Biesheuvel	bcax	v12.16b, v12.16b, v14.16b, v13.16b
16915d5910eSArd Biesheuvel	bcax	v13.16b, v13.16b, v29.16b, v14.16b
17015d5910eSArd Biesheuvel	bcax	v14.16b, v14.16b, v26.16b, v29.16b
17115d5910eSArd Biesheuvel
17215d5910eSArd Biesheuvel	bcax	 v7.16b, v30.16b,  v9.16b,  v4.16b
17315d5910eSArd Biesheuvel	bcax	 v8.16b,  v4.16b,  v5.16b,  v9.16b
17415d5910eSArd Biesheuvel	bcax	 v9.16b,  v9.16b,  v6.16b,  v5.16b
17515d5910eSArd Biesheuvel	bcax	 v5.16b,  v5.16b, v30.16b,  v6.16b
17615d5910eSArd Biesheuvel	bcax	 v6.16b,  v6.16b,  v4.16b, v30.16b
17715d5910eSArd Biesheuvel
17815d5910eSArd Biesheuvel	bcax	 v3.16b, v27.16b,  v0.16b, v28.16b
17915d5910eSArd Biesheuvel	bcax	 v4.16b, v28.16b,  v1.16b,  v0.16b
18015d5910eSArd Biesheuvel	bcax	 v0.16b,  v0.16b,  v2.16b,  v1.16b
18115d5910eSArd Biesheuvel	bcax	 v1.16b,  v1.16b, v27.16b,  v2.16b
18215d5910eSArd Biesheuvel	bcax	 v2.16b,  v2.16b, v28.16b, v27.16b
18315d5910eSArd Biesheuvel
18415d5910eSArd Biesheuvel	eor	 v0.16b,  v0.16b, v31.16b
18515d5910eSArd Biesheuvel
1869ecc9f31SArd Biesheuvel	cbnz	w8, 3b
187*13150149SArd Biesheuvel	cond_yield 4f, x8, x9
1889ecc9f31SArd Biesheuvel	cbnz	w2, 0b
18915d5910eSArd Biesheuvel
19015d5910eSArd Biesheuvel	/* save state */
191*13150149SArd Biesheuvel4:	st1	{ v0.1d- v3.1d}, [x0], #32
1929ecc9f31SArd Biesheuvel	st1	{ v4.1d- v7.1d}, [x0], #32
1939ecc9f31SArd Biesheuvel	st1	{ v8.1d-v11.1d}, [x0], #32
1949ecc9f31SArd Biesheuvel	st1	{v12.1d-v15.1d}, [x0], #32
1959ecc9f31SArd Biesheuvel	st1	{v16.1d-v19.1d}, [x0], #32
1969ecc9f31SArd Biesheuvel	st1	{v20.1d-v23.1d}, [x0], #32
1979ecc9f31SArd Biesheuvel	st1	{v24.1d}, [x0]
1989ecc9f31SArd Biesheuvel	mov	w0, w2
19915d5910eSArd Biesheuvel	ret
2000e89640bSMark BrownSYM_FUNC_END(sha3_ce_transform)
20115d5910eSArd Biesheuvel
20215d5910eSArd Biesheuvel	.section	".rodata", "a"
20315d5910eSArd Biesheuvel	.align		8
20415d5910eSArd Biesheuvel.Lsha3_rcon:
20515d5910eSArd Biesheuvel	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
20615d5910eSArd Biesheuvel	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
20715d5910eSArd Biesheuvel	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
20815d5910eSArd Biesheuvel	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
20915d5910eSArd Biesheuvel	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
21015d5910eSArd Biesheuvel	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
21115d5910eSArd Biesheuvel	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
21215d5910eSArd Biesheuvel	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008
213