/* xref: /linux/arch/arm64/crypto/sha2-ce-core.S (revision 0ea5c948cb64bab5bc7a5516774eb8536f05aa0d) */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions
 *
 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto

	/*
	 * Register aliases shared by the macros and the transform routine
	 * in this file.
	 *
	 * dga/dgb (q20/q21, vector views dgav/dgbv):
	 *	the hash state as loaded from, and stored back to, the state
	 *	buffer; the result of each block is accumulated into it.
	 */
	dga		.req	q20
	dgav		.req	v20
	dgb		.req	q21
	dgbv		.req	v21

	/*
	 * t0/t1: scratch vectors holding "schedule words + round constants".
	 * The round macros alternate between them: one is consumed by the
	 * current sha256h/sha256h2 pair while the other is being prepared
	 * for the next pair.
	 */
	t0		.req	v22
	t1		.req	v23

	/*
	 * dg0/dg1: working copy of the state that the rounds operate on.
	 * dg2:     copy of dg0 taken before sha256h overwrites it, so that
	 *          sha256h2 still sees the value from before the rounds.
	 */
	dg0q		.req	q24
	dg0v		.req	v24
	dg1q		.req	q25
	dg1v		.req	v25
	dg2q		.req	q26
	dg2v		.req	v26

/*
 * add_only ev, rc, s0
 *
 * Issue one sha256h/sha256h2 pair on the working state (dg0/dg1) and, in
 * the same step, prepare the input vector for the NEXT pair.
 *
 *   ev  selects which scratch vector is consumed:
 *         0        - the rounds read t0, the next input is written to t1
 *         non-zero - the rounds read t1, the next input is written to t0
 *   rc  vector register holding the round constants for the next pair
 *   s0  number N of the register vN holding the schedule words for the
 *       next pair.  It may be left blank only when ev is non-zero; in
 *       that case no next input is computed (used for the final pair).
 *
 * Writes dg0, dg1, dg2 and one of t0/t1.
 */
	.macro		add_only, ev, rc, s0
	mov		dg2v.16b, dg0v.16b	// keep dg0 from before the rounds for sha256h2
	.ifeq		\ev
	add		t1.4s, v\s0\().4s, \rc\().4s
	sha256h		dg0q, dg1q, t0.4s
	sha256h2	dg1q, dg2q, t0.4s
	.else
	.ifnb		\s0
	add		t0.4s, v\s0\().4s, \rc\().4s
	.endif
	sha256h		dg0q, dg1q, t1.4s
	sha256h2	dg1q, dg2q, t1.4s
	.endif
	.endm

/*
 * add_update ev, rc, s0, s1, s2, s3
 *
 * Same round step as add_only, wrapped in a message schedule update:
 * the vector v<s0> is rewritten in place by sha256su0/sha256su1, using
 * v<s1>, v<s2> and v<s3> as the remaining schedule inputs.
 *
 * Note that add_only is invoked with s1 (not s0): the input prepared for
 * the next round step comes from v<s1>, which this invocation only reads.
 *
 *   ev, rc          passed through to add_only unchanged
 *   s0, s1, s2, s3  register numbers (vN) of the four schedule vectors
 */
	.macro		add_update, ev, rc, s0, s1, s2, s3
	sha256su0	v\s0\().4s, v\s1\().4s
	add_only	\ev, \rc, \s1
	sha256su1	v\s0\().4s, v\s2\().4s, v\s3\().4s
	.endm

/*
 * The SHA-256 round constants
 *
 * 64 32-bit words, laid out four per line so that each line fills one
 * 128-bit vector register.  The transform routine in this file loads the
 * whole table into v0-v15 with four 64-byte ld1 instructions.
 */
	.section	".rodata", "a"
	.align		4
.Lsha2_rcon:
	.word		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
	.word		0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
	.word		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
	.word		0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
	.word		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
	.word		0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
	.word		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
	.word		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
	.word		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
	.word		0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
	.word		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
	.word		0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
	.word		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
	.word		0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
	.word		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
	.word		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2

/*
 * int __sha256_ce_transform(struct sha256_ce_state *sst, u8 const *src,
 *			     int blocks)
 *
 * In:   x0 = sst, x1 = src, w2 = blocks
 * Out:  w0 = number of input blocks not yet consumed.  This is non-zero
 *	 only when the block loop was left early through cond_yield.
 * Uses: x4-x8, v0-v19 and the aliased vector registers v20-v26.
 *
 * The hash state is read from the start of *sst and written back there
 * before returning.
 *
 * The block loop tests the remaining count only after a block has been
 * processed, so at least one 64-byte block is always consumed.
 * NOTE(review): callers are therefore presumably required to pass
 * blocks >= 1 - confirm against the C glue code.
 */
	.text
SYM_FUNC_START(__sha256_ce_transform)
	/* load round constants: v0-v15 receive four constants each */
	adr_l		x8, .Lsha2_rcon
	ld1		{ v0.4s- v3.4s}, [x8], #64
	ld1		{ v4.4s- v7.4s}, [x8], #64
	ld1		{ v8.4s-v11.4s}, [x8], #64
	ld1		{v12.4s-v15.4s}, [x8]

	/* load state */
	ld1		{dgav.4s, dgbv.4s}, [x0]

	/*
	 * load sha256_ce_state::finalize
	 * The field offset is not hard-coded: it is read from the variable
	 * sha256_ce_offsetof_finalize, then used to index into *sst.
	 */
	ldr_l		w4, sha256_ce_offsetof_finalize, x4
	ldr		w4, [x0, x4]

	/* load input: one 64-byte block into v16-v19, advancing src */
0:	ld1		{v16.4s-v19.4s}, [x1], #64
	sub		w2, w2, #1

	/* on little-endian kernels, byte-swap each 32-bit message word */
CPU_LE(	rev32		v16.16b, v16.16b	)
CPU_LE(	rev32		v17.16b, v17.16b	)
CPU_LE(	rev32		v18.16b, v18.16b	)
CPU_LE(	rev32		v19.16b, v19.16b	)

	/*
	 * Entry point for the rounds; also reached from the padding code
	 * below, which builds v16-v19 itself and bypasses the load/rev32.
	 * Prime t0 for the first round step and start from the saved state.
	 */
1:	add		t0.4s, v16.4s, v0.4s
	mov		dg0v.16b, dgav.16b
	mov		dg1v.16b, dgbv.16b

	/* twelve round steps that also extend the message schedule */
	add_update	0,  v1, 16, 17, 18, 19
	add_update	1,  v2, 17, 18, 19, 16
	add_update	0,  v3, 18, 19, 16, 17
	add_update	1,  v4, 19, 16, 17, 18

	add_update	0,  v5, 16, 17, 18, 19
	add_update	1,  v6, 17, 18, 19, 16
	add_update	0,  v7, 18, 19, 16, 17
	add_update	1,  v8, 19, 16, 17, 18

	add_update	0,  v9, 16, 17, 18, 19
	add_update	1, v10, 17, 18, 19, 16
	add_update	0, v11, 18, 19, 16, 17
	add_update	1, v12, 19, 16, 17, 18

	/*
	 * four closing round steps without schedule updates; the very last
	 * one has no successor, so it prepares no further input
	 */
	add_only	0, v13, 17
	add_only	1, v14, 18
	add_only	0, v15, 19
	add_only	1

	/* update state */
	add		dgav.4s, dgav.4s, dg0v.4s
	add		dgbv.4s, dgbv.4s, dg1v.4s

	/* handled all input blocks? */
	cbz		w2, 2f
	/*
	 * More blocks remain.  cond_yield comes from <asm/assembler.h>;
	 * when it takes the branch to 3f the state is stored and the
	 * remaining block count is returned to the caller (x5/x6 are its
	 * scratch registers).
	 */
	cond_yield	3f, x5, x6
	b		0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block size,
	 * the padding is handled by the C code in that case.
	 *
	 * The padding block is assembled directly in v16-v19:
	 *   v16     = { 0x80000000, 0, 0, 0 }
	 *   v17,v18 = 0
	 *   v19     = { 0, 0, upper and lower 32 bits of the value in x7 }
	 * x4 is then cleared so that the second arrival at label 2 falls
	 * through to the store at label 3.
	 */
2:	cbz		x4, 3f
	ldr_l		w4, sha256_ce_offsetof_count, x4
	ldr		x4, [x0, x4]		// 64-bit count field of *sst
	movi		v17.2d, #0
	mov		x8, #0x80000000
	movi		v18.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d16, x8			// also zeroes the upper half of v16
	mov		x4, #0
	mov		v19.d[0], xzr
	mov		v19.d[1], x7
	b		1b

	/* store new state */
3:	st1		{dgav.4s, dgbv.4s}, [x0]
	mov		w0, w2			// return the number of blocks left
	ret
SYM_FUNC_END(__sha256_ce_transform)
