/* xref: /linux/arch/arm64/crypto/sha1-ce-core.S (revision 0ea5c948cb64bab5bc7a5516774eb8536f05aa0d) */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto

	/* round constant vectors (one 32-bit constant replicated per vector) */
	k0		.req	v0
	k1		.req	v1
	k2		.req	v2
	k3		.req	v3

	/* temporaries that alternately hold (schedule words + round constant) */
	t0		.req	v4
	t1		.req	v5

	/*
	 * hash state carried across blocks: dga/dgav is the 128-bit part
	 * (first four 32-bit words), dgb/dgbv the remaining 32-bit word
	 */
	dga		.req	q6
	dgav		.req	v6
	dgb		.req	s7
	dgbv		.req	v7

	/*
	 * per-block working registers: dg0* is the working copy of dgav that
	 * the sha1c/sha1p/sha1m instructions update in place; dg1* and dg2s
	 * alternately receive the sha1h result
	 */
	dg0q		.req	q12
	dg0s		.req	s12
	dg0v		.req	v12
	dg1s		.req	s13
	dg1v		.req	v13
	dg2s		.req	s14

342c98833aSArd Biesheuvel	.macro		add_only, op, ev, rc, s0, dg1
352c98833aSArd Biesheuvel	.ifc		\ev, ev
362c98833aSArd Biesheuvel	add		t1.4s, v\s0\().4s, \rc\().4s
372c98833aSArd Biesheuvel	sha1h		dg2s, dg0s
382c98833aSArd Biesheuvel	.ifnb		\dg1
392c98833aSArd Biesheuvel	sha1\op		dg0q, \dg1, t0.4s
402c98833aSArd Biesheuvel	.else
412c98833aSArd Biesheuvel	sha1\op		dg0q, dg1s, t0.4s
422c98833aSArd Biesheuvel	.endif
432c98833aSArd Biesheuvel	.else
442c98833aSArd Biesheuvel	.ifnb		\s0
452c98833aSArd Biesheuvel	add		t0.4s, v\s0\().4s, \rc\().4s
462c98833aSArd Biesheuvel	.endif
472c98833aSArd Biesheuvel	sha1h		dg1s, dg0s
482c98833aSArd Biesheuvel	sha1\op		dg0q, dg2s, t1.4s
492c98833aSArd Biesheuvel	.endif
502c98833aSArd Biesheuvel	.endm
512c98833aSArd Biesheuvel
522c98833aSArd Biesheuvel	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
532c98833aSArd Biesheuvel	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
542c98833aSArd Biesheuvel	add_only	\op, \ev, \rc, \s1, \dg1
552c98833aSArd Biesheuvel	sha1su1		v\s0\().4s, v\s3\().4s
562c98833aSArd Biesheuvel	.endm
572c98833aSArd Biesheuvel
5820b04c02SArd Biesheuvel	.macro		loadrc, k, val, tmp
5920b04c02SArd Biesheuvel	movz		\tmp, :abs_g0_nc:\val
6020b04c02SArd Biesheuvel	movk		\tmp, :abs_g1:\val
6120b04c02SArd Biesheuvel	dup		\k, \tmp
6220b04c02SArd Biesheuvel	.endm
632c98833aSArd Biesheuvel
642c98833aSArd Biesheuvel	/*
65*1f9f3a52SEric Biggers	 * int __sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
6607eb54d3SArd Biesheuvel	 *			   int blocks)
672c98833aSArd Biesheuvel	 */
68*1f9f3a52SEric BiggersSYM_FUNC_START(__sha1_ce_transform)
692c98833aSArd Biesheuvel	/* load round constants */
705a69e1b7SArd Biesheuvel	loadrc		k0.4s, 0x5a827999, w6
7120b04c02SArd Biesheuvel	loadrc		k1.4s, 0x6ed9eba1, w6
7220b04c02SArd Biesheuvel	loadrc		k2.4s, 0x8f1bbcdc, w6
7320b04c02SArd Biesheuvel	loadrc		k3.4s, 0xca62c1d6, w6
742c98833aSArd Biesheuvel
752c98833aSArd Biesheuvel	/* load state */
765a69e1b7SArd Biesheuvel	ld1		{dgav.4s}, [x0]
775a69e1b7SArd Biesheuvel	ldr		dgb, [x0, #16]
782c98833aSArd Biesheuvel
7907eb54d3SArd Biesheuvel	/* load sha1_ce_state::finalize */
80f4857f4cSArd Biesheuvel	ldr_l		w4, sha1_ce_offsetof_finalize, x4
815a69e1b7SArd Biesheuvel	ldr		w4, [x0, x4]
822c98833aSArd Biesheuvel
832c98833aSArd Biesheuvel	/* load input */
845a69e1b7SArd Biesheuvel0:	ld1		{v8.4s-v11.4s}, [x1], #64
855a69e1b7SArd Biesheuvel	sub		w2, w2, #1
862c98833aSArd Biesheuvel
872c98833aSArd BiesheuvelCPU_LE(	rev32		v8.16b, v8.16b		)
882c98833aSArd BiesheuvelCPU_LE(	rev32		v9.16b, v9.16b		)
892c98833aSArd BiesheuvelCPU_LE(	rev32		v10.16b, v10.16b	)
902c98833aSArd BiesheuvelCPU_LE(	rev32		v11.16b, v11.16b	)
912c98833aSArd Biesheuvel
925a69e1b7SArd Biesheuvel1:	add		t0.4s, v8.4s, k0.4s
932c98833aSArd Biesheuvel	mov		dg0v.16b, dgav.16b
942c98833aSArd Biesheuvel
952c98833aSArd Biesheuvel	add_update	c, ev, k0,  8,  9, 10, 11, dgb
962c98833aSArd Biesheuvel	add_update	c, od, k0,  9, 10, 11,  8
972c98833aSArd Biesheuvel	add_update	c, ev, k0, 10, 11,  8,  9
982c98833aSArd Biesheuvel	add_update	c, od, k0, 11,  8,  9, 10
992c98833aSArd Biesheuvel	add_update	c, ev, k1,  8,  9, 10, 11
1002c98833aSArd Biesheuvel
1012c98833aSArd Biesheuvel	add_update	p, od, k1,  9, 10, 11,  8
1022c98833aSArd Biesheuvel	add_update	p, ev, k1, 10, 11,  8,  9
1032c98833aSArd Biesheuvel	add_update	p, od, k1, 11,  8,  9, 10
1042c98833aSArd Biesheuvel	add_update	p, ev, k1,  8,  9, 10, 11
1052c98833aSArd Biesheuvel	add_update	p, od, k2,  9, 10, 11,  8
1062c98833aSArd Biesheuvel
1072c98833aSArd Biesheuvel	add_update	m, ev, k2, 10, 11,  8,  9
1082c98833aSArd Biesheuvel	add_update	m, od, k2, 11,  8,  9, 10
1092c98833aSArd Biesheuvel	add_update	m, ev, k2,  8,  9, 10, 11
1102c98833aSArd Biesheuvel	add_update	m, od, k2,  9, 10, 11,  8
1112c98833aSArd Biesheuvel	add_update	m, ev, k3, 10, 11,  8,  9
1122c98833aSArd Biesheuvel
1132c98833aSArd Biesheuvel	add_update	p, od, k3, 11,  8,  9, 10
1142c98833aSArd Biesheuvel	add_only	p, ev, k3,  9
1152c98833aSArd Biesheuvel	add_only	p, od, k3, 10
1162c98833aSArd Biesheuvel	add_only	p, ev, k3, 11
1172c98833aSArd Biesheuvel	add_only	p, od
1182c98833aSArd Biesheuvel
1192c98833aSArd Biesheuvel	/* update state */
1202c98833aSArd Biesheuvel	add		dgbv.2s, dgbv.2s, dg1v.2s
1212c98833aSArd Biesheuvel	add		dgav.4s, dgav.4s, dg0v.4s
1222c98833aSArd Biesheuvel
1235a69e1b7SArd Biesheuvel	cbz		w2, 2f
12413150149SArd Biesheuvel	cond_yield	3f, x5, x6
1257df8d164SArd Biesheuvel	b		0b
1262c98833aSArd Biesheuvel
1272c98833aSArd Biesheuvel	/*
1282c98833aSArd Biesheuvel	 * Final block: add padding and total bit count.
12907eb54d3SArd Biesheuvel	 * Skip if the input size was not a round multiple of the block size,
13007eb54d3SArd Biesheuvel	 * the padding is handled by the C code in that case.
1312c98833aSArd Biesheuvel	 */
1325a69e1b7SArd Biesheuvel2:	cbz		x4, 3f
133f4857f4cSArd Biesheuvel	ldr_l		w4, sha1_ce_offsetof_count, x4
1345a69e1b7SArd Biesheuvel	ldr		x4, [x0, x4]
1352c98833aSArd Biesheuvel	movi		v9.2d, #0
1362c98833aSArd Biesheuvel	mov		x8, #0x80000000
1372c98833aSArd Biesheuvel	movi		v10.2d, #0
1382c98833aSArd Biesheuvel	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
1392c98833aSArd Biesheuvel	fmov		d8, x8
1402c98833aSArd Biesheuvel	mov		x4, #0
1412c98833aSArd Biesheuvel	mov		v11.d[0], xzr
1422c98833aSArd Biesheuvel	mov		v11.d[1], x7
1435a69e1b7SArd Biesheuvel	b		1b
1442c98833aSArd Biesheuvel
1452c98833aSArd Biesheuvel	/* store new state */
1465a69e1b7SArd Biesheuvel3:	st1		{dgav.4s}, [x0]
1475a69e1b7SArd Biesheuvel	str		dgb, [x0, #16]
1485a69e1b7SArd Biesheuvel	mov		w0, w2
1492c98833aSArd Biesheuvel	ret
150*1f9f3a52SEric BiggersSYM_FUNC_END(__sha1_ce_transform)
151