xref: /linux/arch/arm64/crypto/sm3-ce-core.S (revision 2b64b2ed277ff23e785fbdb65098ee7e1252d64f)
1/*
2 * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
3 *
4 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13
/*
 * Define the assembler symbols .Lv0.4s ... .Lv12.4s with the values
 * 0 ... 12. The instruction-encoding macros in this file prefix an operand
 * written as vN.4s with ".L" to obtain its register number from these
 * symbols. Only v0-v12 in the .4s arrangement are covered.
 */
14	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
15	.set		.Lv\b\().4s, \b
16	.endr
17
/*
 * sm3partw1 rd, rn, rm
 *
 * Emits one instruction word with .inst: base value 0xce60c000 with the
 * register numbers of rd, rn and rm OR-ed in at bit 0, bit 5 and bit 16.
 * Operands must be spelled vN.4s with N in 0..12, because the register
 * number is looked up through the .Lv symbols set near the top of the file.
 * NOTE(review): presumably hand-encoded so the file builds with assemblers
 * that lack the SM3 mnemonics - confirm.
 */
18	.macro		sm3partw1, rd, rn, rm
19	.inst		0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
20	.endm
21
/*
 * sm3partw2 rd, rn, rm
 *
 * Emits one instruction word with .inst: base value 0xce60c400 with the
 * register numbers of rd, rn and rm OR-ed in at bit 0, bit 5 and bit 16.
 * Operands must be spelled vN.4s with N in 0..12 (resolved through the
 * .Lv symbols set near the top of the file).
 */
22	.macro		sm3partw2, rd, rn, rm
23	.inst		0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
24	.endm
25
/*
 * sm3ss1 rd, rn, rm, ra
 *
 * Emits one instruction word with .inst: base value 0xce400000 with the
 * register numbers OR-ed in at bit 0 (rd), bit 5 (rn), bit 10 (ra) and
 * bit 16 (rm). Note that ra is the last macro argument but sits below rm
 * in the encoding.
 * Operands must be spelled vN.4s with N in 0..12 (resolved through the
 * .Lv symbols set near the top of the file).
 */
26	.macro		sm3ss1, rd, rn, rm, ra
27	.inst		0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
28	.endm
29
/*
 * sm3tt1a rd, rn, rm, imm2
 *
 * Emits one instruction word with .inst: base value 0xce408000 with the
 * register numbers of rd, rn and rm OR-ed in at bit 0, bit 5 and bit 16,
 * and the immediate imm2 OR-ed in at bit 12. The round macro in this file
 * passes 0..3 for imm2.
 * Register operands must be spelled vN.4s with N in 0..12 (resolved
 * through the .Lv symbols set near the top of the file).
 */
30	.macro		sm3tt1a, rd, rn, rm, imm2
31	.inst		0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
32	.endm
33
/*
 * sm3tt1b rd, rn, rm, imm2
 *
 * Emits one instruction word with .inst: base value 0xce408400 with the
 * register numbers of rd, rn and rm OR-ed in at bit 0, bit 5 and bit 16,
 * and the immediate imm2 OR-ed in at bit 12. The round macro in this file
 * passes 0..3 for imm2.
 * Register operands must be spelled vN.4s with N in 0..12 (resolved
 * through the .Lv symbols set near the top of the file).
 */
34	.macro		sm3tt1b, rd, rn, rm, imm2
35	.inst		0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
36	.endm
37
/*
 * sm3tt2a rd, rn, rm, imm2
 *
 * Emits one instruction word with .inst: base value 0xce408800 with the
 * register numbers of rd, rn and rm OR-ed in at bit 0, bit 5 and bit 16,
 * and the immediate imm2 OR-ed in at bit 12. The round macro in this file
 * passes 0..3 for imm2.
 * Register operands must be spelled vN.4s with N in 0..12 (resolved
 * through the .Lv symbols set near the top of the file).
 */
38	.macro		sm3tt2a, rd, rn, rm, imm2
39	.inst		0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
40	.endm
41
/*
 * sm3tt2b rd, rn, rm, imm2
 *
 * Emits one instruction word with .inst: base value 0xce408c00 with the
 * register numbers of rd, rn and rm OR-ed in at bit 0, bit 5 and bit 16,
 * and the immediate imm2 OR-ed in at bit 12. The round macro in this file
 * passes 0..3 for imm2.
 * Register operands must be spelled vN.4s with N in 0..12 (resolved
 * through the .Lv symbols set near the top of the file).
 */
42	.macro		sm3tt2b, rd, rn, rm, imm2
43	.inst		0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
44	.endm
45
/*
 * round ab, s0, t0, t1, i
 *
 * One round step.
 *
 *   ab - "a" or "b"; appended to sm3tt1/sm3tt2 to pick the macro variant
 *   s0 - register name (without arrangement suffix) passed as the rm
 *        operand of sm3tt2
 *   t0 - register name passed as the rm operand of sm3ss1
 *        (presumably the current round constant - confirm)
 *   t1 - register name that receives t0 rotated left by one bit in every
 *        32-bit lane; callers swap t0 and t1 on the following round
 *   i  - immediate (0..3 at the call sites) passed to sm3tt1 and sm3tt2
 *
 * Fixed registers: v5 is written by sm3ss1 and read by both sm3tt
 * macros; v8 and v9 are read by sm3ss1 and updated in place by sm3tt1
 * and sm3tt2 respectively; v10 is read as the rm operand of sm3tt1.
 */
46	.macro		round, ab, s0, t0, t1, i
47	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
	/* t1 = t0 rotated left by 1: shift left, then insert the old top bit */
48	shl		\t1\().4s, \t0\().4s, #1
49	sri		\t1\().4s, \t0\().4s, #31
50	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
51	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
52	.endm
53
/*
 * qround ab, s0, s1, s2, s3[, s4]
 *
 * Four invocations of the round macro, with immediates 0, 1, 2 and 3.
 *
 *   ab       - "a" or "b", forwarded to round
 *   s0 .. s3 - register names (without arrangement suffix) of four input
 *              vectors; s0 and s1 feed the rounds
 *   s4       - optional; when given, a new vector is computed into s4 from
 *              s0..s3 using sm3partw1 before the rounds and sm3partw2
 *              after them (presumably the next four message-schedule
 *              words - confirm against the SM3 specification)
 *
 * Fixed registers: v6 and v7 are written as temporaries when s4 is given;
 * v10 is set to s0 XOR s1; v11 and v12 alternate as the t0/t1 pair of
 * round, so after the four rounds v11 again holds the value to start the
 * next qround with.
 */
54	.macro		qround, ab, s0, s1, s2, s3, s4
55	.ifnb		\s4
56	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
57	ext		v6.16b, \s0\().16b, \s1\().16b, #12
58	ext		v7.16b, \s2\().16b, \s3\().16b, #8
59	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
60	.endif
61
62	eor		v10.16b, \s0\().16b, \s1\().16b
63
64	round		\ab, \s0, v11, v12, 0
65	round		\ab, \s0, v12, v11, 1
66	round		\ab, \s0, v11, v12, 2
67	round		\ab, \s0, v12, v11, 3
68
69	.ifnb		\s4
	/* finish s4 from the v7/v6 temporaries prepared before the rounds */
70	sm3partw2	\s4\().4s, v7.4s, v6.4s
71	.endif
72	.endm
73
74	/*
75	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
76	 *                       int blocks)
	 *
	 * Processes 'blocks' consecutive 64-byte input blocks and updates the
	 * 32 bytes of state found at the start of *sst.
	 *
	 *   x0 (sst)    - the first 32 bytes at this address are loaded into
	 *                 v8/v9 on entry and stored back before returning
	 *   x1 (src)    - input; advanced by 64 bytes per loop iteration
	 *   w2 (blocks) - loop counter; it is decremented before it is tested,
	 *                 so a call with blocks == 0 would not exit after zero
	 *                 iterations - callers are expected to pass >= 1
	 *
	 * Registers written: x1, w2, x8, v0-v16.
	 * NOTE(review): v8-v15 are modified without being saved here;
	 * presumably every caller brackets the call with kernel_neon_begin()
	 * and kernel_neon_end() - confirm.
77	 */
78	.text
79ENTRY(sm3_ce_transform)
80	/* load state */
81	ld1		{v8.4s-v9.4s}, [x0]
	/*
	 * rev64 swaps the two 32-bit lanes inside each 64-bit half and the
	 * ext by 8 bytes swaps the halves, so together they reverse the order
	 * of the four 32-bit lanes in each state vector.
	 */
82	rev64		v8.4s, v8.4s
83	rev64		v9.4s, v9.4s
84	ext		v8.16b, v8.16b, v8.16b, #8
85	ext		v9.16b, v9.16b, v9.16b, #8
86
	/* s13 = first word at .Lt, s14 = second word at .Lt */
87	adr_l		x8, .Lt
88	ldp		s13, s14, [x8]
89
90	/* load input */
910:	ld1		{v0.16b-v3.16b}, [x1], #64
92	sub		w2, w2, #1
93
	/* keep a copy of the incoming state; it is XORed back in after the rounds */
94	mov		v15.16b, v8.16b
95	mov		v16.16b, v9.16b
96
	/*
	 * Byte-reverse every 32-bit word of the input.
	 * NOTE(review): CPU_LE() comes from asm/assembler.h and presumably
	 * emits its argument only on little-endian builds - confirm.
	 */
97CPU_LE(	rev32		v0.16b, v0.16b		)
98CPU_LE(	rev32		v1.16b, v1.16b		)
99CPU_LE(	rev32		v2.16b, v2.16b		)
100CPU_LE(	rev32		v3.16b, v3.16b		)
101
	/* rotate v13 by 4 bytes so its lowest 32-bit lane lands in the top lane of v11 */
102	ext		v11.16b, v13.16b, v13.16b, #4
103
	/* 4 qrounds (16 rounds) using the "a" variants */
104	qround		a, v0, v1, v2, v3, v4
105	qround		a, v1, v2, v3, v4, v0
106	qround		a, v2, v3, v4, v0, v1
107	qround		a, v3, v4, v0, v1, v2
108
	/* restart v11 from the second .Lt word for the remaining rounds */
109	ext		v11.16b, v14.16b, v14.16b, #4
110
	/* 12 qrounds (48 rounds) using the "b" variants */
111	qround		b, v4, v0, v1, v2, v3
112	qround		b, v0, v1, v2, v3, v4
113	qround		b, v1, v2, v3, v4, v0
114	qround		b, v2, v3, v4, v0, v1
115	qround		b, v3, v4, v0, v1, v2
116	qround		b, v4, v0, v1, v2, v3
117	qround		b, v0, v1, v2, v3, v4
118	qround		b, v1, v2, v3, v4, v0
119	qround		b, v2, v3, v4, v0, v1
	/* the last three qrounds pass no s4, so no further vector is derived */
120	qround		b, v3, v4
121	qround		b, v4, v0
122	qround		b, v0, v1
123
	/* combine the new state with the copy saved at the top of the loop */
124	eor		v8.16b, v8.16b, v15.16b
125	eor		v9.16b, v9.16b, v16.16b
126
127	/* handled all input blocks? */
128	cbnz		w2, 0b
129
130	/* save state */
	/* undo the lane reversal applied when the state was loaded */
131	rev64		v8.4s, v8.4s
132	rev64		v9.4s, v9.4s
133	ext		v8.16b, v8.16b, v8.16b, #8
134	ext		v9.16b, v9.16b, v9.16b, #8
135	st1		{v8.4s-v9.4s}, [x0]
136	ret
137ENDPROC(sm3_ce_transform)
138
/*
 * Two 32-bit constants read by sm3_ce_transform with a single ldp into
 * s13 and s14. The first word seeds v11 for the qrounds using the "a"
 * variants, the second for the qrounds using the "b" variants.
 * NOTE(review): presumably the two SM3 round constants T_j - confirm
 * against the SM3 specification.
 */
139	.section	".rodata", "a"
140	.align		3
141.Lt:	.word		0x79cc4519, 0x9d8a7a87
142