/* xref: /freebsd/lib/libmd/aarch64/md5block.S (revision c1135b2b54bf46709120d98c90ff4d28a77b896c) */
/*-
 * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
6
#include <sys/elf_common.h>
#include <machine/asm.h>
9
/*
 * addkm key, m
 *
 * Optimal instruction sequence for k = \key + \m (mod 2^32).
 *
 *	key	constant known at assembly time (every caller in this
 *		file passes a 32-bit value)
 *	m	32-bit register holding the addend
 *
 * The result is left in the register aliased as k, which is the only
 * register written.  The alias has to be set up with .req before the
 * macro is expanded.
 *
 * With n = 0x100000000 - \key (the negation of \key modulo 2^32),
 * adding \key is the same as subtracting n.  The sequence is chosen
 * by the magnitude of n:
 *
 *	n > 0xffffff	build \key in k with MOVZ/MOVK, then add \m
 *	n > 0xffff	subtract bits 23:12 of n, then bits 11:0,
 *			each as a single SUB immediate
 *	otherwise	load n with a single MOVZ, then subtract it
 */
.macro	addkm	key, m
.if 0x100000000 - \key > 0x00ffffff
	movz	k, #\key & 0xffff		// k = low 16 bits of key
	movk	k, #\key >> 16, lsl #16		// k = key
	add	k, k, \m
.elseif 0x100000000 - \key > 0x0000ffff
	sub	k, \m, #(0x100000000 - \key) & 0xfff000
	sub	k, k, #(0x100000000 - \key) & 0xfff
.else
	movz	k, #0x100000000 - \key
	sub	k, \m, k
.endif
.endm
24
/*
 * round a, b, c, d, f, key, m, s
 *
 * One step of the block function:
 *
 *	a = b + rol(a + F(b, c, d) + key + m, s)
 *
 *	a, b, c, d	state registers, in the order that applies to
 *			this step
 *	f		name of the macro that computes F; it is invoked
 *			as "\f f, b, c, d" and must leave its result in
 *			the register aliased as f
 *	key		additive constant for this step
 *	m		register holding the message word for this step
 *	s		left-rotate count
 *
 * Only \a is updated.  The registers aliased as f and k are used as
 * scratch.
 */
.macro	round	a, b, c, d, f, key, m, s
	\f	f, \b, \c, \d		// f = F(b, c, d)
	addkm	\key, \m		// k[i] + m[g]
	add	\a, \a, k		// k[i] + m[g] + a
	add	\a, \a, f		// k[i] + m[g] + a + f
	ror	\a, \a, #32-\s		// rotate left by s
	add	\a, \a, \b
.endm
33
/*
 * f0 f, b, c, d
 *
 * f = b ? c : d, chosen bit by bit: (b & c) | (~b & d).
 * Evaluated as ((c ^ d) & b) ^ d, which needs no second scratch
 * register.  Writes \f only; \f must not be the same register as
 * \b or \d, both of which are read after \f is first written.
 */
.macro	f0	f, b, c, d
	eor	\f, \c, \d		// bits where c and d differ
	and	\f, \f, \b		// ... kept only where b is set
	eor	\f, \f, \d		// flips d into c at those bits
.endm
40
/*
 * round1 a, b, c, d, key, m, s
 *
 * Special cased round 1 function
 * f1 = d ? b : c = (d & b) + (~d & c)
 *
 * The two terms never have a bit in common, so their sum equals
 * their bitwise OR.  That lets each term be added into \a on its own
 * instead of being combined into a single f value first.
 *
 * Otherwise the same contract as the generic round macro:
 *
 *	a = b + rol(a + f1(b, c, d) + key + m, s)
 *
 * Only \a is updated.  The registers aliased as tmp, f and k are
 * used as scratch.
 */
.macro	round1	a, b, c, d, key, m, s
	bic	tmp, \c, \d		// ~d & c
	addkm	\key, \m		// k[i] + m[g]
	add	\a, \a, k		// k[i] + m[g] + a
	and	f, \b, \d		// d & b
	add	\a, \a, tmp		// k[i] + m[g] + a + (~d & c)
	add	\a, \a, f		// k[i] + m[g] + a + (~d & c) + (d & b)
	ror	\a, \a, #32-\s		// rotate left by s
	add	\a, \a, \b
.endm
55
/*
 * f2 f, b, c, d
 *
 * f = b ^ c ^ d.  Writes \f only; \f must not be the same register
 * as \b, which is read after \f is first written.
 */
.macro	f2	f, b, c, d
	eor	\f, \c, \d
	eor	\f, \f, \b
.endm
61
/*
 * f3 f, b, c, d
 *
 * f = c ^ (b | ~d).  Writes \f only; \f must not be the same
 * register as \c, which is read after \f is first written.
 */
.macro	f3	f, b, c, d
	orn	\f, \b, \d		// b | ~d
	eor	\f, \f, \c
.endm
67
/*
 * rounds f, m0, m1, m2, m3, s0, s1, s2, s3, k0, k1, k2, k3
 *
 * Do 4 rounds.  The same round function \f is used for all four; the
 * i-th round takes message register \m<i>, rotate count \s<i> and
 * constant \k<i>.
 *
 * The state is addressed through the fixed register aliases a, b, c
 * and d.  The register updated by each round moves a -> d -> c -> b,
 * so after the fourth round the aliases are back in their starting
 * roles and the macro can be repeated without shuffling registers.
 */
.macro	rounds	f, m0, m1, m2, m3, s0, s1, s2, s3, k0, k1, k2, k3
	round	a, b, c, d, \f, \k0, \m0, \s0
	round	d, a, b, c, \f, \k1, \m1, \s1
	round	c, d, a, b, \f, \k2, \m2, \s2
	round	b, c, d, a, \f, \k3, \m3, \s3
.endm
75
/*
 * rounds0 m0, m1, m2, m3, k0, k1, k2, k3
 *
 * Do 4 rounds with f0 and the rotate counts 7, 12, 17, 22.
 * \m0..\m3 are the message registers and \k0..\k3 the constants of
 * the four rounds, in order.
 */
.macro	rounds0	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f0, \m0, \m1, \m2, \m3, 7, 12, 17, 22, \k0, \k1, \k2, \k3
.endm
80
/*
 * rounds1 m0, m1, m2, m3, k0, k1, k2, k3
 *
 * Do 4 rounds with the special cased round1 macro and the rotate
 * counts 5, 9, 14, 20.  \m0..\m3 are the message registers and
 * \k0..\k3 the constants of the four rounds, in order.  The state
 * register updated by each round moves a -> d -> c -> b.
 */
.macro	rounds1	m0, m1, m2, m3, k0, k1, k2, k3
	round1	a, b, c, d, \k0, \m0,  5
	round1	d, a, b, c, \k1, \m1,  9
	round1	c, d, a, b, \k2, \m2, 14
	round1	b, c, d, a, \k3, \m3, 20
.endm
87
/*
 * rounds2 m0, m1, m2, m3, k0, k1, k2, k3
 *
 * Do 4 rounds with f2 and the rotate counts 4, 11, 16, 23.
 * \m0..\m3 are the message registers and \k0..\k3 the constants of
 * the four rounds, in order.
 */
.macro	rounds2	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f2, \m0, \m1, \m2, \m3, 4, 11, 16, 23, \k0, \k1, \k2, \k3
.endm
91
/*
 * rounds3 m0, m1, m2, m3, k0, k1, k2, k3
 *
 * Do 4 rounds with f3 and the rotate counts 6, 10, 15, 21.
 * \m0..\m3 are the message registers and \k0..\k3 the constants of
 * the four rounds, in order.
 */
.macro	rounds3	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f3, \m0, \m1, \m2, \m3, 6, 10, 15, 21, \k0, \k1, \k2, \k3
.endm
95
/*
 * md5block(MD5_CTX, buf, len)
 *
 * Run the MD5 block function over every complete 64-byte block of buf
 * and fold the result into the four 32-bit state words at the start
 * of the context.
 *
 * In:	x0 = ctx	context; the state words a, b, c, d are read
 *			from and written to byte offsets 0, 4, 8, 12
 *	x1 = buf	input data
 *	x2 = len	input length in bytes; the low six bits are
 *			ignored, so a trailing partial block is skipped
 *			and len < 64 leaves the context untouched
 * Out:	the state at [ctx] is updated after each block; x0 is not
 *	modified
 *
 * Clobbers x1-x17 and the condition flags.  x19-x26 carry message
 * words and are saved on entry and restored on exit.  x18 is not
 * touched.
 * Stack: 0x40 bytes, released again before returning.
 */
ENTRY(_libmd_md5block)
	/* arguments */
ctx	.req	x0
buf	.req	x1
len	.req	x2
end	.req	x2			// aliases len
	/* running state; stays in registers from one block to the next */
a	.req	w3
b	.req	w4
c	.req	w5
d	.req	w6
	/* scratch used by the round macros */
f	.req	w7
tmp	.req	w8
k	.req	w9
	/* the sixteen 32-bit message words of the current block */
m0	.req	w10
m1	.req	w11
m2	.req	w12
m3	.req	w13
m4	.req	w14
m5	.req	w15
m6	.req	w16
m7	.req	w17
					// x18 is the platform register
m8	.req	w19
m9	.req	w20
m10	.req	w21
m11	.req	w22
m12	.req	w23
m13	.req	w24
m14	.req	w25
m15	.req	w26

	/*
	 * Old state reloaded from the context at the end of a block.
	 * These share registers with m0, m7, m14 and m5, whose last use
	 * is in the first rounds3 group, so they are free by then.
	 */
a_	.req	m0
b_	.req	m7
c_	.req	m14
d_	.req	m5

	/* x19-x26 back m8-m15 and are callee-saved, so spill them */
	stp	x19, x20, [sp, #-0x40]!
	stp	x21, x22, [sp, #0x10]
	stp	x23, x24, [sp, #0x20]
	stp	x25, x26, [sp, #0x30]

	bics	len, len, #63		// bytes in whole blocks; Z set if none
	add	end, buf, len		// end pointer; ADD leaves the flags alone

	beq	.Lend			// was len == 0 after BICS?

	ldp	a, b, [ctx, #0]
	ldp	c, d, [ctx, #8]

	/* first eight rounds interleaved with data loads */
.Lloop:	ldp	m0, m1, [buf, #0]
	round	a, b, c, d, f0, 0xd76aa478, m0,  7
	ldp	m2, m3, [buf, #8]
	round	d, a, b, c, f0, 0xe8c7b756, m1, 12
	ldp	m4, m5, [buf, #16]
	round	c, d, a, b, f0, 0x242070db, m2, 17
	ldp	m6, m7, [buf, #24]
	round	b, c, d, a, f0, 0xc1bdceee, m3, 22

	ldp	m8, m9, [buf, #32]
	round	a, b, c, d, f0, 0xf57c0faf, m4,  7
	ldp	m10, m11, [buf, #40]
	round	d, a, b, c, f0, 0x4787c62a, m5, 12
	ldp	m12, m13, [buf, #48]
	round	c, d, a, b, f0, 0xa8304613, m6, 17
	ldp	m14, m15, [buf, #56]
	round	b, c, d, a, f0, 0xfd469501, m7, 22

	/* remaining rounds use the roundsX macros */
	/* rounds 8..15: message words taken in order */
	rounds0	 m8,  m9, m10, m11, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be
	rounds0	m12, m13, m14, m15, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821

	/* rounds 16..31: message word index is (5i + 1) mod 16 */
	rounds1	 m1,  m6, m11,  m0, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa
	rounds1	 m5, m10, m15,  m4, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8
	rounds1	 m9, m14,  m3,  m8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed
	rounds1	m13,  m2,  m7, m12, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a

	/* rounds 32..47: message word index is (3i + 5) mod 16 */
	rounds2	 m5,  m8, m11, m14, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c
	rounds2	 m1,  m4,  m7, m10, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70
	rounds2	m13,  m0,  m3,  m6, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05
	rounds2	 m9, m12, m15,  m2, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665

	/* rounds 48..63: message word index is 7i mod 16 */
	rounds3	 m0,  m7, m14,  m5, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039
	rounds3	m12,  m3, m10,  m1, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1
	rounds3	 m8, m15,  m6, m13, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1
	rounds3	 m4, m11,  m2,  m9, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391

	/*
	 * Add the state this block started from, reloaded from the
	 * context, and store the sum.  a, b, c, d then already hold
	 * the starting state for the next block.
	 */
	ldp	a_, b_, [ctx, #0]
	ldp	c_, d_, [ctx, #8]
	add	a, a, a_
	add	b, b, b_
	add	c, c, c_
	add	d, d, d_
	stp	a, b, [ctx, #0]
	stp	c, d, [ctx, #8]

	add	buf, buf, #64		// advance to the next block
	cmp	buf, end
	bne	.Lloop

	/* restore the spilled registers and release the frame */
.Lend:	ldp	x25, x26, [sp, #0x30]
	ldp	x23, x24, [sp, #0x20]
	ldp	x21, x22, [sp, #0x10]
	ldp	x19, x20, [sp], #0x40

	ret
END(_libmd_md5block)
203
/*
 * Object file notes.
 *
 * NOTE(review): GNU_PROPERTY_AARCH64_FEATURE_1_NOTE and its argument
 * are macros that are not defined in this file; presumably they come
 * from the headers included at the top and emit a GNU property note
 * describing the AArch64 features this object is built for -- confirm
 * against those headers.
 */
GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)

	/* empty flags, in particular no "x": no executable stack is requested */
	.section .note.GNU-stack,"",%progbits
207