/* xref: /freebsd/lib/libmd/aarch64/md5block.S (revision c1135b2b54bf46709120d98c90ff4d28a77b896c) */
/*-
 * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

/*
 * ENTRY(), END() and the GNU_PROPERTY_AARCH64_FEATURE_1_* names used in
 * this file are not defined here; presumably they come from these headers.
 */
#include <sys/elf_common.h>
#include <machine/asm.h>

/*
 * addkm key, m -- k = \key + \m (mod 2^32)
 *
 * key is an assemble-time 32-bit constant, m a 32-bit register.  The
 * result is left in the register aliased as k, which must have been
 * defined with .req before the macro is expanded.  m must not be k:
 * the first and last variants overwrite k before reading m.
 *
 * Let neg = 2^32 - key, i.e. the amount that has to be subtracted from
 * m to obtain the same 32-bit result.  One of three sequences is
 * emitted depending on how many bits neg needs:
 *
 *   neg > 0xffffff:  build key in k with movz/movk, then add m
 *   neg > 0xffff:    subtract neg from m as two 12-bit immediates
 *                    (bits 12-23 first, then bits 0-11)
 *   otherwise:       load neg with a single movz, then subtract it
 */
.macro	addkm	key, m
.if 0x100000000 - \key > 0x00ffffff
	movz	k, #\key & 0xffff
	movk	k, #\key >> 16, lsl #16
	add	k, k, \m
.elseif 0x100000000 - \key > 0x0000ffff
	sub	k, \m, #(0x100000000 - \key) & 0xfff000
	sub	k, k, #(0x100000000 - \key) & 0xfff
.else
	movz	k, #0x100000000 - \key
	sub	k, \m, k
.endif
.endm

/*
 * round a, b, c, d, f, key, m, s -- one MD5 step
 *
 *	a = b + rol(a + F(b, c, d) + key + m, s)
 *
 * a, b, c, d are the registers currently playing those roles, key is
 * the step constant, m the message word register and s the left-rotate
 * amount.  The macro parameter f is the NAME of the helper macro that
 * computes F (f0, f2 or f3); it is invoked with the register alias f as
 * its destination.  Clobbers the registers aliased as f and k.
 */
.macro	round	a, b, c, d, f, key, m, s
	\f	f, \b, \c, \d
	addkm	\key, \m		// k[i] + m[g]
	add	\a, \a, k		// k[i] + m[g] + a
	add	\a, \a, f		// k[i] + m[g] + a + f
	ror	\a, \a, #32-\s		// rotate right by 32-s == rotate left by s
	add	\a, \a, \b
.endm

34*c1135b2bSRobert Clausecker	/* f = b ? c : d */
35*c1135b2bSRobert Clausecker.macro	f0	f, b, c, d
36*c1135b2bSRobert Clausecker	eor	\f, \c, \d
37*c1135b2bSRobert Clausecker	and	\f, \f, \b
38*c1135b2bSRobert Clausecker	eor	\f, \f, \d
39*c1135b2bSRobert Clausecker.endm
40*c1135b2bSRobert Clausecker
41*c1135b2bSRobert Clausecker	/*
42*c1135b2bSRobert Clausecker	 * special cased round 1 function
43*c1135b2bSRobert Clausecker	 * f1 = d ? b : c = (d & b) + (~d & c)
44*c1135b2bSRobert Clausecker	 */
45*c1135b2bSRobert Clausecker.macro	round1	a, b, c, d, key, m, s
46*c1135b2bSRobert Clausecker	bic	tmp, \c, \d		// ~d & c
47*c1135b2bSRobert Clausecker	addkm	\key, \m		// k[i] + m[g]
48*c1135b2bSRobert Clausecker	add	\a, \a, k		// k[i] + m[g] + a
49*c1135b2bSRobert Clausecker	and	f, \b, \d		// d & b
50*c1135b2bSRobert Clausecker	add	\a, \a, tmp		// k[i] + m[g] + a + (~d & c)
51*c1135b2bSRobert Clausecker	add	\a, \a, f		// k[i] + m[g] + a + (~d & c) + (d & b)
52*c1135b2bSRobert Clausecker	ror	\a, \a, #32-\s
53*c1135b2bSRobert Clausecker	add	\a, \a, \b
54*c1135b2bSRobert Clausecker.endm
55*c1135b2bSRobert Clausecker
56*c1135b2bSRobert Clausecker	/* f = b ^ c ^ d */
57*c1135b2bSRobert Clausecker.macro	f2	f, b, c, d
58*c1135b2bSRobert Clausecker	eor	\f, \c, \d
59*c1135b2bSRobert Clausecker	eor	\f, \f, \b
60*c1135b2bSRobert Clausecker.endm
61*c1135b2bSRobert Clausecker
62*c1135b2bSRobert Clausecker	/* f = c ^ (b | ~d) */
63*c1135b2bSRobert Clausecker.macro	f3	f, b, c, d
64*c1135b2bSRobert Clausecker	orn	\f, \b, \d
65*c1135b2bSRobert Clausecker	eor	\f, \f, \c
66*c1135b2bSRobert Clausecker.endm
67*c1135b2bSRobert Clausecker
68*c1135b2bSRobert Clausecker	/* do 4 rounds */
69*c1135b2bSRobert Clausecker.macro	rounds	f, m0, m1, m2, m3, s0, s1, s2, s3, k0, k1, k2, k3
70*c1135b2bSRobert Clausecker	round	a, b, c, d, \f, \k0, \m0, \s0
71*c1135b2bSRobert Clausecker	round	d, a, b, c, \f, \k1, \m1, \s1
72*c1135b2bSRobert Clausecker	round	c, d, a, b, \f, \k2, \m2, \s2
73*c1135b2bSRobert Clausecker	round	b, c, d, a, \f, \k3, \m3, \s3
74*c1135b2bSRobert Clausecker.endm
75*c1135b2bSRobert Clausecker
76*c1135b2bSRobert Clausecker	/* do 4 rounds with f0, f1, f2, f3 */
77*c1135b2bSRobert Clausecker.macro	rounds0	m0, m1, m2, m3, k0, k1, k2, k3
78*c1135b2bSRobert Clausecker	rounds	f0, \m0, \m1, \m2, \m3, 7, 12, 17, 22, \k0, \k1, \k2, \k3
79*c1135b2bSRobert Clausecker.endm
80*c1135b2bSRobert Clausecker
/*
 * rounds1 m0..m3, k0..k3 -- 4 steps of round 1
 *
 * Expands the special-cased round1 macro directly instead of going
 * through "rounds".  Rotate amounts are 5, 9, 14, 20 and the state
 * registers change roles by one position per step.
 */
.macro	rounds1	m0, m1, m2, m3, k0, k1, k2, k3
	round1	a, b, c, d, \k0, \m0,  5
	round1	d, a, b, c, \k1, \m1,  9
	round1	c, d, a, b, \k2, \m2, 14
	round1	b, c, d, a, \k3, \m3, 20
.endm

/*
 * rounds2 m0..m3, k0..k3 -- 4 steps of round 2
 *
 * Uses helper f2 and the rotate amounts 4, 11, 16, 23.
 */
.macro	rounds2	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f2, \m0, \m1, \m2, \m3, 4, 11, 16, 23, \k0, \k1, \k2, \k3
.endm

/*
 * rounds3 m0..m3, k0..k3 -- 4 steps of round 3
 *
 * Uses helper f3 and the rotate amounts 6, 10, 15, 21.
 */
.macro	rounds3	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f3, \m0, \m1, \m2, \m3, 6, 10, 15, 21, \k0, \k1, \k2, \k3
.endm

96*c1135b2bSRobert Clausecker	/* md5block(MD5_CTX, buf, len) */
97*c1135b2bSRobert ClauseckerENTRY(_libmd_md5block)
98*c1135b2bSRobert Clauseckerctx	.req	x0
99*c1135b2bSRobert Clauseckerbuf	.req	x1
100*c1135b2bSRobert Clauseckerlen	.req	x2
101*c1135b2bSRobert Clauseckerend	.req	x2			// aliases len
102*c1135b2bSRobert Clauseckera	.req	w3
103*c1135b2bSRobert Clauseckerb	.req	w4
104*c1135b2bSRobert Clauseckerc	.req	w5
105*c1135b2bSRobert Clauseckerd	.req	w6
106*c1135b2bSRobert Clauseckerf	.req	w7
107*c1135b2bSRobert Clauseckertmp	.req	w8
108*c1135b2bSRobert Clauseckerk	.req	w9
109*c1135b2bSRobert Clauseckerm0	.req	w10
110*c1135b2bSRobert Clauseckerm1	.req	w11
111*c1135b2bSRobert Clauseckerm2	.req	w12
112*c1135b2bSRobert Clauseckerm3	.req	w13
113*c1135b2bSRobert Clauseckerm4	.req	w14
114*c1135b2bSRobert Clauseckerm5	.req	w15
115*c1135b2bSRobert Clauseckerm6	.req	w16
116*c1135b2bSRobert Clauseckerm7	.req	w17
117*c1135b2bSRobert Clausecker					// x18 is the platform register
118*c1135b2bSRobert Clauseckerm8	.req	w19
119*c1135b2bSRobert Clauseckerm9	.req	w20
120*c1135b2bSRobert Clauseckerm10	.req	w21
121*c1135b2bSRobert Clauseckerm11	.req	w22
122*c1135b2bSRobert Clauseckerm12	.req	w23
123*c1135b2bSRobert Clauseckerm13	.req	w24
124*c1135b2bSRobert Clauseckerm14	.req	w25
125*c1135b2bSRobert Clauseckerm15	.req	w26
126*c1135b2bSRobert Clausecker
127*c1135b2bSRobert Clauseckera_	.req	m0
128*c1135b2bSRobert Clauseckerb_	.req	m7
129*c1135b2bSRobert Clauseckerc_	.req	m14
130*c1135b2bSRobert Clauseckerd_	.req	m5
131*c1135b2bSRobert Clausecker
132*c1135b2bSRobert Clausecker	stp	x19, x20, [sp, #-0x40]!
133*c1135b2bSRobert Clausecker	stp	x21, x22, [sp, #0x10]
134*c1135b2bSRobert Clausecker	stp	x23, x24, [sp, #0x20]
135*c1135b2bSRobert Clausecker	stp	x25, x26, [sp, #0x30]
136*c1135b2bSRobert Clausecker
137*c1135b2bSRobert Clausecker	bics	len, len, #63		// length in blocks
138*c1135b2bSRobert Clausecker	add	end, buf, len		// end pointer
139*c1135b2bSRobert Clausecker
140*c1135b2bSRobert Clausecker	beq	.Lend			// was len == 0 after BICS?
141*c1135b2bSRobert Clausecker
142*c1135b2bSRobert Clausecker	ldp	a, b, [ctx, #0]
143*c1135b2bSRobert Clausecker	ldp	c, d, [ctx, #8]
144*c1135b2bSRobert Clausecker
145*c1135b2bSRobert Clausecker	/* first eight rounds interleaved with data loads */
146*c1135b2bSRobert Clausecker.Lloop:	ldp	m0, m1, [buf, #0]
147*c1135b2bSRobert Clausecker	round	a, b, c, d, f0, 0xd76aa478, m0,  7
148*c1135b2bSRobert Clausecker	ldp	m2, m3, [buf, #8]
149*c1135b2bSRobert Clausecker	round	d, a, b, c, f0, 0xe8c7b756, m1, 12
150*c1135b2bSRobert Clausecker	ldp	m4, m5, [buf, #16]
151*c1135b2bSRobert Clausecker	round	c, d, a, b, f0, 0x242070db, m2, 17
152*c1135b2bSRobert Clausecker	ldp	m6, m7, [buf, #24]
153*c1135b2bSRobert Clausecker	round	b, c, d, a, f0, 0xc1bdceee, m3, 22
154*c1135b2bSRobert Clausecker
155*c1135b2bSRobert Clausecker	ldp	m8, m9, [buf, #32]
156*c1135b2bSRobert Clausecker	round	a, b, c, d, f0, 0xf57c0faf, m4,  7
157*c1135b2bSRobert Clausecker	ldp	m10, m11, [buf, #40]
158*c1135b2bSRobert Clausecker	round	d, a, b, c, f0, 0x4787c62a, m5, 12
159*c1135b2bSRobert Clausecker	ldp	m12, m13, [buf, #48]
160*c1135b2bSRobert Clausecker	round	c, d, a, b, f0, 0xa8304613, m6, 17
161*c1135b2bSRobert Clausecker	ldp	m14, m15, [buf, #56]
162*c1135b2bSRobert Clausecker	round	b, c, d, a, f0, 0xfd469501, m7, 22
163*c1135b2bSRobert Clausecker
164*c1135b2bSRobert Clausecker	/* remaining rounds use the roundsX macros */
165*c1135b2bSRobert Clausecker	rounds0	 m8,  m9, m10, m11, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be
166*c1135b2bSRobert Clausecker	rounds0	m12, m13, m14, m15, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821
167*c1135b2bSRobert Clausecker
168*c1135b2bSRobert Clausecker	rounds1	 m1,  m6, m11,  m0, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa
169*c1135b2bSRobert Clausecker	rounds1	 m5, m10, m15,  m4, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8
170*c1135b2bSRobert Clausecker	rounds1	 m9, m14,  m3,  m8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed
171*c1135b2bSRobert Clausecker	rounds1	m13,  m2,  m7, m12, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a
172*c1135b2bSRobert Clausecker
173*c1135b2bSRobert Clausecker	rounds2	 m5,  m8, m11, m14, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c
174*c1135b2bSRobert Clausecker	rounds2	 m1,  m4,  m7, m10, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70
175*c1135b2bSRobert Clausecker	rounds2	m13,  m0,  m3,  m6, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05
176*c1135b2bSRobert Clausecker	rounds2	 m9, m12, m15,  m2, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665
177*c1135b2bSRobert Clausecker
178*c1135b2bSRobert Clausecker	rounds3	 m0,  m7, m14,  m5, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039
179*c1135b2bSRobert Clausecker	rounds3	m12,  m3, m10,  m1, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1
180*c1135b2bSRobert Clausecker	rounds3	 m8, m15,  m6, m13, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1
181*c1135b2bSRobert Clausecker	rounds3	 m4, m11,  m2,  m9, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
182*c1135b2bSRobert Clausecker
183*c1135b2bSRobert Clausecker	ldp	a_, b_, [ctx, #0]
184*c1135b2bSRobert Clausecker	ldp	c_, d_, [ctx, #8]
185*c1135b2bSRobert Clausecker	add	a, a, a_
186*c1135b2bSRobert Clausecker	add	b, b, b_
187*c1135b2bSRobert Clausecker	add	c, c, c_
188*c1135b2bSRobert Clausecker	add	d, d, d_
189*c1135b2bSRobert Clausecker	stp	a, b, [ctx, #0]
190*c1135b2bSRobert Clausecker	stp	c, d, [ctx, #8]
191*c1135b2bSRobert Clausecker
192*c1135b2bSRobert Clausecker	add	buf, buf, #64
193*c1135b2bSRobert Clausecker	cmp	buf, end
194*c1135b2bSRobert Clausecker	bne	.Lloop
195*c1135b2bSRobert Clausecker
196*c1135b2bSRobert Clausecker.Lend:	ldp	x25, x26, [sp, #0x30]
197*c1135b2bSRobert Clausecker	ldp	x23, x24, [sp, #0x20]
198*c1135b2bSRobert Clausecker	ldp	x21, x22, [sp, #0x10]
199*c1135b2bSRobert Clausecker	ldp	x19, x20, [sp], #0x40
200*c1135b2bSRobert Clausecker
201*c1135b2bSRobert Clausecker	ret
202*c1135b2bSRobert ClauseckerEND(_libmd_md5block)
203*c1135b2bSRobert Clausecker
/*
 * The macro below is defined outside this file; presumably it emits the
 * GNU property note advertising the AArch64 feature bits this object was
 * built with -- TODO confirm against machine/asm.h.
 */
GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)

	/* empty .note.GNU-stack section with no flags set (not executable) */
	.section .note.GNU-stack,"",%progbits
