xref: /freebsd/lib/libc/aarch64/string/timingsafe_memcmp.S (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
1/*
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2024 Robert Clausecker
5 */
6
7#include <machine/asm.h>
8
/*
 * int timingsafe_memcmp(const void *b1, const void *b2, size_t len)
 *
 * Lexicographically compare two buffers as sequences of unsigned bytes.
 *
 * In:    x0 = b1, x1 = b2, x2 = len
 * Out:   w0 = 0 if the buffers are equal (or len == 0),
 *             negative if b1 sorts before b2, positive otherwise.
 *             For len of 1 or 2 the result is a difference of two 16 bit
 *             values; all other paths return exactly -1, 0 or 1.
 * Clobb: x0-x8, condition flags.  No stack is used.
 *
 * Every branch in this routine tests only the length in x2; the buffer
 * contents are examined solely through cmp/csel/csetm/csinc, so the
 * control flow does not depend on the data being compared.
 *
 * Buffers of up to 16 bytes are handled by loading a (possibly
 * overlapping) head and tail chunk.  The chunks are loaded in little
 * endian order and byte-reversed with rev so that an unsigned integer
 * comparison orders them like a bytewise comparison would.
 */
ENTRY(timingsafe_memcmp)
	cmp	x2, #16			// at least 17 bytes to process?
	bhi	.Lgt16

	cmp	x2, #8			// at least 9 bytes to process?
	bhi	.L0916

	cmp	x2, #4			// at least 5 bytes to process?
	bhi	.L0508

	cmp	x2, #2			// at least 3 bytes to process?
	bhi	.L0304

	cbnz	x2, .L0102		// buffer empty?

	mov	w0, #0			// empty buffer always matches
	ret

	/* 1--2 bytes: first and last byte (the same byte if len == 1) */
.L0102:	ldrb	w3, [x0]		// load first bytes
	ldrb	w4, [x1]
	sub	x2, x2, #1		// x2 = offset of last byte
	ldrb	w5, [x0, x2]		// load last bytes
	ldrb	w6, [x1, x2]
	bfi	w5, w3, #8, #8		// join bytes in big endian
	bfi	w6, w4, #8, #8
	sub	w0, w5, w6		// both are 16 bit values: no overflow
	ret

	/* 3--4 bytes: first and last halfword, overlapping if len == 3 */
.L0304:	ldrh	w3, [x0]		// load first halfwords
	ldrh	w4, [x1]
	sub	x2, x2, #2		// x2 = offset of last halfword
	ldrh	w5, [x0, x2]		// load last halfwords
	ldrh	w6, [x1, x2]
	bfi	w3, w5, #16, #16	// join halfwords in little endian
	bfi	w4, w6, #16, #16
	rev	w3, w3			// byte swap: first byte most significant
	rev	w4, w4
	cmp	w3, w4
	csetm	w0, lo			// w0 = w3 >= w4 ? 0 : -1
	csinc	w0, w0, wzr, ls		// w0 = w3 <=> w4 ? 1 : 0 : -1
	ret

	/* 5--8 bytes: first and last word, overlapping if len < 8 */
.L0508:	ldr	w3, [x0]		// load first words
	ldr	w4, [x1]
	sub	x2, x2, #4		// x2 = offset of last word
	ldr	w5, [x0, x2]		// load last words
	ldr	w6, [x1, x2]
	bfi	x3, x5, #32, #32	// join words in little endian
	bfi	x4, x6, #32, #32
	rev	x3, x3			// byte swap: first byte most significant
	rev	x4, x4
	cmp	x3, x4
	csetm	w0, lo			// w0 = x3 >= x4 ? 0 : -1
	csinc	w0, w0, wzr, ls		// w0 = x3 <=> x4 ? 1 : 0 : -1
	ret

	/* 9--16 bytes: first and last doubleword, overlapping if len < 16 */
.L0916:	ldr	x3, [x0]		// load first doublewords
	ldr	x4, [x1]
	sub	x2, x2, #8		// x2 = offset of last doubleword
	ldr	x5, [x0, x2]		// load last doublewords
	ldr	x6, [x1, x2]
	cmp	x3, x4			// mismatch in first pair?
	csel	x3, x3, x5, ne		// use second pair if first pair equal
	csel	x4, x4, x6, ne
	rev	x3, x3			// byte swap: first byte most significant
	rev	x4, x4
	cmp	x3, x4
	csetm	w0, lo			// w0 = x3 >= x4 ? 0 : -1
	csinc	w0, w0, wzr, ls		// w0 = x3 <=> x4 ? 1 : 0 : -1
	ret

	/*
	 * more than 16 bytes: process buffer in a loop
	 *
	 * x3/x5 hold the earliest mismatching doubleword pair seen so far
	 * (from b1/b2 respectively), or the most recently loaded pair if
	 * everything so far was equal.
	 */
.Lgt16:	ldp	x3, x4, [x0], #16
	ldp	x5, x6, [x1], #16
	cmp	x3, x5			// mismatch in first pair?
	csel	x3, x3, x4, ne		// use second pair if first pair equal
	csel	x5, x5, x6, ne
	subs	x2, x2, #32		// x2 = len - 32
	bls	.Ltail			// at most 32 bytes? then only the tail is left

	/* invariant: x2 = bytes remaining beyond the final 16 byte tail */
0:	ldp	x4, x7, [x0], #16
	ldp	x6, x8, [x1], #16
	cmp	x4, x6			// mismatch in first pair?
	csel	x4, x4, x7, ne		// if not, try second pair
	csel	x6, x6, x8, ne
	cmp	x3, x5			// was there a mismatch previously?
	csel	x3, x3, x4, ne		// apply new pair if there was not
	csel	x5, x5, x6, ne
	subs	x2, x2, #16
	bhi	0b

	/*
	 * x2 is now in (-16, 0]: step back so that the final load covers
	 * the last 16 bytes of the buffer, possibly overlapping bytes that
	 * were already processed.
	 */
.Ltail:	add	x0, x0, x2
	add	x1, x1, x2
	ldp	x4, x7, [x0]
	ldp	x6, x8, [x1]
	cmp	x4, x6			// mismatch in first pair?
	csel	x4, x4, x7, ne		// if not, try second pair
	csel	x6, x6, x8, ne
	cmp	x3, x5			// was there a mismatch previously?
	csel	x3, x3, x4, ne		// apply new pair if there was not
	csel	x5, x5, x6, ne
	rev	x3, x3			// byte swap: first byte most significant
	rev	x5, x5
	cmp	x3, x5
	csetm	w0, lo			// w0 = x3 >= x5 ? 0 : -1
	csinc	w0, w0, wzr, ls		// w0 = x3 <=> x5 ? 1 : 0 : -1
	ret
END(timingsafe_memcmp)
118