xref: /freebsd/lib/libc/aarch64/string/timingsafe_bcmp.S (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
/*
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Robert Clausecker
 */
6
7#include <machine/asm.h>
8
/*
 * timingsafe_bcmp: test two equally long buffers for equality.
 * See timingsafe_bcmp(3) for the C-level contract.
 *
 * In:		x0 = first buffer
 *		x1 = second buffer
 *		x2 = length in bytes
 * Out:		w0 = 0 if every byte pair is equal, an unspecified
 *		nonzero value otherwise
 * Scratch:	x0-x2, x9-x12, v0-v3, v16, condition flags
 *
 * The length selects one of several straight-line size classes.  Up
 * to 32 bytes, each class loads one chunk from the start and one chunk
 * from the end of both buffers (the chunks may overlap), XORs matching
 * chunks and ORs the differences together.  Longer inputs walk the
 * buffers in 32 byte steps and finish with the last 32 bytes.
 *
 * Every branch below tests the length only; buffer contents never
 * influence control flow or load addresses.
 */
ENTRY(timingsafe_bcmp)
	/* dispatch on the length, largest class first */
	cmp	x2, #32
	bhi	.Lbulk			// 33 bytes or more
	cmp	x2, #16
	bhi	.Lquad			// 17 to 32 bytes
	cmp	x2, #8
	bhi	.Ldword			// 9 to 16 bytes
	cmp	x2, #4
	bhi	.Lword			// 5 to 8 bytes
	cmp	x2, #2
	bhi	.Lhalf			// 3 to 4 bytes
	cbz	x2, .Lempty		// nothing to compare

	/* 1 to 2 bytes: first byte and last byte */
	sub	x2, x2, #1		// x2 = offset of the last byte
	ldrb	w9, [x0]
	ldrb	w10, [x1]
	ldrb	w11, [x0, x2]
	ldrb	w12, [x1, x2]
	eor	w9, w9, w10		// difference at the start
	eor	w11, w11, w12		// difference at the end
	orr	w0, w9, w11
	ret

	/* zero length: the buffers trivially match */
.Lempty:
	mov	w0, wzr
	ret

	/* 3 to 4 bytes: first halfword and last halfword */
.Lhalf:
	sub	x2, x2, #2		// x2 = offset of the last halfword
	ldrh	w9, [x0]
	ldrh	w10, [x1]
	ldrh	w11, [x0, x2]
	ldrh	w12, [x1, x2]
	eor	w9, w9, w10
	eor	w11, w11, w12
	orr	w0, w9, w11
	ret

	/* 5 to 8 bytes: first word and last word */
.Lword:
	sub	x2, x2, #4		// x2 = offset of the last word
	ldr	w9, [x0]
	ldr	w10, [x1]
	ldr	w11, [x0, x2]
	ldr	w12, [x1, x2]
	eor	w9, w9, w10
	eor	w11, w11, w12
	orr	w0, w9, w11
	ret

	/* 9 to 16 bytes: first doubleword and last doubleword */
.Ldword:
	sub	x2, x2, #8		// x2 = offset of the last doubleword
	ldr	x9, [x0]
	ldr	x10, [x1]
	ldr	x11, [x0, x2]
	ldr	x12, [x1, x2]
	eor	x9, x9, x10
	eor	x11, x11, x12
	orr	x0, x9, x11
	/*
	 * Only w0 is returned, so fold the upper half of the
	 * difference into the lower half.
	 */
	orr	x0, x0, x0, lsr #32
	ret

	/* 17 to 32 bytes: first 16 bytes and last 16 bytes */
.Lquad:
	sub	x2, x2, #16		// x2 = offset of the last 16 bytes
	ldr	q0, [x0]
	ldr	q1, [x1]
	ldr	q2, [x0, x2]
	ldr	q3, [x1, x2]
	eor	v0.16b, v0.16b, v1.16b
	eor	v2.16b, v2.16b, v3.16b
	orr	v0.16b, v0.16b, v2.16b
	umaxv	s0, v0.4s		// largest word is nonzero iff any word is
	fmov	w0, s0
	ret

	/*
	 * 33 bytes or more.  v16 accumulates the OR of all differences.
	 * Once the first 32 bytes are consumed, x2 holds the number of
	 * bytes not yet compared minus 32; this remains true on every
	 * pass through the loop.
	 */
.Lbulk:
	ldp	q0, q1, [x0], #32
	ldp	q2, q3, [x1], #32
	subs	x2, x2, #64		// vector ops below leave the flags alone
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v16.16b, v0.16b, v1.16b
	bls	.Llast			// at most 32 bytes remain

.Lloop:
	ldp	q0, q1, [x0], #32
	ldp	q2, q3, [x1], #32
	subs	x2, x2, #32
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v16.16b, v16.16b, v0.16b
	orr	v16.16b, v16.16b, v1.16b
	bhi	.Lloop			// more than 32 bytes still remain

	/*
	 * Between 1 and 32 bytes remain, so x2 is in the range -31..0.
	 * Step back so that the final load covers exactly the last 32
	 * bytes; it may overlap bytes that were already compared.
	 */
.Llast:
	add	x0, x0, x2
	add	x1, x1, x2
	ldp	q0, q1, [x0]
	ldp	q2, q3, [x1]
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v16.16b, v16.16b, v0.16b
	orr	v16.16b, v16.16b, v1.16b
	umaxv	s0, v16.4s		// largest word is nonzero iff any word is
	fmov	w0, s0
	ret
END(timingsafe_bcmp)
114