xref: /freebsd/lib/libc/aarch64/string/timingsafe_bcmp.S (revision f2c98669fc1b3fd2dbc7a7e3eedd098970a10dec)
/*
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Robert Clausecker
 */
6*f2c98669SRobert Clausecker
7*f2c98669SRobert Clausecker#include <machine/asm.h>
8*f2c98669SRobert Clausecker
/*
 * int timingsafe_bcmp(const void *b1, const void *b2, size_t len)
 *
 * In:    x0 = b1, x1 = b2, x2 = len
 * Out:   w0 = 0 if the first len bytes of b1 and b2 are equal,
 *        some nonzero value otherwise
 * Clobb: x0-x2, x9-x12, v0-v3, v16, condition flags
 *
 * Every conditional branch below tests len only; buffer contents are
 * never branched on.  Each length class XORs a fixed-size window taken
 * from the start of both buffers and one taken from their end (the two
 * windows may overlap) and ORs all differences together.
 */
ENTRY(timingsafe_bcmp)
	/* dispatch on the length class, largest first */
	cmp	x2, #32
	b.hi	.Lbulk			// len > 32

	cmp	x2, #16
	b.hi	.Lvec			// 17 <= len <= 32

	cmp	x2, #8
	b.hi	.Ldword			// 9 <= len <= 16

	cmp	x2, #4
	b.hi	.Lword			// 5 <= len <= 8

	cmp	x2, #2
	b.hi	.Lhalf			// 3 <= len <= 4

	cbz	x2, .Lempty		// len == 0

	/* 1 <= len <= 2: first byte and last byte */
	sub	x2, x2, #1		// x2 = offset of the last byte
	ldrb	w9, [x0]
	ldrb	w11, [x0, x2]
	ldrb	w10, [x1]
	ldrb	w12, [x1, x2]
	eor	w9, w9, w10		// difference of the leading bytes
	eor	w11, w11, w12		// difference of the trailing bytes
	orr	w0, w9, w11
	ret

.Lempty:
	mov	w0, wzr			// nothing to compare: report a match
	ret

	/* 3 <= len <= 4: first halfword and last halfword */
.Lhalf:
	sub	x2, x2, #2		// x2 = offset of the last halfword
	ldrh	w9, [x0]
	ldrh	w11, [x0, x2]
	ldrh	w10, [x1]
	ldrh	w12, [x1, x2]
	eor	w9, w9, w10
	eor	w11, w11, w12
	orr	w0, w9, w11
	ret

	/* 5 <= len <= 8: first word and last word */
.Lword:
	sub	x2, x2, #4		// x2 = offset of the last word
	ldr	w9, [x0]
	ldr	w11, [x0, x2]
	ldr	w10, [x1]
	ldr	w12, [x1, x2]
	eor	w9, w9, w10
	eor	w11, w11, w12
	orr	w0, w9, w11
	ret

	/* 9 <= len <= 16: first doubleword and last doubleword */
.Ldword:
	sub	x2, x2, #8		// x2 = offset of the last doubleword
	ldr	x9, [x0]
	ldr	x11, [x0, x2]
	ldr	x10, [x1]
	ldr	x12, [x1, x2]
	eor	x9, x9, x10
	eor	x11, x11, x12
	orr	x0, x9, x11
	orr	x0, x0, x0, lsr #32	// fold the high half into w0, which is all that is returned
	ret

	/* 17 <= len <= 32: first 16 bytes and last 16 bytes */
.Lvec:
	sub	x2, x2, #16		// x2 = offset of the last 16 bytes
	ldr	q0, [x0]
	ldr	q2, [x0, x2]
	ldr	q1, [x1]
	ldr	q3, [x1, x2]
	eor	v0.16b, v0.16b, v1.16b
	eor	v2.16b, v2.16b, v3.16b
	orr	v0.16b, v0.16b, v2.16b
	umaxv	s0, v0.4s		// largest word is nonzero iff any word is
	fmov	w0, s0
	ret

	/*
	 * len > 32: compare 32 bytes per step, accumulating every
	 * difference in v16.  The first chunk is handled ahead of the
	 * loop so that the accumulator needs no separate clearing.
	 */
.Lbulk:
	ldp	q0, q1, [x0], #32
	ldp	q2, q3, [x1], #32
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v16.16b, v0.16b, v1.16b
	subs	x2, x2, #64		// x2 = len - 64
	b.ls	.Llast			// len <= 64: only the final chunk is left

.Lloop:
	ldp	q0, q1, [x0], #32
	ldp	q2, q3, [x1], #32
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v0.16b, v0.16b, v1.16b
	orr	v16.16b, v16.16b, v0.16b
	subs	x2, x2, #32
	b.hi	.Lloop			// keep going while x2 is still positive

	/*
	 * Final chunk.  x2 is zero or negative here; adding it to the
	 * advanced pointers backs them up to exactly 32 bytes before the
	 * end of each buffer, re-reading bytes already compared if needed.
	 */
.Llast:
	add	x0, x0, x2
	add	x1, x1, x2
	ldp	q0, q1, [x0]
	ldp	q2, q3, [x1]
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v0.16b, v0.16b, v1.16b
	orr	v16.16b, v16.16b, v0.16b
	umaxv	s16, v16.4s		// largest word is nonzero iff any word is
	fmov	w0, s16
	ret
END(timingsafe_bcmp)
114