xref: /freebsd/lib/libc/aarch64/string/timingsafe_memcmp.S (revision 3f224333af163d5fcd7547a20993dcf18f19076c)
/*
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Robert Clausecker
 */

#include <machine/asm.h>

/*
 * timingsafe_memcmp: three-way comparison of two byte buffers in which
 * every branch depends on the length only, never on the buffer contents.
 *
 * In:    x0 = first buffer
 *        x1 = second buffer
 *        x2 = number of bytes to compare
 * Out:   w0 = negative, zero or positive if the first buffer compares
 *             less than, equal to or greater than the second one, with
 *             bytes ordered as unsigned values.  The 1-2 byte path
 *             returns the raw difference, all other paths -1, 0 or 1.
 * Clobb: x0-x8, condition flags
 *
 * Strategy: the length selects a size class.  Each class loads the first
 * and the last chunk of both buffers (the chunks overlap unless the
 * length is the maximum of its class), picks the first chunk pair that
 * differs using csel, byte-reverses both values so that the byte at the
 * lowest address becomes the most significant one (the data is loaded as
 * little endian), and finishes with one unsigned integer comparison.
 * Re-reading overlapping bytes is harmless: a later chunk is consulted
 * only if all earlier chunks were equal, so the overlapping bytes are
 * equal too and cannot affect the result.
 */
ENTRY(timingsafe_memcmp)
	cmp	x2, #16			// at least 17 bytes to process?
	bhi	.Lgt16

	cmp	x2, #8			// at least 9 bytes to process?
	bhi	.L0916

	cmp	x2, #4			// at least 5 bytes to process?
	bhi	.L0508

	cmp	x2, #2			// at least 3 bytes to process?
	bhi	.L0304

	cbnz	x2, .L0102		// buffer empty?

	mov	w0, #0			// empty buffer always matches
	ret

	/* 1 or 2 bytes: build 16 bit big endian values and subtract them */
.L0102:	ldrb	w3, [x0]		// load first bytes
	ldrb	w4, [x1]
	sub	x2, x2, #1		// x2 = offset of last byte (0 or 1)
	ldrb	w5, [x0, x2]		// load last bytes
	ldrb	w6, [x1, x2]
	bfi	w5, w3, #8, #8		// w5 = first << 8 | last (big endian)
	bfi	w6, w4, #8, #8
	sub	w0, w5, w6		// both < 65536, so the sign is exact
	ret

	/* 3 or 4 bytes: two possibly overlapping halfwords per buffer */
.L0304:	ldrh	w3, [x0]		// load first halfwords
	ldrh	w4, [x1]
	sub	x2, x2, #2		// x2 = offset of last halfword
	ldrh	w5, [x0, x2]		// load last halfwords
	ldrh	w6, [x1, x2]
	bfi	w3, w5, #16, #16	// join halfwords in little endian
	bfi	w4, w6, #16, #16
	rev	w3, w3			// reverse bytes: first byte on top
	rev	w4, w4
	cmp	w3, w4
	csetm	w0, lo			// w0 = w3 >= w4 ? 0 : -1
	csinc	w0, w0, wzr, ls		// w0 = w3 > w4 ? 1 : w0
	ret

	/* 5 to 8 bytes: two possibly overlapping words per buffer */
.L0508:	ldr	w3, [x0]		// load first words
	ldr	w4, [x1]
	sub	x2, x2, #4		// x2 = offset of last word
	ldr	w5, [x0, x2]		// load last words
	ldr	w6, [x1, x2]
	bfi	x3, x5, #32, #32	// join words in little endian
	bfi	x4, x6, #32, #32
	rev	x3, x3			// reverse bytes: first byte on top
	rev	x4, x4
	cmp	x3, x4
	csetm	w0, lo			// w0 = x3 >= x4 ? 0 : -1
	csinc	w0, w0, wzr, ls		// w0 = x3 > x4 ? 1 : w0
	ret

	/* 9 to 16 bytes: two possibly overlapping doublewords per buffer */
.L0916:	ldr	x3, [x0]		// load first doublewords
	ldr	x4, [x1]
	sub	x2, x2, #8		// x2 = offset of last doubleword
	ldr	x5, [x0, x2]		// load last doublewords
	ldr	x6, [x1, x2]
	cmp	x3, x4			// mismatch in first pair?
	csel	x3, x3, x5, ne		// use second pair if first pair equal
	csel	x4, x4, x6, ne
	rev	x3, x3			// reverse bytes: first byte on top
	rev	x4, x4
	cmp	x3, x4
	csetm	w0, lo			// w0 = x3 >= x4 ? 0 : -1
	csinc	w0, w0, wzr, ls		// w0 = x3 > x4 ? 1 : w0
	ret

	/*
	 * More than 16 bytes: process the buffers in 16 byte steps.
	 * x3/x5 hold the first differing doubleword pair found so far;
	 * they are equal as long as no mismatch has been seen.
	 */
.Lgt16:	ldp	x3, x4, [x0], #16
	ldp	x5, x6, [x1], #16
	cmp	x3, x5			// mismatch in first pair?
	csel	x3, x3, x4, ne		// use second pair if first pair equal
	csel	x5, x5, x6, ne
	subs	x2, x2, #32		// account for this chunk and the tail
	bls	.Ltail			// at most 32 bytes: only tail is left

	/* main loop: x2 = bytes left, not counting the final 16 byte tail */
0:	ldp	x4, x7, [x0], #16
	ldp	x6, x8, [x1], #16
	cmp	x4, x6			// mismatch in first pair?
	csel	x4, x4, x7, ne		// if not, try second pair
	csel	x6, x6, x8, ne
	cmp	x3, x5			// was there a mismatch previously?
	csel	x3, x3, x4, ne		// apply new pair if there was not
	csel	x5, x5, x6, ne
	subs	x2, x2, #16
	bhi	0b

	/*
	 * Tail: x2 is zero or negative here; adding it moves both pointers
	 * back so that the final load covers the last 16 bytes exactly.
	 */
.Ltail:	add	x0, x0, x2
	add	x1, x1, x2
	ldp	x4, x7, [x0]
	ldp	x6, x8, [x1]
	cmp	x4, x6			// mismatch in first pair?
	csel	x4, x4, x7, ne		// if not, try second pair
	csel	x6, x6, x8, ne
	cmp	x3, x5			// was there a mismatch previously?
	csel	x3, x3, x4, ne		// apply new pair if there was not
	csel	x5, x5, x6, ne
	rev	x3, x3			// reverse bytes: first byte on top
	rev	x5, x5
	cmp	x3, x5
	csetm	w0, lo			// w0 = x3 >= x5 ? 0 : -1
	csinc	w0, w0, wzr, ls		// w0 = x3 > x5 ? 1 : w0
	ret
END(timingsafe_memcmp)
118