xref: /freebsd/lib/libc/amd64/string/timingsafe_memcmp.S (revision 5048c1b85506c5e0f441ee7dd98dd8d96d0a4a24)
1*5048c1b8SRobert Clausecker/*-
2*5048c1b8SRobert Clausecker * Copyright (c) 2023 The FreeBSD Foundation
3*5048c1b8SRobert Clausecker *
4*5048c1b8SRobert Clausecker * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
5*5048c1b8SRobert Clausecker * under sponsorship from the FreeBSD Foundation.
6*5048c1b8SRobert Clausecker *
7*5048c1b8SRobert Clausecker * Redistribution and use in source and binary forms, with or without
8*5048c1b8SRobert Clausecker * modification, are permitted provided that the following conditions
9*5048c1b8SRobert Clausecker * are met:
10*5048c1b8SRobert Clausecker * 1. Redistributions of source code must retain the above copyright
11*5048c1b8SRobert Clausecker *    notice, this list of conditions and the following disclaimer.
12*5048c1b8SRobert Clausecker * 2. Redistributions in binary form must reproduce the above copyright
13*5048c1b8SRobert Clausecker *    notice, this list of conditions and the following disclaimer in the
14*5048c1b8SRobert Clausecker *    documentation and/or other materials provided with the distribution.
15*5048c1b8SRobert Clausecker *
16*5048c1b8SRobert Clausecker * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
17*5048c1b8SRobert Clausecker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18*5048c1b8SRobert Clausecker * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19*5048c1b8SRobert Clausecker * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20*5048c1b8SRobert Clausecker * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21*5048c1b8SRobert Clausecker * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22*5048c1b8SRobert Clausecker * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23*5048c1b8SRobert Clausecker * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24*5048c1b8SRobert Clausecker * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25*5048c1b8SRobert Clausecker * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26*5048c1b8SRobert Clausecker * SUCH DAMAGE
27*5048c1b8SRobert Clausecker */
28*5048c1b8SRobert Clausecker
29*5048c1b8SRobert Clausecker#include <machine/asm.h>
30*5048c1b8SRobert Clausecker
/*
 * ALIGN_TEXT expands to a .p2align directive with arguments 4 and 0x90.
 * In this file it is placed immediately in front of the main loop label
 * of the 17+ byte path.
 */
31*5048c1b8SRobert Clausecker#define ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */
32*5048c1b8SRobert Clausecker
/*
 * timingsafe_memcmp: compare the rdx bytes at rdi with the rdx bytes at rsi.
 *
 * Result in eax: negative if the first buffer compares less than the
 * second, zero if all bytes are equal, positive otherwise.  Bytes are
 * compared as unsigned values.  Chunks are byte-swapped before they are
 * subtracted so that the byte at the lowest address is the most
 * significant one, i.e. the first differing byte decides the result.
 *
 * Every conditional jump below tests only the length (rdx/edx) or the
 * loop offset (rcx), and every load address is formed from rdi, rsi, rdx
 * and rcx only.  Buffer contents influence the result solely through
 * cmove/sub/sbb/or/setnz.
 *
 * Layout: lengths 0, 1-2, 3-4, 5-8 and 9-16 each have a straight-line
 * path that loads a head chunk and a tail chunk (these overlap whenever
 * the length is below the maximum of its class); longer buffers are
 * handled at .Lgt16.
 *
 * Registers written: rax, rcx, rdx, rsi, rdi, r8-r11.  The stack is not
 * touched.
 */
33*5048c1b8SRobert Clausecker/* int timingsafe_memcmp(const void *rdi, const void *rsi, size_t rdx) */
34*5048c1b8SRobert ClauseckerENTRY(timingsafe_memcmp)
	/*
	 * Dispatch on the length alone.  Once the first compare has not
	 * branched, rdx <= 16, so the remaining tests use edx.
	 */
35*5048c1b8SRobert Clausecker	cmp	$16, %rdx		# at least 17 bytes to process?
36*5048c1b8SRobert Clausecker	ja	.Lgt16
37*5048c1b8SRobert Clausecker
38*5048c1b8SRobert Clausecker	cmp	$8, %edx		# at least 9 bytes to process?
39*5048c1b8SRobert Clausecker	ja	.L0916
40*5048c1b8SRobert Clausecker
41*5048c1b8SRobert Clausecker	cmp	$4, %edx		# at least 5 bytes to process?
42*5048c1b8SRobert Clausecker	ja	.L0508
43*5048c1b8SRobert Clausecker
44*5048c1b8SRobert Clausecker	cmp	$2, %edx		# at least 3 bytes to process?
45*5048c1b8SRobert Clausecker	ja	.L0304
46*5048c1b8SRobert Clausecker
47*5048c1b8SRobert Clausecker	test	%edx, %edx		# buffer empty?
48*5048c1b8SRobert Clausecker	jnz	.L0102
49*5048c1b8SRobert Clausecker
50*5048c1b8SRobert Clausecker	xor	%eax, %eax		# empty buffer always matches
51*5048c1b8SRobert Clausecker	ret
52*5048c1b8SRobert Clausecker
	/*
	 * 1 or 2 bytes.  al/cl receive the last byte (zero-extended into
	 * eax/ecx) and ah/ch the first byte, so ax/cx hold the buffer in
	 * big-endian order; for a single byte the same byte lands in both
	 * halves.  Both values fit in 16 bits, so the 32-bit difference
	 * carries the sign of the comparison directly.
	 */
53*5048c1b8SRobert Clausecker.L0102:	movzbl	-1(%rdi, %rdx, 1), %eax	# load 1--2 bytes from first buffer
54*5048c1b8SRobert Clausecker	movzbl	-1(%rsi, %rdx, 1), %ecx	# same for the second buffer
55*5048c1b8SRobert Clausecker	mov	(%rdi), %ah		# in big endian
56*5048c1b8SRobert Clausecker	mov	(%rsi), %ch
57*5048c1b8SRobert Clausecker	sub	%ecx, %eax		# eax = first - second
58*5048c1b8SRobert Clausecker	ret
59*5048c1b8SRobert Clausecker
	/*
	 * 3 or 4 bytes.  ecx/edx = last two bytes of the first/second
	 * buffer, eax/esi = first two bytes; for 3 bytes the middle byte is
	 * loaded twice.  After the swaps the tail sits big-endian in the
	 * upper half of ecx/edx (lower half zero) and the head big-endian
	 * in ax/si, and SUB/SBB compare head:tail as one multi-word number.
	 * The heads are 16-bit values, so the sign of eax after the SBB is
	 * already the sign of the result and no separate SBB into eax is
	 * needed.
	 */
60*5048c1b8SRobert Clausecker.L0304:	movzwl	-2(%rdi, %rdx, 1), %ecx	# tail of first buffer
61*5048c1b8SRobert Clausecker	movzwl	-2(%rsi, %rdx, 1), %edx	# tail of second buffer, replaces the length
62*5048c1b8SRobert Clausecker	movzwl	(%rdi), %eax		# head of first buffer
63*5048c1b8SRobert Clausecker	movzwl	(%rsi), %esi		# head of second buffer, replaces the pointer
64*5048c1b8SRobert Clausecker	bswap	%ecx			# convert to big endian
65*5048c1b8SRobert Clausecker	bswap	%edx			# ditto for edx, (e)ax, and (e)si
66*5048c1b8SRobert Clausecker	rol	$8, %ax			# ROLW is used here so the upper two
67*5048c1b8SRobert Clausecker	rol	$8, %si			# bytes stay clear, allowing us to
68*5048c1b8SRobert Clausecker	sub	%edx, %ecx		# save a SBB compared to .L0508
69*5048c1b8SRobert Clausecker	sbb	%esi, %eax		# head difference minus borrow from the tails
70*5048c1b8SRobert Clausecker	or	%eax, %ecx		# nonzero if not equal
71*5048c1b8SRobert Clausecker	setnz	%al			# only al is replaced, upper bits of eax keep the sign
72*5048c1b8SRobert Clausecker	ret
73*5048c1b8SRobert Clausecker
	/*
	 * 5 to 8 bytes.  Same scheme with 4-byte chunks: ecx/edx = last
	 * four bytes, edi/esi = first four bytes of the first/second buffer.
	 * The loads overwrite rdx, rdi and rsi only after their last use in
	 * an address.
	 */
74*5048c1b8SRobert Clausecker.L0508:	mov	-4(%rdi, %rdx, 1), %ecx
75*5048c1b8SRobert Clausecker	mov	-4(%rsi, %rdx, 1), %edx
76*5048c1b8SRobert Clausecker	mov	(%rdi), %edi
77*5048c1b8SRobert Clausecker	mov	(%rsi), %esi
78*5048c1b8SRobert Clausecker	bswap	%ecx			# compare in big endian
79*5048c1b8SRobert Clausecker	bswap	%edx
80*5048c1b8SRobert Clausecker	bswap	%edi
81*5048c1b8SRobert Clausecker	bswap	%esi
82*5048c1b8SRobert Clausecker	sub	%edx, %ecx		# tails first, borrow goes into the heads
83*5048c1b8SRobert Clausecker	sbb	%esi, %edi
84*5048c1b8SRobert Clausecker	sbb	%eax, %eax		# -1 if less, 0 if greater or equal
85*5048c1b8SRobert Clausecker	or	%edi, %ecx		# nonzero if not equal
86*5048c1b8SRobert Clausecker	setnz	%al			# negative if <, 0 if =, 1 if >
87*5048c1b8SRobert Clausecker	ret
88*5048c1b8SRobert Clausecker
	/*
	 * 9 to 16 bytes.  Same scheme with 8-byte chunks: rcx/rdx = last
	 * eight bytes, rdi/rsi = first eight bytes of the first/second
	 * buffer.
	 */
89*5048c1b8SRobert Clausecker.L0916:	mov	-8(%rdi, %rdx, 1), %rcx
90*5048c1b8SRobert Clausecker	mov	-8(%rsi, %rdx, 1), %rdx
91*5048c1b8SRobert Clausecker	mov	(%rdi), %rdi
92*5048c1b8SRobert Clausecker	mov	(%rsi), %rsi
93*5048c1b8SRobert Clausecker	bswap	%rcx			# compare in big endian
94*5048c1b8SRobert Clausecker	bswap	%rdx
95*5048c1b8SRobert Clausecker	bswap	%rdi
96*5048c1b8SRobert Clausecker	bswap	%rsi
97*5048c1b8SRobert Clausecker	sub	%rdx, %rcx		# tails first, borrow goes into the heads
98*5048c1b8SRobert Clausecker	sbb	%rsi, %rdi
99*5048c1b8SRobert Clausecker	sbb	%eax, %eax		# -1 if less, 0 if greater or equal
100*5048c1b8SRobert Clausecker	or	%rdi, %rcx		# nonzero if not equal
101*5048c1b8SRobert Clausecker	setnz	%al			# negative if <, 0 if =, 1 if >
102*5048c1b8SRobert Clausecker	ret
103*5048c1b8SRobert Clausecker
104*5048c1b8SRobert Clausecker	/* compare 17+ bytes */
	/*
	 * r8/r9 carry one 8-byte pair (first buffer/second buffer) through
	 * the rest of the function: the first pair found to differ so far,
	 * or an equal pair while none has been found.  rcx is the end
	 * offset of the next 16-byte block for the loop; it starts at 32
	 * because bytes 0-15 are handled right here.  With 32 bytes or
	 * fewer the loop is skipped and .Ltail covers the remainder.
	 */
105*5048c1b8SRobert Clausecker.Lgt16:	mov	(%rdi), %r8		# process first 16 bytes
106*5048c1b8SRobert Clausecker	mov	(%rsi), %r9
107*5048c1b8SRobert Clausecker	mov	$32, %ecx		# end offset of the first loop block
108*5048c1b8SRobert Clausecker	cmp	%r8, %r9		# mismatch in head?
109*5048c1b8SRobert Clausecker	cmove	8(%rdi), %r8		# if not, try second pair
110*5048c1b8SRobert Clausecker	cmove	8(%rsi), %r9
111*5048c1b8SRobert Clausecker	cmp	%rdx, %rcx		# length <= 32?
112*5048c1b8SRobert Clausecker	jae	.Ltail
113*5048c1b8SRobert Clausecker
114*5048c1b8SRobert Clausecker	/* main loop processing 16 bytes per iteration */
	/*
	 * Each iteration examines bytes rcx-16 .. rcx-1 and runs only while
	 * rcx < rdx, so the final block of up to 16 bytes is always left to
	 * .Ltail.  r10/r11 pick the differing half of the current block (or
	 * its second half if the first halves are equal) and are copied
	 * into r8/r9 only while r8 == r9; selection is done with CMOVE
	 * rather than with branches.
	 */
115*5048c1b8SRobert Clausecker	ALIGN_TEXT
116*5048c1b8SRobert Clausecker0:	mov	-16(%rdi, %rcx, 1), %r10
117*5048c1b8SRobert Clausecker	mov	-16(%rsi, %rcx, 1), %r11
118*5048c1b8SRobert Clausecker	cmp	%r10, %r11		# mismatch in first pair?
119*5048c1b8SRobert Clausecker	cmove	-8(%rdi, %rcx, 1), %r10	# if not, try second pair
120*5048c1b8SRobert Clausecker	cmove	-8(%rsi, %rcx, 1), %r11
121*5048c1b8SRobert Clausecker	cmp	%r8, %r9		# was there a mismatch previously?
122*5048c1b8SRobert Clausecker	cmove	%r10, %r8		# apply new pair if there was not
123*5048c1b8SRobert Clausecker	cmove	%r11, %r9
124*5048c1b8SRobert Clausecker	add	$16, %rcx		# advance to the next block
125*5048c1b8SRobert Clausecker	cmp	%rdx, %rcx		# does it end before the buffer end?
126*5048c1b8SRobert Clausecker	jb	0b
127*5048c1b8SRobert Clausecker
	/*
	 * r10/r11 = last 8 bytes of each buffer.  If no mismatch was found
	 * so far (r8 == r9), r8/r9 are replaced by the 8 bytes preceding
	 * them; otherwise the recorded mismatching pair is kept.  r8:r10
	 * and r9:r11 are then compared as 128-bit big-endian numbers, so a
	 * recorded mismatch in r8/r9 takes precedence over the tail.
	 */
128*5048c1b8SRobert Clausecker.Ltail:	mov	-8(%rdi, %rdx, 1), %r10
129*5048c1b8SRobert Clausecker	mov	-8(%rsi, %rdx, 1), %r11
130*5048c1b8SRobert Clausecker	cmp	%r8, %r9		# was there a mismatch previously?
131*5048c1b8SRobert Clausecker	cmove	-16(%rdi, %rdx, 1), %r8	# if not, use the second to last pair
132*5048c1b8SRobert Clausecker	cmove	-16(%rsi, %rdx, 1), %r9
133*5048c1b8SRobert Clausecker	bswap	%r10			# compare in big endian
134*5048c1b8SRobert Clausecker	bswap	%r11
135*5048c1b8SRobert Clausecker	bswap	%r8
136*5048c1b8SRobert Clausecker	bswap	%r9
137*5048c1b8SRobert Clausecker	sub	%r11, %r10		# low words first, borrow goes into the high words
138*5048c1b8SRobert Clausecker	sbb	%r9, %r8
139*5048c1b8SRobert Clausecker	sbb	%eax, %eax		# -1 if less, 0 if greater or equal
140*5048c1b8SRobert Clausecker	or	%r10, %r8		# nonzero if not equal
141*5048c1b8SRobert Clausecker	setnz	%al			# negative if <, 0 if =, 1 if >
142*5048c1b8SRobert Clausecker	ret
143*5048c1b8SRobert ClauseckerEND(timingsafe_memcmp)
144*5048c1b8SRobert Clausecker
	/*
	 * Emit an empty .note.GNU-stack section with no flags.  By GNU
	 * toolchain convention this marks the object as not requiring an
	 * executable stack.
	 */
145*5048c1b8SRobert Clausecker	.section .note.GNU-stack,"",%progbits
146