/* xref: /freebsd/lib/libc/amd64/string/timingsafe_memcmp.S (revision f29af8618bf94f1e58877feb6dbef35bd8bbf56b) */
/*-
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE
 */

#include <machine/asm.h>

#define ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * int timingsafe_memcmp(const void *rdi, const void *rsi, size_t rdx)
 *
 * Order two rdx-byte buffers as if each were one big-endian integer, which
 * is the same as comparing them byte by byte as unsigned values from the
 * lowest address upwards.
 *
 * In:	rdi = first buffer, rsi = second buffer, rdx = length in bytes
 * Out:	eax < 0 if the first buffer sorts lower, eax == 0 if the buffers
 *	are equal (always the case for rdx == 0), eax > 0 otherwise
 * Clobbers: rcx, rdx, rsi, rdi, r8-r11 and the flags; no stack frame
 *
 * Every conditional branch below tests the length only.  Buffer contents
 * are consumed exclusively by loads, arithmetic and CMOVcc, so the
 * sequence of executed instructions is the same for any two inputs of
 * equal length.
 *
 * Lengths up to 16 are handled with two loads per buffer, one anchored at
 * the start and one anchored at the end.  The two windows may overlap;
 * this is harmless because the overlapping bytes sit at the same offsets
 * in both buffers and only matter once everything before them is equal.
 */
ENTRY(timingsafe_memcmp)
	/* dispatch on the length; edx suffices once rdx <= 16 is known */
	cmp	$16, %rdx		# at least 17 bytes to process?
	ja	.Lgt16

	cmp	$8, %edx		# at least 9 bytes to process?
	ja	.L0916

	cmp	$4, %edx		# at least 5 bytes to process?
	ja	.L0508

	cmp	$2, %edx		# at least 3 bytes to process?
	ja	.L0304

	test	%edx, %edx		# buffer empty?
	jnz	.L0102

	xor	%eax, %eax		# empty buffer always matches
	ret

	/*
	 * 1--2 bytes: build (first byte << 8) | last byte for each buffer.
	 * With a length of 1 the first and the last byte coincide.  Both
	 * values fit into 16 bits, so their 32-bit difference carries the
	 * sign of the comparison and is zero only if they are equal.
	 */
.L0102:	movzbl	-1(%rdi, %rdx, 1), %eax	# last byte, zero extended
	movzbl	-1(%rsi, %rdx, 1), %ecx
	mov	(%rdi), %ah		# first byte goes into bits 8--15
	mov	(%rsi), %ch
	sub	%ecx, %eax
	ret

	/*
	 * 3--4 bytes: the last two bytes end up in the upper half of
	 * ecx/edx, the first two bytes in ax/si with the upper halves of
	 * eax/esi clear.  SUB/SBB then subtract tail and head as one wide
	 * big-endian number.  As the heads are only 16 bits wide, eax is
	 * negative after the SBB exactly if the first buffer sorts lower.
	 * SETNZ finally forces a nonzero low byte whenever any bit of the
	 * two differences is set, which leaves the sign of eax untouched.
	 */
.L0304:	movzwl	-2(%rdi, %rdx, 1), %ecx	# last two bytes of each buffer
	movzwl	-2(%rsi, %rdx, 1), %edx
	movzwl	(%rdi), %eax		# first two bytes of each buffer
	movzwl	(%rsi), %esi
	bswap	%ecx			# convert to big endian
	bswap	%edx			# ditto for edx, (e)ax, and (e)si
	rol	$8, %ax			# ROLW is used here so the upper two
	rol	$8, %si			# bytes stay clear, allowing us to
	sub	%edx, %ecx		# save a SBB compared to .L0508
	sbb	%esi, %eax		# head difference minus tail borrow
	or	%eax, %ecx		# nonzero if not equal
	setnz	%al			# negative if <, 0 if =, positive if >
	ret

	/*
	 * 5--8 bytes: first four and last four bytes of each buffer,
	 * byte swapped and subtracted as a 64-bit number edi:ecx - esi:edx.
	 * The final borrow yields the sign, the OR of both difference
	 * halves tells whether the buffers are equal.
	 */
.L0508:	mov	-4(%rdi, %rdx, 1), %ecx	# last four bytes of each buffer
	mov	-4(%rsi, %rdx, 1), %edx
	mov	(%rdi), %edi		# first four bytes of each buffer
	mov	(%rsi), %esi
	bswap	%ecx			# compare in big endian
	bswap	%edx
	bswap	%edi
	bswap	%esi
	sub	%edx, %ecx
	sbb	%esi, %edi
	sbb	%eax, %eax		# -1 if less, 0 if greater or equal
	or	%edi, %ecx		# nonzero if not equal
	setnz	%al			# negative if <, 0 if =, 1 if >
	ret

	/*
	 * 9--16 bytes: same scheme as .L0508 with 8-byte loads, giving a
	 * 128-bit subtraction rdi:rcx - rsi:rdx.
	 */
.L0916:	mov	-8(%rdi, %rdx, 1), %rcx	# last eight bytes of each buffer
	mov	-8(%rsi, %rdx, 1), %rdx
	mov	(%rdi), %rdi		# first eight bytes of each buffer
	mov	(%rsi), %rsi
	bswap	%rcx			# compare in big endian
	bswap	%rdx
	bswap	%rdi
	bswap	%rsi
	sub	%rdx, %rcx
	sbb	%rsi, %rdi
	sbb	%eax, %eax		# -1 if less, 0 if greater or equal
	or	%rdi, %rcx		# nonzero if not equal
	setnz	%al			# negative if <, 0 if =, 1 if >
	ret

	/*
	 * compare 17+ bytes
	 *
	 * r8/r9 carry one 8-byte word of each buffer: the first pair found
	 * to differ so far, or an equal pair as long as no difference has
	 * been seen.  Once they differ, every later "cmp %r8, %r9" leaves
	 * ZF clear and the CMOVE instructions stop replacing them.
	 * rcx is the end offset of the next 16-byte chunk to examine.
	 */
.Lgt16:	mov	(%rdi), %r8		# process first 16 bytes
	mov	(%rsi), %r9
	mov	$32, %ecx		# first loop chunk is bytes 16--31
	cmp	%r8, %r9		# mismatch in head?
	cmove	8(%rdi), %r8		# if not, try second pair
	cmove	8(%rsi), %r9
	cmp	%rdx, %rcx		# at most 32 bytes in total?
	jae	.Ltail			# then the tail covers the rest

	/*
	 * main loop processing 16 bytes per iteration; it only runs while
	 * rcx < rdx, so the chunk ending at rcx lies within the buffers
	 */
	ALIGN_TEXT
0:	mov	-16(%rdi, %rcx, 1), %r10
	mov	-16(%rsi, %rcx, 1), %r11
	cmp	%r10, %r11		# mismatch in first pair?
	cmove	-8(%rdi, %rcx, 1), %r10	# if not, try second pair
	cmove	-8(%rsi, %rcx, 1), %r11
	cmp	%r8, %r9		# was there a mismatch previously?
	cmove	%r10, %r8		# apply new pair if there was not
	cmove	%r11, %r9
	add	$16, %rcx
	cmp	%rdx, %rcx
	jb	0b

	/*
	 * Final 16 bytes, which may overlap bytes examined before.
	 * r10/r11 receive the last eight bytes.  r8/r9 receive the eight
	 * bytes before those unless they already hold a difference, in
	 * which case that difference decides the 128-bit comparison
	 * r8:r10 - r9:r11 below.
	 */
.Ltail:	mov	-8(%rdi, %rdx, 1), %r10
	mov	-8(%rsi, %rdx, 1), %r11
	cmp	%r8, %r9		# was there a mismatch previously?
	cmove	-16(%rdi, %rdx, 1), %r8	# if not, use second to last pair
	cmove	-16(%rsi, %rdx, 1), %r9
	bswap	%r10			# compare in big endian
	bswap	%r11
	bswap	%r8
	bswap	%r9
	sub	%r11, %r10
	sbb	%r9, %r8
	sbb	%eax, %eax		# -1 if less, 0 if greater or equal
	or	%r10, %r8		# nonzero if not equal
	setnz	%al			# negative if <, 0 if =, 1 if >
	ret
END(timingsafe_memcmp)

	.section .note.GNU-stack,"",%progbits