/* xref: /freebsd/lib/libc/amd64/string/memcmp.S (revision b3e7694832e81d7a904a10f525f8797b753bf0d3) */
/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
28
29#include <machine/asm.h>
30__FBSDID("$FreeBSD$");
31
/*
 * Note: this routine was written with kernel use in mind (read: no SIMD).
 * It is only present in userspace as a temporary measure until something
 * better gets imported.
 */
37
#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */
39
/*
 * int memcmp(const void *b1, const void *b2, size_t len)
 * int bcmp(const void *b1, const void *b2, size_t len)	(when BCMP is defined)
 *
 * Registers (System V AMD64 calling convention):
 *	in:		%rdi = b1, %rsi = b2, %rdx = len
 *	out:		%eax = result
 *	clobbered:	%rdi, %rsi, %rdx, %r8, %r9, flags
 * The stack is not touched.
 *
 * Result:
 *	memcmp:	0 when the buffers are equal, otherwise the difference between
 *		the first pair of differing bytes, each zero-extended.
 *	bcmp:	0 when the buffers are equal, non-zero otherwise.
 *
 * Strategy: buffers of up to 32 bytes are checked with a handful of loads
 * anchored at the start and at the end of the buffers (the two sets may
 * overlap).  Longer buffers are walked 32 bytes at a time and whatever is
 * left is handed back to the short-size code.  Multi-byte loads are only
 * tested for equality; once a differing chunk is known, the memcmp variant
 * narrows it down (16 -> 8 -> 4 bytes) and then compares byte by byte.
 *
 * Numeric local labels: 10 is the size dispatcher.  10LLHH labels serve a
 * size class (100816: 8 < len <= 16, 101632: 16 < len <= 32, 103200:
 * len > 32, ...), and the 8-digit labels are the mismatch handlers reached
 * from the corresponding size class.  80 and 1 are the shared "mismatch
 * within 8 bytes" and "mismatch within 4 bytes" handlers.
 */
#ifdef BCMP
ENTRY(bcmp)
#else
ENTRY(memcmp)
#endif
	xorl	%eax,%eax		/* result stays 0 unless proven otherwise */
10:
	/* Dispatch on the (remaining) length; the big loop re-enters here. */
	cmpq	$16,%rdx
	ja	101632f

	/*
	 * len <= 16 from here on, so %dl holds the whole length.  Lengths
	 * are unsigned, hence the unsigned condition codes.
	 */
	cmpb	$8,%dl
	ja	100816f

	cmpb	$4,%dl
	ja	100408f

	cmpb	$2,%dl
	jae	100204f

	cmpb	$1,%dl
	jb	100000f			/* len == 0: equal, %eax is already 0 */
	/* len == 1: the byte difference is the result for both variants. */
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	subl	%r8d,%eax
100000:
	ret

	ALIGN_TEXT
100816:
	/* 8 < len <= 16: first 8 bytes, then the last 8 bytes. */
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10081608f
	ret
	ALIGN_TEXT
100408:
	/* 4 < len <= 8: first 4 bytes, then the last 4 bytes. */
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	80f
	movl	-4(%rdi,%rdx),%r8d
	movl	-4(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	10040804f
	ret
	ALIGN_TEXT
100204:
	/*
	 * 2 <= len <= 4: first 2 bytes, then the last 2 bytes.  On a
	 * mismatch the pointers are left alone: the differing byte is within
	 * the first len bytes, so the byte-wise code stops before reading
	 * past the end of the buffers.
	 */
	movzwl	(%rdi),%r8d
	movzwl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	movzwl	-2(%rdi,%rdx),%r8d
	movzwl	-2(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	ret
	ALIGN_TEXT
101632:
	/* 16 < len <= 32: first 16 bytes, then the last 16 bytes. */
	cmpq	$32,%rdx
	ja	103200f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	8(%rdi),%r8
	movq	8(%rsi),%r9
	cmpq	%r8,%r9
	jne	10163208f
	movq	-16(%rdi,%rdx),%r8
	movq	-16(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163216f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163224f
	ret
	ALIGN_TEXT
103200:
	/*
	 * len > 32: consume 32 bytes per iteration.  For each 16-byte half
	 * the two quadwords of b2 are subtracted from those of b1 and the
	 * results are ORed together; a non-zero value means that half holds
	 * a mismatch.
	 */
	movq	(%rdi),%r8
	movq	8(%rdi),%r9
	subq	(%rsi),%r8
	subq	8(%rsi),%r9
	orq	%r8,%r9
	jnz	10320000f

	movq	16(%rdi),%r8
	movq	24(%rdi),%r9
	subq	16(%rsi),%r8
	subq	24(%rsi),%r9
	orq	%r8,%r9
	jnz	10320016f

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	103200b
	/* Fewer than 32 bytes are left, so %dl holds the whole remainder. */
	testb	%dl,%dl
	jne	10b			/* finish the tail with the short-size code */
	ret

/*
 * Mismatch was found.
 */
#ifdef BCMP
	/*
	 * bcmp only reports that the buffers differ.  %eax has not been
	 * written since the xorl at entry on any path leading here, so the
	 * result is 1.
	 */
	ALIGN_TEXT
10320016:
10320000:
10081608:
10163224:
10163216:
10163208:
10040804:
80:
1:
	leal	1(%eax),%eax
	ret
END(bcmp)
#else
/*
 * We need to compute the difference between strings.
 * Start with narrowing the range down (16 -> 8 -> 4 bytes).
 */
	ALIGN_TEXT
10320016:
	/* The mismatch is in the second 16-byte half of the current chunk. */
	leaq	16(%rdi),%rdi
	leaq	16(%rsi),%rsi
10320000:
	/* The mismatch is within 16 bytes: pick the differing 8-byte word. */
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10081608:
10163224:
	/* The mismatch is in the last 8 bytes of the buffers. */
	leaq	-8(%rdi,%rdx),%rdi
	leaq	-8(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163216:
	/* The mismatch is in the 8 bytes starting 16 bytes before the end. */
	leaq	-16(%rdi,%rdx),%rdi
	leaq	-16(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163208:
	/* The mismatch is in the 8 bytes starting at offset 8. */
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10040804:
	/* The mismatch is in the last 4 bytes of the buffers. */
	leaq	-4(%rdi,%rdx),%rdi
	leaq	-4(%rsi,%rdx),%rsi
	jmp	1f

	ALIGN_TEXT
80:
	/* The mismatch is within 8 bytes: pick the differing 4-byte half. */
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	leaq	4(%rdi),%rdi
	leaq	4(%rsi),%rsi

/*
 * We have up to 4 bytes to inspect.
 */
1:
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	1(%rdi),%eax
	movzbl	1(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	2(%rdi),%eax
	movzbl	2(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	/*
	 * Bytes 0..2 match and a mismatch is known to exist, so byte 3 is
	 * the differing one; no compare is needed before the subtraction.
	 */
	movzbl	3(%rdi),%eax
	movzbl	3(%rsi),%r8d
2:
	subl	%r8d,%eax		/* difference of the zero-extended bytes */
	ret
END(memcmp)
#endif
237
/* By GNU toolchain convention, an empty .note.GNU-stack section marks this object as not needing an executable stack. */
	.section .note.GNU-stack,"",%progbits
239