xref: /freebsd/lib/libc/amd64/string/memcmp.S (revision 6be3386466ab79a84b48429ae66244f21526d3df)
/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
30
#include <machine/asm.h>
__FBSDID("$FreeBSD$");

/*
 * Note: this routine was written with kernel use in mind (read: no simd),
 * it is only present in userspace as a temporary measure until something
 * better gets imported.
 */

#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * int memcmp(const void *s1, const void *s2, size_t len)
 *
 * ABI:      System V AMD64, AT&T syntax
 * In:       %rdi = s1, %rsi = s2, %rdx = len
 * Out:      %eax = 0 when no byte differs, otherwise the byte of s1 minus
 *           the byte of s2 at the first differing position, both
 *           zero-extended before the subtraction
 * Clobbers: %rdi, %rsi, %rdx, %r8, %r9, flags
 *
 * No stack is used and only general purpose registers are touched.
 *
 * The length is dispatched to a handler per size class.  Handlers for up
 * to 32 bytes compare the head and the tail of the buffers with loads that
 * are allowed to overlap, which covers every length of the class without a
 * loop.  Longer buffers are consumed 32 bytes at a time and the remainder
 * is fed back into the dispatcher.
 *
 * Local label scheme: 10<lo><hi> is the handler for lengths in (lo, hi]
 * (100204 also takes len == 2, 103200 is the 32+ loop) and
 * 10<lo><hi><off> is its fixup for a mismatch found at chunk <off>.
 * Fixups only move %rdi/%rsi to the chunk known to contain the first
 * difference and then share the narrowing code at 80: and 1:.
 */
ENTRY(memcmp)
	xorl	%eax,%eax		/* stays 0 unless a difference is found */
10:
	cmpq	$16,%rdx
	ja	101632f

	/* len <= 16 from here on, so %dl holds the entire length. */
	cmpb	$8,%dl
	jg	100816f

	cmpb	$4,%dl
	jg	100408f

	cmpb	$2,%dl
	jge	100204f

	cmpb	$1,%dl
	jl	100000f			/* len == 0, return the zeroed %eax */
	movzbl	(%rdi),%eax		/* len == 1, the difference is the result */
	movzbl	(%rsi),%r8d
	subl	%r8d,%eax
100000:
	ret

/*
 * 9..16 bytes: first 8 and last 8 bytes.
 */
	ALIGN_TEXT
100816:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10081608f
	ret

/*
 * 5..8 bytes: first 4 and last 4 bytes.
 */
	ALIGN_TEXT
100408:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f			/* difference is in these 4 bytes, scan them */
	movl	-4(%rdi,%rdx),%r8d
	movl	-4(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	10040804f
	ret

/*
 * 2..4 bytes: first 2 and last 2 bytes.  On a mismatch the byte scan at 1:
 * starts from the beginning; it stops at the differing byte, which lies
 * within len, so it never reads past the buffers.
 */
	ALIGN_TEXT
100204:
	movzwl	(%rdi),%r8d
	movzwl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	movzwl	-2(%rdi,%rdx),%r8d
	movzwl	-2(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	ret

/*
 * 17..32 bytes: first 16 and last 16 bytes, 8 bytes at a time.
 */
	ALIGN_TEXT
101632:
	cmpq	$32,%rdx
	ja	103200f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	8(%rdi),%r8
	movq	8(%rsi),%r9
	cmpq	%r8,%r9
	jne	10163208f
	movq	-16(%rdi,%rdx),%r8
	movq	-16(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163216f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163224f
	ret

/*
 * More than 32 bytes: 32 bytes per iteration, checked as two 16-byte
 * halves.  For each half the quadwords are subtracted and the two results
 * are or-ed together, which is non-zero exactly when the halves differ.
 */
	ALIGN_TEXT
103200:
	movq	(%rdi),%r8
	movq	8(%rdi),%r9
	subq	(%rsi),%r8
	subq	8(%rsi),%r9
	orq	%r8,%r9
	jnz	10320000f

	movq	16(%rdi),%r8
	movq	24(%rdi),%r9
	subq	16(%rsi),%r8
	subq	24(%rsi),%r9
	orq	%r8,%r9
	jnz	10320016f

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	103200b
	testb	%dl,%dl			/* fewer than 32 bytes left, %dl is the full count */
	jne	10b			/* dispatch the tail, %eax is still 0 */
	ret

/*
 * Mismatch was found.
 *
 * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
 */
	ALIGN_TEXT
10320016:
	leaq	16(%rdi),%rdi		/* second half of the 32-byte chunk */
	leaq	16(%rsi),%rsi
10320000:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	leaq	8(%rdi),%rdi		/* low 8 bytes match, it is in the high 8 */
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10081608:
10163224:
	leaq	-8(%rdi,%rdx),%rdi	/* last 8 bytes of the buffers */
	leaq	-8(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163216:
	leaq	-16(%rdi,%rdx),%rdi
	leaq	-16(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163208:
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10040804:
	leaq	-4(%rdi,%rdx),%rdi	/* last 4 bytes of the buffers */
	leaq	-4(%rsi,%rdx),%rsi
	jmp	1f

/*
 * %rdi/%rsi point at 8 bytes known to contain a difference; pick the
 * 4-byte half which holds it.
 */
	ALIGN_TEXT
80:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	leaq	4(%rdi),%rdi
	leaq	4(%rsi),%rsi

/*
 * We have up to 4 bytes to inspect.
 *
 * The first three bytes are tested one by one; if they all match the
 * fourth one is loaded and subtracted without a test.
 */
1:
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	1(%rdi),%eax
	movzbl	1(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	2(%rdi),%eax
	movzbl	2(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	3(%rdi),%eax
	movzbl	3(%rsi),%r8d
2:
	subl	%r8d,%eax		/* byte of s1 minus byte of s2 */
	ret
END(memcmp)

	.section .note.GNU-stack,"",%progbits
219