/* xref: /freebsd/lib/libc/amd64/string/memcmp.S (revision b65f813c1ab99448278961c5ca80dc422b1eae29) */
/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

/* Pads with 0x90 (nop) bytes up to the next 16-byte boundary. */
#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * memcmp: compare two byte buffers.
 *
 * In:      %rdi = first buffer
 *          %rsi = second buffer
 *          %rdx = number of bytes to compare
 * Out:     %eax = 0 when all bytes are equal; otherwise the difference
 *          (byte from %rdi buffer) - (byte from %rsi buffer), both
 *          zero-extended, at the first mismatching position.
 * Scratch: %r8, %r9 and the flags; %rdi, %rsi and %rdx are modified.
 *
 * Strategy: word-sized compares locate an 8-byte window that contains a
 * mismatch, then the byte-by-byte tail at label 1 computes the result.
 * For lengths that are not a multiple of the word size the last word is
 * loaded from (end - word size), so head and tail loads may overlap but
 * never read past the buffers.
 *
 * The long numeric labels look like they encode the size class they
 * handle (e.g. 100816 for 8..16 bytes, 101632 for 17..32 bytes, 103200
 * for the 32-byte loop), with two extra digits for the offset of the
 * mismatching word; this is a naming convention only.
 */
ENTRY(memcmp)
	/* Result defaults to "equal"; only the mismatch tail overwrites it. */
	xorl	%eax,%eax
10:
	cmpq	$16,%rdx
	ja	101632f

	/* 8..16 bytes: first word, then the (possibly overlapping) last word. */
100816:
	cmpb	$8,%dl
	jl	100408f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	1f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10081608f
	ret

	/*
	 * 4..7 bytes. On mismatch the byte scan restarts from offset 0; it
	 * stops at the first differing byte, which lies inside the buffers.
	 */
100408:
	cmpb	$4,%dl
	jl	100204f
	movl	(%rsi),%r8d
	movl	(%rdi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	movl	-4(%rsi,%rdx),%r8d
	movl	-4(%rdi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	ret

	/* 2..3 bytes. */
100204:
	cmpb	$2,%dl
	jl	100001f
	movzwl	(%rsi),%r8d
	movzwl	(%rdi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	movzwl	-2(%rsi,%rdx),%r8d
	movzwl	-2(%rdi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	ret

	/* 0..1 bytes; a zero length returns with %eax still 0. */
100001:
	cmpb	$1,%dl
	jl	100000f
	movzbl	(%rdi),%r8d
	movzbl	(%rsi),%r9d
	cmpb	%r8b,%r9b
	jne	1f
100000:
	ret

	/*
	 * 17..32 bytes: two words from the start and two from the end.
	 * Each mismatch target below points %rdi/%rsi at the offending word.
	 */
ALIGN_TEXT
101632:
	cmpq	$32,%rdx
	ja	103200f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	1f
	movq	8(%rdi),%r8
	movq	8(%rsi),%r9
	cmpq	%r8,%r9
	jne	10163208f
	movq	-16(%rdi,%rdx),%r8
	movq	-16(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163216f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163224f
	ret

	/*
	 * More than 32 bytes: consume 32 bytes per iteration as two 16-byte
	 * halves. Subtracting the words and OR-ing the two differences lets
	 * a single branch test 16 bytes at once.
	 */
ALIGN_TEXT
103200:
	movq	(%rdi),%r8
	movq	8(%rdi),%r9
	subq	(%rsi),%r8
	subq	8(%rsi),%r9
	or	%r8,%r9
	jnz	10320000f

	movq	16(%rdi),%r8
	movq	24(%rdi),%r9
	subq	16(%rsi),%r8
	subq	24(%rsi),%r9
	or	%r8,%r9
	jnz	10320016f

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	103200b
	/*
	 * Fewer than 32 bytes remain, so %dl holds the whole count. Hand a
	 * non-empty remainder to the small-size code; %eax is still 0.
	 */
	cmpb	$0,%dl
	jne	10b
	ret

10320016:
	/* Mismatch is in the second 16-byte half of the current chunk. */
	leaq	16(%rdi),%rdi
	leaq	16(%rsi),%rsi
10320000:
/*
 * Mismatch was found within a 16 bytes range. The part of the routine
 * which calculates it only operates on sizes up to 8 bytes. Find the
 * right part.
 */
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	1f
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	1f
10163224:
	/* Mismatch is in the last word of a 17..32 byte buffer. */
	leaq	-8(%rdi,%rdx),%rdi
	leaq	-8(%rsi,%rdx),%rsi
	jmp	1f
10163216:
	/* Mismatch is in the second-to-last word of a 17..32 byte buffer. */
	leaq	-16(%rdi,%rdx),%rdi
	leaq	-16(%rsi,%rdx),%rsi
	jmp	1f
10163208:
10081608:
	/*
	 * The first 8 bytes matched, so the differing byte is at offset 8
	 * or later. Scanning from offset 8 reaches it before the end of
	 * the buffers.
	 */
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	1f

/*
 * Mismatch was found. We have no more than 8 bytes to inspect.
 */
ALIGN_TEXT
1:
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	1(%rdi),%eax
	movzbl	1(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	2(%rdi),%eax
	movzbl	2(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	3(%rdi),%eax
	movzbl	3(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	4(%rdi),%eax
	movzbl	4(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	5(%rdi),%eax
	movzbl	5(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	6(%rdi),%eax
	movzbl	6(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	7(%rdi),%eax
	movzbl	7(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	/*
	 * All 8 bytes compared equal. Every jump to label 1 follows a
	 * detected mismatch, so this looks like a defensive fallback.
	 */
	xorl	%eax,%eax
	ret
2:
	/* Both operands are zero-extended bytes, so the sign is correct. */
	subl	%r8d,%eax
	ret
END(memcmp)

215	.section .note.GNU-stack,"",%progbits
216