xref: /freebsd/lib/libc/amd64/string/strspn.S (revision 7084133cde6a58412d86bae9f8a55b86141fb304)
1*7084133cSRobert Clausecker/*-
2*7084133cSRobert Clausecker * Copyright (c) 2023 The FreeBSD Foundation
3*7084133cSRobert Clausecker *
4*7084133cSRobert Clausecker * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
5*7084133cSRobert Clausecker * under sponsorship from the FreeBSD Foundation.
6*7084133cSRobert Clausecker *
7*7084133cSRobert Clausecker * Redistribution and use in source and binary forms, with or without
8*7084133cSRobert Clausecker * modification, are permitted provided that the following conditions
9*7084133cSRobert Clausecker * are met:
10*7084133cSRobert Clausecker * 1. Redistributions of source code must retain the above copyright
11*7084133cSRobert Clausecker *    notice, this list of conditions and the following disclaimer.
12*7084133cSRobert Clausecker * 2. Redistributions in binary form must reproduce the above copyright
13*7084133cSRobert Clausecker *    notice, this list of conditions and the following disclaimer in the
14*7084133cSRobert Clausecker *    documentation and/or other materials provided with the distribution.
15*7084133cSRobert Clausecker *
16*7084133cSRobert Clausecker * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17*7084133cSRobert Clausecker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18*7084133cSRobert Clausecker * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19*7084133cSRobert Clausecker * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20*7084133cSRobert Clausecker * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21*7084133cSRobert Clausecker * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22*7084133cSRobert Clausecker * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23*7084133cSRobert Clausecker * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24*7084133cSRobert Clausecker * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25*7084133cSRobert Clausecker * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26*7084133cSRobert Clausecker * SUCH DAMAGE.
27*7084133cSRobert Clausecker */
28*7084133cSRobert Clausecker
29*7084133cSRobert Clausecker#include <machine/asm.h>
30*7084133cSRobert Clausecker#include <machine/param.h>
31*7084133cSRobert Clausecker
32*7084133cSRobert Clausecker#include "amd64_archlevel.h"
33*7084133cSRobert Clausecker
34*7084133cSRobert Clausecker#define ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */
35*7084133cSRobert Clausecker
/*
 * Implementation dispatch: one slot per amd64 architecture level.
 * NOARCHFUNC appears to leave a level without a dedicated kernel so
 * it falls back to the previous entry -- TODO confirm against the
 * ARCHFUNCS/ARCHFUNC macro definitions in amd64_archlevel.h.
 */
36*7084133cSRobert ClauseckerARCHFUNCS(strspn)
37*7084133cSRobert Clausecker	ARCHFUNC(strspn, scalar)
38*7084133cSRobert Clausecker	NOARCHFUNC
39*7084133cSRobert Clausecker	ARCHFUNC(strspn, x86_64_v2)
40*7084133cSRobert ClauseckerENDARCHFUNCS(strspn)
41*7084133cSRobert Clausecker
/*
 * size_t strspn(const char *s, const char *set) -- scalar kernel
 * in:  %rdi = s, %rsi = set
 * out: %rax = length of the longest initial segment of s containing
 *      only bytes that occur in set
 * Builds a 256 byte membership table at (%rsp), then scans s in
 * groups of four bytes until a byte missing from the table is found.
 */
42*7084133cSRobert ClauseckerARCHENTRY(strspn, scalar)
43*7084133cSRobert Clausecker	push	%rbp			# align stack to enable function call
44*7084133cSRobert Clausecker	mov	%rsp, %rbp		# frame pointer, restored by leave
45*7084133cSRobert Clausecker	sub	$256, %rsp		# allocate space for lookup table
46*7084133cSRobert Clausecker
47*7084133cSRobert Clausecker	/* check for special cases */
48*7084133cSRobert Clausecker	movzbl	(%rsi), %edx		# first character in the set
49*7084133cSRobert Clausecker	test	%edx, %edx
50*7084133cSRobert Clausecker	jz	.Lzero			# empty set always returns 0
51*7084133cSRobert Clausecker
52*7084133cSRobert Clausecker	movzbl	1(%rsi), %eax		# second character in the set
53*7084133cSRobert Clausecker	test	%eax, %eax
54*7084133cSRobert Clausecker	jz	.Lsingle		# one-char set: use special case
55*7084133cSRobert Clausecker
56*7084133cSRobert Clausecker	/* no special case matches -- prepare lookup table */
57*7084133cSRobert Clausecker	xor	%r8d, %r8d		# zero source for clearing the table
58*7084133cSRobert Clausecker	mov	$28, %ecx		# qword index: 28, 24, ..., 0
59*7084133cSRobert Clausecker0:	mov	%r8, (%rsp, %rcx, 8)	# clear 32 table bytes per iteration
60*7084133cSRobert Clausecker	mov	%r8, 8(%rsp, %rcx, 8)
61*7084133cSRobert Clausecker	mov	%r8, 16(%rsp, %rcx, 8)
62*7084133cSRobert Clausecker	mov	%r8, 24(%rsp, %rcx, 8)
63*7084133cSRobert Clausecker	sub	$4, %ecx
64*7084133cSRobert Clausecker	jnc	0b			# stop once the index wraps below 0
65*7084133cSRobert Clausecker
66*7084133cSRobert Clausecker	movb	$1, (%rsp, %rdx, 1)	# register first char in set
67*7084133cSRobert Clausecker	add	$2, %rsi		# skip the two chars already loaded
68*7084133cSRobert Clausecker
69*7084133cSRobert Clausecker	/* process remaining chars in set */
70*7084133cSRobert Clausecker	ALIGN_TEXT
71*7084133cSRobert Clausecker0:	movb	$1, (%rsp, %rax, 1)	# register previous char
72*7084133cSRobert Clausecker	movzbl	(%rsi), %eax		# next char in set
73*7084133cSRobert Clausecker	test	%eax, %eax		# end of string?
74*7084133cSRobert Clausecker	jz	1f
75*7084133cSRobert Clausecker
76*7084133cSRobert Clausecker	movb	$1, (%rsp, %rax, 1)	# register that char as well
77*7084133cSRobert Clausecker	add	$2, %rsi		# two set chars per iteration
78*7084133cSRobert Clausecker	movzbl	-1(%rsi), %eax
79*7084133cSRobert Clausecker	test	%eax, %eax
80*7084133cSRobert Clausecker	jnz	0b
81*7084133cSRobert Clausecker
82*7084133cSRobert Clausecker1:	mov	%rdi, %rax		# a copy of the source to iterate over
83*7084133cSRobert Clausecker
84*7084133cSRobert Clausecker	/* find mismatch */
85*7084133cSRobert Clausecker	ALIGN_TEXT
86*7084133cSRobert Clausecker0:	movzbl	(%rax), %ecx
87*7084133cSRobert Clausecker	cmpb	$0, (%rsp, %rcx, 1)	# byte 0 in set?
88*7084133cSRobert Clausecker	je	2f
89*7084133cSRobert Clausecker
90*7084133cSRobert Clausecker	movzbl	1(%rax), %ecx
91*7084133cSRobert Clausecker	cmpb	$0, (%rsp, %rcx, 1)	# byte 1 in set?
92*7084133cSRobert Clausecker	je	3f
93*7084133cSRobert Clausecker
94*7084133cSRobert Clausecker	movzbl	2(%rax), %ecx
95*7084133cSRobert Clausecker	cmpb	$0, (%rsp, %rcx, 1)	# byte 2 in set?
96*7084133cSRobert Clausecker	je	4f
97*7084133cSRobert Clausecker
98*7084133cSRobert Clausecker	movzbl	3(%rax), %ecx
99*7084133cSRobert Clausecker	add	$4, %rax		# advance to the next group of four
100*7084133cSRobert Clausecker	cmpb	$0, (%rsp, %rcx, 1)	# byte 3 in set?
101*7084133cSRobert Clausecker	jne	0b
102*7084133cSRobert Clausecker
103*7084133cSRobert Clausecker	sub	$3, %rax		# byte 3 mismatched: rewind, fall through
104*7084133cSRobert Clausecker4:	dec	%rdi			# (byte 2 mismatch enters here)
105*7084133cSRobert Clausecker3:	inc	%rax			# (byte 1 mismatch enters here)
106*7084133cSRobert Clausecker2:	sub	%rdi, %rax		# number of characters preceding match
107*7084133cSRobert Clausecker	leave
108*7084133cSRobert Clausecker	ret
109*7084133cSRobert Clausecker
110*7084133cSRobert Clausecker	/* empty set never matches */
111*7084133cSRobert Clausecker.Lzero:	xor	%eax, %eax		# return 0
112*7084133cSRobert Clausecker	leave
113*7084133cSRobert Clausecker	ret
114*7084133cSRobert Clausecker
115*7084133cSRobert Clausecker	/* find repeated single character */
116*7084133cSRobert Clausecker	ALIGN_TEXT
117*7084133cSRobert Clausecker.Lsingle:				# %rax = 0 on entry (2nd set byte was NUL)
118*7084133cSRobert Clausecker	cmpb	%dl, (%rdi, %rax, 1)	# byte 0 equal to the set char?
119*7084133cSRobert Clausecker	jne	1f
120*7084133cSRobert Clausecker
121*7084133cSRobert Clausecker	cmpb	%dl, 1(%rdi, %rax, 1)	# byte 1?
122*7084133cSRobert Clausecker	jne	2f
123*7084133cSRobert Clausecker
124*7084133cSRobert Clausecker	cmpb	%dl, 2(%rdi, %rax, 1)	# byte 2?
125*7084133cSRobert Clausecker	jne	3f
126*7084133cSRobert Clausecker
127*7084133cSRobert Clausecker	cmpb	%dl, 3(%rdi, %rax, 1)	# byte 3?
128*7084133cSRobert Clausecker	lea	4(%rax), %rax		# advance index; lea preserves flags
129*7084133cSRobert Clausecker	je	.Lsingle
130*7084133cSRobert Clausecker
131*7084133cSRobert Clausecker	sub	$3, %rax		# byte 3 mismatched: rewind, fall through
132*7084133cSRobert Clausecker3:	inc	%rax			# (byte 2 mismatch enters here)
133*7084133cSRobert Clausecker2:	inc	%rax			# (byte 1 mismatch enters here)
134*7084133cSRobert Clausecker1:	leave				# %rax = length of matching prefix
135*7084133cSRobert Clausecker	ret
136*7084133cSRobert ClauseckerARCHEND(strspn, scalar)
137*7084133cSRobert Clausecker
138*7084133cSRobert Clausecker	/*
139*7084133cSRobert Clausecker	 * This kernel uses pcmpistri to do the heavy lifting.
140*7084133cSRobert Clausecker	 * We provide three code paths, depending on set size:
141*7084133cSRobert Clausecker	 *
142*7084133cSRobert Clausecker	 *  0--16: one pcmpistri per 16 bytes of input
143*7084133cSRobert Clausecker	 * 17--32: two pcmpistri per 16 bytes of input
144*7084133cSRobert Clausecker	 *   >=33: fall back to look up table
145*7084133cSRobert Clausecker	 */
/*
 * size_t strspn(const char *s, const char *set) -- SSE4.2 kernel
 * in:  %rdi = s, %rsi = set
 * out: %rax = length of the longest initial segment of s containing
 *      only bytes that occur in set
 */
146*7084133cSRobert ClauseckerARCHENTRY(strspn, x86_64_v2)
147*7084133cSRobert Clausecker	push		%rbp			# frame pointer, restored by leave
148*7084133cSRobert Clausecker	mov		%rsp, %rbp
149*7084133cSRobert Clausecker	sub		$256, %rsp		# room for set copy / lookup table
150*7084133cSRobert Clausecker
151*7084133cSRobert Clausecker	/* find set size and copy up to 32 bytes to (%rsp) */
152*7084133cSRobert Clausecker	mov		%esi, %ecx		# low bits of the set pointer
153*7084133cSRobert Clausecker	and		$~0xf, %rsi		# align set pointer
154*7084133cSRobert Clausecker	movdqa		(%rsi), %xmm0		# aligned load cannot cross a page
155*7084133cSRobert Clausecker	pxor		%xmm1, %xmm1		# zero for NUL detection
156*7084133cSRobert Clausecker	and		$0xf, %ecx		# amount of bytes rsi is past alignment
157*7084133cSRobert Clausecker	xor		%edx, %edx		# %edx: set length accumulated so far
158*7084133cSRobert Clausecker	pcmpeqb		%xmm0, %xmm1		# end of string reached?
159*7084133cSRobert Clausecker	movdqa		%xmm0, 32(%rsp)		# transfer head of set to stack
160*7084133cSRobert Clausecker	pmovmskb	%xmm1, %eax		# bit mask of NUL positions
161*7084133cSRobert Clausecker	shr		%cl, %eax		# clear out junk before string
162*7084133cSRobert Clausecker	test		%eax, %eax		# end of set reached?
163*7084133cSRobert Clausecker	jnz		0f
164*7084133cSRobert Clausecker
165*7084133cSRobert Clausecker	movdqa		16(%rsi), %xmm0		# second chunk of the set
166*7084133cSRobert Clausecker	mov		$16, %edx
167*7084133cSRobert Clausecker	sub		%ecx, %edx		# length of set preceding xmm0
168*7084133cSRobert Clausecker	pxor		%xmm1, %xmm1
169*7084133cSRobert Clausecker	pcmpeqb		%xmm0, %xmm1		# NUL bytes in second chunk?
170*7084133cSRobert Clausecker	movdqa		%xmm0, 48(%rsp)
171*7084133cSRobert Clausecker	movdqu		32(%rsp, %rcx, 1), %xmm2 # head of set
172*7084133cSRobert Clausecker	pmovmskb	%xmm1, %eax
173*7084133cSRobert Clausecker	test		%eax, %eax
174*7084133cSRobert Clausecker	jnz		1f
175*7084133cSRobert Clausecker
176*7084133cSRobert Clausecker	movdqa		32(%rsi), %xmm0		# third chunk
177*7084133cSRobert Clausecker	add		$16, %edx
178*7084133cSRobert Clausecker	pxor		%xmm1, %xmm1
179*7084133cSRobert Clausecker	pcmpeqb		%xmm0, %xmm1
180*7084133cSRobert Clausecker	movdqa		%xmm0, 64(%rsp)
181*7084133cSRobert Clausecker	pmovmskb	%xmm1, %eax
182*7084133cSRobert Clausecker	test		%eax, %eax		# still not done?
183*7084133cSRobert Clausecker	jz		.Lgt32v2		# set longer than 32 bytes: use table
184*7084133cSRobert Clausecker
185*7084133cSRobert Clausecker0:	movdqu		32(%rsp, %rcx, 1), %xmm2 # head of set
186*7084133cSRobert Clausecker1:	tzcnt		%eax, %eax		# offset of NUL in current chunk
187*7084133cSRobert Clausecker	add		%eax, %edx		# length of set (excluding NUL byte)
188*7084133cSRobert Clausecker	cmp		$32, %edx		# above 32 bytes?
189*7084133cSRobert Clausecker	ja		.Lgt32v2
190*7084133cSRobert Clausecker
191*7084133cSRobert Clausecker	/*
192*7084133cSRobert Clausecker	 * At this point we know that we want to use pcmpistri.
193*7084133cSRobert Clausecker	 * One last problem obtains: the head of the string is not
194*7084133cSRobert Clausecker	 * aligned and may cross a page boundary.  If this is the case,
195*7084133cSRobert Clausecker	 * we take the part before the page boundary and repeat the
196*7084133cSRobert Clausecker	 * last byte to fill up the xmm register.
197*7084133cSRobert Clausecker	 */
198*7084133cSRobert Clausecker	mov		%rdi, %rax		# save original string pointer
199*7084133cSRobert Clausecker	lea		15(%rdi), %esi		# last byte of the head
200*7084133cSRobert Clausecker	xor		%edi, %esi		# bits differing between first/last byte
201*7084133cSRobert Clausecker	test		$PAGE_SIZE, %esi	# does the head cross a page?
202*7084133cSRobert Clausecker	jz		0f
203*7084133cSRobert Clausecker
204*7084133cSRobert Clausecker	/* head crosses page: copy to stack to fix up */
205*7084133cSRobert Clausecker	and		$~0xf, %rax		# align head pointer temporarily
206*7084133cSRobert Clausecker	movzbl		15(%rax), %esi		# last head byte on the page
207*7084133cSRobert Clausecker	movdqa		(%rax), %xmm0
208*7084133cSRobert Clausecker	movabs		$0x0101010101010101, %r8
209*7084133cSRobert Clausecker	imul		%r8, %rsi		# repeated 8 times
210*7084133cSRobert Clausecker	movdqa		%xmm0, (%rsp)		# head word on stack
211*7084133cSRobert Clausecker	mov		%rsi, 16(%rsp)		# followed by filler (last byte x8)
212*7084133cSRobert Clausecker	mov		%rsi, 24(%rsp)
213*7084133cSRobert Clausecker	mov		%edi, %eax
214*7084133cSRobert Clausecker	and		$0xf, %eax		# offset of head from alignment
215*7084133cSRobert Clausecker	add		%rsp, %rax		# pointer to fake head
216*7084133cSRobert Clausecker
217*7084133cSRobert Clausecker0:	movdqu		(%rax), %xmm1		# load head (fake or real)
218*7084133cSRobert Clausecker	lea		16(%rdi), %rax
219*7084133cSRobert Clausecker	and		$~0xf, %rax		# second 16 bytes of string (aligned)
220*7084133cSRobert Clausecker1:	cmp		$16, %edx		# 16--32 bytes?
221*7084133cSRobert Clausecker	ja		.Lgt16v2
222*7084133cSRobert Clausecker
223*7084133cSRobert Clausecker
224*7084133cSRobert Clausecker	/* set is 2--16 bytes in size */
225*7084133cSRobert Clausecker
226*7084133cSRobert Clausecker	/* _SIDD_UBYTE_OPS|_SIDD_CMP_EQUAL_ANY|_SIDD_LEAST_SIGNIFICANT|_SIDD_NEGATIVE_POLARITY */
227*7084133cSRobert Clausecker	pcmpistri	$0x10, %xmm1, %xmm2	# match in head?
228*7084133cSRobert Clausecker	jc		.Lheadmismatchv2
229*7084133cSRobert Clausecker
230*7084133cSRobert Clausecker	ALIGN_TEXT
231*7084133cSRobert Clausecker0:	pcmpistri	$0x10, (%rax), %xmm2
232*7084133cSRobert Clausecker	jc		1f			# match or end of string?
233*7084133cSRobert Clausecker	pcmpistri	$0x10, 16(%rax), %xmm2
234*7084133cSRobert Clausecker	lea		32(%rax), %rax		# advance; lea preserves flags
235*7084133cSRobert Clausecker	jnc		0b			# match or end of string?
236*7084133cSRobert Clausecker
237*7084133cSRobert Clausecker	sub		$16, %rax		# go back to second half
238*7084133cSRobert Clausecker1:	sub		%rdi, %rax		# offset of (%rax) from beginning of string
239*7084133cSRobert Clausecker	add		%rcx, %rax		# prefix length before match/NUL
240*7084133cSRobert Clausecker        leave
241*7084133cSRobert Clausecker        ret
242*7084133cSRobert Clausecker
243*7084133cSRobert Clausecker.Lheadmismatchv2:
244*7084133cSRobert Clausecker	mov		%ecx, %eax		# prefix length before mismatch/NUL
245*7084133cSRobert Clausecker	leave
246*7084133cSRobert Clausecker	ret
247*7084133cSRobert Clausecker
248*7084133cSRobert Clausecker	/* set is 17--32 bytes in size */
249*7084133cSRobert Clausecker.Lgt16v2:
250*7084133cSRobert Clausecker	movdqu		48(%rsp, %rcx, 1), %xmm3 # second part of set
251*7084133cSRobert Clausecker
252*7084133cSRobert Clausecker	/* _SIDD_UBYTE_OPS|_SIDD_CMP_EQUAL_ANY|_SIDD_BIT_MASK|_SIDD_NEGATIVE_POLARITY */
253*7084133cSRobert Clausecker	pcmpistrm	$0x10, %xmm1, %xmm2	# any mismatch in first half?
254*7084133cSRobert Clausecker	movdqa		%xmm0, %xmm4		# save first-half mismatch mask
255*7084133cSRobert Clausecker	pcmpistrm	$0x10, %xmm1, %xmm3	# any mismatch in the second half?
256*7084133cSRobert Clausecker	ptest		%xmm0, %xmm4		# any entry that doesn't match either?
257*7084133cSRobert Clausecker	jnz		2f
258*7084133cSRobert Clausecker
259*7084133cSRobert Clausecker	ALIGN_TEXT
260*7084133cSRobert Clausecker0:	movdqa		(%rax), %xmm1
261*7084133cSRobert Clausecker	pcmpistrm	$0x10, %xmm1, %xmm2
262*7084133cSRobert Clausecker	movdqa		%xmm0, %xmm4
263*7084133cSRobert Clausecker	pcmpistrm	$0x10, %xmm1, %xmm3
264*7084133cSRobert Clausecker	ptest		%xmm0, %xmm4
265*7084133cSRobert Clausecker	jnz		1f
266*7084133cSRobert Clausecker	movdqa		16(%rax), %xmm1
267*7084133cSRobert Clausecker	add		$32, %rax		# two chunks per iteration
268*7084133cSRobert Clausecker	pcmpistrm	$0x10, %xmm1, %xmm2
269*7084133cSRobert Clausecker	movdqa		%xmm0, %xmm4
270*7084133cSRobert Clausecker	pcmpistrm	$0x10, %xmm1, %xmm3
271*7084133cSRobert Clausecker	ptest		%xmm0, %xmm4
272*7084133cSRobert Clausecker	jz		0b
273*7084133cSRobert Clausecker
274*7084133cSRobert Clausecker	sub		$16, %rax		# mismatch was in the first chunk
275*7084133cSRobert Clausecker1:	pand		%xmm4, %xmm0		# bytes matching neither set half
276*7084133cSRobert Clausecker	movd		%xmm0, %ecx
277*7084133cSRobert Clausecker	sub		%rdi, %rax		# offset of %xmm1 from beginning of string
278*7084133cSRobert Clausecker	tzcnt		%ecx, %ecx		# position of first mismatch/NUL
279*7084133cSRobert Clausecker	add		%rcx, %rax		# prefix length before match/NUL
280*7084133cSRobert Clausecker	leave
281*7084133cSRobert Clausecker	ret
282*7084133cSRobert Clausecker
283*7084133cSRobert Clausecker	/* mismatch or string end in head */
284*7084133cSRobert Clausecker2:	pand		%xmm4, %xmm0		# bit mask of mismatches (end of string counts)
285*7084133cSRobert Clausecker	movd		%xmm0, %eax
286*7084133cSRobert Clausecker	tzcnt		%eax, %eax		# prefix length before mismatch/NUL
287*7084133cSRobert Clausecker	leave
288*7084133cSRobert Clausecker	ret
289*7084133cSRobert Clausecker
290*7084133cSRobert Clausecker	/* set is >=33 bytes in size */
291*7084133cSRobert Clausecker.Lgt32v2:				# fall back to scalar-style lookup table
292*7084133cSRobert Clausecker	xorps	%xmm0, %xmm0
293*7084133cSRobert Clausecker	mov	$256-64, %edx		# table offset: 192, 128, 64, 0
294*7084133cSRobert Clausecker
295*7084133cSRobert Clausecker	/* clear out look up table */
296*7084133cSRobert Clausecker0:	movaps	%xmm0, (%rsp, %rdx, 1)	# clear 64 bytes per iteration
297*7084133cSRobert Clausecker	movaps	%xmm0, 16(%rsp, %rdx, 1)
298*7084133cSRobert Clausecker	movaps	%xmm0, 32(%rsp, %rdx, 1)
299*7084133cSRobert Clausecker	movaps	%xmm0, 48(%rsp, %rdx, 1)
300*7084133cSRobert Clausecker	sub	$64, %edx
301*7084133cSRobert Clausecker	jnc	0b
302*7084133cSRobert Clausecker
303*7084133cSRobert Clausecker	add	%rcx, %rsi		# restore set pointer (undo alignment)
304*7084133cSRobert Clausecker	mov	%rdi, %rax		# keep a copy of the string
305*7084133cSRobert Clausecker
306*7084133cSRobert Clausecker	/* initialise look up table */
307*7084133cSRobert Clausecker	movzbl	(%rsi), %ecx		# set is known not to be empty here
308*7084133cSRobert Clausecker
309*7084133cSRobert Clausecker	ALIGN_TEXT
310*7084133cSRobert Clausecker0:	movb	$1, (%rsp, %rcx, 1)	# register char in table
311*7084133cSRobert Clausecker	movzbl	1(%rsi), %ecx		# next char of set
312*7084133cSRobert Clausecker	test	%ecx, %ecx		# end of set?
313*7084133cSRobert Clausecker	jz	1f
314*7084133cSRobert Clausecker
315*7084133cSRobert Clausecker	movb	$1, (%rsp, %rcx, 1)
316*7084133cSRobert Clausecker	movzbl	2(%rsi), %ecx
317*7084133cSRobert Clausecker	test	%ecx, %ecx
318*7084133cSRobert Clausecker	jz	1f
319*7084133cSRobert Clausecker
320*7084133cSRobert Clausecker	movb	$1, (%rsp, %rcx, 1)
321*7084133cSRobert Clausecker	movzbl	3(%rsi), %ecx
322*7084133cSRobert Clausecker	add	$4, %rsi		# four set chars per iteration
323*7084133cSRobert Clausecker	test	%ecx, %ecx
324*7084133cSRobert Clausecker	jz	1f
325*7084133cSRobert Clausecker
326*7084133cSRobert Clausecker	movb	$1, (%rsp, %rcx, 1)
327*7084133cSRobert Clausecker	movzbl	(%rsi), %ecx
328*7084133cSRobert Clausecker	test	%ecx, %ecx
329*7084133cSRobert Clausecker	jnz	0b
330*7084133cSRobert Clausecker
331*7084133cSRobert Clausecker	/* find match */
332*7084133cSRobert Clausecker	ALIGN_TEXT
333*7084133cSRobert Clausecker1:	movzbl	(%rax), %ecx
334*7084133cSRobert Clausecker	cmpb	$0, (%rsp, %rcx, 1)	# byte 0 in set?
335*7084133cSRobert Clausecker	je	2f
336*7084133cSRobert Clausecker
337*7084133cSRobert Clausecker	movzbl	1(%rax), %ecx
338*7084133cSRobert Clausecker	cmpb	$0, (%rsp, %rcx, 1)	# byte 1 in set?
339*7084133cSRobert Clausecker	je	3f
340*7084133cSRobert Clausecker
341*7084133cSRobert Clausecker	movzbl	2(%rax), %ecx
342*7084133cSRobert Clausecker	cmpb	$0, (%rsp, %rcx, 1)	# byte 2 in set?
343*7084133cSRobert Clausecker	je	4f
344*7084133cSRobert Clausecker
345*7084133cSRobert Clausecker	movzbl	3(%rax), %ecx
346*7084133cSRobert Clausecker	add	$4, %rax		# advance to the next group of four
347*7084133cSRobert Clausecker	cmpb	$0, (%rsp, %rcx, 1)	# byte 3 in set?
348*7084133cSRobert Clausecker	jne	1b
349*7084133cSRobert Clausecker
350*7084133cSRobert Clausecker	sub	$3, %rax		# byte 3 mismatched: rewind, fall through
351*7084133cSRobert Clausecker4:	dec	%rdi			# (byte 2 mismatch enters here)
352*7084133cSRobert Clausecker3:	inc	%rax			# (byte 1 mismatch enters here)
353*7084133cSRobert Clausecker2:	sub	%rdi, %rax		# number of characters preceding match
354*7084133cSRobert Clausecker	leave
355*7084133cSRobert Clausecker	ret
356*7084133cSRobert ClauseckerARCHEND(strspn, x86_64_v2)
357*7084133cSRobert Clausecker
358*7084133cSRobert Clausecker	.section .note.GNU-stack,"",%progbits
359