xref: /freebsd/lib/libc/amd64/string/strcspn.S (revision 474408bb7933f0383a0da2b01e717bfe683ae77c)
1*474408bbSRobert Clausecker/*
2*474408bbSRobert Clausecker * Copyright (c) 2023 The FreeBSD Foundation
3*474408bbSRobert Clausecker *
4*474408bbSRobert Clausecker * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
5*474408bbSRobert Clausecker * under sponsorship from the FreeBSD Foundation.
6*474408bbSRobert Clausecker *
7*474408bbSRobert Clausecker * Redistribution and use in source and binary forms, with or without
8*474408bbSRobert Clausecker * modification, are permitted provided that the following conditions
9*474408bbSRobert Clausecker * are met:
10*474408bbSRobert Clausecker * 1. Redistributions of source code must retain the above copyright
11*474408bbSRobert Clausecker *    notice, this list of conditions and the following disclaimer.
12*474408bbSRobert Clausecker * 2. Redistributions in binary form must reproduce the above copyright
13*474408bbSRobert Clausecker *    notice, this list of conditions and the following disclaimer in the
14*474408bbSRobert Clausecker *    documentation and/or other materials provided with the distribution.
15*474408bbSRobert Clausecker *
16*474408bbSRobert Clausecker * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
17*474408bbSRobert Clausecker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18*474408bbSRobert Clausecker * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19*474408bbSRobert Clausecker * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20*474408bbSRobert Clausecker * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21*474408bbSRobert Clausecker * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22*474408bbSRobert Clausecker * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23*474408bbSRobert Clausecker * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24*474408bbSRobert Clausecker * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25*474408bbSRobert Clausecker * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26*474408bbSRobert Clausecker * SUCH DAMAGE
27*474408bbSRobert Clausecker */
28*474408bbSRobert Clausecker
29*474408bbSRobert Clausecker#include <machine/asm.h>
30*474408bbSRobert Clausecker#include <machine/param.h>
31*474408bbSRobert Clausecker
32*474408bbSRobert Clausecker#include "amd64_archlevel.h"
33*474408bbSRobert Clausecker
#define ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * Kernels of strcspn offered for selection: the baseline (scalar) one and
 * the x86_64_v2 one, both defined further down in this file.
 * NOTE(review): ARCHFUNCS/ARCHFUNC/NOARCHFUNC/ENDARCHFUNCS come from
 * amd64_archlevel.h; presumably the entries are positional by architecture
 * level and NOARCHFUNC marks a level without a kernel of its own -- confirm
 * against that header before reordering or adding entries.
 */
ARCHFUNCS(strcspn)
	ARCHFUNC(strcspn, scalar)
	NOARCHFUNC
	ARCHFUNC(strcspn, x86_64_v2)
ENDARCHFUNCS(strcspn)
41*474408bbSRobert Clausecker
/*
 * size_t strcspn(const char *s, const char *set) -- baseline kernel
 *
 * In:    %rdi = s, %rsi = set
 * Out:   %rax = number of leading bytes of s that are not in set
 * Stack: 256 bytes at (%rsp) used as a byte-indexed membership table
 *
 * Empty and single-character sets are handed to strlen() and strchrnul();
 * everything else is answered from the membership table.
 */
ARCHENTRY(strcspn, scalar)
	push	%rbp			# frame pointer; also realigns %rsp for calls
	mov	%rsp, %rbp
	sub	$256, %rsp		# membership table lives at (%rsp)

	/* sets of size 0 and 1 are delegated */
	movzbl	(%rsi), %eax		# set[0]
	test	%eax, %eax
	jz	.Lstrlen		# empty set

	movzbl	1(%rsi), %edx		# set[1]
	test	%edx, %edx
	jz	.Lstrchr		# exactly one character in set

	/* two or more characters: zero the table, 32 bytes per round */
	xor	%r8d, %r8d
	mov	$256-32, %ecx		# byte offset of the topmost 32-byte slice
.Lclear:
	mov	%r8, (%rsp, %rcx, 1)
	mov	%r8, 8(%rsp, %rcx, 1)
	mov	%r8, 16(%rsp, %rcx, 1)
	mov	%r8, 24(%rsp, %rcx, 1)
	sub	$32, %ecx
	jnc	.Lclear			# borrow only after offset 0 was cleared

	movb	$1, (%rsp, %rax, 1)	# mark set[0]
	movb	$1, (%rsp, %rdx, 1)	# mark set[1]
	lea	2(%rsi), %rsi		# carry on with set[2]
	mov	%rdi, %rax		# scan cursor into s

	/*
	 * Mark the rest of the set, two bytes per round.  The terminating
	 * NUL is marked as well, which makes the scan below stop at the
	 * end of s without a separate check.
	 */
	ALIGN_TEXT
.Lmarkset:
	movzbl	(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	.Lscan

	movzbl	1(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	add	$2, %rsi
	test	%ecx, %ecx
	jnz	.Lmarkset

	/* scan s four bytes per round until a marked byte turns up */
	ALIGN_TEXT
.Lscan:
	movzbl	(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	.Lhit0

	movzbl	1(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	.Lhit1

	movzbl	2(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	.Lhit2

	movzbl	3(%rax), %ecx
	add	$4, %rax
	cmpb	$0, (%rsp, %rcx, 1)
	je	.Lscan

	dec	%rax			# hit on the 4th byte; cursor was already advanced
	jmp	.Lhit0
.Lhit2:	inc	%rax
.Lhit1:	inc	%rax
.Lhit0:	sub	%rdi, %rax		# bytes in front of the hit
	leave
	ret

	/* empty set: the answer is the length of s, tail call strlen */
.Lstrlen:
	leave
	jmp	CNAME(strlen)

	/* one character in set: locate it (or the NUL) with strchrnul */
.Lstrchr:
	mov	%rdi, (%rsp)		# keep s across the call
	mov	%eax, %esi		# the character to look for
	call	CNAME(strchrnul)
	sub	(%rsp), %rax		# distance from s to the hit
	leave
	ret
ARCHEND(strcspn, scalar)
126*474408bbSRobert Clausecker
127*474408bbSRobert Clausecker	/*
128*474408bbSRobert Clausecker	 * This kernel uses pcmpistri to do the heavy lifting.
129*474408bbSRobert Clausecker	 * We provide five code paths, depending on set size:
130*474408bbSRobert Clausecker	 *
131*474408bbSRobert Clausecker	 *      0: call strlen()
132*474408bbSRobert Clausecker	 *      1: call strchr()
133*474408bbSRobert Clausecker	 *  2--16: one pcmpistri per 16 bytes of input
134*474408bbSRobert Clausecker	 * 17--32: two pcmpistri per 16 bytes of input
135*474408bbSRobert Clausecker	 *   >=33: fall back to look up table
136*474408bbSRobert Clausecker	 */
137*474408bbSRobert ClauseckerARCHENTRY(strcspn, x86_64_v2)
138*474408bbSRobert Clausecker	push		%rbp
139*474408bbSRobert Clausecker	mov		%rsp, %rbp
140*474408bbSRobert Clausecker	sub		$256, %rsp
141*474408bbSRobert Clausecker
142*474408bbSRobert Clausecker	/* check for special cases */
143*474408bbSRobert Clausecker	movzbl		(%rsi), %eax
144*474408bbSRobert Clausecker	test		%eax, %eax		# empty string?
145*474408bbSRobert Clausecker	jz		.Lstrlenv2
146*474408bbSRobert Clausecker
147*474408bbSRobert Clausecker	cmpb		$0, 1(%rsi)		# single character string?
148*474408bbSRobert Clausecker	jz		.Lstrchrv2
149*474408bbSRobert Clausecker
150*474408bbSRobert Clausecker	/* find set size and copy up to 32 bytes to (%rsp) */
151*474408bbSRobert Clausecker	mov		%esi, %ecx
152*474408bbSRobert Clausecker	and		$~0xf, %rsi		# align set pointer
153*474408bbSRobert Clausecker	movdqa		(%rsi), %xmm0
154*474408bbSRobert Clausecker	pxor		%xmm1, %xmm1
155*474408bbSRobert Clausecker	and		$0xf, %ecx		# amount of bytes rsi is past alignment
156*474408bbSRobert Clausecker	xor		%edx, %edx
157*474408bbSRobert Clausecker	pcmpeqb		%xmm0, %xmm1		# end of string reached?
158*474408bbSRobert Clausecker	movdqa		%xmm0, 32(%rsp)		# transfer head of set to stack
159*474408bbSRobert Clausecker	pmovmskb	%xmm1, %eax
160*474408bbSRobert Clausecker	shr		%cl, %eax		# clear out junk before string
161*474408bbSRobert Clausecker	test		%eax, %eax		# end of set reached?
162*474408bbSRobert Clausecker	jnz		0f
163*474408bbSRobert Clausecker
164*474408bbSRobert Clausecker	movdqa		16(%rsi), %xmm0		# second chunk of the set
165*474408bbSRobert Clausecker	mov		$16, %edx
166*474408bbSRobert Clausecker	sub		%ecx, %edx		# length of set preceding xmm0
167*474408bbSRobert Clausecker	pxor		%xmm1, %xmm1
168*474408bbSRobert Clausecker	pcmpeqb		%xmm0, %xmm1
169*474408bbSRobert Clausecker	movdqa		%xmm0, 48(%rsp)
170*474408bbSRobert Clausecker	movdqu		32(%rsp, %rcx, 1), %xmm2 # head of set
171*474408bbSRobert Clausecker	pmovmskb	%xmm1, %eax
172*474408bbSRobert Clausecker	test		%eax, %eax
173*474408bbSRobert Clausecker	jnz		1f
174*474408bbSRobert Clausecker
175*474408bbSRobert Clausecker	movdqa		32(%rsi), %xmm0		# third chunk
176*474408bbSRobert Clausecker	add		$16, %edx
177*474408bbSRobert Clausecker	pxor		%xmm1, %xmm1
178*474408bbSRobert Clausecker	pcmpeqb		%xmm0, %xmm1
179*474408bbSRobert Clausecker	movdqa		%xmm0, 64(%rsp)
180*474408bbSRobert Clausecker	pmovmskb	%xmm1, %eax
181*474408bbSRobert Clausecker	test		%eax, %eax		# still not done?
182*474408bbSRobert Clausecker	jz		.Lgt32v2
183*474408bbSRobert Clausecker
184*474408bbSRobert Clausecker0:	movdqu		32(%rsp, %rcx, 1), %xmm2 # head of set
185*474408bbSRobert Clausecker1:	tzcnt		%eax, %eax
186*474408bbSRobert Clausecker	add		%eax, %edx		# length of set (excluding NUL byte)
187*474408bbSRobert Clausecker	cmp		$32, %edx		# above 32 bytes?
188*474408bbSRobert Clausecker	ja		.Lgt32v2
189*474408bbSRobert Clausecker
190*474408bbSRobert Clausecker	/*
191*474408bbSRobert Clausecker	 * At this point we know that we want to use pcmpistri.
192*474408bbSRobert Clausecker	 * one last problem obtains: the head of the string is not
193*474408bbSRobert Clausecker	 * aligned and may cross a cacheline.  If this is the case,
194*474408bbSRobert Clausecker	 * we take the part before the page boundary and repeat the
195*474408bbSRobert Clausecker	 * last byte to fill up the xmm register.
196*474408bbSRobert Clausecker	 */
197*474408bbSRobert Clausecker	mov		%rdi, %rax		# save original string pointer
198*474408bbSRobert Clausecker	lea		15(%rdi), %esi		# last byte of the head
199*474408bbSRobert Clausecker	xor		%edi, %esi
200*474408bbSRobert Clausecker	test		$PAGE_SIZE, %esi	# does the head cross a page?
201*474408bbSRobert Clausecker	jz		0f
202*474408bbSRobert Clausecker
203*474408bbSRobert Clausecker	/* head crosses page: copy to stack to fix up */
204*474408bbSRobert Clausecker	and		$~0xf, %rax		# align head pointer temporarily
205*474408bbSRobert Clausecker	movzbl		15(%rax), %esi		# last head byte on the page
206*474408bbSRobert Clausecker	movdqa		(%rax), %xmm0
207*474408bbSRobert Clausecker	movabs		$0x0101010101010101, %r8
208*474408bbSRobert Clausecker	imul		%r8, %rsi		# repeated 8 times
209*474408bbSRobert Clausecker	movdqa		%xmm0, (%rsp)		# head word on stack
210*474408bbSRobert Clausecker	mov		%rsi, 16(%rsp)		# followed by filler (last byte x8)
211*474408bbSRobert Clausecker	mov		%rsi, 24(%rsp)
212*474408bbSRobert Clausecker	mov		%edi, %eax
213*474408bbSRobert Clausecker	and		$0xf, %eax		# offset of head from alignment
214*474408bbSRobert Clausecker	add		%rsp, %rax		# pointer to fake head
215*474408bbSRobert Clausecker
216*474408bbSRobert Clausecker0:	movdqu		(%rax), %xmm0		# load head (fake or real)
217*474408bbSRobert Clausecker	lea		16(%rdi), %rax
218*474408bbSRobert Clausecker	and		$~0xf, %rax		# second 16 bytes of string (aligned)
219*474408bbSRobert Clausecker1:	cmp		$16, %edx		# 16--32 bytes?
220*474408bbSRobert Clausecker	ja		.Lgt16v2
221*474408bbSRobert Clausecker
222*474408bbSRobert Clausecker
223*474408bbSRobert Clausecker	/* set is 2--16 bytes in size */
224*474408bbSRobert Clausecker
225*474408bbSRobert Clausecker	/* _SIDD_UBYTE_OPS|_SIDD_CMP_EQUAL_ANY|_SIDD_LEAST_SIGNIFICANT */
226*474408bbSRobert Clausecker	pcmpistri	$0, %xmm0, %xmm2	# match in head?
227*474408bbSRobert Clausecker	jbe		.Lheadmatchv2
228*474408bbSRobert Clausecker
229*474408bbSRobert Clausecker	ALIGN_TEXT
230*474408bbSRobert Clausecker0:	pcmpistri	$0, (%rax), %xmm2
231*474408bbSRobert Clausecker	jbe		1f			# match or end of string?
232*474408bbSRobert Clausecker	pcmpistri	$0, 16(%rax), %xmm2
233*474408bbSRobert Clausecker	lea		32(%rax), %rax
234*474408bbSRobert Clausecker	ja		0b			# match or end of string?
235*474408bbSRobert Clausecker
236*474408bbSRobert Clausecker3:	lea		-16(%rax), %rax		# go back to second half
237*474408bbSRobert Clausecker1:	jc		2f			# jump if match found
238*474408bbSRobert Clausecker	movdqa		(%rax), %xmm0		# reload string piece
239*474408bbSRobert Clausecker	pxor		%xmm1, %xmm1
240*474408bbSRobert Clausecker	pcmpeqb		%xmm1, %xmm0		# where is the NUL byte?
241*474408bbSRobert Clausecker	pmovmskb	%xmm0, %ecx
242*474408bbSRobert Clausecker	tzcnt		%ecx, %ecx		# location of NUL byte in (%rax)
243*474408bbSRobert Clausecker2:	sub		%rdi, %rax		# offset of %xmm0 from beginning of string
244*474408bbSRobert Clausecker	add		%rcx, %rax		# prefix length before match/NUL
245*474408bbSRobert Clausecker	leave
246*474408bbSRobert Clausecker	ret
247*474408bbSRobert Clausecker
248*474408bbSRobert Clausecker.Lheadmatchv2:
249*474408bbSRobert Clausecker	jc		2f			# jump if match found
250*474408bbSRobert Clausecker	pxor		%xmm1, %xmm1
251*474408bbSRobert Clausecker	pcmpeqb		%xmm1, %xmm0
252*474408bbSRobert Clausecker	pmovmskb	%xmm0, %ecx
253*474408bbSRobert Clausecker	tzcnt		%ecx, %ecx		# location of NUL byte
254*474408bbSRobert Clausecker2:	mov		%ecx, %eax		# prefix length before match/NUL
255*474408bbSRobert Clausecker	leave
256*474408bbSRobert Clausecker	ret
257*474408bbSRobert Clausecker
258*474408bbSRobert Clausecker.Lgt16v2:
259*474408bbSRobert Clausecker	movdqu		48(%rsp, %rcx, 1), %xmm3 # second part of set
260*474408bbSRobert Clausecker
261*474408bbSRobert Clausecker	/* set is 17--32 bytes in size */
262*474408bbSRobert Clausecker	pcmpistri	$0, %xmm0, %xmm2	# match in head?
263*474408bbSRobert Clausecker	jbe		.Lheadmatchv2
264*474408bbSRobert Clausecker	pcmpistri	$0, %xmm0, %xmm3	# ZF=1 not possible here
265*474408bbSRobert Clausecker	jb		.Lheadmatchv2
266*474408bbSRobert Clausecker
267*474408bbSRobert Clausecker	ALIGN_TEXT
268*474408bbSRobert Clausecker0:	movdqa		(%rax), %xmm0
269*474408bbSRobert Clausecker	pcmpistri	$0, %xmm0, %xmm2
270*474408bbSRobert Clausecker	jbe		1b
271*474408bbSRobert Clausecker	pcmpistri	$0, %xmm0, %xmm3
272*474408bbSRobert Clausecker	jb		1f			# ZF=1 not possible here
273*474408bbSRobert Clausecker	movdqa		16(%rax), %xmm0
274*474408bbSRobert Clausecker	add		$32, %rax
275*474408bbSRobert Clausecker	pcmpistri	$0, %xmm0, %xmm2
276*474408bbSRobert Clausecker	jbe		3b
277*474408bbSRobert Clausecker	pcmpistri	$0, %xmm0, %xmm3
278*474408bbSRobert Clausecker	jae		0b			# ZF=1 not possible here
279*474408bbSRobert Clausecker
280*474408bbSRobert Clausecker	sub		$16, %rax		# go back to second half
281*474408bbSRobert Clausecker1:	add		%rcx, %rax
282*474408bbSRobert Clausecker	sub		%rdi, %rax
283*474408bbSRobert Clausecker	leave
284*474408bbSRobert Clausecker	ret
285*474408bbSRobert Clausecker
286*474408bbSRobert Clausecker	/* set is empty, degrades to strlen */
287*474408bbSRobert Clausecker.Lstrlenv2:
288*474408bbSRobert Clausecker	leave
289*474408bbSRobert Clausecker	jmp	CNAME(strlen)
290*474408bbSRobert Clausecker
291*474408bbSRobert Clausecker	/* just one character in set, degrades to strchr */
292*474408bbSRobert Clausecker.Lstrchrv2:
293*474408bbSRobert Clausecker	mov	%rdi, (%rsp)		# stash a copy of the string
294*474408bbSRobert Clausecker	mov	%eax, %esi		# find this character
295*474408bbSRobert Clausecker	call	CNAME(strchrnul)
296*474408bbSRobert Clausecker	sub	(%rsp), %rax		# length of prefix before match
297*474408bbSRobert Clausecker	leave
298*474408bbSRobert Clausecker	ret
299*474408bbSRobert Clausecker
300*474408bbSRobert Clausecker	/* set is >=33 bytes in size */
301*474408bbSRobert Clausecker.Lgt32v2:
302*474408bbSRobert Clausecker	xorps	%xmm0, %xmm0
303*474408bbSRobert Clausecker	mov	$256-64, %edx
304*474408bbSRobert Clausecker
305*474408bbSRobert Clausecker	/* clear out look up table */
306*474408bbSRobert Clausecker0:	movaps	%xmm0, (%rsp, %rdx, 1)
307*474408bbSRobert Clausecker	movaps	%xmm0, 16(%rsp, %rdx, 1)
308*474408bbSRobert Clausecker	movaps	%xmm0, 32(%rsp, %rdx, 1)
309*474408bbSRobert Clausecker	movaps	%xmm0, 48(%rsp, %rdx, 1)
310*474408bbSRobert Clausecker	sub	$64, %edx
311*474408bbSRobert Clausecker	jnc	0b
312*474408bbSRobert Clausecker
313*474408bbSRobert Clausecker	add	%rcx, %rsi		# restore string pointer
314*474408bbSRobert Clausecker	mov	%rdi, %rax		# keep a copy of the string
315*474408bbSRobert Clausecker
316*474408bbSRobert Clausecker	/* initialise look up table */
317*474408bbSRobert Clausecker	ALIGN_TEXT
318*474408bbSRobert Clausecker0:	movzbl	(%rsi), %ecx
319*474408bbSRobert Clausecker	movb	$1, (%rsp, %rcx, 1)
320*474408bbSRobert Clausecker	test	%ecx, %ecx
321*474408bbSRobert Clausecker	jz	1f
322*474408bbSRobert Clausecker
323*474408bbSRobert Clausecker	movzbl	1(%rsi), %ecx
324*474408bbSRobert Clausecker	movb	$1, (%rsp, %rcx, 1)
325*474408bbSRobert Clausecker	test	%ecx, %ecx
326*474408bbSRobert Clausecker	jz	1f
327*474408bbSRobert Clausecker
328*474408bbSRobert Clausecker	movzbl	2(%rsi), %ecx
329*474408bbSRobert Clausecker	movb	$1, (%rsp, %rcx, 1)
330*474408bbSRobert Clausecker	test	%ecx, %ecx
331*474408bbSRobert Clausecker	jz	1f
332*474408bbSRobert Clausecker
333*474408bbSRobert Clausecker	movzbl	3(%rsi), %ecx
334*474408bbSRobert Clausecker	movb	$1, (%rsp, %rcx, 1)
335*474408bbSRobert Clausecker	test	%ecx, %ecx
336*474408bbSRobert Clausecker	jz	1f
337*474408bbSRobert Clausecker
338*474408bbSRobert Clausecker	add	$4, %rsi
339*474408bbSRobert Clausecker	jmp	0b
340*474408bbSRobert Clausecker
341*474408bbSRobert Clausecker	/* find match */
342*474408bbSRobert Clausecker	ALIGN_TEXT
343*474408bbSRobert Clausecker1:	movzbl	(%rax), %ecx
344*474408bbSRobert Clausecker	cmpb	$0, (%rsp, %rcx, 1)
345*474408bbSRobert Clausecker	jne	2f
346*474408bbSRobert Clausecker
347*474408bbSRobert Clausecker	movzbl	1(%rax), %ecx
348*474408bbSRobert Clausecker	cmpb	$0, (%rsp, %rcx, 1)
349*474408bbSRobert Clausecker	jne	3f
350*474408bbSRobert Clausecker
351*474408bbSRobert Clausecker	movzbl	2(%rax), %ecx
352*474408bbSRobert Clausecker	cmpb	$0, (%rsp, %rcx, 1)
353*474408bbSRobert Clausecker	jne	4f
354*474408bbSRobert Clausecker
355*474408bbSRobert Clausecker	movzbl	3(%rax), %ecx
356*474408bbSRobert Clausecker	add	$4, %rax
357*474408bbSRobert Clausecker	cmpb	$0, (%rsp, %rcx, 1)
358*474408bbSRobert Clausecker	je	1b
359*474408bbSRobert Clausecker
360*474408bbSRobert Clausecker	sub	$3, %rax
361*474408bbSRobert Clausecker4:	dec	%rdi
362*474408bbSRobert Clausecker3:	inc	%rax
363*474408bbSRobert Clausecker2:	sub	%rdi, %rax		# number of characters preceding match
364*474408bbSRobert Clausecker	leave
365*474408bbSRobert Clausecker	ret
366*474408bbSRobert ClauseckerARCHEND(strcspn, x86_64_v2)
367*474408bbSRobert Clausecker
368*474408bbSRobert Clausecker	.section .note.GNU-stack,"",%progbits
369