/* Origin: /freebsd/lib/libc/amd64/string/strcspn.S (revision c91cd7d03a9dee649ba3a1b9b4014df9de111bb8) */
/*
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE
 */
28474408bbSRobert Clausecker
#include <machine/asm.h>
#include <machine/param.h>	/* presumably the source of PAGE_SIZE -- confirm */

#include "amd64_archlevel.h"	/* presumably provides ARCHFUNCS/ARCHENTRY & co. -- confirm */

/* Alignment directive placed in front of the hot loop entry labels below. */
#define ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */
35474408bbSRobert Clausecker
/*
 * Implementation table for strcspn.  It lists the two variants defined
 * in this file (scalar and x86_64_v2).  The table macros are not defined
 * here; NOARCHFUNC presumably marks a slot for which no dedicated
 * variant exists -- confirm against amd64_archlevel.h.
 */
ARCHFUNCS(strcspn)
	ARCHFUNC(strcspn, scalar)
	NOARCHFUNC
	ARCHFUNC(strcspn, x86_64_v2)
ENDARCHFUNCS(strcspn)
41474408bbSRobert Clausecker
/*
 * size_t strcspn(const char *s, const char *set) -- scalar variant
 *
 * In:		%rdi = s, %rsi = set
 * Out:		%rax = length of the prefix of s free of bytes from set
 * Scratch:	%rcx, %rdx, %r8, flags; 256 bytes of stack below %rbp
 *
 * Code paths, selected by the length of set:
 *   0:   tail call to strlen(s)
 *   1:   strchrnul(s, set[0]) - s
 *   >=2: build a 256-entry byte table at (%rsp) with table[c] = 1 for
 *        every byte c of set.  The terminating NUL of set is entered
 *        too, so the scan over s also stops at the end of s.
 *
 * The frame (push + 256 bytes) keeps %rsp 16-byte aligned for the call
 * to strchrnul.
 */
ARCHENTRY(strcspn, scalar)
	push	%rbp			# align stack to enable function call
	mov	%rsp, %rbp
	sub	$256, %rsp		# allocate space for lookup table

	/* check for special cases */
	movzbl	(%rsi), %eax		# first character in the set
	test	%eax, %eax
	jz	.Lstrlen

	movzbl	1(%rsi), %edx		# second character in the set
	test	%edx, %edx
	jz	.Lstrchr

	/*
	 * no special case matches -- prepare lookup table
	 *
	 * Clear all 32 quadwords, four per iteration, from the top down;
	 * %ecx is the quadword index and the loop ends when the
	 * subtraction borrows (index 0 has been processed).
	 */
	xor	%r8d, %r8d
	mov	$28, %ecx
0:	mov	%r8, (%rsp, %rcx, 8)
	mov	%r8, 8(%rsp, %rcx, 8)
	mov	%r8, 16(%rsp, %rcx, 8)
	mov	%r8, 24(%rsp, %rcx, 8)
	sub	$4, %ecx
	jnc	0b

	add	$2, %rsi
	movb	$1, (%rsp, %rax, 1)	# register first chars in set
	movb	$1, (%rsp, %rdx, 1)
	mov	%rdi, %rax		# a copy of the source to iterate over

	/*
	 * process remaining chars in set
	 *
	 * The table entry is written before the NUL test, so entry 0
	 * ends up set as well.
	 */
	ALIGN_TEXT
0:	movzbl	(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	1(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	add	$2, %rsi
	jmp	0b

	/* find match, four bytes of s per iteration */
	ALIGN_TEXT
1:	movzbl	(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	2f

	movzbl	1(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	3f

	movzbl	2(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	4f

	movzbl	3(%rax), %ecx
	add	$4, %rax
	cmpb	$0, (%rsp, %rcx, 1)
	je	1b

	/*
	 * Exit ladder: each entry point adds one more to the final
	 * difference %rax - %rdi (either by bumping %rax or by lowering
	 * %rdi), so a hit at byte k of the group yields offset + k.
	 */
	sub	$3, %rax		# hit at byte 3: undo add $4, leaving +1
4:	dec	%rdi
3:	inc	%rax
2:	sub	%rdi, %rax		# number of characters preceding match
	leave
	ret

	/* set is empty, degrades to strlen */
.Lstrlen:
	leave
	jmp	CNAME(strlen)		# tail call, s is still in %rdi

	/* just one character in set, degrades to strchrnul */
.Lstrchr:
	mov	%rdi, (%rsp)		# stash a copy of the string
	mov	%eax, %esi		# find the character in the set
	call	CNAME(strchrnul)
	sub	(%rsp), %rax		# length of prefix before match
	leave
	ret
ARCHEND(strcspn, scalar)
126474408bbSRobert Clausecker
/*
 * size_t strcspn(const char *s, const char *set) -- x86_64_v2 variant
 *
 * This kernel uses pcmpistri to do the heavy lifting.
 * We provide five code paths, depending on set size:
 *
 *      0: call strlen()
 *      1: call strchrnul()
 *  2--16: one pcmpistri per 16 bytes of input
 * 17--32: two pcmpistri per 16 bytes of input
 *   >=33: fall back to look up table
 *
 * In:		%rdi = s, %rsi = set
 * Out:		%rax = length of the prefix of s free of bytes from set
 *
 * Register roles on the pcmpistri paths:
 *	%rcx	set & 15 until the first pcmpistri, then pcmpistri's
 *		result index
 *	%edx	length of set (excluding NUL)
 *	%xmm2	first 16 bytes of set
 *	%xmm3	set bytes 16 and up (17--32 path only)
 *	%xmm0	current 16 byte chunk of s
 *	%rax	address of the current/next aligned chunk of s
 *
 * Stack frame (256 bytes at %rsp, 16-byte aligned):
 *	 0..31	copy of the head of s plus filler (page-cross case only)
 *	32..79	copies of the first aligned 16 byte chunks of set
 *	0..255	reused as the byte lookup table on the >=33 path
 *
 * pcmpistri flag usage, as relied upon by the branches below (see the
 * Intel SDM entry for PCMPISTRI): CF = a byte of the chunk matched the
 * set, ZF = the chunk contains a NUL byte.  Hence jb/jc = match,
 * jbe = match or end of string, ja = neither.
 *
 * NOTE(review): tzcnt is only ever applied to masks known to be nonzero
 * at that point; on CPUs lacking BMI1 the encoding is understood to run
 * as bsf, which gives the same result for nonzero input -- confirm.
 */
ARCHENTRY(strcspn, x86_64_v2)
	push		%rbp
	mov		%rsp, %rbp
	sub		$256, %rsp

	/* check for special cases */
	movzbl		(%rsi), %eax
	test		%eax, %eax		# empty string?
	jz		.Lstrlenv2

	cmpb		$0, 1(%rsi)		# single character string?
	jz		.Lstrchrv2

	/*
	 * find set size and copy the aligned 16 byte chunks holding the
	 * set (at most three of them) to 32(%rsp)
	 */
	mov		%esi, %ecx
	and		$~0xf, %rsi		# align set pointer
	movdqa		(%rsi), %xmm0
	pxor		%xmm1, %xmm1
	and		$0xf, %ecx		# amount of bytes rsi is past alignment
	xor		%edx, %edx
	pcmpeqb		%xmm0, %xmm1		# end of string reached?
	movdqa		%xmm0, 32(%rsp)		# transfer head of set to stack
	pmovmskb	%xmm1, %eax
	shr		%cl, %eax		# clear out junk before string
	test		%eax, %eax		# end of set reached?
	jnz		0f

	movdqa		16(%rsi), %xmm0		# second chunk of the set
	mov		$16, %edx
	sub		%ecx, %edx		# length of set preceding xmm0
	pxor		%xmm1, %xmm1
	pcmpeqb		%xmm0, %xmm1
	movdqa		%xmm0, 48(%rsp)
	movdqu		32(%rsp, %rcx, 1), %xmm2 # head of set
	pmovmskb	%xmm1, %eax
	test		%eax, %eax
	jnz		1f

	movdqa		32(%rsi), %xmm0		# third chunk
	add		$16, %edx
	pxor		%xmm1, %xmm1
	pcmpeqb		%xmm0, %xmm1
	movdqa		%xmm0, 64(%rsp)
	pmovmskb	%xmm1, %eax
	test		%eax, %eax		# still not done?
	jz		.Lgt32v2

0:	movdqu		32(%rsp, %rcx, 1), %xmm2 # head of set
1:	tzcnt		%eax, %eax
	add		%eax, %edx		# length of set (excluding NUL byte)
	cmp		$32, %edx		# above 32 bytes?
	ja		.Lgt32v2

	/*
	 * At this point we know that we want to use pcmpistri.
	 * one last problem obtains: the head of the string is not
	 * aligned and may cross a page.  If this is the case,
	 * we take the part before the page boundary and repeat the
	 * last byte to fill up the xmm register.
	 */
	mov		%rdi, %rax		# save original string pointer
	lea		15(%rdi), %esi		# last byte of the head
	xor		%edi, %esi
	test		$PAGE_SIZE, %esi	# does the head cross a page?
	jz		0f

	/* head crosses page: copy to stack to fix up */
	and		$~0xf, %rax		# align head pointer temporarily
	movzbl		15(%rax), %esi		# last head byte on the page
	movdqa		(%rax), %xmm0
	movabs		$0x0101010101010101, %r8
	imul		%r8, %rsi		# repeated 8 times
	movdqa		%xmm0, (%rsp)		# head word on stack
	mov		%rsi, 16(%rsp)		# followed by filler (last byte x8)
	mov		%rsi, 24(%rsp)
	mov		%edi, %eax
	and		$0xf, %eax		# offset of head from alignment
	add		%rsp, %rax		# pointer to fake head

0:	movdqu		(%rax), %xmm0		# load head (fake or real)
	lea		16(%rdi), %rax
	and		$~0xf, %rax		# second 16 bytes of string (aligned)
	cmp		$16, %edx		# 17--32 bytes?
	ja		.Lgt16v2


	/* set is 2--16 bytes in size */

	/* _SIDD_UBYTE_OPS|_SIDD_CMP_EQUAL_ANY|_SIDD_LEAST_SIGNIFICANT */
	pcmpistri	$0, %xmm0, %xmm2	# match in head?
	jbe		.Lheadmatchv2

	ALIGN_TEXT
0:	pcmpistri	$0, (%rax), %xmm2
	jbe		1f			# match or end of string?
	pcmpistri	$0, 16(%rax), %xmm2
	lea		32(%rax), %rax		# lea: advance without touching flags
	ja		0b			# match or end of string?

	lea		-16(%rax), %rax		# go back to second half
1:	jc		2f			# jump if match found
	movdqa		(%rax), %xmm0		# reload string piece
	pxor		%xmm1, %xmm1
	pcmpeqb		%xmm1, %xmm0		# where is the NUL byte?
	pmovmskb	%xmm0, %ecx
	tzcnt		%ecx, %ecx		# location of NUL byte in (%rax)
2:	sub		%rdi, %rax		# offset of %xmm0 from beginning of string
	add		%rcx, %rax		# prefix length before match/NUL
	leave
	ret

	/* match or NUL within the first 16 bytes of the string */
.Lheadmatchv2:
	jc		2f			# jump if match found
	pxor		%xmm1, %xmm1
	pcmpeqb		%xmm1, %xmm0
	pmovmskb	%xmm0, %ecx
	tzcnt		%ecx, %ecx		# location of NUL byte
2:	mov		%ecx, %eax		# prefix length before match/NUL
	leave
	ret

	/*
	 * match in first set half during head
	 *
	 * A byte from the second set half may occur even earlier, so
	 * probe that half too and return the smaller index.
	 */
.Lheadmatchv2first:
	mov		%ecx, %eax
	pcmpistri	$0, %xmm0, %xmm3	# match in second set half?
	cmp		%ecx, %eax		# before the first half match?
	cmova		%ecx, %eax		# use the earlier match
	leave
	ret

.Lgt16v2:
	movdqu		48(%rsp, %rcx, 1), %xmm3 # second part of set

	/* set is 17--32 bytes in size */
	pcmpistri	$0, %xmm0, %xmm2	# match in first set half?
	jb		.Lheadmatchv2first
	pcmpistri	$0, %xmm0, %xmm3	# match in second set half or end of string?
	jbe		.Lheadmatchv2

	ALIGN_TEXT
0:	movdqa		(%rax), %xmm0
	pcmpistri	$0, %xmm0, %xmm2
	jb		4f			# match in first set half?
	pcmpistri	$0, %xmm0, %xmm3
	jbe		1f			# match in second set half or end of string?
	movdqa		16(%rax), %xmm0
	add		$32, %rax
	pcmpistri	$0, %xmm0, %xmm2
	jb		3f			# match in first set half?
	pcmpistri	$0, %xmm0, %xmm3
	ja		0b			# neither match in 2nd half nor string end?

	/* match in second half or NUL */
	lea		-16(%rax), %rax		# go back to second half
1:	jc		2f			# jump if match found
	pxor		%xmm1, %xmm1
	pcmpeqb		%xmm1, %xmm0		# where is the NUL byte?
	pmovmskb	%xmm0, %ecx
	tzcnt		%ecx, %ecx		# location of NUL byte in (%rax)
2:	sub		%rdi, %rax		# offset of %xmm0 from beginning of string
	add		%rcx, %rax		# prefix length before match/NUL
	leave
	ret

	/*
	 * match in first half
	 *
	 * As for the head: also probe the second set half and keep the
	 * earlier of the two hits within this chunk.
	 */
3:	sub		$16, %rax		# go back to second half
4:	sub		%rdi, %rax		# offset of %xmm0 from beginning of string
	mov		%ecx, %edx
	pcmpistri	$0, %xmm0, %xmm3	# match in second set half?
	cmp		%ecx, %edx		# before the first half match?
	cmova		%ecx, %edx		# use the earlier match
	add		%rdx, %rax		# return full offset
	leave
	ret

	/* set is empty, degrades to strlen */
.Lstrlenv2:
	leave
	jmp	CNAME(strlen)

	/* just one character in set, degrades to strchrnul */
.Lstrchrv2:
	mov	%rdi, (%rsp)		# stash a copy of the string
	mov	%eax, %esi		# find this character
	call	CNAME(strchrnul)
	sub	(%rsp), %rax		# length of prefix before match
	leave
	ret

	/* set is >=33 bytes in size */
.Lgt32v2:
	xorps	%xmm0, %xmm0
	mov	$256-64, %edx

	/* clear out look up table, 64 bytes per iteration, top down */
0:	movaps	%xmm0, (%rsp, %rdx, 1)
	movaps	%xmm0, 16(%rsp, %rdx, 1)
	movaps	%xmm0, 32(%rsp, %rdx, 1)
	movaps	%xmm0, 48(%rsp, %rdx, 1)
	sub	$64, %edx
	jnc	0b

	add	%rcx, %rsi		# restore set pointer (undo alignment)
	mov	%rdi, %rax		# keep a copy of the string

	/*
	 * initialise look up table
	 *
	 * Entries are written before the NUL test, so entry 0 is set
	 * as well and the scan below stops at the end of the string.
	 */
	ALIGN_TEXT
0:	movzbl	(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	1(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	2(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	3(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	add	$4, %rsi
	jmp	0b

	/* find match, four bytes of the string per iteration */
	ALIGN_TEXT
1:	movzbl	(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	2f

	movzbl	1(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	3f

	movzbl	2(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	4f

	movzbl	3(%rax), %ecx
	add	$4, %rax
	cmpb	$0, (%rsp, %rcx, 1)
	je	1b

	/*
	 * Exit ladder: every entry point below adds one more to the
	 * difference %rax - %rdi computed at label 2.
	 */
	sub	$3, %rax
4:	dec	%rdi
3:	inc	%rax
2:	sub	%rdi, %rax		# number of characters preceding match
	leave
	ret
ARCHEND(strcspn, x86_64_v2)
393474408bbSRobert Clausecker
/* Empty .note.GNU-stack section: by GNU toolchain convention, marks this object as not needing an executable stack. */
	.section .note.GNU-stack,"",%progbits