/* xref: /freebsd/lib/libc/amd64/string/strcspn.S (revision f4fc317c364f2c81ad3d36763d8e5a60393ddbd1) */
/*
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE
 */

#include <machine/asm.h>
#include <machine/param.h>

#include "amd64_archlevel.h"

#define ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * size_t strcspn(const char *s, const char *charset)
 *
 * The public name strcspn is a weak alias of __strcspn.  Two
 * implementations of __strcspn are listed for dispatch: "scalar" and
 * "x86_64_v2".  How one of them is selected is defined by the ARCHFUNCS
 * family of macros in amd64_archlevel.h (not visible from this file;
 * presumably chosen according to the CPU features available).
 */
	.weak strcspn
	.set strcspn, __strcspn
ARCHFUNCS(__strcspn)
	ARCHFUNC(__strcspn, scalar)
	NOARCHFUNC
	ARCHFUNC(__strcspn, x86_64_v2)
ENDARCHFUNCS(__strcspn)

/*
 * Baseline strcspn using a 256-byte look up table on the stack.
 *
 * In:      %rdi = string to scan, %rsi = set of characters to look for
 * Out:     %rax = number of leading characters of the string that are
 *                 not in the set
 * Scratch: %rcx, %rdx, %r8, flags, 256 bytes of stack
 *
 * Special cases: an empty set tail-calls strlen(); a set of exactly one
 * character calls strchrnul() and subtracts the start of the string from
 * the pointer it returns.  Otherwise table[c] is set to 1 for every
 * character c in the set and also for NUL, so the scan loop needs no
 * separate end-of-string test.
 */
ARCHENTRY(__strcspn, scalar)
	push	%rbp			# align stack to enable function call
	mov	%rsp, %rbp
	sub	$256, %rsp		# allocate space for lookup table

	/* check for special cases */
	movzbl	(%rsi), %eax		# first character in the set
	test	%eax, %eax
	jz	.Lstrlen

	movzbl	1(%rsi), %edx		# second character in the set
	test	%edx, %edx
	jz	.Lstrchr

	/*
	 * no special case matches -- prepare lookup table
	 *
	 * The table is cleared 32 bytes per iteration from the top down;
	 * %ecx is a quadword index running 28, 24, ..., 0 and the loop
	 * ends when the subtraction borrows.
	 */
	xor	%r8d, %r8d
	mov	$28, %ecx
0:	mov	%r8, (%rsp, %rcx, 8)
	mov	%r8, 8(%rsp, %rcx, 8)
	mov	%r8, 16(%rsp, %rcx, 8)
	mov	%r8, 24(%rsp, %rcx, 8)
	sub	$4, %ecx
	jnc	0b

	add	$2, %rsi		# the first two set characters are in eax/edx
	movb	$1, (%rsp, %rax, 1)	# register first chars in set
	movb	$1, (%rsp, %rdx, 1)
	mov	%rdi, %rax		# a copy of the source to iterate over

	/*
	 * process remaining chars in set, two per iteration
	 *
	 * The table entry is written before the NUL test, so the
	 * terminating NUL of the set is registered in the table as well.
	 */
	ALIGN_TEXT
0:	movzbl	(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	1(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	add	$2, %rsi
	jmp	0b

	/* find match, four string bytes per iteration */
	ALIGN_TEXT
1:	movzbl	(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	2f			# hit at offset 0

	movzbl	1(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	3f			# hit at offset 1

	movzbl	2(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	4f			# hit at offset 2

	movzbl	3(%rax), %ecx
	add	$4, %rax
	cmpb	$0, (%rsp, %rcx, 1)
	je	1b

	/*
	 * Hit at offset 3.  %rax has already been advanced by 4, so step
	 * back to group + 1 and fall through the chain below.  Each entry
	 * point of the chain adds one more to the final difference:
	 * label 4 lowers the base by one, label 3 raises the cursor by one.
	 */
	sub	$3, %rax
4:	dec	%rdi
3:	inc	%rax
2:	sub	%rdi, %rax		# number of characters preceding match
	leave
	ret

	/* set is empty, degrades to strlen */
.Lstrlen:
	leave				# drop the frame, then tail call with rdi intact
	jmp	CNAME(strlen)

	/* just one character in set, degrades to strchr */
.Lstrchr:
	mov	%rdi, (%rsp)		# stash a copy of the string
	mov	%eax, %esi		# find the character in the set
	call	CNAME(strchrnul)
	sub	(%rsp), %rax		# length of prefix before match
	leave
	ret
ARCHEND(__strcspn, scalar)

/*
 * This kernel uses pcmpistri to do the heavy lifting.
 * We provide five code paths, depending on set size:
 *
 *      0: call strlen()
 *      1: call strchrnul()
 *  2--16: one pcmpistri per 16 bytes of input
 * 17--32: two pcmpistri per 16 bytes of input
 *   >=33: fall back to look up table
 *
 * In:      %rdi = string to scan, %rsi = set of characters to look for
 * Out:     %rax = number of leading characters of the string that are
 *                 not in the set
 * Scratch: %rcx, %rdx, %rsi, %r8, %xmm0-%xmm3, flags, 256 bytes of stack
 *
 * Stack frame (256 bytes at %rsp, 16-byte aligned):
 *   pcmpistri paths:     0..31  fake string head (page crossing case only)
 *                       32..79  copy of the aligned chunks holding the set
 *   look up table path:  0..255 one flag byte per character value
 *
 * Register roles on the pcmpistri paths:
 *   %ecx   misalignment of the set pointer during setup, afterwards
 *          the index reported by pcmpistri
 *   %edx   length of the set
 *   %xmm2  set bytes 0..15
 *   %xmm3  set bytes 16..31 (17--32 path only)
 *   %xmm0  the 16 string bytes under examination
 *   %rax   address of the aligned string chunk under examination
 *
 * pcmpistri sets CF if a string byte matched the set and ZF if the
 * string operand contains a NUL byte.  Hence "jbe" means "match or end
 * of string", "ja" means "neither" and "jb"/"jc" means "match".
 */
ARCHENTRY(__strcspn, x86_64_v2)
	push		%rbp
	mov		%rsp, %rbp
	sub		$256, %rsp

	/* check for special cases */
	movzbl		(%rsi), %eax
	test		%eax, %eax		# empty string?
	jz		.Lstrlenv2

	cmpb		$0, 1(%rsi)		# single character string?
	jz		.Lstrchrv2

	/*
	 * find set size and copy the aligned chunks holding the set
	 * (up to three) to 32(%rsp)
	 */
	mov		%esi, %ecx
	and		$~0xf, %rsi		# align set pointer
	movdqa		(%rsi), %xmm0
	pxor		%xmm1, %xmm1
	and		$0xf, %ecx		# amount of bytes rsi is past alignment
	xor		%edx, %edx
	pcmpeqb		%xmm0, %xmm1		# end of string reached?
	movdqa		%xmm0, 32(%rsp)		# transfer head of set to stack
	pmovmskb	%xmm1, %eax
	shr		%cl, %eax		# clear out junk before string
	test		%eax, %eax		# end of set reached?
	jnz		0f

	movdqa		16(%rsi), %xmm0		# second chunk of the set
	mov		$16, %edx
	sub		%ecx, %edx		# length of set preceding xmm0
	pxor		%xmm1, %xmm1
	pcmpeqb		%xmm0, %xmm1
	movdqa		%xmm0, 48(%rsp)
	movdqu		32(%rsp, %rcx, 1), %xmm2 # head of set
	pmovmskb	%xmm1, %eax
	test		%eax, %eax
	jnz		1f

	movdqa		32(%rsi), %xmm0		# third chunk
	add		$16, %edx
	pxor		%xmm1, %xmm1
	pcmpeqb		%xmm0, %xmm1
	movdqa		%xmm0, 64(%rsp)
	pmovmskb	%xmm1, %eax
	test		%eax, %eax		# still not done?
	jz		.Lgt32v2

0:	movdqu		32(%rsp, %rcx, 1), %xmm2 # head of set
1:	tzcnt		%eax, %eax
	add		%eax, %edx		# length of set (excluding NUL byte)
	cmp		$32, %edx		# above 32 bytes?
	ja		.Lgt32v2

	/*
	 * At this point we know that we want to use pcmpistri.
	 * One last problem obtains: the head of the string is not
	 * aligned and may cross a page boundary.  If this is the case,
	 * we take the part before the page boundary and repeat the
	 * last byte to fill up the xmm register.  The repeated byte
	 * cannot produce a match or NUL earlier than the genuine last
	 * byte does, so the result is unaffected.
	 */
	mov		%rdi, %rax		# save original string pointer
	lea		15(%rdi), %esi		# last byte of the head
	xor		%edi, %esi
	test		$PAGE_SIZE, %esi	# does the head cross a page?
	jz		0f

	/* head crosses page: copy to stack to fix up */
	and		$~0xf, %rax		# align head pointer temporarily
	movzbl		15(%rax), %esi		# last head byte on the page
	movdqa		(%rax), %xmm0
	movabs		$0x0101010101010101, %r8
	imul		%r8, %rsi		# repeated 8 times
	movdqa		%xmm0, (%rsp)		# head word on stack
	mov		%rsi, 16(%rsp)		# followed by filler (last byte x8)
	mov		%rsi, 24(%rsp)
	mov		%edi, %eax
	and		$0xf, %eax		# offset of head from alignment
	add		%rsp, %rax		# pointer to fake head

0:	movdqu		(%rax), %xmm0		# load head (fake or real)
	lea		16(%rdi), %rax
	and		$~0xf, %rax		# second 16 bytes of string (aligned)
	cmp		$16, %edx		# 17--32 bytes?
	ja		.Lgt16v2


	/* set is 2--16 bytes in size */

	/* _SIDD_UBYTE_OPS|_SIDD_CMP_EQUAL_ANY|_SIDD_LEAST_SIGNIFICANT */
	pcmpistri	$0, %xmm0, %xmm2	# match in head?
	jbe		.Lheadmatchv2

	/*
	 * Main loop, two aligned chunks per iteration.  %rax is adjusted
	 * with lea only, as the flags left by pcmpistri are still needed.
	 */
	ALIGN_TEXT
0:	pcmpistri	$0, (%rax), %xmm2
	jbe		1f			# match or end of string?
	pcmpistri	$0, 16(%rax), %xmm2
	lea		32(%rax), %rax
	ja		0b			# neither match nor end of string?

	lea		-16(%rax), %rax		# go back to second half
1:	jc		2f			# jump if match found
	movdqa		(%rax), %xmm0		# reload string piece
	pxor		%xmm1, %xmm1
	pcmpeqb		%xmm1, %xmm0		# where is the NUL byte?
	pmovmskb	%xmm0, %ecx
	tzcnt		%ecx, %ecx		# location of NUL byte in (%rax)
2:	sub		%rdi, %rax		# offset of chunk from beginning of string
	add		%rcx, %rax		# prefix length before match/NUL
	leave
	ret

	/* match or NUL within the first 16 bytes of the string */
.Lheadmatchv2:
	jc		2f			# jump if match found
	pxor		%xmm1, %xmm1
	pcmpeqb		%xmm1, %xmm0
	pmovmskb	%xmm0, %ecx
	tzcnt		%ecx, %ecx		# location of NUL byte
2:	mov		%ecx, %eax		# prefix length before match/NUL
	leave
	ret

	/*
	 * match in first set half during head
	 *
	 * The second set half may match even earlier, so test it as well
	 * and return the smaller index (pcmpistri yields 16 if there is
	 * no match).
	 */
.Lheadmatchv2first:
	mov		%ecx, %eax
	pcmpistri	$0, %xmm0, %xmm3	# match in second set half?
	cmp		%ecx, %eax		# before the first half match?
	cmova		%ecx, %eax		# use the earlier match
	leave
	ret

.Lgt16v2:
	movdqu		48(%rsp, %rcx, 1), %xmm3 # second part of set

	/* set is 17--32 bytes in size */
	pcmpistri	$0, %xmm0, %xmm2	# match in first set half?
	jb		.Lheadmatchv2first
	pcmpistri	$0, %xmm0, %xmm3	# match in second set half or end of string?
	jbe		.Lheadmatchv2

	/*
	 * Main loop, two aligned chunks per iteration and two set
	 * halves per chunk.
	 */
	ALIGN_TEXT
0:	movdqa		(%rax), %xmm0
	pcmpistri	$0, %xmm0, %xmm2
	jb		4f			# match in first set half?
	pcmpistri	$0, %xmm0, %xmm3
	jbe		1f			# match in second set half or end of string?
	movdqa		16(%rax), %xmm0
	add		$32, %rax
	pcmpistri	$0, %xmm0, %xmm2
	jb		3f			# match in first set half?
	pcmpistri	$0, %xmm0, %xmm3
	ja		0b			# neither match in 2nd half nor string end?

	/* match in second half or NUL */
	lea		-16(%rax), %rax		# go back to second half
1:	jc		2f			# jump if match found
	pxor		%xmm1, %xmm1
	pcmpeqb		%xmm1, %xmm0		# where is the NUL byte?
	pmovmskb	%xmm0, %ecx
	tzcnt		%ecx, %ecx		# location of NUL byte in (%rax)
2:	sub		%rdi, %rax		# offset of %xmm0 from beginning of string
	add		%rcx, %rax		# prefix length before match/NUL
	leave
	ret

	/*
	 * match in first set half; as in .Lheadmatchv2first, the second
	 * set half is tested too and the earlier match wins
	 */
3:	sub		$16, %rax		# point back at the chunk held in %xmm0
4:	sub		%rdi, %rax		# offset of %xmm0 from beginning of string
	mov		%ecx, %edx
	pcmpistri	$0, %xmm0, %xmm3	# match in second set half?
	cmp		%ecx, %edx		# before the first half match?
	cmova		%ecx, %edx		# use the earlier match
	add		%rdx, %rax		# return full offset
	leave
	ret

	/* set is empty, degrades to strlen */
.Lstrlenv2:
	leave
	jmp	CNAME(strlen)

	/* just one character in set, degrades to strchr */
.Lstrchrv2:
	mov	%rdi, (%rsp)		# stash a copy of the string
	mov	%eax, %esi		# find this character
	call	CNAME(strchrnul)
	sub	(%rsp), %rax		# length of prefix before match
	leave
	ret

	/* set is >=33 bytes in size */
.Lgt32v2:
	xorps	%xmm0, %xmm0
	mov	$256-64, %edx

	/* clear out look up table, 64 bytes per iteration from the top down */
0:	movaps	%xmm0, (%rsp, %rdx, 1)
	movaps	%xmm0, 16(%rsp, %rdx, 1)
	movaps	%xmm0, 32(%rsp, %rdx, 1)
	movaps	%xmm0, 48(%rsp, %rdx, 1)
	sub	$64, %edx
	jnc	0b

	add	%rcx, %rsi		# restore set pointer (undo the alignment)
	mov	%rdi, %rax		# keep a copy of the string

	/*
	 * initialise look up table
	 *
	 * The table entry is written before the NUL test, so the
	 * terminating NUL of the set is registered in the table as well
	 * and the scan below stops at the end of the string.
	 */
	ALIGN_TEXT
0:	movzbl	(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	1(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	2(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	3(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	add	$4, %rsi
	jmp	0b

	/* find match, four string bytes per iteration */
	ALIGN_TEXT
1:	movzbl	(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	2f			# hit at offset 0

	movzbl	1(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	3f			# hit at offset 1

	movzbl	2(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	4f			# hit at offset 2

	movzbl	3(%rax), %ecx
	add	$4, %rax
	cmpb	$0, (%rsp, %rcx, 1)
	je	1b

	/*
	 * Hit at offset 3.  %rax has already been advanced by 4, so step
	 * back to group + 1 and fall through the chain below.  Each entry
	 * point of the chain adds one more to the final difference:
	 * label 4 lowers the base by one, label 3 raises the cursor by one.
	 */
	sub	$3, %rax
4:	dec	%rdi
3:	inc	%rax
2:	sub	%rdi, %rax		# number of characters preceding match
	leave
	ret
ARCHEND(__strcspn, x86_64_v2)

/* empty .note.GNU-stack section: this object needs no executable stack */
	.section .note.GNU-stack,"",%progbits