xref: /titanic_41/usr/src/lib/libc/amd64/gen/strcmp.s (revision 533d3a4910febc9985154b885dbe971e3c21ca04)
17c478bd9Sstevel@tonic-gate/*
2*533d3a49SEdward Gillett * CDDL HEADER START
3*533d3a49SEdward Gillett *
4*533d3a49SEdward Gillett * The contents of this file are subject to the terms of the
5*533d3a49SEdward Gillett * Common Development and Distribution License (the "License").
6*533d3a49SEdward Gillett * You may not use this file except in compliance with the License.
7*533d3a49SEdward Gillett *
8*533d3a49SEdward Gillett * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*533d3a49SEdward Gillett * or http://www.opensolaris.org/os/licensing.
10*533d3a49SEdward Gillett * See the License for the specific language governing permissions
11*533d3a49SEdward Gillett * and limitations under the License.
12*533d3a49SEdward Gillett *
13*533d3a49SEdward Gillett * When distributing Covered Code, include this CDDL HEADER in each
14*533d3a49SEdward Gillett * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*533d3a49SEdward Gillett * If applicable, add the following below this CDDL HEADER, with the
16*533d3a49SEdward Gillett * fields enclosed by brackets "[]" replaced with your own identifying
17*533d3a49SEdward Gillett * information: Portions Copyright [yyyy] [name of copyright owner]
18*533d3a49SEdward Gillett *
19*533d3a49SEdward Gillett * CDDL HEADER END
207c478bd9Sstevel@tonic-gate */
217c478bd9Sstevel@tonic-gate
227c478bd9Sstevel@tonic-gate/*
23*533d3a49SEdward Gillett * Copyright (c) 2009, Intel Corporation
247c478bd9Sstevel@tonic-gate * All rights reserved.
257c478bd9Sstevel@tonic-gate */
267c478bd9Sstevel@tonic-gate
27*533d3a49SEdward Gillett/*
28*533d3a49SEdward Gillett *	str[n]cmp - compare chars between two strings
29*533d3a49SEdward Gillett */
307c478bd9Sstevel@tonic-gate
317c478bd9Sstevel@tonic-gate#include "SYS.h"
32*533d3a49SEdward Gillett#include "proc64_id.h"
337c478bd9Sstevel@tonic-gate
/*
 * LABEL(s) forms the label name .strcmp<s>.  The empty comment between
 * ".strcmp" and "s" is what joins the two pieces, so this relies on the
 * preprocessor replacing that comment with nothing rather than a space.
 */
347c478bd9Sstevel@tonic-gate#define LABEL(s) .strcmp/**/s
357c478bd9Sstevel@tonic-gate
367c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
37*533d3a49SEdward Gillett	/*
38*533d3a49SEdward Gillett	 * Since the counter, %r11, is unsigned, we branch to strcmp_exitz
39*533d3a49SEdward Gillett	 * if the new counter > the old one or is 0.
40*533d3a49SEdward Gillett	 */
	/*
	 * The lea computes %r9 = %r11 - (16 - %rcx): the count that is left
	 * once the (16 - %rcx) bytes of the first, partial 16-byte block
	 * have been compared.  A result larger than the old count means the
	 * subtraction wrapped, i.e. fewer bytes were requested than were
	 * just compared.  Clobbers %r9 and the flags; on fall-through %r11
	 * holds the new count.  When USE_AS_STRNCMP is not defined the macro
	 * expands to nothing.
	 */
41*533d3a49SEdward Gillett#define UPDATE_STRNCMP_COUNTER				\
42*533d3a49SEdward Gillett	/* calculate left number to compare */		\
43*533d3a49SEdward Gillett	lea	-16(%rcx, %r11), %r9;			\
44*533d3a49SEdward Gillett	cmp	%r9, %r11;				\
45*533d3a49SEdward Gillett	jb	LABEL(strcmp_exitz);			\
46*533d3a49SEdward Gillett	test	%r9, %r9;				\
47*533d3a49SEdward Gillett	je	LABEL(strcmp_exitz);			\
48*533d3a49SEdward Gillett	mov	%r9, %r11
49*533d3a49SEdward Gillett#else
50*533d3a49SEdward Gillett#define UPDATE_STRNCMP_COUNTER
51*533d3a49SEdward Gillett#endif
52*533d3a49SEdward Gillett
53*533d3a49SEdward Gillett	/*
54*533d3a49SEdward Gillett	 * This implementation uses SSE to compare up to 16 bytes at a time.
55*533d3a49SEdward Gillett	 */
	/*
	 * Register use in this entry section:
	 *	%rdi, %rsi	the two string pointers
	 *	%rdx		byte count on entry (strncmp build only)
	 *	%r11		count still to be compared (strncmp build only)
	 *	%rcx, %rax	offset of %rsi / %rdi within a 64-byte line
	 *
	 * strcmp_exitz and less16bytes are defined outside this section;
	 * strcmp_exitz is presumably the "return 0" exit -- confirm there.
	 */
56*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
577c478bd9Sstevel@tonic-gate	ENTRY(strncmp)
58*533d3a49SEdward Gillett	test	%rdx, %rdx
59*533d3a49SEdward Gillett	je	LABEL(strcmp_exitz)	/* count of 0: nothing to compare */
60*533d3a49SEdward Gillett	mov	%rdx, %r11		/* %r11 = count left to compare */
617c478bd9Sstevel@tonic-gate#else
627c478bd9Sstevel@tonic-gate	ENTRY(strcmp)			/* (const char *, const char *) */
637c478bd9Sstevel@tonic-gate#endif
64*533d3a49SEdward Gillett	mov	%esi, %ecx
65*533d3a49SEdward Gillett	mov	%edi, %eax
66*533d3a49SEdward Gillett	and	$0x3f, %rcx		/* rsi alignment in cache line */
67*533d3a49SEdward Gillett	and	$0x3f, %rax		/* rdi alignment in cache line */
	/*
	 * An offset above 0x30 (48) means fewer than 16 bytes remain in the
	 * 64-byte line, so skip the unaligned 16-byte probe below.
	 */
68*533d3a49SEdward Gillett	cmp	$0x30, %ecx
69*533d3a49SEdward Gillett	ja	LABEL(crosscache)	/* rsi: 16-byte load will cross cache line */
70*533d3a49SEdward Gillett	cmp	$0x30, %eax
71*533d3a49SEdward Gillett	ja	LABEL(crosscache)	/* rdi: 16-byte load will cross cache line */
	/* Load 16 bytes of each string as two 8-byte halves. */
72*533d3a49SEdward Gillett	movlpd	(%rdi), %xmm1
73*533d3a49SEdward Gillett	movlpd	(%rsi), %xmm2
74*533d3a49SEdward Gillett	movhpd	8(%rdi), %xmm1
75*533d3a49SEdward Gillett	movhpd	8(%rsi), %xmm2
	/*
	 * Mask idiom used throughout this file:
	 *	%xmm0 byte = 0xff where the string byte is null
	 *	%xmm1 byte = 0xff where the two string bytes are equal
	 * After psubb a byte has its top bit set only when the two bytes
	 * are equal AND non-null, so pmovmskb yields 0xffff exactly when
	 * all 16 bytes match and none of them is a terminator.
	 */
76*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
77*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
78*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1		/* compare first 16 bytes for equality */
79*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
80*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
81*533d3a49SEdward Gillett	sub	$0xffff, %edx		/* if first 16 bytes are same, edx == 0xffff */
82*533d3a49SEdward Gillett	jnz	LABEL(less16bytes)	/* If not, found mismatch or null char */
837c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
84*533d3a49SEdward Gillett	sub	$16, %r11
85*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* finish comparison */
867c478bd9Sstevel@tonic-gate#endif
87*533d3a49SEdward Gillett	add	$16, %rsi		/* prepare to search next 16 bytes */
88*533d3a49SEdward Gillett	add	$16, %rdi		/* prepare to search next 16 bytes */
	/*
	 * Execution falls through into crosscache.  %rcx and %rax still hold
	 * the offsets computed above; adding 16 to the pointers does not
	 * change their low 4 bits, which is all crosscache keeps.
	 */
897c478bd9Sstevel@tonic-gate
904fdb7a01SNobutomo Nakano	/*
91*533d3a49SEdward Gillett	 * Determine rdi and rsi string offsets from 16-byte alignment.
92*533d3a49SEdward Gillett	 * Use relative offset difference between the two to determine which case
93*533d3a49SEdward Gillett	 * below to use.
944fdb7a01SNobutomo Nakano	 */
	/*
	 * On exit from this section:
	 *	%rsi, %rdi	rounded down to 16-byte boundaries
	 *	%rcx, %rax	offsets (0..15) of the strings in those blocks,
	 *			with %rcx >= %rax
	 *	%edx		0xffff
	 *	%r8d		0, or 0xffff if %rsi/%rdi were exchanged
	 */
95*533d3a49SEdward Gillett	.p2align 4
96*533d3a49SEdward GillettLABEL(crosscache):
97*533d3a49SEdward Gillett	and	$0xfffffffffffffff0, %rsi	/* force %rsi to be 16 byte aligned */
98*533d3a49SEdward Gillett	and	$0xfffffffffffffff0, %rdi	/* force %rdi to be 16 byte aligned */
99*533d3a49SEdward Gillett	mov	$0xffff, %edx			/* for equivalent offset */
100*533d3a49SEdward Gillett	xor	%r8d, %r8d			/* 0 = pointers not exchanged */
101*533d3a49SEdward Gillett	and	$0xf, %ecx			/* offset of rsi */
102*533d3a49SEdward Gillett	and	$0xf, %eax			/* offset of rdi */
103*533d3a49SEdward Gillett	cmp	%eax, %ecx
104*533d3a49SEdward Gillett	je	LABEL(ashr_0)			/* both strings have the same alignment */
105*533d3a49SEdward Gillett	ja	LABEL(bigger)			/* rsi already has the larger offset */
	/*
	 * rdi has the larger offset: exchange pointers and offsets so the
	 * code below can always treat %rsi/%rcx as the larger one, and
	 * record the exchange in %r8d.
	 */
106*533d3a49SEdward Gillett	mov	%edx, %r8d			/* r8d is offset flag for exit tail */
107*533d3a49SEdward Gillett	xchg	%ecx, %eax
108*533d3a49SEdward Gillett	xchg	%rsi, %rdi
109*533d3a49SEdward GillettLABEL(bigger):
	/*
	 * %r9 = %rcx - %rax (1..15) indexes unaligned_table.  The table is
	 * read as 4-byte signed entries that are added to the table's own
	 * address to form the jump target.
	 */
110*533d3a49SEdward Gillett	mov	%rcx, %r9
111*533d3a49SEdward Gillett	sub	%rax, %r9
112*533d3a49SEdward Gillett	lea	LABEL(unaligned_table)(%rip), %r10
113*533d3a49SEdward Gillett	movslq	(%r10, %r9, 4), %r9
114*533d3a49SEdward Gillett	lea	(%r10, %r9), %r10
115*533d3a49SEdward Gillett	jmp	*%r10				/* jump to corresponding case */
1167c478bd9Sstevel@tonic-gate
117*533d3a49SEdward Gillett/*
118*533d3a49SEdward Gillett * ashr_0 handles the following cases:
119*533d3a49SEdward Gillett * 	str1 offset = str2 offset
120*533d3a49SEdward Gillett */
/*
 * Both pointers have been rounded down to a 16-byte boundary and both
 * strings start %rcx bytes into their block.  The first block is checked
 * with the bytes before the string start masked off; the loop then
 * compares whole aligned blocks, two per iteration.
 */
121*533d3a49SEdward Gillett	.p2align 4
122*533d3a49SEdward GillettLABEL(ashr_0):
123*533d3a49SEdward Gillett	movdqa	(%rsi), %xmm1
124*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0			/* clear %xmm0 for null char check */
125*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0			/* Any null chars? */
126*533d3a49SEdward Gillett	pcmpeqb	(%rdi), %xmm1			/* compare 16 bytes for equality */
127*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1			/* packed sub of comparison results*/
128*533d3a49SEdward Gillett	pmovmskb %xmm1, %r9d
	/*
	 * Shifting both the result mask and the expected 0xffff right by
	 * the offset discards the bits for bytes that precede the strings.
	 */
129*533d3a49SEdward Gillett	shr	%cl, %edx			/* adjust 0xffff for offset */
130*533d3a49SEdward Gillett	shr	%cl, %r9d			/* adjust for 16-byte offset */
131*533d3a49SEdward Gillett	sub	%r9d, %edx
132*533d3a49SEdward Gillett	/*
133*533d3a49SEdward Gillett	 * edx must be the same with r9d if in left byte (16-rcx) is equal to
134*533d3a49SEdward Gillett	 * the start from (16-rax) and no null char was seen.
135*533d3a49SEdward Gillett	 */
136*533d3a49SEdward Gillett	jne	LABEL(less32bytes)		/* mismatch or null char */
	/* strncmp build: account for the (16 - %rcx) bytes just compared */
137*533d3a49SEdward Gillett	UPDATE_STRNCMP_COUNTER
138*533d3a49SEdward Gillett	mov	$16, %rcx			/* index of the next block to load */
	/* NOTE(review): %r9 is not read in this section; presumably used by the exit code -- confirm */
139*533d3a49SEdward Gillett	mov	$16, %r9
140*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0			/* clear xmm0, may have changed above */
141*533d3a49SEdward Gillett
142*533d3a49SEdward Gillett	/*
143*533d3a49SEdward Gillett	 * Now both strings are aligned at 16-byte boundary. Loop over strings
144*533d3a49SEdward Gillett	 * checking 32-bytes per iteration.
145*533d3a49SEdward Gillett	 */
	/*
	 * %xmm0 is not re-cleared inside the loop: whenever the branch to
	 * exit is not taken the block held no null byte, so the null-check
	 * compare left %xmm0 all zero.
	 */
146*533d3a49SEdward Gillett	.p2align 4
147*533d3a49SEdward GillettLABEL(loop_ashr_0):
148*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
149*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
150*533d3a49SEdward Gillett
151*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
152*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
153*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
154*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
155*533d3a49SEdward Gillett	sub	$0xffff, %edx
156*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
157*533d3a49SEdward Gillett
158*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
159*533d3a49SEdward Gillett	sub	$16, %r11
160*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* count used up with no difference */
161*533d3a49SEdward Gillett#endif
162*533d3a49SEdward Gillett	add	$16, %rcx
	/* second block of this iteration */
163*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
164*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
165*533d3a49SEdward Gillett
166*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
167*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
168*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
169*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
170*533d3a49SEdward Gillett	sub	$0xffff, %edx
171*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
172*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
173*533d3a49SEdward Gillett	sub	$16, %r11
174*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* count used up with no difference */
175*533d3a49SEdward Gillett#endif
176*533d3a49SEdward Gillett	add	$16, %rcx
177*533d3a49SEdward Gillett	jmp	LABEL(loop_ashr_0)
178*533d3a49SEdward Gillett
179*533d3a49SEdward Gillett/*
180*533d3a49SEdward Gillett * ashr_1 handles the following cases:
181*533d3a49SEdward Gillett * 	abs(str1 offset - str2 offset) = 15
182*533d3a49SEdward Gillett */
/*
 * Here %rsi is the block pointer of the string with the larger offset
 * (%rcx).  Each aligned %rsi block is compared against 16 bytes of the
 * other string that are assembled from two consecutive aligned %rdi
 * blocks: the previous block shifted right by 1 byte merged with the
 * next block shifted left by 15 bytes.
 */
183*533d3a49SEdward Gillett	.p2align 4
184*533d3a49SEdward GillettLABEL(ashr_1):
185*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
186*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm2
187*533d3a49SEdward Gillett	movdqa	(%rsi), %xmm1
188*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
189*533d3a49SEdward Gillett	pslldq	$15, %xmm2		/* shift first string to align with second */
190*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm2		/* compare 16 bytes for equality */
191*533d3a49SEdward Gillett	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
192*533d3a49SEdward Gillett	pmovmskb %xmm2, %r9d
193*533d3a49SEdward Gillett	shr	%cl, %edx		/* adjust 0xffff for offset */
194*533d3a49SEdward Gillett	shr	%cl, %r9d		/* adjust for 16-byte offset */
195*533d3a49SEdward Gillett	sub	%r9d, %edx
196*533d3a49SEdward Gillett	jnz	LABEL(less32bytes)	/* mismatch or null char seen */
197*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm3		/* keep first rdi block for the merge */
	/* strncmp build: account for the (16 - %rcx) bytes just compared */
198*533d3a49SEdward Gillett	UPDATE_STRNCMP_COUNTER
199*533d3a49SEdward Gillett
200*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
201*533d3a49SEdward Gillett	mov	$16, %rcx		/* index for loads */
202*533d3a49SEdward Gillett	mov	$1, %r9d		/* rdi bytes already examined. Used in exit code */
203*533d3a49SEdward Gillett	/*
204*533d3a49SEdward Gillett	 * Setup %r10 value allows us to detect crossing a page boundary.
205*533d3a49SEdward Gillett	 * When %r10 goes positive we are crossing a page boundary and
206*533d3a49SEdward Gillett	 * need to do a nibble.
207*533d3a49SEdward Gillett	 */
208*533d3a49SEdward Gillett	lea	1(%rdi), %r10
209*533d3a49SEdward Gillett	and	$0xfff, %r10		/* offset into 4K page */
210*533d3a49SEdward Gillett	sub	$0x1000, %r10		/* subtract 4K pagesize */
211*533d3a49SEdward Gillett	movdqa	%xmm3, %xmm4		/* %xmm4 = copy used by the nibble path */
2127c478bd9Sstevel@tonic-gate
	/*
	 * Loop state: %xmm3 and %xmm4 hold the previous %rdi block, %rcx is
	 * the index of the next block, %xmm0 is zero.  Two blocks are
	 * processed per iteration.
	 */
2137c478bd9Sstevel@tonic-gate	.p2align 4
214*533d3a49SEdward GillettLABEL(loop_ashr_1):
215*533d3a49SEdward Gillett	add	$16, %r10
216*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_1)	/* cross page boundary */
2177c478bd9Sstevel@tonic-gate
218*533d3a49SEdward GillettLABEL(gobble_ashr_1):
219*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
220*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
221*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4		 /* store for next cycle */
2227c478bd9Sstevel@tonic-gate
223*533d3a49SEdward Gillett	psrldq	$1, %xmm3		/* last 15 bytes of previous block */
224*533d3a49SEdward Gillett	pslldq	$15, %xmm2		/* first byte of new block */
225*533d3a49SEdward Gillett	por	%xmm3, %xmm2		/* merge into one 16byte value */
226*533d3a49SEdward Gillett
227*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
228*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
229*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
230*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
231*533d3a49SEdward Gillett	sub	$0xffff, %edx
232*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
233*533d3a49SEdward Gillett
234*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
235*533d3a49SEdward Gillett	sub	$16, %r11
236*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* count used up with no difference */
237*533d3a49SEdward Gillett#endif
238*533d3a49SEdward Gillett	add	$16, %rcx
239*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3		/* new block becomes the previous block */
240*533d3a49SEdward Gillett
241*533d3a49SEdward Gillett	add	$16, %r10
242*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_1)	/* cross page boundary */
243*533d3a49SEdward Gillett
	/* second block of this iteration */
244*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
245*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
246*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4		/* store for next cycle */
247*533d3a49SEdward Gillett
248*533d3a49SEdward Gillett	psrldq	$1, %xmm3
249*533d3a49SEdward Gillett	pslldq 	$15, %xmm2
250*533d3a49SEdward Gillett	por	%xmm3, %xmm2		/* merge into one 16byte value */
251*533d3a49SEdward Gillett
252*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
253*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
254*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
255*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
256*533d3a49SEdward Gillett	sub	$0xffff, %edx
257*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
258*533d3a49SEdward Gillett
259*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
260*533d3a49SEdward Gillett	sub	$16, %r11
261*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* count used up with no difference */
262*533d3a49SEdward Gillett#endif
263*533d3a49SEdward Gillett	add	$16, %rcx
264*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
265*533d3a49SEdward Gillett	jmp	LABEL(loop_ashr_1)
266*533d3a49SEdward Gillett
267*533d3a49SEdward Gillett	/*
268*533d3a49SEdward Gillett	 * Nibble avoids loads across page boundary. This is to avoid a potential
269*533d3a49SEdward Gillett	 * access into unmapped memory.
270*533d3a49SEdward Gillett	 */
	/*
	 * Only the 15 bytes still held in %xmm4 are compared with the %rsi
	 * block.  After the shift byte 15 of %xmm4 is zero, so mask bit 15
	 * can never be set and 0x7fff means "all 15 bytes equal and
	 * non-null".  Only in that case is the next %rdi block loaded, by
	 * rejoining the loop at gobble_ashr_1.
	 */
271*533d3a49SEdward Gillett	.p2align 4
272*533d3a49SEdward GillettLABEL(nibble_ashr_1):
273*533d3a49SEdward Gillett	psrldq	$1, %xmm4
274*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
275*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
276*533d3a49SEdward Gillett	pcmpeqb	%xmm4, %xmm1
277*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
278*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
279*533d3a49SEdward Gillett	sub	$0x7fff, %edx
280*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
281*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
282*533d3a49SEdward Gillett	cmp	$15, %r11		/* %r11 itself is left unchanged */
283*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* all remaining bytes just matched */
284*533d3a49SEdward Gillett#endif
285*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0		/* byte 15 of the rsi block may have been null */
286*533d3a49SEdward Gillett	sub	$0x1000, %r10		/* subtract 4K from %r10 */
287*533d3a49SEdward Gillett	jmp	LABEL(gobble_ashr_1)
288*533d3a49SEdward Gillett
289*533d3a49SEdward Gillett/*
290*533d3a49SEdward Gillett * ashr_2 handles the following cases:
291*533d3a49SEdward Gillett * 	abs(str1 offset - str2 offset) = 14
292*533d3a49SEdward Gillett */
/*
 * Same scheme as ashr_1 with a 2-byte relative shift: each aligned %rsi
 * block is compared against the previous %rdi block shifted right by 2
 * bytes merged with the next %rdi block shifted left by 14 bytes.
 */
293*533d3a49SEdward Gillett	.p2align 4
294*533d3a49SEdward GillettLABEL(ashr_2):
295*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
296*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm2
297*533d3a49SEdward Gillett	movdqa	(%rsi), %xmm1
298*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
299*533d3a49SEdward Gillett	pslldq	$14, %xmm2		/* shift first string to align with second */
300*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm2		/* compare 16 bytes for equality */
301*533d3a49SEdward Gillett	psubb	%xmm0, %xmm2		/* packed sub of comparison results */
302*533d3a49SEdward Gillett	pmovmskb %xmm2, %r9d
303*533d3a49SEdward Gillett	shr	%cl, %edx		/* adjust 0xffff for offset */
304*533d3a49SEdward Gillett	shr	%cl, %r9d		/* adjust for 16-byte offset */
305*533d3a49SEdward Gillett	sub	%r9d, %edx
306*533d3a49SEdward Gillett	jnz	LABEL(less32bytes)	/* mismatch or null char seen */
307*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm3		/* keep first rdi block for the merge */
	/* strncmp build: account for the (16 - %rcx) bytes just compared */
308*533d3a49SEdward Gillett	UPDATE_STRNCMP_COUNTER
309*533d3a49SEdward Gillett
310*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
311*533d3a49SEdward Gillett	mov	$16, %rcx	/* index for loads */
312*533d3a49SEdward Gillett	mov	$2, %r9d	/* rdi bytes already examined. Used in exit code */
313*533d3a49SEdward Gillett	/*
314*533d3a49SEdward Gillett	 * Setup %r10 value allows us to detect crossing a page boundary.
315*533d3a49SEdward Gillett	 * When %r10 goes positive we are crossing a page boundary and
316*533d3a49SEdward Gillett	 * need to do a nibble.
317*533d3a49SEdward Gillett	 */
318*533d3a49SEdward Gillett	lea	2(%rdi), %r10
319*533d3a49SEdward Gillett	and	$0xfff, %r10	/* offset into 4K page */
320*533d3a49SEdward Gillett	sub	$0x1000, %r10	/* subtract 4K pagesize */
321*533d3a49SEdward Gillett	movdqa	%xmm3, %xmm4	/* %xmm4 = copy used by the nibble path */
3227c478bd9Sstevel@tonic-gate
	/*
	 * Loop state: %xmm3 and %xmm4 hold the previous %rdi block, %rcx is
	 * the index of the next block, %xmm0 is zero.  Two blocks are
	 * processed per iteration.
	 */
3237c478bd9Sstevel@tonic-gate	.p2align 4
324*533d3a49SEdward GillettLABEL(loop_ashr_2):
325*533d3a49SEdward Gillett	add	$16, %r10
326*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_2)	/* cross page boundary */
3277c478bd9Sstevel@tonic-gate
328*533d3a49SEdward GillettLABEL(gobble_ashr_2):
329*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
330*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
331*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4		/* store for next cycle */
332*533d3a49SEdward Gillett
333*533d3a49SEdward Gillett	psrldq	$2, %xmm3		/* last 14 bytes of previous block */
334*533d3a49SEdward Gillett	pslldq	$14, %xmm2		/* first 2 bytes of new block */
335*533d3a49SEdward Gillett	por	%xmm3, %xmm2		/* merge into one 16byte value */
336*533d3a49SEdward Gillett
337*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
338*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
339*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
340*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
341*533d3a49SEdward Gillett	sub	$0xffff, %edx
342*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
3437c478bd9Sstevel@tonic-gate
3447c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
345*533d3a49SEdward Gillett	sub	$16, %r11
346*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* count used up with no difference */
3477c478bd9Sstevel@tonic-gate#endif
3487c478bd9Sstevel@tonic-gate
349*533d3a49SEdward Gillett	add	$16, %rcx
350*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3		/* new block becomes the previous block */
3517c478bd9Sstevel@tonic-gate
352*533d3a49SEdward Gillett	add	$16, %r10
353*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_2)	/* cross page boundary */
3547c478bd9Sstevel@tonic-gate
	/* second block of this iteration */
355*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
356*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
357*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4		/* store for next cycle */
358*533d3a49SEdward Gillett
359*533d3a49SEdward Gillett	psrldq	$2, %xmm3
360*533d3a49SEdward Gillett	pslldq 	$14, %xmm2
361*533d3a49SEdward Gillett	por	%xmm3, %xmm2		/* merge into one 16byte value */
362*533d3a49SEdward Gillett
363*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
364*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
365*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
366*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
367*533d3a49SEdward Gillett	sub	$0xffff, %edx
368*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
369*533d3a49SEdward Gillett
370*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
371*533d3a49SEdward Gillett	sub	$16, %r11
372*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* count used up with no difference */
373*533d3a49SEdward Gillett#endif
374*533d3a49SEdward Gillett
375*533d3a49SEdward Gillett	add	$16, %rcx
376*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
377*533d3a49SEdward Gillett	jmp	LABEL(loop_ashr_2)
3787c478bd9Sstevel@tonic-gate
	/*
	 * Reached when %r10 has gone positive (page-boundary check above).
	 * Only the 14 bytes still held in %xmm4 are compared with the %rsi
	 * block.  After the shift bytes 14-15 of %xmm4 are zero, so mask
	 * bits 14-15 can never be set and 0x3fff means "all 14 bytes equal
	 * and non-null".  Only in that case is the next %rdi block loaded,
	 * by rejoining the loop at gobble_ashr_2.
	 */
3797c478bd9Sstevel@tonic-gate	.p2align 4
380*533d3a49SEdward GillettLABEL(nibble_ashr_2):
381*533d3a49SEdward Gillett	psrldq	$2, %xmm4
382*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
383*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
384*533d3a49SEdward Gillett	pcmpeqb	%xmm4, %xmm1
385*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
386*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
387*533d3a49SEdward Gillett	sub	$0x3fff, %edx
388*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
389*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
390*533d3a49SEdward Gillett	cmp	$14, %r11		/* %r11 itself is left unchanged */
391*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* all remaining bytes just matched */
392*533d3a49SEdward Gillett#endif
393*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0		/* upper rsi bytes may have held a null */
394*533d3a49SEdward Gillett	sub	$0x1000, %r10		/* subtract 4K from %r10 */
395*533d3a49SEdward Gillett	jmp	LABEL(gobble_ashr_2)
3967c478bd9Sstevel@tonic-gate
397*533d3a49SEdward Gillett/*
398*533d3a49SEdward Gillett * ashr_3 handles the following cases:
399*533d3a49SEdward Gillett * 	abs(str1 offset - str2 offset) = 13
400*533d3a49SEdward Gillett */
/*
 * Same scheme as ashr_1 with a 3-byte relative shift: each aligned %rsi
 * block is compared against the previous %rdi block shifted right by 3
 * bytes merged with the next %rdi block shifted left by 13 bytes.
 */
401*533d3a49SEdward Gillett	.p2align 4
402*533d3a49SEdward GillettLABEL(ashr_3):
403*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
404*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm2
405*533d3a49SEdward Gillett	movdqa	(%rsi), %xmm1
406*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
407*533d3a49SEdward Gillett	pslldq	$13, %xmm2		/* shift first string to align with second */
408*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm2		/* compare 16 bytes for equality */
409*533d3a49SEdward Gillett	psubb	%xmm0, %xmm2		/* packed sub of comparison results */
410*533d3a49SEdward Gillett	pmovmskb %xmm2, %r9d
411*533d3a49SEdward Gillett	shr	%cl, %edx		/* adjust 0xffff for offset */
412*533d3a49SEdward Gillett	shr	%cl, %r9d		/* adjust for 16-byte offset */
413*533d3a49SEdward Gillett	sub	%r9d, %edx
414*533d3a49SEdward Gillett	jnz	LABEL(less32bytes)	/* mismatch or null char seen */
415*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm3		/* keep first rdi block for the merge */
416*533d3a49SEdward Gillett
	/* strncmp build: account for the (16 - %rcx) bytes just compared */
417*533d3a49SEdward Gillett	UPDATE_STRNCMP_COUNTER
418*533d3a49SEdward Gillett
419*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
420*533d3a49SEdward Gillett	mov	$16, %rcx	/* index for loads */
421*533d3a49SEdward Gillett	mov	$3, %r9d	/* rdi bytes already examined. Used in exit code */
422*533d3a49SEdward Gillett	/*
423*533d3a49SEdward Gillett	 * Setup %r10 value allows us to detect crossing a page boundary.
424*533d3a49SEdward Gillett	 * When %r10 goes positive we are crossing a page boundary and
425*533d3a49SEdward Gillett	 * need to do a nibble.
426*533d3a49SEdward Gillett	 */
427*533d3a49SEdward Gillett	lea	3(%rdi), %r10
428*533d3a49SEdward Gillett	and	$0xfff, %r10	/* offset into 4K page */
429*533d3a49SEdward Gillett	sub	$0x1000, %r10	/* subtract 4K pagesize */
430*533d3a49SEdward Gillett	movdqa	%xmm3, %xmm4	/* %xmm4 = copy used by the nibble path */
431*533d3a49SEdward Gillett
	/*
	 * Loop state: %xmm3 and %xmm4 hold the previous %rdi block, %rcx is
	 * the index of the next block, %xmm0 is zero.  Two blocks are
	 * processed per iteration.
	 */
432*533d3a49SEdward Gillett	.p2align 4
433*533d3a49SEdward GillettLABEL(loop_ashr_3):
434*533d3a49SEdward Gillett	add	$16, %r10
435*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_3)	/* cross page boundary */
436*533d3a49SEdward Gillett
437*533d3a49SEdward GillettLABEL(gobble_ashr_3):
438*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
439*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
440*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4		/* store for next cycle */
441*533d3a49SEdward Gillett
442*533d3a49SEdward Gillett	psrldq	$3, %xmm3		/* last 13 bytes of previous block */
443*533d3a49SEdward Gillett	pslldq	$13, %xmm2		/* first 3 bytes of new block */
444*533d3a49SEdward Gillett	por	%xmm3, %xmm2		/* merge into one 16byte value */
445*533d3a49SEdward Gillett
446*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
447*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
448*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
449*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
450*533d3a49SEdward Gillett	sub	$0xffff, %edx
451*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
4527c478bd9Sstevel@tonic-gate
4537c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
454*533d3a49SEdward Gillett	sub	$16, %r11
455*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* count used up with no difference */
4567c478bd9Sstevel@tonic-gate#endif
4577c478bd9Sstevel@tonic-gate
458*533d3a49SEdward Gillett	add	$16, %rcx
459*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3		/* new block becomes the previous block */
4607c478bd9Sstevel@tonic-gate
461*533d3a49SEdward Gillett	add	$16, %r10
462*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_3)	/* cross page boundary */
4637c478bd9Sstevel@tonic-gate
	/* second block of this iteration */
464*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
465*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
466*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4		/* store for next cycle */
4677c478bd9Sstevel@tonic-gate
468*533d3a49SEdward Gillett	psrldq	$3, %xmm3
469*533d3a49SEdward Gillett	pslldq 	$13, %xmm2
470*533d3a49SEdward Gillett	por	%xmm3, %xmm2		/* merge into one 16byte value */
4717c478bd9Sstevel@tonic-gate
472*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
473*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
474*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
475*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
476*533d3a49SEdward Gillett	sub	$0xffff, %edx
477*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
4787c478bd9Sstevel@tonic-gate
4797c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
480*533d3a49SEdward Gillett	sub	$16, %r11
481*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* count used up with no difference */
4827c478bd9Sstevel@tonic-gate#endif
4837c478bd9Sstevel@tonic-gate
484*533d3a49SEdward Gillett	add	$16, %rcx
485*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
486*533d3a49SEdward Gillett	jmp	LABEL(loop_ashr_3)
4877c478bd9Sstevel@tonic-gate
	/*
	 * Reached when %r10 has gone positive (page-boundary check above).
	 * Only the 13 bytes still held in %xmm4 are compared with the %rsi
	 * block.  After the shift bytes 13-15 of %xmm4 are zero, so mask
	 * bits 13-15 can never be set and 0x1fff means "all 13 bytes equal
	 * and non-null".  Only in that case is the next %rdi block loaded,
	 * by rejoining the loop at gobble_ashr_3.
	 */
488*533d3a49SEdward Gillett	.p2align 4
489*533d3a49SEdward GillettLABEL(nibble_ashr_3):
490*533d3a49SEdward Gillett	psrldq	$3, %xmm4
491*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
492*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
493*533d3a49SEdward Gillett	pcmpeqb	%xmm4, %xmm1
494*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
495*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
496*533d3a49SEdward Gillett	sub	$0x1fff, %edx
497*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
498*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
499*533d3a49SEdward Gillett	cmp	$13, %r11		/* %r11 itself is left unchanged */
500*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* all remaining bytes just matched */
501*533d3a49SEdward Gillett#endif
502*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0		/* upper rsi bytes may have held a null */
503*533d3a49SEdward Gillett	sub	$0x1000, %r10		/* subtract 4K from %r10 */
504*533d3a49SEdward Gillett	jmp	LABEL(gobble_ashr_3)
5057c478bd9Sstevel@tonic-gate
506*533d3a49SEdward Gillett/*
507*533d3a49SEdward Gillett * ashr_4 handles the following cases:
508*533d3a49SEdward Gillett * 	abs(str1 offset - str2 offset) = 12
509*533d3a49SEdward Gillett */
/*
 * Same scheme as ashr_1 with a 4-byte relative shift: each aligned %rsi
 * block is compared against the previous %rdi block shifted right by 4
 * bytes merged with the next %rdi block shifted left by 12 bytes.
 */
510*533d3a49SEdward Gillett	.p2align 4
511*533d3a49SEdward GillettLABEL(ashr_4):
512*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
513*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm2
514*533d3a49SEdward Gillett	movdqa	(%rsi), %xmm1
515*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
516*533d3a49SEdward Gillett	pslldq	$12, %xmm2		/* shift first string to align with second */
517*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm2		/* compare 16 bytes for equality */
518*533d3a49SEdward Gillett	psubb	%xmm0, %xmm2		/* packed sub of comparison results */
519*533d3a49SEdward Gillett	pmovmskb %xmm2, %r9d
520*533d3a49SEdward Gillett	shr	%cl, %edx		/* adjust 0xffff for offset */
521*533d3a49SEdward Gillett	shr	%cl, %r9d		/* adjust for 16-byte offset */
522*533d3a49SEdward Gillett	sub	%r9d, %edx
523*533d3a49SEdward Gillett	jnz	LABEL(less32bytes)	/* mismatch or null char seen */
524*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm3		/* keep first rdi block for the merge */
5257c478bd9Sstevel@tonic-gate
	/* strncmp build: account for the (16 - %rcx) bytes just compared */
526*533d3a49SEdward Gillett	UPDATE_STRNCMP_COUNTER
5277c478bd9Sstevel@tonic-gate
528*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
529*533d3a49SEdward Gillett	mov	$16, %rcx	/* index for loads */
530*533d3a49SEdward Gillett	mov	$4, %r9d	/* rdi bytes already examined. Used in exit code */
531*533d3a49SEdward Gillett	/*
532*533d3a49SEdward Gillett	 * Setup %r10 value allows us to detect crossing a page boundary.
533*533d3a49SEdward Gillett	 * When %r10 goes positive we are crossing a page boundary and
534*533d3a49SEdward Gillett	 * need to do a nibble.
535*533d3a49SEdward Gillett	 */
536*533d3a49SEdward Gillett	lea	4(%rdi), %r10
537*533d3a49SEdward Gillett	and	$0xfff, %r10	/* offset into 4K page */
538*533d3a49SEdward Gillett	sub	$0x1000, %r10	/* subtract 4K pagesize */
539*533d3a49SEdward Gillett	movdqa	%xmm3, %xmm4	/* %xmm4 = copy used by the nibble path */
5407c478bd9Sstevel@tonic-gate
	/*
	 * Loop state: %xmm3 and %xmm4 hold the previous %rdi block, %rcx is
	 * the index of the next block, %xmm0 is zero.  Two blocks are
	 * processed per iteration.
	 */
541*533d3a49SEdward Gillett	.p2align 4
542*533d3a49SEdward GillettLABEL(loop_ashr_4):
543*533d3a49SEdward Gillett	add	$16, %r10
544*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_4)	/* cross page boundary */
545*533d3a49SEdward Gillett
546*533d3a49SEdward GillettLABEL(gobble_ashr_4):
547*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
548*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
549*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4		/* store for next cycle */
550*533d3a49SEdward Gillett
551*533d3a49SEdward Gillett	psrldq	$4, %xmm3		/* last 12 bytes of previous block */
552*533d3a49SEdward Gillett	pslldq	$12, %xmm2		/* first 4 bytes of new block */
553*533d3a49SEdward Gillett	por	%xmm3, %xmm2		/* merge into one 16byte value */
554*533d3a49SEdward Gillett
555*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
556*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
557*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
558*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
559*533d3a49SEdward Gillett	sub	$0xffff, %edx
560*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
5617c478bd9Sstevel@tonic-gate
5627c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
563*533d3a49SEdward Gillett	sub	$16, %r11
564*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* count used up with no difference */
5657c478bd9Sstevel@tonic-gate#endif
5667c478bd9Sstevel@tonic-gate
567*533d3a49SEdward Gillett	add	$16, %rcx
568*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3		/* new block becomes the previous block */
5697c478bd9Sstevel@tonic-gate
570*533d3a49SEdward Gillett	add	$16, %r10
571*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_4)	/* cross page boundary */
5727c478bd9Sstevel@tonic-gate
	/* second block of this iteration */
573*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
574*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
575*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4		/* store for next cycle */
5767c478bd9Sstevel@tonic-gate
577*533d3a49SEdward Gillett	psrldq	$4, %xmm3
578*533d3a49SEdward Gillett	pslldq 	$12, %xmm2
579*533d3a49SEdward Gillett	por	%xmm3, %xmm2		/* merge into one 16byte value */
5807c478bd9Sstevel@tonic-gate
581*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
582*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
583*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
584*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
585*533d3a49SEdward Gillett	sub	$0xffff, %edx
586*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
5877c478bd9Sstevel@tonic-gate
5887c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
589*533d3a49SEdward Gillett	sub	$16, %r11
590*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* count used up with no difference */
5917c478bd9Sstevel@tonic-gate#endif
5927c478bd9Sstevel@tonic-gate
593*533d3a49SEdward Gillett	add	$16, %rcx
594*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
595*533d3a49SEdward Gillett	jmp	LABEL(loop_ashr_4)
5967c478bd9Sstevel@tonic-gate
	/*
	 * Reached when %r10 has gone positive (page-boundary check above).
	 * Only the 12 bytes still held in %xmm4 are compared with the %rsi
	 * block.  After the shift bytes 12-15 of %xmm4 are zero, so mask
	 * bits 12-15 can never be set and 0x0fff means "all 12 bytes equal
	 * and non-null".  Only in that case is the next %rdi block loaded,
	 * by rejoining the loop at gobble_ashr_4.
	 */
597*533d3a49SEdward Gillett	.p2align 4
598*533d3a49SEdward GillettLABEL(nibble_ashr_4):
599*533d3a49SEdward Gillett	psrldq	$4, %xmm4
600*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
601*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
602*533d3a49SEdward Gillett	pcmpeqb	%xmm4, %xmm1
603*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
604*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
605*533d3a49SEdward Gillett	sub	$0x0fff, %edx
606*533d3a49SEdward Gillett	jnz	LABEL(exit)		/* mismatch or null char seen */
607*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
608*533d3a49SEdward Gillett	cmp	$12, %r11		/* %r11 itself is left unchanged */
609*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)	/* all remaining bytes just matched */
610*533d3a49SEdward Gillett#endif
611*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0		/* upper rsi bytes may have held a null */
612*533d3a49SEdward Gillett	sub	$0x1000, %r10		/* subtract 4K from %r10 */
613*533d3a49SEdward Gillett	jmp	LABEL(gobble_ashr_4)
6147c478bd9Sstevel@tonic-gate
615*533d3a49SEdward Gillett/*
616*533d3a49SEdward Gillett * ashr_5 handles the following cases:
617*533d3a49SEdward Gillett * 	abs(str1 offset - str2 offset) = 11
 *
 * Entry: the first aligned 16-byte chunk at (%rdi) is shifted left by 11
 * bytes and compared with the first aligned chunk at (%rsi).  %edx and %cl
 * are set before this label is reached (not visible in this section).
 * Loop: every step builds a 16-byte window from the upper 11 bytes of the
 * previous %rdi chunk and the lower 5 bytes of the next one, then compares
 * it with the next aligned %rsi chunk.  The loop holds two such steps.
 * nibble_ashr_5 is taken when %r10 goes positive; it compares only the 11
 * bytes already held in %xmm4, before the next %rdi chunk is loaded.
618*533d3a49SEdward Gillett */
619*533d3a49SEdward Gillett	.p2align 4
620*533d3a49SEdward GillettLABEL(ashr_5):
621*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
622*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm2
623*533d3a49SEdward Gillett	movdqa	(%rsi), %xmm1
	/* NUL mask: %xmm0 = 0xff where the %rsi chunk holds a zero byte */
624*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
625*533d3a49SEdward Gillett	pslldq	$11, %xmm2
626*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm2
	/* after psubb the high bit is set only for bytes equal and not NUL */
627*533d3a49SEdward Gillett	psubb	%xmm0, %xmm2
628*533d3a49SEdward Gillett	pmovmskb %xmm2, %r9d
	/* drop the low %cl bits of both masks, then compare what is left */
629*533d3a49SEdward Gillett	shr	%cl, %edx
630*533d3a49SEdward Gillett	shr	%cl, %r9d
631*533d3a49SEdward Gillett	sub	%r9d, %edx
632*533d3a49SEdward Gillett	jnz	LABEL(less32bytes)
	/* keep the unshifted first %rdi chunk as the "previous" chunk */
633*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm3
6347c478bd9Sstevel@tonic-gate
	/* macro defined outside this section */
635*533d3a49SEdward Gillett	UPDATE_STRNCMP_COUNTER
6367c478bd9Sstevel@tonic-gate
637*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
638*533d3a49SEdward Gillett	mov	$16, %rcx	/* index for loads */
639*533d3a49SEdward Gillett	mov	$5, %r9d	/* rdi bytes already examined. Used in exit code */
640*533d3a49SEdward Gillett	/*
641*533d3a49SEdward Gillett	 * Set up %r10 so that we can detect crossing a page boundary.
642*533d3a49SEdward Gillett	 * When %r10 goes positive we are crossing a page boundary and
643*533d3a49SEdward Gillett	 * need to do a nibble.
644*533d3a49SEdward Gillett	 */
645*533d3a49SEdward Gillett	lea	5(%rdi), %r10
646*533d3a49SEdward Gillett	and	$0xfff, %r10	/* offset into 4K page */
647*533d3a49SEdward Gillett	sub	$0x1000, %r10	/* subtract 4K pagesize */
648*533d3a49SEdward Gillett	movdqa	%xmm3, %xmm4
6497c478bd9Sstevel@tonic-gate
650*533d3a49SEdward Gillett	.p2align 4
651*533d3a49SEdward GillettLABEL(loop_ashr_5):
652*533d3a49SEdward Gillett	add	$16, %r10
653*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_5)
654*533d3a49SEdward Gillett
655*533d3a49SEdward GillettLABEL(gobble_ashr_5):
	/* load the next aligned chunks; %xmm4 keeps an unshifted %rdi copy */
656*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
657*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
658*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4
659*533d3a49SEdward Gillett
	/* window = (previous chunk >> 5 bytes) | (this chunk << 11 bytes) */
660*533d3a49SEdward Gillett	psrldq	$5, %xmm3
661*533d3a49SEdward Gillett	pslldq	$11, %xmm2
662*533d3a49SEdward Gillett	por	%xmm3, %xmm2
663*533d3a49SEdward Gillett
664*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
665*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
666*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
667*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
	/*
	 * A mask of 0xffff means all 16 bytes are equal and none is NUL.
	 * In that case the NUL mask in %xmm0 is all zero again, so the
	 * next pcmpeqb can reuse it without another pxor.
	 */
668*533d3a49SEdward Gillett	sub	$0xffff, %edx
669*533d3a49SEdward Gillett	jnz	LABEL(exit)
6707c478bd9Sstevel@tonic-gate
6717c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
	/* NOTE(review): %r11 looks like the remaining strncmp count - confirm */
672*533d3a49SEdward Gillett	sub	$16, %r11
673*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
6747c478bd9Sstevel@tonic-gate#endif
6757c478bd9Sstevel@tonic-gate
676*533d3a49SEdward Gillett	add	$16, %rcx
677*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
6787c478bd9Sstevel@tonic-gate
	/* second compare step: same sequence as the one above */
679*533d3a49SEdward Gillett	add	$16, %r10
680*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_5)	/* cross page boundary */
6817c478bd9Sstevel@tonic-gate
682*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
683*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
684*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4
6857c478bd9Sstevel@tonic-gate
686*533d3a49SEdward Gillett	psrldq	$5, %xmm3
687*533d3a49SEdward Gillett	pslldq 	$11, %xmm2
688*533d3a49SEdward Gillett	por	%xmm3, %xmm2
6897c478bd9Sstevel@tonic-gate
690*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
691*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
692*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
693*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
694*533d3a49SEdward Gillett	sub	$0xffff, %edx
695*533d3a49SEdward Gillett	jnz	LABEL(exit)
6967c478bd9Sstevel@tonic-gate
6977c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
698*533d3a49SEdward Gillett	sub	$16, %r11
699*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
7007c478bd9Sstevel@tonic-gate#endif
7017c478bd9Sstevel@tonic-gate
702*533d3a49SEdward Gillett	add	$16, %rcx
703*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
704*533d3a49SEdward Gillett	jmp	LABEL(loop_ashr_5)
7057c478bd9Sstevel@tonic-gate
706*533d3a49SEdward Gillett	.p2align 4
707*533d3a49SEdward GillettLABEL(nibble_ashr_5):
	/* compare only the 11 bytes left in %xmm4; no new %rdi load here */
708*533d3a49SEdward Gillett	psrldq	$5, %xmm4
709*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
710*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
711*533d3a49SEdward Gillett	pcmpeqb	%xmm4, %xmm1
712*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
713*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
	/* 0x07ff = low 11 mask bits: those 11 bytes are equal and not NUL */
714*533d3a49SEdward Gillett	sub	$0x07ff, %edx
715*533d3a49SEdward Gillett	jnz	LABEL(exit)
716*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
	/* NOTE(review): presumably done if at most 11 bytes remain - confirm */
717*533d3a49SEdward Gillett	cmp	$11, %r11
718*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
719*533d3a49SEdward Gillett#endif
	/* the partial check does not prove %xmm0 is zero, so clear it */
720*533d3a49SEdward Gillett 	pxor	%xmm0, %xmm0
721*533d3a49SEdward Gillett	sub	$0x1000, %r10		/* subtract 4K from %r10 */
722*533d3a49SEdward Gillett	jmp	LABEL(gobble_ashr_5)
7237c478bd9Sstevel@tonic-gate
724*533d3a49SEdward Gillett/*
725*533d3a49SEdward Gillett * ashr_6 handles the following cases:
726*533d3a49SEdward Gillett * 	abs(str1 offset - str2 offset) = 10
 *
 * Entry: the first aligned 16-byte chunk at (%rdi) is shifted left by 10
 * bytes and compared with the first aligned chunk at (%rsi).  %edx and %cl
 * are set before this label is reached (not visible in this section).
 * Loop: every step builds a 16-byte window from the upper 10 bytes of the
 * previous %rdi chunk and the lower 6 bytes of the next one, then compares
 * it with the next aligned %rsi chunk.  The loop holds two such steps.
 * nibble_ashr_6 is taken when %r10 goes positive; it compares only the 10
 * bytes already held in %xmm4, before the next %rdi chunk is loaded.
727*533d3a49SEdward Gillett */
728*533d3a49SEdward Gillett	.p2align 4
729*533d3a49SEdward GillettLABEL(ashr_6):
730*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
731*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm2
732*533d3a49SEdward Gillett	movdqa	(%rsi), %xmm1
	/* NUL mask: %xmm0 = 0xff where the %rsi chunk holds a zero byte */
733*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
734*533d3a49SEdward Gillett	pslldq	$10, %xmm2
735*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm2
	/* after psubb the high bit is set only for bytes equal and not NUL */
736*533d3a49SEdward Gillett	psubb	%xmm0, %xmm2
737*533d3a49SEdward Gillett	pmovmskb %xmm2, %r9d
	/* drop the low %cl bits of both masks, then compare what is left */
738*533d3a49SEdward Gillett	shr	%cl, %edx
739*533d3a49SEdward Gillett	shr	%cl, %r9d
740*533d3a49SEdward Gillett	sub	%r9d, %edx
741*533d3a49SEdward Gillett	jnz	LABEL(less32bytes)
	/* keep the unshifted first %rdi chunk as the "previous" chunk */
742*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm3
7437c478bd9Sstevel@tonic-gate
	/* macro defined outside this section */
744*533d3a49SEdward Gillett	UPDATE_STRNCMP_COUNTER
7457c478bd9Sstevel@tonic-gate
746*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
747*533d3a49SEdward Gillett	mov	$16, %rcx	/* index for loads */
748*533d3a49SEdward Gillett	mov	$6, %r9d	/* rdi bytes already examined. Used in exit code */
749*533d3a49SEdward Gillett	/*
750*533d3a49SEdward Gillett	 * Set up %r10 so that we can detect crossing a page boundary.
751*533d3a49SEdward Gillett	 * When %r10 goes positive we are crossing a page boundary and
752*533d3a49SEdward Gillett	 * need to do a nibble.
753*533d3a49SEdward Gillett	 */
754*533d3a49SEdward Gillett	lea	6(%rdi), %r10
755*533d3a49SEdward Gillett	and	$0xfff, %r10	/* offset into 4K page */
756*533d3a49SEdward Gillett	sub	$0x1000, %r10	/* subtract 4K pagesize */
757*533d3a49SEdward Gillett	movdqa	%xmm3, %xmm4
7587c478bd9Sstevel@tonic-gate
759*533d3a49SEdward Gillett	.p2align 4
760*533d3a49SEdward GillettLABEL(loop_ashr_6):
761*533d3a49SEdward Gillett	add	$16, %r10
762*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_6)
763*533d3a49SEdward Gillett
764*533d3a49SEdward GillettLABEL(gobble_ashr_6):
	/* load the next aligned chunks; %xmm4 keeps an unshifted %rdi copy */
765*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
766*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
767*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4
768*533d3a49SEdward Gillett
	/* window = (previous chunk >> 6 bytes) | (this chunk << 10 bytes) */
769*533d3a49SEdward Gillett	psrldq	$6, %xmm3
770*533d3a49SEdward Gillett	pslldq	$10, %xmm2
771*533d3a49SEdward Gillett	por	%xmm3, %xmm2
772*533d3a49SEdward Gillett
773*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
774*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
775*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
776*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
	/*
	 * A mask of 0xffff means all 16 bytes are equal and none is NUL.
	 * In that case the NUL mask in %xmm0 is all zero again, so the
	 * next pcmpeqb can reuse it without another pxor.
	 */
777*533d3a49SEdward Gillett	sub	$0xffff, %edx
778*533d3a49SEdward Gillett	jnz	LABEL(exit)
7797c478bd9Sstevel@tonic-gate
7807c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
	/* NOTE(review): %r11 looks like the remaining strncmp count - confirm */
781*533d3a49SEdward Gillett	sub	$16, %r11
782*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
7837c478bd9Sstevel@tonic-gate#endif
7847c478bd9Sstevel@tonic-gate
785*533d3a49SEdward Gillett	add	$16, %rcx
786*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
7877c478bd9Sstevel@tonic-gate
	/* second compare step: same sequence as the one above */
788*533d3a49SEdward Gillett	add	$16, %r10
789*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_6)	/* cross page boundary */
7907c478bd9Sstevel@tonic-gate
791*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
792*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
793*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4
7947c478bd9Sstevel@tonic-gate
795*533d3a49SEdward Gillett	psrldq	$6, %xmm3
796*533d3a49SEdward Gillett	pslldq 	$10, %xmm2
797*533d3a49SEdward Gillett	por	%xmm3, %xmm2
7987c478bd9Sstevel@tonic-gate
799*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
800*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
801*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
802*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
803*533d3a49SEdward Gillett	sub	$0xffff, %edx
804*533d3a49SEdward Gillett	jnz	LABEL(exit)
8057c478bd9Sstevel@tonic-gate
8067c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
807*533d3a49SEdward Gillett	sub	$16, %r11
808*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
8097c478bd9Sstevel@tonic-gate#endif
8107c478bd9Sstevel@tonic-gate
811*533d3a49SEdward Gillett	add	$16, %rcx
812*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
813*533d3a49SEdward Gillett	jmp	LABEL(loop_ashr_6)
8147c478bd9Sstevel@tonic-gate
815*533d3a49SEdward Gillett	.p2align 4
816*533d3a49SEdward GillettLABEL(nibble_ashr_6):
	/* compare only the 10 bytes left in %xmm4; no new %rdi load here */
817*533d3a49SEdward Gillett	psrldq	$6, %xmm4
818*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
819*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
820*533d3a49SEdward Gillett	pcmpeqb	%xmm4, %xmm1
821*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
822*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
	/* 0x03ff = low 10 mask bits: those 10 bytes are equal and not NUL */
823*533d3a49SEdward Gillett	sub	$0x03ff, %edx
824*533d3a49SEdward Gillett	jnz	LABEL(exit)
8257c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
	/* NOTE(review): presumably done if at most 10 bytes remain - confirm */
826*533d3a49SEdward Gillett	cmp	$10, %r11
827*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
8287c478bd9Sstevel@tonic-gate#endif
	/* the partial check does not prove %xmm0 is zero, so clear it */
829*533d3a49SEdward Gillett 	pxor	%xmm0, %xmm0
830*533d3a49SEdward Gillett	sub	$0x1000, %r10		/* subtract 4K from %r10 */
831*533d3a49SEdward Gillett	jmp	LABEL(gobble_ashr_6)
8327c478bd9Sstevel@tonic-gate
833*533d3a49SEdward Gillett/*
834*533d3a49SEdward Gillett * ashr_7 handles the following cases:
835*533d3a49SEdward Gillett * 	abs(str1 offset - str2 offset) = 9
 *
 * Entry: the first aligned 16-byte chunk at (%rdi) is shifted left by 9
 * bytes and compared with the first aligned chunk at (%rsi).  %edx and %cl
 * are set before this label is reached (not visible in this section).
 * Loop: every step builds a 16-byte window from the upper 9 bytes of the
 * previous %rdi chunk and the lower 7 bytes of the next one, then compares
 * it with the next aligned %rsi chunk.  The loop holds two such steps.
 * nibble_ashr_7 is taken when %r10 goes positive; it compares only the 9
 * bytes already held in %xmm4, before the next %rdi chunk is loaded.
836*533d3a49SEdward Gillett */
837*533d3a49SEdward Gillett	.p2align 4
838*533d3a49SEdward GillettLABEL(ashr_7):
839*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
840*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm2
841*533d3a49SEdward Gillett	movdqa	(%rsi), %xmm1
	/* NUL mask: %xmm0 = 0xff where the %rsi chunk holds a zero byte */
842*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
843*533d3a49SEdward Gillett	pslldq	$9, %xmm2
844*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm2
	/* after psubb the high bit is set only for bytes equal and not NUL */
845*533d3a49SEdward Gillett	psubb	%xmm0, %xmm2
846*533d3a49SEdward Gillett	pmovmskb %xmm2, %r9d
	/* drop the low %cl bits of both masks, then compare what is left */
847*533d3a49SEdward Gillett	shr	%cl, %edx
848*533d3a49SEdward Gillett	shr	%cl, %r9d
849*533d3a49SEdward Gillett	sub	%r9d, %edx
850*533d3a49SEdward Gillett	jnz	LABEL(less32bytes)
	/* keep the unshifted first %rdi chunk as the "previous" chunk */
851*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm3
8527c478bd9Sstevel@tonic-gate
	/* macro defined outside this section */
853*533d3a49SEdward Gillett	UPDATE_STRNCMP_COUNTER
854*533d3a49SEdward Gillett
855*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
856*533d3a49SEdward Gillett	mov	$16, %rcx	/* index for loads */
857*533d3a49SEdward Gillett	mov	$7, %r9d	/* rdi bytes already examined. Used in exit code */
858*533d3a49SEdward Gillett	/*
859*533d3a49SEdward Gillett	 * Set up %r10 so that we can detect crossing a page boundary.
860*533d3a49SEdward Gillett	 * When %r10 goes positive we are crossing a page boundary and
861*533d3a49SEdward Gillett	 * need to do a nibble.
862*533d3a49SEdward Gillett	 */
863*533d3a49SEdward Gillett	lea	7(%rdi), %r10
864*533d3a49SEdward Gillett	and	$0xfff, %r10	/* offset into 4K page */
865*533d3a49SEdward Gillett	sub	$0x1000, %r10	/* subtract 4K pagesize */
866*533d3a49SEdward Gillett	movdqa	%xmm3, %xmm4
867*533d3a49SEdward Gillett
868*533d3a49SEdward Gillett	.p2align 4
869*533d3a49SEdward GillettLABEL(loop_ashr_7):
870*533d3a49SEdward Gillett	add	$16, %r10
871*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_7)
872*533d3a49SEdward Gillett
873*533d3a49SEdward GillettLABEL(gobble_ashr_7):
	/* load the next aligned chunks; %xmm4 keeps an unshifted %rdi copy */
874*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
875*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
876*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4
877*533d3a49SEdward Gillett
	/* window = (previous chunk >> 7 bytes) | (this chunk << 9 bytes) */
878*533d3a49SEdward Gillett	psrldq	$7, %xmm3
879*533d3a49SEdward Gillett	pslldq	$9, %xmm2
880*533d3a49SEdward Gillett	por	%xmm3, %xmm2
881*533d3a49SEdward Gillett
882*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
883*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
884*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
885*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
	/*
	 * A mask of 0xffff means all 16 bytes are equal and none is NUL.
	 * In that case the NUL mask in %xmm0 is all zero again, so the
	 * next pcmpeqb can reuse it without another pxor.
	 */
886*533d3a49SEdward Gillett	sub	$0xffff, %edx
887*533d3a49SEdward Gillett	jnz	LABEL(exit)
8887c478bd9Sstevel@tonic-gate
8897c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
	/* NOTE(review): %r11 looks like the remaining strncmp count - confirm */
890*533d3a49SEdward Gillett	sub	$16, %r11
891*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
8927c478bd9Sstevel@tonic-gate#endif
8937c478bd9Sstevel@tonic-gate
894*533d3a49SEdward Gillett	add	$16, %rcx
895*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
8967c478bd9Sstevel@tonic-gate
	/* second compare step: same sequence as the one above */
897*533d3a49SEdward Gillett	add	$16, %r10
898*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_7)	/* cross page boundary */
899*533d3a49SEdward Gillett
900*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
901*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
902*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4
903*533d3a49SEdward Gillett
904*533d3a49SEdward Gillett	psrldq	$7, %xmm3
905*533d3a49SEdward Gillett	pslldq 	$9, %xmm2
906*533d3a49SEdward Gillett	por	%xmm3, %xmm2
907*533d3a49SEdward Gillett
908*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
909*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
910*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
911*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
912*533d3a49SEdward Gillett	sub	$0xffff, %edx
913*533d3a49SEdward Gillett	jnz	LABEL(exit)
9147c478bd9Sstevel@tonic-gate
9157c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
916*533d3a49SEdward Gillett	sub	$16, %r11
917*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
9187c478bd9Sstevel@tonic-gate#endif
9197c478bd9Sstevel@tonic-gate
920*533d3a49SEdward Gillett	add	$16, %rcx
921*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
922*533d3a49SEdward Gillett	jmp	LABEL(loop_ashr_7)
9237c478bd9Sstevel@tonic-gate
924*533d3a49SEdward Gillett	.p2align 4
925*533d3a49SEdward GillettLABEL(nibble_ashr_7):
	/* compare only the 9 bytes left in %xmm4; no new %rdi load here */
926*533d3a49SEdward Gillett	psrldq	$7, %xmm4
927*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
928*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
929*533d3a49SEdward Gillett	pcmpeqb	%xmm4, %xmm1
930*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
931*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
	/* 0x01ff = low 9 mask bits: those 9 bytes are equal and not NUL */
932*533d3a49SEdward Gillett	sub	$0x01ff, %edx
933*533d3a49SEdward Gillett	jnz	LABEL(exit)
934*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
	/* NOTE(review): presumably done if at most 9 bytes remain - confirm */
935*533d3a49SEdward Gillett	cmp	$9, %r11
936*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
937*533d3a49SEdward Gillett#endif
	/* the partial check does not prove %xmm0 is zero, so clear it */
938*533d3a49SEdward Gillett 	pxor	%xmm0, %xmm0
939*533d3a49SEdward Gillett	sub	$0x1000, %r10		/* subtract 4K from %r10 */
940*533d3a49SEdward Gillett	jmp	LABEL(gobble_ashr_7)
941*533d3a49SEdward Gillett
942*533d3a49SEdward Gillett/*
943*533d3a49SEdward Gillett * ashr_8 handles the following cases:
944*533d3a49SEdward Gillett * 	abs(str1 offset - str2 offset) = 8
 *
 * Entry: the first aligned 16-byte chunk at (%rdi) is shifted left by 8
 * bytes and compared with the first aligned chunk at (%rsi).  %edx and %cl
 * are set before this label is reached (not visible in this section).
 * Loop: every step builds a 16-byte window from the upper 8 bytes of the
 * previous %rdi chunk and the lower 8 bytes of the next one, then compares
 * it with the next aligned %rsi chunk.  The loop holds two such steps.
 * nibble_ashr_8 is taken when %r10 goes positive; it compares only the 8
 * bytes already held in %xmm4, before the next %rdi chunk is loaded.
945*533d3a49SEdward Gillett */
946*533d3a49SEdward Gillett	.p2align 4
947*533d3a49SEdward GillettLABEL(ashr_8):
948*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
949*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm2
950*533d3a49SEdward Gillett	movdqa	(%rsi), %xmm1
	/* NUL mask: %xmm0 = 0xff where the %rsi chunk holds a zero byte */
951*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
952*533d3a49SEdward Gillett	pslldq	$8, %xmm2
953*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm2
	/* after psubb the high bit is set only for bytes equal and not NUL */
954*533d3a49SEdward Gillett	psubb	%xmm0, %xmm2
955*533d3a49SEdward Gillett	pmovmskb %xmm2, %r9d
	/* drop the low %cl bits of both masks, then compare what is left */
956*533d3a49SEdward Gillett	shr	%cl, %edx
957*533d3a49SEdward Gillett	shr	%cl, %r9d
958*533d3a49SEdward Gillett	sub	%r9d, %edx
959*533d3a49SEdward Gillett	jnz	LABEL(less32bytes)
	/* keep the unshifted first %rdi chunk as the "previous" chunk */
960*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm3
961*533d3a49SEdward Gillett
	/* macro defined outside this section */
962*533d3a49SEdward Gillett	UPDATE_STRNCMP_COUNTER
963*533d3a49SEdward Gillett
964*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
965*533d3a49SEdward Gillett	mov	$16, %rcx	/* index for loads */
966*533d3a49SEdward Gillett	mov	$8, %r9d	/* rdi bytes already examined. Used in exit code */
967*533d3a49SEdward Gillett	/*
968*533d3a49SEdward Gillett	 * Set up %r10 so that we can detect crossing a page boundary.
969*533d3a49SEdward Gillett	 * When %r10 goes positive we are crossing a page boundary and
970*533d3a49SEdward Gillett	 * need to do a nibble.
971*533d3a49SEdward Gillett	 */
972*533d3a49SEdward Gillett	lea	8(%rdi), %r10
973*533d3a49SEdward Gillett	and	$0xfff, %r10	/* offset into 4K page */
974*533d3a49SEdward Gillett	sub	$0x1000, %r10	/* subtract 4K pagesize */
975*533d3a49SEdward Gillett	movdqa	%xmm3, %xmm4
976*533d3a49SEdward Gillett
977*533d3a49SEdward Gillett	.p2align 4
978*533d3a49SEdward GillettLABEL(loop_ashr_8):
979*533d3a49SEdward Gillett	add	$16, %r10
980*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_8)
981*533d3a49SEdward Gillett
982*533d3a49SEdward GillettLABEL(gobble_ashr_8):
	/* load the next aligned chunks; %xmm4 keeps an unshifted %rdi copy */
983*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
984*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
985*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4
986*533d3a49SEdward Gillett
	/* window = (previous chunk >> 8 bytes) | (this chunk << 8 bytes) */
987*533d3a49SEdward Gillett	psrldq	$8, %xmm3
988*533d3a49SEdward Gillett	pslldq	$8, %xmm2
989*533d3a49SEdward Gillett	por	%xmm3, %xmm2
990*533d3a49SEdward Gillett
991*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
992*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
993*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
994*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
	/*
	 * A mask of 0xffff means all 16 bytes are equal and none is NUL.
	 * In that case the NUL mask in %xmm0 is all zero again, so the
	 * next pcmpeqb can reuse it without another pxor.
	 */
995*533d3a49SEdward Gillett	sub	$0xffff, %edx
996*533d3a49SEdward Gillett	jnz	LABEL(exit)
9977c478bd9Sstevel@tonic-gate
9987c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
	/* NOTE(review): %r11 looks like the remaining strncmp count - confirm */
999*533d3a49SEdward Gillett	sub	$16, %r11
1000*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
10017c478bd9Sstevel@tonic-gate#endif
10027c478bd9Sstevel@tonic-gate
1003*533d3a49SEdward Gillett	add	$16, %rcx
1004*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
10057c478bd9Sstevel@tonic-gate
	/* second compare step: same sequence as the one above */
1006*533d3a49SEdward Gillett	add	$16, %r10
1007*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_8)	/* cross page boundary */
1008*533d3a49SEdward Gillett
1009*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
1010*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
1011*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4
1012*533d3a49SEdward Gillett
1013*533d3a49SEdward Gillett	psrldq	$8, %xmm3
1014*533d3a49SEdward Gillett	pslldq 	$8, %xmm2
1015*533d3a49SEdward Gillett	por	%xmm3, %xmm2
1016*533d3a49SEdward Gillett
1017*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
1018*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
1019*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
1020*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
1021*533d3a49SEdward Gillett	sub	$0xffff, %edx
1022*533d3a49SEdward Gillett	jnz	LABEL(exit)
10237c478bd9Sstevel@tonic-gate
10247c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
1025*533d3a49SEdward Gillett	sub	$16, %r11
1026*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
10277c478bd9Sstevel@tonic-gate#endif
10287c478bd9Sstevel@tonic-gate
1029*533d3a49SEdward Gillett	add	$16, %rcx
1030*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
1031*533d3a49SEdward Gillett	jmp	LABEL(loop_ashr_8)
10327c478bd9Sstevel@tonic-gate
1033*533d3a49SEdward Gillett	.p2align 4
1034*533d3a49SEdward GillettLABEL(nibble_ashr_8):
	/* compare only the 8 bytes left in %xmm4; no new %rdi load here */
1035*533d3a49SEdward Gillett	psrldq	$8, %xmm4
1036*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
1037*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
1038*533d3a49SEdward Gillett	pcmpeqb	%xmm4, %xmm1
1039*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
1040*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
	/* 0x00ff = low 8 mask bits: those 8 bytes are equal and not NUL */
1041*533d3a49SEdward Gillett	sub	$0x00ff, %edx
1042*533d3a49SEdward Gillett	jnz	LABEL(exit)
1043*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
	/* NOTE(review): presumably done if at most 8 bytes remain - confirm */
1044*533d3a49SEdward Gillett	cmp	$8, %r11
1045*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
1046*533d3a49SEdward Gillett#endif
	/* the partial check does not prove %xmm0 is zero, so clear it */
1047*533d3a49SEdward Gillett 	pxor	%xmm0, %xmm0
1048*533d3a49SEdward Gillett	sub	$0x1000, %r10		/* subtract 4K from %r10 */
1049*533d3a49SEdward Gillett	jmp	LABEL(gobble_ashr_8)
1050*533d3a49SEdward Gillett
1051*533d3a49SEdward Gillett/*
1052*533d3a49SEdward Gillett * ashr_9 handles the following cases:
1053*533d3a49SEdward Gillett * 	abs(str1 offset - str2 offset) = 7
 *
 * Entry: the first aligned 16-byte chunk at (%rdi) is shifted left by 7
 * bytes and compared with the first aligned chunk at (%rsi).  %edx and %cl
 * are set before this label is reached (not visible in this section).
 * Loop: every step builds a 16-byte window from the upper 7 bytes of the
 * previous %rdi chunk and the lower 9 bytes of the next one, then compares
 * it with the next aligned %rsi chunk.  The loop holds two such steps.
 * nibble_ashr_9 is taken when %r10 goes positive; it compares only the 7
 * bytes already held in %xmm4, before the next %rdi chunk is loaded.
1054*533d3a49SEdward Gillett */
1055*533d3a49SEdward Gillett	.p2align 4
1056*533d3a49SEdward GillettLABEL(ashr_9):
1057*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
1058*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm2
1059*533d3a49SEdward Gillett	movdqa	(%rsi), %xmm1
	/* NUL mask: %xmm0 = 0xff where the %rsi chunk holds a zero byte */
1060*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
1061*533d3a49SEdward Gillett	pslldq	$7, %xmm2
1062*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm2
	/* after psubb the high bit is set only for bytes equal and not NUL */
1063*533d3a49SEdward Gillett	psubb	%xmm0, %xmm2
1064*533d3a49SEdward Gillett	pmovmskb %xmm2, %r9d
	/* drop the low %cl bits of both masks, then compare what is left */
1065*533d3a49SEdward Gillett	shr	%cl, %edx
1066*533d3a49SEdward Gillett	shr	%cl, %r9d
1067*533d3a49SEdward Gillett	sub	%r9d, %edx
1068*533d3a49SEdward Gillett	jnz	LABEL(less32bytes)
	/* keep the unshifted first %rdi chunk as the "previous" chunk */
1069*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm3
1070*533d3a49SEdward Gillett
	/* macro defined outside this section */
1071*533d3a49SEdward Gillett	UPDATE_STRNCMP_COUNTER
1072*533d3a49SEdward Gillett
1073*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
1074*533d3a49SEdward Gillett	mov	$16, %rcx	/* index for loads */
1075*533d3a49SEdward Gillett	mov	$9, %r9d	/* rdi bytes already examined. Used in exit code */
1076*533d3a49SEdward Gillett	/*
1077*533d3a49SEdward Gillett	 * Set up %r10 so that we can detect crossing a page boundary.
1078*533d3a49SEdward Gillett	 * When %r10 goes positive we are crossing a page boundary and
1079*533d3a49SEdward Gillett	 * need to do a nibble.
1080*533d3a49SEdward Gillett	 */
1081*533d3a49SEdward Gillett	lea	9(%rdi), %r10
1082*533d3a49SEdward Gillett	and	$0xfff, %r10	/* offset into 4K page */
1083*533d3a49SEdward Gillett	sub	$0x1000, %r10	/* subtract 4K pagesize */
1084*533d3a49SEdward Gillett	movdqa	%xmm3, %xmm4
1085*533d3a49SEdward Gillett
1086*533d3a49SEdward Gillett	.p2align 4
1087*533d3a49SEdward GillettLABEL(loop_ashr_9):
1088*533d3a49SEdward Gillett	add	$16, %r10
1089*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_9)
1090*533d3a49SEdward Gillett
1091*533d3a49SEdward GillettLABEL(gobble_ashr_9):
	/* load the next aligned chunks; %xmm4 keeps an unshifted %rdi copy */
1092*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
1093*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
1094*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4
1095*533d3a49SEdward Gillett
	/* window = (previous chunk >> 9 bytes) | (this chunk << 7 bytes) */
1096*533d3a49SEdward Gillett	psrldq	$9, %xmm3
1097*533d3a49SEdward Gillett	pslldq	$7, %xmm2
1098*533d3a49SEdward Gillett	por	%xmm3, %xmm2
1099*533d3a49SEdward Gillett
1100*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
1101*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
1102*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
1103*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
	/*
	 * A mask of 0xffff means all 16 bytes are equal and none is NUL.
	 * In that case the NUL mask in %xmm0 is all zero again, so the
	 * next pcmpeqb can reuse it without another pxor.
	 */
1104*533d3a49SEdward Gillett	sub	$0xffff, %edx
1105*533d3a49SEdward Gillett	jnz	LABEL(exit)
11067c478bd9Sstevel@tonic-gate
11077c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
	/* NOTE(review): %r11 looks like the remaining strncmp count - confirm */
1108*533d3a49SEdward Gillett	sub	$16, %r11
1109*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
11107c478bd9Sstevel@tonic-gate#endif
11117c478bd9Sstevel@tonic-gate
1112*533d3a49SEdward Gillett	add	$16, %rcx
1113*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
11147c478bd9Sstevel@tonic-gate
	/* second compare step: same sequence as the one above */
1115*533d3a49SEdward Gillett	add	$16, %r10
1116*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_9)	/* cross page boundary */
1117*533d3a49SEdward Gillett
1118*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
1119*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
1120*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4
1121*533d3a49SEdward Gillett
1122*533d3a49SEdward Gillett	psrldq	$9, %xmm3
1123*533d3a49SEdward Gillett	pslldq 	$7, %xmm2
1124*533d3a49SEdward Gillett	por	%xmm3, %xmm2
1125*533d3a49SEdward Gillett
1126*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
1127*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
1128*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
1129*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
1130*533d3a49SEdward Gillett	sub	$0xffff, %edx
1131*533d3a49SEdward Gillett	jnz	LABEL(exit)
11327c478bd9Sstevel@tonic-gate
11337c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
1134*533d3a49SEdward Gillett	sub	$16, %r11
1135*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
11367c478bd9Sstevel@tonic-gate#endif
11377c478bd9Sstevel@tonic-gate
1138*533d3a49SEdward Gillett	add	$16, %rcx
1139*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3		/* store for next cycle */
1140*533d3a49SEdward Gillett	jmp	LABEL(loop_ashr_9)
11417c478bd9Sstevel@tonic-gate
1142*533d3a49SEdward Gillett	.p2align 4
1143*533d3a49SEdward GillettLABEL(nibble_ashr_9):
	/* compare only the 7 bytes left in %xmm4; no new %rdi load here */
1144*533d3a49SEdward Gillett	psrldq	$9, %xmm4
1145*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
1146*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
1147*533d3a49SEdward Gillett	pcmpeqb	%xmm4, %xmm1
1148*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
1149*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
	/* 0x007f = low 7 mask bits: those 7 bytes are equal and not NUL */
1150*533d3a49SEdward Gillett	sub	$0x007f, %edx
1151*533d3a49SEdward Gillett	jnz	LABEL(exit)
1152*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
	/* NOTE(review): presumably done if at most 7 bytes remain - confirm */
1153*533d3a49SEdward Gillett	cmp	$7, %r11
1154*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
1155*533d3a49SEdward Gillett#endif
	/* the partial check does not prove %xmm0 is zero, so clear it */
1156*533d3a49SEdward Gillett 	pxor	%xmm0, %xmm0
1157*533d3a49SEdward Gillett	sub	$0x1000, %r10		/* subtract 4K from %r10 */
1158*533d3a49SEdward Gillett	jmp	LABEL(gobble_ashr_9)
1159*533d3a49SEdward Gillett
1160*533d3a49SEdward Gillett/*
1161*533d3a49SEdward Gillett * ashr_10 handles the following cases:
1162*533d3a49SEdward Gillett * 	abs(str1 offset - str2 offset) = 6
 *
 * Entry: the first aligned 16-byte chunk at (%rdi) is shifted left by 6
 * bytes and compared with the first aligned chunk at (%rsi).  %edx and %cl
 * are set before this label is reached (not visible in this section).
 * Loop: every step builds a 16-byte window from the upper 6 bytes of the
 * previous %rdi chunk and the lower 10 bytes of the next one, then compares
 * it with the next aligned %rsi chunk.  The loop holds two such steps.
 * nibble_ashr_10 is taken when %r10 goes positive; it compares only the 6
 * bytes already held in %xmm4, before the next %rdi chunk is loaded.
1163*533d3a49SEdward Gillett */
1164*533d3a49SEdward Gillett	.p2align 4
1165*533d3a49SEdward GillettLABEL(ashr_10):
1166*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
1167*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm2
1168*533d3a49SEdward Gillett	movdqa	(%rsi), %xmm1
	/* NUL mask: %xmm0 = 0xff where the %rsi chunk holds a zero byte */
1169*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
1170*533d3a49SEdward Gillett	pslldq	$6, %xmm2
1171*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm2
	/* after psubb the high bit is set only for bytes equal and not NUL */
1172*533d3a49SEdward Gillett	psubb	%xmm0, %xmm2
1173*533d3a49SEdward Gillett	pmovmskb %xmm2, %r9d
	/* drop the low %cl bits of both masks, then compare what is left */
1174*533d3a49SEdward Gillett	shr	%cl, %edx
1175*533d3a49SEdward Gillett	shr	%cl, %r9d
1176*533d3a49SEdward Gillett	sub	%r9d, %edx
1177*533d3a49SEdward Gillett	jnz	LABEL(less32bytes)
	/* keep the unshifted first %rdi chunk as the "previous" chunk */
1178*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm3
1179*533d3a49SEdward Gillett
	/* macro defined outside this section */
1180*533d3a49SEdward Gillett	UPDATE_STRNCMP_COUNTER
1181*533d3a49SEdward Gillett
1182*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
1183*533d3a49SEdward Gillett	mov	$16, %rcx	/* index for loads */
1184*533d3a49SEdward Gillett	mov	$10, %r9d	/* rdi bytes already examined. Used in exit code */
1185*533d3a49SEdward Gillett	/*
1186*533d3a49SEdward Gillett	 * Set up %r10 so that we can detect crossing a page boundary.
1187*533d3a49SEdward Gillett	 * When %r10 goes positive we are crossing a page boundary and
1188*533d3a49SEdward Gillett	 * need to do a nibble.
1189*533d3a49SEdward Gillett	 */
1190*533d3a49SEdward Gillett	lea	10(%rdi), %r10
1191*533d3a49SEdward Gillett	and	$0xfff, %r10	/* offset into 4K page */
1192*533d3a49SEdward Gillett	sub	$0x1000, %r10	/* subtract 4K pagesize */
1193*533d3a49SEdward Gillett	movdqa	%xmm3, %xmm4
1194*533d3a49SEdward Gillett
1195*533d3a49SEdward Gillett	.p2align 4
1196*533d3a49SEdward GillettLABEL(loop_ashr_10):
1197*533d3a49SEdward Gillett	add	$16, %r10
1198*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_10)
1199*533d3a49SEdward Gillett
1200*533d3a49SEdward GillettLABEL(gobble_ashr_10):
	/* load the next aligned chunks; %xmm4 keeps an unshifted %rdi copy */
1201*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
1202*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
1203*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4
1204*533d3a49SEdward Gillett
	/* window = (previous chunk >> 10 bytes) | (this chunk << 6 bytes) */
1205*533d3a49SEdward Gillett	psrldq	$10, %xmm3
1206*533d3a49SEdward Gillett	pslldq	$6, %xmm2
1207*533d3a49SEdward Gillett	por	%xmm3, %xmm2
1208*533d3a49SEdward Gillett
1209*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
1210*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
1211*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
1212*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
	/*
	 * A mask of 0xffff means all 16 bytes are equal and none is NUL.
	 * In that case the NUL mask in %xmm0 is all zero again, so the
	 * next pcmpeqb can reuse it without another pxor.
	 */
1213*533d3a49SEdward Gillett	sub	$0xffff, %edx
1214*533d3a49SEdward Gillett	jnz	LABEL(exit)
12157c478bd9Sstevel@tonic-gate
12167c478bd9Sstevel@tonic-gate#ifdef USE_AS_STRNCMP
	/* NOTE(review): %r11 looks like the remaining strncmp count - confirm */
1217*533d3a49SEdward Gillett	sub	$16, %r11
1218*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
12197c478bd9Sstevel@tonic-gate#endif
12207c478bd9Sstevel@tonic-gate
1221*533d3a49SEdward Gillett	add	$16, %rcx
1222*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
12237c478bd9Sstevel@tonic-gate
	/* second compare step: same sequence as the one above */
1224*533d3a49SEdward Gillett	add	$16, %r10
1225*533d3a49SEdward Gillett	jg	LABEL(nibble_ashr_10)	/* cross page boundary */
12267c478bd9Sstevel@tonic-gate
1227*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
1228*533d3a49SEdward Gillett	movdqa	(%rdi, %rcx), %xmm2
1229*533d3a49SEdward Gillett	movdqa	%xmm2, %xmm4
12307c478bd9Sstevel@tonic-gate
1231*533d3a49SEdward Gillett	psrldq	$10, %xmm3
1232*533d3a49SEdward Gillett	pslldq 	$6, %xmm2
1233*533d3a49SEdward Gillett	por	%xmm3, %xmm2
1234*533d3a49SEdward Gillett
1235*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
1236*533d3a49SEdward Gillett	pcmpeqb	%xmm2, %xmm1
1237*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
1238*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
1239*533d3a49SEdward Gillett	sub	$0xffff, %edx
1240*533d3a49SEdward Gillett	jnz	LABEL(exit)
1241*533d3a49SEdward Gillett
1242*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
1243*533d3a49SEdward Gillett	sub	$16, %r11
1244*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
1245*533d3a49SEdward Gillett#endif
1246*533d3a49SEdward Gillett
1247*533d3a49SEdward Gillett	add	$16, %rcx
1248*533d3a49SEdward Gillett	movdqa	%xmm4, %xmm3
1249*533d3a49SEdward Gillett	jmp	LABEL(loop_ashr_10)
1250*533d3a49SEdward Gillett
1251*533d3a49SEdward Gillett	.p2align 4
1252*533d3a49SEdward GillettLABEL(nibble_ashr_10):
	/* compare only the 6 bytes left in %xmm4; no new %rdi load here */
1253*533d3a49SEdward Gillett	psrldq	$10, %xmm4
1254*533d3a49SEdward Gillett	movdqa	(%rsi, %rcx), %xmm1
1255*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
1256*533d3a49SEdward Gillett	pcmpeqb	%xmm4, %xmm1
1257*533d3a49SEdward Gillett	psubb	%xmm0, %xmm1
1258*533d3a49SEdward Gillett	pmovmskb %xmm1, %edx
	/* 0x003f = low 6 mask bits: those 6 bytes are equal and not NUL */
1259*533d3a49SEdward Gillett	sub	$0x003f, %edx
1260*533d3a49SEdward Gillett	jnz	LABEL(exit)
1261*533d3a49SEdward Gillett#ifdef USE_AS_STRNCMP
	/* NOTE(review): presumably done if at most 6 bytes remain - confirm */
1262*533d3a49SEdward Gillett	cmp	$6, %r11
1263*533d3a49SEdward Gillett	jbe	LABEL(strcmp_exitz)
1264*533d3a49SEdward Gillett#endif
	/* the partial check does not prove %xmm0 is zero, so clear it */
1265*533d3a49SEdward Gillett 	pxor	%xmm0, %xmm0
1266*533d3a49SEdward Gillett	sub	$0x1000, %r10		/* subtract 4K from %r10 */
1267*533d3a49SEdward Gillett	jmp	LABEL(gobble_ashr_10)
1268*533d3a49SEdward Gillett
1269*533d3a49SEdward Gillett/*
1270*533d3a49SEdward Gillett * ashr_11 handles the following cases:
1271*533d3a49SEdward Gillett * 	abs(str1 offset - str2 offset) = 5
1272*533d3a49SEdward Gillett */
1273*533d3a49SEdward Gillett	.p2align 4
1274*533d3a49SEdward GillettLABEL(ashr_11):
1275*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
1276*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm2
1277*533d3a49SEdward Gillett	movdqa	(%rsi), %xmm1
1278*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm0
1279*533d3a49SEdward Gillett	pslldq	$5, %xmm2
1280*533d3a49SEdward Gillett	pcmpeqb	%xmm1, %xmm2
1281*533d3a49SEdward Gillett	psubb	%xmm0, %xmm2
1282*533d3a49SEdward Gillett	pmovmskb %xmm2, %r9d
1283*533d3a49SEdward Gillett	shr	%cl, %edx
1284*533d3a49SEdward Gillett	shr	%cl, %r9d
1285*533d3a49SEdward Gillett	sub	%r9d, %edx
1286*533d3a49SEdward Gillett	jnz	LABEL(less32bytes)
1287*533d3a49SEdward Gillett	movdqa	(%rdi), %xmm3
1288*533d3a49SEdward Gillett
1289*533d3a49SEdward Gillett	UPDATE_STRNCMP_COUNTER
1290*533d3a49SEdward Gillett
1291*533d3a49SEdward Gillett	pxor	%xmm0, %xmm0
1292*533d3a49SEdward Gillett	mov	$16, %rcx	/* index for loads */
1293*533d3a49SEdward Gillett	mov	$11, %r9d	/* rdi bytes already examined. Used in exit code */
1294*533d3a49SEdward Gillett	/*
1295*533d3a49SEdward Gillett	 * Setup %r10 value allows us to detect crossing a page boundary.
1296*533d3a49SEdward Gillett	 * When %r10 goes positive we are crossing a page boundary and
1297*533d3a49SEdward Gillett	 * need to do a nibble.
1298*533d3a49SEdward Gillett	 */
1299*533d3a49SEdward Gillett	lea	11(%rdi), %r10
1300*533d3a49SEdward Gillett	and	$0xfff, %r10	/* offset into 4K page */
1301*533d3a49SEdward Gillett	sub	$0x1000, %r10	/* subtract 4K pagesize */
1302*533d3a49SEdward Gillett	movdqa	%xmm3, %xmm4
1303*533d3a49SEdward Gillett
/*
 * Main loop for the ashr_11 case, unrolled twice.
 *
 * Each pass compares the 16-byte block at (%rsi, %rcx) with the last 5
 * bytes of the previous %rdi block (%xmm3) followed by the first 11 bytes
 * of the block at (%rdi, %rcx).
 *
 *	%rcx	byte index used for both block loads
 *	%r10	page-crossing detector; grows by 16 per pass
 *	%r11	strncmp only: count, reduced by 16 per pass
 *	%xmm0	all zero on entry to every pass
 *	%xmm3	previous %rdi block
 *	%xmm4	unshifted copy of the newest %rdi block
 */
	.p2align 4
LABEL(loop_ashr_11):
	add	$16, %r10
	jg	LABEL(nibble_ashr_11)	/* %r10 went positive: page boundary */

LABEL(gobble_ashr_11):
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep an unshifted copy for the next pass */

	psrldq	$11, %xmm3		/* last 5 bytes of the previous block ... */
	pslldq	$5, %xmm2		/* ... followed by the first 11 of this one */
	por	%xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0		/* 0xff where the %rsi block holds NUL */
	pcmpeqb	%xmm2, %xmm1		/* 0xff where the bytes are equal */
	psubb	%xmm0, %xmm1		/* sign bit set only if equal and not NUL */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero only if all 16 bytes pass */
	jnz	LABEL(exit)
	/* All 16 bytes passed, so no NUL was seen and %xmm0 is zero again. */

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3		/* newest block becomes the previous one */

	/* Second, unrolled copy of the same step. */
	add	$16, %r10
	jg	LABEL(nibble_ashr_11)	/* cross page boundary */

	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4

	psrldq	$11, %xmm3
	pslldq	$5, %xmm2
	por	%xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	LABEL(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
	jmp	LABEL(loop_ashr_11)
1359*533d3a49SEdward Gillett
/*
 * Page-crossing step for the ashr_11 case.  Before gobble_ashr_11 loads
 * the next %rdi block, check the 5 bytes of the previous %rdi block (copy
 * in %xmm4) that have not been compared yet against the start of the
 * block at (%rsi, %rcx).
 */
	.p2align 4
LABEL(nibble_ashr_11):
	psrldq	$11, %xmm4		/* pending 5 bytes in the low lanes, zeros above */
	movdqa	(%rsi, %rcx), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* 0xff where the %rsi block holds NUL */
	pcmpeqb	%xmm4, %xmm1		/* 0xff where the bytes are equal */
	psubb	%xmm0, %xmm1		/* sign bit set only if equal and not NUL */
	pmovmskb %xmm1, %edx
	sub	$0x001f, %edx		/* zero only if exactly the low 5 bytes pass */
	jnz	LABEL(exit)
#ifdef USE_AS_STRNCMP
	cmp	$5, %r11		/* count does not reach past those 5 bytes */
	jbe	LABEL(strcmp_exitz)
#endif
	pxor	%xmm0, %xmm0		/* the NUL test may have left bits in %xmm0 */
	sub	$0x1000, %r10		/* subtract 4K from %r10 */
	jmp	LABEL(gobble_ashr_11)
1377*533d3a49SEdward Gillett
/*
 * ashr_12 handles the following cases:
 * 	abs(str1 offset - str2 offset) = 4
 *
 * Each 16-byte block read through %rsi is compared with the last 4 bytes
 * of the previous %rdi block followed by the first 12 bytes of the
 * current %rdi block.
 *
 *	%rcx	byte index used for both block loads (starts at 16)
 *	%r9d	12; LABEL(exit) uses it to compute the %rdi offset
 *	%r10	page-crossing detector; grows by 16 per block
 *	%r11	strncmp only: count, reduced by 16 per block
 *	%xmm0	all zero on entry to every compare
 *	%xmm3	previous %rdi block
 *	%xmm4	unshifted copy of the newest %rdi block
 *
 * %edx and %cl are read here before being written, so they are inputs
 * from the code that dispatched to this entry.
 */
	.p2align 4
LABEL(ashr_12):
	pxor	%xmm0, %xmm0
	movdqa	(%rdi), %xmm2
	movdqa	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* 0xff where the %rsi block holds NUL */
	pslldq	$4, %xmm2		/* line the %rdi bytes up with %rsi */
	pcmpeqb	%xmm1, %xmm2		/* 0xff where the bytes are equal */
	psubb	%xmm0, %xmm2		/* sign bit set only if equal and not NUL */
	pmovmskb %xmm2, %r9d		/* one mask bit per byte */
	shr	%cl, %edx		/* discard the low %cl bits of both masks */
	shr	%cl, %r9d
	sub	%r9d, %edx
	jnz	LABEL(less32bytes)	/* masks disagree: resolve in the exit code */
	movdqa	(%rdi), %xmm3		/* reload the unshifted first %rdi block */

	UPDATE_STRNCMP_COUNTER

	pxor	%xmm0, %xmm0
	mov	$16, %rcx	/* index for loads */
	mov	$12, %r9d	/* rdi bytes already examined. Used in exit code */
	/*
	 * Set up %r10 so that crossing a page boundary can be detected:
	 * it starts at ((%rdi + 12) & 0xfff) - 0x1000 and grows by 16 per
	 * block.  When %r10 goes positive we are crossing a page boundary
	 * and need to do a nibble.
	 */
	lea	12(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */
	movdqa	%xmm3, %xmm4

	.p2align 4
LABEL(loop_ashr_12):
	add	$16, %r10
	jg	LABEL(nibble_ashr_12)	/* %r10 went positive: page boundary */

LABEL(gobble_ashr_12):
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep an unshifted copy for the next pass */

	psrldq	$12, %xmm3		/* last 4 bytes of the previous block ... */
	pslldq	$4, %xmm2		/* ... followed by the first 12 of this one */
	por	%xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero only if all 16 bytes pass */
	jnz	LABEL(exit)
	/* All 16 bytes passed, so no NUL was seen and %xmm0 is zero again. */

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3		/* newest block becomes the previous one */

	/* Second, unrolled copy of the same step. */
	add	$16, %r10
	jg	LABEL(nibble_ashr_12)	/* cross page boundary */

	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4

	psrldq	$12, %xmm3
	pslldq	$4, %xmm2
	por	%xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	LABEL(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
	jmp	LABEL(loop_ashr_12)

	/*
	 * Page-crossing step: check the 4 pending bytes of the previous
	 * %rdi block before gobble loads the next %rdi block.
	 */
	.p2align 4
LABEL(nibble_ashr_12):
	psrldq	$12, %xmm4		/* pending 4 bytes in the low lanes, zeros above */
	movdqa	(%rsi, %rcx), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm4, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0x000f, %edx		/* zero only if exactly the low 4 bytes pass */
	jnz	LABEL(exit)
#ifdef USE_AS_STRNCMP
	cmp	$4, %r11		/* count does not reach past those 4 bytes */
	jbe	LABEL(strcmp_exitz)
#endif
	pxor	%xmm0, %xmm0		/* the NUL test may have left bits in %xmm0 */
	sub	$0x1000, %r10		/* subtract 4K from %r10 */
	jmp	LABEL(gobble_ashr_12)
1486*533d3a49SEdward Gillett
/*
 * ashr_13 handles the following cases:
 * 	abs(str1 offset - str2 offset) = 3
 *
 * Each 16-byte block read through %rsi is compared with the last 3 bytes
 * of the previous %rdi block followed by the first 13 bytes of the
 * current %rdi block.
 *
 *	%rcx	byte index used for both block loads (starts at 16)
 *	%r9d	13; LABEL(exit) uses it to compute the %rdi offset
 *	%r10	page-crossing detector; grows by 16 per block
 *	%r11	strncmp only: count, reduced by 16 per block
 *	%xmm0	all zero on entry to every compare
 *	%xmm3	previous %rdi block
 *	%xmm4	unshifted copy of the newest %rdi block
 *
 * %edx and %cl are read here before being written, so they are inputs
 * from the code that dispatched to this entry.
 */
	.p2align 4
LABEL(ashr_13):
	pxor	%xmm0, %xmm0
	movdqa	(%rdi), %xmm2
	movdqa	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* 0xff where the %rsi block holds NUL */
	pslldq	$3, %xmm2		/* line the %rdi bytes up with %rsi */
	pcmpeqb	%xmm1, %xmm2		/* 0xff where the bytes are equal */
	psubb	%xmm0, %xmm2		/* sign bit set only if equal and not NUL */
	pmovmskb %xmm2, %r9d		/* one mask bit per byte */
	shr	%cl, %edx		/* discard the low %cl bits of both masks */
	shr	%cl, %r9d
	sub	%r9d, %edx
	jnz	LABEL(less32bytes)	/* masks disagree: resolve in the exit code */
	movdqa	(%rdi), %xmm3		/* reload the unshifted first %rdi block */

	UPDATE_STRNCMP_COUNTER

	pxor	%xmm0, %xmm0
	mov	$16, %rcx	/* index for loads */
	mov	$13, %r9d	/* rdi bytes already examined. Used in exit code */
	/*
	 * Set up %r10 so that crossing a page boundary can be detected:
	 * it starts at ((%rdi + 13) & 0xfff) - 0x1000 and grows by 16 per
	 * block.  When %r10 goes positive we are crossing a page boundary
	 * and need to do a nibble.
	 */
	lea	13(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */
	movdqa	%xmm3, %xmm4

	.p2align 4
LABEL(loop_ashr_13):
	add	$16, %r10
	jg	LABEL(nibble_ashr_13)	/* %r10 went positive: page boundary */

LABEL(gobble_ashr_13):
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep an unshifted copy for the next pass */

	psrldq	$13, %xmm3		/* last 3 bytes of the previous block ... */
	pslldq	$3, %xmm2		/* ... followed by the first 13 of this one */
	por	%xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero only if all 16 bytes pass */
	jnz	LABEL(exit)
	/* All 16 bytes passed, so no NUL was seen and %xmm0 is zero again. */

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3		/* newest block becomes the previous one */

	/* Second, unrolled copy of the same step. */
	add	$16, %r10
	jg	LABEL(nibble_ashr_13)	/* cross page boundary */

	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4

	psrldq	$13, %xmm3
	pslldq	$3, %xmm2
	por	%xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	LABEL(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
	jmp	LABEL(loop_ashr_13)

	/*
	 * Page-crossing step: check the 3 pending bytes of the previous
	 * %rdi block before gobble loads the next %rdi block.
	 */
	.p2align 4
LABEL(nibble_ashr_13):
	psrldq	$13, %xmm4		/* pending 3 bytes in the low lanes, zeros above */
	movdqa	(%rsi, %rcx), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm4, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0x0007, %edx		/* zero only if exactly the low 3 bytes pass */
	jnz	LABEL(exit)
#ifdef USE_AS_STRNCMP
	cmp	$3, %r11		/* count does not reach past those 3 bytes */
	jbe	LABEL(strcmp_exitz)
#endif
	pxor	%xmm0, %xmm0		/* the NUL test may have left bits in %xmm0 */
	sub	$0x1000, %r10		/* subtract 4K from %r10 */
	jmp	LABEL(gobble_ashr_13)
1595*533d3a49SEdward Gillett
/*
 * ashr_14 handles the following cases:
 * 	abs(str1 offset - str2 offset) = 2
 *
 * Each 16-byte block read through %rsi is compared with the last 2 bytes
 * of the previous %rdi block followed by the first 14 bytes of the
 * current %rdi block.
 *
 *	%rcx	byte index used for both block loads (starts at 16)
 *	%r9d	14; LABEL(exit) uses it to compute the %rdi offset
 *	%r10	page-crossing detector; grows by 16 per block
 *	%r11	strncmp only: count, reduced by 16 per block
 *	%xmm0	all zero on entry to every compare
 *	%xmm3	previous %rdi block
 *	%xmm4	unshifted copy of the newest %rdi block
 *
 * %edx and %cl are read here before being written, so they are inputs
 * from the code that dispatched to this entry.
 */
	.p2align 4
LABEL(ashr_14):
	pxor	%xmm0, %xmm0
	movdqa	(%rdi), %xmm2
	movdqa	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* 0xff where the %rsi block holds NUL */
	pslldq	$2, %xmm2		/* line the %rdi bytes up with %rsi */
	pcmpeqb	%xmm1, %xmm2		/* 0xff where the bytes are equal */
	psubb	%xmm0, %xmm2		/* sign bit set only if equal and not NUL */
	pmovmskb %xmm2, %r9d		/* one mask bit per byte */
	shr	%cl, %edx		/* discard the low %cl bits of both masks */
	shr	%cl, %r9d
	sub	%r9d, %edx
	jnz	LABEL(less32bytes)	/* masks disagree: resolve in the exit code */
	movdqa	(%rdi), %xmm3		/* reload the unshifted first %rdi block */

	UPDATE_STRNCMP_COUNTER

	pxor	%xmm0, %xmm0
	mov	$16, %rcx	/* index for loads */
	mov	$14, %r9d	/* rdi bytes already examined. Used in exit code */
	/*
	 * Set up %r10 so that crossing a page boundary can be detected:
	 * it starts at ((%rdi + 14) & 0xfff) - 0x1000 and grows by 16 per
	 * block.  When %r10 goes positive we are crossing a page boundary
	 * and need to do a nibble.
	 */
	lea	14(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */
	movdqa	%xmm3, %xmm4

	.p2align 4
LABEL(loop_ashr_14):
	add	$16, %r10
	jg	LABEL(nibble_ashr_14)	/* %r10 went positive: page boundary */

LABEL(gobble_ashr_14):
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep an unshifted copy for the next pass */

	psrldq	$14, %xmm3		/* last 2 bytes of the previous block ... */
	pslldq	$2, %xmm2		/* ... followed by the first 14 of this one */
	por	%xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero only if all 16 bytes pass */
	jnz	LABEL(exit)
	/* All 16 bytes passed, so no NUL was seen and %xmm0 is zero again. */

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3		/* newest block becomes the previous one */

	/* Second, unrolled copy of the same step. */
	add	$16, %r10
	jg	LABEL(nibble_ashr_14)	/* cross page boundary */

	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4

	psrldq	$14, %xmm3
	pslldq	$2, %xmm2
	por	%xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	LABEL(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
	jmp	LABEL(loop_ashr_14)

	/*
	 * Page-crossing step: check the 2 pending bytes of the previous
	 * %rdi block before gobble loads the next %rdi block.
	 */
	.p2align 4
LABEL(nibble_ashr_14):
	psrldq	$14, %xmm4		/* pending 2 bytes in the low lanes, zeros above */
	movdqa	(%rsi, %rcx), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm4, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0x0003, %edx		/* zero only if exactly the low 2 bytes pass */
	jnz	LABEL(exit)
#ifdef USE_AS_STRNCMP
	cmp	$2, %r11		/* count does not reach past those 2 bytes */
	jbe	LABEL(strcmp_exitz)
#endif
	pxor	%xmm0, %xmm0		/* the NUL test may have left bits in %xmm0 */
	sub	$0x1000, %r10		/* subtract 4K from %r10 */
	jmp	LABEL(gobble_ashr_14)
1704*533d3a49SEdward Gillett
/*
 * ashr_15 handles the following cases:
 * 	abs(str1 offset - str2 offset) = 1
 *
 * Each 16-byte block read through %rsi is compared with the last byte of
 * the previous %rdi block followed by the first 15 bytes of the current
 * %rdi block.
 *
 *	%rcx	byte index used for both block loads (starts at 16)
 *	%r9d	15; LABEL(exit) uses it to compute the %rdi offset
 *	%r10	page-crossing detector; grows by 16 per block
 *	%r11	strncmp only: count, reduced by 16 per block
 *	%xmm0	all zero on entry to every compare
 *	%xmm3	previous %rdi block
 *	%xmm4	unshifted copy of the newest %rdi block
 *
 * %edx and %cl are read here before being written, so they are inputs
 * from the code that dispatched to this entry.
 */
	.p2align 4
LABEL(ashr_15):
	pxor	%xmm0, %xmm0
	movdqa	(%rdi), %xmm2
	movdqa	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* 0xff where the %rsi block holds NUL */
	pslldq	$1, %xmm2		/* line the %rdi bytes up with %rsi */
	pcmpeqb	%xmm1, %xmm2		/* 0xff where the bytes are equal */
	psubb	%xmm0, %xmm2		/* sign bit set only if equal and not NUL */
	pmovmskb %xmm2, %r9d		/* one mask bit per byte */
	shr	%cl, %edx		/* discard the low %cl bits of both masks */
	shr	%cl, %r9d
	sub	%r9d, %edx
	jnz	LABEL(less32bytes)	/* masks disagree: resolve in the exit code */

	movdqa	(%rdi), %xmm3		/* reload the unshifted first %rdi block */

	UPDATE_STRNCMP_COUNTER

	pxor	%xmm0, %xmm0
	mov	$16, %rcx	/* index for loads */
	mov	$15, %r9d	/* rdi bytes already examined. Used in exit code */
	/*
	 * Set up %r10 so that crossing a page boundary can be detected:
	 * it starts at ((%rdi + 15) & 0xfff) - 0x1000 and grows by 16 per
	 * block.  When %r10 goes positive we are crossing a page boundary
	 * and need to do a nibble.
	 */
	lea	15(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */
	movdqa	%xmm3, %xmm4

	.p2align 4
LABEL(loop_ashr_15):
	add	$16, %r10
	jg	LABEL(nibble_ashr_15)	/* %r10 went positive: page boundary */

LABEL(gobble_ashr_15):
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4		/* keep an unshifted copy for the next pass */

	psrldq	$15, %xmm3		/* last byte of the previous block ... */
	pslldq	$1, %xmm2		/* ... followed by the first 15 of this one */
	por	%xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero only if all 16 bytes pass */
	jnz	LABEL(exit)
	/* All 16 bytes passed, so no NUL was seen and %xmm0 is zero again. */

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3		/* newest block becomes the previous one */

	/* Second, unrolled copy of the same step. */
	add	$16, %r10
	jg	LABEL(nibble_ashr_15)	/* cross page boundary */

	movdqa	(%rsi, %rcx), %xmm1
	movdqa	(%rdi, %rcx), %xmm2
	movdqa	%xmm2, %xmm4

	psrldq	$15, %xmm3
	pslldq	$1, %xmm2
	por	%xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	LABEL(exit)

#ifdef USE_AS_STRNCMP
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
#endif

	add	$16, %rcx
	movdqa	%xmm4, %xmm3
	jmp	LABEL(loop_ashr_15)

	/*
	 * Page-crossing step: check the 1 pending byte of the previous
	 * %rdi block before gobble loads the next %rdi block.
	 */
	.p2align 4
LABEL(nibble_ashr_15):
	psrldq	$15, %xmm4		/* pending byte in the low lane, zeros above */
	movdqa	(%rsi, %rcx), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm4, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0x0001, %edx		/* zero only if exactly the low byte passes */
	jnz	LABEL(exit)
#ifdef USE_AS_STRNCMP
	cmp	$1, %r11		/* count does not reach past that byte */
	jbe	LABEL(strcmp_exitz)
#endif
	pxor	%xmm0, %xmm0		/* the NUL test may have left bits in %xmm0 */
	sub	$0x1000, %r10		/* subtract 4K from %r10 */
	jmp	LABEL(gobble_ashr_15)
1814*533d3a49SEdward Gillett
/*
 * Common exit path.
 *
 * LABEL(exit) is entered from the block loops with %rcx = load index and
 * %r9 = shift amount of the ashr_N case, and derives the %rdi offset from
 * them.  LABEL(less32bytes) is entered with that offset already in %rax.
 * Both then advance %rdi and %rsi and, when %r8d is non-zero, swap them
 * back.  The tail finds the lowest set bit of the mask in %edx and
 * returns the difference of the two bytes at that index.
 */
	.p2align 4
LABEL(exit):
	lea	-16(%r9, %rcx), %rax	/* locate the exact offset for rdi */
LABEL(less32bytes):
	lea	(%rdi, %rax), %rdi	/* locate the exact address for first operand(rdi) */
	lea	(%rsi, %rcx), %rsi	/* locate the exact address for second operand(rsi) */
	test	%r8d, %r8d
	jz	LABEL(ret)
	xchg	%rsi, %rdi		/* recover original order according to flag(%r8d) */

	.p2align 4
LABEL(ret):
LABEL(less16bytes):
	/*
	 * Check to see if BSF is fast on this processor. If not, use a different
	 * exit tail.
	 */
	testl	$USE_BSF,.memops_method(%rip)
	jz	LABEL(AMD_exit)
	bsf	%rdx, %rdx		/* find and store bit index in %rdx */

#ifdef USE_AS_STRNCMP
	sub	%rdx, %r11		/* index at or beyond the count: equal */
	jbe	LABEL(strcmp_exitz)
#endif
	xor	%ecx, %ecx		/* clear %ecx */
	xor	%eax, %eax		/* clear %eax */

	movb	(%rsi, %rdx), %cl	/* both bytes end up zero-extended */
	movb	(%rdi, %rdx), %al

	sub	%ecx, %eax		/* return value: rdi byte minus rsi byte */
	ret
18487c478bd9Sstevel@tonic-gate
#ifdef USE_AS_STRNCMP
/*
 * strncmp only: return 0.  Branched to whenever a check of the count in
 * %r11 shows that it is used up.
 */
LABEL(strcmp_exitz):
	xor	%eax, %eax
	ret
#endif
1854*533d3a49SEdward Gillett
/*
 * This exit tail does not use the bsf instruction.
 *
 * The lowest set bit of the mask in %edx selects the byte to compare.
 * Bits 0-6 of %dl are tested one at a time and dispatch to
 * LABEL(Byte0) .. LABEL(Byte6); if none of them is set while %dl is
 * non-zero, bit 7 must be the one and byte 7 is handled inline.  A zero
 * %dl defers to LABEL(next_8_bytes), which examines %dh.
 */
	.p2align 4
LABEL(AMD_exit):
	test	%dl, %dl
	jz	LABEL(next_8_bytes)

	test	$0x01, %dl
	jnz	LABEL(Byte0)

	test	$0x02, %dl
	jnz	LABEL(Byte1)

	test	$0x04, %dl
	jnz	LABEL(Byte2)

	test	$0x08, %dl
	jnz	LABEL(Byte3)

	test	$0x10, %dl
	jnz	LABEL(Byte4)

	test	$0x20, %dl
	jnz	LABEL(Byte5)

	test	$0x40, %dl
	jnz	LABEL(Byte6)

	/* Only bit 7 is left: compare byte 7. */
#ifdef USE_AS_STRNCMP
	sub	$7, %r11		/* index 7 at or beyond the count: equal */
	jbe	LABEL(strcmp_exitz)
#endif
	movzx	7(%rsi), %ecx
	movzx	7(%rdi), %eax

	sub	%ecx, %eax		/* return value: rdi byte minus rsi byte */
	ret
1893*533d3a49SEdward Gillett
/*
 * Byte0: return the difference of the bytes at offset 0 of %rdi and %rsi.
 * Reached from LABEL(AMD_exit) and LABEL(next_8_bytes).
 */
	.p2align 4
LABEL(Byte0):
	/*
	 * never need to handle byte 0 for strncmp
	 *
	 * The check below is therefore left disabled:
	 *	#ifdef USE_AS_STRNCMP
	 *		sub	$0, %r11
	 *		jbe	LABEL(strcmp_exitz)
	 *	#endif
	 */
	movzx	(%rsi), %ecx
	movzx	(%rdi), %eax

	sub	%ecx, %eax		/* return value: rdi byte minus rsi byte */
	ret
1908*533d3a49SEdward Gillett
/*
 * Byte1: return the difference of the bytes at offset 1 of %rdi and %rsi,
 * or 0 for strncmp when the count in %r11 does not reach that offset.
 * Reached from LABEL(AMD_exit) and LABEL(next_8_bytes).
 */
	.p2align 4
LABEL(Byte1):

#ifdef USE_AS_STRNCMP
	sub	$1, %r11
	jbe	LABEL(strcmp_exitz)
#endif
	movzx	1(%rsi), %ecx
	movzx	1(%rdi), %eax

	sub	%ecx, %eax		/* return value: rdi byte minus rsi byte */
	ret
1921*533d3a49SEdward Gillett
/*
 * Byte2: return the difference of the bytes at offset 2 of %rdi and %rsi,
 * or 0 for strncmp when the count in %r11 does not reach that offset.
 * Reached from LABEL(AMD_exit) and LABEL(next_8_bytes).
 */
	.p2align 4
LABEL(Byte2):

#ifdef USE_AS_STRNCMP
	sub	$2, %r11
	jbe	LABEL(strcmp_exitz)
#endif
	movzx	2(%rsi), %ecx
	movzx	2(%rdi), %eax

	sub	%ecx, %eax		/* return value: rdi byte minus rsi byte */
	ret
1934*533d3a49SEdward Gillett
/*
 * Byte3: return the difference of the bytes at offset 3 of %rdi and %rsi,
 * or 0 for strncmp when the count in %r11 does not reach that offset.
 * Reached from LABEL(AMD_exit) and LABEL(next_8_bytes).
 */
	.p2align 4
LABEL(Byte3):

#ifdef USE_AS_STRNCMP
	sub	$3, %r11
	jbe	LABEL(strcmp_exitz)
#endif
	movzx	3(%rsi), %ecx
	movzx	3(%rdi), %eax

	sub	%ecx, %eax		/* return value: rdi byte minus rsi byte */
	ret
1947*533d3a49SEdward Gillett
/*
 * Byte4: return the difference of the bytes at offset 4 of %rdi and %rsi,
 * or 0 for strncmp when the count in %r11 does not reach that offset.
 * Reached from LABEL(AMD_exit) and LABEL(next_8_bytes).
 */
	.p2align 4
LABEL(Byte4):

#ifdef USE_AS_STRNCMP
	sub	$4, %r11
	jbe	LABEL(strcmp_exitz)
#endif
	movzx	4(%rsi), %ecx
	movzx	4(%rdi), %eax

	sub	%ecx, %eax		/* return value: rdi byte minus rsi byte */
	ret
1960*533d3a49SEdward Gillett
/*
 * Byte5: return the difference of the bytes at offset 5 of %rdi and %rsi,
 * or 0 for strncmp when the count in %r11 does not reach that offset.
 * Reached from LABEL(AMD_exit) and LABEL(next_8_bytes).
 */
	.p2align 4
LABEL(Byte5):

#ifdef USE_AS_STRNCMP
	sub	$5, %r11
	jbe	LABEL(strcmp_exitz)
#endif
	movzx	5(%rsi), %ecx
	movzx	5(%rdi), %eax

	sub	%ecx, %eax		/* return value: rdi byte minus rsi byte */
	ret
1973*533d3a49SEdward Gillett
/*
 * Byte6: return the difference of the bytes at offset 6 of %rdi and %rsi,
 * or 0 for strncmp when the count in %r11 does not reach that offset.
 * Reached from LABEL(AMD_exit) and LABEL(next_8_bytes).
 */
	.p2align 4
LABEL(Byte6):

#ifdef USE_AS_STRNCMP
	sub	$6, %r11
	jbe	LABEL(strcmp_exitz)
#endif
	movzx	6(%rsi), %ecx
	movzx	6(%rdi), %eax

	sub	%ecx, %eax		/* return value: rdi byte minus rsi byte */
	ret
1986*533d3a49SEdward Gillett
/*
 * next_8_bytes: second half of the bsf-free exit tail, entered from
 * LABEL(AMD_exit) when %dl is zero.  Both pointers are advanced by 8
 * (and, for strncmp, the count in %r11 is reduced by 8) so that the
 * LABEL(ByteN) targets can be reused for the bits of %dh.  Bits 0-6 are
 * tested one at a time; bit 7 is not tested and byte 7 is handled inline
 * when none of the others is set.
 */
	.p2align 4
LABEL(next_8_bytes):
	add	$8, %rdi
	add	$8, %rsi
#ifdef USE_AS_STRNCMP
	sub	$8, %r11
	jbe	LABEL(strcmp_exitz)
#endif
	test	$0x01, %dh
	jnz	LABEL(Byte0)

	test	$0x02, %dh
	jnz	LABEL(Byte1)

	test	$0x04, %dh
	jnz	LABEL(Byte2)

	test	$0x08, %dh
	jnz	LABEL(Byte3)

	test	$0x10, %dh
	jnz	LABEL(Byte4)

	test	$0x20, %dh
	jnz	LABEL(Byte5)

	test	$0x40, %dh
	jnz	LABEL(Byte6)

	/* None of bits 0-6 is set: compare byte 7. */
#ifdef USE_AS_STRNCMP
	sub	$7, %r11		/* index 7 at or beyond the count: equal */
	jbe	LABEL(strcmp_exitz)
#endif
	movzx	7(%rsi), %ecx
	movzx	7(%rdi), %eax

	sub	%ecx, %eax		/* return value: rdi byte minus rsi byte */
	ret
2025*533d3a49SEdward Gillett
/*
 * Dispatch table for the ashr_N entry points, placed in .rodata.
 * Each .int entry is the offset of one entry point relative to the start
 * of the table itself.  Entry 0 is ashr_0 and entries 1..15 are
 * ashr_15 down to ashr_1.
 * NOTE(review): the code that indexes this table is not in this section;
 * confirm how the index is derived before reordering entries.
 */
	.pushsection .rodata
	.p2align 4
LABEL(unaligned_table):
	.int	LABEL(ashr_0) - LABEL(unaligned_table)
	.int	LABEL(ashr_15) - LABEL(unaligned_table)
	.int	LABEL(ashr_14) - LABEL(unaligned_table)
	.int	LABEL(ashr_13) - LABEL(unaligned_table)
	.int	LABEL(ashr_12) - LABEL(unaligned_table)
	.int	LABEL(ashr_11) - LABEL(unaligned_table)
	.int	LABEL(ashr_10) - LABEL(unaligned_table)
	.int	LABEL(ashr_9) - LABEL(unaligned_table)
	.int	LABEL(ashr_8) - LABEL(unaligned_table)
	.int	LABEL(ashr_7) - LABEL(unaligned_table)
	.int	LABEL(ashr_6) - LABEL(unaligned_table)
	.int	LABEL(ashr_5) - LABEL(unaligned_table)
	.int	LABEL(ashr_4) - LABEL(unaligned_table)
	.int	LABEL(ashr_3) - LABEL(unaligned_table)
	.int	LABEL(ashr_2) - LABEL(unaligned_table)
	.int	LABEL(ashr_1) - LABEL(unaligned_table)
	.popsection
#ifdef USE_AS_STRNCMP
	SET_SIZE(strncmp)
#else
	SET_SIZE(strcmp)		/* (const char *, const char *) */
#endif