xref: /illumos-gate/usr/src/lib/libc/sparc/gen/strncmp.S (revision 5d9d9091f564c198a760790b0bfa72c44e17912b)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21*5d9d9091SRichard Lowe
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26*5d9d9091SRichard Lowe
27*5d9d9091SRichard Lowe	.file	"strncmp.s"
28*5d9d9091SRichard Lowe
/*
 * strncmp(s1, s2, n)
 *
 * Compare strings (at most n bytes):  s1>s2: >0  s1==s2: 0  s1<s2: <0
 *
 * Fast assembler language version of the following C-program for strncmp
 * which represents the `standard' for the C-library.
 *
 *	int
 *	strncmp(const char *s1, const char *s2, size_t n)
 *	{
 *		n++;
 *		if (s1 == s2)
 *			return (0);
 *		while (--n != 0 && *s1 == *s2++)
 *			if (*s1++ == '\0')
 *				return (0);
 *		return ((n == 0) ? 0 : (*s1 - s2[-1]));
 *	}
 */
49*5d9d9091SRichard Lowe
50*5d9d9091SRichard Lowe#include <sys/asm_linkage.h>
51*5d9d9091SRichard Lowe
52*5d9d9091SRichard Lowe	ENTRY(strncmp)
53*5d9d9091SRichard Lowe	save	%sp, -SA(WINDOWSIZE), %sp
54*5d9d9091SRichard Lowe	cmp	%i2, 8
55*5d9d9091SRichard Lowe	blu,a	.cmp_bytes		! for small counts go do bytes
56*5d9d9091SRichard Lowe	sub	%i0, %i1, %i0		! delay slot, get diff from s1 - s2
57*5d9d9091SRichard Lowe	andcc	%i0, 3, %g0		! is s1 aligned
58*5d9d9091SRichard Lowe1:	bz	.iss2			! if so go check s2
59*5d9d9091SRichard Lowe	andcc	%i1, 3, %i3		! is s2 aligned
60*5d9d9091SRichard Lowe
61*5d9d9091SRichard Lowe	deccc	%i2			! --n >= 0 ?
62*5d9d9091SRichard Lowe	bcs	.doneq
63*5d9d9091SRichard Lowe	nop				! delay slot
64*5d9d9091SRichard Lowe
65*5d9d9091SRichard Lowe	ldub	[%i0], %i4		! else cmp one byte
66*5d9d9091SRichard Lowe	ldub	[%i1], %i5
67*5d9d9091SRichard Lowe	inc	%i0
68*5d9d9091SRichard Lowe	cmp	%i4, %i5
69*5d9d9091SRichard Lowe	bne	.noteqb
70*5d9d9091SRichard Lowe	inc	%i1
71*5d9d9091SRichard Lowe	tst	%i4			! terminating zero
72*5d9d9091SRichard Lowe	bnz	1b
73*5d9d9091SRichard Lowe	andcc	%i0, 3, %g0
74*5d9d9091SRichard Lowe	b,a	.doneq
75*5d9d9091SRichard Lowe
76*5d9d9091SRichard Lowe.iss2:
77*5d9d9091SRichard Lowe	set     0x7efefeff, %l6
78*5d9d9091SRichard Lowe	set     0x81010100, %l7
79*5d9d9091SRichard Lowe	sethi	%hi(0xff000000), %l0	! masks to test for terminating null
80*5d9d9091SRichard Lowe	sethi	%hi(0x00ff0000), %l1
81*5d9d9091SRichard Lowe	srl	%l1, 8, %l2		! generate 0x0000ff00 mask
82*5d9d9091SRichard Lowe
83*5d9d9091SRichard Lowe	bz	.w4cmp			! if s2 word aligned, compare words
84*5d9d9091SRichard Lowe	cmp	%i3, 2			! check if s2 half aligned
85*5d9d9091SRichard Lowe	be	.w2cmp
86*5d9d9091SRichard Lowe	cmp	%i3, 1			! check if aligned to 1 or 3 bytes
87*5d9d9091SRichard Lowe.w3cmp:	ldub	[%i1], %i5
88*5d9d9091SRichard Lowe	inc	1, %i1
89*5d9d9091SRichard Lowe	be	.w1cmp
90*5d9d9091SRichard Lowe	sll	%i5, 24, %i5
91*5d9d9091SRichard Lowe	sub	%i0, %i1, %i0
92*5d9d9091SRichard Lowe2:
93*5d9d9091SRichard Lowe	deccc	4, %i2			! n >= 4 ?
94*5d9d9091SRichard Lowe	bgeu,a	3f
95*5d9d9091SRichard Lowe	ld	[%i1], %i3		! delay slot
96*5d9d9091SRichard Lowe	dec	%i1			! reset s2
97*5d9d9091SRichard Lowe	inc	%i0			! reset s1 diff
98*5d9d9091SRichard Lowe	b	.cmp_bytes		! do a byte at a time if n < 4
99*5d9d9091SRichard Lowe	inc	4, %i2
100*5d9d9091SRichard Lowe3:
101*5d9d9091SRichard Lowe	ld	[%i0 + %i1], %i4
102*5d9d9091SRichard Lowe	inc	4, %i1
103*5d9d9091SRichard Lowe	srl	%i3, 8, %l4		! merge with the other half
104*5d9d9091SRichard Lowe	or	%l4, %i5, %i5
105*5d9d9091SRichard Lowe	cmp	%i4, %i5
106*5d9d9091SRichard Lowe	be	1f
107*5d9d9091SRichard Lowe
108*5d9d9091SRichard Lowe	add	%i4, %l6, %l3
109*5d9d9091SRichard Lowe	b,a	.noteq
110*5d9d9091SRichard Lowe1:	xor	%l3, %i4, %l3
111*5d9d9091SRichard Lowe	and	%l3, %l7, %l3
112*5d9d9091SRichard Lowe	cmp	%l3, %l7
113*5d9d9091SRichard Lowe	be,a	2b
114*5d9d9091SRichard Lowe	sll	%i3, 24, %i5
115*5d9d9091SRichard Lowe
116*5d9d9091SRichard Lowe	!
117*5d9d9091SRichard Lowe	! For 7-bit characters, we know one of the bytes is zero, but for
118*5d9d9091SRichard Lowe	! 8-bit characters, the zero detection algorithm gives some false
119*5d9d9091SRichard Lowe	! triggers ... check every byte individually.
120*5d9d9091SRichard Lowe	!
121*5d9d9091SRichard Lowe	andcc	%i4, %l0, %g0		! check if first byte was zero
122*5d9d9091SRichard Lowe	bnz	1f
123*5d9d9091SRichard Lowe	andcc	%i4, %l1, %g0		! check if second byte was zero
124*5d9d9091SRichard Lowe	b,a	.doneq
125*5d9d9091SRichard Lowe1:	bnz	1f
126*5d9d9091SRichard Lowe	andcc 	%i4, %l2, %g0		! check if third byte was zero
127*5d9d9091SRichard Lowe	b,a	.doneq
128*5d9d9091SRichard Lowe1:	bnz	1f
129*5d9d9091SRichard Lowe	andcc	%i4, 0xff, %g0		! check if last byte is zero
130*5d9d9091SRichard Lowe	b,a	.doneq
131*5d9d9091SRichard Lowe1:	bnz	2b
132*5d9d9091SRichard Lowe	sll	%i3, 24, %i5
133*5d9d9091SRichard Lowe	b,a	.doneq
134*5d9d9091SRichard Lowe
135*5d9d9091SRichard Lowe.w1cmp:	clr	%l4
136*5d9d9091SRichard Lowe	lduh	[%i1], %l4
137*5d9d9091SRichard Lowe	inc	2, %i1
138*5d9d9091SRichard Lowe	sll	%l4, 8, %l4
139*5d9d9091SRichard Lowe	or	%i5, %l4, %i5
140*5d9d9091SRichard Lowe
141*5d9d9091SRichard Lowe	sub	%i0, %i1, %i0
142*5d9d9091SRichard Lowe3:
143*5d9d9091SRichard Lowe	deccc	4, %i2			! n >= 4 ?
144*5d9d9091SRichard Lowe	bgeu,a	4f
145*5d9d9091SRichard Lowe	ld	[%i1], %i3		! delay slot
146*5d9d9091SRichard Lowe	dec	3, %i1			! reset s2
147*5d9d9091SRichard Lowe	inc	3, %i0			! reset s1 diff
148*5d9d9091SRichard Lowe	b	.cmp_bytes		! do a byte at a time if n < 4
149*5d9d9091SRichard Lowe	inc	4, %i2
150*5d9d9091SRichard Lowe4:
151*5d9d9091SRichard Lowe	ld	[%i0 + %i1], %i4
152*5d9d9091SRichard Lowe	inc	4, %i1
153*5d9d9091SRichard Lowe	srl	%i3, 24, %l4		! merge with the other half
154*5d9d9091SRichard Lowe	or	%l4, %i5, %i5
155*5d9d9091SRichard Lowe	cmp	%i4, %i5
156*5d9d9091SRichard Lowe	be	1f
157*5d9d9091SRichard Lowe
158*5d9d9091SRichard Lowe	add	%i4, %l6, %l3
159*5d9d9091SRichard Lowe	b,a	.noteq
160*5d9d9091SRichard Lowe1:	xor	%l3, %i4, %l3
161*5d9d9091SRichard Lowe	and	%l3, %l7, %l3
162*5d9d9091SRichard Lowe	cmp	%l3, %l7
163*5d9d9091SRichard Lowe	be,a	3b
164*5d9d9091SRichard Lowe	sll	%i3, 8, %i5
165*5d9d9091SRichard Lowe
166*5d9d9091SRichard Lowe	andcc	%i4, %l0, %g0		! check if first byte was zero
167*5d9d9091SRichard Lowe	bnz	1f
168*5d9d9091SRichard Lowe	andcc	%i4, %l1, %g0		! check if second byte was zero
169*5d9d9091SRichard Lowe	b,a	.doneq
170*5d9d9091SRichard Lowe1:	bnz	1f
171*5d9d9091SRichard Lowe	andcc 	%i4, %l2, %g0		! check if third byte was zero
172*5d9d9091SRichard Lowe	b,a	.doneq
173*5d9d9091SRichard Lowe1:	bnz	1f
174*5d9d9091SRichard Lowe	andcc	%i4, 0xff, %g0		! check if last byte is zero
175*5d9d9091SRichard Lowe	b,a	.doneq
176*5d9d9091SRichard Lowe1:	bnz	3b
177*5d9d9091SRichard Lowe	sll	%i3, 8, %i5
178*5d9d9091SRichard Lowe	b,a	.doneq
179*5d9d9091SRichard Lowe
180*5d9d9091SRichard Lowe.w2cmp:
181*5d9d9091SRichard Lowe	lduh	[%i1], %i5		! read a halfword to align s2
182*5d9d9091SRichard Lowe	inc	2, %i1
183*5d9d9091SRichard Lowe	sll	%i5, 16, %i5
184*5d9d9091SRichard Lowe
185*5d9d9091SRichard Lowe	sub	%i0, %i1, %i0
186*5d9d9091SRichard Lowe4:
187*5d9d9091SRichard Lowe	deccc	4, %i2			! n >= 4 ?
188*5d9d9091SRichard Lowe	bgeu,a	5f
189*5d9d9091SRichard Lowe	ld	[%i1], %i3		! delay slot
190*5d9d9091SRichard Lowe	dec	2, %i1			! reset s2
191*5d9d9091SRichard Lowe	inc	2, %i0			! reset s1 diff
192*5d9d9091SRichard Lowe	b	.cmp_bytes		! do a byte at a time if n < 4
193*5d9d9091SRichard Lowe	inc	4, %i2			! delay slot
194*5d9d9091SRichard Lowe5:
195*5d9d9091SRichard Lowe	ld	[%i1 + %i0], %i4	! read a word from s2
196*5d9d9091SRichard Lowe	inc	4, %i1
197*5d9d9091SRichard Lowe	srl	%i3, 16, %l4		! merge with the other half
198*5d9d9091SRichard Lowe	or	%l4, %i5, %i5
199*5d9d9091SRichard Lowe	cmp	%i4, %i5
200*5d9d9091SRichard Lowe	be	1f
201*5d9d9091SRichard Lowe
202*5d9d9091SRichard Lowe	add	%i4, %l6, %l3
203*5d9d9091SRichard Lowe	b,a	.noteq
204*5d9d9091SRichard Lowe1:	xor	%l3, %i4, %l3		! are any bytes 0?
205*5d9d9091SRichard Lowe	and	%l3, %l7, %l3
206*5d9d9091SRichard Lowe	cmp	%l3, %l7
207*5d9d9091SRichard Lowe	be,a	4b
208*5d9d9091SRichard Lowe	sll	%i3, 16, %i5
209*5d9d9091SRichard Lowe
210*5d9d9091SRichard Lowe	andcc	%i4, %l0, %g0		! check if first byte was zero
211*5d9d9091SRichard Lowe	bnz	1f
212*5d9d9091SRichard Lowe	andcc	%i4, %l1, %g0		! check if second byte was zero
213*5d9d9091SRichard Lowe	b,a	.doneq
214*5d9d9091SRichard Lowe1:	bnz	1f
215*5d9d9091SRichard Lowe	andcc 	%i4, %l2, %g0		! check if third byte was zero
216*5d9d9091SRichard Lowe	b,a	.doneq
217*5d9d9091SRichard Lowe1:	bnz	1f
218*5d9d9091SRichard Lowe	andcc	%i4, 0xff, %g0		! check if last byte is zero
219*5d9d9091SRichard Lowe	b,a	.doneq
220*5d9d9091SRichard Lowe1:	bnz	4b
221*5d9d9091SRichard Lowe	sll	%i3, 16, %i5
222*5d9d9091SRichard Lowe	b,a	.doneq
223*5d9d9091SRichard Lowe
224*5d9d9091SRichard Lowe.w4cmp:	sub	%i0, %i1, %i0
225*5d9d9091SRichard Lowe	ld	[%i1], %i5		! read a word from s1
226*5d9d9091SRichard Lowe5:	cmp     %i2,0
227*5d9d9091SRichard Lowe	be,a    .doneq
228*5d9d9091SRichard Lowe	nop
229*5d9d9091SRichard Lowe	ld      [%i1], %i5              ! read a word from s1
230*5d9d9091SRichard Lowe	deccc	4, %i2			! n >= 4 ?
231*5d9d9091SRichard Lowe	bcs,a	.cmp_bytes		! do a byte at a time if n < 4
232*5d9d9091SRichard Lowe	inc	4, %i2
233*5d9d9091SRichard Lowe
234*5d9d9091SRichard Lowe	ld	[%i1 + %i0], %i4	! read a word from s2
235*5d9d9091SRichard Lowe	cmp	%i4, %i5
236*5d9d9091SRichard Lowe	inc	4, %i1
237*5d9d9091SRichard Lowe	be	1f
238*5d9d9091SRichard Lowe
239*5d9d9091SRichard Lowe	add	%i4, %l6, %l3
240*5d9d9091SRichard Lowe	b,a	.noteq
241*5d9d9091SRichard Lowe1:	xor	%l3, %i4, %l3
242*5d9d9091SRichard Lowe	and	%l3, %l7, %l3
243*5d9d9091SRichard Lowe	cmp	%l3, %l7
244*5d9d9091SRichard Lowe	be,a	5b
245*5d9d9091SRichard Lowe	nop
246*5d9d9091SRichard Lowe
247*5d9d9091SRichard Lowe	andcc	%i4, %l0, %g0		! check if first byte was zero
248*5d9d9091SRichard Lowe	bnz	1f
249*5d9d9091SRichard Lowe	andcc	%i4, %l1, %g0		! check if second byte was zero
250*5d9d9091SRichard Lowe	b,a	.doneq
251*5d9d9091SRichard Lowe1:	bnz	1f
252*5d9d9091SRichard Lowe	andcc 	%i4, %l2, %g0		! check if third byte was zero
253*5d9d9091SRichard Lowe	b,a	.doneq
254*5d9d9091SRichard Lowe1:	bnz	1f
255*5d9d9091SRichard Lowe	andcc	%i4, 0xff, %g0		! check if last byte is zero
256*5d9d9091SRichard Lowe	b,a	.doneq
257*5d9d9091SRichard Lowe1:	bnz,a	5b
258*5d9d9091SRichard Lowe	ld	[%i1], %i5
259*5d9d9091SRichard Lowe.doneq:	ret
260*5d9d9091SRichard Lowe	restore	%g0, %g0, %o0		! equal return zero
261*5d9d9091SRichard Lowe
262*5d9d9091SRichard Lowe.noteq:	srl	%i4, 24, %l4
263*5d9d9091SRichard Lowe	srl	%i5, 24, %l5
264*5d9d9091SRichard Lowe	subcc	%l4, %l5, %i0
265*5d9d9091SRichard Lowe	bne	6f
266*5d9d9091SRichard Lowe	andcc	%l4, 0xff, %g0
267*5d9d9091SRichard Lowe	bz	.doneq
268*5d9d9091SRichard Lowe	sll	%i4, 8, %l4
269*5d9d9091SRichard Lowe	sll	%i5, 8, %l5
270*5d9d9091SRichard Lowe	srl	%l4, 24, %l4
271*5d9d9091SRichard Lowe	srl	%l5, 24, %l5
272*5d9d9091SRichard Lowe	subcc	%l4, %l5, %i0
273*5d9d9091SRichard Lowe	bne	6f
274*5d9d9091SRichard Lowe	andcc	%l4, 0xff, %g0
275*5d9d9091SRichard Lowe	bz	.doneq
276*5d9d9091SRichard Lowe	sll	%i4, 16, %l4
277*5d9d9091SRichard Lowe	sll	%i5, 16, %l5
278*5d9d9091SRichard Lowe	srl	%l4, 24, %l4
279*5d9d9091SRichard Lowe	srl	%l5, 24, %l5
280*5d9d9091SRichard Lowe	subcc	%l4, %l5, %i0
281*5d9d9091SRichard Lowe	bne	6f
282*5d9d9091SRichard Lowe	andcc	%l4, 0xff, %g0
283*5d9d9091SRichard Lowe	bz	.doneq
284*5d9d9091SRichard Lowe	nop
285*5d9d9091SRichard Lowe.noteqb:
286*5d9d9091SRichard Lowe	and	%i4, 0xff, %l4
287*5d9d9091SRichard Lowe	and	%i5, 0xff, %l5
288*5d9d9091SRichard Lowe	subcc	%l4, %l5, %i0
289*5d9d9091SRichard Lowe6:	ret
290*5d9d9091SRichard Lowe	restore	%i0, %g0, %o0
291*5d9d9091SRichard Lowe
292*5d9d9091SRichard Lowe	! Do a byte by byte comparison, disregarding alignments
293*5d9d9091SRichard Lowe.cmp_bytes:
294*5d9d9091SRichard Lowe	deccc	%i2			! --n >= 0 ?
295*5d9d9091SRichard Lowe1:
296*5d9d9091SRichard Lowe	bcs	.doneq
297*5d9d9091SRichard Lowe	nop				! delay slot
298*5d9d9091SRichard Lowe	ldub	[%i1 + %i0], %i4	! read a word from s1
299*5d9d9091SRichard Lowe	ldub	[%i1], %i5		! read a word from s2
300*5d9d9091SRichard Lowe
301*5d9d9091SRichard Lowe	inc	%i1
302*5d9d9091SRichard Lowe	cmp	%i4, %i5
303*5d9d9091SRichard Lowe	bne	.noteqb
304*5d9d9091SRichard Lowe	tst	%i4			! terminating zero
305*5d9d9091SRichard Lowe	bnz	1b
306*5d9d9091SRichard Lowe	deccc	%i2			! --n >= 0
307*5d9d9091SRichard Lowe	b,a	.doneq
308*5d9d9091SRichard Lowe
309*5d9d9091SRichard Lowe	SET_SIZE(strncmp)
310