xref: /titanic_44/usr/src/lib/libc/sparcv9/gen/strncmp.s (revision dcafa541382944b24abd3a40c357b47e04f314e2)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29	.file	"%M%"
30
31/*
32 * strncmp(s1, s2, n)
33 *
34 * Compare strings (at most n bytes):  s1>s2: >0  s1==s2: 0  s1<s2: <0
35 *
36 * Fast assembler language version of the following C-program for strncmp
37 * which represents the `standard' for the C-library.
38 *
39 *	int
40 *	strncmp(const char *s1, const char *s2, size_t n)
41 *	{
42 *		n++;
43 *		if (s1 == s2)
44 *			return (0);
45 *		while (--n != 0 && *s1 == *s2++)
46 *			if(*s1++ == '\0')
47 *				return(0);
48 *		return ((n == 0) ? 0 : (*s1 - s2[-1]));
49 *	}
50 */
51
52#include <sys/asm_linkage.h>
53
/*
 * Register usage after the save, as established by the code below:
 *
 *	%i0	s1 on entry; replaced by the difference (s1 - %i1) before
 *		any word loop or .cmp_bytes, so that [%i1 + %i0] addresses s1
 *	%i1	s2, advanced as bytes/words are consumed
 *	%i2	n, the number of bytes still to be compared
 *	%i3	(s2 & 3) at .iss2; in the loops for unaligned s2, the most
 *		recently loaded aligned word of s2
 *	%i4	byte or word taken from s1
 *	%i5	byte or word of s2 that is compared against %i4
 *	%l0-%l2	masks 0xff000000, 0x00ff0000, 0x0000ff00
 *	%l6,%l7	constants 0x7efefeff and 0x81010100 for the zero-byte test
 *	%l3-%l5	scratch
 *
 * The result is returned in %o0 by the restore in the delay slot of ret:
 * zero via .doneq, otherwise (s1 byte - s2 byte) of the first differing
 * byte pair, both bytes zero-extended.
 */
54	ENTRY(strncmp)
55	save	%sp, -SA(WINDOWSIZE), %sp	! new window: s1, s2, n are %i0, %i1, %i2
56	cmp	%i2, 8
57	blu,a,pn %xcc, .cmp_bytes	! for small counts (n < 8) go do bytes
58	sub	%i0, %i1, %i0		! delay slot (annulled unless taken), get diff from s1 - s2
59	andcc	%i0, 3, %g0		! is s1 aligned
601:	bz,pn	%icc, .iss2		! if so go check s2
61	andcc	%i1, 3, %i3		! delay slot: %i3 = s2 & 3, is s2 aligned
62
63	deccc	%i2			! --n >= 0 ?
64	bcs,pn	%xcc, .doneq		! borrow: n was 0, nothing left to compare
65	nop				! delay slot
66
67	ldub	[%i0], %i4		! else cmp one byte
68	ldub	[%i1], %i5
69	inc	%i0
70	cmp	%i4, %i5
71	bne,pn	%icc, .noteqb		! bytes differ: return their difference
72	inc	%i1			! delay slot
73	tst	%i4			! terminating zero
74	bnz,pt	%icc, 1b		! not the terminator: retest s1 alignment
75	andcc	%i0, 3, %g0		! delay slot: is s1 aligned now
76	b,a	.doneq			! equal through the terminator (b,a annuls its delay slot)
77
	!
	! s1 is word aligned here.  Set up the constants used by the word
	! loops.  For each word w of s1 the loops compute
	!	((w + 0x7efefeff) ^ w) & 0x81010100
	! and treat w as containing no zero byte when the result equals
	! 0x81010100; otherwise every byte of w is tested individually with
	! the masks in %l0-%l2 (and 0xff).
	!
78.iss2:
79	set     0x7efefeff, %l6
80	set     0x81010100, %l7
81	sethi	%hi(0xff000000), %l0	! masks to test for terminating null
82	sethi	%hi(0x00ff0000), %l1
83	srl	%l1, 8, %l2		! generate 0x0000ff00 mask
84
	! icc still reflects (s2 & 3) from the andcc in the delay slot of the
	! branch that came here; set/sethi/srl do not modify condition codes.
85	bz,pn	%icc, .w4cmp		! if s2 word aligned, compare words
86	cmp	%i3, 2			! check if s2 half aligned
87	be,pn	%icc, .w2cmp
88	cmp	%i3, 1			! check if aligned to 1 or 3 bytes
89.w3cmp:	ldub	[%i1], %i5		! read the first byte of s2
90	inc	1, %i1
91	be,pt	%icc, .w1cmp		! (s2 & 3) == 1: two more bytes to a word boundary
92	sll	%i5, 24, %i5		! delay slot: put that byte in the top byte of %i5
93	sub	%i0, %i1, %i0		! (s2 & 3) == 3: %i1 is word aligned now; %i0 = s1 - %i1
942:
95	deccc	4, %i2			! n >= 4 ?
96	bgeu,a,pt %xcc, 3f		! yes: compare a word
97	lduw	[%i1], %i3		! delay slot (only if taken): next aligned word of s2
98	dec	%i1			! reset s2
99	inc	%i0			! reset s1 diff
100	b	.cmp_bytes		! do a byte at a time if n < 4
101	inc	4, %i2			! delay slot: undo the deccc above
1023:
103	lduw	[%i0 + %i1], %i4	! read a word from s1
104	inc	4, %i1
105	srl	%i3, 8, %l4		! merge with the other half
106	or	%l4, %i5, %i5		! %i5 = carried byte | top three bytes of the s2 word
107	cmp	%i4, %i5
108	be,pn	%icc, 1f		! words equal: go look for a zero byte
109
110	add	%i4, %l6, %l3		! delay slot (always executed): start of zero-byte test
111	b,a	.noteq			! words differ: locate the differing byte
1121:	xor	%l3, %i4, %l3
113	and	%l3, %l7, %l3
114	cmp	%l3, %l7
115	be,a,pt	%icc, 2b		! no zero byte detected: next word
116	sll	%i3, 24, %i5		! delay slot (only if taken): carry low byte of s2 word
117
118	!
119	! For 7-bit characters, we know one of the bytes is zero, but for
120	! 8-bit characters, the zero detection algorithm gives some false
121	! triggers ... check every byte individually.
122	!
	! Each bnz below tests the andcc before it, and the andcc in its
	! delay slot sets up the next test.  Reaching a "b,a .doneq" means
	! the equal words contain the terminator, so the strings are equal.
	!
123	andcc	%i4, %l0, %g0		! check if first byte was zero
124	bnz,pt	%icc, 1f
125	andcc	%i4, %l1, %g0		! check if second byte was zero
126	b,a	.doneq
1271:	bnz,pt	%icc, 1f
128	andcc 	%i4, %l2, %g0		! check if third byte was zero
129	b,a	.doneq
1301:	bnz,pt	%icc, 1f
131	andcc	%i4, 0xff, %g0		! check if last byte is zero
132	b,a	.doneq
1331:	bnz,pn	%icc, 2b		! false trigger, no byte was zero: next word
134	sll	%i3, 24, %i5		! delay slot: carry low byte of s2 word
135	b,a	.doneq
136
	!
	! (s2 & 3) == 1: one byte of s2 is already in the top byte of %i5.
	! NOTE(review): the clr looks redundant since lduh overwrites %l4 - confirm.
	!
137.w1cmp:	clr	%l4
138	lduh	[%i1], %l4		! read the next two bytes of s2
139	inc	2, %i1
140	sll	%l4, 8, %l4		! position them below the first byte
141	or	%i5, %l4, %i5		! %i5 = first three bytes of s2 in its top three bytes
142
143	sub	%i0, %i1, %i0		! %i0 = s1 - %i1
1443:
145	deccc	4, %i2			! n >= 4 ?
146	bgeu,a,pt %xcc, 4f		! yes: compare a word
147	lduw	[%i1], %i3		! delay slot (only if taken): next aligned word of s2
148	dec	3, %i1			! reset s2
149	inc	3, %i0			! reset s1 diff
150	b	.cmp_bytes		! do a byte at a time if n < 4
151	inc	4, %i2			! delay slot: undo the deccc above
1524:
153	lduw	[%i0 + %i1], %i4	! read a word from s1
154	inc	4, %i1
155	srl	%i3, 24, %l4		! merge with the other half
156	or	%l4, %i5, %i5		! %i5 = carried three bytes | top byte of the s2 word
157	cmp	%i4, %i5
158	be,pt	%icc, 1f		! words equal: go look for a zero byte
159
160	add	%i4, %l6, %l3		! delay slot (always executed): start of zero-byte test
161	b,a	.noteq			! words differ: locate the differing byte
1621:	xor	%l3, %i4, %l3
163	and	%l3, %l7, %l3
164	cmp	%l3, %l7
165	be,a,pt	%icc, 3b		! no zero byte detected: next word
166	sll	%i3, 8, %i5		! delay slot (only if taken): carry low three bytes of s2 word
167
168	andcc	%i4, %l0, %g0		! check if first byte was zero
169	bnz,pt	%icc, 1f
170	andcc	%i4, %l1, %g0		! check if second byte was zero
171	b,a	.doneq
1721:	bnz,pt	%icc, 1f
173	andcc 	%i4, %l2, %g0		! check if third byte was zero
174	b,a	.doneq
1751:	bnz,pt	%icc, 1f
176	andcc	%i4, 0xff, %g0		! check if last byte is zero
177	b,a	.doneq
1781:	bnz,pn	%icc, 3b		! false trigger, no byte was zero: next word
179	sll	%i3, 8, %i5		! delay slot: carry low three bytes of s2 word
180	b,a	.doneq
181
	! (s2 & 3) == 2: s2 is halfword aligned.
182.w2cmp:
183	lduh	[%i1], %i5		! read a halfword to align s2
184	inc	2, %i1
185	sll	%i5, 16, %i5		! keep it in the top half of %i5
186
187	sub	%i0, %i1, %i0		! %i0 = s1 - %i1
1884:
189	deccc	4, %i2			! n >= 4 ?
190	bgeu,a,pt %xcc, 5f		! yes: compare a word
191	lduw	[%i1], %i3		! delay slot (only if taken): next aligned word of s2
192	dec	2, %i1			! reset s2
193	inc	2, %i0			! reset s1 diff
194	b	.cmp_bytes		! do a byte at a time if n < 4
195	inc	4, %i2			! delay slot
1965:
197	lduw	[%i1 + %i0], %i4	! read a word from s1 (%i0 holds s1 - %i1)
198	inc	4, %i1
199	srl	%i3, 16, %l4		! merge with the other half
200	or	%l4, %i5, %i5		! %i5 = carried halfword | top half of the s2 word
201	cmp	%i4, %i5
202	be,pt	%icc, 1f		! words equal: go look for a zero byte
203
204	add	%i4, %l6, %l3		! delay slot (always executed): start of zero-byte test
205	b,a	.noteq			! words differ: locate the differing byte
2061:	xor	%l3, %i4, %l3		! are any bytes 0?
207	and	%l3, %l7, %l3
208	cmp	%l3, %l7
209	be,a,pt	%icc, 4b		! no zero byte detected: next word
210	sll	%i3, 16, %i5		! delay slot (only if taken): carry low half of s2 word
211
212	andcc	%i4, %l0, %g0		! check if first byte was zero
213	bnz,pt	%icc, 1f
214	andcc	%i4, %l1, %g0		! check if second byte was zero
215	b,a	.doneq
2161:	bnz,pt	%icc, 1f
217	andcc 	%i4, %l2, %g0		! check if third byte was zero
218	b,a	.doneq
2191:	bnz,pt	%icc, 1f
220	andcc	%i4, 0xff, %g0		! check if last byte is zero
221	b,a	.doneq
2221:	bnz,pn	%icc, 4b		! false trigger, no byte was zero: next word
223	sll	%i3, 16, %i5		! delay slot: carry low half of s2 word
224	b,a	.doneq
225
	!
	! Both s1 and s2 are word aligned.
	! NOTE(review): the lduw before label 5 is repeated right after the
	! n == 0 check below, so the first load looks redundant - confirm.
	!
226.w4cmp:	sub	%i0, %i1, %i0		! %i0 = s1 - s2
227	lduw	[%i1], %i5		! read a word from s2
2285:	cmp	%i2, 0
229	be,a,pn	%xcc, .doneq		! n exhausted with no difference: equal
230	nop
231	lduw	[%i1], %i5		! read a word from s2
232	deccc	4, %i2			! n >= 4 ?
233	bcs,a,pn %xcc, .cmp_bytes	! do a byte at a time if n < 4
234	inc	4, %i2			! delay slot (only if taken): undo the deccc
235
236	lduw	[%i1 + %i0], %i4	! read a word from s1 (%i0 holds s1 - s2)
237	cmp	%i4, %i5
238	inc	4, %i1
239	be,pt	%icc, 1f		! words equal: go look for a zero byte
240
241	add	%i4, %l6, %l3		! delay slot (always executed): start of zero-byte test
242	b,a	.noteq			! words differ: locate the differing byte
2431:	xor	%l3, %i4, %l3
244	and	%l3, %l7, %l3
245	cmp	%l3, %l7
246	be,pt	%icc, 5b		! no zero byte detected: next word
247	nop
248
249	andcc	%i4, %l0, %g0		! check if first byte was zero
250	bnz,pt	%icc, 1f
251	andcc	%i4, %l1, %g0		! check if second byte was zero
252	b,a	.doneq
2531:	bnz,pt	%icc, 1f
254	andcc 	%i4, %l2, %g0		! check if third byte was zero
255	b,a	.doneq
2561:	bnz,pt	%icc, 1f
257	andcc	%i4, 0xff, %g0		! check if last byte is zero
258	b,a	.doneq
2591:	bnz,a,pn %icc, 5b		! false trigger: next word; else fall into .doneq
260	lduw	[%i1], %i5		! delay slot (only if taken): read a word from s2
261.doneq:	ret
262	restore	%g0, %g0, %o0		! equal return zero
263
	!
	! The words in %i4 (s1) and %i5 (s2) differ.  Compare them a byte at
	! a time, most significant byte first.  A differing pair returns its
	! difference through label 6; an equal pair that is zero means the
	! strings ended before the difference, so the result is "equal".
	! The fourth byte pair is handled by falling into .noteqb.
	!
264.noteq:	srl	%i4, 24, %l4		! first byte of each word
265	srl	%i5, 24, %l5
266	subcc	%l4, %l5, %i0		! %i0 = s1 byte - s2 byte
267	bne,pt	%icc, 6f		! they differ: return the difference
268	andcc	%l4, 0xff, %g0		! delay slot: was the (equal) byte zero
269	bz	.doneq
270	sll	%i4, 8, %l4		! delay slot: begin isolating the second byte
271	sll	%i5, 8, %l5
272	srl	%l4, 24, %l4
273	srl	%l5, 24, %l5
274	subcc	%l4, %l5, %i0
275	bne,pt	%icc, 6f
276	andcc	%l4, 0xff, %g0		! delay slot: was the (equal) byte zero
277	bz,pt	%icc, .doneq
278	sll	%i4, 16, %l4		! delay slot: begin isolating the third byte
279	sll	%i5, 16, %l5
280	srl	%l4, 24, %l4
281	srl	%l5, 24, %l5
282	subcc	%l4, %l5, %i0
283	bne,pt	%icc, 6f
284	andcc	%l4, 0xff, %g0		! delay slot: was the (equal) byte zero
285	bz,pt	%icc, .doneq
286	nop
	! Also entered directly from the byte loops with single bytes in
	! %i4 and %i5: return the difference of the low bytes.
287.noteqb:
288	and	%i4, 0xff, %l4
289	and	%i5, 0xff, %l5
290	subcc	%l4, %l5, %i0
2916:	ret
292	restore	%i0, %g0, %o0		! delay slot: return the difference in %o0
293
294	! Do a byte by byte comparison, disregarding alignments
	! On entry %i0 = s1 - %i1, so [%i1 + %i0] is the current byte of s1.
295.cmp_bytes:
296	deccc	%i2			! --n >= 0 ?
2971:
298	bcs,pn	%xcc, .doneq		! borrow: count exhausted, strings equal
299	nop				! delay slot
300	ldub	[%i1 + %i0], %i4	! read a byte from s1
301	ldub	[%i1], %i5		! read a byte from s2
302
303	inc	%i1
304	cmp	%i4, %i5
305	bne,pt	%icc, .noteqb		! bytes differ: return their difference
306	tst	%i4			! terminating zero
307	bnz,pt	%icc, 1b		! not the terminator: next byte
308	deccc	%i2			! --n >= 0
309	b,a	.doneq			! equal through the terminator
310
311	SET_SIZE(strncmp)
312