xref: /titanic_52/usr/src/lib/libc/sparcv9/gen/strncmp.s (revision 71269a2275bf5a143dad6461eee2710a344e7261)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.file	"strncmp.s"
28
29/*
30 * strncmp(s1, s2, n)
31 *
32 * Compare strings (at most n bytes):  s1>s2: >0  s1==s2: 0  s1<s2: <0
33 *
34 * Fast assembler language version of the following C-program for strncmp
35 * which represents the `standard' for the C-library.
36 *
37 *	int
38 *	strncmp(const char *s1, const char *s2, size_t n)
39 *	{
40 *		n++;
41 *		if (s1 == s2)
42 *			return (0);
43 *		while (--n != 0 && *s1 == *s2++)
44 *			if(*s1++ == '\0')
45 *				return(0);
46 *		return ((n == 0) ? 0 : (*s1 - s2[-1]));
47 *	}
48 */
49
50#include <sys/asm_linkage.h>
51
52	ENTRY(strncmp)
53	save	%sp, -SA(WINDOWSIZE), %sp
54	cmp	%i2, 8
55	blu,a,pn %xcc, .cmp_bytes	! for small counts go do bytes
56	sub	%i0, %i1, %i0		! delay slot, get diff from s1 - s2
57	andcc	%i0, 3, %g0		! is s1 aligned
581:	bz,pn	%icc, .iss2		! if so go check s2
59	andcc	%i1, 3, %i3		! is s2 aligned
60
61	deccc	%i2			! --n >= 0 ?
62	bcs,pn	%xcc, .doneq
63	nop				! delay slot
64
65	ldub	[%i0], %i4		! else cmp one byte
66	ldub	[%i1], %i5
67	inc	%i0
68	cmp	%i4, %i5
69	bne,pn	%icc, .noteqb
70	inc	%i1
71	tst	%i4			! terminating zero
72	bnz,pt	%icc, 1b
73	andcc	%i0, 3, %g0
74	b,a	.doneq
75
76.iss2:
77	set     0x7efefeff, %l6
78	set     0x81010100, %l7
79	sethi	%hi(0xff000000), %l0	! masks to test for terminating null
80	sethi	%hi(0x00ff0000), %l1
81	srl	%l1, 8, %l2		! generate 0x0000ff00 mask
82
83	bz,pn	%icc, .w4cmp		! if s2 word aligned, compare words
84	cmp	%i3, 2			! check if s2 half aligned
85	be,pn	%icc, .w2cmp
86	cmp	%i3, 1			! check if aligned to 1 or 3 bytes
87.w3cmp:	ldub	[%i1], %i5
88	inc	1, %i1
89	be,pt	%icc, .w1cmp
90	sll	%i5, 24, %i5
91	sub	%i0, %i1, %i0
922:
93	deccc	4, %i2			! n >= 4 ?
94	bgeu,a,pt %xcc, 3f
95	lduw	[%i1], %i3		! delay slot
96	dec	%i1			! reset s2
97	inc	%i0			! reset s1 diff
98	b	.cmp_bytes		! do a byte at a time if n < 4
99	inc	4, %i2
1003:
101	lduw	[%i0 + %i1], %i4
102	inc	4, %i1
103	srl	%i3, 8, %l4		! merge with the other half
104	or	%l4, %i5, %i5
105	cmp	%i4, %i5
106	be,pn	%icc, 1f
107
108	add	%i4, %l6, %l3
109	b,a	.noteq
1101:	xor	%l3, %i4, %l3
111	and	%l3, %l7, %l3
112	cmp	%l3, %l7
113	be,a,pt	%icc, 2b
114	sll	%i3, 24, %i5
115
116	!
117	! For 7-bit characters, we know one of the bytes is zero, but for
118	! 8-bit characters, the zero detection algorithm gives some false
119	! triggers ... check every byte individually.
120	!
121	andcc	%i4, %l0, %g0		! check if first byte was zero
122	bnz,pt	%icc, 1f
123	andcc	%i4, %l1, %g0		! check if second byte was zero
124	b,a	.doneq
1251:	bnz,pt	%icc, 1f
126	andcc 	%i4, %l2, %g0		! check if third byte was zero
127	b,a	.doneq
1281:	bnz,pt	%icc, 1f
129	andcc	%i4, 0xff, %g0		! check if last byte is zero
130	b,a	.doneq
1311:	bnz,pn	%icc, 2b
132	sll	%i3, 24, %i5
133	b,a	.doneq
134
135.w1cmp:	clr	%l4
136	lduh	[%i1], %l4
137	inc	2, %i1
138	sll	%l4, 8, %l4
139	or	%i5, %l4, %i5
140
141	sub	%i0, %i1, %i0
1423:
143	deccc	4, %i2			! n >= 4 ?
144	bgeu,a,pt %xcc, 4f
145	lduw	[%i1], %i3		! delay slot
146	dec	3, %i1			! reset s2
147	inc	3, %i0			! reset s1 diff
148	b	.cmp_bytes		! do a byte at a time if n < 4
149	inc	4, %i2
1504:
151	lduw	[%i0 + %i1], %i4
152	inc	4, %i1
153	srl	%i3, 24, %l4		! merge with the other half
154	or	%l4, %i5, %i5
155	cmp	%i4, %i5
156	be,pt	%icc, 1f
157
158	add	%i4, %l6, %l3
159	b,a	.noteq
1601:	xor	%l3, %i4, %l3
161	and	%l3, %l7, %l3
162	cmp	%l3, %l7
163	be,a,pt	%icc, 3b
164	sll	%i3, 8, %i5
165
166	andcc	%i4, %l0, %g0		! check if first byte was zero
167	bnz,pt	%icc, 1f
168	andcc	%i4, %l1, %g0		! check if second byte was zero
169	b,a	.doneq
1701:	bnz,pt	%icc, 1f
171	andcc 	%i4, %l2, %g0		! check if third byte was zero
172	b,a	.doneq
1731:	bnz,pt	%icc, 1f
174	andcc	%i4, 0xff, %g0		! check if last byte is zero
175	b,a	.doneq
1761:	bnz,pn	%icc, 3b
177	sll	%i3, 8, %i5
178	b,a	.doneq
179
180.w2cmp:
181	lduh	[%i1], %i5		! read a halfword to align s2
182	inc	2, %i1
183	sll	%i5, 16, %i5
184
185	sub	%i0, %i1, %i0
1864:
187	deccc	4, %i2			! n >= 4 ?
188	bgeu,a,pt %xcc, 5f
189	lduw	[%i1], %i3		! delay slot
190	dec	2, %i1			! reset s2
191	inc	2, %i0			! reset s1 diff
192	b	.cmp_bytes		! do a byte at a time if n < 4
193	inc	4, %i2			! delay slot
1945:
195	lduw	[%i1 + %i0], %i4	! read a word from s2
196	inc	4, %i1
197	srl	%i3, 16, %l4		! merge with the other half
198	or	%l4, %i5, %i5
199	cmp	%i4, %i5
200	be,pt	%icc, 1f
201
202	add	%i4, %l6, %l3
203	b,a	.noteq
2041:	xor	%l3, %i4, %l3		! are any bytes 0?
205	and	%l3, %l7, %l3
206	cmp	%l3, %l7
207	be,a,pt	%icc, 4b
208	sll	%i3, 16, %i5
209
210	andcc	%i4, %l0, %g0		! check if first byte was zero
211	bnz,pt	%icc, 1f
212	andcc	%i4, %l1, %g0		! check if second byte was zero
213	b,a	.doneq
2141:	bnz,pt	%icc, 1f
215	andcc 	%i4, %l2, %g0		! check if third byte was zero
216	b,a	.doneq
2171:	bnz,pt	%icc, 1f
218	andcc	%i4, 0xff, %g0		! check if last byte is zero
219	b,a	.doneq
2201:	bnz,pn	%icc, 4b
221	sll	%i3, 16, %i5
222	b,a	.doneq
223
224.w4cmp:	sub	%i0, %i1, %i0
225	lduw	[%i1], %i5		! read a word from s1
2265:	cmp	%i2, 0
227	be,a,pn	%xcc, .doneq
228	nop
229	lduw	[%i1], %i5		! read a word from s1
230	deccc	4, %i2			! n >= 4 ?
231	bcs,a,pn %xcc, .cmp_bytes	! do a byte at a time if n < 4
232	inc	4, %i2
233
234	lduw	[%i1 + %i0], %i4	! read a word from s2
235	cmp	%i4, %i5
236	inc	4, %i1
237	be,pt	%icc, 1f
238
239	add	%i4, %l6, %l3
240	b,a	.noteq
2411:	xor	%l3, %i4, %l3
242	and	%l3, %l7, %l3
243	cmp	%l3, %l7
244	be,pt	%icc, 5b
245	nop
246
247	andcc	%i4, %l0, %g0		! check if first byte was zero
248	bnz,pt	%icc, 1f
249	andcc	%i4, %l1, %g0		! check if second byte was zero
250	b,a	.doneq
2511:	bnz,pt	%icc, 1f
252	andcc 	%i4, %l2, %g0		! check if third byte was zero
253	b,a	.doneq
2541:	bnz,pt	%icc, 1f
255	andcc	%i4, 0xff, %g0		! check if last byte is zero
256	b,a	.doneq
2571:	bnz,a,pn %icc, 5b
258	lduw	[%i1], %i5
259.doneq:	ret
260	restore	%g0, %g0, %o0		! equal return zero
261
262.noteq:	srl	%i4, 24, %l4
263	srl	%i5, 24, %l5
264	subcc	%l4, %l5, %i0
265	bne,pt	%icc, 6f
266	andcc	%l4, 0xff, %g0
267	bz	.doneq
268	sll	%i4, 8, %l4
269	sll	%i5, 8, %l5
270	srl	%l4, 24, %l4
271	srl	%l5, 24, %l5
272	subcc	%l4, %l5, %i0
273	bne,pt	%icc, 6f
274	andcc	%l4, 0xff, %g0
275	bz,pt	%icc, .doneq
276	sll	%i4, 16, %l4
277	sll	%i5, 16, %l5
278	srl	%l4, 24, %l4
279	srl	%l5, 24, %l5
280	subcc	%l4, %l5, %i0
281	bne,pt	%icc, 6f
282	andcc	%l4, 0xff, %g0
283	bz,pt	%icc, .doneq
284	nop
285.noteqb:
286	and	%i4, 0xff, %l4
287	and	%i5, 0xff, %l5
288	subcc	%l4, %l5, %i0
2896:	ret
290	restore	%i0, %g0, %o0
291
292	! Do a byte by byte comparison, disregarding alignments
293.cmp_bytes:
294	deccc	%i2			! --n >= 0 ?
2951:
296	bcs,pn	%xcc, .doneq
297	nop				! delay slot
298	ldub	[%i1 + %i0], %i4	! read a byte from s1
299	ldub	[%i1], %i5		! read a byte from s2
300
301	inc	%i1
302	cmp	%i4, %i5
303	bne,pt	%icc, .noteqb
304	tst	%i4			! terminating zero
305	bnz,pt	%icc, 1b
306	deccc	%i2			! --n >= 0
307	b,a	.doneq
308
309	SET_SIZE(strncmp)
310