xref: /titanic_41/usr/src/lib/libc/sparcv9/gen/strncmp.s (revision d29b2c4438482eb00488be49a1f5d6835f455546)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29	.file	"%M%"
30
/*
 * strncmp(s1, s2, n)
 *
 * Compare strings (at most n bytes):  s1>s2: >0  s1==s2: 0  s1<s2: <0
 *
 * Fast assembly language version of the following C program for strncmp,
 * which represents the `standard' for the C library.
 *
 *	int
 *	strncmp(const char *s1, const char *s2, size_t n)
 *	{
 *		n++;
 *		if (s1 == s2)
 *			return (0);
 *		while (--n != 0 && *s1 == *s2++)
 *			if (*s1++ == '\0')
 *				return (0);
 *		return ((n == 0) ? 0 : (*s1 - s2[-1]));
 *	}
 */
51
52#include <sys/asm_linkage.h>
53#include "synonyms.h"
54
/*
 * int strncmp(const char *s1, const char *s2, size_t n)
 *
 * Register usage after the save:
 *	%i0	s1 on entry; every path that reaches a word loop or .cmp_bytes
 *		first turns it into the offset (s1 - s2 cursor), so that
 *		[%i1 + %i0] addresses s1
 *	%i1	s2 cursor
 *	%i2	n, the number of bytes that may still be compared (64-bit,
 *		tested through %xcc)
 *	%i3	s2 & 3 on entry to .iss2; afterwards the most recently loaded
 *		aligned word of s2 in the misaligned loops
 *	%i4	current byte or word of s1
 *	%i5	current byte or word of s2; in the misaligned loops it is
 *		merged from the bytes carried over from the previous aligned
 *		s2 word and the top bytes of the next one
 *	%l0-%l2	byte masks 0xff000000, 0x00ff0000, 0x0000ff00
 *	%l6,%l7	0x7efefeff and 0x81010100, used by the word zero-byte test
 *	%l3-%l5	scratch
 *
 * Word zero-byte test: ((w + 0x7efefeff) ^ w) & 0x81010100 is compared with
 * 0x81010100.  When they are equal the loop continues with the next word;
 * otherwise each byte of w is tested individually, because the test can
 * trigger falsely for 8-bit characters.
 *
 * Return value (in %o0 after the restore): zero through .doneq, otherwise
 * the difference of the first differing bytes taken as unsigned values.
 *
 * NOTE(review): in .w1cmp/.w2cmp/.w3cmp the next aligned word of s2 is
 * loaded before the bytes carried over from the previous word have been
 * tested for NUL.  If s2 ends inside the carried bytes this looks like a
 * read past the terminator (possibly into the following page) - confirm
 * whether that needs guarding.
 */
	ENTRY(strncmp)
	save	%sp, -SA(WINDOWSIZE), %sp
	cmp	%i2, 8
	blu,a,pn %xcc, .cmp_bytes	! for small counts go do bytes
	sub	%i0, %i1, %i0		! delay slot, get diff from s1 - s2
	andcc	%i0, 3, %g0		! is s1 aligned
1:	bz,pn	%icc, .iss2		! if so go check s2
	andcc	%i1, 3, %i3		! delay slot: is s2 aligned (cc used at .iss2)

	deccc	%i2			! --n >= 0 ?
	bcs,pn	%xcc, .doneq
	nop				! delay slot

	ldub	[%i0], %i4		! else cmp one byte
	ldub	[%i1], %i5
	inc	%i0
	cmp	%i4, %i5
	bne,pn	%icc, .noteqb
	inc	%i1			! delay slot
	tst	%i4			! terminating zero
	bnz,pt	%icc, 1b
	andcc	%i0, 3, %g0		! delay slot: is s1 aligned now
	b,a	.doneq

	! s1 is word aligned here; icc still holds the result of (s2 & 3)
	! because none of the constant-building instructions below set cc.
.iss2:
	set     0x7efefeff, %l6
	set     0x81010100, %l7
	sethi	%hi(0xff000000), %l0	! masks to test for terminating null
	sethi	%hi(0x00ff0000), %l1
	srl	%l1, 8, %l2		! generate 0x0000ff00 mask

	bz,pn	%icc, .w4cmp		! if s2 word aligned, compare words
	cmp	%i3, 2			! check if s2 half aligned
	be,pn	%icc, .w2cmp
	cmp	%i3, 1			! check if aligned to 1 or 3 bytes

	! s2 & 3 is 1 or 3: pick up the first byte of s2 into the top of %i5
.w3cmp:	ldub	[%i1], %i5
	inc	1, %i1
	be,pt	%icc, .w1cmp		! (s2 & 3) == 1, three bytes to carry
	sll	%i5, 24, %i5		! delay slot
	sub	%i0, %i1, %i0		! %i0 = s1 - (s2 cursor)
2:
	deccc	4, %i2			! n >= 4 ?
	bgeu,a,pt %xcc, 3f
	lduw	[%i1], %i3		! delay slot, next aligned word of s2
	dec	%i1			! reset s2
	inc	%i0			! reset s1 diff
	b	.cmp_bytes		! do a byte at a time if n < 4
	inc	4, %i2			! delay slot, undo the deccc
3:
	lduw	[%i0 + %i1], %i4	! read a word from s1
	inc	4, %i1
	srl	%i3, 8, %l4		! merge with the other half
	or	%l4, %i5, %i5
	cmp	%i4, %i5
	be,pt	%icc, 1f

	add	%i4, %l6, %l3		! delay slot, start of zero-byte test
	b,a	.noteq
1:	xor	%l3, %i4, %l3
	and	%l3, %l7, %l3
	cmp	%l3, %l7
	be,a,pt	%icc, 2b
	sll	%i3, 24, %i5		! delay slot, carry low byte of s2 word

	!
	! For 7-bit characters, we know one of the bytes is zero, but for
	! 8-bit characters, the zero detection algorithm gives some false
	! triggers ... check every byte individually.
	!
	andcc	%i4, %l0, %g0		! check if first byte was zero
	bnz,pt	%icc, 1f
	andcc	%i4, %l1, %g0		! check if second byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc 	%i4, %l2, %g0		! check if third byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc	%i4, 0xff, %g0		! check if last byte is zero
	b,a	.doneq
1:	bnz,pn	%icc, 2b
	sll	%i3, 24, %i5		! delay slot, carry low byte of s2 word
	b,a	.doneq

	! (s2 & 3) == 1: one byte is already in the top of %i5, add the
	! following halfword so that three bytes are carried.
.w1cmp:	clr	%l4
	lduh	[%i1], %l4
	inc	2, %i1
	sll	%l4, 8, %l4
	or	%i5, %l4, %i5

	sub	%i0, %i1, %i0		! %i0 = s1 - (s2 cursor)
3:
	deccc	4, %i2			! n >= 4 ?
	bgeu,a,pt %xcc, 4f
	lduw	[%i1], %i3		! delay slot, next aligned word of s2
	dec	3, %i1			! reset s2
	inc	3, %i0			! reset s1 diff
	b	.cmp_bytes		! do a byte at a time if n < 4
	inc	4, %i2			! delay slot, undo the deccc
4:
	lduw	[%i0 + %i1], %i4	! read a word from s1
	inc	4, %i1
	srl	%i3, 24, %l4		! merge with the other half
	or	%l4, %i5, %i5
	cmp	%i4, %i5
	be,pt	%icc, 1f

	add	%i4, %l6, %l3		! delay slot, start of zero-byte test
	b,a	.noteq
1:	xor	%l3, %i4, %l3
	and	%l3, %l7, %l3
	cmp	%l3, %l7
	be,a,pt	%icc, 3b
	sll	%i3, 8, %i5		! delay slot, carry low 3 bytes of s2 word

	andcc	%i4, %l0, %g0		! check if first byte was zero
	bnz,pt	%icc, 1f
	andcc	%i4, %l1, %g0		! check if second byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc 	%i4, %l2, %g0		! check if third byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc	%i4, 0xff, %g0		! check if last byte is zero
	b,a	.doneq
1:	bnz,pn	%icc, 3b
	sll	%i3, 8, %i5		! delay slot, carry low 3 bytes of s2 word
	b,a	.doneq

	! (s2 & 3) == 2: two bytes are carried between aligned s2 words
.w2cmp:
	lduh	[%i1], %i5		! read a halfword to align s2
	inc	2, %i1
	sll	%i5, 16, %i5

	sub	%i0, %i1, %i0		! %i0 = s1 - (s2 cursor)
4:
	deccc	4, %i2			! n >= 4 ?
	bgeu,a,pt %xcc, 5f
	lduw	[%i1], %i3		! delay slot, next aligned word of s2
	dec	2, %i1			! reset s2
	inc	2, %i0			! reset s1 diff
	b	.cmp_bytes		! do a byte at a time if n < 4
	inc	4, %i2			! delay slot
5:
	lduw	[%i1 + %i0], %i4	! read a word from s1
	inc	4, %i1
	srl	%i3, 16, %l4		! merge with the other half
	or	%l4, %i5, %i5
	cmp	%i4, %i5
	be,pt	%icc, 1f

	add	%i4, %l6, %l3		! delay slot, start of zero-byte test
	b,a	.noteq
1:	xor	%l3, %i4, %l3		! are any bytes 0?
	and	%l3, %l7, %l3
	cmp	%l3, %l7
	be,a,pt	%icc, 4b
	sll	%i3, 16, %i5		! delay slot, carry low half of s2 word

	andcc	%i4, %l0, %g0		! check if first byte was zero
	bnz,pt	%icc, 1f
	andcc	%i4, %l1, %g0		! check if second byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc 	%i4, %l2, %g0		! check if third byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc	%i4, 0xff, %g0		! check if last byte is zero
	b,a	.doneq
1:	bnz,pn	%icc, 4b
	sll	%i3, 16, %i5		! delay slot, carry low half of s2 word
	b,a	.doneq

	! Both strings word aligned.  The s2 word is loaded only below label
	! 5, after the n == 0 test, so s2 is never read once n is exhausted.
.w4cmp:	sub	%i0, %i1, %i0		! %i0 = s1 - s2
5:	cmp	%i2, 0
	be,a,pn	%xcc, .doneq
	nop
	lduw	[%i1], %i5		! read a word from s2
	deccc	4, %i2			! n >= 4 ?
	bcs,a,pn %xcc, .cmp_bytes	! do a byte at a time if n < 4
	inc	4, %i2			! delay slot, undo the deccc

	lduw	[%i1 + %i0], %i4	! read a word from s1
	cmp	%i4, %i5
	inc	4, %i1
	be,pt	%icc, 1f

	add	%i4, %l6, %l3		! delay slot, start of zero-byte test
	b,a	.noteq
1:	xor	%l3, %i4, %l3
	and	%l3, %l7, %l3
	cmp	%l3, %l7
	be,pt	%icc, 5b
	nop

	andcc	%i4, %l0, %g0		! check if first byte was zero
	bnz,pt	%icc, 1f
	andcc	%i4, %l1, %g0		! check if second byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc 	%i4, %l2, %g0		! check if third byte was zero
	b,a	.doneq
1:	bnz,pt	%icc, 1f
	andcc	%i4, 0xff, %g0		! check if last byte is zero
	b,a	.doneq
1:	bnz,pn	%icc, 5b		! no zero byte after all, keep going
	nop				! delay slot, no s2 load before the n test
.doneq:	ret
	restore	%g0, %g0, %o0		! equal return zero

	! Words in %i4 (s1) and %i5 (s2) differ: walk the bytes from the most
	! significant one, returning the first non-zero difference, or zero if
	! an equal pair of NUL bytes is met first.
.noteq:	srl	%i4, 24, %l4
	srl	%i5, 24, %l5
	subcc	%l4, %l5, %i0
	bne,pt	%icc, 6f
	andcc	%l4, 0xff, %g0		! delay slot, was the s1 byte zero
	bz,pt	%icc, .doneq
	sll	%i4, 8, %l4		! delay slot, isolate second byte
	sll	%i5, 8, %l5
	srl	%l4, 24, %l4
	srl	%l5, 24, %l5
	subcc	%l4, %l5, %i0
	bne,pt	%icc, 6f
	andcc	%l4, 0xff, %g0		! delay slot, was the s1 byte zero
	bz,pt	%icc, .doneq
	sll	%i4, 16, %l4		! delay slot, isolate third byte
	sll	%i5, 16, %l5
	srl	%l4, 24, %l4
	srl	%l5, 24, %l5
	subcc	%l4, %l5, %i0
	bne,pt	%icc, 6f
	andcc	%l4, 0xff, %g0		! delay slot, was the s1 byte zero
	bz,pt	%icc, .doneq
	nop
	! Also the target of the byte loops: compare the low bytes only
.noteqb:
	and	%i4, 0xff, %l4
	and	%i5, 0xff, %l5
	subcc	%l4, %l5, %i0
6:	ret
	restore	%i0, %g0, %o0		! return the byte difference

	! Do a byte by byte comparison, disregarding alignments
	! Expects %i0 = s1 - s2 cursor, %i1 = s2 cursor, %i2 = n
.cmp_bytes:
	deccc	%i2			! --n >= 0 ?
1:
	bcs,pn	%xcc, .doneq
	nop				! delay slot
	ldub	[%i1 + %i0], %i4	! read a byte from s1
	ldub	[%i1], %i5		! read a byte from s2

	inc	%i1
	cmp	%i4, %i5
	bne,pt	%icc, .noteqb
	tst	%i4			! delay slot, terminating zero
	bnz,pt	%icc, 1b
	deccc	%i2			! delay slot, --n >= 0
	b,a	.doneq

	SET_SIZE(strncmp)
313