xref: /titanic_41/usr/src/lib/libc/sparc/gen/strncmp.s (revision 5fbb41393be5d63f75952b1d72d4df2642d22557)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.file	"strncmp.s"
28
29/*
30 * strncmp(s1, s2, n)
31 *
32 * Compare strings (at most n bytes):  s1>s2: >0  s1==s2: 0  s1<s2: <0
33 *
34 * Fast assembler language version of the following C-program for strncmp
35 * which represents the `standard' for the C-library.
36 *
37 *	int
38 *	strncmp(const char *s1, const char *s2, size_t n)
39 *	{
40 *		n++;
41 *		if (s1 == s2)
42 *			return (0);
43 *		while (--n != 0 && *s1 == *s2++)
44 *			if (*s1++ == '\0')
45 *				return (0);
46 *		return ((n == 0) ? 0 : (*s1 - s2[-1]));
47 *	}
48 */
49
50#include <sys/asm_linkage.h>
51
52	ENTRY(strncmp)
	!
	! Register usage after the save, as established by the code below:
	!   %i0	s1 on entry; on the word paths and in .cmp_bytes it is
	!		turned into the offset (s1 - s2 pointer), so that
	!		[%i1 + %i0] addresses s1
	!   %i1	running s2 pointer
	!   %i2	n, the number of bytes that may still be compared
	!   %i3	s2 & 3 at .iss2; afterwards the latest aligned s2 word
	!   %i4	byte/word taken from s1
	!   %i5	byte/word taken from s2 (assembled from two aligned
	!		words when s2 is not word aligned)
	!   %l0-%l2	byte masks 0xff000000, 0x00ff0000, 0x0000ff00
	!   %l6, %l7	constants for the word-at-a-time zero byte test
	!   %l3-%l5	scratch
	!
	! The result is handed back by the restore: 0 at .doneq, otherwise
	! the difference of the first differing bytes, both zero-extended.
	!
	! Reminder on SPARC branches: the instruction following a branch is
	! its delay slot and executes before the target.  With ",a" the slot
	! is skipped when a conditional branch is not taken, and is always
	! skipped for "b,a".
	!
53	save	%sp, -SA(WINDOWSIZE), %sp
54	cmp	%i2, 8
55	blu,a	.cmp_bytes		! n < 8 (unsigned): compare bytes only
56	sub	%i0, %i1, %i0		! delay slot (taken only): %i0 = s1 - s2
	! Compare single bytes until s1 is word aligned.
57	andcc	%i0, 3, %g0		! is s1 aligned
581:	bz	.iss2			! if so go check s2
59	andcc	%i1, 3, %i3		! delay slot: %i3 = s2 & 3, cc used at .iss2
60
61	deccc	%i2			! --n >= 0 ?
62	bcs	.doneq			! carry set: n was already 0, equal
63	nop				! delay slot
64
65	ldub	[%i0], %i4		! else cmp one byte
66	ldub	[%i1], %i5
67	inc	%i0
68	cmp	%i4, %i5
69	bne	.noteqb
70	inc	%i1			! delay slot
71	tst	%i4			! terminating zero
72	bnz	1b			! not at the end: re-test s1 alignment
73	andcc	%i0, 3, %g0		! delay slot: cc for the bz at 1:
74	b,a	.doneq			! equal bytes and both are zero: equal
75
76.iss2:
	! s1 is word aligned here; choose a word loop from s2 & 3 (%i3).
	! The set/sethi/srl below do not modify the condition codes, so the
	! bz after them still tests the "andcc %i1, 3, %i3" result.
77	set     0x7efefeff, %l6
78	set     0x81010100, %l7
79	sethi	%hi(0xff000000), %l0	! masks to test for terminating null
80	sethi	%hi(0x00ff0000), %l1
81	srl	%l1, 8, %l2		! generate 0x0000ff00 mask
82
83	bz	.w4cmp			! if s2 word aligned, compare words
84	cmp	%i3, 2			! delay slot: check if s2 half aligned
85	be	.w2cmp
86	cmp	%i3, 1			! delay slot: check if aligned to 1 or 3 bytes
	! s2 & 3 is 1 or 3: take one s2 byte first.
87.w3cmp:	ldub	[%i1], %i5
88	inc	1, %i1
89	be	.w1cmp			! cc still from "cmp %i3, 1"
90	sll	%i5, 24, %i5		! delay slot: byte becomes top byte of %i5
	! s2 & 3 was 3: %i1 is now word aligned and %i5 carries one s2 byte.
91	sub	%i0, %i1, %i0		! %i0 = s1 - %i1, so [%i0 + %i1] is s1
	!
	! NOTE(review): in this loop (and likewise in the .w1cmp and .w2cmp
	! loops) the next aligned s2 word is loaded while the s2 byte(s)
	! carried in %i5 have not yet been tested for a terminating zero.
	! If the terminator is among those bytes, the load reads past the
	! end of s2 -- confirm that this can never touch an unmapped page.
	!
922:
93	deccc	4, %i2			! n >= 4 ?
94	bgeu,a	3f
95	ld	[%i1], %i3		! delay slot (taken only): next s2 word
96	dec	%i1			! reset s2 (back over the byte held in %i5)
97	inc	%i0			! reset s1 diff
98	b	.cmp_bytes		! do a byte at a time if n < 4
99	inc	4, %i2			! delay slot: undo the deccc above
1003:
101	ld	[%i0 + %i1], %i4	! read a word from s1
102	inc	4, %i1
103	srl	%i3, 8, %l4		! merge with the other half
104	or	%l4, %i5, %i5		! %i5 = 4 consecutive s2 bytes
105	cmp	%i4, %i5
106	be	1f
107
108	add	%i4, %l6, %l3		! delay slot: start of the zero byte test
109	b,a	.noteq			! words differ: find the differing byte
1101:	xor	%l3, %i4, %l3		! bits changed by the add
111	and	%l3, %l7, %l3
112	cmp	%l3, %l7		! every marker bit changed: no zero byte
113	be,a	2b
114	sll	%i3, 24, %i5		! delay slot (taken only): carry last s2 byte
115
116	!
117	! For 7-bit characters, we know one of the bytes is zero, but for
118	! 8-bit characters, the zero detection algorithm gives some false
119	! triggers ... check every byte individually.
120	!
	! The words are equal at this point, so a zero byte in the s1 word
	! means the strings are equal (.doneq).
121	andcc	%i4, %l0, %g0		! check if first byte was zero
122	bnz	1f
123	andcc	%i4, %l1, %g0		! check if second byte was zero
124	b,a	.doneq
1251:	bnz	1f
126	andcc 	%i4, %l2, %g0		! check if third byte was zero
127	b,a	.doneq
1281:	bnz	1f
129	andcc	%i4, 0xff, %g0		! check if last byte is zero
130	b,a	.doneq
1311:	bnz	2b			! false trigger: no zero byte, keep going
132	sll	%i3, 24, %i5		! delay slot: carry last s2 byte
133	b,a	.doneq
134
	! s2 & 3 was 1: one byte is already in the top of %i5; add the next
	! halfword so that %i5 carries three s2 bytes and %i1 is word aligned.
135.w1cmp:	clr	%l4
136	lduh	[%i1], %l4
137	inc	2, %i1
138	sll	%l4, 8, %l4
139	or	%i5, %l4, %i5
140
141	sub	%i0, %i1, %i0		! %i0 = s1 - %i1, so [%i0 + %i1] is s1
1423:
143	deccc	4, %i2			! n >= 4 ?
144	bgeu,a	4f
145	ld	[%i1], %i3		! delay slot (taken only): next s2 word
146	dec	3, %i1			! reset s2 (back over the 3 bytes in %i5)
147	inc	3, %i0			! reset s1 diff
148	b	.cmp_bytes		! do a byte at a time if n < 4
149	inc	4, %i2			! delay slot: undo the deccc above
1504:
151	ld	[%i0 + %i1], %i4	! read a word from s1
152	inc	4, %i1
153	srl	%i3, 24, %l4		! merge with the other half
154	or	%l4, %i5, %i5		! %i5 = 4 consecutive s2 bytes
155	cmp	%i4, %i5
156	be	1f
157
158	add	%i4, %l6, %l3		! delay slot: start of the zero byte test
159	b,a	.noteq			! words differ: find the differing byte
1601:	xor	%l3, %i4, %l3		! bits changed by the add
161	and	%l3, %l7, %l3
162	cmp	%l3, %l7		! every marker bit changed: no zero byte
163	be,a	3b
164	sll	%i3, 8, %i5		! delay slot (taken only): carry 3 s2 bytes
165
166	andcc	%i4, %l0, %g0		! check if first byte was zero
167	bnz	1f
168	andcc	%i4, %l1, %g0		! check if second byte was zero
169	b,a	.doneq
1701:	bnz	1f
171	andcc 	%i4, %l2, %g0		! check if third byte was zero
172	b,a	.doneq
1731:	bnz	1f
174	andcc	%i4, 0xff, %g0		! check if last byte is zero
175	b,a	.doneq
1761:	bnz	3b			! false trigger: no zero byte, keep going
177	sll	%i3, 8, %i5		! delay slot: carry 3 s2 bytes
178	b,a	.doneq
179
	! s2 & 3 was 2: %i5 carries two s2 bytes in its upper half.
180.w2cmp:
181	lduh	[%i1], %i5		! read a halfword to align s2
182	inc	2, %i1
183	sll	%i5, 16, %i5
184
185	sub	%i0, %i1, %i0		! %i0 = s1 - %i1, so [%i1 + %i0] is s1
1864:
187	deccc	4, %i2			! n >= 4 ?
188	bgeu,a	5f
189	ld	[%i1], %i3		! delay slot (taken only): next s2 word
190	dec	2, %i1			! reset s2 (back over the 2 bytes in %i5)
191	inc	2, %i0			! reset s1 diff
192	b	.cmp_bytes		! do a byte at a time if n < 4
193	inc	4, %i2			! delay slot: undo the deccc above
1945:
195	ld	[%i1 + %i0], %i4	! read a word from s1
196	inc	4, %i1
197	srl	%i3, 16, %l4		! merge with the other half
198	or	%l4, %i5, %i5		! %i5 = 4 consecutive s2 bytes
199	cmp	%i4, %i5
200	be	1f
201
202	add	%i4, %l6, %l3		! delay slot: start of the zero byte test
203	b,a	.noteq			! words differ: find the differing byte
2041:	xor	%l3, %i4, %l3		! are any bytes 0?
205	and	%l3, %l7, %l3
206	cmp	%l3, %l7		! every marker bit changed: no zero byte
207	be,a	4b
208	sll	%i3, 16, %i5		! delay slot (taken only): carry 2 s2 bytes
209
210	andcc	%i4, %l0, %g0		! check if first byte was zero
211	bnz	1f
212	andcc	%i4, %l1, %g0		! check if second byte was zero
213	b,a	.doneq
2141:	bnz	1f
215	andcc 	%i4, %l2, %g0		! check if third byte was zero
216	b,a	.doneq
2171:	bnz	1f
218	andcc	%i4, 0xff, %g0		! check if last byte is zero
219	b,a	.doneq
2201:	bnz	4b			! false trigger: no zero byte, keep going
221	sll	%i3, 16, %i5		! delay slot: carry 2 s2 bytes
222	b,a	.doneq
223
	! Both s1 and s2 are word aligned: compare whole words directly.
	! NOTE(review): the load into %i5 just before label 5:, and the one
	! in the delay slot of the final "bnz,a 5b", appear redundant since
	! %i5 is loaded again after 5: before it is used -- confirm.
224.w4cmp:	sub	%i0, %i1, %i0		! %i0 = s1 - s2, so [%i1 + %i0] is s1
225	ld	[%i1], %i5		! read a word from s2
2265:	cmp     %i2,0
227	be,a    .doneq			! n exhausted: equal
228	nop				! delay slot
229	ld      [%i1], %i5              ! read a word from s2
230	deccc	4, %i2			! n >= 4 ?
231	bcs,a	.cmp_bytes		! do a byte at a time if n < 4
232	inc	4, %i2			! delay slot (taken only): undo the deccc
233
234	ld	[%i1 + %i0], %i4	! read a word from s1
235	cmp	%i4, %i5
236	inc	4, %i1			! inc does not change cc; be uses the cmp
237	be	1f
238
239	add	%i4, %l6, %l3		! delay slot: start of the zero byte test
240	b,a	.noteq			! words differ: find the differing byte
2411:	xor	%l3, %i4, %l3		! bits changed by the add
242	and	%l3, %l7, %l3
243	cmp	%l3, %l7		! every marker bit changed: no zero byte
244	be,a	5b
245	nop
246
247	andcc	%i4, %l0, %g0		! check if first byte was zero
248	bnz	1f
249	andcc	%i4, %l1, %g0		! check if second byte was zero
250	b,a	.doneq
2511:	bnz	1f
252	andcc 	%i4, %l2, %g0		! check if third byte was zero
253	b,a	.doneq
2541:	bnz	1f
255	andcc	%i4, 0xff, %g0		! check if last byte is zero
256	b,a	.doneq
2571:	bnz,a	5b			! false trigger: no zero byte, keep going
258	ld	[%i1], %i5		! delay slot (taken only): word from s2
	! last byte was zero: fall through and return equal
259.doneq:	ret
260	restore	%g0, %g0, %o0		! equal return zero
261
	! The words in %i4 (s1) and %i5 (s2) differ.  Walk the bytes from the
	! most significant one down; return the difference at the first
	! mismatch, or 0 if a zero byte in s1 is reached before any mismatch.
262.noteq:	srl	%i4, 24, %l4		! first byte of each word
263	srl	%i5, 24, %l5
264	subcc	%l4, %l5, %i0
265	bne	6f			! first bytes differ: return the difference
266	andcc	%l4, 0xff, %g0		! delay slot: was that byte zero
267	bz	.doneq
268	sll	%i4, 8, %l4		! delay slot: isolate the second byte
269	sll	%i5, 8, %l5
270	srl	%l4, 24, %l4
271	srl	%l5, 24, %l5
272	subcc	%l4, %l5, %i0
273	bne	6f			! second bytes differ
274	andcc	%l4, 0xff, %g0		! delay slot: was that byte zero
275	bz	.doneq
276	sll	%i4, 16, %l4		! delay slot: isolate the third byte
277	sll	%i5, 16, %l5
278	srl	%l4, 24, %l4
279	srl	%l5, 24, %l5
280	subcc	%l4, %l5, %i0
281	bne	6f			! third bytes differ
282	andcc	%l4, 0xff, %g0		! delay slot: was that byte zero
283	bz	.doneq
284	nop
	! Return the difference of the low bytes of %i4 and %i5.  Also the
	! exit used by the byte loops, where %i4/%i5 hold single bytes.
285.noteqb:
286	and	%i4, 0xff, %l4
287	and	%i5, 0xff, %l5
288	subcc	%l4, %l5, %i0
2896:	ret
290	restore	%i0, %g0, %o0		! return the byte difference in %i0
291
292	! Do a byte by byte comparison, disregarding alignments
	! Expects %i0 = s1 - %i1 (offset) and %i1 = current s2 position.
293.cmp_bytes:
294	deccc	%i2			! --n >= 0 ?
2951:
296	bcs	.doneq			! carry set: n was already 0, equal
297	nop				! delay slot
298	ldub	[%i1 + %i0], %i4	! read a byte from s1
299	ldub	[%i1], %i5		! read a byte from s2
300
301	inc	%i1			! advances both: s1 is addressed via %i1
302	cmp	%i4, %i5
303	bne	.noteqb
304	tst	%i4			! terminating zero
305	bnz	1b
306	deccc	%i2			! --n >= 0
307	b,a	.doneq			! equal bytes and both are zero: equal
308
309	SET_SIZE(strncmp)
310