xref: /titanic_52/usr/src/lib/libc/sparcv9/gen/memcmp.s (revision c2580b931007758eab8cb5ae8726ebe1588e259b)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright (c) 1997-1998 by Sun Microsystems, Inc.
24 * All rights reserved.
25 */
26
27.ident	"%Z%%M%	%I%	%E% SMI"
28
29	.file	"%M%"
30
31/*
32 * memcmp(s1, s2, len)
33 *
34 * Compare n bytes:  s1>s2: >0  s1==s2: 0  s1<s2: <0
35 *
36 * Fast assembler language version of the following C-program for memcmp
37 * which represents the `standard' for the C-library.
38 *
39 *	int
40 *	memcmp(const void *s1, const void *s2, size_t n)
41 *	{
42 *		if (s1 != s2 && n != 0) {
43 *			const unsigned char *ps1 = s1;
44 *			const unsigned char *ps2 = s2;
45 *			do {
46 *				if (*ps1++ != *ps2++)
47 *					return (ps1[-1] - ps2[-1]);
48 *			} while (--n != 0);
49 *		}
50 *		return (0);
51 *	}
52 */
53
54#include <sys/asm_linkage.h>
55
56	ANSI_PRAGMA_WEAK(memcmp,function)
57
58#include "synonyms.h"
59
/*
 * Register usage, as established by the code below:
 *
 *	%o0	s1 on entry; advanced as bytes and words are consumed;
 *		receives the return value in the delay slot of each retl
 *	%o1	s2 on entry; in the word loops and in the byte loop it
 *		holds the difference (s2 - s1), so that [%o0 + %o1]
 *		addresses s2 while only %o0 is incremented
 *	%o2	byte count; from .iss2 on, only the bytes that are left
 *		for the byte loop
 *	%o3	scratch during alignment, then the number of bytes still
 *		to be handled by a word loop
 *	%o4	current byte or word of s1
 *	%o5	current byte or word of s2
 *	%g1,%g5	scratch used to assemble s2 words when s2 is not word
 *		aligned
 *
 * Overall flow: short compares (count <= 17) use the byte loop only.
 * Longer compares first advance s1 to a word boundary one or two bytes
 * at a time, then pick one of four word loops according to (s2 & 3),
 * and finish any remaining bytes in the byte loop.
 */
60	ENTRY(memcmp)
/*
 * Identical pointers compare equal without reading memory.  The cmp in
 * the delay slot of the be is executed either way; its result is what
 * the following bleu tests.
 */
61	cmp	%o0, %o1		! s1 == s2?
62	be,pn	%xcc, .cmpeq
63	cmp	%o2, 17
/*
 * The delay slot of this branch is annulled when the branch is not
 * taken, so %o1 is turned into (s2 - s1) only on the byte-loop path.
 * On the fall-through path %o1 is still the s2 pointer.
 */
64	bleu,a,pn %xcc, .cmpbyt		! for small counts go do bytes
65	sub	%o1, %o0, %o1
66
/*
 * Bring s1 to a word boundary.  (s1 & 3) == 0 goes straight to .iss2
 * (with the s2 alignment test done in the annulled delay slot),
 * 2 goes to .algn2, and 1 or 3 fall into .algn1.  The result of
 * cmp %o3, 3 is consumed by the first be inside .algn1.
 */
67	andcc	%o0, 3, %o3		! is s1 aligned?
68	bz,a,pn	%icc, .iss2		! if so go check s2
69	andcc	%o1, 3, %o4		! is s2 aligned?
70	cmp	%o3, 2
71	be,pn	%icc, .algn2
72	cmp	%o3, 3
73
/*
 * Compare a single byte.  None of ldub/inc/dec writes the condition
 * codes, so the first be still tests cmp %o3, 3 from above: when
 * (s1 & 3) was 3, s1 is now word aligned and .algn3 finishes the byte
 * comparison that is issued in the delay slot.  Otherwise (s1 & 3) was
 * 1 and, if the bytes are equal, .algn2 handles the next two bytes.
 */
74.algn1:	ldub	[%o0], %o4		! cmp one byte
75	inc	%o0
76	ldub	[%o1], %o5
77	inc	%o1
78	dec	%o2
79	be,pn	%icc, .algn3
80	cmp	%o4, %o5
81	be,pt	%icc, .algn2
82	nop
83	b,a	.noteq
84
/*
 * s1 is halfword aligned here.  Two bytes of s1 are fetched with one
 * lduh and compared one at a time against single bytes of s2: first
 * the upper byte (%o4 >> 8), then, at local label 1, the lower byte
 * (%o4 & 0xff).  On a mismatch of the first byte, the s1 byte is moved
 * into %o4 because .noteq returns %o4 - %o5.
 */
85.algn2:	lduh	[%o0], %o4
86	inc	2, %o0
87	ldub	[%o1], %o5
88	inc	1, %o1
89	srl	%o4, 8, %o3
90	cmp	%o3, %o5
91	be,a,pt	%icc, 1f
92	ldub	[%o1], %o5		! delay slot, get next byte from s2
93	b	.noteq
94	mov	%o3, %o4		! delay slot, move *s1 to %o4
951:	inc	%o1
96	dec	2, %o2
97	and	%o4, 0xff, %o4
98	cmp	%o4, %o5
/*
 * Common tail of the alignment code: the condition codes hold the
 * result of the last byte comparison.  If equal, s1 is word aligned
 * and .iss2 is entered with %o4 = (s2 & 3) and the condition codes of
 * that andcc.
 */
99.algn3:	be,a,pt	%icc, .iss2
100	andcc	%o1, 3, %o4		! delay slot, is s2 aligned?
101	b,a	.noteq
102
/*
 * Byte loop.  Expects %o1 = (s2 - s1) and %o2 = number of bytes left.
 * Every entry into .bytcmp is preceded by deccc %o2, which sets the
 * carry when the count was already zero; bgeu (carry clear) therefore
 * loops only while a byte remained.  Its delay slot is annulled when
 * the loop ends, so no byte of s1 is read past the count.  When the
 * count is exhausted, execution falls through into .cmpeq.
 */
103.cmpbyt:b	.bytcmp
104	deccc	%o2
1051:	ldub	[%o0 + %o1], %o5	! byte compare loop
106	inc	%o0
107	cmp	%o4, %o5
108	be,a,pt	%icc, .bytcmp
109	deccc	%o2			! delay slot, compare count (len)
110	b,a	.noteq
111.bytcmp:bgeu,a,pt %xcc, 1b
112	ldub	[%o0], %o4
/*
 * Equal result: return 0.
 */
113.cmpeq:
114	retl				! strings compare equal
115	clr	%o0
116
/*
 * %o4 and %o5 hold an s1 word and an s2 word that differ.  The bytes
 * are examined from the top byte of the word downwards; the top byte is
 * the one the code treats as the first byte.  The first unequal pair is
 * left in %o1/%o2 and its difference is returned.
 *
 * The sll of %o4 sits in the delay slot of each bne and is executed
 * even when the branch is taken; that is harmless because %o1/%o2
 * already hold the bytes to subtract.  The fourth pair is not compared:
 * the words are known to differ, so if the first three pairs are equal
 * the last pair must be the unequal one.
 */
117.noteq_word:				! words aren't equal. find unequal byte
118	srl	%o4, 24, %o1		! first byte
119	srl	%o5, 24, %o2
120	cmp	%o1, %o2
121	bne,pn	%icc, 1f
122	sll	%o4, 8, %o4
123	sll	%o5, 8, %o5
124	srl	%o4, 24, %o1
125	srl	%o5, 24, %o2
126	cmp	%o1, %o2
127	bne,pn	%icc, 1f
128	sll	%o4, 8, %o4
129	sll	%o5, 8, %o5
130	srl	%o4, 24, %o1
131	srl	%o5, 24, %o2
132	cmp	%o1, %o2
133	bne,pn	%icc, 1f
134	sll	%o4, 8, %o4
135	sll	%o5, 8, %o5
136	srl	%o4, 24, %o1
137	srl	%o5, 24, %o2
1381:
139	retl
140	sub	%o1, %o2, %o0		! delay slot
141
/*
 * Unequal bytes: %o4 holds the s1 byte and %o5 the s2 byte, both
 * loaded zero-extended with ldub, so the difference has the sign of
 * an unsigned byte comparison.
 */
142.noteq:
143	retl				! strings aren't equal
144	sub	%o4, %o5, %o0		! delay slot, return(*s1 - *s2)
145
/*
 * s1 is word aligned.  On entry %o4 = (s2 & 3) and the condition codes
 * still reflect that andcc; andn and and do not write them.  The count
 * is split into %o3 (a multiple of 4, for a word loop) and %o2 (the
 * 0-3 bytes left over for the byte loop).  Dispatch on s2 alignment:
 * 0 -> .w4cmp, 2 -> .w2cmp, 1 or 3 -> .w3cmp.  The result of
 * cmp %o4, 1 is consumed by the be inside .w3cmp.
 */
146.iss2:	andn	%o2, 3, %o3		! count of aligned bytes
147	and	%o2, 3, %o2		! remaining bytes
148	bz,pn	%icc, .w4cmp		! if s2 word aligned, compare words
149	cmp	%o4, 2
150	be,pn	%icc, .w2cmp		! s2 half aligned
151	cmp	%o4, 1
152
/*
 * (s2 & 3) is 1 or 3.  The word loop below reads s2 ahead of the bytes
 * it has compared, so four bytes are moved from the word-loop count to
 * the byte-loop count.  One byte of s2 is read and placed in the top
 * byte of %o5.  If (s2 & 3) was 1, two more bytes are needed to reach
 * a word boundary and .w1cmp takes over; otherwise s2 is now word
 * aligned and %o1 is converted to the offset (s2 - s1).
 */
153.w3cmp:
154	dec	4, %o3			! avoid reading beyond the last byte
155	inc	4, %o2
156	ldub	[%o1], %g1		! read a byte to align for word reads
157	inc	1, %o1
158	be,pt	%icc, .w1cmp		! aligned to 1 or 3 bytes
159	sll	%g1, 24, %o5
160
161	sub	%o1, %o0, %o1
/*
 * Loop at local label 2: %o5 carries one s2 byte in its top byte.  Each
 * pass loads the next aligned s2 word into %g1, merges its upper three
 * bytes under the carried byte to form the s2 word that corresponds to
 * the s1 word in %o4, and compares.  The deccc in the bne delay slot
 * updates the loop count; the sll in the bnz delay slot moves the low
 * byte of %g1 into the top byte of %o5 for the next pass.
 */
1622:	lduw	[%o0 + %o1], %g1
163	lduw	[%o0], %o4
164	inc	4, %o0
165	srl	%g1, 8, %g5		! merge with the other half
166	or	%g5, %o5, %o5
167	cmp	%o4, %o5
168	bne,pt	%icc, .noteq_word
169	deccc	4, %o3
170	bnz,pt	%xcc, 2b
171	sll	%g1, 24, %o5
/*
 * Back the s2 offset up by the one byte that was read but not yet
 * compared, then let the byte loop finish.
 */
172	sub	%o1, 1, %o1		! used 3 bytes of the last word read
173	b	.bytcmp
174	deccc	%o2
175
/*
 * (s2 & 3) was 1.  Within this file .w1cmp is entered only from
 * .w3cmp, which has already read one s2 byte into the top byte of %o5
 * and has already moved four bytes to the byte-loop count; the
 * dec/inc pair here moves another four, so eight bytes in total are
 * left to the byte loop on this path.
 * NOTE(review): the second adjustment looks more conservative than a
 * three-byte read-ahead requires -- confirm the intent before changing
 * it.
 *
 * A halfword is read to reach the word boundary, giving three carried
 * s2 bytes in the upper part of %o5.
 */
176.w1cmp:
177	dec	4, %o3			! avoid reading beyond the last byte
178	inc	4, %o2
179	lduh	[%o1], %g1		! read 3 bytes to word align
180	inc	2, %o1
181	sll	%g1, 8, %g5
182	or	%o5, %g5, %o5
183
184	sub	%o1, %o0, %o1
/*
 * Loop at local label 3: same scheme as the loop in .w3cmp, but three
 * bytes are carried in %o5 and only the top byte of each newly loaded
 * s2 word completes the current comparison word.
 */
1853:	lduw	[%o0 + %o1], %g1
186	lduw	[%o0], %o4
187	inc	4, %o0
188	srl	%g1, 24, %g5		! merge with the other half
189	or	%g5, %o5, %o5
190	cmp	%o4, %o5
191	bne,pt	%icc, .noteq_word
192	deccc	4, %o3
193	bnz,pt	%xcc, 3b
194	sll	%g1, 8, %o5
/*
 * Back the s2 offset up by the three bytes that were read but not yet
 * compared, then let the byte loop finish.
 */
195	sub	%o1, 3, %o1		! used 1 byte of the last word read
196	b	.bytcmp
197	deccc	%o2
198
/*
 * (s2 & 3) is 2.  Four bytes are moved from the word-loop count to the
 * byte-loop count, one halfword of s2 is read into the upper half of
 * %o5, and %o1 becomes the offset (s2 - s1).
 */
199.w2cmp:
200	dec	4, %o3			! avoid reading beyond the last byte
201	inc	4, %o2
202	lduh	[%o1], %g1		! read a halfword to align s2
203	inc	2, %o1
204	sll	%g1, 16, %o5
205	sub	%o1, %o0, %o1
/*
 * Loop at local label 4: the upper half of each newly loaded s2 word
 * is merged under the carried halfword; the lower half is carried to
 * the next pass by the sll in the bnz delay slot.
 */
2064:	lduw	[%o0 + %o1], %g1	! read a word from s2
207	lduw	[%o0], %o4		! read a word from s1
208	inc	4, %o0
209	srl	%g1, 16, %g5		! merge with the other half
210	or	%g5, %o5, %o5
211	cmp	%o4, %o5
212	bne,pn	%icc, .noteq_word
213	deccc	4, %o3
214	bnz,pt	%xcc, 4b
215	sll	%g1, 16, %o5
216	sub	%o1, 2, %o1		! only used half of the last read word
217	b	.bytcmp
218	deccc	%o2
219
/*
 * Both s1 and s2 are word aligned, so no merging and no read-ahead
 * adjustment is needed.  The first s2 word is loaded before the loop;
 * later s2 words are loaded in the annulled delay slot of the bnz,
 * which runs only when another pass follows.
 */
220.w4cmp:
221	sub	%o1, %o0, %o1
222	lduw	[%o0 + %o1], %o5
2235:	lduw	[%o0], %o4
224	inc	4, %o0
225	cmp	%o4, %o5
226	bne,pt	%icc, .noteq_word
227	deccc	4, %o3
228	bnz,a,pt %xcc, 5b
229	lduw	[%o0 + %o1], %o5
230	b	.bytcmp			! compare remaining bytes, if any
231	deccc	%o2
232
233	SET_SIZE(memcmp)
234