/* xref: /titanic_52/usr/src/lib/libc/sparcv9/gen/memcmp.s (revision 9db67a327daf1243e630c20b81978ffd2a7baad7) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Source identification for this file.  The %-keywords look like
 * SCCS-style placeholders that are expanded at checkout time
 * (NOTE(review): confirm against the build's source-control setup).
 */
#pragma ident	"%Z%%M%	%I%	%E% SMI"

	.file	"%M%"

/*
 * memcmp(s1, s2, n)
 *
 * Compare n bytes:  s1>s2: >0  s1==s2: 0  s1<s2: <0
 *
 * Fast assembler language version of the following C-program for memcmp
 * which represents the `standard' for the C-library.  Bytes are compared
 * as unsigned values (the assembly fetches them with zero-extending loads).
 *
 *	int
 *	memcmp(const void *s1, const void *s2, size_t n)
 *	{
 *		if (s1 != s2 && n != 0) {
 *			const unsigned char *ps1 = s1;
 *			const unsigned char *ps2 = s2;
 *			do {
 *				if (*ps1++ != *ps2++)
 *					return (ps1[-1] - ps2[-1]);
 *			} while (--n != 0);
 *		}
 *		return (0);
 *	}
 */

#include <sys/asm_linkage.h>

	ANSI_PRAGMA_WEAK(memcmp,function)

/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * In:   %o0 = s1, %o1 = s2, %o2 = n
 * Out:  %o0 = 0 when the n bytes are equal, otherwise
 *       (byte of s1) - (byte of s2) at the first mismatch.  Both bytes
 *       are zero-extended, so the sign of the result follows an
 *       unsigned byte comparison.
 * Uses: %o1-%o5, %g1, %g5 and the integer condition codes as scratch.
 *       Leaf routine: returns with retl and never executes save.
 *
 * Strategy
 *   1. s1 == s2 returns 0 immediately.
 *   2. n <= 17 is handled completely by the byte loop (.bytcmp).
 *   3. Otherwise 1-3 leading bytes are compared, if needed, so that s1
 *      becomes word aligned (.algn1 / .algn2 / .algn3).
 *   4. .iss2 splits the remaining count into whole words (%o3) and a
 *      0-3 byte tail (%o2), then selects a word loop from (s2 & 3):
 *          0 -> .w4cmp    both pointers word aligned
 *          1 -> .w1cmp    byte + halfword prefetched, loop "3:"
 *          2 -> .w2cmp    halfword prefetched, loop "4:"
 *          3 -> .w3cmp    byte prefetched, loop "2:"
 *      The three unaligned loops only issue aligned loads on s2 and
 *      rebuild each s2 word from the bytes left over from the previous
 *      load (kept in the top of %o5) and the head of the next one.
 *   5. A word mismatch goes to .noteq_word, which locates the first
 *      differing byte, most-significant byte first.
 *   6. Any tail bytes are finished by .bytcmp.
 *
 * Inside the byte loop and the word loops %o1 holds the offset
 * (s2 - s1) instead of a pointer, so advancing %o0 advances both.
 *
 * The instruction following each branch is its delay slot.  With the
 * ",a" (annul) bit the slot executes only when a conditional branch is
 * taken; "b,a" never executes its slot.
 */
	ENTRY(memcmp)
	cmp	%o0, %o1		! s1 == s2?
	be,pn	%xcc, .cmpeq
	cmp	%o2, 17			! delay slot (always runs): short count?
	bleu,a,pn %xcc, .cmpbyt		! for small counts go do bytes
	sub	%o1, %o0, %o1		! annulled slot: %o1 = s2 - s1

	andcc	%o0, 3, %o3		! is s1 aligned?
	bz,a,pn	%icc, .iss2		! if so go check s2
	andcc	%o1, 3, %o4		! is s2 aligned?
	cmp	%o3, 2
	be,pn	%icc, .algn2		! s1 & 3 == 2: two bytes to go
	cmp	%o3, 3			! delay slot: flags tested inside .algn1

	/*
	 * s1 & 3 is 1 or 3.  Compare one byte; the flags from
	 * "cmp %o3, 3" above are still live (inc/dec/ldub leave them
	 * alone) and decide whether that byte was the last one needed.
	 */
.algn1:	ldub	[%o0], %o4		! cmp one byte
	inc	%o0
	ldub	[%o1], %o5
	inc	%o1
	dec	%o2
	be,pn	%icc, .algn3		! s1 & 3 was 3: aligned after this byte
	cmp	%o4, %o5		! delay slot: compare the byte pair
	be,pt	%icc, .algn2		! bytes equal: two more bytes to align
	nop
	b,a	.noteq

	/*
	 * s1 is halfword aligned here.  Fetch two s1 bytes with a single
	 * lduh and compare them against two single-byte loads from s2.
	 */
.algn2:	lduh	[%o0], %o4
	inc	2, %o0
	ldub	[%o1], %o5
	inc	1, %o1
	srl	%o4, 8, %o3		! first of the two s1 bytes
	cmp	%o3, %o5
	be,a,pt	%icc, 1f
	ldub	[%o1], %o5		! delay slot, get next byte from s2
	b	.noteq
	mov	%o3, %o4		! delay slot, move *s1 to %o4
1:	inc	%o1
	dec	2, %o2
	and	%o4, 0xff, %o4		! second of the two s1 bytes
	cmp	%o4, %o5
.algn3:	be,a,pt	%icc, .iss2		! last alignment byte equal?
	andcc	%o1, 3, %o4		! delay slot, is s2 aligned?
	b,a	.noteq

	/*
	 * Byte loop.  %o1 = s2 - s1, %o2 = bytes left.  deccc leaves the
	 * carry clear while bytes remain, so bgeu re-enters the loop and
	 * its annulled slot loads the next s1 byte.
	 */
.cmpbyt:b	.bytcmp
	deccc	%o2
1:	ldub	[%o0 + %o1], %o5	! byte compare loop
	inc	%o0
	cmp	%o4, %o5
	be,a,pt	%icc, .bytcmp
	deccc	%o2			! delay slot, compare count (len)
	b,a	.noteq
.bytcmp:bgeu,a,pt %xcc, 1b
	ldub	[%o0], %o4
.cmpeq:
	retl				! strings compare equal
	clr	%o0

	/*
	 * %o4 (s1) and %o5 (s2) hold words whose low 32 bits differ.
	 * Compare byte pairs from the most-significant byte down; the
	 * fourth pair needs no compare because the words are known to
	 * differ.  srl clears everything above the selected byte, so
	 * stray bits above bit 31 in %o5 do not matter.
	 */
.noteq_word:				! words aren't equal. find unequal byte
	srl	%o4, 24, %o1		! first byte
	srl	%o5, 24, %o2
	cmp	%o1, %o2
	bne,pn	%icc, 1f
	sll	%o4, 8, %o4		! delay slot: expose the next byte
	sll	%o5, 8, %o5
	srl	%o4, 24, %o1		! second byte
	srl	%o5, 24, %o2
	cmp	%o1, %o2
	bne,pn	%icc, 1f
	sll	%o4, 8, %o4
	sll	%o5, 8, %o5
	srl	%o4, 24, %o1		! third byte
	srl	%o5, 24, %o2
	cmp	%o1, %o2
	bne,pn	%icc, 1f
	sll	%o4, 8, %o4
	sll	%o5, 8, %o5
	srl	%o4, 24, %o1		! fourth byte
	srl	%o5, 24, %o2
1:
	retl
	sub	%o1, %o2, %o0		! delay slot

.noteq:
	retl				! strings aren't equal
	sub	%o4, %o5, %o0		! delay slot, return(*s1 - *s2)

	/*
	 * s1 is word aligned.  %o4 = s2 & 3 and the flags still reflect
	 * the andcc that computed it (andn/and do not touch them).
	 */
.iss2:	andn	%o2, 3, %o3		! count of aligned bytes
	and	%o2, 3, %o2		! remaining bytes
	bz,pn	%icc, .w4cmp		! if s2 word aligned, compare words
	cmp	%o4, 2
	be,pn	%icc, .w2cmp		! s2 half aligned
	cmp	%o4, 1			! delay slot: flags tested in .w3cmp

	/*
	 * s2 & 3 is 1 or 3.  The unaligned loops read s2 ahead of what
	 * has been compared, so the final word is moved from the word
	 * count to the byte tail.
	 */
.w3cmp:
	dec	4, %o3			! avoid reading beyond the last byte
	inc	4, %o2
	ldub	[%o1], %g1		! read a byte to align for word reads
	inc	1, %o1
	be,pt	%icc, .w1cmp		! aligned to 1 or 3 bytes
	sll	%g1, 24, %o5		! delay slot: carried byte on top

	sub	%o1, %o0, %o1		! s2 & 3 == 3: %o1 = s2 - s1
2:	lduw	[%o0 + %o1], %g1
	lduw	[%o0], %o4
	inc	4, %o0
	srl	%g1, 8, %g5		! merge with the other half
	or	%g5, %o5, %o5
	cmp	%o4, %o5
	bne,pn	%icc, .noteq_word	! taken at most once per call
	deccc	4, %o3
	bnz,pt	%xcc, 2b
	sll	%g1, 24, %o5		! delay slot: carry the unused byte
	sub	%o1, 1, %o1		! used 3 bytes of the last word read
	b	.bytcmp
	deccc	%o2

	/*
	 * s2 & 3 == 1.  Entered from .w3cmp with one s2 byte already in
	 * the top of %o5; a halfword load supplies the next two bytes.
	 */
.w1cmp:
	dec	4, %o3			! avoid reading beyond the last byte
	inc	4, %o2
	lduh	[%o1], %g1		! read 3 bytes to word align
	inc	2, %o1
	sll	%g1, 8, %g5
	or	%o5, %g5, %o5

	sub	%o1, %o0, %o1		! %o1 = s2 - s1
3:	lduw	[%o0 + %o1], %g1
	lduw	[%o0], %o4
	inc	4, %o0
	srl	%g1, 24, %g5		! merge with the other half
	or	%g5, %o5, %o5
	cmp	%o4, %o5
	bne,pn	%icc, .noteq_word	! taken at most once per call
	deccc	4, %o3
	bnz,pt	%xcc, 3b
	sll	%g1, 8, %o5		! delay slot: carry the 3 unused bytes
	sub	%o1, 3, %o1		! used 1 byte of the last word read
	b	.bytcmp
	deccc	%o2

	/*
	 * s2 & 3 == 2: each s2 word is built from two halves.
	 */
.w2cmp:
	dec	4, %o3			! avoid reading beyond the last byte
	inc	4, %o2
	lduh	[%o1], %g1		! read a halfword to align s2
	inc	2, %o1
	sll	%g1, 16, %o5
	sub	%o1, %o0, %o1		! %o1 = s2 - s1
4:	lduw	[%o0 + %o1], %g1	! read a word from s2
	lduw	[%o0], %o4		! read a word from s1
	inc	4, %o0
	srl	%g1, 16, %g5		! merge with the other half
	or	%g5, %o5, %o5
	cmp	%o4, %o5
	bne,pn	%icc, .noteq_word
	deccc	4, %o3
	bnz,pt	%xcc, 4b
	sll	%g1, 16, %o5		! delay slot: carry the unused half
	sub	%o1, 2, %o1		! only used half of the last read word
	b	.bytcmp
	deccc	%o2

	/*
	 * Both pointers word aligned.  The s2 word for the next pass is
	 * loaded in the annulled delay slot of the loop branch, so no
	 * load is issued once the word count reaches zero.
	 */
.w4cmp:
	sub	%o1, %o0, %o1		! %o1 = s2 - s1
	lduw	[%o0 + %o1], %o5
5:	lduw	[%o0], %o4
	inc	4, %o0
	cmp	%o4, %o5
	bne,pn	%icc, .noteq_word	! taken at most once per call
	deccc	4, %o3
	bnz,a,pt %xcc, 5b
	lduw	[%o0 + %o1], %o5
	b	.bytcmp			! compare remaining bytes, if any
	deccc	%o2

	SET_SIZE(memcmp)
