xref: /illumos-gate/usr/src/lib/libc/sparcv9/gen/memcmp.S (revision 784279176e68a516c9e391eb98dda7bd543fa6dd)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.file	"memcmp.s"
28
29/*
30 * memcmp(s1, s2, len)
31 *
32 * Compare len bytes, each taken as an unsigned value:  s1>s2: >0  s1==s2: 0  s1<s2: <0
33 *
34 * Fast assembler language version of the following C-program for memcmp
35 * which represents the `standard' for the C-library.
36 *
37 *	int
38 *	memcmp(const void *s1, const void *s2, size_t n)
39 *	{
40 *		if (s1 != s2 && n != 0) {
41 *			const unsigned char *ps1 = s1;
42 *			const unsigned char *ps2 = s2;
43 *			do {
44 *				if (*ps1++ != *ps2++)
45 *					return (ps1[-1] - ps2[-1]);
46 *			} while (--n != 0);
47 *		}
48 *		return (0);
49 *	}
50 */
51
52#include <sys/asm_linkage.h>
53
54	ANSI_PRAGMA_WEAK(memcmp,function)
55
/*
 * NOTE(review): ANSI_PRAGMA_WEAK (above), ENTRY and SET_SIZE are macros
 * from <sys/asm_linkage.h>, which is not visible here.  Presumably they
 * declare memcmp as a weak symbol, open the function and record its size;
 * confirm against that header.
 *
 * Register usage, as established by the code below:
 *
 *	%o0	s1 cursor on entry and throughout; return value at exit
 *	%o1	s2 cursor; converted to the offset (s2 - s1) before every
 *		loop that addresses s2 as [%o0 + %o1]
 *	%o2	bytes still to compare (once in the word code: the tail
 *		left over for the byte loop)
 *	%o3	scratch while aligning s1; from .iss2 on, the number of
 *		bytes still to be compared a word at a time
 *	%o4	current byte/word of s1 (briefly also s2 & 3)
 *	%o5	current byte/word of s2
 *	%g1,%g5	scratch used to assemble s2 words when s2 is misaligned
 *
 * Reading aid: the instruction after a branch is its delay slot.  For a
 * conditional branch written with ",a" the delay-slot instruction runs
 * only when the branch is taken; for "b,a" it never runs.  Much of the
 * code relies on condition codes that were set in a delay slot or several
 * instructions earlier, so statement order matters everywhere.
 */
56	ENTRY(memcmp)
/*
 * Fast exits.  Identical pointers compare equal.  Lengths of 17 or less
 * (unsigned compare, set up in the delay slot of the first branch) go to
 * the byte loop; the annulled sub turns %o1 into (s2 - s1) only on that
 * path.
 */
57	cmp	%o0, %o1		! s1 == s2?
58	be,pn	%xcc, .cmpeq
59	cmp	%o2, 17
60	bleu,a,pn %xcc, .cmpbyt		! for small counts go do bytes
61	sub	%o1, %o0, %o1
62
/*
 * Bring s1 to a 4-byte boundary, comparing the leading bytes one at a
 * time.  %o3 = s1 & 3:
 *	0	already aligned: go to .iss2; the annulled delay slot
 *		computes %o4 = s2 & 3 and sets the flags .iss2 tests
 *	2	.algn2 (two bytes to go)
 *	1, 3	.algn1; the "cmp %o3, 3" in the delay slot leaves the
 *		flags that .algn1 tests after its loads
 */
63	andcc	%o0, 3, %o3		! is s1 aligned?
64	bz,a,pn	%icc, .iss2		! if so go check s2
65	andcc	%o1, 3, %o4		! is s2 aligned?
66	cmp	%o3, 2
67	be,pn	%icc, .algn2
68	cmp	%o3, 3
69
/*
 * Compare one byte.  inc/dec do not modify the condition codes, so the
 * first "be" still tests "cmp %o3, 3": if s1 & 3 was 3 this byte finishes
 * the alignment and .algn3 evaluates the byte compare issued in the delay
 * slot.  Otherwise (s1 & 3 was 1) equal bytes continue with .algn2 and
 * unequal bytes return through .noteq.
 */
70.algn1:	ldub	[%o0], %o4		! cmp one byte
71	inc	%o0
72	ldub	[%o1], %o5
73	inc	%o1
74	dec	%o2
75	be,pn	%icc, .algn3
76	cmp	%o4, %o5
77	be,pt	%icc, .algn2
78	nop
79	b,a	.noteq
80
/*
 * s1 is halfword aligned here.  Fetch two s1 bytes with one lduh and
 * compare them with two single-byte loads from s2: first the byte in
 * bits 15:8 (%o3 = %o4 >> 8), then the byte in bits 7:0.  If the first
 * pair differs, the second s2 load is annulled and the delay-slot mov
 * puts the s1 byte into %o4, so .noteq sees the differing pair in
 * %o4/%o5.  The count is reduced by 2 only after the first pair matched.
 */
81.algn2:	lduh	[%o0], %o4
82	inc	2, %o0
83	ldub	[%o1], %o5
84	inc	1, %o1
85	srl	%o4, 8, %o3
86	cmp	%o3, %o5
87	be,a,pt	%icc, 1f
88	ldub	[%o1], %o5		! delay slot, get next byte from s2
89	b	.noteq
90	mov	%o3, %o4		! delay slot, move *s1 to %o4
911:	inc	%o1
92	dec	2, %o2
93	and	%o4, 0xff, %o4
94	cmp	%o4, %o5
/*
 * Common tail of the alignment code: the flags hold the most recent byte
 * compare.  Equal -> .iss2 with %o4 = s2 & 3 (and its flags) produced in
 * the annulled delay slot; unequal -> .noteq.
 */
95.algn3:	be,a,pt	%icc, .iss2
96	andcc	%o1, 3, %o4		! delay slot, is s2 aligned?
97	b,a	.noteq
98
/*
 * Byte-at-a-time compare, used for short inputs and for the tail after
 * the word loops.  Requirements on entry to .bytcmp:
 *	- %o1 holds (s2 - s1), so [%o0 + %o1] addresses s2
 *	- the flags come from "deccc %o2", executed in the delay slot of
 *	  the branch that jumps here
 * bgeu is taken while that decrement did not borrow, i.e. while at least
 * one byte remained; its annulled delay slot loads the s1 byte only when
 * the loop continues.  The loop body (local label 1) loads the s2 byte,
 * advances %o0 and compares.  A count of zero, or running out of bytes,
 * falls through into .cmpeq.
 */
99.cmpbyt:b	.bytcmp
100	deccc	%o2
1011:	ldub	[%o0 + %o1], %o5	! byte compare loop
102	inc	%o0
103	cmp	%o4, %o5
104	be,a,pt	%icc, .bytcmp
105	deccc	%o2			! delay slot, compare count (len)
106	b,a	.noteq
107.bytcmp:bgeu,a,pt %xcc, 1b
108	ldub	[%o0], %o4
109.cmpeq:
110	retl				! strings compare equal
111	clr	%o0
112
/*
 * Reached from the word loops when the 32-bit words in %o4 (s1) and %o5
 * (s2) differ.  Walk the bytes from bits 31:24 downwards: each srl by 24
 * isolates the current top byte, and the sll pair moves the next byte
 * into that position.  The first sll of each pair sits in the delay slot
 * of a non-annulled bne and therefore also runs when the branch is taken;
 * that is harmless because %o1/%o2 already hold the differing bytes.
 * After three equal bytes the fourth must be the one that differs, so it
 * is extracted without another compare.  srl works on the low 32 bits of
 * its source, so any bits above bit 31 left in %o5 by the 64-bit sll in
 * the merge loops do not affect the result.  Returns %o1 - %o2.
 */
113.noteq_word:				! words aren't equal. find unequal byte
114	srl	%o4, 24, %o1		! first byte
115	srl	%o5, 24, %o2
116	cmp	%o1, %o2
117	bne,pn	%icc, 1f
118	sll	%o4, 8, %o4
119	sll	%o5, 8, %o5
120	srl	%o4, 24, %o1
121	srl	%o5, 24, %o2
122	cmp	%o1, %o2
123	bne,pn	%icc, 1f
124	sll	%o4, 8, %o4
125	sll	%o5, 8, %o5
126	srl	%o4, 24, %o1
127	srl	%o5, 24, %o2
128	cmp	%o1, %o2
129	bne,pn	%icc, 1f
130	sll	%o4, 8, %o4
131	sll	%o5, 8, %o5
132	srl	%o4, 24, %o1
133	srl	%o5, 24, %o2
1341:
135	retl
136	sub	%o1, %o2, %o0		! delay slot
137
/*
 * Byte mismatch: %o4 and %o5 hold the zero-extended bytes from s1 and s2;
 * their difference is the return value.
 */
138.noteq:
139	retl				! strings aren't equal
140	sub	%o4, %o5, %o0		! delay slot, return(*s1 - *s2)
141
/*
 * s1 is word aligned.  Split the remaining length into %o3 (a multiple of
 * 4, compared a word at a time) and %o2 (0-3 tail bytes).  The length was
 * at least 18 on entry and at most 3 bytes were consumed aligning s1, so
 * %o3 is at least 12 here and every word loop below runs at least once.
 *
 * and/andn do not set the condition codes: the "bz" tests the
 * "andcc %o1, 3, %o4" executed in the delay slot of the branch that came
 * here.  %o4 = s2 & 3 selects the loop:
 *	0	.w4cmp	s2 word aligned too
 *	2	.w2cmp	s2 halfword aligned
 *	1, 3	fall into .w3cmp ("cmp %o4, 1" from the delay slot is
 *		tested there)
 */
141.iss2:	andn	%o2, 3, %o3		! count of aligned bytes
142	and	%o2, 3, %o2		! remaining bytes
143	bz,pn	%icc, .w4cmp		! if s2 word aligned, compare words
144	cmp	%o4, 2
145	be,pn	%icc, .w2cmp		! s2 half aligned
146	cmp	%o4, 1
147
/*
 * s2 & 3 is 1 or 3.  One word fewer goes through the word loop and four
 * more bytes are left for the byte loop.  Read one s2 byte and place it
 * in bits 31:24 of %o5 (delay slot).  If s2 & 3 was 1, .w1cmp reads two
 * more bytes.  Otherwise s2 is now word aligned with one byte carried in
 * %o5, and each pass of the loop (local label 2):
 *	- loads the next s2 word and the next s1 word
 *	- merges the carried byte with the top three bytes of the s2 word
 *	- compares the merged word with the s1 word
 *	- carries the s2 word's low byte into the next pass (delay slot)
 * After the loop %o1 is backed up by 1 so that the byte still carried,
 * which has not been compared yet, is re-read by the byte loop.
 */
148.w3cmp:
149	dec	4, %o3			! avoid reading beyond the last byte
150	inc	4, %o2
151	ldub	[%o1], %g1		! read a byte to align for word reads
152	inc	1, %o1
153	be,pt	%icc, .w1cmp		! aligned to 1 or 3 bytes
154	sll	%g1, 24, %o5
155
156	sub	%o1, %o0, %o1
1572:	lduw	[%o0 + %o1], %g1
158	lduw	[%o0], %o4
159	inc	4, %o0
160	srl	%g1, 8, %g5		! merge with the other half
161	or	%g5, %o5, %o5
162	cmp	%o4, %o5
163	bne,pt	%icc, .noteq_word
164	deccc	4, %o3
165	bnz,pt	%xcc, 2b
166	sll	%g1, 24, %o5
167	sub	%o1, 1, %o1		! used 3 bytes of the last word read
168	b	.bytcmp
169	deccc	%o2
170
/*
 * s2 & 3 was 1 and one byte already sits in bits 31:24 of %o5.  The
 * adjustments to %o3/%o2 are applied a second time on this path.  A
 * halfword load supplies the next two bytes (bits 23:8), after which s2
 * is word aligned with three bytes carried.  Each pass of the loop (local
 * label 3) completes the word with the top byte of the next s2 word,
 * compares, and carries that word's low three bytes forward.  Afterwards
 * %o1 is backed up by 3 so the carried, uncompared bytes are re-read by
 * the byte loop.
 */
171.w1cmp:
172	dec	4, %o3			! avoid reading beyond the last byte
173	inc	4, %o2
174	lduh	[%o1], %g1		! read 3 bytes to word align
175	inc	2, %o1
176	sll	%g1, 8, %g5
177	or	%o5, %g5, %o5
178
179	sub	%o1, %o0, %o1
1803:	lduw	[%o0 + %o1], %g1
181	lduw	[%o0], %o4
182	inc	4, %o0
183	srl	%g1, 24, %g5		! merge with the other half
184	or	%g5, %o5, %o5
185	cmp	%o4, %o5
186	bne,pt	%icc, .noteq_word
187	deccc	4, %o3
188	bnz,pt	%xcc, 3b
189	sll	%g1, 8, %o5
190	sub	%o1, 3, %o1		! used 1 byte of the last word read
191	b	.bytcmp
192	deccc	%o2
193
/*
 * s2 & 3 is 2.  Same scheme with a two-byte carry: the leading halfword
 * goes to bits 31:16 of %o5, each pass of the loop (local label 4) merges
 * it with the top half of the next s2 word, compares, and carries the low
 * half forward.  Afterwards %o1 is backed up by 2 for the byte loop.
 */
194.w2cmp:
195	dec	4, %o3			! avoid reading beyond the last byte
196	inc	4, %o2
197	lduh	[%o1], %g1		! read a halfword to align s2
198	inc	2, %o1
199	sll	%g1, 16, %o5
200	sub	%o1, %o0, %o1
2014:	lduw	[%o0 + %o1], %g1	! read a word from s2
202	lduw	[%o0], %o4		! read a word from s1
203	inc	4, %o0
204	srl	%g1, 16, %g5		! merge with the other half
205	or	%g5, %o5, %o5
206	cmp	%o4, %o5
207	bne,pn	%icc, .noteq_word
208	deccc	4, %o3
209	bnz,pt	%xcc, 4b
210	sll	%g1, 16, %o5
211	sub	%o1, 2, %o1		! only used half of the last read word
212	b	.bytcmp
213	deccc	%o2
214
/*
 * Both pointers are word aligned: compare %o3 bytes a word at a time.
 * The s2 word for the next pass is loaded in the annulled delay slot of
 * the loop branch, so it is read only while words remain.  No carry is
 * involved, so %o1 needs no adjustment before the tail goes to .bytcmp.
 */
215.w4cmp:
216	sub	%o1, %o0, %o1
217	lduw	[%o0 + %o1], %o5
2185:	lduw	[%o0], %o4
219	inc	4, %o0
220	cmp	%o4, %o5
221	bne,pt	%icc, .noteq_word
222	deccc	4, %o3
223	bnz,a,pt %xcc, 5b
224	lduw	[%o0 + %o1], %o5
225	b	.bytcmp			! compare remaining bytes, if any
226	deccc	%o2
227
228	SET_SIZE(memcmp)
230