xref: /titanic_50/usr/src/lib/libc/sparc/gen/memcmp.s (revision 56798e90e16fea70fd1365ed1808375d9a4a17e3)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29	.file	"%M%"
30
31/*
32 * memcmp(s1, s2, len)
33 *
34 * Compare n bytes:  s1>s2: >0  s1==s2: 0  s1<s2: <0
35 *
36 * Fast assembler language version of the following C-program for memcmp
37 * which represents the `standard' for the C-library.
38 *
39 *	int
40 *	memcmp(const void *s1, const void *s2, size_t n)
41 *	{
42 *		if (s1 != s2 && n != 0) {
43 *			const unsigned char *ps1 = s1;
44 *			const unsigned char *ps2 = s2;
45 *			do {
46 *				if (*ps1++ != *ps2++)
47 *					return (ps1[-1] - ps2[-1]);
48 *			} while (--n != 0);
49 *		}
50 *		return (0);
51 *	}
52 */
53
54#include <sys/asm_linkage.h>
55
56	ANSI_PRAGMA_WEAK(memcmp,function)	/* NOTE(review): macro comes from <sys/asm_linkage.h>; presumably declares memcmp as a weak function symbol - confirm */
57
58	ENTRY(memcmp)
	/*
	 * int memcmp(const void *s1, const void *s2, size_t n)
	 *
	 * In:   %o0 = s1, %o1 = s2, %o2 = n
	 * Out:  %o0 = 0 if the buffers match, otherwise the difference
	 *       (byte from s1) - (byte from s2) of the first differing pair,
	 *       both bytes taken as unsigned values (loaded with ldub or
	 *       extracted with srl / and 0xff).
	 *
	 * Register usage, as established by the code below:
	 *   %o0  cursor into s1
	 *   %o1  cursor into s2; rewritten to the offset (s2 - s1) before
	 *        each compare loop so that [%o0 + %o1] addresses s2
	 *   %o2  byte count left for the byte loop
	 *   %o3  alignment scratch, then byte count for the word loops
	 *   %o4  data from s1
	 *   %o5  data from s2 (merged from two loads when s2 is unaligned)
	 *   %g1  scratch; %g2 scratch, saved on entry and reloaded on exit
	 *
	 * Strategy: counts of 17 or less go straight to the byte loop.
	 * Otherwise s1 is advanced byte-wise to a word boundary, then one of
	 * four word loops is chosen from the low two bits of s2, and the
	 * byte loop finishes whatever the word loop left over.
	 *
	 * Reading notes: every branch has a delay slot; with ",a" the slot
	 * is executed only when a conditional branch is taken, and never for
	 * "b,a".  Several branches test condition codes that were set a few
	 * instructions earlier, which works because dec, inc, and, andn,
	 * sll, srl and the loads leave the condition codes unchanged (only
	 * cmp, andcc and deccc set them here).
	 *
	 * NOTE(review): [%sp + 68] is used as the save slot for %g2 without
	 * allocating a frame; this assumes the slot in the caller's frame may
	 * be overwritten by a leaf routine - confirm against the SPARC ABI.
	 */
59	st	%g2, [%sp + 68]		! save %g2; every exit path reloads it before retl
60	cmp	%o0, %o1		! s1 == s2?
61	be	.cmpeq			! same pointer: result is 0
62	cmp	%o2, 17			! delay slot, is len <= 17 (unsigned)?
63	bleu,a	.cmpbyt			! for small counts go do bytes
64	sub	%o1, %o0, %o1		! delay slot (annulled if not taken), %o1 = s2 - s1
65
66	andcc	%o0, 3, %o3		! %o3 = s1 & 3; is s1 word aligned?
67	bz,a	.iss2			! if so go check s2
68	andcc	%o1, 3, %o4		! delay slot (annulled if not taken), %o4 = s2 & 3
69	cmp	%o3, 2			! is s1 two bytes short of a word boundary?
70	be	.algn2			! yes: compare two bytes
71	cmp	%o3, 3			! delay slot, sets cc for "be .algn3" below
72
73.algn1:	ldub	[%o0], %o4		! cmp one byte (s1 & 3 is 1 or 3 here)
74	inc	%o0
75	ldub	[%o1], %o5
76	inc	%o1
77	dec	%o2			! one byte consumed; cc still from "cmp %o3, 3"
78	be	.algn3			! s1 & 3 was 3: s1 is word aligned after this byte
79	cmp	%o4, %o5		! delay slot, compare the two bytes
80	be	.algn2			! bytes equal (s1 & 3 was 1): two more bytes to align
81	nop
82	b,a	.noteq			! bytes differ
83
84.algn2:	lduh	[%o0], %o4		! next two bytes of s1 in one halfword load
85	inc	2, %o0
86	ldub	[%o1], %o5		! first of the two bytes from s2
87	inc	1, %o1
88	srl	%o4, 8, %o3		! %o3 = high byte of the s1 halfword
89	cmp	%o3, %o5
90	be,a	1f			! first pair equal: go compare the second pair
91	ldub	[%o1], %o5		! delay slot, get next byte from s2
92	b	.noteq			! first pair differs
93	mov	%o3, %o4		! delay slot, move *s1 to %o4
941:	inc	%o1
95	dec	2, %o2			! two bytes consumed
96	and	%o4, 0xff, %o4		! %o4 = low byte of the s1 halfword
97	cmp	%o4, %o5
98.algn3:	be,a	.iss2			! last alignment byte equal: s1 is word aligned
99	andcc	%o1, 3, %o4		! delay slot, is s2 aligned?
100	b,a	.noteq			! last alignment byte differs
101
102.cmpbyt:b	.bytcmp			! enter the byte loop at its count test
103	deccc	%o2			! delay slot, count down len and set cc
1041:	ldub	[%o0 + %o1], %o5	! byte compare loop, %o5 = byte from s2
105	inc	%o0
106	cmp	%o4, %o5
107	be,a	.bytcmp			! equal: test the count and continue
108	deccc	%o2			! delay slot, compare count (len)
109	b,a	.noteq			! bytes differ
110.bytcmp:bgeu,a	1b			! loop while the deccc did not borrow (a byte remains)
111	ldub	[%o0], %o4		! delay slot (annulled at loop exit), %o4 = byte from s1
112.cmpeq:	ld	[%sp + 68], %g2		! restore %g2
113	retl				! strings compare equal
114	clr	%o0			! delay slot, return 0
115
116.noteq_word:				! words aren't equal. find unequal byte
117	srl	%o4, 24, %o1		! first byte (most significant byte of each word)
118	srl	%o5, 24, %o2
119	cmp	%o1, %o2
120	bne	1f			! this byte pair differs: return their difference
121	sll	%o4, 8, %o4		! delay slot, move the next s1 byte to the top
122	sll	%o5, 8, %o5		! same for the s2 word
123	srl	%o4, 24, %o1		! second byte
124	srl	%o5, 24, %o2
125	cmp	%o1, %o2
126	bne	1f
127	sll	%o4, 8, %o4		! delay slot
128	sll	%o5, 8, %o5
129	srl	%o4, 24, %o1		! third byte
130	srl	%o5, 24, %o2
131	cmp	%o1, %o2
132	bne	1f
133	sll	%o4, 8, %o4		! delay slot
134	sll	%o5, 8, %o5
135	srl	%o4, 24, %o1		! fourth byte: no test, falls through to the exit
136	srl	%o5, 24, %o2
1371:					! %o1, %o2 = byte pair selected above
138	ld	[%sp + 68], %g2		! restore %g2
139	retl
140	sub	%o1, %o2, %o0		! delay slot, return (s1 byte - s2 byte)
141
142.noteq:
143	ld	[%sp + 68], %g2		! restore %g2
144	retl				! strings aren't equal
145	sub	%o4, %o5, %o0		! delay slot, return(*s1 - *s2)
146
147.iss2:	andn	%o2, 3, %o3		! count of aligned bytes (len rounded down to a multiple of 4)
148	and	%o2, 3, %o2		! remaining bytes
149	bz	.w4cmp			! if s2 word aligned, compare words (cc from "andcc %o1, 3, %o4")
150	cmp	%o4, 2			! delay slot
151	be	.w2cmp			! s2 half aligned
152	cmp	%o4, 1			! delay slot, sets cc for "be .w1cmp" below
153
154.w3cmp:					! s2 & 3 is 3 or 1: start by buffering one byte
155	dec	4, %o3			! avoid reading beyond the last byte
156	inc	4, %o2			! ... the byte loop handles those 4 bytes instead
157	ldub	[%o1], %g1		! read a byte to align for word reads
158	inc	1, %o1
159	be	.w1cmp			! s2 & 3 was 1: two more bytes needed to reach a word boundary
160	sll	%g1, 24, %o5		! delay slot, buffered byte becomes the top byte of the s2 word
161
162	sub	%o1, %o0, %o1		! %o1 = offset from the s1 cursor to the aligned s2 cursor
1632:	ld	[%o0 + %o1], %g1	! next aligned word of s2
164	ld	[%o0], %o4		! word of s1
165	inc	4, %o0
166	srl	%g1, 8, %g2		! merge with the other half
167	or	%g2, %o5, %o5		! %o5 = buffered byte + top 3 bytes of this s2 word
168	cmp	%o4, %o5
169	bne	.noteq_word
170	deccc	4, %o3			! delay slot, 4 more bytes done; sets cc for bnz
171	bnz	2b
172	sll	%g1, 24, %o5		! delay slot, buffer the unused low byte for the next word
173	sub	%o1, 1, %o1		! used 3 bytes of the last word read
174	b	.bytcmp			! compare remaining bytes
175	deccc	%o2			! delay slot, count test for the byte loop
176
177.w1cmp:					! s2 & 3 was 1; one byte already buffered in %o5
178	dec	4, %o3			! avoid reading beyond the last byte
179	inc	4, %o2			! NOTE(review): second such adjustment on this path (.w3cmp made one), so 8 bytes in total move to the byte loop
180	lduh	[%o1], %g1		! read 3 bytes to word align
181	inc	2, %o1
182	sll	%g1, 8, %g2		! place the halfword below the buffered top byte
183	or	%o5, %g2, %o5		! %o5 = 3 buffered s2 bytes in its top 3 bytes
184
185	sub	%o1, %o0, %o1		! %o1 = offset from the s1 cursor to the aligned s2 cursor
1863:	ld	[%o0 + %o1], %g1	! next aligned word of s2
187	ld	[%o0], %o4		! word of s1
188	inc	4, %o0
189	srl	%g1, 24, %g2		! merge with the other half
190	or	%g2, %o5, %o5		! %o5 = 3 buffered bytes + top byte of this s2 word
191	cmp	%o4, %o5
192	bne	.noteq_word
193	deccc	4, %o3			! delay slot, 4 more bytes done; sets cc for bnz
194	bnz	3b
195	sll	%g1, 8, %o5		! delay slot, buffer the 3 unused low bytes for the next word
196	sub	%o1, 3, %o1		! used 1 byte of the last word read
197	b	.bytcmp			! compare remaining bytes
198	deccc	%o2			! delay slot, count test for the byte loop
199
200.w2cmp:					! s2 & 3 is 2
201	dec	4, %o3			! avoid reading beyond the last byte
202	inc	4, %o2			! ... the byte loop handles those 4 bytes instead
203	lduh	[%o1], %g1		! read a halfword to align s2
204	inc	2, %o1
205	sll	%g1, 16, %o5		! buffered halfword becomes the top half of the s2 word
206
207	sub	%o1, %o0, %o1		! %o1 = offset from the s1 cursor to the aligned s2 cursor
2084:	ld	[%o0 + %o1], %g1	! read a word from s2
209	ld	[%o0], %o4		! read a word from s1
210	inc	4, %o0
211	srl	%g1, 16, %g2		! merge with the other half
212	or	%g2, %o5, %o5		! %o5 = buffered halfword + top half of this s2 word
213	cmp	%o4, %o5
214	bne	.noteq_word
215	deccc	4, %o3			! delay slot, 4 more bytes done; sets cc for bnz
216	bnz	4b
217	sll	%g1, 16, %o5		! delay slot, buffer the unused low half for the next word
218	sub	%o1, 2, %o1		! only used half of the last read word
219	b	.bytcmp			! compare remaining bytes
220	deccc	%o2			! delay slot, count test for the byte loop
221
222.w4cmp:					! s1 and s2 both word aligned
223	sub	%o1, %o0, %o1		! %o1 = s2 - s1
224	ld	[%o0 + %o1], %o5	! first word of s2
2255:	ld	[%o0], %o4		! word of s1
226	inc	4, %o0
227	cmp	%o4, %o5
228	bne	.noteq_word
229	deccc	4, %o3			! delay slot, 4 more bytes done; sets cc for bnz
230	bnz,a	5b
231	ld	[%o0 + %o1], %o5	! delay slot (annulled at loop exit), next word of s2
232	b	.bytcmp			! compare remaining bytes, if any
233	deccc	%o2			! delay slot, count test for the byte loop
234
235	SET_SIZE(memcmp)
236