/* xref: /illumos-gate/usr/src/lib/libc/sparc/gen/memcmp.S (revision 9b9d39d2a32ff806d2431dbcc50968ef1e6d46b2) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * memcmp(s1, s2, len)
 *
 * Compare len bytes:  s1>s2: >0  s1==s2: 0  s1<s2: <0
 *
 * Fast assembler language version of the following C-program for memcmp
 * which represents the `standard' for the C-library.  Bytes are compared
 * as unsigned values (every byte is fetched with ldub or isolated with
 * logical shifts), hence the unsigned char pointers below.
 *
 *	int
 *	memcmp(const void *s1, const void *s2, size_t n)
 *	{
 *		if (s1 != s2 && n != 0) {
 *			const unsigned char *ps1 = s1;
 *			const unsigned char *ps2 = s2;
 *			do {
 *				if (*ps1++ != *ps2++)
 *					return (ps1[-1] - ps2[-1]);
 *			} while (--n != 0);
 *		}
 *		return (0);
 *	}
 *
 * Strategy:
 *	- s1 == s2: return 0 immediately.
 *	- len <= 17: plain byte loop (.cmpbyt / .bytcmp).
 *	- otherwise: compare 1 to 3 leading bytes until s1 is word aligned
 *	  (.algn1 / .algn2), then compare a word at a time.  s1 is always
 *	  read with aligned word loads.  Depending on the alignment of s2
 *	  at that point one of four loops is used:
 *		.w4cmp	s2 word aligned, direct word loads
 *		.w2cmp	s2 halfword aligned
 *		.w3cmp	s2 one byte short of a word boundary
 *		.w1cmp	s2 three bytes short of a word boundary
 *	  In the last three, each s2 word is assembled from the bytes left
 *	  over from the previous aligned load and the leading bytes of the
 *	  next one.  Those loops stop at least one word early so that no
 *	  load reaches past the end of s2; the bytes held back are handed
 *	  to the byte loop.
 *	- a mismatching word pair is resolved to its first differing byte
 *	  in .noteq_word.
 *
 * Register usage:
 *	%o0	s1 cursor; holds the result on return
 *	%o1	s2 cursor; turned into the offset (s2 - s1) before the byte
 *		loop and before each word loop, so only %o0 is advanced
 *	%o2	len, then the byte count left for the byte loop
 *	%o3	scratch: s1 misalignment, then byte count for the word loops
 *	%o4	data from s1 (also briefly the s2 misalignment)
 *	%o5	data from s2
 *	%g1	raw aligned word (or lead-in bytes) read from s2
 *	%g2	merge scratch; the incoming value is stored at [%sp + 68] on
 *		entry and reloaded on every return path
 *
 * This is a leaf routine: there is no save/restore, it returns with retl.
 * NOTE(review): [%sp + 68] is presumably a scratch slot in the caller's
 * frame that a leaf callee may use -- confirm against the platform ABI.
 */

	.file	"memcmp.s"

#include <sys/asm_linkage.h>

	ANSI_PRAGMA_WEAK(memcmp,function)

	ENTRY(memcmp)
	st	%g2, [%sp + 68]		! g2 must be restored before retl
	cmp	%o0, %o1		! s1 == s2?
	be	.cmpeq
	cmp	%o2, 17			! delay slot, len <= 17 (unsigned)?
	bleu,a	.cmpbyt			! for small counts go do bytes
	sub	%o1, %o0, %o1		! delay slot (annulled if not taken),
					! %o1 = s2 - s1

	andcc	%o0, 3, %o3		! is s1 aligned?
	bz,a	.iss2			! if so go check s2
	andcc	%o1, 3, %o4		! is s2 aligned?
	cmp	%o3, 2
	be	.algn2
	cmp	%o3, 3			! delay slot, tested by be .algn3 below

	!
	! s1 is at offset 1 or 3 within a word: compare a single byte.
	! None of the loads, inc or dec below touch the condition codes,
	! so the first branch still sees the result of cmp %o3, 3.
	!
.algn1:	ldub	[%o0], %o4		! cmp one byte
	inc	%o0
	ldub	[%o1], %o5
	inc	%o1
	dec	%o2
	be	.algn3			! offset was 3, s1 is now word aligned
	cmp	%o4, %o5		! delay slot, compare the two bytes
	be	.algn2			! equal, s1 is now halfword aligned
	nop
	b,a	.noteq

	!
	! s1 is halfword aligned but not word aligned: compare two bytes.
	! s1 is read as one halfword (first byte in the upper 8 bits),
	! s2 is read a byte at a time.
	!
.algn2:	lduh	[%o0], %o4
	inc	2, %o0
	ldub	[%o1], %o5
	inc	1, %o1
	srl	%o4, 8, %o3		! first byte of the s1 halfword
	cmp	%o3, %o5
	be,a	1f
	ldub	[%o1], %o5		! delay slot, get next byte from s2
	b	.noteq
	mov	%o3, %o4		! delay slot, move *s1 to %o4
1:	inc	%o1
	dec	2, %o2
	and	%o4, 0xff, %o4		! second byte of the s1 halfword
	cmp	%o4, %o5
.algn3:	be,a	.iss2
	andcc	%o1, 3, %o4		! delay slot, is s2 aligned?
	b,a	.noteq

	!
	! Byte loop.  On entry %o1 = s2 - s1 and %o2 = bytes to compare.
	! Every path into .bytcmp executes deccc %o2 first; the loop goes
	! on while that decrement did not borrow, i.e. while the count
	! was non-zero before it.
	!
.cmpbyt:b	.bytcmp
	deccc	%o2
1:	ldub	[%o0 + %o1], %o5	! byte compare loop
	inc	%o0
	cmp	%o4, %o5
	be,a	.bytcmp
	deccc	%o2			! delay slot, compare count (len)
	b,a	.noteq
.bytcmp:bgeu,a	1b
	ldub	[%o0], %o4		! delay slot (annulled on exit), *s1
.cmpeq:	ld	[%sp + 68], %g2
	retl				! strings compare equal
	clr	%o0

	!
	! %o4 (from s1) and %o5 (from s2) differ.  Walk both words from
	! the most significant byte, which is the first byte in memory
	! for these loads, until the bytes differ.  The fourth byte is
	! not tested: if the first three match it must be the one.
	!
.noteq_word:				! words aren't equal. find unequal byte
	srl	%o4, 24, %o1		! first byte
	srl	%o5, 24, %o2
	cmp	%o1, %o2
	bne	1f
	sll	%o4, 8, %o4		! delay slot, always executed
	sll	%o5, 8, %o5
	srl	%o4, 24, %o1		! second byte
	srl	%o5, 24, %o2
	cmp	%o1, %o2
	bne	1f
	sll	%o4, 8, %o4		! delay slot, always executed
	sll	%o5, 8, %o5
	srl	%o4, 24, %o1		! third byte
	srl	%o5, 24, %o2
	cmp	%o1, %o2
	bne	1f
	sll	%o4, 8, %o4		! delay slot, always executed
	sll	%o5, 8, %o5
	srl	%o4, 24, %o1		! fourth byte
	srl	%o5, 24, %o2
1:
	ld	[%sp + 68], %g2
	retl
	sub	%o1, %o2, %o0		! delay slot

.noteq:
	ld	[%sp + 68], %g2
	retl				! strings aren't equal
	sub	%o4, %o5, %o0		! delay slot, return(*s1 - *s2)

	!
	! s1 is word aligned.  Every branch to .iss2 runs
	! andcc %o1, 3, %o4 in its delay slot, so %o4 holds the s2
	! misalignment and the condition codes reflect it.  andn and and
	! do not change the condition codes.
	!
.iss2:	andn	%o2, 3, %o3		! count of aligned bytes
	and	%o2, 3, %o2		! remaining bytes
	bz	.w4cmp			! if s2 word aligned, compare words
	cmp	%o4, 2
	be	.w2cmp			! s2 half aligned
	cmp	%o4, 1			! delay slot, tested by be .w1cmp

	!
	! s2 is at offset 1 or 3 within a word.  One word is moved from
	! the word-loop count to the byte-loop count, then one s2 byte is
	! read into the top of %o5.  For offset 3 that byte word aligns
	! s2 and the loop at 2: follows; for offset 1 control continues
	! at .w1cmp.
	!
.w3cmp:
	dec	4, %o3			! avoid reading beyond the last byte
	inc	4, %o2
	ldub	[%o1], %g1		! read a byte to align for word reads
	inc	1, %o1
	be	.w1cmp			! aligned to 1 or 3 bytes
	sll	%g1, 24, %o5		! delay slot, byte goes to bits 31-24

	sub	%o1, %o0, %o1		! %o1 = s2 - s1
2:	ld	[%o0 + %o1], %g1
	ld	[%o0], %o4
	inc	4, %o0
	srl	%g1, 8, %g2		! merge with the other half
	or	%g2, %o5, %o5
	cmp	%o4, %o5
	bne	.noteq_word
	deccc	4, %o3			! delay slot, always executed
	bnz	2b
	sll	%g1, 24, %o5		! delay slot, keep the unused byte
	sub	%o1, 1, %o1		! used 3 bytes of the last word read
	b	.bytcmp
	deccc	%o2

	!
	! s2 was at offset 1; one byte is already in bits 31-24 of %o5.
	! This label is reached only from .w3cmp, which has already
	! applied the same dec 4 / inc 4 adjustment, so on this path the
	! word count is reduced by 8 in total and the byte count raised
	! by 8.  More than 17 bytes are required to get here and at most
	! 3 are consumed aligning s1, so the word loop still runs at
	! least once.
	!
.w1cmp:
	dec	4, %o3			! avoid reading beyond the last byte
	inc	4, %o2
	lduh	[%o1], %g1		! read 3 bytes to word align
	inc	2, %o1
	sll	%g1, 8, %g2
	or	%o5, %g2, %o5		! %o5 = 3 lead-in bytes in bits 31-8

	sub	%o1, %o0, %o1		! %o1 = s2 - s1
3:	ld	[%o0 + %o1], %g1
	ld	[%o0], %o4
	inc	4, %o0
	srl	%g1, 24, %g2		! merge with the other half
	or	%g2, %o5, %o5
	cmp	%o4, %o5
	bne	.noteq_word
	deccc	4, %o3			! delay slot, always executed
	bnz	3b
	sll	%g1, 8, %o5		! delay slot, keep the 3 unused bytes
	sub	%o1, 3, %o1		! used 1 byte of the last word read
	b	.bytcmp
	deccc	%o2

	!
	! s2 is halfword aligned but not word aligned.
	!
.w2cmp:
	dec	4, %o3			! avoid reading beyond the last byte
	inc	4, %o2
	lduh	[%o1], %g1		! read a halfword to align s2
	inc	2, %o1
	sll	%g1, 16, %o5

	sub	%o1, %o0, %o1		! %o1 = s2 - s1
4:	ld	[%o0 + %o1], %g1	! read a word from s2
	ld	[%o0], %o4		! read a word from s1
	inc	4, %o0
	srl	%g1, 16, %g2		! merge with the other half
	or	%g2, %o5, %o5
	cmp	%o4, %o5
	bne	.noteq_word
	deccc	4, %o3			! delay slot, always executed
	bnz	4b
	sll	%g1, 16, %o5		! delay slot, keep the unused half
	sub	%o1, 2, %o1		! only used half of the last read word
	b	.bytcmp
	deccc	%o2

	!
	! Both s1 and s2 are word aligned: compare whole words directly.
	!
.w4cmp:
	sub	%o1, %o0, %o1		! %o1 = s2 - s1
	ld	[%o0 + %o1], %o5
5:	ld	[%o0], %o4
	inc	4, %o0
	cmp	%o4, %o5
	bne	.noteq_word
	deccc	4, %o3			! delay slot, always executed
	bnz,a	5b
	ld	[%o0 + %o1], %o5	! delay slot (annulled on exit)
	b	.bytcmp			! compare remaining bytes, if any
	deccc	%o2

	SET_SIZE(memcmp)