xref: /illumos-gate/usr/src/lib/libc/capabilities/sun4u-us3/common/memcmp.S (revision d17be682a2c70b4505d43c830bbd2603da11918d)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26	.file	"memcmp.s"
27
28/*
29 * memcmp(s1, s2, n)
30 *
31 * Compare n bytes:  s1>s2: >0  s1==s2: 0  s1<s2: <0
32 *
33 * Fast assembler language version of the following C-program for memcmp
34 * which represents the `standard' for the C-library.
35 *
36 *	int
37 *	memcmp(const void *s1, const void *s2, size_t n)
38 *	{
39 *		if (s1 != s2 && n != 0) {
40 *			const unsigned char *ps1 = s1;
41 *			const unsigned char *ps2 = s2;
42 *			do {
43 *				if (*ps1++ != *ps2++)
44 *					return(ps1[-1] - ps2[-1]);
45 *			} while (--n != 0);
46 *		}
47 *		return (0);
48 *	}
49 */
50
51#include <sys/asm_linkage.h>
52#include <sys/machasi.h>
53
/*
 * Unit, in bytes, for the prefetch-ahead offsets used in memcmp below
 * (1, 2 and 3 times BLOCK_SIZE past the current s1/s2 position).
 * NOTE(review): presumably the cache block size of the target CPU - confirm.
 */
54#define	BLOCK_SIZE	64
55
/*
 * ANSI_PRAGMA_WEAK is not defined in this file.
 * NOTE(review): presumably it comes from <sys/asm_linkage.h> and declares
 * memcmp as a weak function symbol - confirm.
 */
56	ANSI_PRAGMA_WEAK(memcmp,function)
57
/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * Entry:   %o0 = s1, %o1 = s2, %o2 = n
 * Return:  %o0 = 0 when s1 == s2 or no byte differs, otherwise
 *          (s1 byte - s2 byte) for the first differing pair.  Bytes are
 *          fetched with ldub, so the difference is of unsigned values.
 *
 * Strategy:
 *   n <= 48  compare everything in the byte loop at 1:/.bytcmp.
 *   n >  48  byte-compare 16 - (s1 & 7) bytes, which leaves s1 on an
 *            8-byte boundary, then compare 8 bytes per pass in .dwcmp
 *            and finish (or pinpoint a mismatch) in the byte loop at
 *            5:/6:.
 *
 * Register roles:
 *   %o2       bytes still to compare
 *   %o3       trip count of the first byte loop; saved %fprs from
 *             .dwcmp onwards
 *   %o4       s1 byte in both byte loops; doubleword countdown in loop 4:
 *   %o5       s2 byte in both byte loops; fcmpne32 result in loop 4:
 *   %g1       s2 address rounded down to 8 bytes (loop 4: only)
 *   %d0, %d2  adjacent aligned s2 doublewords; %d8 = realigned s2 data
 *   %d6       s1 doubleword
 */
58	ENTRY(memcmp)
59	cmp	%o0, %o1		! s1 == s2?
60	be	%ncc, .cmpeq
61	prefetch [%o0], #one_read	! delay slot: issued on both paths
62	prefetch [%o1], #one_read
63
64	! for small counts byte compare immediately
	! (annulled delay slot: o3 = o2 is set only when the branch is taken)
65	cmp	%o2, 48
66	bleu,a 	%ncc, .bytcmp
67	mov	%o2, %o3		! o3 <= 48
68
69	! Count > 48. We will byte compare (8 + num of bytes to dbl align)
70	! bytes. We assume that most miscompares will occur in the 1st 8 bytes
71
72	prefetch [%o0 + (1 * BLOCK_SIZE)], #one_read
73	prefetch [%o1 + (1 * BLOCK_SIZE)], #one_read
74
	! o3 = 8 - ((s1 & 7) - 8) = 16 - (s1 & 7), so the byte loop stops with
	! s1 on an 8-byte boundary (16 bytes when s1 was already aligned).
	! The final sub executes in the delay slot of the ba.
75.chkdbl:
76	and     %o0, 7, %o4             ! is s1 aligned on a 8 byte bound
77	mov	8, %o3			! o2 > 48;  o3 = 8
78        sub     %o4, 8, %o4		! o4 = -(num of bytes to dbl align)
79	ba	%ncc, .bytcmp
80        sub     %o3, %o4, %o3           ! o3 = 8 + (num of bytes to dbl align)
81
	! First byte loop; it is entered at .bytcmp, not at 1:.
	! deccc borrows only when o3 was already 0, so bgeu (carry clear) runs
	! the body at 1: exactly o3 times.  The annulled delay slot fetches the
	! s1 byte only when another pass is made.  The same deccc is also the
	! delay slot of the bne above it; the extra decrement on the mismatch
	! path is harmless because .noteq does not read o3.
821:	ldub	[%o1], %o5        	! byte compare loop
83        inc     %o1
84        inc     %o0
85	dec	%o2
86        cmp     %o4, %o5
87	bne	%ncc, .noteq
88.bytcmp:
89	deccc   %o3
90	bgeu,a	%ncc, 1b
91        ldub    [%o0], %o4
92
93	! Check to see if there are more bytes to compare
	! (o2 can be nonzero here only on the count > 48 path; the short path
	! started with o3 = o2 and both are decremented once per pass)
94	cmp	%o2, 0			! is o2 > 0
95	bgu	%ncc, .dwcmp		! we should already be dbl aligned
96	nop
97.cmpeq:
98        retl                             ! strings compare equal
99	sub	%g0, %g0, %o0
100
101.noteq:
102	retl				! strings aren't equal
103	sub	%o4, %o5, %o0		! return(*s1 - *s2)
104
105
106        ! double word compare - using ldd and faligndata. Compares up to
107        ! 8 byte multiple count and does byte compare for the residual.
108
109.dwcmp:
110	prefetch [%o0 + (2 * BLOCK_SIZE)], #one_read
111	prefetch [%o1 + (2 * BLOCK_SIZE)], #one_read
112
113        ! if fprs.fef == 0, set it. Checking it, requires 2 instructions.
114        ! So set it anyway, without checking.
115        rd      %fprs, %o3              ! o3 = fprs
116        wr      %g0, 0x4, %fprs         ! fprs.fef = 1
117
	! o4 = (o2 rounded down to 8) - 8.  Loop 4: takes 8 off o4 per pass and
	! repeats while the result is above zero (bgu), so it consumes o4 bytes
	! and leaves 8 + (o2 & 7) bytes for the byte loop at 5:/6:.
	! NOTE(review): presumably the held-back doubleword keeps the look-ahead
	! ldd of s2 inside the buffer - confirm.
	!
	! alignaddr puts s2 rounded down to 8 bytes in g1 and records s2 & 7 as
	! the byte offset that faligndata applies; d0 is primed with the first
	! aligned s2 doubleword.
118        andn    %o2, 7, %o4             ! o4 has 8 byte aligned cnt
119	sub     %o4, 8, %o4
120        alignaddr %o1, %g0, %g1
121        ldd     [%g1], %d0
1224:
123        add     %g1, 8, %g1
124        ldd     [%g1], %d2		! next aligned s2 doubleword
125	ldd	[%o0], %d6		! s1 doubleword
126	prefetch [%g1 + (3 * BLOCK_SIZE)], #one_read
127	prefetch [%o0 + (3 * BLOCK_SIZE)], #one_read
128        faligndata %d0, %d2, %d8	! d8 = 8 bytes of s2 at current position
129	fcmpne32 %d6, %d8, %o5		! o5 != 0 if either 32-bit half differs
130	fsrc1	%d6, %d6		! 2 fsrc1's added since o5 cannot
131	fsrc1	%d8, %d8		! be used for 3 cycles else we
132	fmovd	%d2, %d0		! create 9 bubbles in the pipeline
	! The fmovd above also carries the look-ahead doubleword into the next
	! pass.  On a mismatch, branch to 6: so the byte loop rescans this
	! doubleword (o2 has not been reduced for it yet); the annulled delay
	! slot turns o1 into the offset s2 - s1 only when the branch is taken.
133	brnz,a,pn %o5, 6f
134	sub     %o1, %o0, %o1           ! o1 gets the difference
135        subcc   %o4, 8, %o4
136        add     %o0, 8, %o0
137        add     %o1, 8, %o1
138        bgu,pt	%ncc, 4b
139        sub     %o2, 8, %o2		! delay slot: runs on every pass
140
	! No mismatch in the doubleword loop: make the same o1 = s2 - s1
	! conversion (in the delay slot) and compare the remaining o2 bytes.
141.residcmp:
142        ba      6f
143	sub     %o1, %o0, %o1           ! o1 gets the difference
144
	! Second byte loop, entered at 6:.  Same shape as the first one but
	! counted directly on o2, with s2 addressed as [o0 + o1] before o0 is
	! advanced.  The deccc at 6: is also the delay slot of the bne; .dnoteq
	! does not read o2.
1455:      ldub    [%o0 + %o1], %o5        ! byte compare loop
146        inc     %o0
147        cmp     %o4, %o5
148        bne     %ncc, .dnoteq
1496:
150        deccc   %o2
151        bgeu,a	%ncc, 5b
152        ldub    [%o0], %o4
153
	! Both exits below write back only the fef bit of the saved fprs.
154	and     %o3, 0x4, %o3           ! fprs.du = fprs.dl = 0
155	wr      %o3, %g0, %fprs         ! fprs = o3 - restore fprs
156	retl
157	sub	%g0, %g0, %o0		! strings compare equal
158
159.dnoteq:
160	and     %o3, 0x4, %o3           ! fprs.du = fprs.dl = 0
161	wr      %o3, %g0, %fprs         ! fprs = o3 - restore fprs
162	retl
163	sub	%o4, %o5, %o0		! return(*s1 - *s2)
164
165	SET_SIZE(memcmp)
166