xref: /titanic_51/usr/src/lib/libc/sparcv9/gen/memcpy.s (revision fa9e4066f08beec538e775443c5be79dd423fcab)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 1997-2003 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
/*
 * Object identification string and source file name for the assembler.
 * NOTE(review): %Z%, %M%, %I% and %E% look like SCCS keyword placeholders
 * that are expanded when the file is checked out; confirm against the
 * source-control setup before editing these strings.
 */
27	.ident	"%Z%%M%	%I%	%E% SMI"
28
29	.file	"%M%"
30
31/*
32 * memcpy(s1, s2, len)
33 *
34 * Copy len bytes from s2 to s1; exactly len bytes are always copied.
35 * Note: this does not work for overlapping copies; bcopy() does.
36 *
37 * The extra entry point __align_cpy_1 is mainly for use by the compilers.
38 *
39 *
40 * Fast assembler language version of the following C-program for memcpy
41 * which represents the `standard' for the C-library.
42 *
43 *	void *
44 *	memcpy(void *s, const void *s0, size_t n)
45 *	{
46 *		if (n != 0) {
47 *			char *s1 = s;
48 *			const char *s2 = s0;
49 *			do {
50 *				*s1++ = *s2++;
51 *			} while (--n != 0);
52 *		}
53 *		return (s);
54 *	}
55 */
56
57#include <sys/asm_linkage.h>
58
/*
 * NOTE(review): ANSI_PRAGMA_WEAK is defined outside this file (presumably
 * in <sys/asm_linkage.h>).  From its name and arguments it appears to
 * declare memcpy as a weak function symbol; confirm.  It is invoked before
 * "synonyms.h" is included; presumably that order matters because
 * synonyms.h may redefine the name memcpy, so confirm before reordering.
 */
59	ANSI_PRAGMA_WEAK(memcpy,function)
60
61#include "synonyms.h"
62
/*
 * Define _private_memcpy as a weak symbol of type function whose value is
 * set equal to memcpy, i.e. a second name for the same entry point.
 */
63	.weak	_private_memcpy
64	.type	_private_memcpy, #function
65	_private_memcpy = memcpy
66
/*
 * memcpy / __align_cpy_1 (both names label the same first instruction)
 *
 * Copies %o2 bytes from the address in %o1 to the address in %o0 and
 * returns with %o0 unchanged.
 *
 * Register roles in the code below:
 *	%o0	destination argument; never written here, so it still holds
 *		the original destination pointer at retl
 *	%o1	source pointer; before each copy loop it is replaced by
 *		(source - destination) so that [%o1+%g5] addresses the
 *		source while only %g5 has to advance
 *	%o2	number of bytes still to copy
 *	%g5	working destination pointer
 *	%g1, %o3, %o4, %o5	scratch
 *
 * Strategy: counts of 17 or less go straight to the byte loop at 4:/7:.
 * Larger counts first copy 0-3 bytes to word-align the source, then store
 * 0-3 bytes to word-align the destination, then copy whole words.  When
 * the destination needed alignment bytes, each stored word is merged from
 * two adjacent source words with a shift pair (8/24, 24/8 or 16/16).
 * Whatever is left is finished by the same byte loop.
 *
 * Every branch has a delay slot: the instruction after the branch runs
 * before control reaches the target.  The one annulled branch (",a", in
 * the byte loop) runs its delay slot only when the branch is taken.
 * Several compare results are deliberately left in the condition codes
 * across loads, stores, shifts and inc/dec (none of which modify them)
 * and are tested by a later branch, so instruction order matters.
 *
 * The alignment tests name %icc in some branches and %xcc in others; the
 * values tested there are results of "and ..., 3" compared with small
 * constants, so both condition-code sets agree.  All tests on the byte
 * count use %xcc.
 *
 * NOTE(review): ENTRY and SET_SIZE are macros defined outside this file
 * (presumably <sys/asm_linkage.h>); presumably they declare the global
 * function label and record the symbol size.  Confirm against that header.
 */
67	ENTRY(memcpy)
68	ENTRY(__align_cpy_1)
69	mov	%o0, %g5		! %g5 = working dst pointer; %o0 is left intact as the return value
70	cmp	%o2, 17			! for small counts copy bytes
71	bleu,pn	%xcc, .dbytecp		! count <= 17 (unsigned): byte loop only
72	andcc	%o1, 3, %o5		! (delay slot) %o5 = src & 3; is src word aligned?
73	bz,pn	%icc, .aldst		! src already word aligned: go align dst
74	cmp	%o5, 2			! (delay slot) is src half-word aligned?
75	be,pt	%xcc, .s2algn		! src & 3 == 2: one halfword aligns src
76	cmp	%o5, 3			! (delay slot) src & 3 is 1 or 3 here; result is tested by the bne below
77.s1algn:ldub	[%o1], %o3		! move 1 or 3 bytes to align it
78	inc	1, %o1
79	stb	%o3, [%g5]		! move a byte to align src
80	inc	1, %g5
81	bne,pt	%icc, .s2algn		! src & 3 was 1: a halfword is still needed
82	dec	%o2			! (delay slot) count the byte just copied
83	b	.ald			! src & 3 was 3: src is aligned, now go align dest
84	andcc	%g5, 3, %o5		! (delay slot) %o5 = dst & 3, same test as at .aldst
85
86.s2algn:lduh	[%o1], %o3		! know src is 2 byte aligned
87	inc	2, %o1
88	srl	%o3, 8, %o4		! %o4 = high-order byte of the halfword
89	stb	%o4, [%g5]		! have to do bytes,
90	stb	%o3, [%g5 + 1]		! dst alignment is not known yet
91	inc	2, %g5
92	dec	2, %o2
93
	! src is word aligned on every path that reaches here; dispatch on dst & 3.
94.aldst:	andcc	%g5, 3, %o5		! align the destination address
95.ald:	bz,pn	%icc, .w4cp		! dst & 3 == 0: plain word copy
96	cmp	%o5, 2			! (delay slot)
97	bz,pn	%icc, .w2cp		! dst & 3 == 2: one halfword aligns dst
98	cmp	%o5, 3			! (delay slot) dst & 3 is 1 or 3 here; result is tested by the bne below
	!
	! dst & 3 is 1 or 3.  Load the first source word and store its
	! high-order byte; that alone aligns dst when dst & 3 == 3.
	!
99.w3cp:	lduw	[%o1], %o4
100	inc	4, %o1
101	srl	%o4, 24, %o5		! %o5 = high-order byte of the word
102	stb	%o5, [%g5]
103	bne,pt	%icc, .w1cp		! dst & 3 was 1: two more bytes are needed
104	inc	%g5			! (delay slot) step past the stored byte on both paths
105	dec	1, %o2			! count the byte just stored
106	andn	%o2, 3, %o3		! %o3 = remaining count rounded down to a multiple of 4
107	dec	4, %o3			! one word is already loaded, so run the loop one word short; avoids reading beyond tail of src
108	sub	%o1, %g5, %o1		! o1 gets the difference
109
	! Each pass stores the 3 bytes left in %o4 followed by the
	! high-order byte of the next source word.
1101:	sll	%o4, 8, %g1		! save residual bytes
111	lduw	[%o1+%g5], %o4
112	deccc	4, %o3
113	srl	%o4, 24, %o5		! merge with residual
114	or	%o5, %g1, %g1
115	st	%g1, [%g5]		! only the low 32 bits of %g1 are stored
116	bnz,pt	%xcc, 1b
117	inc	4, %g5			! (delay slot)
118	sub	%o1, 3, %o1		! used one byte of last word read; %o1+%g5 is now the first uncopied src byte
119	and	%o2, 3, %o2
120	b	7f			! finish in the byte loop
121	inc	4, %o2			! (delay slot) tail = (count & 3) + the 4 bytes the loop was shortened by
122
	!
	! dst & 3 was 1: one byte is stored and %g5 is now halfword aligned.
	! Store the middle two bytes of %o4 to reach word alignment, which
	! leaves one byte of %o4 for the merge loop.
	!
123.w1cp:	srl	%o4, 8, %o5		! low 16 bits of %o5 = middle two bytes of the word
124	sth	%o5, [%g5]
125	inc	2, %g5
126	dec	3, %o2			! one byte stored at .w3cp plus the two stored here
127	andn	%o2, 3, %o3		! %o3 = remaining count rounded down to a multiple of 4
128	dec	4, %o3			! avoid reading beyond tail of src
129	sub	%o1, %g5, %o1		! o1 gets the difference
130
	! Each pass stores the 1 byte left in %o4 followed by the three
	! high-order bytes of the next source word.
1312:	sll	%o4, 24, %g1		! save residual bytes
132	lduw	[%o1+%g5], %o4
133	deccc	4, %o3
134	srl	%o4, 8, %o5		! merge with residual
135	or	%o5, %g1, %g1
136	st	%g1, [%g5]
137	bnz,pt	%xcc, 2b
138	inc	4, %g5			! (delay slot)
139	sub	%o1, 1, %o1		! used three bytes of last word read
140	and	%o2, 3, %o2
141	b	7f			! finish in the byte loop
142	inc	4, %o2			! (delay slot) tail = (count & 3) + 4
143
	!
	! dst & 3 == 2: store the high-order halfword of the first source
	! word to reach word alignment, leaving two bytes of %o4.
	!
144.w2cp:	lduw	[%o1], %o4
145	inc	4, %o1
146	srl	%o4, 16, %o5		! %o5 = high-order halfword
147	sth	%o5, [%g5]
148	inc	2, %g5
149	dec	2, %o2
150	andn	%o2, 3, %o3		! %o3 = remaining count rounded down to a multiple of 4
151	dec	4, %o3			! avoid reading beyond tail of src
152	sub	%o1, %g5, %o1		! o1 gets the difference
153
	! Each pass stores the 2 bytes left in %o4 followed by the
	! high-order halfword of the next source word.
1543:	sll	%o4, 16, %g1		! save residual bytes
155	lduw	[%o1+%g5], %o4
156	deccc	4, %o3
157	srl	%o4, 16, %o5		! merge with residual
158	or	%o5, %g1, %g1
159	st	%g1, [%g5]
160	bnz,pt	%xcc, 3b
161	inc	4, %g5			! (delay slot)
162	sub	%o1, 2, %o1		! used two bytes of last word read
163	and	%o2, 3, %o2
164	b	7f			! finish in the byte loop
165	inc	4, %o2			! (delay slot) tail = (count & 3) + 4
166
	! src and dst are both word aligned: copy whole words, no merging.
167.w4cp:	andn	%o2, 3, %o3		! %o3 = count rounded down to a multiple of 4
168	sub	%o1, %g5, %o1		! o1 gets the difference
169
1701:	lduw	[%o1+%g5], %o4		! read from address
171	deccc	4, %o3			! decrement count
172	st	%o4, [%g5]		! write at destination address
173	bgu,pt	%xcc, 1b		! loop while %o3 is still above zero
174	inc	4, %g5			! increment to address
175	b	7f
176	and	%o2, 3, %o2		! number of leftover bytes, if any
177
178	!
179	! differenced byte copy, works with any alignment
180	!
181.dbytecp:
182	b	7f			! enter the loop at its count test
183	sub	%o1, %g5, %o1		! o1 gets the difference
184
1854:	stb	%o4, [%g5]		! write to address
186	inc	%g5			! inc to address
1877:	deccc	%o2			! decrement count
188	bgeu,a,pt %xcc,4b		! loop till done: taken while the count was nonzero before the decrement
189	ldub	[%o1+%g5], %o4		! read from address (annulled delay slot: runs only when the branch is taken)
190	retl				! return; %o0 still holds the destination argument
191	nop				! (delay slot)
192
193	SET_SIZE(memcpy)
194	SET_SIZE(__align_cpy_1)
195