xref: /illumos-gate/usr/src/lib/libc/sparc/gen/memcpy.S (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
1*5d9d9091SRichard Lowe/*
2*5d9d9091SRichard Lowe * CDDL HEADER START
3*5d9d9091SRichard Lowe *
4*5d9d9091SRichard Lowe * The contents of this file are subject to the terms of the
5*5d9d9091SRichard Lowe * Common Development and Distribution License (the "License").
6*5d9d9091SRichard Lowe * You may not use this file except in compliance with the License.
7*5d9d9091SRichard Lowe *
8*5d9d9091SRichard Lowe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*5d9d9091SRichard Lowe * or http://www.opensolaris.org/os/licensing.
10*5d9d9091SRichard Lowe * See the License for the specific language governing permissions
11*5d9d9091SRichard Lowe * and limitations under the License.
12*5d9d9091SRichard Lowe *
13*5d9d9091SRichard Lowe * When distributing Covered Code, include this CDDL HEADER in each
14*5d9d9091SRichard Lowe * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*5d9d9091SRichard Lowe * If applicable, add the following below this CDDL HEADER, with the
16*5d9d9091SRichard Lowe * fields enclosed by brackets "[]" replaced with your own identifying
17*5d9d9091SRichard Lowe * information: Portions Copyright [yyyy] [name of copyright owner]
18*5d9d9091SRichard Lowe *
19*5d9d9091SRichard Lowe * CDDL HEADER END
20*5d9d9091SRichard Lowe */
21*5d9d9091SRichard Lowe
22*5d9d9091SRichard Lowe/*
23*5d9d9091SRichard Lowe * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24*5d9d9091SRichard Lowe */
25*5d9d9091SRichard Lowe
26*5d9d9091SRichard Lowe	.file	"memcpy.s"
27*5d9d9091SRichard Lowe
/*
 * memcpy(s1, s2, len)
 *
 * Copy s2 to s1, always copy len bytes.
 * Note: this does not work for overlapped copies; bcopy() does.
 *
 * Fast assembler language version of the following C-program for memcpy
 * which represents the `standard' for the C-library.
 *
 *	void *
 *	memcpy(void *s, const void *s0, size_t n)
 *	{
 *		if (n != 0) {
 *			char *s1 = s;
 *			const char *s2 = s0;
 *			do {
 *				*s1++ = *s2++;
 *			} while (--n != 0);
 *		}
 *		return (s);
 *	}
 */
50*5d9d9091SRichard Lowe
51*5d9d9091SRichard Lowe#include "SYS.h"
52*5d9d9091SRichard Lowe
53*5d9d9091SRichard Lowe	ANSI_PRAGMA_WEAK(memcpy,function)
54*5d9d9091SRichard Lowe
55*5d9d9091SRichard Lowe	ENTRY(memcpy)
56*5d9d9091SRichard Lowe        ENTRY(__align_cpy_1)
57*5d9d9091SRichard Lowe	st	%o0, [%sp + 68]		! save des address for return val
58*5d9d9091SRichard Lowe	cmp	%o2, 17			! for small counts copy bytes
59*5d9d9091SRichard Lowe	bleu	.dbytecp
60*5d9d9091SRichard Lowe	andcc	%o1, 3, %o5		! is src word aligned
61*5d9d9091SRichard Lowe	bz	.aldst
62*5d9d9091SRichard Lowe	cmp	%o5, 2			! is src half-word aligned
63*5d9d9091SRichard Lowe	be	.s2algn
64*5d9d9091SRichard Lowe	cmp	%o5, 3			! src is byte aligned
65*5d9d9091SRichard Lowe.s1algn:ldub	[%o1], %o3		! move 1 or 3 bytes to align it
66*5d9d9091SRichard Lowe	inc	1, %o1
67*5d9d9091SRichard Lowe	stb	%o3, [%o0]		! move a byte to align src
68*5d9d9091SRichard Lowe	inc	1, %o0
69*5d9d9091SRichard Lowe	bne	.s2algn
70*5d9d9091SRichard Lowe	dec	%o2
71*5d9d9091SRichard Lowe	b	.ald			! now go align dest
72*5d9d9091SRichard Lowe	andcc	%o0, 3, %o5
73*5d9d9091SRichard Lowe
74*5d9d9091SRichard Lowe.s2algn:lduh	[%o1], %o3		! know src is 2 byte alinged
75*5d9d9091SRichard Lowe	inc	2, %o1
76*5d9d9091SRichard Lowe	srl	%o3, 8, %o4
77*5d9d9091SRichard Lowe	stb	%o4, [%o0]		! have to do bytes,
78*5d9d9091SRichard Lowe	stb	%o3, [%o0 + 1]		! don't know dst alingment
79*5d9d9091SRichard Lowe	inc	2, %o0
80*5d9d9091SRichard Lowe	dec	2, %o2
81*5d9d9091SRichard Lowe
82*5d9d9091SRichard Lowe.aldst:	andcc	%o0, 3, %o5		! align the destination address
83*5d9d9091SRichard Lowe.ald:	bz	.w4cp
84*5d9d9091SRichard Lowe	cmp	%o5, 2
85*5d9d9091SRichard Lowe	bz	.w2cp
86*5d9d9091SRichard Lowe	cmp	%o5, 3
87*5d9d9091SRichard Lowe.w3cp:	ld	[%o1], %o4
88*5d9d9091SRichard Lowe	inc	4, %o1
89*5d9d9091SRichard Lowe	srl	%o4, 24, %o5
90*5d9d9091SRichard Lowe	stb	%o5, [%o0]
91*5d9d9091SRichard Lowe	bne	.w1cp
92*5d9d9091SRichard Lowe	inc	%o0
93*5d9d9091SRichard Lowe	dec	1, %o2
94*5d9d9091SRichard Lowe	andn	%o2, 3, %o3		! o3 is aligned word count
95*5d9d9091SRichard Lowe	dec	4, %o3			! avoid reading beyond tail of src
96*5d9d9091SRichard Lowe	sub	%o1, %o0, %o1		! o1 gets the difference
97*5d9d9091SRichard Lowe
98*5d9d9091SRichard Lowe1:	sll	%o4, 8, %g1		! save residual bytes
99*5d9d9091SRichard Lowe	ld	[%o1+%o0], %o4
100*5d9d9091SRichard Lowe	deccc	4, %o3
101*5d9d9091SRichard Lowe	srl	%o4, 24, %o5		! merge with residual
102*5d9d9091SRichard Lowe	or	%o5, %g1, %g1
103*5d9d9091SRichard Lowe	st	%g1, [%o0]
104*5d9d9091SRichard Lowe	bnz	1b
105*5d9d9091SRichard Lowe	inc	4, %o0
106*5d9d9091SRichard Lowe	sub	%o1, 3, %o1		! used one byte of last word read
107*5d9d9091SRichard Lowe	and	%o2, 3, %o2
108*5d9d9091SRichard Lowe	b	7f
109*5d9d9091SRichard Lowe	inc	4, %o2
110*5d9d9091SRichard Lowe
111*5d9d9091SRichard Lowe.w1cp:	srl	%o4, 8, %o5
112*5d9d9091SRichard Lowe	sth	%o5, [%o0]
113*5d9d9091SRichard Lowe	inc	2, %o0
114*5d9d9091SRichard Lowe	dec	3, %o2
115*5d9d9091SRichard Lowe	andn	%o2, 3, %o3		! o3 is aligned word count
116*5d9d9091SRichard Lowe	dec	4, %o3			! avoid reading beyond tail of src
117*5d9d9091SRichard Lowe	sub	%o1, %o0, %o1		! o1 gets the difference
118*5d9d9091SRichard Lowe
119*5d9d9091SRichard Lowe2:	sll	%o4, 24, %g1		! save residual bytes
120*5d9d9091SRichard Lowe	ld	[%o1+%o0], %o4
121*5d9d9091SRichard Lowe	deccc	4, %o3
122*5d9d9091SRichard Lowe	srl	%o4, 8, %o5		! merge with residual
123*5d9d9091SRichard Lowe	or	%o5, %g1, %g1
124*5d9d9091SRichard Lowe	st	%g1, [%o0]
125*5d9d9091SRichard Lowe	bnz	2b
126*5d9d9091SRichard Lowe	inc	4, %o0
127*5d9d9091SRichard Lowe	sub	%o1, 1, %o1		! used three bytes of last word read
128*5d9d9091SRichard Lowe	and	%o2, 3, %o2
129*5d9d9091SRichard Lowe	b	7f
130*5d9d9091SRichard Lowe	inc	4, %o2
131*5d9d9091SRichard Lowe
132*5d9d9091SRichard Lowe.w2cp:	ld	[%o1], %o4
133*5d9d9091SRichard Lowe	inc	4, %o1
134*5d9d9091SRichard Lowe	srl	%o4, 16, %o5
135*5d9d9091SRichard Lowe	sth	%o5, [%o0]
136*5d9d9091SRichard Lowe	inc	2, %o0
137*5d9d9091SRichard Lowe	dec	2, %o2
138*5d9d9091SRichard Lowe	andn	%o2, 3, %o3		! o3 is aligned word count
139*5d9d9091SRichard Lowe	dec	4, %o3			! avoid reading beyond tail of src
140*5d9d9091SRichard Lowe	sub	%o1, %o0, %o1		! o1 gets the difference
141*5d9d9091SRichard Lowe
142*5d9d9091SRichard Lowe3:	sll	%o4, 16, %g1		! save residual bytes
143*5d9d9091SRichard Lowe	ld	[%o1+%o0], %o4
144*5d9d9091SRichard Lowe	deccc	4, %o3
145*5d9d9091SRichard Lowe	srl	%o4, 16, %o5		! merge with residual
146*5d9d9091SRichard Lowe	or	%o5, %g1, %g1
147*5d9d9091SRichard Lowe	st	%g1, [%o0]
148*5d9d9091SRichard Lowe	bnz	3b
149*5d9d9091SRichard Lowe	inc	4, %o0
150*5d9d9091SRichard Lowe	sub	%o1, 2, %o1		! used two bytes of last word read
151*5d9d9091SRichard Lowe	and	%o2, 3, %o2
152*5d9d9091SRichard Lowe	b	7f
153*5d9d9091SRichard Lowe	inc	4, %o2
154*5d9d9091SRichard Lowe
155*5d9d9091SRichard Lowe.w4cp:	andn	%o2, 3, %o3		! o3 is aligned word count
156*5d9d9091SRichard Lowe	sub	%o1, %o0, %o1		! o1 gets the difference
157*5d9d9091SRichard Lowe
158*5d9d9091SRichard Lowe1:	ld	[%o1+%o0], %o4		! read from address
159*5d9d9091SRichard Lowe	deccc	4, %o3			! decrement count
160*5d9d9091SRichard Lowe	st	%o4, [%o0]		! write at destination address
161*5d9d9091SRichard Lowe	bgu	1b
162*5d9d9091SRichard Lowe	inc	4, %o0			! increment to address
163*5d9d9091SRichard Lowe	b	7f
164*5d9d9091SRichard Lowe	and	%o2, 3, %o2		! number of leftover bytes, if any
165*5d9d9091SRichard Lowe
166*5d9d9091SRichard Lowe	!
167*5d9d9091SRichard Lowe	! differenced byte copy, works with any alignment
168*5d9d9091SRichard Lowe	!
169*5d9d9091SRichard Lowe.dbytecp:
170*5d9d9091SRichard Lowe	b	7f
171*5d9d9091SRichard Lowe	sub	%o1, %o0, %o1		! o1 gets the difference
172*5d9d9091SRichard Lowe
173*5d9d9091SRichard Lowe4:	stb	%o4, [%o0]		! write to address
174*5d9d9091SRichard Lowe	inc	%o0			! inc to address
175*5d9d9091SRichard Lowe7:	deccc	%o2			! decrement count
176*5d9d9091SRichard Lowe	bgeu,a	4b			! loop till done
177*5d9d9091SRichard Lowe	ldub	[%o1+%o0], %o4		! read from address
178*5d9d9091SRichard Lowe	retl
179*5d9d9091SRichard Lowe	ld	[%sp + 68], %o0		! return s1, destination address
180*5d9d9091SRichard Lowe
181*5d9d9091SRichard Lowe	SET_SIZE(memcpy)
182