xref: /illumos-gate/usr/src/lib/libc/sparcv9/crt/__align_cpy_2.S (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26*5d9d9091SRichard Lowe
27*5d9d9091SRichard Lowe	.file	"__align_cpy_2.s"
28*5d9d9091SRichard Lowe
29*5d9d9091SRichard Lowe/*
30*5d9d9091SRichard Lowe * __align_cpy_2(s1, s2, len)
31*5d9d9091SRichard Lowe *
32*5d9d9091SRichard Lowe * Copy s2 to s1, always copy n bytes.
33*5d9d9091SRichard Lowe * Note: this does not work for overlapped copies, bcopy() does
34*5d9d9091SRichard Lowe *	 This routine is copied from memcpy.s, with all values doubled.
35*5d9d9091SRichard Lowe *	 No attempt has been made to improve the comments or performance.
36*5d9d9091SRichard Lowe *
37*5d9d9091SRichard Lowe */
38*5d9d9091SRichard Lowe
39*5d9d9091SRichard Lowe#include <sys/asm_linkage.h>
40*5d9d9091SRichard Lowe
41*5d9d9091SRichard Lowe	ENTRY(__align_cpy_2)
42*5d9d9091SRichard Lowe	cmp	%o0, %o1
43*5d9d9091SRichard Lowe	be,pn	%xcc, .done		! Identical addresses--done.
44*5d9d9091SRichard Lowe	mov	%o0, %g5		! save des address for return val
45*5d9d9091SRichard Lowe	cmp	%o2, 18			! for small counts copy bytes
46*5d9d9091SRichard Lowe	ble,pn	%xcc, .dbytecp
47*5d9d9091SRichard Lowe	andcc	%o1, 6, %o5		! is src 8-byte aligned
48*5d9d9091SRichard Lowe	bz,pn	%xcc, .aldst
49*5d9d9091SRichard Lowe	cmp	%o5, 4			! is src 4-byte aligned
50*5d9d9091SRichard Lowe	be,pt	%xcc, .s2algn
51*5d9d9091SRichard Lowe	cmp	%o5, 6			! src is 2-byte aligned
52*5d9d9091SRichard Lowe.s1algn:lduh	[%o1], %o3		! move 2 or 6 bytes to align it
53*5d9d9091SRichard Lowe	inc	2, %o1
54*5d9d9091SRichard Lowe	sth	%o3, [%g5]		! move 2 bytes to align src
55*5d9d9091SRichard Lowe	inc	2, %g5
56*5d9d9091SRichard Lowe	bne,pt	%xcc, .s2algn
57*5d9d9091SRichard Lowe	dec	2, %o2
58*5d9d9091SRichard Lowe	b	.ald			! now go align dest
59*5d9d9091SRichard Lowe	andcc	%g5, 6, %o5
60*5d9d9091SRichard Lowe
61*5d9d9091SRichard Lowe.s2algn:lduw	[%o1], %o3		! know src is 4-byte aligned
62*5d9d9091SRichard Lowe	inc	4, %o1
63*5d9d9091SRichard Lowe	srlx	%o3, 16, %o4
64*5d9d9091SRichard Lowe	sth	%o4, [%g5]		! have to do 2-bytes,
65*5d9d9091SRichard Lowe	sth	%o3, [%g5 + 2]		! don't know dst alignment
66*5d9d9091SRichard Lowe	inc	4, %g5
67*5d9d9091SRichard Lowe	dec	4, %o2
68*5d9d9091SRichard Lowe
69*5d9d9091SRichard Lowe.aldst:	andcc	%g5, 6, %o5		! align the destination address
70*5d9d9091SRichard Lowe.ald:	bz,pn	%xcc, .w4cp
71*5d9d9091SRichard Lowe	cmp	%o5, 4
72*5d9d9091SRichard Lowe	bz,pn	%xcc, .w2cp
73*5d9d9091SRichard Lowe	cmp	%o5, 6
74*5d9d9091SRichard Lowe.w3cp:	ldx	[%o1], %o4
75*5d9d9091SRichard Lowe	inc	8, %o1
76*5d9d9091SRichard Lowe	srlx	%o4, 48, %o5
77*5d9d9091SRichard Lowe	sth	%o5, [%g5]
78*5d9d9091SRichard Lowe	bne,pt	%xcc, .w1cp
79*5d9d9091SRichard Lowe	inc	2, %g5
80*5d9d9091SRichard Lowe	dec	2, %o2
81*5d9d9091SRichard Lowe	andn	%o2, 6, %o3		! o3 is aligned word count
82*5d9d9091SRichard Lowe	sub	%o1, %g5, %o1		! g5 gets the difference
83*5d9d9091SRichard Lowe
84*5d9d9091SRichard Lowe1:	sllx	%o4, 16, %g1		! save residual bytes
85*5d9d9091SRichard Lowe	ldx	[%o1+%g5], %o4
86*5d9d9091SRichard Lowe	deccc	8, %o3
87*5d9d9091SRichard Lowe	srlx	%o4, 48, %o5		! merge with residual
88*5d9d9091SRichard Lowe	or	%o5, %g1, %g1
89*5d9d9091SRichard Lowe	stx	%g1, [%g5]
90*5d9d9091SRichard Lowe	bnz,pt	%xcc, 1b
91*5d9d9091SRichard Lowe	inc	8, %g5
92*5d9d9091SRichard Lowe	sub	%o1, 6, %o1		! used two bytes of last word read
93*5d9d9091SRichard Lowe	b	7f
94*5d9d9091SRichard Lowe	and	%o2, 6, %o2
95*5d9d9091SRichard Lowe
96*5d9d9091SRichard Lowe.w1cp:	srlx	%o4, 16, %o5
97*5d9d9091SRichard Lowe	st	%o5, [%g5]
98*5d9d9091SRichard Lowe	inc	4, %g5
99*5d9d9091SRichard Lowe	dec	6, %o2
100*5d9d9091SRichard Lowe	andn	%o2, 6, %o3
101*5d9d9091SRichard Lowe	sub	%o1, %g5, %o1		! g5 gets the difference
102*5d9d9091SRichard Lowe
103*5d9d9091SRichard Lowe2:	sllx	%o4, 48, %g1		! save residual bytes
104*5d9d9091SRichard Lowe	ldx	[%o1+%g5], %o4
105*5d9d9091SRichard Lowe	deccc	8, %o3
106*5d9d9091SRichard Lowe	srlx	%o4, 16, %o5		! merge with residual
107*5d9d9091SRichard Lowe	or	%o5, %g1, %g1
108*5d9d9091SRichard Lowe	stx	%g1, [%g5]
109*5d9d9091SRichard Lowe	bnz,pt	%xcc, 2b
110*5d9d9091SRichard Lowe	inc	8, %g5
111*5d9d9091SRichard Lowe	sub	%o1, 2, %o1		! used six bytes of last word read
112*5d9d9091SRichard Lowe	b	7f
113*5d9d9091SRichard Lowe	and	%o2, 6, %o2
114*5d9d9091SRichard Lowe
115*5d9d9091SRichard Lowe.w2cp:	ldx	[%o1], %o4
116*5d9d9091SRichard Lowe	inc	8, %o1
117*5d9d9091SRichard Lowe	srlx	%o4, 32, %o5
118*5d9d9091SRichard Lowe	st	%o5, [%g5]
119*5d9d9091SRichard Lowe	inc	4, %g5
120*5d9d9091SRichard Lowe	dec	4, %o2
121*5d9d9091SRichard Lowe	andn	%o2, 6, %o3		! o3 is aligned word count
122*5d9d9091SRichard Lowe	sub	%o1, %g5, %o1		! g5 gets the difference
123*5d9d9091SRichard Lowe
124*5d9d9091SRichard Lowe3:	sllx	%o4, 32, %g1		! save residual bytes
125*5d9d9091SRichard Lowe	ldx	[%o1+%g5], %o4
126*5d9d9091SRichard Lowe	deccc	8, %o3
127*5d9d9091SRichard Lowe	srlx	%o4, 32, %o5		! merge with residual
128*5d9d9091SRichard Lowe	or	%o5, %g1, %g1
129*5d9d9091SRichard Lowe	stx	%g1, [%g5]
130*5d9d9091SRichard Lowe	bnz,pt	%xcc, 3b
131*5d9d9091SRichard Lowe	inc	8, %g5
132*5d9d9091SRichard Lowe	sub	%o1, 4, %o1		! used four bytes of last word read
133*5d9d9091SRichard Lowe	b	7f
134*5d9d9091SRichard Lowe	and	%o2, 6, %o2
135*5d9d9091SRichard Lowe
136*5d9d9091SRichard Lowe.w4cp:	andn	%o2, 6, %o3		! o3 is aligned word count
137*5d9d9091SRichard Lowe	sub	%o1, %g5, %o1		! g5 gets the difference
138*5d9d9091SRichard Lowe
139*5d9d9091SRichard Lowe1:	ldx	[%o1+%g5], %o4		! read from address
140*5d9d9091SRichard Lowe	deccc	8, %o3			! decrement count
141*5d9d9091SRichard Lowe	stx	%o4, [%g5]		! write at destination address
142*5d9d9091SRichard Lowe	bg,pt	%xcc, 1b
143*5d9d9091SRichard Lowe	inc	8, %g5			! increment to address
144*5d9d9091SRichard Lowe	b	7f
145*5d9d9091SRichard Lowe	and	%o2, 6, %o2		! number of leftover bytes, if any
146*5d9d9091SRichard Lowe
147*5d9d9091SRichard Lowe	!
148*5d9d9091SRichard Lowe	! differenced byte copy, works with any alignment
149*5d9d9091SRichard Lowe	!
150*5d9d9091SRichard Lowe.dbytecp:
151*5d9d9091SRichard Lowe	b	7f
152*5d9d9091SRichard Lowe	sub	%o1, %g5, %o1		! g5 gets the difference
153*5d9d9091SRichard Lowe
154*5d9d9091SRichard Lowe4:	sth	%o4, [%g5]		! write to address
155*5d9d9091SRichard Lowe	inc	2, %g5			! inc to address
156*5d9d9091SRichard Lowe7:	deccc	2, %o2			! decrement count
157*5d9d9091SRichard Lowe	bge,a,pt %xcc,4b		! loop till done
158*5d9d9091SRichard Lowe	lduh	[%o1+%g5], %o4		! read from address
159*5d9d9091SRichard Lowe.done:
160*5d9d9091SRichard Lowe	retl
161*5d9d9091SRichard Lowe	nop
162*5d9d9091SRichard Lowe
163*5d9d9091SRichard Lowe	SET_SIZE(__align_cpy_2)
164