xref: /illumos-gate/usr/src/lib/libc/sparcv9/crt/__align_cpy_2.S (revision 8361acf58a302751348aac091ab09484f3ecfb8c)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.file	"__align_cpy_2.s"
28
29/*
30 * __align_cpy_2(s1, s2, len)
31 *
32 * Copy s2 to s1, always copying len bytes.
33 * Note: this does not work for overlapped copies; bcopy() does.
34 *	 This routine is copied from memcpy.s, with all values doubled.
35 *	 No attempt has been made to improve the comments or performance.
36 *
37 */
38
39#include <sys/asm_linkage.h>
40
41	ENTRY(__align_cpy_2)
	!
	! Register roles in this routine:
	!   %o0	destination address on entry; no instruction here writes it
	!   %o1	source pointer; before each copy loop it is replaced by
	!	(source - destination) so that [%o1+%g5] addresses the source
	!   %o2	number of bytes still to be copied
	!   %o3	byte count driving the 8-byte loops (data scratch before that)
	!   %o4, %o5, %g1	data being moved or merged
	!   %g5	working destination pointer
	!
	! The instruction following each branch is in that branch's delay
	! slot.  Several delay-slot instructions set the condition codes that
	! a later branch consumes; the inc/dec/srlx/sth in between do not
	! modify the condition codes.
	!
	! NOTE(review): presumably both addresses are at least 2-byte aligned,
	! since lduh/sth are applied to them directly; confirm with callers.
	! NOTE(review): len looks assumed to be even.  The 8-byte loop counts
	! come from "andn %o2, 6", which keeps bit 0, and the three merging
	! loops are left only when bnz falls through, i.e. when the count
	! reaches exactly zero; confirm callers never pass an odd len.
	! The length tests (ble, bg, bge) are signed comparisons.
	!
42	cmp	%o0, %o1
43	be,pn	%xcc, .done		! same address for both: nothing to copy
44	mov	%o0, %g5		! (delay slot) %g5 = working dst; %o0 is never written
45	cmp	%o2, 18			! counts of 18 or less (signed) ...
46	ble,pn	%xcc, .dbytecp		! ... use the plain 2-byte loop
47	andcc	%o1, 6, %o5		! (delay slot) %o5 = src & 6
48	bz,pn	%xcc, .aldst		! src & 6 == 0: src needs no aligning
49	cmp	%o5, 4			! (delay slot) is src & 6 == 4
50	be,pt	%xcc, .s2algn		! yes: one 4-byte move aligns src
51	cmp	%o5, 6			! (delay slot) flags for the bne below
52.s1algn:lduh	[%o1], %o3		! src & 6 is 2 or 6: copy 2 bytes
53	inc	2, %o1
54	sth	%o3, [%g5]		! 2 bytes to dst
55	inc	2, %g5
56	bne,pt	%xcc, .s2algn		! src & 6 was 2: 4 more bytes to go
57	dec	2, %o2			! (delay slot) account for the 2 bytes
58	b	.ald			! src & 6 was 6: src done, now align dst
59	andcc	%g5, 6, %o5		! (delay slot) %o5 = dst & 6, tested at .ald
60
61.s2algn:lduw	[%o1], %o3		! src & 6 is 4 here: copy 4 bytes
62	inc	4, %o1
63	srlx	%o3, 16, %o4		! %o4 = upper 16 bits of the 32 loaded
64	sth	%o4, [%g5]		! stored as two 2-byte pieces because
65	sth	%o3, [%g5 + 2]		! dst & 6 has not been examined yet
66	inc	4, %g5
67	dec	4, %o2
68
69.aldst:	andcc	%g5, 6, %o5		! %o5 = dst & 6
70.ald:	bz,pn	%xcc, .w4cp		! 0: dst needs no aligning
71	cmp	%o5, 4			! (delay slot)
72	bz,pn	%xcc, .w2cp		! 4: a 4-byte store aligns dst
73	cmp	%o5, 6			! (delay slot) flags for the bne in .w3cp
74.w3cp:	ldx	[%o1], %o4		! dst & 6 is 2 or 6: load 8 src bytes
75	inc	8, %o1
76	srlx	%o4, 48, %o5		! top 16 bits of that word ...
77	sth	%o5, [%g5]		! ... stored as 2 bytes
78	bne,pt	%xcc, .w1cp		! dst & 6 was 2: 4 more bytes needed
79	inc	2, %g5			! (delay slot)
80	dec	2, %o2			! dst & 6 was 6: 2 bytes stored so far
81	andn	%o2, 6, %o3		! %o3 = %o2 with bits 1-2 cleared: loop count
82	sub	%o1, %g5, %o1		! %o1 = src - dst
83
841:	sllx	%o4, 16, %g1		! 48 unstored bits of previous word, moved up
85	ldx	[%o1+%g5], %o4		! next 8 src bytes
86	deccc	8, %o3			! sets the flags for the bnz below
87	srlx	%o4, 48, %o5		! top 16 bits of the new word
88	or	%o5, %g1, %g1		! merge with residual
89	stx	%g1, [%g5]		! 8 bytes to dst
90	bnz,pt	%xcc, 1b
91	inc	8, %g5			! (delay slot)
92	sub	%o1, 6, %o1		! only two bytes of the last word read were stored
93	b	7f			! finish in the 2-byte loop
94	and	%o2, 6, %o2		! (delay slot) leftover count: 0, 2, 4 or 6
95
96.w1cp:	srlx	%o4, 16, %o5		! bits 47..16 of the word into the low 32 bits
97	st	%o5, [%g5]		! 4 more bytes to dst
98	inc	4, %g5
99	dec	6, %o2			! 2 + 4 bytes stored so far
100	andn	%o2, 6, %o3		! %o3 = %o2 with bits 1-2 cleared: loop count
101	sub	%o1, %g5, %o1		! %o1 = src - dst
102
1032:	sllx	%o4, 48, %g1		! 16 unstored bits of previous word, moved up
104	ldx	[%o1+%g5], %o4		! next 8 src bytes
105	deccc	8, %o3			! sets the flags for the bnz below
106	srlx	%o4, 16, %o5		! top 48 bits of the new word
107	or	%o5, %g1, %g1		! merge with residual
108	stx	%g1, [%g5]		! 8 bytes to dst
109	bnz,pt	%xcc, 2b
110	inc	8, %g5			! (delay slot)
111	sub	%o1, 2, %o1		! six bytes of the last word read were stored
112	b	7f			! finish in the 2-byte loop
113	and	%o2, 6, %o2		! (delay slot) leftover count: 0, 2, 4 or 6
114
115.w2cp:	ldx	[%o1], %o4		! dst & 6 is 4: load 8 src bytes
116	inc	8, %o1
117	srlx	%o4, 32, %o5		! top 32 bits of that word ...
118	st	%o5, [%g5]		! ... stored as 4 bytes
119	inc	4, %g5
120	dec	4, %o2			! 4 bytes stored so far
121	andn	%o2, 6, %o3		! %o3 = %o2 with bits 1-2 cleared: loop count
122	sub	%o1, %g5, %o1		! %o1 = src - dst
123
1243:	sllx	%o4, 32, %g1		! 32 unstored bits of previous word, moved up
125	ldx	[%o1+%g5], %o4		! next 8 src bytes
126	deccc	8, %o3			! sets the flags for the bnz below
127	srlx	%o4, 32, %o5		! top 32 bits of the new word
128	or	%o5, %g1, %g1		! merge with residual
129	stx	%g1, [%g5]		! 8 bytes to dst
130	bnz,pt	%xcc, 3b
131	inc	8, %g5			! (delay slot)
132	sub	%o1, 4, %o1		! four bytes of the last word read were stored
133	b	7f			! finish in the 2-byte loop
134	and	%o2, 6, %o2		! (delay slot) leftover count: 0, 2, 4 or 6
135
136.w4cp:	andn	%o2, 6, %o3		! %o3 = %o2 with bits 1-2 cleared: loop count
137	sub	%o1, %g5, %o1		! %o1 = src - dst
138
1391:	ldx	[%o1+%g5], %o4		! read 8 bytes from src
140	deccc	8, %o3			! decrement count, set flags for bg
141	stx	%o4, [%g5]		! write 8 bytes at destination address
142	bg,pt	%xcc, 1b		! repeat while count > 0 (signed)
143	inc	8, %g5			! (delay slot) advance destination
144	b	7f			! finish in the 2-byte loop
145	and	%o2, 6, %o2		! (delay slot) leftover count: 0, 2, 4 or 6
146
147	!
148	! differenced 2-byte copy: src is addressed as [%o1+%g5], %o1 = src - dst
149	!
150.dbytecp:
151	b	7f			! enter the loop at its count test
152	sub	%o1, %g5, %o1		! (delay slot) %o1 = src - dst
153
1544:	sth	%o4, [%g5]		! write 2 bytes to destination
155	inc	2, %g5			! advance destination
1567:	deccc	2, %o2			! decrement count, set flags for bge
157	bge,a,pt %xcc,4b		! loop while the count stays >= 0 (signed)
158	lduh	[%o1+%g5], %o4		! (annulled delay slot) load only when looping
159.done:
160	retl
161	nop				! (delay slot)
162
163	SET_SIZE(__align_cpy_2)
164