xref: /titanic_50/usr/src/lib/libc/sparc/gen/strcpy.s (revision c8a7b5101f0bf7200e2de00bf56d93ce653f100b)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29	.file	"%M%"
30
31/*
32 * strcpy(s1, s2)
33 *
34 * Copy string s2 to s1.  s1 must be large enough. Return s1.
35 *
36 * Fast assembler language version of the following C-program strcpy
37 * which represents the `standard' for the C-library.
38 *
39 *	char *
40 *	strcpy(s1, s2)
41 *	register char *s1;
42 *	register const char *s2;
43 *	{
44 *		char *os1 = s1;
45 *
46 *		while(*s1++ = *s2++)
47 *			;
48 *		return(os1);
49 *	}
50 *
51 */
52
53#include <sys/asm_linkage.h>
54
55	! This is a 32-bit implementation of strcpy.  It works by
56	! first checking the alignment of its source pointer.  If the
57	! source is not word aligned, it copies a byte and/or a half-word
58	! until it is.  Once this has occurred, the source is read a word
59	! at a time while checking for zero bytes, and the way each word
60	! is stored is chosen based upon destination alignment.
61	! Methods exist to handle per-byte, half-word, and word sized copies.
62
63	ENTRY(strcpy)
64
	! Register usage, as established by the code below:
	!   %o0  s1 (dst) on entry; never written, so it still holds s1
	!        when the function returns
	!   %o1  s2 (src) on entry; reused as the data register once
	!        src - dst has been saved in %o3
	!   %o2  running dst pointer
	!   %o3  src - dst, so [%o2 + %o3] is the source location that
	!        corresponds to dst location %o2 (scratch in .zerobyte)
	!   %o4  src & 3, scratch, then 0x01010101 (mask in .zerobyte)
	!   %o5  0x80808080 (mask in .zerobyte)
	!   %g1  scratch for the zero-byte test
	!
	! SPARC branches have a delay slot: the instruction following a
	! branch executes whether or not the branch is taken, except that
	! a conditional branch written with ",a" (annul) skips it when the
	! branch is not taken.  Many of the stores and pointer updates
	! below sit in delay slots on purpose.
65	.align 32
66
67	sub	%o1, %o0, %o3		! src - dst
68	andcc	%o1, 3, %o4		! src word aligned ?
69	bz	.srcaligned		! yup
70	mov	%o0, %o2		! save dst
71
	! src & 3 is 1, 2 or 3 here.  The ldub in the delay slot runs on
	! both paths; overwriting %o1 is fine because the source is now
	! addressed as %o2 + %o3.
72	cmp	%o4, 2			! src halfword aligned
73	be	.s2aligned		! yup
74	ldub	[%o2 + %o3], %o1	! src[0]
	! stb does not change the condition codes, so bz still tests the
	! result of tst: the byte is stored first, even when it is the
	! terminator.  The cmp sits in bz's delay slot and sets up the
	! second bz.
75	tst	%o1			! byte zero?
76	stb	%o1, [%o2]		! store first byte
77	bz	.done			! yup, done
78	cmp	%o4, 3			! only one byte needed to align?
79	bz	.srcaligned		! yup
80	inc	%o2			! src++, dst++
81
	! The source position (%o2 + %o3) is halfword aligned but not word
	! aligned here.  Copy two bytes, stopping after the first zero
	! byte.  Each stb is in the delay slot of its bz, so a zero byte is
	! itself stored (as the terminator) before branching to .done.
82.s2aligned:
83	lduh	[%o2 + %o3], %o1	! src[]
84	srl	%o1, 8, %o4		! %o4<7:0> = first byte
85	tst	%o4			! first byte zero ?
86	bz	.done			! yup, done
87	stb	%o4, [%o2]		! store first byte
88	andcc	%o1, 0xff, %g0		! second byte zero ?
89	bz	.done			! yup, done
90	stb	%o1, [%o2 + 1]		! store second byte
91	add	%o2, 2, %o2		! src += 2, dst += 2
92
	! The source position (%o2 + %o3) is word aligned from here on.
	! Build the two constants used by the zero-byte test and select a
	! store loop from dst & 3.  The final or is in bnz's delay slot, so
	! %o5 is complete on both paths.
93.srcaligned:
94	sethi	%hi(0x01010101), %o4	! Alan Mycroft's magic1
95	sethi	%hi(0x80808080), %o5	! Alan Mycroft's magic2
96	or	%o4, %lo(0x01010101), %o4
97	andcc	%o2, 3, %o1		! destination word aligned?
98	bnz	.dstnotaligned		! nope
99	or	%o5, %lo(0x80808080), %o5
100
	! dst is word aligned too: load, test and store whole words.  The
	! sub clobbers %o1 and the add restores it before it is stored or
	! examined; add does not touch the condition codes set by andcc.
	! bz,a annuls the st when the branch is not taken, so a word that
	! contains a zero byte is not stored here and falls into .zerobyte.
101.copyword:
102	lduw	[%o2 + %o3], %o1	! src word
103	add	%o2, 4, %o2		! src += 4, dst += 4
104	andn	%o5, %o1, %g1		! ~word & 0x80808080
105	sub	%o1, %o4, %o1		! word - 0x01010101
106	andcc	%o1, %g1, %g0		! ((word - 0x01010101) & ~word & 0x80808080)
107	add	%o1, %o4, %o1		! restore word
108	bz,a	.copyword		! no zero byte if magic expression == 0
109	st	%o1, [%o2 - 4]		! store word to dst (address pre-incremented)
110
	! Common tail for all three loops.  %o1 holds a source word that
	! contains a zero byte and %o2 has already been advanced by 4,
	! hence the negative offsets.  Bytes are stored one at a time up to
	! and including the first zero byte; each stb is in the delay slot
	! of its bz, so the zero byte itself gets written.  %o3, %o4 and
	! %o5 are reused as scratch: the loop state is no longer needed.
111.zerobyte:
112	set	0xff000000, %o4		! mask for 1st byte
113	srl	%o1, 24, %o3		! %o3<7:0> = first byte
114	andcc	%o1, %o4, %g0		! first byte zero?
115	bz	.done			! yup, done
116	stb	%o3, [%o2 - 4]		! store first byte
117	set	0x00ff0000, %o5		! mask for 2nd byte
118	srl	%o1, 16, %o3		! %o3<7:0> = second byte
119	andcc	%o1, %o5, %g0		! second byte zero?
120	bz	.done			! yup, done
121	stb	%o3, [%o2 - 3]		! store second byte
122	srl	%o4, 16, %o4		! 0x0000ff00 = mask for 3rd byte
123	andcc	%o1, %o4, %g0		! third byte zero?
124	srl	%o1, 8, %o3		! %o3<7:0> = third byte
125	bz	.done			! yup, done
126	stb	%o3, [%o2 - 2]		! store third byte
	! The first three bytes were nonzero, so the fourth byte is the
	! zero byte; storing it terminates the string.
127	stb	%o1, [%o2 - 1]		! store fourth byte
128
	! retl has no instruction of its own in its delay slot: the cmp at
	! .dstnotaligned follows it in memory and executes there.  That cmp
	! only sets condition codes.
	! NOTE(review): .empty presumably tells the assembler that leaving
	! the slot to the following labelled instruction is intentional --
	! confirm against the assembler manual.
129.done:
130	retl				! done with leaf function
131	.empty
132
	! %o1 = dst & 3 (nonzero), from the andcc at .srcaligned.
	! be,a: the lduw in the annulled delay slot runs only when the
	! branch is taken, standing in for the load at .storehalfword that
	! the branch skips.
133.dstnotaligned:
134	cmp	%o1, 2			! dst half word aligned?
135	be,a	.storehalfword2		! yup, store half word at a time
136	lduw	[%o2 + %o3], %o1	! src word
137
	! dst & 3 is 1 or 3: each source word is written as byte, halfword,
	! byte.  The zero-byte test is computed in %g1 so that %o1 is still
	! intact if control goes to .zerobyte.  The srl in bnz's delay slot
	! runs on both paths; its result is only used on fall-through.
138.storebyte:
139	lduw	[%o2 + %o3], %o1	! src word
140	add	%o2, 4, %o2		! src += 4, dst += 4
141	sub	%o1, %o4, %g1		! x - 0x01010101
142	andn	%g1, %o1, %g1		! (x - 0x01010101) & ~x
143	andcc	%g1, %o5, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
144	bnz	.zerobyte		! word has zero byte, handle end cases
145	srl	%o1, 24, %g1		! %g1<7:0> = first byte
146	stb	%g1, [%o2 - 4]		! store first byte; half-word aligned now
147	srl	%o1, 8, %g1		! %g1<15:0> = byte 2, 3
148	sth	%g1, [%o2 - 3]		! store bytes 2, 3
149	ba	.storebyte		! next word
150	stb	%o1, [%o2 - 1]		! store fourth byte
151
	! dst & 3 is 2: each source word is written as two halfwords.
	! Entered at .storehalfword2 the first time (word already loaded by
	! the annulled delay slot above) and at .storehalfword thereafter.
152.storehalfword:
153	lduw	[%o2 + %o3], %o1	! src word
154.storehalfword2:
155	add	%o2, 4, %o2		! src += 4, dst += 4
156	sub	%o1, %o4, %g1		! x - 0x01010101
157	andn	%g1, %o1, %g1		! (x - 0x01010101) & ~x
158	andcc	%g1, %o5, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
159	bnz	.zerobyte		! word has zero byte, handle end cases
160	srl	%o1, 16, %g1		! get first and second byte
161	sth	%g1, [%o2 - 4]		! store first and second byte
162	ba	.storehalfword		! next word
163	sth	%o1, [%o2 - 2]		! store third and fourth byte
164
165	! DO NOT remove these NOPs. It will slow down the halfword loop by 15%
166
167	nop				! padding
168	nop				! padding
169
170	SET_SIZE(strcpy)
171
172