xref: /titanic_52/usr/src/lib/libc/sparc/gen/strcpy.s (revision d14abf155341d55053c76eeec58b787a456b753b)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.file	"strcpy.s"
28
29/*
30 * strcpy(s1, s2)
31 *
32 * Copy string s2 to s1.  s1 must be large enough. Return s1.
33 *
34 * Fast assembler language version of the following C-program strcpy
35 * which represents the `standard' for the C-library.
36 *
37 *	char *
38 *	strcpy(s1, s2)
39 *	register char *s1;
40 *	register const char *s2;
41 *	{
42 *		char *os1 = s1;
43 *
44 *		while(*s1++ = *s2++)
45 *			;
46 *		return(os1);
47 *	}
48 *
49 */
50
51#include <sys/asm_linkage.h>
52
53	! This is a 32-bit implementation of strcpy.  It works by
54	! first checking the alignment of its source pointer and,
55	! if it is not word aligned, copying bytes until it is.
56	! Once this has occurred, the copy takes place a word at a
57	! time, while checking for zero bytes, using stores chosen by
58	! destination alignment.  Separate loops handle byte, half-word,
59	! and word sized stores.
60
61	ENTRY(strcpy)
/*
 * Leaf routine: copies the NUL-terminated string at s2 (%o1) to s1 (%o0).
 * %o0 is never written, so s1 is still in %o0 at the retl.
 *
 * Register usage, as established by the code below:
 *	%o0	s1 (dst); left untouched
 *	%o1	s2 (src) on entry; afterwards the byte, halfword or word
 *		most recently loaded from the source
 *	%o2	running destination pointer
 *	%o3	src - dst, so [%o2 + %o3] addresses the source data that
 *		corresponds to destination address %o2; one pointer
 *		increment therefore advances both src and dst
 *	%o4	src & 3 during the alignment prologue, then 0x01010101
 *	%o5	0x80808080
 *	%g1	scratch
 * .zerobyte reuses %o3, %o4 and %o5 as scratch; every path through
 * .zerobyte ends at .done, so the loop values are no longer needed.
 *
 * Once the source is word aligned it is read a whole aligned word at a
 * time, so up to three bytes following the terminator (within the same
 * aligned word) may be read, though never stored.
 *
 * Many instructions below sit in branch delay slots.  A delay-slot
 * instruction of a plain conditional branch executes whether or not the
 * branch is taken; with the ",a" (annul) suffix it executes only when
 * the conditional branch is taken.
 */
62
63	.align 32
64
65	sub	%o1, %o0, %o3		! src - dst
66	andcc	%o1, 3, %o4		! src word aligned ?
67	bz	.srcaligned		! yup
68	mov	%o0, %o2		! save dst
69
/*
 * Source is not word aligned: %o4 = src & 3 is 1, 2 or 3.
 *	%o4 == 2: halfword aligned; the two-byte copy at .s2aligned
 *		  brings the source to a word boundary.
 *	%o4 == 3: the single byte copied here is enough.
 *	%o4 == 1: copy one byte here, then fall into .s2aligned for
 *		  two more.
 * The ldub is in the delay slot of "be .s2aligned", so it also runs when
 * that branch is taken; its result is then overwritten by the lduh.
 * "cmp %o4, 3" is in the delay slot of "bz .done"; that bz tests the
 * condition codes set by tst (stb does not alter them), and the bz that
 * follows tests the result of the cmp.
 */
70	cmp	%o4, 2			! src halfword aligned
71	be	.s2aligned		! yup
72	ldub	[%o2 + %o3], %o1	! src[0]
73	tst	%o1			! byte zero?
74	stb	%o1, [%o2]		! store first byte
75	bz	.done			! yup, done
76	cmp	%o4, 3			! only one byte needed to align?
77	bz	.srcaligned		! yup
78	inc	%o2			! src++, dst++
79
/*
 * Source is halfword aligned but not word aligned.  Load two source bytes
 * at once and store them individually (bits 15:8 hold the first byte,
 * bits 7:0 the second), returning as soon as a zero byte has been stored.
 * Each stb sits in the delay slot of the preceding bz, so the terminator
 * itself is written before the branch to .done takes effect.
 */
80.s2aligned:
81	lduh	[%o2 + %o3], %o1	! src[]
82	srl	%o1, 8, %o4		! %o4<7:0> = first byte
83	tst	%o4			! first byte zero ?
84	bz	.done			! yup, done
85	stb	%o4, [%o2]		! store first byte
86	andcc	%o1, 0xff, %g0		! second byte zero ?
87	bz	.done			! yup, done
88	stb	%o1, [%o2 + 1]		! store second byte
89	add	%o2, 2, %o2		! src += 2, dst += 2
90
/*
 * Source is now word aligned.  Build the two constants used by the
 * zero-byte test and select a copy loop from the destination alignment
 * (%o1 = dst & 3):
 *	0	.copyword	(word stores)
 *	2	.storehalfword2	(halfword stores)
 *	1, 3	.storebyte	(byte, halfword, byte stores)
 */
91.srcaligned:
92	sethi	%hi(0x01010101), %o4	! Alan Mycroft's magic1
93	sethi	%hi(0x80808080), %o5	! Alan Mycroft's magic2
94	or	%o4, %lo(0x01010101), %o4
95	andcc	%o2, 3, %o1		! destination word aligned?
96	bnz	.dstnotaligned		! nope
97	or	%o5, %lo(0x80808080), %o5
98
/*
 * Word loop: source and destination are both word aligned.
 * %o2 is advanced before the test, hence the "- 4" in the store address.
 * The subtraction is done in place in %o1 and undone by the add, which
 * does not disturb the condition codes set by andcc.  The store is in the
 * delay slot of an annulled branch, so it happens only when the loop
 * continues; when a zero byte is detected nothing has been stored yet and
 * control falls into .zerobyte with the original word back in %o1.
 */
99.copyword:
100	lduw	[%o2 + %o3], %o1	! src word
101	add	%o2, 4, %o2		! src += 4, dst += 4
102	andn	%o5, %o1, %g1		! ~word & 0x80808080
103	sub	%o1, %o4, %o1		! word - 0x01010101
104	andcc	%o1, %g1, %g0		! ((word - 0x01010101) & ~word & 0x80808080)
105	add	%o1, %o4, %o1		! restore word
106	bz,a	.copyword		! no zero byte if magic expression == 0
107	st	%o1, [%o2 - 4]		! store word to dst (address pre-incremented)
108
/*
 * Final word.  On entry %o1 holds a source word known to contain a zero
 * byte and %o2 has already been advanced by 4, so the word's four bytes
 * belong at [%o2 - 4] .. [%o2 - 1].  Bytes are stored one at a time from
 * the most significant down, up to and including the first zero byte.
 * Each stb is in the delay slot of its bz, so the zero byte is stored
 * before returning.  If the first three bytes are all non-zero, the
 * fourth must be the terminator and is stored without a test.
 * Reached by falling out of .copyword and by branches from .storebyte
 * and .storehalfword2.
 */
109.zerobyte:
110	set	0xff000000, %o4		! mask for 1st byte
111	srl	%o1, 24, %o3		! %o3<7:0> = first byte
112	andcc	%o1, %o4, %g0		! first byte zero?
113	bz	.done			! yup, done
114	stb	%o3, [%o2 - 4]		! store first byte
115	set	0x00ff0000, %o5		! mask for 2nd byte
116	srl	%o1, 16, %o3		! %o3<7:0> = second byte
117	andcc	%o1, %o5, %g0		! second byte zero?
118	bz	.done			! yup, done
119	stb	%o3, [%o2 - 3]		! store second byte
120	srl	%o4, 16, %o4		! 0x0000ff00 = mask for 3rd byte
121	andcc	%o1, %o4, %g0		! third byte zero?
122	srl	%o1, 8, %o3		! %o3<7:0> = third byte
123	bz	.done			! yup, done
124	stb	%o3, [%o2 - 2]		! store third byte
125	stb	%o1, [%o2 - 1]		! store fourth byte
126
/*
 * Common exit.  No instruction is written in retl's delay slot; the next
 * instruction in the text is "cmp %o1, 2" at .dstnotaligned, which only
 * sets condition codes.
 * NOTE(review): .empty is presumably there to suppress the assembler's
 * complaint about that delay slot -- confirm against the assembler manual.
 */
127.done:
128	retl				! done with leaf function
129	.empty
130
/*
 * Destination is not word aligned; %o1 = dst & 3 (1, 2 or 3).
 * For dst & 3 == 2 the annulled branch enters the halfword loop at
 * .storehalfword2 with the first source word already loaded by the
 * delay-slot lduw (which runs only when the branch is taken).
 * Otherwise fall into the byte/halfword/byte loop.
 */
131.dstnotaligned:
132	cmp	%o1, 2			! dst half word aligned?
133	be,a	.storehalfword2		! yup, store half word at a time
134	lduw	[%o2 + %o3], %o1	! src word
135
/*
 * dst & 3 is 1 or 3: after one byte store the destination is halfword
 * aligned, so each source word is written as byte, halfword, byte.
 * The srl in the delay slot of bnz runs on both paths; .zerobyte does
 * not read %g1, so that is harmless when the branch is taken.
 */
136.storebyte:
137	lduw	[%o2 + %o3], %o1	! src word
138	add	%o2, 4, %o2		! src += 4, dst += 4
139	sub	%o1, %o4, %g1		! x - 0x01010101
140	andn	%g1, %o1, %g1		! (x - 0x01010101) & ~x
141	andcc	%g1, %o5, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
142	bnz	.zerobyte		! word has zero byte, handle end cases
143	srl	%o1, 24, %g1		! %g1<7:0> = first byte
144	stb	%g1, [%o2 - 4]		! store first byte; half-word aligned now
145	srl	%o1, 8, %g1		! %g1<15:0> = byte 2, 3
146	sth	%g1, [%o2 - 3]		! store bytes 2, 3
147	ba	.storebyte		! next word
148	stb	%o1, [%o2 - 1]		! store fourth byte
149
/*
 * dst & 3 == 2: each source word is written as two halfword stores.
 * .storehalfword is the loop-back entry and loads the next word;
 * .storehalfword2 is the first-iteration entry from .dstnotaligned,
 * where the word has already been loaded.
 */
150.storehalfword:
151	lduw	[%o2 + %o3], %o1	! src word
152.storehalfword2:
153	add	%o2, 4, %o2		! src += 4, dst += 4
154	sub	%o1, %o4, %g1		! x - 0x01010101
155	andn	%g1, %o1, %g1		! (x - 0x01010101) & ~x
156	andcc	%g1, %o5, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
157	bnz	.zerobyte		! word has zero byte, handle end cases
158	srl	%o1, 16, %g1		! get first and second byte
159	sth	%g1, [%o2 - 4]		! store first and second byte
160	ba	.storehalfword		! next word
161	sth	%o1, [%o2 - 2]		! store third and fourth byte
162
163	! DO NOT remove these NOPs. It will slow down the halfword loop by 15%
164
165	nop				! padding
166	nop				! padding
167
168	SET_SIZE(strcpy)
169
170