xref: /titanic_44/usr/src/lib/libc/sparc/gen/strcpy.s (revision 381a2a9a387f449fab7d0c7e97c4184c26963abf)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27.ident	"%Z%%M%	%I%	%E% SMI"
28
29	.file	"%M%"
30
31/*
32 * strcpy(s1, s2)
33 *
34 * Copy string s2 to s1.  s1 must be large enough. Return s1.
35 *
36 * Fast assembler language version of the following C-program strcpy
37 * which represents the `standard' for the C-library.
38 *
39 *	char *
40 *	strcpy(s1, s2)
41 *	register char *s1;
42 *	register const char *s2;
43 *	{
44 *		char *os1 = s1;
45 *
46 *		while(*s1++ = *s2++)
47 *			;
48 *		return(os1);
49 *	}
50 *
51 */
52
53#include <sys/asm_linkage.h>
54#include "synonyms.h"
55
56	! This is a 32-bit implementation of strcpy.  It works by
57	! first checking the alignment of its source pointer and, if
58	! it is not word aligned, copying bytes until it is.  Once the
59	! source is aligned, the copy proceeds one word at a time,
60	! checking each word for a zero byte, with the store method
61	! chosen by destination alignment.  Methods exist to handle
62	! per-byte, half-word, and word sized copies.
63
64	ENTRY(strcpy)
	/*
	 * Register usage in this routine:
	 *
	 *   %o0  dst on entry.  Never written below, so it still holds
	 *        dst (the return value) when retl executes.
	 *   %o1  src on entry; afterwards the byte, halfword or word most
	 *        recently loaded (and briefly dst & 3 at .srcaligned).
	 *   %o2  running destination pointer.
	 *   %o3  src - dst, so [%o2 + %o3] addresses the source byte that
	 *        corresponds to destination address %o2.  Reused as a
	 *        scratch byte register in .zerobyte.
	 *   %o4  src & 3 while aligning the source, then 0x01010101, then
	 *        a byte mask in .zerobyte.
	 *   %o5  0x80808080, then a byte mask in .zerobyte.
	 *   %g1  scratch.
	 *
	 * The instruction after each branch is its delay slot.  For the
	 * plain conditional branches (bz, bnz, be) it executes whether or
	 * not the branch is taken; for the annulled forms (bz,a / be,a) it
	 * executes only when the branch is taken.
	 */
65
66	.align 32
67
	/*
	 * Align the source pointer to a word boundary.  %o4 = src & 3:
	 *   0  already aligned: go straight to .srcaligned
	 *   2  copy one halfword at .s2aligned
	 *   3  copy one byte, then go to .srcaligned
	 *   1  copy one byte, then fall into .s2aligned for a halfword
	 * The ldub is in the delay slot of "be .s2aligned", so it runs on
	 * both paths; on the taken path %o1 is reloaded by the lduh.
	 * The first byte is stored before the "bz .done" test, so a
	 * terminating zero byte is written before returning.
	 */
68	sub	%o1, %o0, %o3		! src - dst
69	andcc	%o1, 3, %o4		! src word aligned ?
70	bz	.srcaligned		! yup
71	mov	%o0, %o2		! save dst
72
73	cmp	%o4, 2			! src halfword aligned
74	be	.s2aligned		! yup
75	ldub	[%o2 + %o3], %o1	! src[0]
76	tst	%o1			! byte zero?
77	stb	%o1, [%o2]		! store first byte
78	bz	.done			! yup, done
79	cmp	%o4, 3			! only one byte needed to align?
80	bz	.srcaligned		! yup
81	inc	%o2			! src++, dst++
82
	/*
	 * Copy two bytes from a halfword-aligned source.  Each stb sits in
	 * the delay slot of the preceding "bz .done", so the byte is stored
	 * whether or not the branch is taken; a zero byte is therefore
	 * written to the destination before the routine returns.
	 */
83.s2aligned:
84	lduh	[%o2 + %o3], %o1	! src[]
85	srl	%o1, 8, %o4		! %o4<7:0> = first byte
86	tst	%o4			! first byte zero ?
87	bz	.done			! yup, done
88	stb	%o4, [%o2]		! store first byte
89	andcc	%o1, 0xff, %g0		! second byte zero ?
90	bz	.done			! yup, done
91	stb	%o1, [%o2 + 1]		! store second byte
92	add	%o2, 2, %o2		! src += 2, dst += 2
93
	/*
	 * Source is now word aligned.  Build the two constants used by the
	 * zero-byte test in %o4 and %o5, and pick a store loop from
	 * %o1 = dst & 3: zero falls into .copyword, anything else goes to
	 * .dstnotaligned.
	 */
94.srcaligned:
95	sethi	%hi(0x01010101), %o4	! Alan Mycroft's magic1
96	sethi	%hi(0x80808080), %o5	! Alan Mycroft's magic2
97	or	%o4, %lo(0x01010101), %o4
98	andcc	%o2, 3, %o1		! destination word aligned?
99	bnz	.dstnotaligned		! nope
100	or	%o5, %lo(0x80808080), %o5
101
	/*
	 * Word-aligned destination: load a word, advance %o2, test the word
	 * for a zero byte, and store it whole if there is none.  %o1 is
	 * temporarily (word - 0x01010101) for the test and is restored by
	 * the add, which does not alter the condition codes set by andcc.
	 * Because the branch is bz,a the st in its delay slot runs only
	 * when the branch is taken; a word containing a zero byte is not
	 * stored here and control falls into .zerobyte instead.
	 */
102.copyword:
103	lduw	[%o2 + %o3], %o1	! src word
104	add	%o2, 4, %o2		! src += 4, dst += 4
105	andn	%o5, %o1, %g1		! ~word & 0x80808080
106	sub	%o1, %o4, %o1		! word - 0x01010101
107	andcc	%o1, %g1, %g0		! ((word - 0x01010101) & ~word & 0x80808080)
108	add	%o1, %o4, %o1		! restore word
109	bz,a	.copyword		! no zero byte if magic expression == 0
110	st	%o1, [%o2 - 4]		! store word to dst (address pre-incremented)
111
	/*
	 * Final word.  Reached by falling out of .copyword or by branching
	 * from .storebyte / .storehalfword, in every case with %o1 holding
	 * the last word loaded and %o2 already advanced by 4.  Bytes are
	 * stored one at a time starting from bits <31:24>, stopping after
	 * the first zero byte (each stb is in the delay slot of its bz and
	 * so also stores the zero).  If the first three bytes are nonzero
	 * the fourth is stored without a test.  %o3, %o4 and %o5 are
	 * overwritten here; none of them is read again before retl.
	 */
112.zerobyte:
113	set	0xff000000, %o4		! mask for 1st byte
114	srl	%o1, 24, %o3		! %o3<7:0> = first byte
115	andcc	%o1, %o4, %g0		! first byte zero?
116	bz	.done			! yup, done
117	stb	%o3, [%o2 - 4]		! store first byte
118	set	0x00ff0000, %o5		! mask for 2nd byte
119	srl	%o1, 16, %o3		! %o3<7:0> = second byte
120	andcc	%o1, %o5, %g0		! second byte zero?
121	bz	.done			! yup, done
122	stb	%o3, [%o2 - 3]		! store second byte
123	srl	%o4, 16, %o4		! 0x0000ff00 = mask for 3rd byte
124	andcc	%o1, %o4, %g0		! third byte zero?
125	srl	%o1, 8, %o3		! %o3<7:0> = third byte
126	bz	.done			! yup, done
127	stb	%o3, [%o2 - 2]		! store third byte
128	stb	%o1, [%o2 - 1]		! store fourth byte
129
	/*
	 * Common exit.  No instruction is placed after retl here, so the
	 * next instruction emitted, the cmp at .dstnotaligned, occupies its
	 * delay slot; that cmp writes only the condition codes.
	 * NOTE(review): .empty is presumably what stops the assembler from
	 * complaining about that delay slot; confirm against the assembler
	 * manual.
	 */
130.done:
131	retl				! done with leaf function
132	.empty
133
	/*
	 * Destination is not word aligned; %o1 = dst & 3 (1, 2 or 3).
	 * 2 selects the halfword loop, 1 or 3 the byte/halfword/byte loop.
	 * The branch is be,a, so the lduw in its delay slot runs only when
	 * the branch is taken; it stands in for the load at .storehalfword,
	 * which is skipped by entering at .storehalfword2.
	 */
134.dstnotaligned:
135	cmp	%o1, 2			! dst half word aligned?
136	be,a	.storehalfword2		! yup, store half word at a time
137	lduw	[%o2 + %o3], %o1	! src word
138
	/*
	 * Destination at an odd address: each source word is stored as
	 * one byte, one halfword, one byte.  The srl in the delay slot of
	 * bnz runs on both paths; .zerobyte does not read %g1, so the
	 * value is simply unused when the branch is taken.
	 */
139.storebyte:
140	lduw	[%o2 + %o3], %o1	! src word
141	add	%o2, 4, %o2		! src += 4, dst += 4
142	sub	%o1, %o4, %g1		! x - 0x01010101
143	andn	%g1, %o1, %g1		! (x - 0x01010101) & ~x
144	andcc	%g1, %o5, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
145	bnz	.zerobyte		! word has zero byte, handle end cases
146	srl	%o1, 24, %g1		! %g1<7:0> = first byte
147	stb	%g1, [%o2 - 4]		! store first byte; half-word aligned now
148	srl	%o1, 8, %g1		! %g1<15:0> = byte 2, 3
149	sth	%g1, [%o2 - 3]		! store bytes 2, 3
150	ba	.storebyte		! next word
151	stb	%o1, [%o2 - 1]		! store fourth byte
152
	/*
	 * Destination halfword aligned but not word aligned: each source
	 * word is stored as two halfwords.  As in .storebyte, the srl in
	 * the bnz delay slot runs on both paths and its result is unused
	 * by .zerobyte.
	 */
153.storehalfword:
154	lduw	[%o2 + %o3], %o1	! src word
155.storehalfword2:
156	add	%o2, 4, %o2		! src += 4, dst += 4
157	sub	%o1, %o4, %g1		! x - 0x01010101
158	andn	%g1, %o1, %g1		! (x - 0x01010101) & ~x
159	andcc	%g1, %o5, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
160	bnz	.zerobyte		! word has zero byte, handle end cases
161	srl	%o1, 16, %g1		! get first and second byte
162	sth	%g1, [%o2 - 4]		! store first and second byte
163	ba	.storehalfword		! next word
164	sth	%o1, [%o2 - 2]		! store third and fourth byte
165
166	! DO NOT remove these NOPs. It will slow down the halfword loop by 15%
167
168	nop				! padding
169	nop				! padding
170
171	SET_SIZE(strcpy)
172
173