xref: /titanic_52/usr/src/lib/libc/sparc/gen/strncpy.s (revision 734b6a94890be549309b21156f8ed6d4561cac51)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27
28.ident	"%Z%%M%	%I%	%E% SMI"
29
30	.file	"%M%"
31
32/*
33 * strncpy(s1, s2, n)
34 *
35 * Copy string s2 to s1, truncating or null-padding so that exactly n bytes
36 * are always written; returns s1.
37 *
38 * Fast assembler language version of the following C-program for strncpy
39 * which represents the `standard' for the C-library.
40 *
41 *	char *
42 *	strncpy(char *s1, const char *s2, size_t n)
43 *	{
44 *		char *os1 = s1;
45 *
46 *		n++;
47 *		while ((--n != 0) &&  ((*s1++ = *s2++) != '\0'))
48 *			;
49 *		if (n != 0)
50 *			while (--n != 0)
51 *				*s1++ = '\0';
52 *		return (os1);
53 *	}
54 */
55
56#include <sys/asm_linkage.h>
57#include "synonyms.h"
58
59	! strncpy works similarly to strcpy, except that at most n bytes of
60	! s2 are copied to s1. If a null character is reached in s2 while
61	! bytes remain to be written, strncpy fills the rest of the
62	! destination with null bytes.
63	!
64	! This implementation first copies single bytes until the src ptr
65	! is word aligned.  Then, the string is copied a word at a time,
66	! with the store width chosen by destination alignment (byte,
67	! half-word or word).
68
69	ENTRY(strncpy)
70
	!
	! char *strncpy(char *s1, const char *s2, size_t n)
	!
	! In:	%o0 = s1 (dst), %o1 = s2 (src), %o2 = n
	! Out:	%o0 = s1
	!
	! Indexing scheme: both pointers are moved to their end (dst + n,
	! src + n) and one negative index, starting at -n, counts up
	! towards zero.  [end + index] addresses the current byte, and the
	! condition codes produced by bumping the index double as the
	! "n exhausted" test.
	!
	! Short path (n < 7) runs as a leaf in the caller window:
	!	%o2 = dst + n, %o3 = src + n, %o4 = index, %o5 = current byte
	!
	! Long path (n >= 7) opens a register window, so the values set up
	! in the %o registers are then addressed as %i registers:
	!	%i0 = s1 (handed back by the restore at .done)
	!	%i1 = current src byte / word
	!	%i2 = dst + n (dst + n - 4 inside the word copy loops,
	!	      scratch in the fill code)
	!	%i3 = src + n
	!	%i4 = index
	!	%i5 = -(bytes until src is word aligned), later 0x80808080
	!	%l0 = dst pointer or scratch
	!	%l1 = 0x01010101, later fill counters
	!	%g1 = scratch
	!
	! A word x contains a zero byte exactly when
	!	(x - 0x01010101) & ~x & 0x80808080
	! is non-zero; the word loops use this to spot the end of src.
	!
	! NOTE(review): the .align below follows the ENTRY() line.  If the
	! macro defines the symbol at that point (its body is not visible
	! in this file), any padding emitted lies between the symbol and
	! the first instruction -- confirm the assembler pads with nops.
	!
71	.align 32
72	subcc	%g0, %o2, %o4		! index = -n; sets Z when n == 0
73	bz	.doneshort		! if n == 0, done
74	cmp	%o2, 7			! (delay slot) n < 7 ? tested by blu below
75	add	%o1, %o2, %o3		! %o3 = src + n
76	blu	.shortcpy		! n < 7, use byte-wise copy
77	add	%o0, %o2, %o2		! (delay slot) %o2 = dst + n
78	andcc	%o1, 3, %o5		! src word aligned ?
79	bz	.wordaligned		! yup
80	save	%sp, -0x40, %sp		! (delay slot) create new register window
81	sub	%i5, 4, %i5		! (src & 3) - 4 = -(bytes until src aligned)
82	nop				! align loop on 16-byte boundary
83	nop				! align loop on 16-byte boundary
84
	! copy single bytes until src is word aligned, n runs out, or the
	! terminating null byte of src has been copied
85.alignsrc:
86	ldub	[%i3 + %i4], %i1	! src[]
87	stb	%i1, [%i2 + %i4]	! dst[] = src[]
88	inccc	%i4			! src++, dst++, n--
89	bz	.done			! n == 0, done
90	tst     %i1			! end of src reached (null byte) ?
91	bz,a	.bytepad		! yes, at least one byte to pad here
92	add 	%i2, %i4, %l0		! (annulled unless taken) dest pointer for fill
93	inccc	%i5			! src aligned now?
94	bnz	.alignsrc		! no, copy another byte
95	.empty
	! the delay slot of the bnz above is the first instruction of
	! .wordaligned; it only recomputes %l0 from %i2 and %i4
96
97.wordaligned:
98	add	%i2, %i4, %l0		! dst
99	sethi	%hi(0x01010101), %l1	! Alan Mycroft's magic1
100	sub	%i2, 4, %i2		! adjust for dest pre-incr in cpy loops
101	or	%l1, %lo(0x01010101),%l1!  finish loading magic1
102	andcc	%l0, 3, %g1		! destination word aligned ?
103	bnz	.dstnotaligned		! nope
104	sll	%l1, 7, %i5		! create Alan Mycroft's magic2
105
	! src and dst both word aligned: copy a word per iteration
106.storeword:
107	lduw	[%i3 + %i4], %i1	! src word
108	addcc	%i4, 4, %i4		! n -= 4, src += 4, dst += 4
109	bcs	.lastword		! if counter wraps, last word
110	andn	%i5, %i1, %g1		! ~word & 0x80808080
111	sub	%i1, %l1, %l0		! word - 0x01010101
112	andcc	%l0, %g1, %g0		! ((word - 0x01010101) & ~word & 0x80808080)
113	bz,a	.storeword		! no zero byte if magic expression == 0
114	stw	%i1, [%i2 + %i4]	! (only if looping) store word to dst (address pre-incremented)
115
116	! n has not expired, but src is at the end. we need to push out the
117	! remaining src bytes and then start padding with null bytes
118
	! once a zero byte has been seen, %i1 is cleared so that every
	! later byte of this word is stored as zero as well
119.zerobyte:
120	add	%i2, %i4, %l0		! pointer to dest string
121	srl	%i1, 24, %g1		! first byte
122	stb	%g1, [%l0]		! store it
123	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
124	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
125	andn	%i1, %g1, %i1		! if byte == 0, start padding with null bytes
126	srl	%i1, 16, %g1		! second byte
127	stb	%g1, [%l0 + 1]		! store it
128	and	%g1, 0xff, %g1		! isolate byte
129	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
130	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
131	andn	%i1, %g1, %i1		! if byte == 0, start padding with null bytes
132	srl	%i1, 8, %g1		! third byte
133	stb	%g1, [%l0 + 2]		! store it
134	and	%g1, 0xff, %g1		! isolate byte
135	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
136	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
137	andn	%i1, %g1, %i1		! if byte == 0, start padding with null bytes
138	stb	%i1, [%l0 + 3]		! store fourth byte
139	addcc	%i4, 8, %g0		! number of pad bytes <= 8 ? (carry set if so)
140	bcs	.bytepad		! yes, do simple byte wise fill
141	add	%l0, 4, %l0		! dst += 4
142	andcc	%l0, 3, %l1		! dst offset relative to word boundary
143	bz	.fillaligned		! dst already word aligned
144
145	! here there is at least one more byte to zero out: otherwise we would
146	! have exited through label .lastword
147
148	sub	%l1, 4, %l1		! (delay slot) -(bytes to align dst to word boundary)
149.makealigned:
150	stb	%g0, [%l0]		! dst[] = 0
151	addcc	%i4, 1, %i4		! n--
152	bz	.done			! n == 0, we are done
153	addcc	%l1, 1, %l1		! any more byte needed to align
154	bnz	.makealigned		! yup, pad another byte
155	add	%l0, 1, %l0		! dst++
156	nop				! pad to align copy loop below
157
158	! here we know that there are at least another 4 bytes to pad, since
159	! we don't get here unless there were more than 8 bytes to pad to
160	! begin with, and we have padded at most 3 bytes during dst aligning
161
	! the fill loop below stores two words per pass, so when the number
	! of whole words left is odd one word is stored up front here
162.fillaligned:
163	add	%i4, 3, %i2		! round up to next word boundary
164	and	%i2, -4, %l1		! -(bytes in whole words left to pad)
165	and	%i2, 4, %i2		! word count odd ? 4 : 0
166	stw	%g0, [%l0]		! store first word
167	addcc	%l1, %i2, %l1		! word count == 1 ? (result is zero if so)
168	add	%i4, %i2, %i4		! if word count odd, n -= 4
169	bz	.bytepad		! if word count == 1, pad bytes left
170	add	%l0, %i2, %l0		! bump dst if word count odd
171
172.fillword:
173	addcc	%l1, 8, %l1		! count -= 8, carry set on the last pass
174	stw	%g0, [%l0]		! dst[n] = 0
175	stw	%g0, [%l0 + 4]		! dst[n+4] = 0
176	add	%l0, 8, %l0		! dst += 8
177	bcc	.fillword		! fill words until count == 0
178	addcc	%i4, 8, %i4		! (delay slot, every pass) n -= 8
179	bz	.done			! if n == 0, we are done
180	.empty
	! the delay slot of the bz above is the first instruction of
	! .bytepad; it only writes %i2, which is scratch at this point
181
	! pad the remaining bytes (at least one) two at a time; if the
	! count is odd, one byte is stored up front
182.bytepad:
183	and	%i4, 1, %i2		! byte count odd ? 1 : 0
184	stb	%g0, [%l0]		! store first byte
185	addcc	%i4, %i2, %i4		! byte count == 1 ?
186	bz	.done			! yup, we are done
187	add	%l0, %i2, %l0		! bump pointer if odd
188
189.fillbyte:
190	addcc	%i4, 2, %i4		! n -= 2
191	stb	%g0, [%l0]		! dst[n] = 0
192	stb	%g0, [%l0 + 1]		! dst[n+1] = 0
193	bnz	.fillbyte		! fill until n == 0
194	add	%l0, 2, %l0		! dst += 2
195
196.done:
197	ret				! done
198	restore	%i0, %g0, %o0		! restore reg window, return dst
199
200	! this is the last word. It may contain null bytes. store bytes
201	! until n == 0. if null byte encountered, continue
202
	! as in .zerobyte, %i1 is cleared after the first zero byte so the
	! bytes following it are stored as zero
203.lastword:
204	sub	%i4, 4, %i4		! undo counter pre-increment
205	add	%i2, 4, %i2		! adjust dst for counter un-bumping
206
207	srl	%i1, 24, %g1		! first byte
208	stb	%g1, [%i2 + %i4]	! store it
209	inccc	%i4			! n--
210	bz	.done			! if n == 0, we're done
211	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
212	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
213	andn	%i1, %g1, %i1		! if byte == 0, start padding with null
214	srl	%i1, 16, %g1		! second byte
215	stb	%g1, [%i2 + %i4]	! store it
216	inccc	%i4			! n--
217	bz	.done			! if n == 0, we're done
218	and	%g1, 0xff, %g1		! isolate byte
219	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
220	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
221	andn	%i1, %g1, %i1		! if byte == 0, start padding with null
222	srl	%i1, 8, %g1		! third byte
223	stb	%g1, [%i2 + %i4]	! store it
224	inccc	%i4			! n--
225	bz	.done			! if n == 0, we're done
226	and	%g1, 0xff, %g1		! isolate byte
227	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
228	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
229	andn	%i1, %g1, %i1		! if byte == 0, start padding with null
230	ba	.done			! here n must be zero, we are done
231	stb	%i1, [%i2 + %i4]	! store fourth byte
232
	! src is word aligned but dst is not: %g1 = dst & 3 (1, 2 or 3).
	! the delay slot of the be below is the lduw at .storebyte, so the
	! first src word is already in %i1 on arrival at .storehalfword2
233.dstnotaligned:
234	cmp	%g1, 2			! dst half word aligned?
235	be	.storehalfword2		! yup, store half word at a time
236	.empty
	! dst is at an odd address: store each word as byte, half word, byte
237.storebyte:
238	lduw	[%i3 + %i4], %i1	! x = src[]
239	addcc	%i4, 4, %i4		! src += 4, dst += 4, n -= 4
240	bcs	.lastword		! if counter wraps, last word
241	andn	%i5, %i1, %g1		! ~x & 0x80808080
242	sub	%i1, %l1, %l0		! x - 0x01010101
243	andcc	%l0, %g1, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
244	bnz	.zerobyte		! end of src found, may need to pad
245	add	%i2, %i4, %l0		! dst (in pointer form)
246	srl	%i1, 24, %g1		! %g1<7:0> = 1st byte; half-word aligned now
247	stb	%g1, [%l0]		! store first byte
248	srl	%i1, 8, %g1		! %g1<15:0> = bytes 2, 3
249	sth	%g1, [%l0 + 1]		! store bytes 2, 3
250	ba	.storebyte		! next word
251	stb	%i1, [%l0 + 3]		! store fourth byte
	! the two nops below follow an unconditional branch and carry no
	! label; presumably padding to align .storehalfword
252	nop
253	nop
254
	! dst is half word aligned: store each word as two half words
255.storehalfword:
256	lduw	[%i3 + %i4], %i1	! x = src[]
257.storehalfword2:
258	addcc	%i4, 4, %i4		! src += 4, dst += 4, n -= 4
259	bcs	.lastword		! if counter wraps, last word
260	andn	%i5, %i1, %g1		! ~x & 0x80808080
261	sub	%i1, %l1, %l0		! x - 0x01010101
262	andcc	%l0, %g1, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
263	bnz	.zerobyte		! x has zero byte, handle end cases
264	add	%i2, %i4, %l0		! dst (in pointer form)
265	srl	%i1, 16, %g1		! %g1<15:0> = bytes 1, 2
266	sth	%g1, [%l0]		! store bytes 1, 2
267	ba	.storehalfword		! next word
268	sth	%i1, [%l0 + 2]		! store bytes 3, 4
269
	! n < 7: byte-wise copy in the caller window (no save/restore),
	! %o0 is never written and so still holds s1 on return
270.shortcpy:
271	ldub	[%o3 + %o4], %o5	! src[]
272	stb	%o5, [%o2 + %o4]	! dst[] = src[]
273	inccc	%o4			! src++, dst++, n--
274	bz	.doneshort		! if n == 0, done
275	tst	%o5			! src[] == 0 ?
276	bnz,a	.shortcpy		! nope, next byte
277	nop				! empty delay slot
278
	! null byte copied and n not yet exhausted: pad the rest with zeros
279.padbyte:
280	stb	%g0, [%o2 + %o4]	! dst[] = 0
281.padbyte2:
282	addcc	%o4, 1, %o4		! dst++, n--
283	bnz,a	.padbyte2		! if n != 0, next byte
284	stb	%g0, [%o2 + %o4]	! (only if looping) dst[] = 0
285	nop				! align label below to 16-byte boundary
286
287.doneshort:
288	retl				! return from leaf
289	nop				! empty delay slot
290	SET_SIZE(strncpy)
291