xref: /titanic_50/usr/src/lib/libc/sparc/gen/strncpy.s (revision fa25784ca4b51c206177d891a654f1d36a25d41f)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29	.file	"%M%"
30
31/*
32 * strncpy(s1, s2, n)
33 *
34 * Copy string s2 to s1, truncating or null-padding so that exactly n bytes
35 * are always written.  Returns s1.
36 *
37 * Fast assembler language version of the following C-program for strncpy
38 * which represents the `standard' for the C-library.
39 *
40 *	char *
41 *	strncpy(char *s1, const char *s2, size_t n)
42 *	{
43 *		char *os1 = s1;
44 *
45 *		n++;
46 *		while ((--n != 0) &&  ((*s1++ = *s2++) != '\0'))
47 *			;
48 *		if (n != 0)
49 *			while (--n != 0)
50 *				*s1++ = '\0';
51 *		return (os1);
52 *	}
53 */
54
55#include <sys/asm_linkage.h>
56
57	! strncpy works similarly to strcpy, except that n bytes of s2
58	! are copied to s1. If a null character is reached in s2 yet more
59	! bytes remain to be copied, strncpy will copy null bytes into
60	! the destination string.
61	!
62	! This implementation first copies single bytes until the src ptr
63	! is word aligned.  Then the string is copied a word at a time,
64	! stored according to destination alignment (byte, half-word, or
65	! word).  Counts below 7 are handled by a simple byte loop.
66
67	ENTRY(strncpy)
68
	! Register usage, as established by the instructions below
	! (%o names before the save, %i names after it; %x = either):
	!   %o0/%i0  s1 (dst); handed back unchanged as the return value
	!   %o1/%i1  s2 (src) on entry; %i1 is reused for loaded src data
	!   %o2/%i2  n on entry, then dst + n (biased by -4 in the
	!            word copy loops)
	!   %o3/%i3  src + n
	!   %o4/%i4  -n, counted up towards zero; [%x3 + %x4] and
	!            [%x2 + %x4] address the current src and dst byte
	!   %o5/%i5  scratch: src alignment counter, later magic2
	!   %l0      dst in plain pointer form for the pad/fill code
	!   %l1      magic1 (0x01010101) while copying words
	!
	! A non-annulled branch always executes the instruction that
	! follows it (its delay slot); "bcc,a" executes it only when the
	! branch is taken.
69	.align 32
70	subcc	%g0, %o2, %o4		! %o4 = -n (counts up to 0); Z set if n == 0
71	bz	.doneshort		! n == 0: nothing to copy, return s1
72	cmp	%o2, 7			! (delay slot) n < 7 ?
73	add	%o1, %o2, %o3		! %o3 = src + n
74	blu	.shortcpy		! n < 7, use byte-wise copy
75	add	%o0, %o2, %o2		! (delay slot) %o2 = dst + n
76	andcc	%o1, 3, %o5		! src word aligned ?
77	bz	.wordaligned		! yup
78	save	%sp, -0x40, %sp		! (delay slot) new register window, both paths
79	sub	%i5, 4, %i5		! %i5 = -(bytes until src aligned)
80	nop				! align loop on 16-byte boundary
81	nop				! align loop on 16-byte boundary
82
83.alignsrc:
84	ldub	[%i3 + %i4], %i1	! src[]
85	stb	%i1, [%i2 + %i4]	! dst[] = src[]
86	inccc	%i4			! src++, dst++, n--
87	bz	.done			! n == 0, done
88	tst     %i1			! (delay slot) end of src reached (null byte) ?
89	bz,a	.bytepad		! yes, at least one byte to pad here
90	add 	%i2, %i4, %l0		! (annulled slot) single dest pointer for fill
91	inccc	%i5			! src aligned now?
92	bnz	.alignsrc		! no, copy another byte
93	.empty				! delay slot is the first instr of .wordaligned
94
95.wordaligned:
96	add	%i2, %i4, %l0		! %l0 = current dst
97	sethi	%hi(0x01010101), %l1	! Alan Mycroft's magic1
98	sub	%i2, 4, %i2		! adjust for dest pre-incr in cpy loops
99	or	%l1, %lo(0x01010101),%l1!  finish loading magic1
100	andcc	%l0, 3, %g1		! destination word aligned ?
101	bnz	.dstnotaligned		! nope
102	sll	%l1, 7, %i5		! (delay slot) magic2 = magic1 << 7 = 0x80808080
103
104.storeword:
105	lduw	[%i3 + %i4], %i1	! src word
106	addcc	%i4, 4, %i4		! n -= 4, src += 4, dst += 4
107	bcs	.lastword		! carry out: 4 or fewer bytes were left
108	andn	%i5, %i1, %g1		! (delay slot) ~word & 0x80808080
109	sub	%i1, %l1, %l0		! word - 0x01010101
110	andcc	%l0, %g1, %g0		! ((word - 0x01010101) & ~word & 0x80808080)
111	bz,a	.storeword		! no zero byte if magic expression == 0
112	stw	%i1, [%i2 + %i4]	! (annulled slot) store word; %i2 is biased by -4
113
114	! n has not expired, but src is at the end. we need to push out the
115	! remaining src bytes and then start padding with null bytes
116
117.zerobyte:
118	add	%i2, %i4, %l0		! pointer to dest string
119	srl	%i1, 24, %g1		! first byte
120	stb	%g1, [%l0]		! store it
121	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
122	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
123	andn	%i1, %g1, %i1		! if byte == 0, clear the rest of the word
124	srl	%i1, 16, %g1		! second byte
125	stb	%g1, [%l0 + 1]		! store it
126	and	%g1, 0xff, %g1		! isolate byte
127	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
128	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
129	andn	%i1, %g1, %i1		! if byte == 0, clear the rest of the word
130	srl	%i1, 8, %g1		! third byte
131	stb	%g1, [%l0 + 2]		! store it
132	and	%g1, 0xff, %g1		! isolate byte
133	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
134	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
135	andn	%i1, %g1, %i1		! if byte == 0, clear the rest of the word
136	stb	%i1, [%l0 + 3]		! store fourth byte
137	addcc	%i4, 8, %g0		! pad bytes left <= 8 ? (carry out)
138	bcs	.bytepad		! yes, do simple byte wise fill
139	add	%l0, 4, %l0		! (delay slot) dst += 4
140	andcc	%l0, 3, %l1		! dst offset relative to word boundary
141	bz	.fillaligned		! dst already word aligned
142
143	! here there is at least one more byte to zero out: otherwise we would
144	! have exited through label .lastword
145
146	sub	%l1, 4, %l1		! (delay slot of bz) -(bytes to align dst)
147.makealigned:
148	stb	%g0, [%l0]		! dst[] = 0
149	addcc	%i4, 1, %i4		! n--
150	bz	.done			! n == 0, we are done
151	addcc	%l1, 1, %l1		! (delay slot) any more bytes needed to align ?
152	bnz	.makealigned		! yup, pad another byte
153	add	%l0, 1, %l0		! (delay slot) dst++
154	nop				! pad to align copy loop below
155
156	! here we know that there are at least another 4 bytes to pad, since
157	! we don't get here unless there were >= 8 bytes to pad to begin
158	! with, and we have padded at most 3 bytes during dst aligning
159
160.fillaligned:
161	add	%i4, 3, %i2		! round up to next word boundary
162	and	%i2, -4, %l1		! %l1 = -n rounded to a word multiple
163	and	%i2, 4, %i2		! word count odd ? 4 : 0
164	stw	%g0, [%l0]		! store first word
165	addcc	%l1, %i2, %l1		! dword count == 0 after the odd word ?
166	add	%i4, %i2, %i4		! if word count odd, n -= 4
167	bz	.bytepad		! no full dword left, pad remaining bytes
168	add	%l0, %i2, %l0		! (delay slot) bump dst if word count odd
169
170.fillword:
171	addcc	%l1, 8, %l1		! count -= 8
172	stw	%g0, [%l0]		! dst[n] = 0
173	stw	%g0, [%l0 + 4]		! dst[n+4] = 0
174	add	%l0, 8, %l0		! dst += 8
175	bcc	.fillword		! fill words until count == 0
176	addcc	%i4, 8, %i4		! (delay slot) n -= 8
177	bz	.done			! if n == 0, we are done
178	.empty				! delay slot is the first instr of .bytepad
179
180.bytepad:
181	and	%i4, 1, %i2		! byte count odd ? 1 : 0
182	stb	%g0, [%l0]		! store first byte
183	addcc	%i4, %i2, %i4		! byte count == 1 ?
184	bz	.done			! yup, we are done
185	add	%l0, %i2, %l0		! (delay slot) bump pointer if odd
186
187.fillbyte:
188	addcc	%i4, 2, %i4		! n -= 2
189	stb	%g0, [%l0]		! dst[n] = 0
190	stb	%g0, [%l0 + 1]		! dst[n+1] = 0
191	bnz	.fillbyte		! fill until n == 0
192	add	%l0, 2, %l0		! (delay slot) dst += 2
193
194.done:
195	ret				! done
196	restore	%i0, %g0, %o0		! restore reg window, return dst
197
198	! this is the last word. It may contain null bytes. store bytes
199	! until n == 0. once a null byte is seen, the bytes after it are
	! forced to null as well
200
201.lastword:
202	sub	%i4, 4, %i4		! undo counter pre-increment
203	add	%i2, 4, %i2		! adjust dst for counter un-bumping
204
205	srl	%i1, 24, %g1		! first byte
206	stb	%g1, [%i2 + %i4]	! store it
207	inccc	%i4			! n--
208	bz	.done			! if n == 0, we're done
209	sub	%g1, 1, %g1		! (delay slot) byte == 0 ? -1 : byte - 1
210	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
211	andn	%i1, %g1, %i1		! if byte == 0, start padding with null
212	srl	%i1, 16, %g1		! second byte
213	stb	%g1, [%i2 + %i4]	! store it
214	inccc	%i4			! n--
215	bz	.done			! if n == 0, we're done
216	and	%g1, 0xff, %g1		! (delay slot) isolate byte
217	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
218	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
219	andn	%i1, %g1, %i1		! if byte == 0, start padding with null
220	srl	%i1, 8, %g1		! third byte
221	stb	%g1, [%i2 + %i4]	! store it
222	inccc	%i4			! n--
223	bz	.done			! if n == 0, we're done
224	and	%g1, 0xff, %g1		! (delay slot) isolate byte
225	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
226	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
227	andn	%i1, %g1, %i1		! if byte == 0, start padding with null
228	ba	.done			! exactly one byte is left here, we are done
229	stb	%i1, [%i2 + %i4]	! (delay slot) store fourth byte
230
231.dstnotaligned:
232	cmp	%g1, 2			! dst half word aligned?
233	be	.storehalfword2		! yup, store half word at a time
234	.empty				! delay slot is the lduw at .storebyte, which
					! loads the %i1 that .storehalfword2 uses
235.storebyte:
236	lduw	[%i3 + %i4], %i1	! x = src[]
237	addcc	%i4, 4, %i4		! src += 4, dst += 4, n -= 4
238	bcs	.lastword		! carry out: 4 or fewer bytes were left
239	andn	%i5, %i1, %g1		! (delay slot) ~x & 0x80808080
240	sub	%i1, %l1, %l0		! x - 0x01010101
241	andcc	%l0, %g1, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
242	bnz	.zerobyte		! end of src found, may need to pad
243	add	%i2, %i4, %l0		! (delay slot) dst (in pointer form)
244	srl	%i1, 24, %g1		! %g1<7:0> = 1st byte; dst + 1 is half-word aligned
245	stb	%g1, [%l0]		! store first byte
246	srl	%i1, 8, %g1		! %g1<15:0> = bytes 2, 3
247	sth	%g1, [%l0 + 1]		! store bytes 2, 3
248	ba	.storebyte		! next word
249	stb	%i1, [%l0 + 3]		! (delay slot) store fourth byte
250	nop
251	nop
252
253.storehalfword:
254	lduw	[%i3 + %i4], %i1	! x = src[]
255.storehalfword2:
256	addcc	%i4, 4, %i4		! src += 4, dst += 4, n -= 4
257	bcs	.lastword		! carry out: 4 or fewer bytes were left
258	andn	%i5, %i1, %g1		! (delay slot) ~x & 0x80808080
259	sub	%i1, %l1, %l0		! x - 0x01010101
260	andcc	%l0, %g1, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
261	bnz	.zerobyte		! x has zero byte, handle end cases
262	add	%i2, %i4, %l0		! (delay slot) dst (in pointer form)
263	srl	%i1, 16, %g1		! %g1<15:0> = bytes 1, 2
264	sth	%g1, [%l0]		! store bytes 1, 2
265	ba	.storehalfword		! next word
266	sth	%i1, [%l0 + 2]		! (delay slot) store bytes 3, 4
267
	! n < 7: leaf path, no register window; copy bytes until n runs
	! out or a null byte has been copied, then null-pad the rest
268.shortcpy:
269	ldub	[%o3 + %o4], %o5	! src[]
270	stb	%o5, [%o2 + %o4]	! dst[] = src[]
271	inccc	%o4			! src++, dst++, n--
272	bz	.doneshort		! if n == 0, done
273	tst	%o5			! (delay slot) src[] == 0 ?
274	bnz,a	.shortcpy		! nope, next byte
275	nop				! empty delay slot
276
277.padbyte:
278	stb	%g0, [%o2 + %o4]	! dst[] = 0
279.padbyte2:
280	addcc	%o4, 1, %o4		! dst++, n--
281	bnz,a	.padbyte2		! if n != 0, next byte
282	stb	%g0, [%o2 + %o4]	! (annulled slot) dst[] = 0
283	nop				! align label below to 16-byte boundary
284
285.doneshort:
286	retl				! return from leaf; %o0 still holds s1
287	nop				! empty delay slot
288	SET_SIZE(strncpy)
289