xref: /illumos-gate/usr/src/lib/libc/sparcv9/gen/strncpy.S (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
1*5d9d9091SRichard Lowe/*
2*5d9d9091SRichard Lowe * CDDL HEADER START
3*5d9d9091SRichard Lowe *
4*5d9d9091SRichard Lowe * The contents of this file are subject to the terms of the
5*5d9d9091SRichard Lowe * Common Development and Distribution License (the "License").
6*5d9d9091SRichard Lowe * You may not use this file except in compliance with the License.
7*5d9d9091SRichard Lowe *
8*5d9d9091SRichard Lowe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*5d9d9091SRichard Lowe * or http://www.opensolaris.org/os/licensing.
10*5d9d9091SRichard Lowe * See the License for the specific language governing permissions
11*5d9d9091SRichard Lowe * and limitations under the License.
12*5d9d9091SRichard Lowe *
13*5d9d9091SRichard Lowe * When distributing Covered Code, include this CDDL HEADER in each
14*5d9d9091SRichard Lowe * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*5d9d9091SRichard Lowe * If applicable, add the following below this CDDL HEADER, with the
16*5d9d9091SRichard Lowe * fields enclosed by brackets "[]" replaced with your own identifying
17*5d9d9091SRichard Lowe * information: Portions Copyright [yyyy] [name of copyright owner]
18*5d9d9091SRichard Lowe *
19*5d9d9091SRichard Lowe * CDDL HEADER END
20*5d9d9091SRichard Lowe */
21*5d9d9091SRichard Lowe
22*5d9d9091SRichard Lowe/*
23*5d9d9091SRichard Lowe * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24*5d9d9091SRichard Lowe * Use is subject to license terms.
25*5d9d9091SRichard Lowe */
26*5d9d9091SRichard Lowe
27*5d9d9091SRichard Lowe	.file	"strncpy.s"
28*5d9d9091SRichard Lowe
29*5d9d9091SRichard Lowe/*
30*5d9d9091SRichard Lowe * strncpy(s1, s2, n)
31*5d9d9091SRichard Lowe *
32*5d9d9091SRichard Lowe * Copy string s2 to s1, truncating or null-padding so that exactly n bytes
33*5d9d9091SRichard Lowe * are written; return s1.
34*5d9d9091SRichard Lowe *
35*5d9d9091SRichard Lowe * Fast assembler language version of the following C-program for strncpy
36*5d9d9091SRichard Lowe * which represents the `standard' for the C-library.
37*5d9d9091SRichard Lowe *
38*5d9d9091SRichard Lowe *	char *
39*5d9d9091SRichard Lowe *	strncpy(char *s1, const char *s2, size_t n)
40*5d9d9091SRichard Lowe *	{
41*5d9d9091SRichard Lowe *		char *os1 = s1;
42*5d9d9091SRichard Lowe *
43*5d9d9091SRichard Lowe *		n++;
44*5d9d9091SRichard Lowe *		while ((--n != 0) &&  ((*s1++ = *s2++) != '\0'))
45*5d9d9091SRichard Lowe *			;
46*5d9d9091SRichard Lowe *		if (n != 0)
47*5d9d9091SRichard Lowe *			while (--n != 0)
48*5d9d9091SRichard Lowe *				*s1++ = '\0';
49*5d9d9091SRichard Lowe *		return (os1);
50*5d9d9091SRichard Lowe *	}
51*5d9d9091SRichard Lowe */
52*5d9d9091SRichard Lowe
53*5d9d9091SRichard Lowe#include <sys/asm_linkage.h>
54*5d9d9091SRichard Lowe
55*5d9d9091SRichard Lowe	! strncpy works similarly to strcpy, except that n bytes of s2
56*5d9d9091SRichard Lowe	! are copied to s1. If a null character is reached in s2 yet more
57*5d9d9091SRichard Lowe	! bytes remain to be copied, strncpy will copy null bytes into
58*5d9d9091SRichard Lowe	! the destination string.
59*5d9d9091SRichard Lowe	!
60*5d9d9091SRichard Lowe	! This implementation works by first aligning the src ptr and
61*5d9d9091SRichard Lowe	! performing small copies until it is aligned.  Then, the string
62*5d9d9091SRichard Lowe	! is copied based upon destination alignment.  (byte, half-word,
63*5d9d9091SRichard Lowe	! word, etc.)
64*5d9d9091SRichard Lowe
65*5d9d9091SRichard Lowe	ENTRY(strncpy)
	!
	! Register roles, as set up and used by the code below:
	!   %o0       s1; never written, so it is still the return value
	!             when .done is reached
	!   %o1       s2 on entry; afterwards the source byte or dword that
	!             was just loaded
	!   %o2       dst + n (biased by -8 once the dword copy loops start)
	!   %o3       src + n; reused as a plain dst pointer by the pad code
	!   %g4       -n; counts up towards zero and is also the (negative)
	!             index added to %o2 and %o3 to reach the current byte
	!   %o4       alignment counter, then magic1 (0x0101010101010101),
	!             then a counter again in the pad code
	!   %o5       magic2 (0x8080808080808080)
	!   %g1, %g5  scratch
	!
66*5d9d9091SRichard Lowe
67*5d9d9091SRichard Lowe	.align 32
68*5d9d9091SRichard Lowe	nop				! pad to align loop on 16-byte boundary
69*5d9d9091SRichard Lowe	subcc	%g0, %o2, %g4		! n = -n, n == 0 ?
70*5d9d9091SRichard Lowe	bz,pn	%ncc, .done		! n == 0, done
71*5d9d9091SRichard Lowe	add	%o1, %o2, %o3		! (delay slot) src = src + n
72*5d9d9091SRichard Lowe	andcc	%o1, 7, %o4		! dword aligned ?
73*5d9d9091SRichard Lowe	bz,pn	%ncc, .dwordaligned	! yup
74*5d9d9091SRichard Lowe	add	%o0, %o2, %o2		! (delay slot) dst = dst + n
75*5d9d9091SRichard Lowe	sub	%o4, 8, %o4		! -(bytes until src aligned)
76*5d9d9091SRichard Lowe
	! copy single bytes until src is dword aligned, n is used up, or
	! the terminating null byte has been copied
77*5d9d9091SRichard Lowe.alignsrc:
78*5d9d9091SRichard Lowe	ldub	[%o3 + %g4], %o1	! src[]
79*5d9d9091SRichard Lowe	stb	%o1, [%o2 + %g4]	! dst[] = src[]
80*5d9d9091SRichard Lowe	addcc	%g4, 1, %g4		! src++, dst++, n--
81*5d9d9091SRichard Lowe	bz,pn	%ncc, .done		! n == 0, done
82*5d9d9091SRichard Lowe	tst	%o1			! (delay slot) end of src reached (null byte) ?
83*5d9d9091SRichard Lowe	bz,a	%ncc, .bytepad		! yes, at least one byte to pad here
84*5d9d9091SRichard Lowe	add 	%o2, %g4, %o3		! (annulled unless taken) single dest pointer for fill
85*5d9d9091SRichard Lowe	addcc	%o4, 1, %o4		! src aligned now?
86*5d9d9091SRichard Lowe	bnz,a	%ncc, .alignsrc		! no, copy another byte
87*5d9d9091SRichard Lowe	nop				! pad
88*5d9d9091SRichard Lowe	nop				! pad
89*5d9d9091SRichard Lowe
	! src is dword aligned from here on. build the two magic constants
	! used for zero-byte detection and select a copy loop according to
	! the alignment of the current dst address
90*5d9d9091SRichard Lowe.dwordaligned:
91*5d9d9091SRichard Lowe	sethi	%hi(0x01010101), %o4	! Alan Mycroft's magic1
92*5d9d9091SRichard Lowe	add	%o2, %g4, %g5		! dst
93*5d9d9091SRichard Lowe	or	%o4, %lo(0x01010101),%o4!  finish loading magic1
94*5d9d9091SRichard Lowe	and	%g5, 3, %g1		! dst<1:0> to examine offset
95*5d9d9091SRichard Lowe	sllx	%o4, 32, %o1		! spread magic1
96*5d9d9091SRichard Lowe	cmp	%g1, 1			! dst offset of 1 or 5
97*5d9d9091SRichard Lowe	or	%o4, %o1, %o4		!   to all 64 bits
98*5d9d9091SRichard Lowe	sub	%o2, 8, %o2		! adjust for dest pre-incr in cpy loops
99*5d9d9091SRichard Lowe	be,pn	%ncc, .storebyte1241	! store 1, 2, 4, 1 bytes
100*5d9d9091SRichard Lowe	sllx	%o4, 7, %o5		! (delay slot) Alan Mycroft's magic2
101*5d9d9091SRichard Lowe	cmp	%g1, 3			! dst offset of 3 or 7
102*5d9d9091SRichard Lowe	be,pn	%ncc, .storebyte1421	! store 1, 4, 2, 1 bytes
103*5d9d9091SRichard Lowe	cmp	%g1, 2			! (delay slot) dst halfword aligned ?
104*5d9d9091SRichard Lowe	be,pn	%ncc, .storehalfword	! yup, store half-word wise
105*5d9d9091SRichard Lowe	andcc	%g5, 7, %g0		! (delay slot) dst dword aligned ?
106*5d9d9091SRichard Lowe	bnz,pn	%ncc, .storeword2	! nope, only word aligned: store word wise
107*5d9d9091SRichard Lowe	nop				! ensure loop is 16-byte aligned
108*5d9d9091SRichard Lowe
	! dst is dword aligned: copy a whole dword per iteration until n
	! runs out (.lastword) or the dword contains a null byte (.zerobyte)
109*5d9d9091SRichard Lowe.storedword:
110*5d9d9091SRichard Lowe	ldx	[%o3 + %g4], %o1	! src dword
111*5d9d9091SRichard Lowe	addcc	%g4, 8, %g4		! n += 8, src += 8, dst += 8
112*5d9d9091SRichard Lowe	bcs,pn	%ncc,.lastword		! if counter wraps, last word
113*5d9d9091SRichard Lowe	andn	%o5, %o1, %g1		! ~dword & 0x8080808080808080
114*5d9d9091SRichard Lowe	sub	%o1, %o4, %g5		! dword - 0x0101010101010101
115*5d9d9091SRichard Lowe	andcc	%g5, %g1, %g0		! ((dword - 0x0101010101010101) & ~dword & 0x8080808080808080)
116*5d9d9091SRichard Lowe	bz,a,pt	%ncc, .storedword	! no zero byte if magic expression == 0
117*5d9d9091SRichard Lowe	stx	%o1, [%o2 + %g4]	! store word to dst (address pre-incremented)
118*5d9d9091SRichard Lowe
119*5d9d9091SRichard Lowe	! n has not expired, but src is at the end. we need to push out the
120*5d9d9091SRichard Lowe	! remaining src bytes and then start padding with null bytes
	!
	! the dword in %o1 is stored one byte at a time; as soon as a null
	! byte has been stored %o1 is cleared, so every later byte written
	! from it is zero as well
121*5d9d9091SRichard Lowe
122*5d9d9091SRichard Lowe.zerobyte:
123*5d9d9091SRichard Lowe	add	%o2, %g4, %o3		! pointer to dest string
124*5d9d9091SRichard Lowe	srlx	%o1, 56, %g1		! first byte
125*5d9d9091SRichard Lowe	stb	%g1, [%o3]		! store it
126*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of string ?
127*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
128*5d9d9091SRichard Lowe	srlx	%o1, 48, %g1		! second byte
129*5d9d9091SRichard Lowe	stb	%g1, [%o3 + 1]		! store it
130*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of string ?
131*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
132*5d9d9091SRichard Lowe	srlx	%o1, 40, %g1		! third byte
133*5d9d9091SRichard Lowe	stb	%g1, [%o3 + 2]		! store it
134*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of string ?
135*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
136*5d9d9091SRichard Lowe	srlx	%o1, 32, %g1		! fourth byte
137*5d9d9091SRichard Lowe	stb	%g1, [%o3 + 3]		! store it
138*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of string ?
139*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
140*5d9d9091SRichard Lowe	srlx	%o1, 24, %g1		! fifth byte
141*5d9d9091SRichard Lowe	stb	%g1, [%o3 + 4]		! store it
142*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of string ?
143*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
144*5d9d9091SRichard Lowe	srlx	%o1, 16, %g1		! sixth byte
145*5d9d9091SRichard Lowe	stb	%g1, [%o3 + 5]		! store it
146*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of string ?
147*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
148*5d9d9091SRichard Lowe	srlx	%o1, 8, %g1		! seventh byte
149*5d9d9091SRichard Lowe	stb	%g1, [%o3 + 6]		! store it
150*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of string ?
151*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
152*5d9d9091SRichard Lowe	stb	%o1, [%o3 + 7]		! store eighth byte
153*5d9d9091SRichard Lowe	addcc	%g4, 16, %g0		! 16 or fewer pad bytes left (carry set) ?
154*5d9d9091SRichard Lowe	bcs,pn	%ncc, .bytepad		! yes, do simple byte wise fill
155*5d9d9091SRichard Lowe	add	%o3, 8, %o3		! (delay slot) dst += 8
156*5d9d9091SRichard Lowe	andcc	%o3, 7, %o4		! dst offset relative to dword boundary
157*5d9d9091SRichard Lowe	bz,pn	%ncc, .fillaligned	! dst already dword aligned
158*5d9d9091SRichard Lowe
159*5d9d9091SRichard Lowe	! here there is at least one more byte to zero out: otherwise we would
160*5d9d9091SRichard Lowe	! have exited through label .lastword
	!
	! NOTE: the sub below sits in the delay slot of the bz above, so it
	! also executes when the branch to .fillaligned is taken. that is
	! harmless, since .fillaligned recomputes %o4 before reading it
161*5d9d9091SRichard Lowe
162*5d9d9091SRichard Lowe	sub	%o4, 8, %o4		! -(bytes to align dst to dword boundary)
163*5d9d9091SRichard Lowe.makealigned:
164*5d9d9091SRichard Lowe	stb	%g0, [%o3]		! dst[] = 0
165*5d9d9091SRichard Lowe	addcc	%g4, 1, %g4		! n--
166*5d9d9091SRichard Lowe	bz,pt	%ncc, .done		! n == 0, we are done
167*5d9d9091SRichard Lowe	addcc	%o4, 1, %o4		! (delay slot) any more byte needed to align
168*5d9d9091SRichard Lowe	bnz,pt	%ncc, .makealigned	! yup, pad another byte
169*5d9d9091SRichard Lowe	add	%o3, 1, %o3		! (delay slot) dst++
170*5d9d9091SRichard Lowe	nop				! pad to align copy loop below
171*5d9d9091SRichard Lowe	nop				! pad to align copy loop below
172*5d9d9091SRichard Lowe
173*5d9d9091SRichard Lowe	! here we know that there are at least another 8 bytes to pad, since
174*5d9d9091SRichard Lowe	! we don't get here unless there were > 16 bytes to pad to begin
175*5d9d9091SRichard Lowe	! with, and we have padded at most 7 bytes during dst aligning
176*5d9d9091SRichard Lowe
177*5d9d9091SRichard Lowe.fillaligned:
178*5d9d9091SRichard Lowe	add	%g4, 7, %o2		! round up to next dword boundary
179*5d9d9091SRichard Lowe	and	%o2, -8, %o4		! pointer to next dword boundary
180*5d9d9091SRichard Lowe	and	%o2, 8, %o2		! dword count odd ? 8 : 0
181*5d9d9091SRichard Lowe	stx	%g0, [%o3]		! store first dword
182*5d9d9091SRichard Lowe	addcc	%o4, %o2, %o4		! dword count == 1 ?
183*5d9d9091SRichard Lowe	add	%g4, %o2, %g4		! if dword count odd, n -= 8
184*5d9d9091SRichard Lowe	bz,pt	%ncc, .bytepad		! if dword count == 1, pad leftover bytes
185*5d9d9091SRichard Lowe	add	%o3, %o2, %o3		! (delay slot) bump dst if dword count odd
186*5d9d9091SRichard Lowe
	! zero two dwords per iteration
187*5d9d9091SRichard Lowe.filldword:
188*5d9d9091SRichard Lowe	addcc	%o4, 16, %o4		! count -= 16
189*5d9d9091SRichard Lowe	stx	%g0, [%o3]		! dst[n] = 0
190*5d9d9091SRichard Lowe	stx	%g0, [%o3 + 8]		! dst[n+8] = 0
191*5d9d9091SRichard Lowe	add	%o3, 16, %o3		! dst += 16
192*5d9d9091SRichard Lowe	bcc,pt	%ncc, .filldword	! fill dwords until count == 0
193*5d9d9091SRichard Lowe	addcc	%g4, 16, %g4		! (delay slot) n -= 16
194*5d9d9091SRichard Lowe	bz,pn	%ncc, .done		! if n == 0, we are done
195*5d9d9091SRichard Lowe
	! NOTE: the first instruction of .bytepad is also the delay slot of
	! the bz above; when that branch is taken it only writes %o2, which
	! is not read again before returning
	!
	! byte wise fill: if the number of bytes left is odd one byte is
	! consumed here, so that .fillbyte can zero two bytes per iteration
196*5d9d9091SRichard Lowe.bytepad:
197*5d9d9091SRichard Lowe	and	%g4, 1, %o2		! byte count odd ? 1 : 0
198*5d9d9091SRichard Lowe	stb	%g0, [%o3]		! store first byte
199*5d9d9091SRichard Lowe	addcc	%g4, %o2, %g4		! byte count == 1 ?
200*5d9d9091SRichard Lowe	bz,pt	%ncc, .done		! yup, we are done
201*5d9d9091SRichard Lowe	add	%o3, %o2, %o3		! (delay slot) bump pointer if odd
202*5d9d9091SRichard Lowe
203*5d9d9091SRichard Lowe.fillbyte:
204*5d9d9091SRichard Lowe	addcc	%g4, 2, %g4		! n -= 2
205*5d9d9091SRichard Lowe	stb	%g0, [%o3]		! dst[n] = 0
206*5d9d9091SRichard Lowe	stb	%g0, [%o3 + 1]		! dst[n+1] = 0
207*5d9d9091SRichard Lowe	bnz,pt	%ncc, .fillbyte		! fill until n == 0
208*5d9d9091SRichard Lowe	add	%o3, 2, %o3		! (delay slot) dst += 2
209*5d9d9091SRichard Lowe
	! common exit; %o0 still holds s1
210*5d9d9091SRichard Lowe.done:
211*5d9d9091SRichard Lowe	retl				! done
212*5d9d9091SRichard Lowe	nop				! pad to align loops below
213*5d9d9091SRichard Lowe	nop				! pad to align loops below
214*5d9d9091SRichard Lowe
215*5d9d9091SRichard Lowe	! this is the last word. It may contain null bytes. store bytes
216*5d9d9091SRichard Lowe	! until n == 0. if null byte encountered, continue
	! with null bytes (%o1 is cleared, as in .zerobyte).
	!
	! NOTE(review): the bz instructions below name no condition code
	! register, unlike the other branches in this file, so they
	! presumably test the 32-bit codes. %g4 is between -8 and 0 in
	! this section, so the outcome would be the same - confirm.
217*5d9d9091SRichard Lowe
218*5d9d9091SRichard Lowe.lastword:
219*5d9d9091SRichard Lowe	sub	%g4, 8, %g4		! undo counter pre-increment
220*5d9d9091SRichard Lowe	add	%o2, 8, %o2		! adjust dst for counter un-bumping
221*5d9d9091SRichard Lowe
222*5d9d9091SRichard Lowe	srlx	%o1, 56, %g1		! first byte
223*5d9d9091SRichard Lowe	stb	%g1, [%o2 + %g4]	! store it
224*5d9d9091SRichard Lowe	inccc	%g4			! n--
225*5d9d9091SRichard Lowe	bz	.done			! if n == 0, we're done
226*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of src reached ?
227*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
228*5d9d9091SRichard Lowe	srlx	%o1, 48, %g1		! second byte
229*5d9d9091SRichard Lowe	stb	%g1, [%o2 + %g4]	! store it
230*5d9d9091SRichard Lowe	inccc	%g4			! n--
231*5d9d9091SRichard Lowe	bz	.done			! if n == 0, we're done
232*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of src reached ?
233*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
234*5d9d9091SRichard Lowe	srlx	%o1, 40, %g1		! third byte
235*5d9d9091SRichard Lowe	stb	%g1, [%o2 + %g4]	! store it
236*5d9d9091SRichard Lowe	inccc	%g4			! n--
237*5d9d9091SRichard Lowe	bz	.done			! if n == 0, we're done
238*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of src reached ?
239*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
240*5d9d9091SRichard Lowe	srlx	%o1, 32, %g1		! fourth byte
241*5d9d9091SRichard Lowe	stb	%g1, [%o2 + %g4]	! store it
242*5d9d9091SRichard Lowe	inccc	%g4			! n--
243*5d9d9091SRichard Lowe	bz	.done			! if n == 0, we're done
244*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of src reached ?
245*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
246*5d9d9091SRichard Lowe	srlx	%o1, 24, %g1		! fifth byte
247*5d9d9091SRichard Lowe	stb	%g1, [%o2 + %g4]	! store it
248*5d9d9091SRichard Lowe	inccc	%g4			! n--
249*5d9d9091SRichard Lowe	bz	.done			! if n == 0, we're done
250*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of src reached ?
251*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
252*5d9d9091SRichard Lowe	srlx	%o1, 16, %g1		! sixth byte
253*5d9d9091SRichard Lowe	stb	%g1, [%o2 + %g4]	! store it
254*5d9d9091SRichard Lowe	inccc	%g4			! n--
255*5d9d9091SRichard Lowe	bz	.done			! if n == 0, we're done
256*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of src reached ?
257*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
258*5d9d9091SRichard Lowe	srlx	%o1, 8, %g1		! seventh byte
259*5d9d9091SRichard Lowe	stb	%g1, [%o2 + %g4]	! store it
260*5d9d9091SRichard Lowe	inccc	%g4			! n--
261*5d9d9091SRichard Lowe	bz	.done			! if n == 0, we're done
262*5d9d9091SRichard Lowe	andcc	%g1, 0xff, %g0		! end of src reached ?
263*5d9d9091SRichard Lowe	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
264*5d9d9091SRichard Lowe	ba	.done			! here n must be zero, we are done
265*5d9d9091SRichard Lowe	stb	%o1, [%o2 + %g4]	! (delay slot) store eighth byte
266*5d9d9091SRichard Lowe	nop				! pad to align loops below
267*5d9d9091SRichard Lowe	nop				! pad to align loops below
268*5d9d9091SRichard Lowe
	! dst offset is 3 modulo 4: each source dword is written as
	! 1 byte, 1 word, 1 halfword and 1 byte
269*5d9d9091SRichard Lowe.storebyte1421:
270*5d9d9091SRichard Lowe	ldx	[%o3 + %g4], %o1	! x = src[]
271*5d9d9091SRichard Lowe	addcc	%g4, 8, %g4		! src += 8, dst += 8
272*5d9d9091SRichard Lowe	bcs,pn	%ncc,.lastword		! if counter wraps, last word
273*5d9d9091SRichard Lowe	andn	%o5, %o1, %g1		! ~x & 0x8080808080808080
274*5d9d9091SRichard Lowe	sub	%o1, %o4, %g5		! x - 0x0101010101010101
275*5d9d9091SRichard Lowe	andcc	%g5, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
276*5d9d9091SRichard Lowe	bnz,pn	%ncc, .zerobyte		! end of src found, may need to pad
277*5d9d9091SRichard Lowe	add	%o2, %g4, %g5		! (delay slot) dst (in pointer form)
278*5d9d9091SRichard Lowe	srlx	%o1, 56, %g1		! %g1<7:0> = first byte; word aligned now
279*5d9d9091SRichard Lowe	stb	%g1, [%g5]		! store first byte
280*5d9d9091SRichard Lowe	srlx	%o1, 24, %g1		! %g1<31:0> = bytes 2, 3, 4, 5
281*5d9d9091SRichard Lowe	stw	%g1, [%g5 + 1]		! store bytes 2, 3, 4, 5
282*5d9d9091SRichard Lowe	srlx	%o1, 8, %g1		! %g1<15:0> = bytes 6, 7
283*5d9d9091SRichard Lowe	sth	%g1, [%g5 + 5]		! store bytes 6, 7
284*5d9d9091SRichard Lowe	ba	.storebyte1421		! next dword
285*5d9d9091SRichard Lowe	stb	%o1, [%g5 + 7]		! (delay slot) store eighth byte
286*5d9d9091SRichard Lowe
	! dst offset is 1 modulo 4: each source dword is written as
	! 1 byte, 1 halfword, 1 word and 1 byte
287*5d9d9091SRichard Lowe.storebyte1241:
288*5d9d9091SRichard Lowe	ldx	[%o3 + %g4], %o1	! x = src[]
289*5d9d9091SRichard Lowe	addcc	%g4, 8, %g4		! src += 8, dst += 8
290*5d9d9091SRichard Lowe	bcs,pn	%ncc,.lastword		! if counter wraps, last word
291*5d9d9091SRichard Lowe	andn	%o5, %o1, %g1		! ~x & 0x8080808080808080
292*5d9d9091SRichard Lowe	sub	%o1, %o4, %g5		! x - 0x0101010101010101
293*5d9d9091SRichard Lowe	andcc	%g5, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
294*5d9d9091SRichard Lowe	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
295*5d9d9091SRichard Lowe	add	%o2, %g4, %g5		! (delay slot) dst (in pointer form)
296*5d9d9091SRichard Lowe	srlx	%o1, 56, %g1		! %g1<7:0> = first byte; half-word aligned now
297*5d9d9091SRichard Lowe	stb	%g1, [%g5]		! store first byte
298*5d9d9091SRichard Lowe	srlx	%o1, 40, %g1		! %g1<15:0> = bytes 2, 3
299*5d9d9091SRichard Lowe	sth	%g1, [%g5 + 1]		! store bytes 2, 3
300*5d9d9091SRichard Lowe	srlx	%o1, 8, %g1		! %g1<31:0> = bytes 4, 5, 6, 7
301*5d9d9091SRichard Lowe	stw	%g1, [%g5 + 3]		! store bytes 4, 5, 6, 7
302*5d9d9091SRichard Lowe	ba	.storebyte1241		! next dword
303*5d9d9091SRichard Lowe	stb	%o1, [%g5 + 7]		! (delay slot) store eighth byte
304*5d9d9091SRichard Lowe
	! dst offset is 2 modulo 4: each source dword is written as
	! 1 halfword, 1 word and 1 halfword
305*5d9d9091SRichard Lowe.storehalfword:
306*5d9d9091SRichard Lowe	ldx	[%o3 + %g4], %o1	! x = src[]
307*5d9d9091SRichard Lowe	addcc	%g4, 8, %g4		! src += 8, dst += 8
308*5d9d9091SRichard Lowe	bcs,pn	%ncc,.lastword		! if counter wraps, last word
309*5d9d9091SRichard Lowe	andn	%o5, %o1, %g1		! ~x & 0x8080808080808080
310*5d9d9091SRichard Lowe	sub	%o1, %o4, %g5		! x - 0x0101010101010101
311*5d9d9091SRichard Lowe	andcc	%g5, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
312*5d9d9091SRichard Lowe	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
313*5d9d9091SRichard Lowe	add	%o2, %g4, %g5		! (delay slot) dst (in pointer form)
314*5d9d9091SRichard Lowe	srlx	%o1, 48, %g1		! %g1<15:0> = bytes 1, 2; word aligned now
315*5d9d9091SRichard Lowe	sth	%g1, [%g5]		! store bytes 1, 2
316*5d9d9091SRichard Lowe	srlx	%o1, 16, %g1		! %g1<31:0> = bytes 3, 4, 5, 6
317*5d9d9091SRichard Lowe	stw	%g1, [%g5 + 2]		! store bytes 3, 4, 5, 6
318*5d9d9091SRichard Lowe	ba	.storehalfword		! next dword
319*5d9d9091SRichard Lowe	sth	%o1, [%g5 + 6]		! (delay slot) store bytes 7, 8
320*5d9d9091SRichard Lowe	nop				! align next loop to 16-byte boundary
321*5d9d9091SRichard Lowe	nop				! align next loop to 16-byte boundary
322*5d9d9091SRichard Lowe
	! dst is word aligned but not dword aligned: each source dword is
	! written as 2 words
323*5d9d9091SRichard Lowe.storeword2:
324*5d9d9091SRichard Lowe	ldx	[%o3 + %g4], %o1	! x = src[]
325*5d9d9091SRichard Lowe	addcc	%g4, 8, %g4		! src += 8, dst += 8
326*5d9d9091SRichard Lowe	bcs,pn	%ncc,.lastword		! if counter wraps, last word
327*5d9d9091SRichard Lowe	andn	%o5, %o1, %g1		! ~x & 0x8080808080808080
328*5d9d9091SRichard Lowe	sub	%o1, %o4, %g5		! x - 0x0101010101010101
329*5d9d9091SRichard Lowe	andcc	%g5, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
330*5d9d9091SRichard Lowe	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
331*5d9d9091SRichard Lowe	add	%o2, %g4, %g5		! (delay slot) dst (in pointer form)
332*5d9d9091SRichard Lowe	srlx	%o1, 32, %g1		! %g1<31:0> = bytes 1, 2, 3, 4
333*5d9d9091SRichard Lowe	stw	%g1, [%g5]		! store bytes 1, 2, 3, 4
334*5d9d9091SRichard Lowe	ba	.storeword2		! next dword
335*5d9d9091SRichard Lowe	stw	%o1, [%g5 + 4]		! (delay slot) store bytes 5, 6, 7, 8
336*5d9d9091SRichard Lowe
337*5d9d9091SRichard Lowe	! do not remove these pads, loop above may slow down otherwise
338*5d9d9091SRichard Lowe
339*5d9d9091SRichard Lowe	nop				! pad
340*5d9d9091SRichard Lowe	nop				! pad
341*5d9d9091SRichard Lowe
342*5d9d9091SRichard Lowe	SET_SIZE(strncpy)
343