xref: /titanic_51/usr/src/lib/libc/capabilities/sun4v/common/memset.s (revision 1e49577a7fcde812700ded04431b49d67cc57d6d)
1*1e49577aSRod Evans/*
2*1e49577aSRod Evans * CDDL HEADER START
3*1e49577aSRod Evans *
4*1e49577aSRod Evans * The contents of this file are subject to the terms of the
5*1e49577aSRod Evans * Common Development and Distribution License (the "License").
6*1e49577aSRod Evans * You may not use this file except in compliance with the License.
7*1e49577aSRod Evans *
8*1e49577aSRod Evans * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*1e49577aSRod Evans * or http://www.opensolaris.org/os/licensing.
10*1e49577aSRod Evans * See the License for the specific language governing permissions
11*1e49577aSRod Evans * and limitations under the License.
12*1e49577aSRod Evans *
13*1e49577aSRod Evans * When distributing Covered Code, include this CDDL HEADER in each
14*1e49577aSRod Evans * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*1e49577aSRod Evans * If applicable, add the following below this CDDL HEADER, with the
16*1e49577aSRod Evans * fields enclosed by brackets "[]" replaced with your own identifying
17*1e49577aSRod Evans * information: Portions Copyright [yyyy] [name of copyright owner]
18*1e49577aSRod Evans *
19*1e49577aSRod Evans * CDDL HEADER END
20*1e49577aSRod Evans */
21*1e49577aSRod Evans
22*1e49577aSRod Evans/*
23*1e49577aSRod Evans * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24*1e49577aSRod Evans */
25*1e49577aSRod Evans
26*1e49577aSRod Evans	.file	"memset.s"
27*1e49577aSRod Evans
28*1e49577aSRod Evans/*
29*1e49577aSRod Evans * char *memset(sp, c, n)
30*1e49577aSRod Evans *
31*1e49577aSRod Evans * Set an array of n chars starting at sp to the character c.
32*1e49577aSRod Evans * Return sp.
33*1e49577aSRod Evans *
34*1e49577aSRod Evans * Fast assembler language version of the following C-program for memset
35*1e49577aSRod Evans * which represents the `standard' for the C-library.
36*1e49577aSRod Evans *
37*1e49577aSRod Evans *	void *
38*1e49577aSRod Evans *	memset(void *sp1, int c, size_t n)
39*1e49577aSRod Evans *	{
40*1e49577aSRod Evans *	    if (n != 0) {
41*1e49577aSRod Evans *		char *sp = sp1;
42*1e49577aSRod Evans *		do {
43*1e49577aSRod Evans *		    *sp++ = (char)c;
44*1e49577aSRod Evans *		} while (--n != 0);
45*1e49577aSRod Evans *	    }
46*1e49577aSRod Evans *	    return (sp1);
47*1e49577aSRod Evans *	}
48*1e49577aSRod Evans *
49*1e49577aSRod Evans * Flow :
50*1e49577aSRod Evans *
51*1e49577aSRod Evans *	For small stores of 6 or fewer bytes, individual bytes are stored.
52*1e49577aSRod Evans *
53*1e49577aSRod Evans *	For stores of fewer than 32 bytes, align the address on a 4-byte
54*1e49577aSRod Evans *	boundary, then store as many 4-byte chunks as fit, then trailing bytes.
55*1e49577aSRod Evans *
56*1e49577aSRod Evans *	For sizes of 32 bytes or more, align the address on an 8-byte boundary.
57*1e49577aSRod Evans *	if (count >= 64) {
58*1e49577aSRod Evans *		store 8-byte chunks until the address is block (64-byte) aligned
59*1e49577aSRod Evans *		store whole blocks using ASI_BLK_INIT_ST_QUAD_LDD_P
60*1e49577aSRod Evans *	}
61*1e49577aSRod Evans *	Store as many 8-byte chunks as fit, followed by trailing bytes.
62*1e49577aSRod Evans *
63*1e49577aSRod Evans */
64*1e49577aSRod Evans
65*1e49577aSRod Evans#include <sys/asm_linkage.h>
66*1e49577aSRod Evans#include <sys/niagaraasi.h>
67*1e49577aSRod Evans#include <sys/asi.h>
68*1e49577aSRod Evans
69*1e49577aSRod Evans	ANSI_PRAGMA_WEAK(memset,function)
70*1e49577aSRod Evans
71*1e49577aSRod Evans	.section        ".text"
72*1e49577aSRod Evans	.align 32
73*1e49577aSRod Evans
74*1e49577aSRod Evans	ENTRY(memset)
75*1e49577aSRod Evans
	!
	! memset(sp1, c, n): store n copies of (char)c starting at sp1 and
	! return sp1.
	!
	! Register usage in this routine:
	!	%o0	sp1; never written, so it is still the return value
	!	%o1	fill pattern: (char)c, widened to 2, 4 and 8 copies
	!	%o2	byte count; reduced as alignment bytes are written and
	!		finally masked down to the trailing byte count
	!	%o3	scratch: negative alignment counters and chunk counts
	!	%o4	bytes to be written by the 64-byte block loops
	!	%o5	current store address (working copy of sp1)
	!
	! No branch below sets the annul bit, so the instruction that
	! follows each branch (its delay slot) is always executed, whether
	! or not the branch is taken.
	!
	! NOTE(review): %ncc is not defined in this file; presumably it is
	! supplied by one of the included headers - confirm.
	!
76*1e49577aSRod Evans	mov	%o0, %o5		! copy sp1 before using it
77*1e49577aSRod Evans	cmp	%o2, 7			! fewer than 7 bytes? just write bytes
78*1e49577aSRod Evans	blu,pn	%ncc, .wrchar
79*1e49577aSRod Evans	and	%o1, 0xff, %o1		! (delay slot) o1 is (char)c
80*1e49577aSRod Evans
81*1e49577aSRod Evans	sll	%o1, 8, %o3
82*1e49577aSRod Evans	or	%o1, %o3, %o1		! now o1 has 2 bytes of c
83*1e49577aSRod Evans	sll	%o1, 16, %o3
84*1e49577aSRod Evans
	! Fewer than 32 bytes: take the 4-byte word path at .wdalign.
85*1e49577aSRod Evans	cmp	%o2, 0x20
86*1e49577aSRod Evans	blu,pn	%ncc, .wdalign
87*1e49577aSRod Evans	or	%o1, %o3, %o1		! (delay slot) now o1 has 4 bytes of c
88*1e49577aSRod Evans
89*1e49577aSRod Evans	sllx	%o1, 32, %o3
90*1e49577aSRod Evans	or	%o1, %o3, %o1		! now o1 has 8 bytes of c
91*1e49577aSRod Evans
	! 32 bytes or more: first bring %o5 up to an 8-byte boundary.
	! If it is already aligned, the -8 left in %o3 by the delay slot
	! is not used; %o3 is rewritten at .blkalign.
92*1e49577aSRod Evans.dbalign:
93*1e49577aSRod Evans	andcc	%o5, 7, %o3		! is sp1 aligned on a 8 byte bound
94*1e49577aSRod Evans	bz,pt	%ncc, .blkalign		! already double aligned
95*1e49577aSRod Evans	sub	%o3, 8, %o3		! (delay slot) -(bytes till double aligned)
96*1e49577aSRod Evans	add	%o2, %o3, %o2		! update o2 with new count
97*1e49577aSRod Evans
98*1e49577aSRod Evans	! Set -(%o3) bytes till sp1 double aligned
99*1e49577aSRod Evans1:	stb	%o1, [%o5]		! there is at least 1 byte to set
100*1e49577aSRod Evans	inccc	%o3			! byte setting loop; runs until %o3 reaches 0
101*1e49577aSRod Evans	bl,pt	%ncc, 1b
102*1e49577aSRod Evans	inc	%o5
103*1e49577aSRod Evans
104*1e49577aSRod Evans	! Now sp1 is double aligned (sp1 is found in %o5)
	! %asi is loaded here for the stxa block stores below.  With fewer
	! than 64 bytes left, skip straight to the doubleword loop at 5:
	! with %o3 = %o2 (set in the delay slot).
105*1e49577aSRod Evans.blkalign:
106*1e49577aSRod Evans	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi
107*1e49577aSRod Evans
108*1e49577aSRod Evans	cmp	%o2, 0x40		! check if there are 64 bytes to set
109*1e49577aSRod Evans	blu,pn	%ncc, 5f
110*1e49577aSRod Evans	mov	%o2, %o3
111*1e49577aSRod Evans
112*1e49577aSRod Evans	andcc	%o5, 63, %o3		! is sp1 block aligned?
113*1e49577aSRod Evans	bz,pt	%ncc, .blkwr		! now block aligned
114*1e49577aSRod Evans	sub	%o3, 64, %o3		! o3 is -(bytes till block aligned)
115*1e49577aSRod Evans	add	%o2, %o3, %o2		! o2 is the remainder
116*1e49577aSRod Evans
117*1e49577aSRod Evans	! Store -(%o3) bytes till dst is block (64 byte) aligned.
118*1e49577aSRod Evans	! Use double word stores.
119*1e49577aSRod Evans	! Recall that dst is already double word aligned
120*1e49577aSRod Evans1:
121*1e49577aSRod Evans	stx	%o1, [%o5]
122*1e49577aSRod Evans	addcc	%o3, 8, %o3
123*1e49577aSRod Evans	bl,pt	%ncc, 1b
124*1e49577aSRod Evans	add	%o5, 8, %o5
125*1e49577aSRod Evans
126*1e49577aSRod Evans	! Now sp1 is block aligned
	! Split the remaining count: %o4 = whole 64-byte blocks (in bytes),
	! %o3 = the 0..63 bytes left over for the loops at 5: and .wrchar.
	! %o4 may be zero here if block alignment used up most of the count.
127*1e49577aSRod Evans.blkwr:
128*1e49577aSRod Evans	and	%o2, 63, %o3		! calc bytes left after blk store.
129*1e49577aSRod Evans	andn	%o2, 63, %o4		! calc size of blocks in bytes
130*1e49577aSRod Evans
131*1e49577aSRod Evans	cmp	%o4, 0x100		! check if there are 256 bytes to set
132*1e49577aSRod Evans	blu,pn	%ncc, 3f
133*1e49577aSRod Evans	nop
	! 256 bytes (four 64-byte blocks) per pass.  The first doubleword
	! of each of the four blocks is stored first, then the remaining
	! seven doublewords of each block in turn.
	! NOTE(review): this ordering is presumably tied to how the
	! block-init ASI treats the first store to a block - confirm
	! against the processor documentation before reordering.
134*1e49577aSRod Evans2:
135*1e49577aSRod Evans	stxa	%o1, [%o5+0x0]%asi
136*1e49577aSRod Evans	stxa	%o1, [%o5+0x40]%asi
137*1e49577aSRod Evans	stxa	%o1, [%o5+0x80]%asi
138*1e49577aSRod Evans	stxa	%o1, [%o5+0xc0]%asi
139*1e49577aSRod Evans
140*1e49577aSRod Evans	stxa	%o1, [%o5+0x8]%asi
141*1e49577aSRod Evans	stxa	%o1, [%o5+0x10]%asi
142*1e49577aSRod Evans	stxa	%o1, [%o5+0x18]%asi
143*1e49577aSRod Evans	stxa	%o1, [%o5+0x20]%asi
144*1e49577aSRod Evans	stxa	%o1, [%o5+0x28]%asi
145*1e49577aSRod Evans	stxa	%o1, [%o5+0x30]%asi
146*1e49577aSRod Evans	stxa	%o1, [%o5+0x38]%asi
147*1e49577aSRod Evans
148*1e49577aSRod Evans	stxa	%o1, [%o5+0x48]%asi
149*1e49577aSRod Evans	stxa	%o1, [%o5+0x50]%asi
150*1e49577aSRod Evans	stxa	%o1, [%o5+0x58]%asi
151*1e49577aSRod Evans	stxa	%o1, [%o5+0x60]%asi
152*1e49577aSRod Evans	stxa	%o1, [%o5+0x68]%asi
153*1e49577aSRod Evans	stxa	%o1, [%o5+0x70]%asi
154*1e49577aSRod Evans	stxa	%o1, [%o5+0x78]%asi
155*1e49577aSRod Evans
156*1e49577aSRod Evans	stxa	%o1, [%o5+0x88]%asi
157*1e49577aSRod Evans	stxa	%o1, [%o5+0x90]%asi
158*1e49577aSRod Evans	stxa	%o1, [%o5+0x98]%asi
159*1e49577aSRod Evans	stxa	%o1, [%o5+0xa0]%asi
160*1e49577aSRod Evans	stxa	%o1, [%o5+0xa8]%asi
161*1e49577aSRod Evans	stxa	%o1, [%o5+0xb0]%asi
162*1e49577aSRod Evans	stxa	%o1, [%o5+0xb8]%asi
163*1e49577aSRod Evans
164*1e49577aSRod Evans	stxa	%o1, [%o5+0xc8]%asi
165*1e49577aSRod Evans	stxa	%o1, [%o5+0xd0]%asi
166*1e49577aSRod Evans	stxa	%o1, [%o5+0xd8]%asi
167*1e49577aSRod Evans	stxa	%o1, [%o5+0xe0]%asi
168*1e49577aSRod Evans	stxa	%o1, [%o5+0xe8]%asi
169*1e49577aSRod Evans	stxa	%o1, [%o5+0xf0]%asi
170*1e49577aSRod Evans	stxa	%o1, [%o5+0xf8]%asi
171*1e49577aSRod Evans
	! Go round again only while MORE than 256 block bytes remain; when
	! exactly 256 (or fewer) remain they are written by the 64-byte
	! loop at 4: instead.  %o5 is advanced in the delay slot.
172*1e49577aSRod Evans	sub	%o4, 0x100, %o4
173*1e49577aSRod Evans	cmp	%o4, 0x100
174*1e49577aSRod Evans	bgu,pt	%ncc, 2b
175*1e49577aSRod Evans	add	%o5, 0x100, %o5
176*1e49577aSRod Evans
177*1e49577aSRod Evans3:
178*1e49577aSRod Evans	cmp	%o4, 0x40		! check if 64 bytes to set
179*1e49577aSRod Evans	blu	%ncc, 5f
180*1e49577aSRod Evans	nop
	! One 64-byte block per pass, while block bytes remain in %o4.
181*1e49577aSRod Evans4:
182*1e49577aSRod Evans	stxa	%o1, [%o5+0x0]%asi
183*1e49577aSRod Evans	stxa	%o1, [%o5+0x8]%asi
184*1e49577aSRod Evans	stxa	%o1, [%o5+0x10]%asi
185*1e49577aSRod Evans	stxa	%o1, [%o5+0x18]%asi
186*1e49577aSRod Evans	stxa	%o1, [%o5+0x20]%asi
187*1e49577aSRod Evans	stxa	%o1, [%o5+0x28]%asi
188*1e49577aSRod Evans	stxa	%o1, [%o5+0x30]%asi
189*1e49577aSRod Evans	stxa	%o1, [%o5+0x38]%asi
190*1e49577aSRod Evans
191*1e49577aSRod Evans	subcc	%o4, 0x40, %o4
192*1e49577aSRod Evans	bgu,pt	%ncc, 4b
193*1e49577aSRod Evans	add	%o5, 0x40, %o5
194*1e49577aSRod Evans
	! On every path into 5:, %o3 holds the bytes (fewer than 64) not
	! covered by block stores, and %o2 agrees with %o3 in its low three
	! bits.  The membar and the %asi reset below are executed on all of
	! those paths, including the one from .blkalign that issued no stxa.
195*1e49577aSRod Evans5:
196*1e49577aSRod Evans	! Set the remaining doubles
197*1e49577aSRod Evans	membar	#Sync
198*1e49577aSRod Evans	mov	ASI_PNF, %asi		! restore %asi to default
199*1e49577aSRod Evans					! ASI_PRIMARY_NOFAULT value
200*1e49577aSRod Evans	subcc	%o3, 8, %o3		! Can we store any doubles?
201*1e49577aSRod Evans	blu,pn	%ncc, .wrchar
202*1e49577aSRod Evans	and	%o2, 7, %o2		! (delay slot) calc bytes left after doubles
203*1e49577aSRod Evans
	! %o3 is kept 8 ahead of the store: the loop continues for as long
	! as the subtraction does not borrow.
204*1e49577aSRod Evans6:
205*1e49577aSRod Evans	stx	%o1, [%o5]		! store the doubles
206*1e49577aSRod Evans	subcc	%o3, 8, %o3
207*1e49577aSRod Evans	bgeu,pt	%ncc, 6b
208*1e49577aSRod Evans	add	%o5, 8, %o5
209*1e49577aSRod Evans
210*1e49577aSRod Evans	ba	.wrchar
211*1e49577aSRod Evans	nop
212*1e49577aSRod Evans
	! Word path for counts of 7..31: write single bytes until %o5 is
	! 4-byte aligned.  Only the condition codes of the andcc are used;
	! the delay-slot andn runs on every pass and replaces %o3 with the
	! current count rounded down to a multiple of 4.
213*1e49577aSRod Evans.wdalign:
214*1e49577aSRod Evans	andcc	%o5, 3, %o3		! is sp1 aligned on a word boundary
215*1e49577aSRod Evans	bz,pn	%ncc, .wrword
216*1e49577aSRod Evans	andn	%o2, 3, %o3		! create word sized count in %o3
217*1e49577aSRod Evans
218*1e49577aSRod Evans	dec	%o2			! decrement count
219*1e49577aSRod Evans	stb	%o1, [%o5]		! set a byte
220*1e49577aSRod Evans	b	.wdalign
221*1e49577aSRod Evans	inc	%o5			! next byte
222*1e49577aSRod Evans
	! %o3 is a nonzero multiple of 4 here: the count was at least 7 on
	! entry and at most 3 bytes were used to align, so the store-first
	! loop below never writes past the requested range.
223*1e49577aSRod Evans.wrword:
224*1e49577aSRod Evans	st	%o1, [%o5]		! 4-byte writing loop
225*1e49577aSRod Evans	subcc	%o3, 4, %o3
226*1e49577aSRod Evans	bnz,pt	%ncc, .wrword
227*1e49577aSRod Evans	inc	4, %o5
228*1e49577aSRod Evans
229*1e49577aSRod Evans	and	%o2, 3, %o2		! leftover count, if any
230*1e49577aSRod Evans
	! Common tail: %o2 = number of single bytes still to store at %o5.
	! Only the low byte of %o1 is written by stb, so this works for
	! every width of the fill pattern.
231*1e49577aSRod Evans.wrchar:
232*1e49577aSRod Evans	! Set the remaining bytes, if any
233*1e49577aSRod Evans	cmp	%o2, 0
234*1e49577aSRod Evans	be	%ncc, .exit
235*1e49577aSRod Evans	nop
236*1e49577aSRod Evans
	! %o2 is nonzero on entry; loop until the decrement reaches zero.
237*1e49577aSRod Evans7:
238*1e49577aSRod Evans	deccc	%o2
239*1e49577aSRod Evans	stb	%o1, [%o5]
240*1e49577aSRod Evans	bgu,pt	%ncc, 7b
241*1e49577aSRod Evans	inc	%o5
242*1e49577aSRod Evans
243*1e49577aSRod Evans.exit:
244*1e49577aSRod Evans	retl				! %o0 was preserved
245*1e49577aSRod Evans	nop
246*1e49577aSRod Evans
247*1e49577aSRod Evans	SET_SIZE(memset)
248