xref: /illumos-gate/usr/src/lib/libc/capabilities/sun4v/common/memset.S (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
1*5d9d9091SRichard Lowe/*
2*5d9d9091SRichard Lowe * CDDL HEADER START
3*5d9d9091SRichard Lowe *
4*5d9d9091SRichard Lowe * The contents of this file are subject to the terms of the
5*5d9d9091SRichard Lowe * Common Development and Distribution License (the "License").
6*5d9d9091SRichard Lowe * You may not use this file except in compliance with the License.
7*5d9d9091SRichard Lowe *
8*5d9d9091SRichard Lowe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*5d9d9091SRichard Lowe * or http://www.opensolaris.org/os/licensing.
10*5d9d9091SRichard Lowe * See the License for the specific language governing permissions
11*5d9d9091SRichard Lowe * and limitations under the License.
12*5d9d9091SRichard Lowe *
13*5d9d9091SRichard Lowe * When distributing Covered Code, include this CDDL HEADER in each
14*5d9d9091SRichard Lowe * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*5d9d9091SRichard Lowe * If applicable, add the following below this CDDL HEADER, with the
16*5d9d9091SRichard Lowe * fields enclosed by brackets "[]" replaced with your own identifying
17*5d9d9091SRichard Lowe * information: Portions Copyright [yyyy] [name of copyright owner]
18*5d9d9091SRichard Lowe *
19*5d9d9091SRichard Lowe * CDDL HEADER END
20*5d9d9091SRichard Lowe */
21*5d9d9091SRichard Lowe
22*5d9d9091SRichard Lowe/*
23*5d9d9091SRichard Lowe * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24*5d9d9091SRichard Lowe */
25*5d9d9091SRichard Lowe
26*5d9d9091SRichard Lowe	.file	"memset.s"
27*5d9d9091SRichard Lowe
28*5d9d9091SRichard Lowe/*
29*5d9d9091SRichard Lowe * char *memset(sp, c, n)
30*5d9d9091SRichard Lowe *
31*5d9d9091SRichard Lowe * Set an array of n chars starting at sp to the character c.
32*5d9d9091SRichard Lowe * Return sp.
33*5d9d9091SRichard Lowe *
34*5d9d9091SRichard Lowe * Fast assembler language version of the following C-program for memset
35*5d9d9091SRichard Lowe * which represents the `standard' for the C-library.
36*5d9d9091SRichard Lowe *
37*5d9d9091SRichard Lowe *	void *
38*5d9d9091SRichard Lowe *	memset(void *sp1, int c, size_t n)
39*5d9d9091SRichard Lowe *	{
40*5d9d9091SRichard Lowe *	    if (n != 0) {
41*5d9d9091SRichard Lowe *		char *sp = sp1;
42*5d9d9091SRichard Lowe *		do {
43*5d9d9091SRichard Lowe *		    *sp++ = (char)c;
44*5d9d9091SRichard Lowe *		} while (--n != 0);
45*5d9d9091SRichard Lowe *	    }
46*5d9d9091SRichard Lowe *	    return (sp1);
47*5d9d9091SRichard Lowe *	}
48*5d9d9091SRichard Lowe *
49*5d9d9091SRichard Lowe * Flow :
50*5d9d9091SRichard Lowe *
51*5d9d9091SRichard Lowe *	For small stores of 6 or fewer bytes, the bytes are stored one at a time.
52*5d9d9091SRichard Lowe *
53*5d9d9091SRichard Lowe *	For less than 32 bytes stores, align the address on 4 byte boundary.
54*5d9d9091SRichard Lowe *	Then store as many 4-byte chunks, followed by trailing bytes.
55*5d9d9091SRichard Lowe *
56*5d9d9091SRichard Lowe *	For sizes of 32 bytes or more, align the address on 8 byte boundary.
57*5d9d9091SRichard Lowe *	if (remaining count >= 64) {
58*5d9d9091SRichard Lowe *		store as many 8-bytes chunks to block align the address
59*5d9d9091SRichard Lowe *		store using ASI_BLK_INIT_ST_QUAD_LDD_P
60*5d9d9091SRichard Lowe *	}
61*5d9d9091SRichard Lowe *	Store as many 8-byte chunks, followed by trailing bytes.
62*5d9d9091SRichard Lowe *
63*5d9d9091SRichard Lowe */
64*5d9d9091SRichard Lowe
65*5d9d9091SRichard Lowe#include <sys/asm_linkage.h>
66*5d9d9091SRichard Lowe#include <sys/niagaraasi.h>
67*5d9d9091SRichard Lowe#include <sys/asi.h>
68*5d9d9091SRichard Lowe
69*5d9d9091SRichard Lowe	ANSI_PRAGMA_WEAK(memset,function)
70*5d9d9091SRichard Lowe
71*5d9d9091SRichard Lowe	.section        ".text"
72*5d9d9091SRichard Lowe	.align 32
73*5d9d9091SRichard Lowe
/*
 * memset(sp1, c, n): set n bytes starting at sp1 to (char)c and return sp1.
 *
 * Register usage in this routine:
 *	%o0	sp1 on entry; never written, so it is still the return value
 *	%o1	fill value: c masked to one byte, then widened to 2, 4 and
 *		8 copies of that byte as the larger store paths are chosen
 *	%o2	number of bytes still to be set
 *	%o3	scratch: negative "bytes until aligned" counters and the
 *		byte counts driving the word and doubleword loops
 *	%o4	number of bytes to be written by the 64-byte block loops
 *	%o5	working copy of the destination pointer, advanced as bytes
 *		are set
 *	%asi	written twice: the block-init ASI, then ASI_PNF
 *
 * No branch below uses the annul bit, so the instruction following each
 * branch (its delay slot) executes whether or not the branch is taken.
 *
 * NOTE(review): %ncc is defined outside this file; presumably it selects
 * the condition codes that match the native word size -- confirm in the
 * included headers.
 */
74*5d9d9091SRichard Lowe	ENTRY(memset)
75*5d9d9091SRichard Lowe
76*5d9d9091SRichard Lowe	mov	%o0, %o5		! copy sp1 before using it
77*5d9d9091SRichard Lowe	cmp	%o2, 7			! if small counts, just write bytes
78*5d9d9091SRichard Lowe	blu,pn	%ncc, .wrchar
79*5d9d9091SRichard Lowe	and	%o1, 0xff, %o1		! o1 is (char)c
80*5d9d9091SRichard Lowe
81*5d9d9091SRichard Lowe	sll	%o1, 8, %o3		! o3 = c shifted into the second byte
82*5d9d9091SRichard Lowe	or	%o1, %o3, %o1		! now o1 has 2 bytes of c
83*5d9d9091SRichard Lowe	sll	%o1, 16, %o3		! o3 = 2-byte pattern in the upper half
84*5d9d9091SRichard Lowe
85*5d9d9091SRichard Lowe	cmp	%o2, 0x20		! fewer than 32 bytes to set?
86*5d9d9091SRichard Lowe	blu,pn	%ncc, .wdalign		! yes: take the 4-byte store path
87*5d9d9091SRichard Lowe	or	%o1, %o3, %o1		! now o1 has 4 bytes of c
88*5d9d9091SRichard Lowe
89*5d9d9091SRichard Lowe	sllx	%o1, 32, %o3		! o3 = 4-byte pattern in the upper word
90*5d9d9091SRichard Lowe	or	%o1, %o3, %o1		! now o1 has 8 bytes of c
91*5d9d9091SRichard Lowe
	! 32 or more bytes to set: first bring the destination up to an
	! 8-byte boundary with single byte stores.
92*5d9d9091SRichard Lowe.dbalign:
93*5d9d9091SRichard Lowe	andcc	%o5, 7, %o3		! is sp1 aligned on a 8 byte bound
94*5d9d9091SRichard Lowe	bz,pt	%ncc, .blkalign		! already double aligned
95*5d9d9091SRichard Lowe	sub	%o3, 8, %o3		! -(bytes till double aligned)
96*5d9d9091SRichard Lowe	add	%o2, %o3, %o2		! update o2 with new count
97*5d9d9091SRichard Lowe
98*5d9d9091SRichard Lowe	! Set -(%o3) bytes till sp1 double aligned
99*5d9d9091SRichard Lowe1:	stb	%o1, [%o5]		! there is at least 1 byte to set
100*5d9d9091SRichard Lowe	inccc	%o3			! byte clearing loop
101*5d9d9091SRichard Lowe	bl,pt	%ncc, 1b		! loop while o3 is still negative
102*5d9d9091SRichard Lowe	inc	%o5			! (delay slot) advance to next byte
103*5d9d9091SRichard Lowe
104*5d9d9091SRichard Lowe	! Now sp1 is double aligned (sp1 is found in %o5)
105*5d9d9091SRichard Lowe.blkalign:
106*5d9d9091SRichard Lowe	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi	! ASI used by the stxa stores below
107*5d9d9091SRichard Lowe
108*5d9d9091SRichard Lowe	cmp	%o2, 0x40		! check if there are 64 bytes to set
109*5d9d9091SRichard Lowe	blu,pn	%ncc, 5f		! fewer than 64: skip the block stores
110*5d9d9091SRichard Lowe	mov	%o2, %o3		! (delay slot) o3 = byte count used at 5:
111*5d9d9091SRichard Lowe
112*5d9d9091SRichard Lowe	andcc	%o5, 63, %o3		! is sp1 block aligned?
113*5d9d9091SRichard Lowe	bz,pt	%ncc, .blkwr		! now block aligned
114*5d9d9091SRichard Lowe	sub	%o3, 64, %o3		! o3 is -(bytes till block aligned)
115*5d9d9091SRichard Lowe	add	%o2, %o3, %o2		! o2 is the remainder
116*5d9d9091SRichard Lowe
117*5d9d9091SRichard Lowe	! Store -(%o3) bytes till dst is block (64 byte) aligned.
118*5d9d9091SRichard Lowe	! Use double word stores.
119*5d9d9091SRichard Lowe	! Recall that dst is already double word aligned
120*5d9d9091SRichard Lowe1:
121*5d9d9091SRichard Lowe	stx	%o1, [%o5]
122*5d9d9091SRichard Lowe	addcc	%o3, 8, %o3
123*5d9d9091SRichard Lowe	bl,pt	%ncc, 1b		! loop until o3 has counted up to zero
124*5d9d9091SRichard Lowe	add	%o5, 8, %o5		! (delay slot) advance one doubleword
125*5d9d9091SRichard Lowe
126*5d9d9091SRichard Lowe	! Now sp1 is block aligned
	! The remaining count may have dropped below 64 during alignment;
	! in that case %o4 is zero and both block loops below are skipped.
127*5d9d9091SRichard Lowe.blkwr:
128*5d9d9091SRichard Lowe	and	%o2, 63, %o3		! calc bytes left after blk store.
129*5d9d9091SRichard Lowe	andn	%o2, 63, %o4		! calc size of blocks in bytes
130*5d9d9091SRichard Lowe
131*5d9d9091SRichard Lowe	cmp	%o4, 0x100		! check if there are 256 bytes to set
132*5d9d9091SRichard Lowe	blu,pn	%ncc, 3f		! no: go try the 64-byte loop
133*5d9d9091SRichard Lowe	nop
	! Set 256 bytes (four 64-byte blocks) per iteration.  The first
	! doubleword of each of the four blocks is stored first, followed
	! by the remaining seven doublewords of each block.
	! NOTE(review): this ordering presumably relies on the semantics of
	! ASI_BLK_INIT_ST_QUAD_LDD_P, which are defined outside this file;
	! confirm before reordering any of these stores.
134*5d9d9091SRichard Lowe2:
135*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x0]%asi
136*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x40]%asi
137*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x80]%asi
138*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0xc0]%asi
139*5d9d9091SRichard Lowe
140*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x8]%asi
141*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x10]%asi
142*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x18]%asi
143*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x20]%asi
144*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x28]%asi
145*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x30]%asi
146*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x38]%asi
147*5d9d9091SRichard Lowe
148*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x48]%asi
149*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x50]%asi
150*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x58]%asi
151*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x60]%asi
152*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x68]%asi
153*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x70]%asi
154*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x78]%asi
155*5d9d9091SRichard Lowe
156*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x88]%asi
157*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x90]%asi
158*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x98]%asi
159*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0xa0]%asi
160*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0xa8]%asi
161*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0xb0]%asi
162*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0xb8]%asi
163*5d9d9091SRichard Lowe
164*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0xc8]%asi
165*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0xd0]%asi
166*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0xd8]%asi
167*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0xe0]%asi
168*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0xe8]%asi
169*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0xf0]%asi
170*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0xf8]%asi
171*5d9d9091SRichard Lowe
172*5d9d9091SRichard Lowe	sub	%o4, 0x100, %o4		! 256 fewer block bytes left to set
173*5d9d9091SRichard Lowe	cmp	%o4, 0x100
174*5d9d9091SRichard Lowe	bgu,pt	%ncc, 2b		! repeat only while more than 256 remain;
					! exactly 256 falls through to loop 4
175*5d9d9091SRichard Lowe	add	%o5, 0x100, %o5		! (delay slot) step past the 256 bytes
176*5d9d9091SRichard Lowe
177*5d9d9091SRichard Lowe3:
178*5d9d9091SRichard Lowe	cmp	%o4, 0x40		! check if 64 bytes to set
179*5d9d9091SRichard Lowe	blu	%ncc, 5f		! no whole block left
180*5d9d9091SRichard Lowe	nop
	! Set one 64-byte block per iteration.
181*5d9d9091SRichard Lowe4:
182*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x0]%asi
183*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x8]%asi
184*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x10]%asi
185*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x18]%asi
186*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x20]%asi
187*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x28]%asi
188*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x30]%asi
189*5d9d9091SRichard Lowe	stxa	%o1, [%o5+0x38]%asi
190*5d9d9091SRichard Lowe
191*5d9d9091SRichard Lowe	subcc	%o4, 0x40, %o4
192*5d9d9091SRichard Lowe	bgu,pt	%ncc, 4b		! loop while block bytes remain
193*5d9d9091SRichard Lowe	add	%o5, 0x40, %o5		! (delay slot) step past the block
194*5d9d9091SRichard Lowe
	! Reached after the block loops, and also directly from .blkalign
	! when fewer than 64 bytes were left (no stxa executed in that case).
	! Here %o3 is the number of bytes still to be set (less than 64).
195*5d9d9091SRichard Lowe5:
196*5d9d9091SRichard Lowe	! Set the remaining doubles
	! NOTE(review): the membar presumably orders the stxa stores above
	! ahead of the ordinary stores that follow -- confirm against the
	! ASI documentation.
197*5d9d9091SRichard Lowe	membar	#Sync
198*5d9d9091SRichard Lowe	mov	ASI_PNF, %asi		! restore %asi to default
199*5d9d9091SRichard Lowe					! ASI_PRIMARY_NOFAULT value
200*5d9d9091SRichard Lowe	subcc	%o3, 8, %o3		! Can we store any doubles?
201*5d9d9091SRichard Lowe	blu,pn	%ncc, .wrchar		! fewer than 8 left: bytes only
202*5d9d9091SRichard Lowe	and	%o2, 7, %o2		! calc bytes left after doubles
203*5d9d9091SRichard Lowe
204*5d9d9091SRichard Lowe6:
205*5d9d9091SRichard Lowe	stx	%o1, [%o5]		! store the doubles
206*5d9d9091SRichard Lowe	subcc	%o3, 8, %o3
207*5d9d9091SRichard Lowe	bgeu,pt	%ncc, 6b		! loop while the subtract did not borrow
208*5d9d9091SRichard Lowe	add	%o5, 8, %o5		! (delay slot) advance one doubleword
209*5d9d9091SRichard Lowe
210*5d9d9091SRichard Lowe	ba	.wrchar			! finish with the trailing bytes
211*5d9d9091SRichard Lowe	nop
212*5d9d9091SRichard Lowe
	! 7 to 31 bytes to set: store single bytes until the destination is
	! 4-byte aligned, then store words.  %o1 holds 4 copies of c here.
213*5d9d9091SRichard Lowe.wdalign:
214*5d9d9091SRichard Lowe	andcc	%o5, 3, %o3		! is sp1 aligned on a word boundary
215*5d9d9091SRichard Lowe	bz,pn	%ncc, .wrword
216*5d9d9091SRichard Lowe	andn	%o2, 3, %o3		! create word sized count in %o3
217*5d9d9091SRichard Lowe
218*5d9d9091SRichard Lowe	dec	%o2			! decrement count
219*5d9d9091SRichard Lowe	stb	%o1, [%o5]		! clear a byte
220*5d9d9091SRichard Lowe	b	.wdalign		! re-test the alignment
221*5d9d9091SRichard Lowe	inc	%o5			! next byte
222*5d9d9091SRichard Lowe
	! %o3 is a multiple of 4 and at least 4 here: the count was at least
	! 7 on entry and at most 3 bytes were consumed reaching alignment.
223*5d9d9091SRichard Lowe.wrword:
224*5d9d9091SRichard Lowe	st	%o1, [%o5]		! 4-byte writing loop
225*5d9d9091SRichard Lowe	subcc	%o3, 4, %o3
226*5d9d9091SRichard Lowe	bnz,pt	%ncc, .wrword		! loop until the word count is used up
227*5d9d9091SRichard Lowe	inc	4, %o5			! (delay slot) advance one word
228*5d9d9091SRichard Lowe
229*5d9d9091SRichard Lowe	and	%o2, 3, %o2		! leftover count, if any
230*5d9d9091SRichard Lowe
	! Common tail for every path: %o2 = bytes left, %o5 = next byte to
	! set.  Only the low byte of %o1 is stored.
231*5d9d9091SRichard Lowe.wrchar:
232*5d9d9091SRichard Lowe	! Set the remaining bytes, if any
233*5d9d9091SRichard Lowe	cmp	%o2, 0
234*5d9d9091SRichard Lowe	be	%ncc, .exit		! nothing left (also covers n == 0)
235*5d9d9091SRichard Lowe	nop
236*5d9d9091SRichard Lowe
237*5d9d9091SRichard Lowe7:
238*5d9d9091SRichard Lowe	deccc	%o2			! one fewer byte to set
239*5d9d9091SRichard Lowe	stb	%o1, [%o5]
240*5d9d9091SRichard Lowe	bgu,pt	%ncc, 7b		! loop until the count reaches zero
241*5d9d9091SRichard Lowe	inc	%o5			! (delay slot) advance to next byte
242*5d9d9091SRichard Lowe
243*5d9d9091SRichard Lowe.exit:
244*5d9d9091SRichard Lowe	retl				! %o0 was preserved
245*5d9d9091SRichard Lowe	nop
246*5d9d9091SRichard Lowe
247*5d9d9091SRichard Lowe	SET_SIZE(memset)
248