xref: /titanic_53/usr/src/lib/libc/capabilities/sun4u/common/memset.s (revision 1e49577a7fcde812700ded04431b49d67cc57d6d)
1*1e49577aSRod Evans/*
2*1e49577aSRod Evans * CDDL HEADER START
3*1e49577aSRod Evans *
4*1e49577aSRod Evans * The contents of this file are subject to the terms of the
5*1e49577aSRod Evans * Common Development and Distribution License (the "License").
6*1e49577aSRod Evans * You may not use this file except in compliance with the License.
7*1e49577aSRod Evans *
8*1e49577aSRod Evans * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*1e49577aSRod Evans * or http://www.opensolaris.org/os/licensing.
10*1e49577aSRod Evans * See the License for the specific language governing permissions
11*1e49577aSRod Evans * and limitations under the License.
12*1e49577aSRod Evans *
13*1e49577aSRod Evans * When distributing Covered Code, include this CDDL HEADER in each
14*1e49577aSRod Evans * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*1e49577aSRod Evans * If applicable, add the following below this CDDL HEADER, with the
16*1e49577aSRod Evans * fields enclosed by brackets "[]" replaced with your own identifying
17*1e49577aSRod Evans * information: Portions Copyright [yyyy] [name of copyright owner]
18*1e49577aSRod Evans *
19*1e49577aSRod Evans * CDDL HEADER END
20*1e49577aSRod Evans */
21*1e49577aSRod Evans
22*1e49577aSRod Evans/*
23*1e49577aSRod Evans * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
24*1e49577aSRod Evans */
25*1e49577aSRod Evans
26*1e49577aSRod Evans	.file	"memset.s"
27*1e49577aSRod Evans
28*1e49577aSRod Evans/*
 29*1e49577aSRod Evans * void *memset(sp, c, n)
30*1e49577aSRod Evans *
31*1e49577aSRod Evans * Set an array of n chars starting at sp to the character c.
32*1e49577aSRod Evans * Return sp.
33*1e49577aSRod Evans *
34*1e49577aSRod Evans * Fast assembler language version of the following C-program for memset
35*1e49577aSRod Evans * which represents the `standard' for the C-library.
36*1e49577aSRod Evans *
37*1e49577aSRod Evans *	void *
38*1e49577aSRod Evans *	memset(void *sp1, int c, size_t n)
39*1e49577aSRod Evans *	{
40*1e49577aSRod Evans *	    if (n != 0) {
41*1e49577aSRod Evans *		char *sp = sp1;
42*1e49577aSRod Evans *		do {
43*1e49577aSRod Evans *		    *sp++ = (char)c;
44*1e49577aSRod Evans *		} while (--n != 0);
45*1e49577aSRod Evans *	    }
46*1e49577aSRod Evans *	    return (sp1);
47*1e49577aSRod Evans *	}
48*1e49577aSRod Evans */
49*1e49577aSRod Evans
50*1e49577aSRod Evans#include <sys/asm_linkage.h>
51*1e49577aSRod Evans#include <sys/sun4asi.h>
52*1e49577aSRod Evans
 53*1e49577aSRod Evans	ANSI_PRAGMA_WEAK(memset,function)
 54*1e49577aSRod Evans
	! Round X up to the next multiple of 8; used to locate the 8-byte
	! stack scratch slot above MINFRAME in the block-store path.
 55*1e49577aSRod Evans#define	ALIGN8(X)	(((X) + 7) & ~7)
 56*1e49577aSRod Evans
 57*1e49577aSRod Evans	.section        ".text"
 58*1e49577aSRod Evans	.align 32		! 32-byte-align the entry point
59*1e49577aSRod Evans
 60*1e49577aSRod Evans	ENTRY(memset)
	!
	! Register usage throughout:
	!   %o0 - sp1; never written after entry, so it is still the return
	!         value at every retl
	!   %o1 - c; widened in stages from 1 to 8 replicated bytes of (char)c
	!   %o2 - n; bytes remaining
	!   %o3, %o4 - scratch (alignment residue, loop counts)
	!   %o5 - working destination pointer
	!   %g1 - saved %fprs (block-store path only)
	!
	! Three strategies by size: n < 12 byte loop; 12 <= n <= 4095
	! 8-byte stx loop; n > 4095 VIS 64-byte block stores.
	!
 61*1e49577aSRod Evans	cmp	%o2, 12			! if small counts, just write bytes
 62*1e49577aSRod Evans	bgeu,pn	%ncc, .wrbig
 63*1e49577aSRod Evans	mov	%o0, %o5		! copy sp1 before using it
 64*1e49577aSRod Evans
 65*1e49577aSRod Evans.wrchar:
	! Tiny case (n < 12).  The store sits in an annulled delay slot, so
	! when deccc underflows %o2 (all n bytes done, including n == 0) the
	! final stb is squashed.
 66*1e49577aSRod Evans	deccc   %o2			! byte clearing loop
 67*1e49577aSRod Evans        inc     %o5
 68*1e49577aSRod Evans	bgeu,a,pt %ncc, .wrchar
 69*1e49577aSRod Evans        stb     %o1, [%o5 + -1]         ! we've already incremented the address
 70*1e49577aSRod Evans
 71*1e49577aSRod Evans        retl
 72*1e49577aSRod Evans	.empty	! next instruction is safe, %o0 still good
 73*1e49577aSRod Evans
 74*1e49577aSRod Evans.wrbig:
	! n >= 12: store single bytes until %o5 is 8-byte aligned, then fall
	! into .blkchk.  %o3 = (%o5 & 7) - 8 counts up to 0 as we go.
 75*1e49577aSRod Evans        andcc	%o5, 7, %o3		! is sp1 aligned on a 8 byte bound
 76*1e49577aSRod Evans        bz,pt	%ncc, .blkchk		! already double aligned
 77*1e49577aSRod Evans	and	%o1, 0xff, %o1		! o1 is (char)c
 78*1e49577aSRod Evans        sub	%o3, 8, %o3		! -(bytes till double aligned)
 79*1e49577aSRod Evans        add	%o2, %o3, %o2		! update o2 with new count
 80*1e49577aSRod Evans
 81*1e49577aSRod Evans	! Set -(%o3) bytes till sp1 double aligned
 82*1e49577aSRod Evans1:	stb	%o1, [%o5]		! there is at least 1 byte to set
 83*1e49577aSRod Evans	inccc	%o3			! byte clearing loop
 84*1e49577aSRod Evans        bl,pt	%ncc, 1b
 85*1e49577aSRod Evans        inc	%o5
 86*1e49577aSRod Evans
 87*1e49577aSRod Evans
 88*1e49577aSRod Evans	! Now sp1 is double aligned (sp1 is found in %o5)
 89*1e49577aSRod Evans.blkchk:
	! Replicate the fill byte across all 8 bytes of %o1 (1 -> 2 -> 4 -> 8
	! bytes).  The size comparison is interleaved with the widening; its
	! branch delay slot (andcc) is annulled, so the flags/%o3 it computes
	! exist only on the taken (.blkwr) path.
 90*1e49577aSRod Evans	sll     %o1, 8, %o3
 91*1e49577aSRod Evans        or      %o1, %o3, %o1		! now o1 has 2 bytes of c
 92*1e49577aSRod Evans
 93*1e49577aSRod Evans        sll     %o1, 16, %o3
 94*1e49577aSRod Evans        or      %o1, %o3, %o1		! now o1 has 4 bytes of c
 95*1e49577aSRod Evans
 96*1e49577aSRod Evans	cmp     %o2, 4095		! if large count use Block ld/st
 97*1e49577aSRod Evans
 98*1e49577aSRod Evans	sllx	%o1, 32, %o3
 99*1e49577aSRod Evans	or	%o1, %o3, %o1		! now o1 has 8 bytes of c
100*1e49577aSRod Evans
101*1e49577aSRod Evans        bgu,a,pn %ncc, .blkwr		! Do block write for large count
102*1e49577aSRod Evans        andcc   %o5, 63, %o3            ! is sp1 block aligned?
103*1e49577aSRod Evans
	! Medium case (12 <= n <= 4095): dword stores.  First write (n & 24)
	! bytes so the rest is a multiple of 32, then 32 bytes per iteration.
104*1e49577aSRod Evans	and	%o2, 24, %o3		! o3 is {0, 8, 16, 24}
105*1e49577aSRod Evans
106*1e49577aSRod Evans1:	subcc	%o3, 8, %o3		! double-word loop
107*1e49577aSRod Evans	add	%o5, 8, %o5
108*1e49577aSRod Evans	bgeu,a,pt %ncc, 1b
109*1e49577aSRod Evans	stx	%o1, [%o5 - 8]		! already incremented the address
	! Invariant from here to the retl below: %o5 is 8 past the next
	! store position (the add above runs once more than the annulled
	! stx), hence the biased offsets [%o5 - 8] and [%o5 - 9].
110*1e49577aSRod Evans
111*1e49577aSRod Evans	andncc	%o2, 31, %o4		! o4 has 32 byte aligned count
112*1e49577aSRod Evans	bz,pn	%ncc, 3f		! First instruction of icache line
113*1e49577aSRod Evans2:
114*1e49577aSRod Evans	subcc	%o4, 32, %o4		! main loop, 32 bytes per iteration
115*1e49577aSRod Evans	stx	%o1, [%o5 - 8]
116*1e49577aSRod Evans	stx	%o1, [%o5]
117*1e49577aSRod Evans	stx	%o1, [%o5 + 8]
118*1e49577aSRod Evans	stx	%o1, [%o5 + 16]
119*1e49577aSRod Evans	bnz,pt	%ncc, 2b
120*1e49577aSRod Evans	add	%o5, 32, %o5
121*1e49577aSRod Evans
122*1e49577aSRod Evans3:
123*1e49577aSRod Evans	and	%o2, 7, %o2		! o2 has the remaining bytes (<8)
124*1e49577aSRod Evans
125*1e49577aSRod Evans4:
	! Trailing-byte loop; annulled delay slot handles the zero-remainder
	! case, and -9 = -1 (pre-incremented) - 8 (pointer bias, see above).
126*1e49577aSRod Evans	deccc   %o2                     ! byte clearing loop
127*1e49577aSRod Evans        inc     %o5
128*1e49577aSRod Evans        bgeu,a,pt %ncc, 4b
129*1e49577aSRod Evans        stb     %o1, [%o5 - 9]		! already incremented the address
130*1e49577aSRod Evans
131*1e49577aSRod Evans	retl
132*1e49577aSRod Evans	nop				! %o0 still preserved
133*1e49577aSRod Evans
134*1e49577aSRod Evans.blkwr:
	! Large case (n > 4095).  Flags and %o3 = %o5 & 63 come from the
	! andcc in the delay slot of the bgu,a above.  %o5 is NOT biased on
	! this path: stores below go to [%o5] directly.
135*1e49577aSRod Evans        bz,pn   %ncc, .blalign		! now block aligned
136*1e49577aSRod Evans        sub	%o3, 64, %o3		! o3 is -(bytes till block aligned)
137*1e49577aSRod Evans	add	%o2, %o3, %o2		! o2 is the remainder
138*1e49577aSRod Evans
139*1e49577aSRod Evans        ! Store -(%o3) bytes till dst is block (64 byte) aligned.
140*1e49577aSRod Evans        ! Use double word stores.
141*1e49577aSRod Evans	! Recall that dst is already double word aligned
142*1e49577aSRod Evans1:
143*1e49577aSRod Evans        stx     %o1, [%o5]
144*1e49577aSRod Evans	addcc   %o3, 8, %o3
145*1e49577aSRod Evans	bl,pt	%ncc, 1b
146*1e49577aSRod Evans	add     %o5, 8, %o5
147*1e49577aSRod Evans
148*1e49577aSRod Evans	! sp1 is block aligned
149*1e49577aSRod Evans.blalign:
	! Save the caller's %fprs in %g1 and enable the FPU (fprs.fef) if it
	! was off; the original state is restored at .exit.
150*1e49577aSRod Evans        rd      %fprs, %g1              ! g1 = fprs
151*1e49577aSRod Evans
152*1e49577aSRod Evans	and	%o2, 63, %o3		! calc bytes left after blk store.
153*1e49577aSRod Evans
154*1e49577aSRod Evans	andcc	%g1, 0x4, %g1		! fprs.du = fprs.dl = 0
155*1e49577aSRod Evans	bz,a	%ncc, 2f		! Is fprs.fef == 0
156*1e49577aSRod Evans        wr      %g0, 0x4, %fprs         ! fprs.fef = 1
157*1e49577aSRod Evans2:
	! Fill %d0-%d14 (one 64-byte block) with the pattern.  %o1 holds 8
	! replicated copies of c, so it is zero iff c == 0: in that case
	! build the zeros purely in the FP unit (fzero, and fmuld of zeros,
	! which also yields zero) and skip the memory round-trip below.
158*1e49577aSRod Evans	brnz,pn	%o1, 3f			! %o1 is safe to check all 64-bits
159*1e49577aSRod Evans	andn	%o2, 63, %o4		! calc size of blocks in bytes
160*1e49577aSRod Evans	fzero   %d0
161*1e49577aSRod Evans	fzero   %d2
162*1e49577aSRod Evans	fzero   %d4
163*1e49577aSRod Evans	fzero   %d6
164*1e49577aSRod Evans	fmuld   %d0, %d0, %d8
165*1e49577aSRod Evans	fzero   %d10
166*1e49577aSRod Evans	ba	4f
167*1e49577aSRod Evans	fmuld   %d0, %d0, %d12
168*1e49577aSRod Evans
169*1e49577aSRod Evans3:
	! c != 0: bounce the 8-byte pattern through a stack scratch slot to
	! get it from %o1 into %d0, then fan it out with fmovd.
170*1e49577aSRod Evans	! allocate 8 bytes of scratch space on the stack
171*1e49577aSRod Evans	add	%sp, -SA(16), %sp
172*1e49577aSRod Evans	stx	%o1, [%sp + STACK_BIAS + ALIGN8(MINFRAME)]  ! move %o1 to %d0
173*1e49577aSRod Evans	ldd	[%sp + STACK_BIAS + ALIGN8(MINFRAME)], %d0
174*1e49577aSRod Evans
175*1e49577aSRod Evans	fmovd	%d0, %d2
176*1e49577aSRod Evans	add	%sp, SA(16), %sp	! deallocate the scratch space
177*1e49577aSRod Evans	fmovd	%d0, %d4
178*1e49577aSRod Evans	fmovd	%d0, %d6
179*1e49577aSRod Evans	fmovd	%d0, %d8
180*1e49577aSRod Evans	fmovd	%d0, %d10
181*1e49577aSRod Evans	fmovd	%d0, %d12
182*1e49577aSRod Evans4:
	! NOTE: the block-store loop branches back to 4:, so the fmovd below
	! is re-executed every iteration — redundant but harmless, and it
	! keeps the loop body starting on the alignment noted below.
183*1e49577aSRod Evans	fmovd	%d0, %d14
184*1e49577aSRod Evans
185*1e49577aSRod Evans	! 1st quadrant has 64 bytes of c
186*1e49577aSRod Evans	! instructions 32-byte aligned here
187*1e49577aSRod Evans
	! Main loop: one VIS block store (%d0-%d14 -> 64 bytes) per pass.
188*1e49577aSRod Evans        stda    %d0, [%o5]ASI_BLK_P
189*1e49577aSRod Evans        subcc   %o4, 64, %o4
190*1e49577aSRod Evans        bgu,pt	%ncc, 4b
191*1e49577aSRod Evans        add     %o5, 64, %o5
192*1e49577aSRod Evans
193*1e49577aSRod Evans	! Set the remaining doubles
194*1e49577aSRod Evans	subcc   %o3, 8, %o3		! Can we store any doubles?
195*1e49577aSRod Evans	blu,pn  %ncc, 6f
196*1e49577aSRod Evans	and	%o2, 7, %o2		! calc bytes left after doubles
197*1e49577aSRod Evans
198*1e49577aSRod Evans5:
199*1e49577aSRod Evans	std     %d0, [%o5]		! store the doubles
200*1e49577aSRod Evans	subcc   %o3, 8, %o3
201*1e49577aSRod Evans	bgeu,pt	%ncc, 5b
202*1e49577aSRod Evans        add     %o5, 8, %o5
203*1e49577aSRod Evans6:
204*1e49577aSRod Evans	! Set the remaining bytes
	! The (non-annulled) delay slot of this brz is the deccc below: it
	! executes even when we branch to .exit, but %o2 and the condition
	! codes are dead there, so that is harmless.
205*1e49577aSRod Evans	brz	%o2, .exit		! safe to check all 64-bits
206*1e49577aSRod Evans
207*1e49577aSRod Evans#if 0
208*1e49577aSRod Evans	! Terminate the copy with a partial store. (bug 1200071 does not apply)
209*1e49577aSRod Evans	! The data should be at d0
210*1e49577aSRod Evans        dec     %o2                     ! needed to get the mask right
211*1e49577aSRod Evans	edge8n	%g0, %o2, %o4
212*1e49577aSRod Evans	stda	%d0, [%o5]%o4, ASI_PST8_P
213*1e49577aSRod Evans#else
214*1e49577aSRod Evans7:
215*1e49577aSRod Evans	deccc	%o2
216*1e49577aSRod Evans	stb	%o1, [%o5]
217*1e49577aSRod Evans	bgu,pt	%ncc, 7b
218*1e49577aSRod Evans	inc	%o5
219*1e49577aSRod Evans#endif
220*1e49577aSRod Evans
221*1e49577aSRod Evans.exit:
	! Order the block stores against later memory operations, then
	! restore the caller's %fprs (FEF bit saved in %g1; du/dl cleared).
222*1e49577aSRod Evans        membar  #StoreLoad|#StoreStore
223*1e49577aSRod Evans        retl				! %o0 was preserved
224*1e49577aSRod Evans        wr	%g1, %g0, %fprs         ! fprs = g1  restore fprs
225*1e49577aSRod Evans
226*1e49577aSRod Evans	SET_SIZE(memset)
227