xref: /titanic_44/usr/src/uts/sun4u/sys/fpras_impl.h (revision 5c51f1241dbbdf2656d0e10011981411ed0c9673)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_SYS_FPRAS_IMPL_H
28 #define	_SYS_FPRAS_IMPL_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/fpras.h>
33 
34 #if !defined(_ASM)
35 #include <sys/types.h>
36 #else
37 #include <sys/intreg.h>
38 #include <sys/errno.h>
39 #endif	/* _ASM */
40 
41 #ifdef	__cplusplus
42 extern "C" {
43 #endif
44 
45 /*
46  * sun4u/cheetah fpRAS implementation.  Arrays etc will be allocated in sun4u
47  * post_startup() if fpras_implemented is set.  This file may belong at
48  * the cpu level (eg, cheetahregs.h) but most of it should be common
49  * when fpRAS support is added for additional cpu types so we introduce
50  * it at the sun4u level (and set fpras_implemented in cpu_setup).
51  *
52  * If fpRAS is implemented on a sun4u/cpu combination that does not use
53  * an ASR for %stick then the FPRAS_INTERVAL macro will need some
54  * modification.
55  */
56 
57 /*
58  * Upper bound for check frequency per cpu and per operation.  For example, if
59  * this is 100 then for cpuid N performing a bcopy if that cpu has not
60  * performed a checked bcopy in the last 1/100th of a second then
61  * we'll check the current operation.  A value of 0 will check every operation.
62  * Modifying fpras_frequency from its default is not recommended.
63  * fpras_interval is computed from fpras_frequency.
64  */
65 #if !defined(_ASM)
66 extern int fpras_frequency;	/* max checks/sec; 0 = check all */
67 extern int64_t fpras_interval;	/* from fpras_frequency; <0 if unset */
68 #endif	/* !_ASM */
69 #define	FPRAS_DEFAULT_FREQUENCY	100
70 
71 #if !defined(_ASM)
72 
73 /*
74  * Structure of a check function.  The preamble prepares registers for the
75  * upcoming calculation that is performed in blk0 and blk1.  One of those
76  * blocks will be rewritten as part of an FPRAS_REWRITE operation.  Finally
77  * the result checked in chkresult should be as predetermined, and we should
78  * return zero on success and nonzero on failure.  If an illegal instruction
79  * is encountered in the execution of the check function then we trampoline
80  * to the final three instructions to return a different value.
81  *
82  * Note that the size of this structure is a power of 2 as is the
83  * size of a struct fpras_chkfngrp.  The asm macros below rely on this
84  * in performing bit shifts instead of mulx.
85  */
86 struct fpras_chkfn {
87 	uint32_t	fpras_preamble[16];	/* register setup */
88 	uint32_t	fpras_blk0[16];		/* calculation; rewritable */
89 	uint32_t	fpras_blk1[16];		/* calculation; rewritable */
90 	uint32_t	fpras_chkresult[13];	/* checks result, returns */
91 	uint32_t	fpras_trampoline[3];	/* illegal instr. return */
92 };
93 
94 /*
95  * Check function constructed to match a struct fpras_chkfn
96  */
97 extern int fpras_chkfn_type1(void);	/* source of REWRITE block copies */
98 
99 /*
100  * A group of check functions, one for each operation type.  These will
101  * be the check functions for copy operations on a particular processor.
102  */
103 struct fpras_chkfngrp {
104 	struct fpras_chkfn fpras_fn[FPRAS_NCOPYOPS];	/* one per copy op */
105 };
106 
107 /*
108  * Where we store check functions for execution.  Indexed by cpuid and
109  * function within that for cacheline friendliness.  Startup code
110  * copies the check function into this array.  The fpRAS mechanism will
111  * rewrite one of fpras_blk0 or fpras_blk1 before calling the check function
112  * for a cpuid & copy function combination.
113  */
114 extern struct fpras_chkfngrp *fpras_chkfngrps;	/* indexed by cpuid */
115 
116 #endif	/* !_ASM */
117 
118 #if defined(_ASM)
119 
120 /* BEGIN CSTYLED */
121 
122 /*
123  * The INTERVAL macro decides whether we will check this copy operation,
124  * based on performing no more than 1 check per cpu & operation in a specified
125  * time interval.  If it decides to abort this check (ie, we have checked
126  * recently) then it returns doex NULL, otherwise doex is the address of the
127  * check function to execute later.  Migration must have been prevented before
128  * calling this macro.  Args:
129  *
130  *	operation (immediate): one of FPRAS_BCOPY etc
131  *	blk (immediate): which block to copy
132  *	doex (register): register in which to return check function address
133  *	tmp1 (register): used for scratch, not preserved
134  *	tmp2 (register): used for scratch, not preserved
135  *	tmp3 (register): used for scratch, not preserved
136  *	tmp4 (register): used for scratch, not preserved
137  *	label: free local numeric label
138  */
139 
140 #define	FPRAS_INTERVAL(operation, blk, doex, tmp1, tmp2, tmp3, tmp4, label) \
141 	sethi	%hi(fpras_interval), tmp1				;\
142 	ldx	[tmp1 + %lo(fpras_interval)], tmp1			;\
143 	brlz,pn	tmp1, label/**/f	/* not initialized? */		;\
144 	  clr	doex			/* delay slot: doex = NULL */	;\
145 	sethi	%hi(fpras_disableids), tmp2				;\
146 	ld	[tmp2 + %lo(fpras_disableids)], tmp2			;\
147 	mov	0x1, tmp3						;\
148 	sll	tmp3, operation, tmp3	/* bit for this operation */	;\
149 	btst	tmp3, tmp2						;\
150 	bnz,a,pn %icc, label/**/f	/* disabled for this op? */	;\
151 	  nop								;\
152 	set	fpras_chkfn_type1, tmp2	/* prefetch source block */	;\
153 	prefetch [tmp2 + (FPRAS_BLK0 + blk * 64)], #one_read		;\
154 	ldn	[THREAD_REG + T_CPU], tmp2				;\
155 	ldn	[tmp2 + CPU_PRIVATE], tmp2				;\
156 	brz,pn	tmp2, label/**/f	/* early in startup? */		;\
157 	  mov	operation, tmp3						;\
158 	sll	tmp3, 3, tmp3		/* 8-byte slot per operation */	;\
159 	set	CHPR_FPRAS_TIMESTAMP, tmp4				;\
160 	add	tmp2, tmp4, tmp2					;\
161 	add	tmp2, tmp3, tmp2	/* keep ptr for update */	;\
162 	ldx	[tmp2], tmp3		/* last timestamp */		;\
163 	rd	STICK, doex		/* doex is a scratch here */	;\
164 	sub	doex, tmp3, tmp4	/* delta since last check */	;\
165 	cmp	tmp4, tmp1		/* compare delta to interval */	;\
166 	blu,a,pn %xcc, label/**/f	/* checked recently? */		;\
167 	  clr	doex			/* annulled unless taken */	;\
168 	stx	doex, [tmp2]		/* updated timestamp */		;\
169 	ldn	[THREAD_REG + T_CPU], tmp1				;\
170 	ld	[tmp1 + CPU_ID], tmp1					;\
171 	sethi	%hi(fpras_chkfngrps), doex				;\
172 	ldn	[doex + %lo(fpras_chkfngrps)], doex			;\
173 	sll	tmp1, FPRAS_CHKFNGRP_SIZE_SHIFT, tmp1			;\
174 	add	doex, tmp1, doex	/* this cpu's group */		;\
175 	mov	operation, tmp1						;\
176 	sll	tmp1, FPRAS_CHKFN_SIZE_SHIFT, tmp1			;\
177 	add	doex, tmp1, doex	/* address of check function */	;\
178 label:
179 
180 /*
181  * The REWRITE macro copies an instruction block from fpras_chkfn_type1
182  * into a per-cpu fpras check function.
183  * If doex is NULL it must not attempt any copy, and must leave doex NULL.
184  * CPU migration of this thread must be prevented before we call this macro.
185  * We must have checked for fp in use (and saved state, including the
186  * quadrant of registers indicated by the fpq argument) and fp enabled before
187  * using this macro.  Args:
188  *
189  *	blk (immediate): as above
190  *	doex (register): register in which to return check function addr
191  *	[fpq (fp register): frf quadrant to be used (%f0/%f16/%f32/%f48)]
192  *		This is used on type 1 rewrite only - on others the
193  *		quadrant is implicit/hardcoded in the macro name.
194  *	tmp1 (register): used for scratch, not preserved
195  *	label1: free local numeric label
196  *	[label2: free local numeric label]
197  *		This is used in type 2 only.
198  *
199  * Note that the REWRITE macros do not perform a flush instruction -
200  * flush is not necessary on Cheetah derivative processors in which
201  * i$ snoops for invalidations.
202  */
203 
204 /*
205  * Rewrite type 1 will work with any instruction pattern - it just block
206  * loads and block stores the given block.  A membar after block store
207  * forces the block store to complete before upcoming reuse of the
208  * fpregs in the block;  the block load is blocking on sun4u/cheetah
209  * so no need for a membar after it.
210  */
211 
211 #define	FPRAS_REWRITE_TYPE1(blk, doex, fpq, tmp1, label)	\
212 	brz,pn  doex, label/**/f	/* doex NULL: skip */	;\
213 	  sethi	%hi(fpras_chkfn_type1), tmp1			;\
214 	add	tmp1, %lo(fpras_chkfn_type1), tmp1		;\
215 	add	tmp1, FPRAS_BLK0 + blk * 64, tmp1		;\
216 	ldda	[tmp1]ASI_BLK_P, fpq	/* load template blk */	;\
217 	add	doex, FPRAS_BLK0 + blk * 64, tmp1		;\
218 	stda	fpq, [tmp1]ASI_BLK_P	/* store to doex fn */	;\
219 	membar	#Sync			/* stda must finish */	;\
220 label:
222 
223 /*
224  * Rewrite type 2 will only work with instruction blocks that satisfy
225  * this particular repeat pattern.  Note that the frf quadrant to
226  * use is implicit in the macro name and had better match what the
227  * copy function is preserving.
228  *
229  * The odd looking repetition in the initial loop is designed to open
230  * up both paths from prefetch cache to the frf - unrolling the loop
231  * would defeat this.  In addition we perform idempotent faligndata
232  * manipulations using %tick as a randomly aligned address (this only
233  * works for addresses that aren't doubleword aligned).
234  */
235 #define	FPRAS_REWRITE_TYPE2Q1(blk, doex, tmp1, tmp2, label1, label2)	\
236 	brz,pn	doex, label1/**/f	/* doex NULL: nothing to do */	;\
237 	  mov	0x2, tmp1		/* load loop runs twice */	;\
238 	set	fpras_chkfn_type1, tmp2					;\
239 label2:				/* redefined below; b/f pick nearest */	;\
240 	deccc		tmp1						;\
241 	ldd		[tmp2 + (FPRAS_BLK0 + blk * 64)], %f4		;\
242 	ldd		[tmp2 + (FPRAS_BLK0 + blk * 64) + 8], %f2	;\
243 	bnz,a,pt	%icc, label2/**/b	/* while tmp1 != 0 */	;\
244 	  fsrc1		%f4, %f0	/* annulled on loop exit */	;\
245 	rdpr		%tick, tmp1	/* pseudo-random alignment */	;\
246 	fsrc1		%f4, %f8					;\
247 	fsrc1		%f2, %f10					;\
248 	btst		0x7, tmp1	/* doubleword aligned? */	;\
249 	alignaddr	tmp1, %g0, %g0	/* changes %gsr */		;\
250 	bz,pn		%icc, label2/**/f	/* aligned: skip */	;\
251 	  faligndata	%f2, %f4, %f6	/* delay slot, always runs */	;\
252 	faligndata	%f0, %f2, %f12					;\
253 	alignaddrl	tmp1, %g0, %g0					;\
254 	faligndata	%f12, %f6, %f6					;\
255 label2:									;\
256 	add		doex, FPRAS_BLK0 + blk * 64, tmp1		;\
257 	fsrc2		%f8, %f12					;\
258 	fsrc1		%f6, %f14					;\
259 	stda		%f0, [tmp1]ASI_BLK_P	/* store 64-byte blk */	;\
260 	membar		#Sync		/* wait for block store */	;\
261 label1:
262 
263 #define	FPRAS_REWRITE_TYPE2Q2(blk, doex, tmp1, tmp2, label1, label2)	\
264 	brz,pn	doex, label1/**/f	/* doex NULL: nothing to do */	;\
265 	  mov	0x2, tmp1		/* load loop runs twice */	;\
266 	set	fpras_chkfn_type1, tmp2					;\
267 label2:				/* redefined below; b/f pick nearest */	;\
268 	deccc		tmp1						;\
269 	ldd		[tmp2 + (FPRAS_BLK0 + blk * 64)], %f20	;\
270 	ldd		[tmp2 + (FPRAS_BLK0 + blk * 64) + 8], %f18	;\
271 	bnz,a,pt	%icc, label2/**/b	/* while tmp1 != 0 */	;\
272 	  fsrc1		%f20, %f16	/* annulled on loop exit */	;\
273 	rdpr		%tick, tmp1	/* pseudo-random alignment */	;\
274 	fsrc1		%f20, %f24					;\
275 	fsrc1		%f18, %f26					;\
276 	btst		0x7, tmp1	/* doubleword aligned? */	;\
277 	alignaddr	tmp1, %g0, %g0	/* changes %gsr */		;\
278 	bz,pn		%icc, label2/**/f	/* aligned: skip */	;\
279 	  faligndata	%f18, %f20, %f22 /* delay slot, always runs */	;\
280 	faligndata	%f16, %f18, %f28				;\
281 	alignaddrl	tmp1, %g0, %g0					;\
282 	faligndata	%f28, %f22, %f22				;\
283 label2:									;\
284 	add		doex, FPRAS_BLK0 + blk * 64, tmp1		;\
285 	fsrc2		%f24, %f28					;\
286 	fsrc1		%f22, %f30					;\
287 	stda		%f16, [tmp1]ASI_BLK_P	/* store 64-byte blk */	;\
288 	membar		#Sync		/* wait for block store */	;\
289 label1:
290 
291 /*
292  * The CHECK macro takes the 'doex' address of the check function to
293  * execute and jumps to it (if not NULL). If the check function returns
294  * nonzero then the check has failed and the CHECK macro must initiate
295  * an appropriate failure action.  Illegal instruction trap handlers
296  * will also recognise traps in this PC range as fp failures.  Thread
297  * migration must only be reallowed after completion of this check.  The
298  * CHECK macro should be treated as a CALL/JMPL - output registers are
299  * forfeit after using it.  If the call to fpras_failure returns
300  * (it may decide to panic) then invoke lofault handler (which must exist)
301  * to return an error (be sure to use this macro before restoring original
302  * lofault setup in copy functions).  Note that the lofault handler is the
303  * copyops aware proxy handler which will perform other tidy up operations
304  * (unbind, fp state restore) that would normally have been done in the tail
305  * of the copy function.
306  *
307  *	operation (immediate): as above
308  *	doex (register): doex value returned from the REWRITE
309  *	label: free local numeric label
310  */
311 
312 #define	FPRAS_CHECK(operation, doex, label)				\
313 	brz,pn	doex, label/**/f	/* nothing to check? */		;\
314 	  nop								;\
315 	jmpl	doex, %o7		/* call check function */	;\
316 	  nop								;\
317 	cmp	%o0, FPRAS_OK		/* check passed? */		;\
318 	be	%icc, label/**/f					;\
319 	  nop								;\
320 	mov	%o0, %o1	/* how detected */			;\
321 	call	fpras_failure	/* take failure action */		;\
322 	  mov	operation, %o0	/* which operation */			;\
323 	ldn	[THREAD_REG + T_LOFAULT], doex	/* lofault handler */	;\
324 	jmp	doex							;\
325 	  mov	EFAULT, %g1		/* error passed in %g1 */	;\
326 label:
327 
328 /* END CSTYLED */
329 
330 #endif	/* _ASM */
331 
332 #ifdef	__cplusplus
333 }
334 #endif
335 
336 #endif	/* _SYS_FPRAS_IMPL_H */
337