xref: /illumos-gate/usr/src/uts/sun4u/sys/fpras_impl.h (revision 10a40e179c111088c21d8e895198ac95dcb83d14)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_SYS_FPRAS_IMPL_H
28 #define	_SYS_FPRAS_IMPL_H
29 
30 #include <sys/fpras.h>
31 
32 #if !defined(_ASM)
33 #include <sys/types.h>
34 #else
35 #include <sys/intreg.h>
36 #include <sys/errno.h>
37 #endif	/* _ASM */
38 
39 #ifdef	__cplusplus
40 extern "C" {
41 #endif
42 
43 /*
44  * sun4u/cheetah fpRAS implementation.  Arrays etc will be allocated in sun4u
45  * post_startup() if fpras_implemented is set.  This file may belong at
46  * the cpu level (eg, cheetahregs.h) but most of it should be common
47  * when fpRAS support is added for additional cpu types so we introduce
48  * it at the sun4u level (and set fpras_implemented in cpu_setup).
49  *
50  * If fpRAS is implemented on a sun4u/cpu combination that does not use
51  * an ASR for %stick then the FPRAS_INTERVAL macro will need some
52  * modification.
53  */
54 
55 /*
56  * Upper bound for check frequency per cpu and per operation.  For example, if
57  * this is 100 then for cpuid N performing a bcopy if that cpu has not
58  * performed a checked bcopy in the last 1/100th of a second then
59  * we'll check the current operation.  A value of 0 will check every operation.
60  * Modifying fpras_frequency from its default is not recommended.
61  * fpras_interval is computed from fpras_frequency.
62  */
63 #if !defined(_ASM)
64 extern int fpras_frequency;	/* max checks per second, per cpu and op */
65 extern int64_t fpras_interval;	/* from fpras_frequency; FPRAS_INTERVAL skips checking while < 0 */
66 #endif	/* _ASM */
67 #define	FPRAS_DEFAULT_FREQUENCY	100	/* default for fpras_frequency */
68 
69 #if !defined(_ASM)
70 
71 /*
72  * Structure of a check function.  The preamble prepares registers for the
73  * upcoming calculation that is performed in blk0 and blk1.  One of those
74  * blocks will be rewritten as part of an FPRAS_REWRITE operation.  Finally
75  * the result checked in chkresult should be as predetermined, and we should
76  * return zero on success and nonzero on failure.  If an illegal instruction
77  * is encountered in the execution of the check function then we trampoline
78  * to the final three instructions to return a different value.
79  *
80  * Note that the size of this structure is a power of 2 as is the
81  * size of a struct fpras_chkfngrp.  The asm macros below rely on this
82  * in performing bit shifts instead of mulx.
83  */
84 struct fpras_chkfn {	/* 64 instruction words in total (a power of 2) */
85 	uint32_t	fpras_preamble[16];	/* prepares registers for the calculation */
86 	uint32_t	fpras_blk0[16];		/* calculation block 0 (64 bytes, rewritable) */
87 	uint32_t	fpras_blk1[16];		/* calculation block 1 (64 bytes, rewritable) */
88 	uint32_t	fpras_chkresult[13];	/* checks result: zero on success, else nonzero */
89 	uint32_t	fpras_trampoline[3];	/* illegal-instruction landing: returns a different value */
90 };
91 
92 /*
93  * Check function constructed to match a struct fpras_chkfn
94  */
95 extern int fpras_chkfn_type1(void);	/* template the REWRITE macros copy blocks from */
96 
97 /*
98  * A group of check functions, one for each operation type.  These will
99  * be the check functions for copy operations on a particular processor.
100  */
101 struct fpras_chkfngrp {	/* size is a power of 2; the asm macros index it with shifts */
102 	struct fpras_chkfn fpras_fn[FPRAS_NCOPYOPS];	/* one check function per copy operation */
103 };
104 
105 /*
106  * Where we store check functions for execution.  Indexed by cpuid and
107  * function within that for cacheline friendliness.  Startup code
108  * copies the check function into this array.  The fpRAS mechanism will
109  * rewrite one of fpras_blk0 or fpras_blk1 before calling the check function
110  * for a cpuid & copy function combination.
111  */
112 extern struct fpras_chkfngrp *fpras_chkfngrps;	/* array indexed by cpuid, then by operation */
113 
114 #endif	/* !_ASM */
115 
116 #if defined(_ASM)
117 
118 /* BEGIN CSTYLED */
119 
120 /*
121  * The INTERVAL macro decides whether we will check this copy operation,
122  * based on performing no more than 1 check per cpu & operation in a specified
123  * time interval.  If it decides to abort this check (ie, we have checked
124  * recently) then it returns doex NULL, otherwise doex is the address of the
125  * check function to execute later.  Migration must have been prevented before
126  * calling this macro.  Args:
127  *
128  *	operation (immediate): one of FPRAS_BCOPY etc
129  *	blk (immediate): which block to copy
130  *	doex (register): register in which to return check function address
131  *	tmp1 (register): used for scratch, not preserved
132  *	tmp2 (register): used for scratch, not preserved
133  *	tmp3 (register): used for scratch, not preserved
134  *	tmp4 (register): used for scratch, not preserved
135  *	label: free local numeric label
136  */
137 
138 #define	FPRAS_INTERVAL(operation, blk, doex, tmp1, tmp2, tmp3, tmp4, label) \
139 	sethi	%hi(fpras_interval), tmp1				;\
140 	ldx	[tmp1 + %lo(fpras_interval)], tmp1	/* tmp1 = fpras_interval */	;\
141 	brlz,pn	tmp1, label##f	/* not initialized? */		;\
142 	  clr	doex		/* delay slot: doex = NULL for the early exits below */	;\
143 	sethi	%hi(fpras_disableids), tmp2				;\
144 	ld	[tmp2 + %lo(fpras_disableids)], tmp2	/* tmp2 = fpras_disableids */	;\
145 	mov	0x1, tmp3						;\
146 	sll	tmp3, operation, tmp3	/* tmp3 = 1 << operation */	;\
147 	btst	tmp3, tmp2		/* is this operation's bit set? */	;\
148 	bnz,a,pn %icc, label##f	/* disabled for this op? */	;\
149 	  nop				/* delay slot, nothing to do */	;\
150 	set	fpras_chkfn_type1, tmp2					;\
151 	prefetch [tmp2 + (FPRAS_BLK0 + blk * 64)], #one_read	/* template block the REWRITE macros read */	;\
152 	ldn	[THREAD_REG + T_CPU], tmp2	/* tmp2 = this thread's cpu pointer */	;\
153 	ldn	[tmp2 + CPU_PRIVATE], tmp2	/* tmp2 = that cpu's private data pointer */	;\
154 	brz,pn	tmp2, label##f	/* early in startup? */		;\
155 	  mov	operation, tmp3		/* delay slot: always executed */	;\
156 	sll	tmp3, 3, tmp3		/* tmp3 = operation * 8, one 64-bit stamp per op */	;\
157 	set	CHPR_FPRAS_TIMESTAMP, tmp4	/* timestamp offset in the private data (defined elsewhere) */	;\
158 	add	tmp2, tmp4, tmp2					;\
159 	add	tmp2, tmp3, tmp2	/* keep ptr for update */	;\
160 	ldx	[tmp2], tmp3		/* last timestamp */		;\
161 	rd	STICK, doex		/* doex is a scratch here */	;\
162 	sub	doex, tmp3, tmp4	/* delta since last check */	;\
163 	cmp	tmp4, tmp1		/* compare delta to interval */	;\
164 	blu,a,pn %xcc, label##f	/* checked too recently (unsigned compare)? */	;\
165 	  clr	doex			/* annulled: runs only when skipping, doex = NULL */	;\
166 	stx	doex, [tmp2]		/* updated timestamp */		;\
167 	ldn	[THREAD_REG + T_CPU], tmp1	/* reload cpu pointer, tmp2 was reused */	;\
168 	ld	[tmp1 + CPU_ID], tmp1	/* tmp1 = cpuid */		;\
169 	sethi	%hi(fpras_chkfngrps), doex				;\
170 	ldn	[doex + %lo(fpras_chkfngrps)], doex	/* doex = fpras_chkfngrps base */	;\
171 	sll	tmp1, FPRAS_CHKFNGRP_SIZE_SHIFT, tmp1	/* cpuid scaled to a group offset by shift */	;\
172 	add	doex, tmp1, doex	/* doex = this cpu's check function group */	;\
173 	mov	operation, tmp1						;\
174 	sll	tmp1, FPRAS_CHKFN_SIZE_SHIFT, tmp1	/* operation scaled to a function offset */	;\
175 	add	doex, tmp1, doex	/* address of check function */	;\
176 label:
177 
178 /*
179  * The REWRITE macro copies an instruction block from fpras_chkfn_type1
180  * into a per-cpu fpras check function.
181  * If doex is NULL it must not attempt any copy, and must leave doex NULL.
182  * CPU migration of this thread must be prevented before we call this macro.
183  * We must have checked for fp in use (and saved state, including the
184  * quadrant of registers indicated by the fpq argument) and fp enabled before
185  * using this macro.  Args:
186  *
187  *	blk (immediate): as above
188  *	doex (register): register in which to return check function addr
189  *	[fpq (fp register): frf quadrant to be used (%f0/%f16/%f32/%f48)]
190  *		This is used on type 1 rewrite only - on others the
191  *		quadrant is implicit/hardcoded in the macro name.
192  *	tmp1 (register): used for scratch, not preserved
193  *	label1: free local numeric label
194  *	[label2: free local numeric label]
195  *		This is used in type 2 only.
196  *
197  * Note that the REWRITE macros do not perform a flush instruction -
198  * flush is not necessary on Cheetah derivative processors in which
199  * i$ snoops for invalidations.
200  */
201 
202 /*
203  * Rewrite type 1 will work with any instruction pattern - it just block
204  * loads and block stores the given block.  A membar after block store
205  * forces the block store to complete before upcoming reuse of the
206  * fpregs in the block;  the block load is blocking on sun4u/cheetah
207  * so no need for a membar after it.
208  */
209 
210 #define	FPRAS_REWRITE_TYPE1(blk, doex, fpq, tmp1, label)	\
211 	brz,pn  doex, label##f	/* no check selected: skip the copy */	;\
212 	  sethi	%hi(fpras_chkfn_type1), tmp1	/* delay slot: tmp1 is clobbered even when skipping */	;\
213 	add	tmp1, %lo(fpras_chkfn_type1), tmp1	/* tmp1 = fpras_chkfn_type1 */	;\
214 	add	tmp1, FPRAS_BLK0 + blk * 64, tmp1	/* tmp1 = source block in the template */	;\
215 	ldda	[tmp1]ASI_BLK_P, fpq	/* block load into the fpq quadrant */	;\
216 	add	doex, FPRAS_BLK0 + blk * 64, tmp1	/* tmp1 = same block in the per-cpu function */	;\
217 	stda	fpq, [tmp1]ASI_BLK_P	/* block store over the per-cpu copy */	;\
218 	membar	#Sync			/* store must complete before the fpregs are reused */	;\
219 label:
220 
221 /*
222  * Rewrite type 2 will only work with instruction blocks that satisfy
223  * this particular repeat pattern.  Note that the frf quadrant to
224  * use is implicit in the macro name and had better match what the
225  * copy function is preserving.
226  *
227  * The odd looking repetition in the initial loop is designed to open
228  * up both paths from prefetch cache to the frf - unrolling the loop
229  * would defeat this.  In addition we perform idempotent faligndata
230  * manipulations using %tick as a randomly aligned address (this only
231  * works for addresses that aren't doubleword aligned).
232  */
233 #define	FPRAS_REWRITE_TYPE2Q1(blk, doex, tmp1, tmp2, label1, label2)	\
234 	brz,pn	doex, label1##f	/* no check selected: skip the rewrite */	;\
235 	  mov	0x2, tmp1		/* delay slot: loop counter = 2 */	;\
236 	set	fpras_chkfn_type1, tmp2	/* tmp2 = template function */	;\
237 label2:									;\
238 	deccc		tmp1		/* count down, setting condition codes */	;\
239 	ldd		[tmp2 + (FPRAS_BLK0 + blk * 64)], %f4	/* first doubleword of the block */	;\
240 	ldd		[tmp2 + (FPRAS_BLK0 + blk * 64) + 8], %f2	/* second doubleword */	;\
241 	bnz,a,pt	%icc, label2##b	/* repeat the loads once */	;\
242 	  fsrc1		%f4, %f0	/* annulled: runs only when looping back */	;\
243 	rdpr		%tick, tmp1	/* %tick used as a randomly aligned address */	;\
244 	fsrc1		%f4, %f8	/* %f8 = %f4 */			;\
245 	fsrc1		%f2, %f10	/* %f10 = %f2 */		;\
246 	btst		0x7, tmp1	/* low three bits clear = doubleword aligned */	;\
247 	alignaddr	tmp1, %g0, %g0	/* changes %gsr */		;\
248 	bz,pn		%icc, label2##f	/* aligned: skip the extra faligndata steps */	;\
249 	  faligndata	%f2, %f4, %f6	/* delay slot: always executed */	;\
250 	faligndata	%f0, %f2, %f12					;\
251 	alignaddrl	tmp1, %g0, %g0	/* changes %gsr again */	;\
252 	faligndata	%f12, %f6, %f6					;\
253 label2:									;\
254 	add		doex, FPRAS_BLK0 + blk * 64, tmp1	/* tmp1 = target block in the per-cpu function */	;\
255 	fsrc2		%f8, %f12	/* %f12 = %f8 */		;\
256 	fsrc1		%f6, %f14	/* %f14 = %f6 */		;\
257 	stda		%f0, [tmp1]ASI_BLK_P	/* block store of the quadrant starting at %f0 */	;\
258 	membar		#Sync		/* wait for the block store to complete */	;\
259 label1:
260 
261 #define	FPRAS_REWRITE_TYPE2Q2(blk, doex, tmp1, tmp2, label1, label2)	\
262 	brz,pn	doex, label1##f	/* no check selected: skip the rewrite */	;\
263 	  mov	0x2, tmp1		/* delay slot: loop counter = 2 */	;\
264 	set	fpras_chkfn_type1, tmp2	/* tmp2 = template function */	;\
265 label2:									;\
266 	deccc		tmp1		/* count down, setting condition codes */	;\
267 	ldd		[tmp2 + (FPRAS_BLK0 + blk * 64)], %f20	/* first doubleword of the block */	;\
268 	ldd		[tmp2 + (FPRAS_BLK0 + blk * 64) + 8], %f18	/* second doubleword */	;\
269 	bnz,a,pt	%icc, label2##b	/* repeat the loads once */	;\
270 	  fsrc1		%f20, %f16	/* annulled: runs only when looping back */	;\
271 	rdpr		%tick, tmp1	/* %tick used as a randomly aligned address */	;\
272 	fsrc1		%f20, %f24	/* %f24 = %f20 */		;\
273 	fsrc1		%f18, %f26	/* %f26 = %f18 */		;\
274 	btst		0x7, tmp1	/* low three bits clear = doubleword aligned */	;\
275 	alignaddr	tmp1, %g0, %g0	/* changes %gsr */		;\
276 	bz,pn		%icc, label2##f	/* aligned: skip the extra faligndata steps */	;\
277 	  faligndata	%f18, %f20, %f22	/* delay slot: always executed */	;\
278 	faligndata	%f16, %f18, %f28				;\
279 	alignaddrl	tmp1, %g0, %g0	/* changes %gsr again */	;\
280 	faligndata	%f28, %f22, %f22				;\
281 label2:									;\
282 	add		doex, FPRAS_BLK0 + blk * 64, tmp1	/* tmp1 = target block in the per-cpu function */	;\
283 	fsrc2		%f24, %f28	/* %f28 = %f24 */		;\
284 	fsrc1		%f22, %f30	/* %f30 = %f22 */		;\
285 	stda		%f16, [tmp1]ASI_BLK_P	/* block store of the quadrant starting at %f16 */	;\
286 	membar		#Sync		/* wait for the block store to complete */	;\
287 label1:
288 
289 /*
290  * The CHECK macro takes the 'doex' address of the check function to
291  * execute and jumps to it (if not NULL). If the check function returns
292  * nonzero then the check has failed and the CHECK macro must initiate
293  * an appropriate failure action.  Illegal instruction trap handlers
294  * will also recognise traps in this PC range as fp failures.  Thread
295  * migration must only be reallowed after completion of this check.  The
296  * CHECK macro should be treated as a CALL/JMPL - output registers are
297  * forfeit after using it.  If the call to fpras_failure returns
298  * (it may decide to panic) then invoke lofault handler (which must exist)
299  * to return an error (be sure to use this macro before restoring original
300  * lofault setup in copy functions).  Note that the lofault handler is the
301  * copyops aware proxy handler which will perform other tidy up operations
302  * (unbind, fp state restore) that would normally have been done in the tail
303  * of the copy function.
304  *
305  *	operation (immediate): as above
306  *	doex (register): doex value returned from the REWRITE
307  *	label: free local numeric label
308  */
309 
310 #define	FPRAS_CHECK(operation, doex, label)				\
311 	brz,pn	doex, label##f	/* NULL doex: no check this time */	;\
312 	  nop								;\
313 	jmpl	doex, %o7		/* call the check function, return address in %o7 */	;\
314 	  nop								;\
315 	cmp	%o0, FPRAS_OK		/* %o0 = check function result */	;\
316 	be	%icc, label##f	/* passed: fall out of the macro */	;\
317 	  nop								;\
318 	mov	%o0, %o1	/* how detected */			;\
319 	call	fpras_failure	/* take failure action */		;\
320 	  mov	operation, %o0		/* delay slot: first argument = operation */	;\
321 	ldn	[THREAD_REG + T_LOFAULT], doex	/* fpras_failure returned: fetch lofault handler */	;\
322 	jmp	doex			/* hand over to the lofault handler */	;\
323 	  mov	EFAULT, %g1		/* delay slot: EFAULT placed in %g1 for the handler */	;\
324 label:
325 
326 /* END CSTYLED */
327 
328 #endif	/* _ASM */
329 
330 #ifdef	__cplusplus
331 }
332 #endif
333 
334 #endif	/* _SYS_FPRAS_IMPL_H */
335