/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_SYS_FPRAS_IMPL_H
#define	_SYS_FPRAS_IMPL_H

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/fpras.h>

/*
 * This header is consumed by both C and assembler sources.  C consumers
 * need the integer types used by the declarations in this file; assembler
 * consumers instead get <sys/intreg.h> and <sys/errno.h> (the FPRAS_CHECK
 * macro in this file uses EFAULT).
 */
#if !defined(_ASM)
#include <sys/types.h>
#else
#include <sys/intreg.h>
#include <sys/errno.h>
#endif	/* _ASM */

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * sun4u/cheetah fpRAS implementation.  Arrays etc will be allocated in sun4u
 * post_startup() if fpras_implemented is set.  This file may belong at
 * the cpu level (eg, cheetahregs.h) but most of it should be common
 * when fpRAS support is added for additional cpu types so we introduce
 * it at the sun4u level (and set fpras_implemented in cpu_setup).
 *
 * If fpRAS is implemented on a sun4u/cpu combination that does not use
 * an ASR for %stick then the FPRAS_INTERVAL macro will need some
 * modification.
 */

/*
 * Upper bound for check frequency per cpu and per operation.
For example, if
 * this is 100 then for cpuid N performing a bcopy if that cpu has not
 * performed a checked bcopy in the last 1/100th of a second then
 * we'll check the current operation.  A value of 0 will check every operation.
 * Modifying fpras_frequency from its default is not recommended.
 * fpras_interval is computed from fpras_frequency.
 *
 * The FPRAS_INTERVAL macro in this file compares fpras_interval against
 * a difference of two STICK register reads, and treats a negative
 * fpras_interval as "not yet initialized" (no check is performed).
 */
#if !defined(_ASM)
extern int fpras_frequency;
extern int64_t fpras_interval;
#endif	/* _ASM */
#define	FPRAS_DEFAULT_FREQUENCY	100

#if !defined(_ASM)

/*
 * Structure of a check function.  The preamble prepares registers for the
 * upcoming calculation that is performed in blk0 and blk1.  One of those
 * blocks will be rewritten as part of an FPRAS_REWRITE operation.  Finally
 * the result checked in chkresult should be as predetermined, and we should
 * return zero on success and nonzero on failure.  If an illegal instruction
 * is encountered in the execution of the check function then we trampoline
 * to the final three instructions to return a different value.
 *
 * Note that the size of this structure is a power of 2 as is the
 * size of a struct fpras_chkfngrp.  The asm macros below rely on this
 * in performing bit shifts instead of mulx.
 *
 * The members add up to 16 + 16 + 16 + 13 + 3 = 64 32-bit words, ie
 * 256 bytes.  fpras_blk0 and fpras_blk1 are 64 bytes each, which is the
 * unit the REWRITE macros address as FPRAS_BLK0 + blk * 64.
 */
struct fpras_chkfn {
	uint32_t fpras_preamble[16];	/* register setup for the calculation */
	uint32_t fpras_blk0[16];	/* 64-byte block, rewrite candidate */
	uint32_t fpras_blk1[16];	/* 64-byte block, rewrite candidate */
	uint32_t fpras_chkresult[13];	/* compares result, returns 0/nonzero */
	uint32_t fpras_trampoline[3];	/* illegal instruction landing pad */
};

/*
 * Check function constructed to match a struct fpras_chkfn
 */
extern int fpras_chkfn_type1(void);

/*
 * A group of check functions, one for each operation type.  These will
 * be the check functions for copy operations on a particular processor.
 *
 * For the size of this structure to be a power of 2 (see above),
 * FPRAS_NCOPYOPS must itself be a power of 2.  It is defined outside this
 * file, presumably in <sys/fpras.h>.
 */
struct fpras_chkfngrp {
	struct fpras_chkfn fpras_fn[FPRAS_NCOPYOPS];
};

/*
 * Where we store check functions for execution.  Indexed by cpuid and
 * function within that for cacheline friendliness.
Startup code
 * copies the check function into this array.  The fpRAS mechanism will
 * rewrite one of fpras_blk0 or fpras_blk1 before calling the check function
 * for a cpuid & copy function combination.
 */
extern struct fpras_chkfngrp *fpras_chkfngrps;

#endif	/* !_ASM */

#if defined(_ASM)

/* BEGIN CSTYLED */

/*
 * The INTERVAL macro decides whether we will check this copy operation,
 * based on performing no more than 1 check per cpu & operation in a specified
 * time interval.  If it decides to abort this check (ie, we have checked
 * recently) then it returns doex as NULL, otherwise doex is the address of
 * the check function to execute later.  Migration must have been prevented
 * before calling this macro.  Args:
 *
 *	operation (immediate):	one of FPRAS_BCOPY etc
 *	blk (immediate):	which block to copy
 *	doex (register):	register in which to return check function address
 *	tmp1 (register):	used for scratch, not preserved
 *	tmp2 (register):	used for scratch, not preserved
 *	tmp3 (register):	used for scratch, not preserved
 *	tmp4 (register):	used for scratch, not preserved
 *	label:			free local numeric label
 *
 * doex is returned NULL in each of these cases:
 *	- fpras_interval is negative (not initialized)
 *	- the bit (1 << operation) is set in fpras_disableids
 *	- the CPU_PRIVATE pointer of the current cpu is NULL
 *	- the STICK delta since the stored timestamp for this operation is
 *	  less than fpras_interval (unsigned 64-bit compare)
 * Otherwise the per-cpu timestamp for this operation is updated and doex is
 * fpras_chkfngrps + (cpuid << FPRAS_CHKFNGRP_SIZE_SHIFT) +
 * (operation << FPRAS_CHKFN_SIZE_SHIFT).
 *
 * blk is only used to prefetch the block of fpras_chkfn_type1 that a
 * following REWRITE macro will read.
 *
 * THREAD_REG, T_CPU, CPU_PRIVATE, CPU_ID, CHPR_FPRAS_TIMESTAMP, STICK,
 * FPRAS_BLK0 and the two *_SIZE_SHIFT constants are not defined in this
 * file and must be supplied by the including assembler source.
 *
 * Instructions indented by two extra spaces sit in a branch delay slot.
 */

#define	FPRAS_INTERVAL(operation, blk, doex, tmp1, tmp2, tmp3, tmp4, label) \
	sethi	%hi(fpras_interval), tmp1				;\
	ldx	[tmp1 + %lo(fpras_interval)], tmp1			;\
	brlz,pn	tmp1, label/**/f	/* not initialized? */		;\
	  clr	doex			/* always run: doex = NULL */	;\
	sethi	%hi(fpras_disableids), tmp2				;\
	ld	[tmp2 + %lo(fpras_disableids)], tmp2			;\
	mov	0x1, tmp3						;\
	sll	tmp3, operation, tmp3	/* mask bit for this op */	;\
	btst	tmp3, tmp2						;\
	bnz,a,pn %icc, label/**/f	/* disabled for this op? */	;\
	  nop								;\
	set	fpras_chkfn_type1, tmp2					;\
	prefetch [tmp2 + (FPRAS_BLK0 + blk * 64)], #one_read		;\
	ldn	[THREAD_REG + T_CPU], tmp2				;\
	ldn	[tmp2 + CPU_PRIVATE], tmp2				;\
	brz,pn	tmp2, label/**/f	/* early in startup? */		;\
	  mov	operation, tmp3						;\
	sll	tmp3, 3, tmp3		/* operation * 8 bytes */	;\
	set	CHPR_FPRAS_TIMESTAMP, tmp4				;\
	add	tmp2, tmp4, tmp2					;\
	add	tmp2, tmp3, tmp2	/* keep ptr for update */	;\
	ldx	[tmp2], tmp3		/* last timestamp */		;\
	rd	STICK, doex		/* doex is a scratch here */	;\
	sub	doex, tmp3, tmp4	/* delta since last check */	;\
	cmp	tmp4, tmp1		/* compare delta to interval */	;\
	blu,a,pn %xcc, label/**/f	/* checked recently? */		;\
	  clr	doex			/* annulled unless taken */	;\
	stx	doex, [tmp2]		/* updated timestamp */		;\
	ldn	[THREAD_REG + T_CPU], tmp1				;\
	ld	[tmp1 + CPU_ID], tmp1					;\
	sethi	%hi(fpras_chkfngrps), doex				;\
	ldn	[doex + %lo(fpras_chkfngrps)], doex			;\
	sll	tmp1, FPRAS_CHKFNGRP_SIZE_SHIFT, tmp1			;\
	add	doex, tmp1, doex	/* group for this cpuid */	;\
	mov	operation, tmp1						;\
	sll	tmp1, FPRAS_CHKFN_SIZE_SHIFT, tmp1			;\
	add	doex, tmp1, doex	/* address of check function */	;\
label:

/*
 * The REWRITE macro copies an instruction block from fpras_chkfn_type1
 * into a per-cpu fpras check function.
 * If doex is NULL it must not attempt any copy, and must leave doex NULL.
 * CPU migration of this thread must be prevented before we call this macro.
 * We must have checked for fp in use (and saved state, including the
 * quadrant of registers indicated by the fpq argument) and have fp enabled
 * before using this macro.  Args:
 *
 *	blk (immediate):	as above
 *	doex (register):	check function address as returned by the
 *				INTERVAL macro (or NULL); the REWRITE macros
 *				read it but never modify it
 *	[fpq (fp register):	frf quadrant to be used (%f0/%f16/%f32/%f48)]
 *				This is used on type 1 rewrite only - on others the
 *				quadrant is implicit/hardcoded in the macro name.
 *	tmp1 (register):	used for scratch, not preserved
 *	[tmp2 (register):	used for scratch, not preserved]
 *				This is used in type 2 only.
 *	label1:			free local numeric label
 *	[label2:		free local numeric label]
 *				This is used in type 2 only.
 *
 * Note that the REWRITE macros do not perform a flush instruction -
 * flush is not necessary on Cheetah derivative processors in which
 * i$ snoops for invalidations.
*/

/*
 * Rewrite type 1 will work with any instruction pattern - it just block
 * loads and block stores the given block.  A membar after block store
 * forces the block store to complete before upcoming reuse of the
 * fpregs in the block; the block load is blocking on sun4u/cheetah
 * so no need for a membar after it.
 *
 * The source is fpras_chkfn_type1 + FPRAS_BLK0 + blk * 64 and the
 * destination is doex + FPRAS_BLK0 + blk * 64.  The sethi sits in the
 * delay slot of the NULL test and so runs even when doex is NULL; it
 * only writes tmp1, which is scratch.
 */

#define	FPRAS_REWRITE_TYPE1(blk, doex, fpq, tmp1, label)	\
	brz,pn	doex, label/**/f	/* nothing to rewrite? */	;\
	  sethi	%hi(fpras_chkfn_type1), tmp1				;\
	add	tmp1, %lo(fpras_chkfn_type1), tmp1			;\
	add	tmp1, FPRAS_BLK0 + blk * 64, tmp1	/* source */	;\
	ldda	[tmp1]ASI_BLK_P, fpq					;\
	add	doex, FPRAS_BLK0 + blk * 64, tmp1	/* dest */	;\
	stda	fpq, [tmp1]ASI_BLK_P					;\
	membar	#Sync							;\
label:

/*
 * Rewrite type 2 will only work with instruction blocks that satisfy
 * this particular repeat pattern.  Note that the frf quadrant to
 * use is implicit in the macro name and had better match what the
 * copy function is preserving.
 *
 * The odd looking repetition in the initial loop is designed to open
 * up both paths from prefetch cache to the frf - unrolling the loop
 * would defeat this.  In addition we perform idempotent faligndata
 * manipulations using %tick as a randomly aligned address (this only
 * works for addresses that aren't doubleword aligned).
*/
/*
 * Type 2 rewrite using the first frf quadrant (%f0 - %f14).
 *
 * label2 is deliberately defined twice: the first definition is the head
 * of the two-pass load loop (reached with label2/b), the second is the
 * join point after the optional faligndata sequence (reached with
 * label2/f).  Only the first 16 bytes of the source block are loaded;
 * the remaining registers of the quadrant are derived from them before
 * the 64-byte block store to doex + FPRAS_BLK0 + blk * 64.
 *
 * Instructions indented by two extra spaces sit in a branch delay slot.
 */
#define	FPRAS_REWRITE_TYPE2Q1(blk, doex, tmp1, tmp2, label1, label2)	\
	brz,pn	doex, label1/**/f	/* nothing to rewrite? */	;\
	  mov	0x2, tmp1		/* loop count, always run */	;\
	set	fpras_chkfn_type1, tmp2					;\
label2:									;\
	deccc	tmp1							;\
	ldd	[tmp2 + (FPRAS_BLK0 + blk * 64)], %f4			;\
	ldd	[tmp2 + (FPRAS_BLK0 + blk * 64) + 8], %f2		;\
	bnz,a,pt %icc, label2/**/b	/* second pass of loads */	;\
	  fsrc1	%f4, %f0		/* annulled unless taken */	;\
	rdpr	%tick, tmp1						;\
	fsrc1	%f4, %f8						;\
	fsrc1	%f2, %f10						;\
	btst	0x7, tmp1		/* low 3 bits of %tick */	;\
	alignaddr tmp1, %g0, %g0	/* changes %gsr */		;\
	bz,pn	%icc, label2/**/f	/* doubleword aligned: skip */	;\
	  faligndata %f2, %f4, %f6	/* always run */		;\
	faligndata %f0, %f2, %f12					;\
	alignaddrl tmp1, %g0, %g0					;\
	faligndata %f12, %f6, %f6					;\
label2:									;\
	add	doex, FPRAS_BLK0 + blk * 64, tmp1	/* dest */	;\
	fsrc2	%f8, %f12						;\
	fsrc1	%f6, %f14						;\
	stda	%f0, [tmp1]ASI_BLK_P					;\
	membar	#Sync							;\
label1:

/*
 * Type 2 rewrite using the second frf quadrant (%f16 - %f30).  This is
 * the same instruction sequence as FPRAS_REWRITE_TYPE2Q1 with every fp
 * register number increased by 16; label2 is again defined twice (loop
 * head, then join point).
 */
#define	FPRAS_REWRITE_TYPE2Q2(blk, doex, tmp1, tmp2, label1, label2)	\
	brz,pn	doex, label1/**/f	/* nothing to rewrite? */	;\
	  mov	0x2, tmp1		/* loop count, always run */	;\
	set	fpras_chkfn_type1, tmp2					;\
label2:									;\
	deccc	tmp1							;\
	ldd	[tmp2 + (FPRAS_BLK0 + blk * 64)], %f20			;\
	ldd	[tmp2 + (FPRAS_BLK0 + blk * 64) + 8], %f18		;\
	bnz,a,pt %icc, label2/**/b	/* second pass of loads */	;\
	  fsrc1	%f20, %f16		/* annulled unless taken */	;\
	rdpr	%tick, tmp1						;\
	fsrc1	%f20, %f24						;\
	fsrc1	%f18, %f26						;\
	btst	0x7, tmp1		/* low 3 bits of %tick */	;\
	alignaddr tmp1, %g0, %g0	/* changes %gsr */		;\
	bz,pn	%icc, label2/**/f	/* doubleword aligned: skip */	;\
	  faligndata %f18, %f20, %f22	/* always run */		;\
	faligndata %f16, %f18, %f28					;\
	alignaddrl tmp1, %g0, %g0					;\
	faligndata %f28, %f22, %f22					;\
label2:									;\
	add	doex, FPRAS_BLK0 + blk * 64, tmp1	/* dest */	;\
	fsrc2	%f24, %f28						;\
	fsrc1	%f22, %f30						;\
	stda	%f16, [tmp1]ASI_BLK_P					;\
	membar	#Sync							;\
label1:

/*
 * The CHECK macro takes the 'doex' address of the check function to
 * execute and jumps to it (if not NULL).  If the check function returns
 * nonzero then the check has failed and the CHECK macro must initiate
 * an appropriate failure action.
Illegal instruction trap handlers
 * will also recognise traps in this PC range as fp failures.  Thread
 * migration must only be reallowed after completion of this check.  The
 * CHECK macro should be treated as a CALL/JMPL - output registers are
 * forfeit after using it.  If the call to fpras_failure returns
 * (it may decide to panic) then invoke lofault handler (which must exist)
 * to return an error (be sure to use this macro before restoring original
 * lofault setup in copy functions).  Note that the lofault handler is the
 * copyops aware proxy handler which will perform other tidy up operations
 * (unbind, fp state restore) that would normally have been done in the tail
 * of the copy function.
 *
 *	operation (immediate):	as above
 *	doex (register):	doex value returned from the REWRITE
 *	label:			free local numeric label
 *
 * The check function result is read from %o0 and compared with FPRAS_OK.
 * On mismatch fpras_failure is called with %o0 = operation and
 * %o1 = the check function result; if it returns, control transfers to
 * the address loaded from [THREAD_REG + T_LOFAULT] with %g1 = EFAULT.
 * doex is overwritten on that failure path.
 *
 * Instructions indented by two extra spaces sit in a branch delay slot.
 */

#define	FPRAS_CHECK(operation, doex, label)				\
	brz,pn	doex, label/**/f	/* no check this time? */	;\
	  nop								;\
	jmpl	doex, %o7		/* run the check function */	;\
	  nop								;\
	cmp	%o0, FPRAS_OK						;\
	be	%icc, label/**/f	/* check passed */		;\
	  nop								;\
	mov	%o0, %o1		/* how detected */		;\
	call	fpras_failure		/* take failure action */	;\
	  mov	operation, %o0		/* first argument */		;\
	ldn	[THREAD_REG + T_LOFAULT], doex				;\
	jmp	doex			/* to the lofault handler */	;\
	  mov	EFAULT, %g1						;\
label:

/* END CSTYLED */

#endif	/* _ASM */

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_FPRAS_IMPL_H */