1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/ddi.h> 32 #include <sys/sysmacros.h> 33 #include <sys/archsystm.h> 34 #include <sys/vmsystm.h> 35 #include <sys/machparam.h> 36 #include <sys/machsystm.h> 37 #include <sys/machthread.h> 38 #include <sys/cpu.h> 39 #include <sys/cmp.h> 40 #include <sys/elf_SPARC.h> 41 #include <vm/vm_dep.h> 42 #include <vm/hat_sfmmu.h> 43 #include <vm/seg_kpm.h> 44 #include <sys/cpuvar.h> 45 #include <sys/cheetahregs.h> 46 #include <sys/us3_module.h> 47 #include <sys/async.h> 48 #include <sys/cmn_err.h> 49 #include <sys/debug.h> 50 #include <sys/dditypes.h> 51 #include <sys/prom_debug.h> 52 #include <sys/prom_plat.h> 53 #include <sys/cpu_module.h> 54 #include <sys/sysmacros.h> 55 #include <sys/intreg.h> 56 #include <sys/clock.h> 57 #include <sys/platform_module.h> 58 #include <sys/machtrap.h> 59 #include <sys/ontrap.h> 60 #include <sys/panic.h> 61 #include <sys/memlist.h> 62 #include <sys/bootconf.h> 63 #include <sys/ivintr.h> 64 #include <sys/atomic.h> 65 #include <sys/taskq.h> 66 #include <sys/note.h> 67 #include <sys/ndifm.h> 68 #include <sys/ddifm.h> 69 #include <sys/fm/protocol.h> 70 #include <sys/fm/util.h> 71 #include <sys/fm/cpu/UltraSPARC-III.h> 72 #include <sys/fpras_impl.h> 73 #include <sys/dtrace.h> 74 #include <sys/watchpoint.h> 75 #include <sys/plat_ecc_unum.h> 76 #include <sys/cyclic.h> 77 #include <sys/errorq.h> 78 #include <sys/errclassify.h> 79 80 #ifdef CHEETAHPLUS_ERRATUM_25 81 #include <sys/xc_impl.h> 82 #endif /* CHEETAHPLUS_ERRATUM_25 */ 83 84 /* 85 * Note that 'Cheetah PRM' refers to: 86 * SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III 87 */ 88 89 /* 90 * Per CPU pointers to physical address of TL>0 logout data areas. 91 * These pointers have to be in the kernel nucleus to avoid MMU 92 * misses. 93 */ 94 uint64_t ch_err_tl1_paddrs[NCPU]; 95 96 /* 97 * One statically allocated structure to use during startup/DR 98 * to prevent unnecessary panics. 99 */ 100 ch_err_tl1_data_t ch_err_tl1_data; 101 102 /* 103 * Per CPU pending error at TL>0, used by level15 softint handler 104 */ 105 uchar_t ch_err_tl1_pending[NCPU]; 106 107 /* 108 * For deferred CE re-enable after trap. 109 */ 110 taskq_t *ch_check_ce_tq; 111 112 /* 113 * Internal functions. 
114 */ 115 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep); 116 static void cpu_log_diag_info(ch_async_flt_t *ch_flt); 117 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason, 118 ecc_type_to_info_t *eccp, ch_diag_data_t *cdp); 119 static int clear_ecc(struct async_flt *ecc); 120 #if defined(CPU_IMP_ECACHE_ASSOC) 121 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt); 122 #endif 123 static int cpu_ecache_set_size(struct cpu *cp); 124 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag); 125 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr); 126 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag); 127 static int cpu_ectag_pa_to_subblk_state(int cachesize, 128 uint64_t subaddr, uint64_t tag); 129 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt); 130 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit); 131 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit); 132 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit); 133 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit); 134 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit); 135 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp); 136 static void cpu_scrubphys(struct async_flt *aflt); 137 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *, 138 int *, int *); 139 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *); 140 static void cpu_ereport_init(struct async_flt *aflt); 141 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t); 142 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt); 143 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen, 144 ch_cpu_logout_t *clop); 145 static int cpu_ce_delayed_ec_logout(uint64_t); 146 static int cpu_matching_ecache_line(uint64_t, void *, int, int *); 147 148 #ifdef CHEETAHPLUS_ERRATUM_25 149 static int mondo_recover_proc(uint16_t, int); 150 static void cheetah_nudge_init(void); 151 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, 152 cyc_time_t *when); 153 static void cheetah_nudge_buddy(void); 154 #endif /* CHEETAHPLUS_ERRATUM_25 */ 155 156 #if defined(CPU_IMP_L1_CACHE_PARITY) 157 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt); 158 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index); 159 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt, 160 ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word); 161 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt); 162 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index); 163 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt); 164 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index); 165 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *); 166 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *); 167 #endif /* CPU_IMP_L1_CACHE_PARITY */ 168 169 int (*p2get_mem_info)(int synd_code, uint64_t paddr, 170 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 171 int *segsp, int *banksp, int *mcidp); 172 173 /* 174 * This table is used to determine which bit(s) is(are) bad when an ECC 175 * error occurs. The array is indexed by an 9-bit syndrome. The entries 176 * of this array have the following semantics: 177 * 178 * 00-127 The number of the bad bit, when only one bit is bad. 179 * 128 ECC bit C0 is bad. 
180 * 129 ECC bit C1 is bad. 181 * 130 ECC bit C2 is bad. 182 * 131 ECC bit C3 is bad. 183 * 132 ECC bit C4 is bad. 184 * 133 ECC bit C5 is bad. 185 * 134 ECC bit C6 is bad. 186 * 135 ECC bit C7 is bad. 187 * 136 ECC bit C8 is bad. 188 * 137-143 reserved for Mtag Data and ECC. 189 * 144(M2) Two bits are bad within a nibble. 190 * 145(M3) Three bits are bad within a nibble. 191 * 146(M3) Four bits are bad within a nibble. 192 * 147(M) Multiple bits (5 or more) are bad. 193 * 148 NO bits are bad. 194 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5. 195 */ 196 197 #define C0 128 198 #define C1 129 199 #define C2 130 200 #define C3 131 201 #define C4 132 202 #define C5 133 203 #define C6 134 204 #define C7 135 205 #define C8 136 206 #define MT0 137 /* Mtag Data bit 0 */ 207 #define MT1 138 208 #define MT2 139 209 #define MTC0 140 /* Mtag Check bit 0 */ 210 #define MTC1 141 211 #define MTC2 142 212 #define MTC3 143 213 #define M2 144 214 #define M3 145 215 #define M4 146 216 #define M 147 217 #define NA 148 218 #if defined(JALAPENO) || defined(SERRANO) 219 #define S003 149 /* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */ 220 #define S003MEM 150 /* Syndrome 0x003 => likely from WDU/WBP */ 221 #define SLAST S003MEM /* last special syndrome */ 222 #else /* JALAPENO || SERRANO */ 223 #define S003 149 /* Syndrome 0x003 => likely from EDU:ST */ 224 #define S071 150 /* Syndrome 0x071 => likely from WDU/CPU */ 225 #define S11C 151 /* Syndrome 0x11c => likely from BERR/DBERR */ 226 #define SLAST S11C /* last special syndrome */ 227 #endif /* JALAPENO || SERRANO */ 228 #if defined(JALAPENO) || defined(SERRANO) 229 #define BPAR0 152 /* syndrom 152 through 167 for bus parity */ 230 #define BPAR15 167 231 #endif /* JALAPENO || SERRANO */ 232 233 static uint8_t ecc_syndrome_tab[] = 234 { 235 NA, C0, C1, S003, C2, M2, M3, 47, C3, M2, M2, 53, M2, 41, 29, M, 236 C4, M, M, 50, M2, 38, 25, M2, M2, 33, 24, M2, 11, M, M2, 16, 237 C5, M, M, 46, M2, 37, 19, M2, M, 31, 32, M, 7, M2, M2, 10, 238 M2, 40, 13, M2, 59, M, M2, 66, M, M2, M2, 0, M2, 67, 71, M, 239 C6, M, M, 43, M, 36, 18, M, M2, 49, 15, M, 63, M2, M2, 6, 240 M2, 44, 28, M2, M, M2, M2, 52, 68, M2, M2, 62, M2, M3, M3, M4, 241 M2, 26, 106, M2, 64, M, M2, 2, 120, M, M2, M3, M, M3, M3, M4, 242 #if defined(JALAPENO) || defined(SERRANO) 243 116, M2, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3, 244 #else /* JALAPENO || SERRANO */ 245 116, S071, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3, 246 #endif /* JALAPENO || SERRANO */ 247 C7, M2, M, 42, M, 35, 17, M2, M, 45, 14, M2, 21, M2, M2, 5, 248 M, 27, M, M, 99, M, M, 3, 114, M2, M2, 20, M2, M3, M3, M, 249 M2, 23, 113, M2, 112, M2, M, 51, 95, M, M2, M3, M2, M3, M3, M2, 250 103, M, M2, M3, M2, M3, M3, M4, M2, 48, M, M, 73, M2, M, M3, 251 M2, 22, 110, M2, 109, M2, M, 9, 108, M2, M, M3, M2, M3, M3, M, 252 102, M2, M, M, M2, M3, M3, M, M2, M3, M3, M2, M, M4, M, M3, 253 98, M, M2, M3, M2, M, M3, M4, M2, M3, M3, M4, M3, M, M, M, 254 M2, M3, M3, M, M3, M, M, M, 56, M4, M, M3, M4, M, M, M, 255 C8, M, M2, 39, M, 34, 105, M2, M, 30, 104, M, 101, M, M, 4, 256 #if defined(JALAPENO) || defined(SERRANO) 257 M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, M2, M, M3, M, 258 #else /* JALAPENO || SERRANO */ 259 M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, S11C, M, M3, M, 260 #endif /* JALAPENO || SERRANO */ 261 M2, 97, 82, M2, 78, M2, M2, 1, 96, M, M, M, M, M, M3, M2, 262 94, M, M2, M3, M2, M, M3, M, M2, M, 79, M, 69, M, M4, M, 263 M2, 93, 92, M, 91, M, M2, 8, 90, M2, M2, M, M, M, M, M4, 264 
89, M, M, M3, M2, M3, M3, M, M, M, M3, M2, M3, M2, M, M3, 265 86, M, M2, M3, M2, M, M3, M, M2, M, M3, M, M3, M, M, M3, 266 M, M, M3, M2, M3, M2, M4, M, 60, M, M2, M3, M4, M, M, M2, 267 M2, 88, 85, M2, 84, M, M2, 55, 81, M2, M2, M3, M2, M3, M3, M4, 268 77, M, M, M, M2, M3, M, M, M2, M3, M3, M4, M3, M2, M, M, 269 74, M, M2, M3, M, M, M3, M, M, M, M3, M, M3, M, M4, M3, 270 M2, 70, 107, M4, 65, M2, M2, M, 127, M, M, M, M2, M3, M3, M, 271 80, M2, M2, 72, M, 119, 118, M, M2, 126, 76, M, 125, M, M4, M3, 272 M2, 115, 124, M, 75, M, M, M3, 61, M, M4, M, M4, M, M, M, 273 M, 123, 122, M4, 121, M4, M, M3, 117, M2, M2, M3, M4, M3, M, M, 274 111, M, M, M, M4, M3, M3, M, M, M, M3, M, M3, M2, M, M 275 }; 276 277 #define ESYND_TBL_SIZE (sizeof (ecc_syndrome_tab) / sizeof (uint8_t)) 278 279 #if !(defined(JALAPENO) || defined(SERRANO)) 280 /* 281 * This table is used to determine which bit(s) is(are) bad when a Mtag 282 * error occurs. The array is indexed by an 4-bit ECC syndrome. The entries 283 * of this array have the following semantics: 284 * 285 * -1 Invalid mtag syndrome. 286 * 137 Mtag Data 0 is bad. 287 * 138 Mtag Data 1 is bad. 288 * 139 Mtag Data 2 is bad. 289 * 140 Mtag ECC 0 is bad. 290 * 141 Mtag ECC 1 is bad. 291 * 142 Mtag ECC 2 is bad. 292 * 143 Mtag ECC 3 is bad. 293 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6. 294 */ 295 short mtag_syndrome_tab[] = 296 { 297 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2, MT1, M2, MT2, M2, M2 298 }; 299 300 #define MSYND_TBL_SIZE (sizeof (mtag_syndrome_tab) / sizeof (short)) 301 302 #else /* !(JALAPENO || SERRANO) */ 303 304 #define BSYND_TBL_SIZE 16 305 306 #endif /* !(JALAPENO || SERRANO) */ 307 308 /* 309 * CE initial classification and subsequent action lookup table 310 */ 311 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE]; 312 static int ce_disp_inited; 313 314 /* 315 * Set to disable leaky and partner check for memory correctables 316 */ 317 int ce_xdiag_off; 318 319 /* 320 * The following are not incremented atomically so are indicative only 321 */ 322 static int ce_xdiag_drops; 323 static int ce_xdiag_lkydrops; 324 static int ce_xdiag_ptnrdrops; 325 static int ce_xdiag_bad; 326 327 /* 328 * CE leaky check callback structure 329 */ 330 typedef struct { 331 struct async_flt *lkycb_aflt; 332 errorq_t *lkycb_eqp; 333 errorq_elem_t *lkycb_eqep; 334 } ce_lkychk_cb_t; 335 336 /* 337 * defines for various ecache_flush_flag's 338 */ 339 #define ECACHE_FLUSH_LINE 1 340 #define ECACHE_FLUSH_ALL 2 341 342 /* 343 * STICK sync 344 */ 345 #define STICK_ITERATION 10 346 #define MAX_TSKEW 1 347 #define EV_A_START 0 348 #define EV_A_END 1 349 #define EV_B_START 2 350 #define EV_B_END 3 351 #define EVENTS 4 352 353 static int64_t stick_iter = STICK_ITERATION; 354 static int64_t stick_tsk = MAX_TSKEW; 355 356 typedef enum { 357 EVENT_NULL = 0, 358 SLAVE_START, 359 SLAVE_CONT, 360 MASTER_START 361 } event_cmd_t; 362 363 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL; 364 static int64_t timestamp[EVENTS]; 365 static volatile int slave_done; 366 367 #ifdef DEBUG 368 #define DSYNC_ATTEMPTS 64 369 typedef struct { 370 int64_t skew_val[DSYNC_ATTEMPTS]; 371 } ss_t; 372 373 ss_t stick_sync_stats[NCPU]; 374 #endif /* DEBUG */ 375 376 /* 377 * Maximum number of contexts for Cheetah. 378 */ 379 #define MAX_NCTXS (1 << 13) 380 381 /* Will be set !NULL for Cheetah+ and derivatives. 
*/ 382 uchar_t *ctx_pgsz_array = NULL; 383 #if defined(CPU_IMP_DUAL_PAGESIZE) 384 static uchar_t ctx_pgsz_arr[MAX_NCTXS]; 385 uint_t disable_dual_pgsz = 0; 386 #endif /* CPU_IMP_DUAL_PAGESIZE */ 387 388 /* 389 * Save the cache bootup state for use when internal 390 * caches are to be re-enabled after an error occurs. 391 */ 392 uint64_t cache_boot_state; 393 394 /* 395 * PA[22:0] represent Displacement in Safari configuration space. 396 */ 397 uint_t root_phys_addr_lo_mask = 0x7fffffu; 398 399 bus_config_eclk_t bus_config_eclk[] = { 400 #if defined(JALAPENO) || defined(SERRANO) 401 {JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1}, 402 {JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2}, 403 {JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32}, 404 #else /* JALAPENO || SERRANO */ 405 {SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1}, 406 {SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2}, 407 {SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32}, 408 #endif /* JALAPENO || SERRANO */ 409 {0, 0} 410 }; 411 412 /* 413 * Interval for deferred CEEN reenable 414 */ 415 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS; 416 417 /* 418 * set in /etc/system to control logging of user BERR/TO's 419 */ 420 int cpu_berr_to_verbose = 0; 421 422 /* 423 * set to 0 in /etc/system to defer CEEN reenable for all CEs 424 */ 425 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED; 426 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT; 427 428 /* 429 * Set of all offline cpus 430 */ 431 cpuset_t cpu_offline_set; 432 433 static void cpu_delayed_check_ce_errors(void *); 434 static void cpu_check_ce_errors(void *); 435 void cpu_error_ecache_flush(ch_async_flt_t *); 436 static int cpu_error_ecache_flush_required(ch_async_flt_t *); 437 static void cpu_log_and_clear_ce(ch_async_flt_t *); 438 void cpu_ce_detected(ch_cpu_errors_t *, int); 439 440 /* 441 * CE Leaky check timeout in microseconds. This is chosen to be twice the 442 * memory refresh interval of current DIMMs (64ms). After initial fix that 443 * gives at least one full refresh cycle in which the cell can leak 444 * (whereafter further refreshes simply reinforce any incorrect bit value). 445 */ 446 clock_t cpu_ce_lkychk_timeout_usec = 128000; 447 448 /* 449 * CE partner check partner caching period in seconds 450 */ 451 int cpu_ce_ptnr_cachetime_sec = 60; 452 453 /* 454 * Sets trap table entry ttentry by overwriting eight instructions from ttlabel 455 */ 456 #define CH_SET_TRAP(ttentry, ttlabel) \ 457 bcopy((const void *)&ttlabel, &ttentry, 32); \ 458 flush_instr_mem((caddr_t)&ttentry, 32); 459 460 static int min_ecache_size; 461 static uint_t priv_hcl_1; 462 static uint_t priv_hcl_2; 463 static uint_t priv_hcl_4; 464 static uint_t priv_hcl_8; 465 466 void 467 cpu_setup(void) 468 { 469 extern int at_flags; 470 extern int disable_delay_tlb_flush, delay_tlb_flush; 471 extern int cpc_has_overflow_intr; 472 extern int disable_text_largepages; 473 extern int use_text_pgsz4m; 474 475 /* 476 * Setup chip-specific trap handlers. 477 */ 478 cpu_init_trap(); 479 480 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); 481 482 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3; 483 484 /* 485 * save the cache bootup state. 486 */ 487 cache_boot_state = get_dcu() & DCU_CACHE; 488 489 /* 490 * Use the maximum number of contexts available for Cheetah 491 * unless it has been tuned for debugging. 492 * We are checking against 0 here since this value can be patched 493 * while booting. 
It can not be patched via /etc/system since it 494 * will be patched too late and thus cause the system to panic. 495 */ 496 if (nctxs == 0) 497 nctxs = MAX_NCTXS; 498 499 /* 500 * Due to the number of entries in the fully-associative tlb 501 * this may have to be tuned lower than in spitfire. 502 */ 503 pp_slots = MIN(8, MAXPP_SLOTS); 504 505 /* 506 * Block stores do not invalidate all pages of the d$, pagecopy 507 * et. al. need virtual translations with virtual coloring taken 508 * into consideration. prefetch/ldd will pollute the d$ on the 509 * load side. 510 */ 511 pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE; 512 513 if (use_page_coloring) { 514 do_pg_coloring = 1; 515 if (use_virtual_coloring) 516 do_virtual_coloring = 1; 517 } 518 519 isa_list = 520 "sparcv9+vis2 sparcv9+vis sparcv9 " 521 "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus " 522 "sparcv8 sparcv8-fsmuld sparcv7 sparc"; 523 524 /* 525 * On Panther-based machines, this should 526 * also include AV_SPARC_POPC too 527 */ 528 cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2; 529 530 /* 531 * On cheetah, there's no hole in the virtual address space 532 */ 533 hole_start = hole_end = 0; 534 535 /* 536 * The kpm mapping window. 537 * kpm_size: 538 * The size of a single kpm range. 539 * The overall size will be: kpm_size * vac_colors. 540 * kpm_vbase: 541 * The virtual start address of the kpm range within the kernel 542 * virtual address space. kpm_vbase has to be kpm_size aligned. 543 */ 544 kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */ 545 kpm_size_shift = 43; 546 kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */ 547 kpm_smallpages = 1; 548 549 /* 550 * The traptrace code uses either %tick or %stick for 551 * timestamping. We have %stick so we can use it. 552 */ 553 traptrace_use_stick = 1; 554 555 /* 556 * Cheetah has a performance counter overflow interrupt 557 */ 558 cpc_has_overflow_intr = 1; 559 560 /* 561 * Use cheetah flush-all support 562 */ 563 if (!disable_delay_tlb_flush) 564 delay_tlb_flush = 1; 565 566 #if defined(CPU_IMP_DUAL_PAGESIZE) 567 /* 568 * Use Cheetah+ and later dual page size support. 569 */ 570 if (!disable_dual_pgsz) { 571 ctx_pgsz_array = ctx_pgsz_arr; 572 } 573 #endif /* CPU_IMP_DUAL_PAGESIZE */ 574 575 /* 576 * Declare that this architecture/cpu combination does fpRAS. 577 */ 578 fpras_implemented = 1; 579 580 /* 581 * Enable 4M pages to be used for mapping user text by default. Don't 582 * use large pages for initialized data segments since we may not know 583 * at exec() time what should be the preferred large page size for DTLB 584 * programming. 585 */ 586 use_text_pgsz4m = 1; 587 disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) | 588 (1 << TTE32M) | (1 << TTE256M); 589 590 /* 591 * Setup CE lookup table 592 */ 593 CE_INITDISPTBL_POPULATE(ce_disp_table); 594 ce_disp_inited = 1; 595 } 596 597 /* 598 * Called by setcpudelay 599 */ 600 void 601 cpu_init_tick_freq(void) 602 { 603 /* 604 * For UltraSPARC III and beyond we want to use the 605 * system clock rate as the basis for low level timing, 606 * due to support of mixed speed CPUs and power managment. 
607 */ 608 if (system_clock_freq == 0) 609 cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq"); 610 611 sys_tick_freq = system_clock_freq; 612 } 613 614 #ifdef CHEETAHPLUS_ERRATUM_25 615 /* 616 * Tunables 617 */ 618 int cheetah_bpe_off = 0; 619 int cheetah_sendmondo_recover = 1; 620 int cheetah_sendmondo_fullscan = 0; 621 int cheetah_sendmondo_recover_delay = 5; 622 623 #define CHEETAH_LIVELOCK_MIN_DELAY 1 624 625 /* 626 * Recovery Statistics 627 */ 628 typedef struct cheetah_livelock_entry { 629 int cpuid; /* fallen cpu */ 630 int buddy; /* cpu that ran recovery */ 631 clock_t lbolt; /* when recovery started */ 632 hrtime_t recovery_time; /* time spent in recovery */ 633 } cheetah_livelock_entry_t; 634 635 #define CHEETAH_LIVELOCK_NENTRY 32 636 637 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY]; 638 int cheetah_livelock_entry_nxt; 639 640 #define CHEETAH_LIVELOCK_ENTRY_NEXT(statp) { \ 641 statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt; \ 642 if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) { \ 643 cheetah_livelock_entry_nxt = 0; \ 644 } \ 645 } 646 647 #define CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val) statp->item = val 648 649 struct { 650 hrtime_t hrt; /* maximum recovery time */ 651 int recovery; /* recovered */ 652 int full_claimed; /* maximum pages claimed in full recovery */ 653 int proc_entry; /* attempted to claim TSB */ 654 int proc_tsb_scan; /* tsb scanned */ 655 int proc_tsb_partscan; /* tsb partially scanned */ 656 int proc_tsb_fullscan; /* whole tsb scanned */ 657 int proc_claimed; /* maximum pages claimed in tsb scan */ 658 int proc_user; /* user thread */ 659 int proc_kernel; /* kernel thread */ 660 int proc_onflt; /* bad stack */ 661 int proc_cpu; /* null cpu */ 662 int proc_thread; /* null thread */ 663 int proc_proc; /* null proc */ 664 int proc_as; /* null as */ 665 int proc_hat; /* null hat */ 666 int proc_hat_inval; /* hat contents don't make sense */ 667 int proc_hat_busy; /* hat is changing TSBs */ 668 int proc_tsb_reloc; /* TSB skipped because being relocated */ 669 int proc_cnum_bad; /* cnum out of range */ 670 int proc_cnum; /* last cnum processed */ 671 tte_t proc_tte; /* last tte processed */ 672 } cheetah_livelock_stat; 673 674 #define CHEETAH_LIVELOCK_STAT(item) cheetah_livelock_stat.item++ 675 676 #define CHEETAH_LIVELOCK_STATSET(item, value) \ 677 cheetah_livelock_stat.item = value 678 679 #define CHEETAH_LIVELOCK_MAXSTAT(item, value) { \ 680 if (value > cheetah_livelock_stat.item) \ 681 cheetah_livelock_stat.item = value; \ 682 } 683 684 /* 685 * Attempt to recover a cpu by claiming every cache line as saved 686 * in the TSB that the non-responsive cpu is using. Since we can't 687 * grab any adaptive lock, this is at best an attempt to do so. Because 688 * we don't grab any locks, we must operate under the protection of 689 * on_fault(). 690 * 691 * Return 1 if cpuid could be recovered, 0 if failed. 
692 */ 693 int 694 mondo_recover_proc(uint16_t cpuid, int bn) 695 { 696 label_t ljb; 697 cpu_t *cp; 698 kthread_t *t; 699 proc_t *p; 700 struct as *as; 701 struct hat *hat; 702 short cnum; 703 struct tsb_info *tsbinfop; 704 struct tsbe *tsbep; 705 caddr_t tsbp; 706 caddr_t end_tsbp; 707 uint64_t paddr; 708 uint64_t idsr; 709 u_longlong_t pahi, palo; 710 int pages_claimed = 0; 711 tte_t tsbe_tte; 712 int tried_kernel_tsb = 0; 713 714 CHEETAH_LIVELOCK_STAT(proc_entry); 715 716 if (on_fault(&ljb)) { 717 CHEETAH_LIVELOCK_STAT(proc_onflt); 718 goto badstruct; 719 } 720 721 if ((cp = cpu[cpuid]) == NULL) { 722 CHEETAH_LIVELOCK_STAT(proc_cpu); 723 goto badstruct; 724 } 725 726 if ((t = cp->cpu_thread) == NULL) { 727 CHEETAH_LIVELOCK_STAT(proc_thread); 728 goto badstruct; 729 } 730 731 if ((p = ttoproc(t)) == NULL) { 732 CHEETAH_LIVELOCK_STAT(proc_proc); 733 goto badstruct; 734 } 735 736 if ((as = p->p_as) == NULL) { 737 CHEETAH_LIVELOCK_STAT(proc_as); 738 goto badstruct; 739 } 740 741 if ((hat = as->a_hat) == NULL) { 742 CHEETAH_LIVELOCK_STAT(proc_hat); 743 goto badstruct; 744 } 745 746 if (hat != ksfmmup) { 747 CHEETAH_LIVELOCK_STAT(proc_user); 748 if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) { 749 CHEETAH_LIVELOCK_STAT(proc_hat_busy); 750 goto badstruct; 751 } 752 tsbinfop = hat->sfmmu_tsb; 753 if (tsbinfop == NULL) { 754 CHEETAH_LIVELOCK_STAT(proc_hat_inval); 755 goto badstruct; 756 } 757 tsbp = tsbinfop->tsb_va; 758 end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc); 759 } else { 760 CHEETAH_LIVELOCK_STAT(proc_kernel); 761 tsbinfop = NULL; 762 tsbp = ktsb_base; 763 end_tsbp = tsbp + TSB_BYTES(ktsb_sz); 764 } 765 766 /* Verify as */ 767 if (hat->sfmmu_as != as) { 768 CHEETAH_LIVELOCK_STAT(proc_hat_inval); 769 goto badstruct; 770 } 771 772 cnum = hat->sfmmu_cnum; 773 CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum); 774 775 if ((cnum < 0) || (cnum == INVALID_CONTEXT) || (cnum >= nctxs)) { 776 CHEETAH_LIVELOCK_STAT(proc_cnum_bad); 777 goto badstruct; 778 } 779 780 do { 781 CHEETAH_LIVELOCK_STAT(proc_tsb_scan); 782 783 /* 784 * Skip TSBs being relocated. This is important because 785 * we want to avoid the following deadlock scenario: 786 * 787 * 1) when we came in we set ourselves to "in recover" state. 788 * 2) when we try to touch TSB being relocated the mapping 789 * will be in the suspended state so we'll spin waiting 790 * for it to be unlocked. 791 * 3) when the CPU that holds the TSB mapping locked tries to 792 * unlock it it will send a xtrap which will fail to xcall 793 * us or the CPU we're trying to recover, and will in turn 794 * enter the mondo code. 795 * 4) since we are still spinning on the locked mapping 796 * no further progress will be made and the system will 797 * inevitably hard hang. 798 * 799 * A TSB not being relocated can't begin being relocated 800 * while we're accessing it because we check 801 * sendmondo_in_recover before relocating TSBs. 
802 */ 803 if (hat != ksfmmup && 804 (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) { 805 CHEETAH_LIVELOCK_STAT(proc_tsb_reloc); 806 goto next_tsbinfo; 807 } 808 809 for (tsbep = (struct tsbe *)tsbp; 810 tsbep < (struct tsbe *)end_tsbp; tsbep++) { 811 tsbe_tte = tsbep->tte_data; 812 813 if (tsbe_tte.tte_val == 0) { 814 /* 815 * Invalid tte 816 */ 817 continue; 818 } 819 if (tsbe_tte.tte_se) { 820 /* 821 * Don't want device registers 822 */ 823 continue; 824 } 825 if (tsbe_tte.tte_cp == 0) { 826 /* 827 * Must be cached in E$ 828 */ 829 continue; 830 } 831 CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte); 832 idsr = getidsr(); 833 if ((idsr & (IDSR_NACK_BIT(bn) | 834 IDSR_BUSY_BIT(bn))) == 0) { 835 CHEETAH_LIVELOCK_STAT(proc_tsb_partscan); 836 goto done; 837 } 838 pahi = tsbe_tte.tte_pahi; 839 palo = tsbe_tte.tte_palo; 840 paddr = (uint64_t)((pahi << 32) | 841 (palo << MMU_PAGESHIFT)); 842 claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)), 843 CH_ECACHE_SUBBLK_SIZE); 844 if ((idsr & IDSR_BUSY_BIT(bn)) == 0) { 845 shipit(cpuid, bn); 846 } 847 pages_claimed++; 848 } 849 next_tsbinfo: 850 if (tsbinfop != NULL) 851 tsbinfop = tsbinfop->tsb_next; 852 if (tsbinfop != NULL) { 853 tsbp = tsbinfop->tsb_va; 854 end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc); 855 } else if (tsbp == ktsb_base) { 856 tried_kernel_tsb = 1; 857 } else if (!tried_kernel_tsb) { 858 tsbp = ktsb_base; 859 end_tsbp = tsbp + TSB_BYTES(ktsb_sz); 860 hat = ksfmmup; 861 tsbinfop = NULL; 862 } 863 } while (tsbinfop != NULL || 864 ((tsbp == ktsb_base) && !tried_kernel_tsb)); 865 866 CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan); 867 CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed); 868 no_fault(); 869 idsr = getidsr(); 870 if ((idsr & (IDSR_NACK_BIT(bn) | 871 IDSR_BUSY_BIT(bn))) == 0) { 872 return (1); 873 } else { 874 return (0); 875 } 876 877 done: 878 no_fault(); 879 CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed); 880 return (1); 881 882 badstruct: 883 no_fault(); 884 return (0); 885 } 886 887 /* 888 * Attempt to claim ownership, temporarily, of every cache line that a 889 * non-responsive cpu might be using. This might kick that cpu out of 890 * this state. 891 * 892 * The return value indicates to the caller if we have exhausted all recovery 893 * techniques. If 1 is returned, it is useless to call this function again 894 * even for a different target CPU. 895 */ 896 int 897 mondo_recover(uint16_t cpuid, int bn) 898 { 899 struct memseg *seg; 900 uint64_t begin_pa, end_pa, cur_pa; 901 hrtime_t begin_hrt, end_hrt; 902 int retval = 0; 903 int pages_claimed = 0; 904 cheetah_livelock_entry_t *histp; 905 uint64_t idsr; 906 907 if (cas32(&sendmondo_in_recover, 0, 1) != 0) { 908 /* 909 * Wait while recovery takes place 910 */ 911 while (sendmondo_in_recover) { 912 drv_usecwait(1); 913 } 914 /* 915 * Assume we didn't claim the whole memory. If 916 * the target of this caller is not recovered, 917 * it will come back. 918 */ 919 return (retval); 920 } 921 922 CHEETAH_LIVELOCK_ENTRY_NEXT(histp) 923 CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt); 924 CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid); 925 CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id); 926 927 begin_hrt = gethrtime_waitfree(); 928 /* 929 * First try to claim the lines in the TSB the target 930 * may have been using. 931 */ 932 if (mondo_recover_proc(cpuid, bn) == 1) { 933 /* 934 * Didn't claim the whole memory 935 */ 936 goto done; 937 } 938 939 /* 940 * We tried using the TSB. The target is still 941 * not recovered. Check if complete memory scan is 942 * enabled. 
943 */ 944 if (cheetah_sendmondo_fullscan == 0) { 945 /* 946 * Full memory scan is disabled. 947 */ 948 retval = 1; 949 goto done; 950 } 951 952 /* 953 * Try claiming the whole memory. 954 */ 955 for (seg = memsegs; seg; seg = seg->next) { 956 begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT; 957 end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT; 958 for (cur_pa = begin_pa; cur_pa < end_pa; 959 cur_pa += MMU_PAGESIZE) { 960 idsr = getidsr(); 961 if ((idsr & (IDSR_NACK_BIT(bn) | 962 IDSR_BUSY_BIT(bn))) == 0) { 963 /* 964 * Didn't claim all memory 965 */ 966 goto done; 967 } 968 claimlines(cur_pa, MMU_PAGESIZE, 969 CH_ECACHE_SUBBLK_SIZE); 970 if ((idsr & IDSR_BUSY_BIT(bn)) == 0) { 971 shipit(cpuid, bn); 972 } 973 pages_claimed++; 974 } 975 } 976 977 /* 978 * We did all we could. 979 */ 980 retval = 1; 981 982 done: 983 /* 984 * Update statistics 985 */ 986 end_hrt = gethrtime_waitfree(); 987 CHEETAH_LIVELOCK_STAT(recovery); 988 CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt)); 989 CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed); 990 CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \ 991 (end_hrt - begin_hrt)); 992 993 while (cas32(&sendmondo_in_recover, 1, 0) != 1); 994 995 return (retval); 996 } 997 998 /* 999 * This is called by the cyclic framework when this CPU becomes online 1000 */ 1001 /*ARGSUSED*/ 1002 static void 1003 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) 1004 { 1005 1006 hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy; 1007 hdlr->cyh_level = CY_LOW_LEVEL; 1008 hdlr->cyh_arg = NULL; 1009 1010 /* 1011 * Stagger the start time 1012 */ 1013 when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU); 1014 if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) { 1015 cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY; 1016 } 1017 when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC; 1018 } 1019 1020 /* 1021 * Create a low level cyclic to send a xtrap to the next cpu online. 1022 * However, there's no need to have this running on a uniprocessor system. 1023 */ 1024 static void 1025 cheetah_nudge_init(void) 1026 { 1027 cyc_omni_handler_t hdlr; 1028 1029 if (max_ncpus == 1) { 1030 return; 1031 } 1032 1033 hdlr.cyo_online = cheetah_nudge_onln; 1034 hdlr.cyo_offline = NULL; 1035 hdlr.cyo_arg = NULL; 1036 1037 mutex_enter(&cpu_lock); 1038 (void) cyclic_add_omni(&hdlr); 1039 mutex_exit(&cpu_lock); 1040 } 1041 1042 /* 1043 * Cyclic handler to wake up buddy 1044 */ 1045 void 1046 cheetah_nudge_buddy(void) 1047 { 1048 /* 1049 * Disable kernel preemption to protect the cpu list 1050 */ 1051 kpreempt_disable(); 1052 if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) { 1053 xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1, 1054 0, 0); 1055 } 1056 kpreempt_enable(); 1057 } 1058 1059 #endif /* CHEETAHPLUS_ERRATUM_25 */ 1060 1061 #ifdef SEND_MONDO_STATS 1062 uint32_t x_one_stimes[64]; 1063 uint32_t x_one_ltimes[16]; 1064 uint32_t x_set_stimes[64]; 1065 uint32_t x_set_ltimes[16]; 1066 uint32_t x_set_cpus[NCPU]; 1067 uint32_t x_nack_stimes[64]; 1068 #endif 1069 1070 /* 1071 * Note: A version of this function is used by the debugger via the KDI, 1072 * and must be kept in sync with this version. Any changes made to this 1073 * function to support new chips or to accomodate errata must also be included 1074 * in the KDI-specific version. See us3_kdi.c. 
1075 */ 1076 void 1077 send_one_mondo(int cpuid) 1078 { 1079 int busy, nack; 1080 uint64_t idsr, starttick, endtick, tick, lasttick; 1081 uint64_t busymask; 1082 #ifdef CHEETAHPLUS_ERRATUM_25 1083 int recovered = 0; 1084 #endif 1085 1086 CPU_STATS_ADDQ(CPU, sys, xcalls, 1); 1087 starttick = lasttick = gettick(); 1088 shipit(cpuid, 0); 1089 endtick = starttick + xc_tick_limit; 1090 busy = nack = 0; 1091 #if defined(JALAPENO) || defined(SERRANO) 1092 /* 1093 * Lower 2 bits of the agent ID determine which BUSY/NACK pair 1094 * will be used for dispatching interrupt. For now, assume 1095 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing 1096 * issues with respect to BUSY/NACK pair usage. 1097 */ 1098 busymask = IDSR_BUSY_BIT(cpuid); 1099 #else /* JALAPENO || SERRANO */ 1100 busymask = IDSR_BUSY; 1101 #endif /* JALAPENO || SERRANO */ 1102 for (;;) { 1103 idsr = getidsr(); 1104 if (idsr == 0) 1105 break; 1106 1107 tick = gettick(); 1108 /* 1109 * If there is a big jump between the current tick 1110 * count and lasttick, we have probably hit a break 1111 * point. Adjust endtick accordingly to avoid panic. 1112 */ 1113 if (tick > (lasttick + xc_tick_jump_limit)) 1114 endtick += (tick - lasttick); 1115 lasttick = tick; 1116 if (tick > endtick) { 1117 if (panic_quiesce) 1118 return; 1119 #ifdef CHEETAHPLUS_ERRATUM_25 1120 if (cheetah_sendmondo_recover && recovered == 0) { 1121 if (mondo_recover(cpuid, 0)) { 1122 /* 1123 * We claimed the whole memory or 1124 * full scan is disabled. 1125 */ 1126 recovered++; 1127 } 1128 tick = gettick(); 1129 endtick = tick + xc_tick_limit; 1130 lasttick = tick; 1131 /* 1132 * Recheck idsr 1133 */ 1134 continue; 1135 } else 1136 #endif /* CHEETAHPLUS_ERRATUM_25 */ 1137 { 1138 cmn_err(CE_PANIC, "send mondo timeout " 1139 "(target 0x%x) [%d NACK %d BUSY]", 1140 cpuid, nack, busy); 1141 } 1142 } 1143 1144 if (idsr & busymask) { 1145 busy++; 1146 continue; 1147 } 1148 drv_usecwait(1); 1149 shipit(cpuid, 0); 1150 nack++; 1151 busy = 0; 1152 } 1153 #ifdef SEND_MONDO_STATS 1154 { 1155 int n = gettick() - starttick; 1156 if (n < 8192) 1157 x_one_stimes[n >> 7]++; 1158 else 1159 x_one_ltimes[(n >> 13) & 0xf]++; 1160 } 1161 #endif 1162 } 1163 1164 void 1165 syncfpu(void) 1166 { 1167 } 1168 1169 /* 1170 * Return processor specific async error structure 1171 * size used. 1172 */ 1173 int 1174 cpu_aflt_size(void) 1175 { 1176 return (sizeof (ch_async_flt_t)); 1177 } 1178 1179 /* 1180 * The fast_ecc_err handler transfers control here for UCU, UCC events. 1181 * Note that we flush Ecache twice, once in the fast_ecc_err handler to 1182 * flush the error that caused the UCU/UCC, then again here at the end to 1183 * flush the TL=1 trap handler code out of the Ecache, so we can minimize 1184 * the probability of getting a TL>1 Fast ECC trap when we're fielding 1185 * another Fast ECC trap. 1186 * 1187 * Cheetah+ also handles: TSCE: No additional processing required. 1188 * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT. 1189 * 1190 * Note that the p_clo_flags input is only valid in cases where the 1191 * cpu_private struct is not yet initialized (since that is the only 1192 * time that information cannot be obtained from the logout struct.) 1193 */ 1194 /*ARGSUSED*/ 1195 void 1196 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags) 1197 { 1198 ch_cpu_logout_t *clop; 1199 uint64_t ceen; 1200 1201 /* 1202 * Get the CPU log out info. If we can't find our CPU private 1203 * pointer, then we will have to make due without any detailed 1204 * logout information. 
1205 */ 1206 if (CPU_PRIVATE(CPU) == NULL) { 1207 clop = NULL; 1208 ceen = p_clo_flags & EN_REG_CEEN; 1209 } else { 1210 clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout); 1211 ceen = clop->clo_flags & EN_REG_CEEN; 1212 } 1213 1214 cpu_log_fast_ecc_error((caddr_t)rp->r_pc, 1215 (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, clop); 1216 } 1217 1218 /* 1219 * Log fast ecc error, called from either Fast ECC at TL=0 or Fast 1220 * ECC at TL>0. Need to supply either a error register pointer or a 1221 * cpu logout structure pointer. 1222 */ 1223 static void 1224 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen, 1225 ch_cpu_logout_t *clop) 1226 { 1227 struct async_flt *aflt; 1228 ch_async_flt_t ch_flt; 1229 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; 1230 char pr_reason[MAX_REASON_STRING]; 1231 ch_cpu_errors_t cpu_error_regs; 1232 1233 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1234 /* 1235 * If no cpu logout data, then we will have to make due without 1236 * any detailed logout information. 1237 */ 1238 if (clop == NULL) { 1239 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; 1240 get_cpu_error_state(&cpu_error_regs); 1241 set_cpu_error_state(&cpu_error_regs); 1242 t_afar = cpu_error_regs.afar; 1243 t_afsr = cpu_error_regs.afsr; 1244 t_afsr_ext = cpu_error_regs.afsr_ext; 1245 #if defined(SERRANO) 1246 ch_flt.afar2 = cpu_error_regs.afar2; 1247 #endif /* SERRANO */ 1248 } else { 1249 t_afar = clop->clo_data.chd_afar; 1250 t_afsr = clop->clo_data.chd_afsr; 1251 t_afsr_ext = clop->clo_data.chd_afsr_ext; 1252 #if defined(SERRANO) 1253 ch_flt.afar2 = clop->clo_data.chd_afar2; 1254 #endif /* SERRANO */ 1255 } 1256 1257 /* 1258 * In order to simplify code, we maintain this afsr_errs 1259 * variable which holds the aggregate of AFSR and AFSR_EXT 1260 * sticky bits. 1261 */ 1262 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 1263 (t_afsr & C_AFSR_ALL_ERRS); 1264 pr_reason[0] = '\0'; 1265 1266 /* Setup the async fault structure */ 1267 aflt = (struct async_flt *)&ch_flt; 1268 aflt->flt_id = gethrtime_waitfree(); 1269 ch_flt.afsr_ext = t_afsr_ext; 1270 ch_flt.afsr_errs = t_afsr_errs; 1271 aflt->flt_stat = t_afsr; 1272 aflt->flt_addr = t_afar; 1273 aflt->flt_bus_id = getprocessorid(); 1274 aflt->flt_inst = CPU->cpu_id; 1275 aflt->flt_pc = tpc; 1276 aflt->flt_prot = AFLT_PROT_NONE; 1277 aflt->flt_class = CPU_FAULT; 1278 aflt->flt_priv = priv; 1279 aflt->flt_tl = tl; 1280 aflt->flt_status = ECC_F_TRAP; 1281 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs); 1282 1283 /* 1284 * XXXX - Phenomenal hack to get around Solaris not getting all the 1285 * cmn_err messages out to the console. The situation is a UCU (in 1286 * priv mode) which causes a WDU which causes a UE (on the retry). 1287 * The messages for the UCU and WDU are enqueued and then pulled off 1288 * the async queue via softint and syslogd starts to process them 1289 * but doesn't get them to the console. The UE causes a panic, but 1290 * since the UCU/WDU messages are already in transit, those aren't 1291 * on the async queue. The hack is to check if we have a matching 1292 * WDU event for the UCU, and if it matches, we're more than likely 1293 * going to panic with a UE, unless we're under protection. So, we 1294 * check to see if we got a matching WDU event and if we're under 1295 * protection. 
1296 * 1297 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about 1298 * looks like this: 1299 * UCU->WDU->UE 1300 * For Panther, it could look like either of these: 1301 * UCU---->WDU->L3_WDU->UE 1302 * L3_UCU->WDU->L3_WDU->UE 1303 */ 1304 if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) && 1305 aflt->flt_panic == 0 && aflt->flt_priv != 0 && 1306 curthread->t_ontrap == NULL && curthread->t_lofault == NULL) { 1307 get_cpu_error_state(&cpu_error_regs); 1308 aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) && 1309 (cpu_error_regs.afar == t_afar)); 1310 aflt->flt_panic |= ((clop == NULL) && 1311 (t_afsr_errs & C_AFSR_WDU)); 1312 } 1313 1314 /* 1315 * Queue events on the async event queue, one event per error bit. 1316 * If no events are queued or no Fast ECC events are on in the AFSR, 1317 * queue an event to complain. 1318 */ 1319 if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 || 1320 ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) { 1321 ch_flt.flt_type = CPU_INV_AFSR; 1322 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, 1323 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue, 1324 aflt->flt_panic); 1325 } 1326 1327 /* 1328 * Zero out + invalidate CPU logout. 1329 */ 1330 if (clop) { 1331 bzero(clop, sizeof (ch_cpu_logout_t)); 1332 clop->clo_data.chd_afar = LOGOUT_INVALID; 1333 } 1334 1335 /* 1336 * We carefully re-enable NCEEN and CEEN and then check if any deferred 1337 * or disrupting errors have happened. We do this because if a 1338 * deferred or disrupting error had occurred with NCEEN/CEEN off, the 1339 * trap will not be taken when NCEEN/CEEN is re-enabled. Note that 1340 * CEEN works differently on Cheetah than on Spitfire. Also, we enable 1341 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a 1342 * deferred or disrupting error happening between checking the AFSR and 1343 * enabling NCEEN/CEEN. 1344 * 1345 * Note: CEEN reenabled only if it was on when trap taken. 1346 */ 1347 set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen)); 1348 if (clear_errors(&ch_flt)) { 1349 aflt->flt_panic |= ((ch_flt.afsr_errs & 1350 (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0); 1351 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 1352 NULL); 1353 } 1354 1355 /* 1356 * Panic here if aflt->flt_panic has been set. Enqueued errors will 1357 * be logged as part of the panic flow. 1358 */ 1359 if (aflt->flt_panic) 1360 fm_panic("%sError(s)", pr_reason); 1361 1362 /* 1363 * Flushing the Ecache here gets the part of the trap handler that 1364 * is run at TL=1 out of the Ecache. 1365 */ 1366 cpu_flush_ecache(); 1367 } 1368 1369 /* 1370 * This is called via sys_trap from pil15_interrupt code if the 1371 * corresponding entry in ch_err_tl1_pending is set. Checks the 1372 * various ch_err_tl1_data structures for valid entries based on the bit 1373 * settings in the ch_err_tl1_flags entry of the structure. 
1374 */ 1375 /*ARGSUSED*/ 1376 void 1377 cpu_tl1_error(struct regs *rp, int panic) 1378 { 1379 ch_err_tl1_data_t *cl1p, cl1; 1380 int i, ncl1ps; 1381 uint64_t me_flags; 1382 uint64_t ceen; 1383 1384 if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) { 1385 cl1p = &ch_err_tl1_data; 1386 ncl1ps = 1; 1387 } else if (CPU_PRIVATE(CPU) != NULL) { 1388 cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]); 1389 ncl1ps = CH_ERR_TL1_TLMAX; 1390 } else { 1391 ncl1ps = 0; 1392 } 1393 1394 for (i = 0; i < ncl1ps; i++, cl1p++) { 1395 if (cl1p->ch_err_tl1_flags == 0) 1396 continue; 1397 1398 /* 1399 * Grab a copy of the logout data and invalidate 1400 * the logout area. 1401 */ 1402 cl1 = *cl1p; 1403 bzero(cl1p, sizeof (ch_err_tl1_data_t)); 1404 cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID; 1405 me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags); 1406 1407 /* 1408 * Log "first error" in ch_err_tl1_data. 1409 */ 1410 if (cl1.ch_err_tl1_flags & CH_ERR_FECC) { 1411 ceen = get_error_enable() & EN_REG_CEEN; 1412 cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1, 1413 1, ceen, &cl1.ch_err_tl1_logout); 1414 } 1415 #if defined(CPU_IMP_L1_CACHE_PARITY) 1416 if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) { 1417 cpu_parity_error(rp, cl1.ch_err_tl1_flags, 1418 (caddr_t)cl1.ch_err_tl1_tpc); 1419 } 1420 #endif /* CPU_IMP_L1_CACHE_PARITY */ 1421 1422 /* 1423 * Log "multiple events" in ch_err_tl1_data. Note that 1424 * we don't read and clear the AFSR/AFAR in the TL>0 code 1425 * if the structure is busy, we just do the cache flushing 1426 * we have to do and then do the retry. So the AFSR/AFAR 1427 * at this point *should* have some relevant info. If there 1428 * are no valid errors in the AFSR, we'll assume they've 1429 * already been picked up and logged. For I$/D$ parity, 1430 * we just log an event with an "Unknown" (NULL) TPC. 1431 */ 1432 if (me_flags & CH_ERR_FECC) { 1433 ch_cpu_errors_t cpu_error_regs; 1434 uint64_t t_afsr_errs; 1435 1436 /* 1437 * Get the error registers and see if there's 1438 * a pending error. If not, don't bother 1439 * generating an "Invalid AFSR" error event. 1440 */ 1441 get_cpu_error_state(&cpu_error_regs); 1442 t_afsr_errs = (cpu_error_regs.afsr_ext & 1443 C_AFSR_EXT_ALL_ERRS) | 1444 (cpu_error_regs.afsr & C_AFSR_ALL_ERRS); 1445 if (t_afsr_errs != 0) { 1446 ceen = get_error_enable() & EN_REG_CEEN; 1447 cpu_log_fast_ecc_error((caddr_t)NULL, 1, 1448 1, ceen, NULL); 1449 } 1450 } 1451 #if defined(CPU_IMP_L1_CACHE_PARITY) 1452 if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) { 1453 cpu_parity_error(rp, me_flags, (caddr_t)NULL); 1454 } 1455 #endif /* CPU_IMP_L1_CACHE_PARITY */ 1456 } 1457 } 1458 1459 /* 1460 * Called from Fast ECC TL>0 handler in case of fatal error. 1461 * cpu_tl1_error should always find an associated ch_err_tl1_data structure, 1462 * but if we don't, we'll panic with something reasonable. 1463 */ 1464 /*ARGSUSED*/ 1465 void 1466 cpu_tl1_err_panic(struct regs *rp, ulong_t flags) 1467 { 1468 cpu_tl1_error(rp, 1); 1469 /* 1470 * Should never return, but just in case. 1471 */ 1472 fm_panic("Unsurvivable ECC Error at TL>0"); 1473 } 1474 1475 /* 1476 * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST, 1477 * EDC, WDU, WDC, CPU, CPC, IVU, IVC events. 
1478 * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU 1479 * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC 1480 * 1481 * Cheetah+ also handles (No additional processing required): 1482 * DUE, DTO, DBERR (NCEEN controlled) 1483 * THCE (CEEN and ET_ECC_en controlled) 1484 * TUE (ET_ECC_en controlled) 1485 * 1486 * Panther further adds: 1487 * IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled) 1488 * IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled) 1489 * TUE_SH, TUE (NCEEN and L2_tag_ECC_en controlled) 1490 * L3_TUE, L3_TUE_SH (NCEEN and ET_ECC_en controlled) 1491 * THCE (CEEN and L2_tag_ECC_en controlled) 1492 * L3_THCE (CEEN and ET_ECC_en controlled) 1493 * 1494 * Note that the p_clo_flags input is only valid in cases where the 1495 * cpu_private struct is not yet initialized (since that is the only 1496 * time that information cannot be obtained from the logout struct.) 1497 */ 1498 /*ARGSUSED*/ 1499 void 1500 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags) 1501 { 1502 struct async_flt *aflt; 1503 ch_async_flt_t ch_flt; 1504 char pr_reason[MAX_REASON_STRING]; 1505 ch_cpu_logout_t *clop; 1506 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; 1507 ch_cpu_errors_t cpu_error_regs; 1508 1509 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1510 /* 1511 * Get the CPU log out info. If we can't find our CPU private 1512 * pointer, then we will have to make due without any detailed 1513 * logout information. 1514 */ 1515 if (CPU_PRIVATE(CPU) == NULL) { 1516 clop = NULL; 1517 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; 1518 get_cpu_error_state(&cpu_error_regs); 1519 set_cpu_error_state(&cpu_error_regs); 1520 t_afar = cpu_error_regs.afar; 1521 t_afsr = cpu_error_regs.afsr; 1522 t_afsr_ext = cpu_error_regs.afsr_ext; 1523 #if defined(SERRANO) 1524 ch_flt.afar2 = cpu_error_regs.afar2; 1525 #endif /* SERRANO */ 1526 } else { 1527 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); 1528 t_afar = clop->clo_data.chd_afar; 1529 t_afsr = clop->clo_data.chd_afsr; 1530 t_afsr_ext = clop->clo_data.chd_afsr_ext; 1531 #if defined(SERRANO) 1532 ch_flt.afar2 = clop->clo_data.chd_afar2; 1533 #endif /* SERRANO */ 1534 } 1535 1536 /* 1537 * In order to simplify code, we maintain this afsr_errs 1538 * variable which holds the aggregate of AFSR and AFSR_EXT 1539 * sticky bits. 1540 */ 1541 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 1542 (t_afsr & C_AFSR_ALL_ERRS); 1543 1544 pr_reason[0] = '\0'; 1545 /* Setup the async fault structure */ 1546 aflt = (struct async_flt *)&ch_flt; 1547 ch_flt.afsr_ext = t_afsr_ext; 1548 ch_flt.afsr_errs = t_afsr_errs; 1549 aflt->flt_stat = t_afsr; 1550 aflt->flt_addr = t_afar; 1551 aflt->flt_pc = (caddr_t)rp->r_pc; 1552 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1553 aflt->flt_tl = 0; 1554 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs); 1555 1556 /* 1557 * If this trap is a result of one of the errors not masked 1558 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead 1559 * indicate that a timeout is to be set later. 1560 */ 1561 if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) && 1562 !aflt->flt_panic) 1563 ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED; 1564 else 1565 ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED; 1566 1567 /* 1568 * log the CE and clean up 1569 */ 1570 cpu_log_and_clear_ce(&ch_flt); 1571 1572 /* 1573 * We re-enable CEEN (if required) and check if any disrupting errors 1574 * have happened. 
We do this because if a disrupting error had occurred 1575 * with CEEN off, the trap will not be taken when CEEN is re-enabled. 1576 * Note that CEEN works differently on Cheetah than on Spitfire. Also, 1577 * we enable CEEN *before* checking the AFSR to avoid the small window 1578 * of a error happening between checking the AFSR and enabling CEEN. 1579 */ 1580 if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER) 1581 set_error_enable(get_error_enable() | EN_REG_CEEN); 1582 if (clear_errors(&ch_flt)) { 1583 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 1584 NULL); 1585 } 1586 1587 /* 1588 * Panic here if aflt->flt_panic has been set. Enqueued errors will 1589 * be logged as part of the panic flow. 1590 */ 1591 if (aflt->flt_panic) 1592 fm_panic("%sError(s)", pr_reason); 1593 } 1594 1595 /* 1596 * The async_err handler transfers control here for UE, EMU, EDU:BLD, 1597 * L3_EDU:BLD, TO, and BERR events. 1598 * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR 1599 * 1600 * Cheetah+: No additional errors handled. 1601 * 1602 * Note that the p_clo_flags input is only valid in cases where the 1603 * cpu_private struct is not yet initialized (since that is the only 1604 * time that information cannot be obtained from the logout struct.) 1605 */ 1606 /*ARGSUSED*/ 1607 void 1608 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags) 1609 { 1610 ushort_t ttype, tl; 1611 ch_async_flt_t ch_flt; 1612 struct async_flt *aflt; 1613 int trampolined = 0; 1614 char pr_reason[MAX_REASON_STRING]; 1615 ch_cpu_logout_t *clop; 1616 uint64_t ceen, clo_flags; 1617 uint64_t log_afsr; 1618 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; 1619 ch_cpu_errors_t cpu_error_regs; 1620 int expected = DDI_FM_ERR_UNEXPECTED; 1621 ddi_acc_hdl_t *hp; 1622 1623 /* 1624 * We need to look at p_flag to determine if the thread detected an 1625 * error while dumping core. We can't grab p_lock here, but it's ok 1626 * because we just need a consistent snapshot and we know that everyone 1627 * else will store a consistent set of bits while holding p_lock. We 1628 * don't have to worry about a race because SDOCORE is set once prior 1629 * to doing i/o from the process's address space and is never cleared. 1630 */ 1631 uint_t pflag = ttoproc(curthread)->p_flag; 1632 1633 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1634 /* 1635 * Get the CPU log out info. If we can't find our CPU private 1636 * pointer then we will have to make due without any detailed 1637 * logout information. 1638 */ 1639 if (CPU_PRIVATE(CPU) == NULL) { 1640 clop = NULL; 1641 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; 1642 get_cpu_error_state(&cpu_error_regs); 1643 set_cpu_error_state(&cpu_error_regs); 1644 t_afar = cpu_error_regs.afar; 1645 t_afsr = cpu_error_regs.afsr; 1646 t_afsr_ext = cpu_error_regs.afsr_ext; 1647 #if defined(SERRANO) 1648 ch_flt.afar2 = cpu_error_regs.afar2; 1649 #endif /* SERRANO */ 1650 clo_flags = p_clo_flags; 1651 } else { 1652 clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout); 1653 t_afar = clop->clo_data.chd_afar; 1654 t_afsr = clop->clo_data.chd_afsr; 1655 t_afsr_ext = clop->clo_data.chd_afsr_ext; 1656 #if defined(SERRANO) 1657 ch_flt.afar2 = clop->clo_data.chd_afar2; 1658 #endif /* SERRANO */ 1659 clo_flags = clop->clo_flags; 1660 } 1661 1662 /* 1663 * In order to simplify code, we maintain this afsr_errs 1664 * variable which holds the aggregate of AFSR and AFSR_EXT 1665 * sticky bits. 
1666 */ 1667 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 1668 (t_afsr & C_AFSR_ALL_ERRS); 1669 pr_reason[0] = '\0'; 1670 1671 /* 1672 * Grab information encoded into our clo_flags field. 1673 */ 1674 ceen = clo_flags & EN_REG_CEEN; 1675 tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT; 1676 ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT; 1677 1678 /* 1679 * handle the specific error 1680 */ 1681 aflt = (struct async_flt *)&ch_flt; 1682 aflt->flt_id = gethrtime_waitfree(); 1683 aflt->flt_bus_id = getprocessorid(); 1684 aflt->flt_inst = CPU->cpu_id; 1685 ch_flt.afsr_ext = t_afsr_ext; 1686 ch_flt.afsr_errs = t_afsr_errs; 1687 aflt->flt_stat = t_afsr; 1688 aflt->flt_addr = t_afar; 1689 aflt->flt_pc = (caddr_t)rp->r_pc; 1690 aflt->flt_prot = AFLT_PROT_NONE; 1691 aflt->flt_class = CPU_FAULT; 1692 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1693 aflt->flt_tl = (uchar_t)tl; 1694 aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) || 1695 C_AFSR_PANIC(t_afsr_errs)); 1696 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1697 aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP); 1698 1699 /* 1700 * If the trap occurred in privileged mode at TL=0, we need to check to 1701 * see if we were executing in the kernel under on_trap() or t_lofault 1702 * protection. If so, modify the saved registers so that we return 1703 * from the trap to the appropriate trampoline routine. 1704 */ 1705 if (aflt->flt_priv && tl == 0) { 1706 if (curthread->t_ontrap != NULL) { 1707 on_trap_data_t *otp = curthread->t_ontrap; 1708 1709 if (otp->ot_prot & OT_DATA_EC) { 1710 aflt->flt_prot = AFLT_PROT_EC; 1711 otp->ot_trap |= OT_DATA_EC; 1712 rp->r_pc = otp->ot_trampoline; 1713 rp->r_npc = rp->r_pc + 4; 1714 trampolined = 1; 1715 } 1716 1717 if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) && 1718 (otp->ot_prot & OT_DATA_ACCESS)) { 1719 aflt->flt_prot = AFLT_PROT_ACCESS; 1720 otp->ot_trap |= OT_DATA_ACCESS; 1721 rp->r_pc = otp->ot_trampoline; 1722 rp->r_npc = rp->r_pc + 4; 1723 trampolined = 1; 1724 /* 1725 * for peeks and caut_gets errors are expected 1726 */ 1727 hp = (ddi_acc_hdl_t *)otp->ot_handle; 1728 if (!hp) 1729 expected = DDI_FM_ERR_PEEK; 1730 else if (hp->ah_acc.devacc_attr_access == 1731 DDI_CAUTIOUS_ACC) 1732 expected = DDI_FM_ERR_EXPECTED; 1733 } 1734 1735 } else if (curthread->t_lofault) { 1736 aflt->flt_prot = AFLT_PROT_COPY; 1737 rp->r_g1 = EFAULT; 1738 rp->r_pc = curthread->t_lofault; 1739 rp->r_npc = rp->r_pc + 4; 1740 trampolined = 1; 1741 } 1742 } 1743 1744 /* 1745 * If we're in user mode or we're doing a protected copy, we either 1746 * want the ASTON code below to send a signal to the user process 1747 * or we want to panic if aft_panic is set. 1748 * 1749 * If we're in privileged mode and we're not doing a copy, then we 1750 * need to check if we've trampolined. If we haven't trampolined, 1751 * we should panic. 1752 */ 1753 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 1754 if (t_afsr_errs & 1755 ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) & 1756 ~(C_AFSR_BERR | C_AFSR_TO))) 1757 aflt->flt_panic |= aft_panic; 1758 } else if (!trampolined) { 1759 aflt->flt_panic = 1; 1760 } 1761 1762 /* 1763 * If we've trampolined due to a privileged TO or BERR, or if an 1764 * unprivileged TO or BERR occurred, we don't want to enqueue an 1765 * event for that TO or BERR. Queue all other events (if any) besides 1766 * the TO/BERR. Since we may not be enqueing any events, we need to 1767 * ignore the number of events queued. 
If we haven't trampolined due 1768 * to a TO or BERR, just enqueue events normally. 1769 */ 1770 log_afsr = t_afsr_errs; 1771 if (trampolined) { 1772 log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR); 1773 } else if (!aflt->flt_priv) { 1774 /* 1775 * User mode, suppress messages if 1776 * cpu_berr_to_verbose is not set. 1777 */ 1778 if (!cpu_berr_to_verbose) 1779 log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR); 1780 } 1781 1782 /* 1783 * Log any errors that occurred 1784 */ 1785 if (((log_afsr & 1786 ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) && 1787 cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) || 1788 (t_afsr_errs & 1789 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) { 1790 ch_flt.flt_type = CPU_INV_AFSR; 1791 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, 1792 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue, 1793 aflt->flt_panic); 1794 } 1795 1796 /* 1797 * Zero out + invalidate CPU logout. 1798 */ 1799 if (clop) { 1800 bzero(clop, sizeof (ch_cpu_logout_t)); 1801 clop->clo_data.chd_afar = LOGOUT_INVALID; 1802 } 1803 1804 #if defined(JALAPENO) || defined(SERRANO) 1805 /* 1806 * UE/RUE/BERR/TO: Call our bus nexus friends to check for 1807 * IO errors that may have resulted in this trap. 1808 */ 1809 if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) { 1810 cpu_run_bus_error_handlers(aflt, expected); 1811 } 1812 1813 /* 1814 * UE/RUE: If UE or RUE is in memory, we need to flush the bad 1815 * line from the Ecache. We also need to query the bus nexus for 1816 * fatal errors. Attempts to do diagnostic read on caches may 1817 * introduce more errors (especially when the module is bad). 1818 */ 1819 if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) { 1820 /* 1821 * Ask our bus nexus friends if they have any fatal errors. If 1822 * so, they will log appropriate error messages. 1823 */ 1824 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL) 1825 aflt->flt_panic = 1; 1826 1827 /* 1828 * We got a UE or RUE and are panicking, save the fault PA in 1829 * a known location so that the platform specific panic code 1830 * can check for copyback errors. 1831 */ 1832 if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) { 1833 panic_aflt = *aflt; 1834 } 1835 } 1836 1837 /* 1838 * Flush Ecache line or entire Ecache 1839 */ 1840 if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR)) 1841 cpu_error_ecache_flush(&ch_flt); 1842 #else /* JALAPENO || SERRANO */ 1843 /* 1844 * UE/BERR/TO: Call our bus nexus friends to check for 1845 * IO errors that may have resulted in this trap. 1846 */ 1847 if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) { 1848 cpu_run_bus_error_handlers(aflt, expected); 1849 } 1850 1851 /* 1852 * UE: If the UE is in memory, we need to flush the bad 1853 * line from the Ecache. We also need to query the bus nexus for 1854 * fatal errors. Attempts to do diagnostic read on caches may 1855 * introduce more errors (especially when the module is bad). 1856 */ 1857 if (t_afsr & C_AFSR_UE) { 1858 /* 1859 * Ask our legacy bus nexus friends if they have any fatal 1860 * errors. If so, they will log appropriate error messages. 1861 */ 1862 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL) 1863 aflt->flt_panic = 1; 1864 1865 /* 1866 * We got a UE and are panicking, save the fault PA in a known 1867 * location so that the platform specific panic code can check 1868 * for copyback errors. 
1869 */ 1870 if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) { 1871 panic_aflt = *aflt; 1872 } 1873 } 1874 1875 /* 1876 * Flush Ecache line or entire Ecache 1877 */ 1878 if (t_afsr_errs & 1879 (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU)) 1880 cpu_error_ecache_flush(&ch_flt); 1881 #endif /* JALAPENO || SERRANO */ 1882 1883 /* 1884 * We carefully re-enable NCEEN and CEEN and then check if any deferred 1885 * or disrupting errors have happened. We do this because if a 1886 * deferred or disrupting error had occurred with NCEEN/CEEN off, the 1887 * trap will not be taken when NCEEN/CEEN is re-enabled. Note that 1888 * CEEN works differently on Cheetah than on Spitfire. Also, we enable 1889 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a 1890 * deferred or disrupting error happening between checking the AFSR and 1891 * enabling NCEEN/CEEN. 1892 * 1893 * Note: CEEN reenabled only if it was on when trap taken. 1894 */ 1895 set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen)); 1896 if (clear_errors(&ch_flt)) { 1897 /* 1898 * Check for secondary errors, and avoid panicking if we 1899 * have them 1900 */ 1901 if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs, 1902 t_afar) == 0) { 1903 aflt->flt_panic |= ((ch_flt.afsr_errs & 1904 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0); 1905 } 1906 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 1907 NULL); 1908 } 1909 1910 /* 1911 * Panic here if aflt->flt_panic has been set. Enqueued errors will 1912 * be logged as part of the panic flow. 1913 */ 1914 if (aflt->flt_panic) 1915 fm_panic("%sError(s)", pr_reason); 1916 1917 /* 1918 * If we queued an error and we are going to return from the trap and 1919 * the error was in user mode or inside of a copy routine, set AST flag 1920 * so the queue will be drained before returning to user mode. The 1921 * AST processing will also act on our failure policy. 1922 */ 1923 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 1924 int pcb_flag = 0; 1925 1926 if (t_afsr_errs & 1927 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS & 1928 ~(C_AFSR_BERR | C_AFSR_TO))) 1929 pcb_flag |= ASYNC_HWERR; 1930 1931 if (t_afsr & C_AFSR_BERR) 1932 pcb_flag |= ASYNC_BERR; 1933 1934 if (t_afsr & C_AFSR_TO) 1935 pcb_flag |= ASYNC_BTO; 1936 1937 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 1938 aston(curthread); 1939 } 1940 } 1941 1942 #if defined(CPU_IMP_L1_CACHE_PARITY) 1943 /* 1944 * Handling of data and instruction parity errors (traps 0x71, 0x72). 1945 * 1946 * For Panther, P$ data parity errors during floating point load hits 1947 * are also detected (reported as TT 0x71) and handled by this trap 1948 * handler. 1949 * 1950 * AFSR/AFAR are not set for parity errors, only TPC (a virtual address) 1951 * is available. 1952 */ 1953 /*ARGSUSED*/ 1954 void 1955 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc) 1956 { 1957 ch_async_flt_t ch_flt; 1958 struct async_flt *aflt; 1959 uchar_t tl = ((flags & CH_ERR_TL) != 0); 1960 uchar_t iparity = ((flags & CH_ERR_IPE) != 0); 1961 uchar_t panic = ((flags & CH_ERR_PANIC) != 0); 1962 char *error_class; 1963 1964 /* 1965 * Log the error. 1966 * For icache parity errors the fault address is the trap PC. 1967 * For dcache/pcache parity errors the instruction would have to 1968 * be decoded to determine the address and that isn't possible 1969 * at high PIL. 
1970 */ 1971 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1972 aflt = (struct async_flt *)&ch_flt; 1973 aflt->flt_id = gethrtime_waitfree(); 1974 aflt->flt_bus_id = getprocessorid(); 1975 aflt->flt_inst = CPU->cpu_id; 1976 aflt->flt_pc = tpc; 1977 aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR; 1978 aflt->flt_prot = AFLT_PROT_NONE; 1979 aflt->flt_class = CPU_FAULT; 1980 aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ? 1 : 0; 1981 aflt->flt_tl = tl; 1982 aflt->flt_panic = panic; 1983 aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP; 1984 ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY; 1985 1986 if (iparity) { 1987 cpu_icache_parity_info(&ch_flt); 1988 if (ch_flt.parity_data.ipe.cpl_off != -1) 1989 error_class = FM_EREPORT_CPU_USIII_IDSPE; 1990 else if (ch_flt.parity_data.ipe.cpl_way != -1) 1991 error_class = FM_EREPORT_CPU_USIII_ITSPE; 1992 else 1993 error_class = FM_EREPORT_CPU_USIII_IPE; 1994 aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE; 1995 } else { 1996 cpu_dcache_parity_info(&ch_flt); 1997 if (ch_flt.parity_data.dpe.cpl_off != -1) 1998 error_class = FM_EREPORT_CPU_USIII_DDSPE; 1999 else if (ch_flt.parity_data.dpe.cpl_way != -1) 2000 error_class = FM_EREPORT_CPU_USIII_DTSPE; 2001 else 2002 error_class = FM_EREPORT_CPU_USIII_DPE; 2003 aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE; 2004 /* 2005 * For panther we also need to check the P$ for parity errors. 2006 */ 2007 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2008 cpu_pcache_parity_info(&ch_flt); 2009 if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) { 2010 error_class = FM_EREPORT_CPU_USIII_PDSPE; 2011 aflt->flt_payload = 2012 FM_EREPORT_PAYLOAD_PCACHE_PE; 2013 } 2014 } 2015 } 2016 2017 cpu_errorq_dispatch(error_class, (void *)&ch_flt, 2018 sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); 2019 2020 if (iparity) { 2021 /* 2022 * Invalidate entire I$. 2023 * This is required due to the use of diagnostic ASI 2024 * accesses that may result in a loss of I$ coherency. 2025 */ 2026 if (cache_boot_state & DCU_IC) { 2027 flush_icache(); 2028 } 2029 /* 2030 * According to section P.3.1 of the Panther PRM, we 2031 * need to do a little more for recovery on those 2032 * CPUs after encountering an I$ parity error. 2033 */ 2034 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2035 flush_ipb(); 2036 correct_dcache_parity(dcache_size, 2037 dcache_linesize); 2038 flush_pcache(); 2039 } 2040 } else { 2041 /* 2042 * Since the valid bit is ignored when checking parity the 2043 * D$ data and tag must also be corrected. Set D$ data bits 2044 * to zero and set utag to 0, 1, 2, 3. 2045 */ 2046 correct_dcache_parity(dcache_size, dcache_linesize); 2047 2048 /* 2049 * According to section P.3.3 of the Panther PRM, we 2050 * need to do a little more for recovery on those 2051 * CPUs after encountering a D$ or P$ parity error. 2052 * 2053 * As far as clearing P$ parity errors, it is enough to 2054 * simply invalidate all entries in the P$ since P$ parity 2055 * error traps are only generated for floating point load 2056 * hits. 2057 */ 2058 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2059 flush_icache(); 2060 flush_ipb(); 2061 flush_pcache(); 2062 } 2063 } 2064 2065 /* 2066 * Invalidate entire D$ if it was enabled. 2067 * This is done to avoid stale data in the D$ which might 2068 * occur with the D$ disabled and the trap handler doing 2069 * stores affecting lines already in the D$. 
2070 */ 2071 if (cache_boot_state & DCU_DC) { 2072 flush_dcache(); 2073 } 2074 2075 /* 2076 * Restore caches to their bootup state. 2077 */ 2078 set_dcu(get_dcu() | cache_boot_state); 2079 2080 /* 2081 * Panic here if aflt->flt_panic has been set. Enqueued errors will 2082 * be logged as part of the panic flow. 2083 */ 2084 if (aflt->flt_panic) 2085 fm_panic("%sError(s)", iparity ? "IPE " : "DPE "); 2086 2087 /* 2088 * If this error occurred at TL>0 then flush the E$ here to reduce 2089 * the chance of getting an unrecoverable Fast ECC error. This 2090 * flush will evict the part of the parity trap handler that is run 2091 * at TL>1. 2092 */ 2093 if (tl) { 2094 cpu_flush_ecache(); 2095 } 2096 } 2097 2098 /* 2099 * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t 2100 * to indicate which portions of the captured data should be in the ereport. 2101 */ 2102 void 2103 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt) 2104 { 2105 int way = ch_flt->parity_data.ipe.cpl_way; 2106 int offset = ch_flt->parity_data.ipe.cpl_off; 2107 int tag_index; 2108 struct async_flt *aflt = (struct async_flt *)ch_flt; 2109 2110 2111 if ((offset != -1) || (way != -1)) { 2112 /* 2113 * Parity error in I$ tag or data 2114 */ 2115 tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx; 2116 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 2117 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2118 PN_ICIDX_TO_WAY(tag_index); 2119 else 2120 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2121 CH_ICIDX_TO_WAY(tag_index); 2122 ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag = 2123 IC_LOGFLAG_MAGIC; 2124 } else { 2125 /* 2126 * Parity error was not identified. 2127 * Log tags and data for all ways. 2128 */ 2129 for (way = 0; way < CH_ICACHE_NWAY; way++) { 2130 tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx; 2131 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 2132 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2133 PN_ICIDX_TO_WAY(tag_index); 2134 else 2135 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2136 CH_ICIDX_TO_WAY(tag_index); 2137 ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag = 2138 IC_LOGFLAG_MAGIC; 2139 } 2140 } 2141 } 2142 2143 /* 2144 * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t 2145 * to indicate which portions of the captured data should be in the ereport. 2146 */ 2147 void 2148 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt) 2149 { 2150 int way = ch_flt->parity_data.dpe.cpl_way; 2151 int offset = ch_flt->parity_data.dpe.cpl_off; 2152 int tag_index; 2153 2154 if (offset != -1) { 2155 /* 2156 * Parity error in D$ or P$ data array. 2157 * 2158 * First check to see whether the parity error is in D$ or P$ 2159 * since P$ data parity errors are reported in Panther using 2160 * the same trap. 2161 */ 2162 if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) { 2163 tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx; 2164 ch_flt->parity_data.dpe.cpl_pc[way].pc_way = 2165 CH_PCIDX_TO_WAY(tag_index); 2166 ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag = 2167 PC_LOGFLAG_MAGIC; 2168 } else { 2169 tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx; 2170 ch_flt->parity_data.dpe.cpl_dc[way].dc_way = 2171 CH_DCIDX_TO_WAY(tag_index); 2172 ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag = 2173 DC_LOGFLAG_MAGIC; 2174 } 2175 } else if (way != -1) { 2176 /* 2177 * Parity error in D$ tag. 
2178 */ 2179 tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx; 2180 ch_flt->parity_data.dpe.cpl_dc[way].dc_way = 2181 CH_DCIDX_TO_WAY(tag_index); 2182 ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag = 2183 DC_LOGFLAG_MAGIC; 2184 } 2185 } 2186 #endif /* CPU_IMP_L1_CACHE_PARITY */ 2187 2188 /* 2189 * The cpu_async_log_err() function is called via the [uc]e_drain() function to 2190 * post-process CPU events that are dequeued. As such, it can be invoked 2191 * from softint context, from AST processing in the trap() flow, or from the 2192 * panic flow. We decode the CPU-specific data, and take appropriate actions. 2193 * Historically this entry point was used to log the actual cmn_err(9F) text; 2194 * now with FMA it is used to prepare 'flt' to be converted into an ereport. 2195 * With FMA this function now also returns a flag which indicates to the 2196 * caller whether the ereport should be posted (1) or suppressed (0). 2197 */ 2198 static int 2199 cpu_async_log_err(void *flt, errorq_elem_t *eqep) 2200 { 2201 ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt; 2202 struct async_flt *aflt = (struct async_flt *)flt; 2203 page_t *pp; 2204 2205 switch (ch_flt->flt_type) { 2206 case CPU_INV_AFSR: 2207 /* 2208 * If it is a disrupting trap and the AFSR is zero, then 2209 * the event has probably already been noted. Do not post 2210 * an ereport. 2211 */ 2212 if ((aflt->flt_status & ECC_C_TRAP) && 2213 (!(aflt->flt_stat & C_AFSR_MASK))) 2214 return (0); 2215 else 2216 return (1); 2217 case CPU_TO: 2218 case CPU_BERR: 2219 case CPU_FATAL: 2220 case CPU_FPUERR: 2221 return (1); 2222 2223 case CPU_UE_ECACHE_RETIRE: 2224 cpu_log_err(aflt); 2225 cpu_page_retire(ch_flt); 2226 return (1); 2227 2228 /* 2229 * Cases where we may want to suppress logging or perform 2230 * extended diagnostics. 2231 */ 2232 case CPU_CE: 2233 case CPU_EMC: 2234 pp = page_numtopp_nolock((pfn_t) 2235 (aflt->flt_addr >> MMU_PAGESHIFT)); 2236 2237 /* 2238 * We want to skip logging and further classification 2239 * only if ALL the following conditions are true: 2240 * 2241 * 1. There is only one error 2242 * 2. That error is a correctable memory error 2243 * 3. The error is caused by the memory scrubber (in 2244 * which case the error will have occurred under 2245 * on_trap protection) 2246 * 4. The error is on a retired page 2247 * 2248 * Note: AFLT_PROT_EC is used places other than the memory 2249 * scrubber. However, none of those errors should occur 2250 * on a retired page. 2251 */ 2252 if ((ch_flt->afsr_errs & 2253 (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE && 2254 aflt->flt_prot == AFLT_PROT_EC) { 2255 2256 if (pp != NULL && page_isretired(pp)) { 2257 if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) { 2258 2259 /* 2260 * Since we're skipping logging, we'll need 2261 * to schedule the re-enabling of CEEN 2262 */ 2263 (void) timeout(cpu_delayed_check_ce_errors, 2264 (void *)aflt->flt_inst, drv_usectohz( 2265 (clock_t)cpu_ceen_delay_secs * MICROSEC)); 2266 } 2267 return (0); 2268 } 2269 } 2270 2271 /* 2272 * Perform/schedule further classification actions, but 2273 * only if the page is healthy (we don't want bad 2274 * pages inducing too much diagnostic activity). If we could 2275 * not find a page pointer then we also skip this. If 2276 * ce_scrub_xdiag_recirc returns nonzero then it has chosen 2277 * to copy and recirculate the event (for further diagnostics) 2278 * and we should not proceed to log it here. 
2279 * 2280 * This must be the last step here before the cpu_log_err() 2281 * below - if an event recirculates cpu_ce_log_err() will 2282 * not call the current function but just proceed directly 2283 * to cpu_ereport_post after the cpu_log_err() avoided below. 2284 * 2285 * Note: Check cpu_impl_async_log_err if changing this 2286 */ 2287 if (pp) { 2288 if (page_isretired(pp) || page_deteriorating(pp)) { 2289 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 2290 CE_XDIAG_SKIP_PAGEDET); 2291 } else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep, 2292 offsetof(ch_async_flt_t, cmn_asyncflt))) { 2293 return (0); 2294 } 2295 } else { 2296 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 2297 CE_XDIAG_SKIP_NOPP); 2298 } 2299 /*FALLTHRU*/ 2300 2301 /* 2302 * Cases where we just want to report the error and continue. 2303 */ 2304 case CPU_CE_ECACHE: 2305 case CPU_UE_ECACHE: 2306 case CPU_IV: 2307 case CPU_ORPH: 2308 cpu_log_err(aflt); 2309 return (1); 2310 2311 /* 2312 * Cases where we want to fall through to handle panicking. 2313 */ 2314 case CPU_UE: 2315 /* 2316 * We want to skip logging in the same conditions as the 2317 * CE case. In addition, we want to make sure we're not 2318 * panicking. 2319 */ 2320 if (!panicstr && (ch_flt->afsr_errs & 2321 (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE && 2322 aflt->flt_prot == AFLT_PROT_EC) { 2323 page_t *pp = page_numtopp_nolock((pfn_t) 2324 (aflt->flt_addr >> MMU_PAGESHIFT)); 2325 2326 if (pp != NULL && page_isretired(pp)) { 2327 2328 /* Zero the address to clear the error */ 2329 softcall(ecc_page_zero, (void *)aflt->flt_addr); 2330 return (0); 2331 } 2332 } 2333 cpu_log_err(aflt); 2334 break; 2335 2336 default: 2337 /* 2338 * If the us3_common.c code doesn't know the flt_type, it may 2339 * be an implementation-specific code. Call into the impldep 2340 * backend to find out what to do: if it tells us to continue, 2341 * break and handle as if falling through from a UE; if not, 2342 * the impldep backend has handled the error and we're done. 2343 */ 2344 switch (cpu_impl_async_log_err(flt, eqep)) { 2345 case CH_ASYNC_LOG_DONE: 2346 return (1); 2347 case CH_ASYNC_LOG_RECIRC: 2348 return (0); 2349 case CH_ASYNC_LOG_CONTINUE: 2350 break; /* continue on to handle UE-like error */ 2351 default: 2352 cmn_err(CE_WARN, "discarding error 0x%p with " 2353 "invalid fault type (0x%x)", 2354 (void *)aflt, ch_flt->flt_type); 2355 return (0); 2356 } 2357 } 2358 2359 /* ... fall through from the UE case */ 2360 2361 if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) { 2362 if (!panicstr) { 2363 cpu_page_retire(ch_flt); 2364 } else { 2365 /* 2366 * Clear UEs on panic so that we don't 2367 * get haunted by them during panic or 2368 * after reboot 2369 */ 2370 cpu_clearphys(aflt); 2371 (void) clear_errors(NULL); 2372 } 2373 } 2374 2375 return (1); 2376 } 2377 2378 /* 2379 * Retire the bad page that may contain the flushed error. 2380 */ 2381 void 2382 cpu_page_retire(ch_async_flt_t *ch_flt) 2383 { 2384 struct async_flt *aflt = (struct async_flt *)ch_flt; 2385 page_t *pp = page_numtopp_nolock(aflt->flt_addr >> MMU_PAGESHIFT); 2386 2387 if (pp != NULL) { 2388 page_settoxic(pp, PAGE_IS_FAULTY); 2389 (void) page_retire(pp, PAGE_IS_TOXIC); 2390 } 2391 } 2392 2393 /* 2394 * The cpu_log_err() function is called by cpu_async_log_err() to perform the 2395 * generic event post-processing for correctable and uncorrectable memory, 2396 * E$, and MTag errors. 
Historically this entry point was used to log bits of 2397 * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be 2398 * converted into an ereport. In addition, it transmits the error to any 2399 * platform-specific service-processor FRU logging routines, if available. 2400 */ 2401 void 2402 cpu_log_err(struct async_flt *aflt) 2403 { 2404 char unum[UNUM_NAMLEN]; 2405 int len = 0; 2406 int synd_status, synd_code, afar_status; 2407 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 2408 2409 /* 2410 * Need to turn on ECC_ECACHE for plat_get_mem_unum(). 2411 * For Panther, L2$ is not external, so we don't want to 2412 * generate an E$ unum for those errors. 2413 */ 2414 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { 2415 if (ch_flt->flt_bit & C_AFSR_EXT_L3_ERRS) 2416 aflt->flt_status |= ECC_ECACHE; 2417 } else { 2418 if (ch_flt->flt_bit & C_AFSR_ECACHE) 2419 aflt->flt_status |= ECC_ECACHE; 2420 } 2421 2422 /* 2423 * Determine syndrome status. 2424 */ 2425 synd_status = afsr_to_synd_status(aflt->flt_inst, 2426 ch_flt->afsr_errs, ch_flt->flt_bit); 2427 2428 /* 2429 * Determine afar status. 2430 */ 2431 if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)) 2432 afar_status = afsr_to_afar_status(ch_flt->afsr_errs, 2433 ch_flt->flt_bit); 2434 else 2435 afar_status = AFLT_STAT_INVALID; 2436 2437 /* 2438 * If afar status is not invalid do a unum lookup. 2439 */ 2440 if (afar_status != AFLT_STAT_INVALID) { 2441 (void) cpu_get_mem_unum_aflt(synd_status, aflt, unum, 2442 UNUM_NAMLEN, &len); 2443 } else { 2444 unum[0] = '\0'; 2445 } 2446 2447 synd_code = synd_to_synd_code(synd_status, 2448 aflt->flt_synd, ch_flt->flt_bit); 2449 2450 /* 2451 * Do not send the fruid message (plat_ecc_error_data_t) 2452 * to the SC if it can handle the enhanced error information 2453 * (plat_ecc_error2_data_t) or when the tunable 2454 * ecc_log_fruid_enable is set to 0. 2455 */ 2456 2457 if (&plat_ecc_capability_sc_get && 2458 plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) { 2459 if (&plat_log_fruid_error) 2460 plat_log_fruid_error(synd_code, aflt, unum, 2461 ch_flt->flt_bit); 2462 } 2463 2464 if (aflt->flt_func != NULL) 2465 aflt->flt_func(aflt, unum); 2466 2467 if (afar_status != AFLT_STAT_INVALID) 2468 cpu_log_diag_info(ch_flt); 2469 2470 /* 2471 * If we have a CEEN error , we do not reenable CEEN until after 2472 * we exit the trap handler. Otherwise, another error may 2473 * occur causing the handler to be entered recursively. 2474 * We set a timeout to trigger in cpu_ceen_delay_secs seconds, 2475 * to try and ensure that the CPU makes progress in the face 2476 * of a CE storm. 2477 */ 2478 if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) { 2479 (void) timeout(cpu_delayed_check_ce_errors, 2480 (void *)aflt->flt_inst, 2481 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); 2482 } 2483 } 2484 2485 /* 2486 * Invoked by error_init() early in startup and therefore before 2487 * startup_errorq() is called to drain any error Q - 2488 * 2489 * startup() 2490 * startup_end() 2491 * error_init() 2492 * cpu_error_init() 2493 * errorq_init() 2494 * errorq_drain() 2495 * start_other_cpus() 2496 * 2497 * The purpose of this routine is to create error-related taskqs. Taskqs 2498 * are used for this purpose because cpu_lock can't be grabbed from interrupt 2499 * context. 
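 *
 * As an illustration only -- not a quotation of the actual implementation --
 * the deferred CEEN re-enable path set up by the timeout() calls elsewhere
 * in this file might hand work off to the taskq created below roughly as
 * follows. A timeout callback runs in interrupt context and so cannot take
 * cpu_lock itself; it simply dispatches a worker onto the taskq and retries
 * the dispatch a second later if the taskq is out of entries. The callback
 * name cpu_delayed_check_ce_errors is taken from the references above and
 * the worker name cpu_check_ce_errors is an assumption, not quoted code:
 *
 *	static void
 *	cpu_delayed_check_ce_errors(void *arg)
 *	{
 *		if (taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors,
 *		    arg, TQ_NOSLEEP) == 0) {
 *			(void) timeout(cpu_delayed_check_ce_errors, arg,
 *			    drv_usectohz(MICROSEC));
 *		}
 *	}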
2500 */ 2501 void 2502 cpu_error_init(int items) 2503 { 2504 /* 2505 * Create taskq(s) to reenable CE 2506 */ 2507 ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri, 2508 items, items, TASKQ_PREPOPULATE); 2509 } 2510 2511 void 2512 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep) 2513 { 2514 char unum[UNUM_NAMLEN]; 2515 int len; 2516 2517 switch (aflt->flt_class) { 2518 case CPU_FAULT: 2519 cpu_ereport_init(aflt); 2520 if (cpu_async_log_err(aflt, eqep)) 2521 cpu_ereport_post(aflt); 2522 break; 2523 2524 case BUS_FAULT: 2525 if (aflt->flt_func != NULL) { 2526 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, 2527 unum, UNUM_NAMLEN, &len); 2528 aflt->flt_func(aflt, unum); 2529 } 2530 break; 2531 2532 case RECIRC_CPU_FAULT: 2533 aflt->flt_class = CPU_FAULT; 2534 cpu_log_err(aflt); 2535 cpu_ereport_post(aflt); 2536 break; 2537 2538 case RECIRC_BUS_FAULT: 2539 ASSERT(aflt->flt_class != RECIRC_BUS_FAULT); 2540 /*FALLTHRU*/ 2541 default: 2542 cmn_err(CE_WARN, "discarding CE error 0x%p with invalid " 2543 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 2544 return; 2545 } 2546 } 2547 2548 /* 2549 * Scrub and classify a CE. This function must not modify the 2550 * fault structure passed to it but instead should return the classification 2551 * information. 2552 */ 2553 2554 static uchar_t 2555 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried) 2556 { 2557 uchar_t disp = CE_XDIAG_EXTALG; 2558 on_trap_data_t otd; 2559 uint64_t orig_err; 2560 ch_cpu_logout_t *clop; 2561 2562 /* 2563 * Clear CEEN. CPU CE TL > 0 trap handling will already have done 2564 * this, but our other callers have not. Disable preemption to 2565 * avoid CPU migration so that we restore CEEN on the correct 2566 * cpu later. 2567 * 2568 * CEEN is cleared so that further CEs that our instruction and 2569 * data footprint induce do not cause use to either creep down 2570 * kernel stack to the point of overflow, or do so much CE 2571 * notification as to make little real forward progress. 2572 * 2573 * NCEEN must not be cleared. However it is possible that 2574 * our accesses to the flt_addr may provoke a bus error or timeout 2575 * if the offending address has just been unconfigured as part of 2576 * a DR action. So we must operate under on_trap protection. 2577 */ 2578 kpreempt_disable(); 2579 orig_err = get_error_enable(); 2580 if (orig_err & EN_REG_CEEN) 2581 set_error_enable(orig_err & ~EN_REG_CEEN); 2582 2583 /* 2584 * Our classification algorithm includes the line state before 2585 * the scrub; we'd like this captured after the detection and 2586 * before the algorithm below - the earlier the better. 2587 * 2588 * If we've come from a cpu CE trap then this info already exists 2589 * in the cpu logout area. 2590 * 2591 * For a CE detected by memscrub for which there was no trap 2592 * (running with CEEN off) cpu_log_and_clear_ce has called 2593 * cpu_ce_delayed_ec_logout to capture some cache data, and 2594 * marked the fault structure as incomplete as a flag to later 2595 * logging code. 2596 * 2597 * If called directly from an IO detected CE there has been 2598 * no line data capture. In this case we logout to the cpu logout 2599 * area - that's appropriate since it's the cpu cache data we need 2600 * for classification. We thus borrow the cpu logout area for a 2601 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in 2602 * this time (we will invalidate it again below). 
2603 * 2604 * If called from the partner check xcall handler then this cpu 2605 * (the partner) has not necessarily experienced a CE at this 2606 * address. But we want to capture line state before its scrub 2607 * attempt since we use that in our classification. 2608 */ 2609 if (logout_tried == B_FALSE) { 2610 if (!cpu_ce_delayed_ec_logout(ecc->flt_addr)) 2611 disp |= CE_XDIAG_NOLOGOUT; 2612 } 2613 2614 /* 2615 * Scrub memory, then check AFSR for errors. The AFAR we scrub may 2616 * no longer be valid (if DR'd since the initial event) so we 2617 * perform this scrub under on_trap protection. If this access is 2618 * ok then further accesses below will also be ok - DR cannot 2619 * proceed while this thread is active (preemption is disabled); 2620 * to be safe we'll nonetheless use on_trap again below. 2621 */ 2622 if (!on_trap(&otd, OT_DATA_ACCESS)) { 2623 cpu_scrubphys(ecc); 2624 } else { 2625 no_trap(); 2626 if (orig_err & EN_REG_CEEN) 2627 set_error_enable(orig_err); 2628 kpreempt_enable(); 2629 return (disp); 2630 } 2631 no_trap(); 2632 2633 /* 2634 * Did the casx read of the scrub log a CE that matches the AFAR? 2635 * Note that it's quite possible that the read sourced the data from 2636 * another cpu. 2637 */ 2638 if (clear_ecc(ecc)) 2639 disp |= CE_XDIAG_CE1; 2640 2641 /* 2642 * Read the data again. This time the read is very likely to 2643 * come from memory since the scrub induced a writeback to memory. 2644 */ 2645 if (!on_trap(&otd, OT_DATA_ACCESS)) { 2646 (void) lddphys(P2ALIGN(ecc->flt_addr, 8)); 2647 } else { 2648 no_trap(); 2649 if (orig_err & EN_REG_CEEN) 2650 set_error_enable(orig_err); 2651 kpreempt_enable(); 2652 return (disp); 2653 } 2654 no_trap(); 2655 2656 /* Did that read induce a CE that matches the AFAR? */ 2657 if (clear_ecc(ecc)) 2658 disp |= CE_XDIAG_CE2; 2659 2660 /* 2661 * Look at the logout information and record whether we found the 2662 * line in l2/l3 cache. For Panther we are interested in whether 2663 * we found it in either cache (it won't reside in both but 2664 * it is possible to read it that way given the moving target). 2665 */ 2666 clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL; 2667 if (!(disp & CE_XDIAG_NOLOGOUT) && clop && 2668 clop->clo_data.chd_afar != LOGOUT_INVALID) { 2669 int hit, level; 2670 int state; 2671 int totalsize; 2672 ch_ec_data_t *ecp; 2673 2674 /* 2675 * If hit is nonzero then a match was found and hit will 2676 * be one greater than the index which hit. For Panther we 2677 * also need to pay attention to level to see which of l2$ or 2678 * l3$ it hit in. 2679 */ 2680 hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data, 2681 0, &level); 2682 2683 if (hit) { 2684 --hit; 2685 disp |= CE_XDIAG_AFARMATCH; 2686 2687 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2688 if (level == 2) 2689 ecp = &clop->clo_data.chd_l2_data[hit]; 2690 else 2691 ecp = &clop->clo_data.chd_ec_data[hit]; 2692 } else { 2693 ASSERT(level == 2); 2694 ecp = &clop->clo_data.chd_ec_data[hit]; 2695 } 2696 totalsize = cpunodes[CPU->cpu_id].ecache_size; 2697 state = cpu_ectag_pa_to_subblk_state(totalsize, 2698 ecc->flt_addr, ecp->ec_tag); 2699 2700 /* 2701 * Cheetah variants use different state encodings - 2702 * the CH_ECSTATE_* defines vary depending on the 2703 * module we're compiled for. Translate into our 2704 * one true version. Conflate Owner-Shared state 2705 * of SSM mode with Owner as victimisation of such 2706 * lines may cause a writeback. 
2707 */ 2708 switch (state) { 2709 case CH_ECSTATE_MOD: 2710 disp |= EC_STATE_M; 2711 break; 2712 2713 case CH_ECSTATE_OWN: 2714 case CH_ECSTATE_OWS: 2715 disp |= EC_STATE_O; 2716 break; 2717 2718 case CH_ECSTATE_EXL: 2719 disp |= EC_STATE_E; 2720 break; 2721 2722 case CH_ECSTATE_SHR: 2723 disp |= EC_STATE_S; 2724 break; 2725 2726 default: 2727 disp |= EC_STATE_I; 2728 break; 2729 } 2730 } 2731 2732 /* 2733 * If we initiated the delayed logout then we are responsible 2734 * for invalidating the logout area. 2735 */ 2736 if (logout_tried == B_FALSE) { 2737 bzero(clop, sizeof (ch_cpu_logout_t)); 2738 clop->clo_data.chd_afar = LOGOUT_INVALID; 2739 } 2740 } 2741 2742 /* 2743 * Re-enable CEEN if we turned it off. 2744 */ 2745 if (orig_err & EN_REG_CEEN) 2746 set_error_enable(orig_err); 2747 kpreempt_enable(); 2748 2749 return (disp); 2750 } 2751 2752 /* 2753 * Scrub a correctable memory error and collect data for classification 2754 * of CE type. This function is called in the detection path, ie tl0 handling 2755 * of a correctable error trap (cpus) or interrupt (IO) at high PIL. 2756 */ 2757 void 2758 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried) 2759 { 2760 /* 2761 * Cheetah CE classification does not set any bits in flt_status. 2762 * Instead we will record classification datapoints in flt_disp. 2763 */ 2764 ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY); 2765 2766 /* 2767 * To check if the error detected by IO is persistent, sticky or 2768 * intermittent. This is noticed by clear_ecc(). 2769 */ 2770 if (ecc->flt_status & ECC_IOBUS) 2771 ecc->flt_stat = C_AFSR_MEMORY; 2772 2773 /* 2774 * Record information from this first part of the algorithm in 2775 * flt_disp. 2776 */ 2777 ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried); 2778 } 2779 2780 /* 2781 * Select a partner to perform a further CE classification check from. 2782 * Must be called with kernel preemption disabled (to stop the cpu list 2783 * from changing). The detecting cpu we are partnering has cpuid 2784 * aflt->flt_inst; we might not be running on the detecting cpu. 2785 * 2786 * Restrict choice to active cpus in the same cpu partition as ourselves in 2787 * an effort to stop bad cpus in one partition causing other partitions to 2788 * perform excessive diagnostic activity. Actually since the errorq drain 2789 * is run from a softint most of the time and that is a global mechanism 2790 * this isolation is only partial. Return NULL if we fail to find a 2791 * suitable partner. 2792 * 2793 * We prefer a partner that is in a different latency group to ourselves as 2794 * we will share fewer datapaths. If such a partner is unavailable then 2795 * choose one in the same lgroup but prefer a different chip and only allow 2796 * a sibling core if flags includes PTNR_SIBLINGOK. If all else fails and 2797 * flags includes PTNR_SELFOK then permit selection of the original detector. 2798 * 2799 * We keep a cache of the last partner selected for a cpu, and we'll try to 2800 * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds 2801 * have passed since that selection was made. This provides the benefit 2802 * of the point-of-view of different partners over time but without 2803 * requiring frequent cpu list traversals. 
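 *
 * A minimal usage sketch, mirroring the xcall-based callers later in this
 * file (the local variable names here are illustrative, not quoted). The
 * point is the caller contract: preemption must remain disabled across both
 * the selection and the cross call so the chosen partner cannot go away
 * underneath us:
 *
 *	cpu_t *cp;
 *	uchar_t disp;
 *	int ptnrtype;
 *
 *	kpreempt_disable();
 *	if ((cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK, &ptnrtype)) != NULL) {
 *		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
 *		    (uint64_t)aflt, (uint64_t)&disp);
 *	}
 *	kpreempt_enable();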
2804 */ 2805 2806 #define PTNR_SIBLINGOK 0x1 /* Allow selection of sibling core */ 2807 #define PTNR_SELFOK 0x2 /* Allow selection of cpu to "partner" itself */ 2808 2809 static cpu_t * 2810 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep) 2811 { 2812 cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr; 2813 hrtime_t lasttime, thistime; 2814 2815 ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL); 2816 2817 dtcr = cpu[aflt->flt_inst]; 2818 2819 /* 2820 * Short-circuit for the following cases: 2821 * . the dtcr is not flagged active 2822 * . there is just one cpu present 2823 * . the detector has disappeared 2824 * . we were given a bad flt_inst cpuid; this should not happen 2825 * (eg PCI code now fills flt_inst) but if it does it is no 2826 * reason to panic. 2827 * . there is just one cpu left online in the cpu partition 2828 * 2829 * If we return NULL after this point then we do not update the 2830 * chpr_ceptnr_seltime which will cause us to perform a full lookup 2831 * again next time; this is the case where the only other cpu online 2832 * in the detector's partition is on the same chip as the detector 2833 * and since CEEN re-enable is throttled even that case should not 2834 * hurt performance. 2835 */ 2836 if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) { 2837 return (NULL); 2838 } 2839 if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) { 2840 if (flags & PTNR_SELFOK) { 2841 *typep = CE_XDIAG_PTNR_SELF; 2842 return (dtcr); 2843 } else { 2844 return (NULL); 2845 } 2846 } 2847 2848 thistime = gethrtime(); 2849 lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime); 2850 2851 /* 2852 * Select a starting point. 2853 */ 2854 if (!lasttime) { 2855 /* 2856 * We've never selected a partner for this detector before. 2857 * Start the scan at the next online cpu in the same cpu 2858 * partition. 2859 */ 2860 sp = dtcr->cpu_next_part; 2861 } else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) { 2862 /* 2863 * Our last selection has not aged yet. If this partner: 2864 * . is still a valid cpu, 2865 * . is still in the same partition as the detector 2866 * . is still marked active 2867 * . satisfies the 'flags' argument criteria 2868 * then select it again without updating the timestamp. 2869 */ 2870 sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)]; 2871 if (sp == NULL || sp->cpu_part != dtcr->cpu_part || 2872 !cpu_flagged_active(sp->cpu_flags) || 2873 (sp == dtcr && !(flags & PTNR_SELFOK)) || 2874 (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id && 2875 !(flags & PTNR_SIBLINGOK))) { 2876 sp = dtcr->cpu_next_part; 2877 } else { 2878 if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) { 2879 *typep = CE_XDIAG_PTNR_REMOTE; 2880 } else if (sp == dtcr) { 2881 *typep = CE_XDIAG_PTNR_SELF; 2882 } else if (sp->cpu_chip->chip_id == 2883 dtcr->cpu_chip->chip_id) { 2884 *typep = CE_XDIAG_PTNR_SIBLING; 2885 } else { 2886 *typep = CE_XDIAG_PTNR_LOCAL; 2887 } 2888 return (sp); 2889 } 2890 } else { 2891 /* 2892 * Our last selection has aged. If it is nonetheless still a 2893 * valid cpu then start the scan at the next cpu in the 2894 * partition after our last partner. If the last selection 2895 * is no longer a valid cpu then go with our default. In 2896 * this way we slowly cycle through possible partners to 2897 * obtain multiple viewpoints over time. 
2898 */ 2899 sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)]; 2900 if (sp == NULL) { 2901 sp = dtcr->cpu_next_part; 2902 } else { 2903 sp = sp->cpu_next_part; /* may be dtcr */ 2904 if (sp->cpu_part != dtcr->cpu_part) 2905 sp = dtcr; 2906 } 2907 } 2908 2909 /* 2910 * We have a proposed starting point for our search, but if this 2911 * cpu is offline then its cpu_next_part will point to itself 2912 * so we can't use that to iterate over cpus in this partition in 2913 * the loop below. We still want to avoid iterating over cpus not 2914 * in our partition, so in the case that our starting point is offline 2915 * we will repoint it to be the detector itself; and if the detector 2916 * happens to be offline we'll return NULL from the following loop. 2917 */ 2918 if (!cpu_flagged_active(sp->cpu_flags)) { 2919 sp = dtcr; 2920 } 2921 2922 ptnr = sp; 2923 locptnr = NULL; 2924 sibptnr = NULL; 2925 do { 2926 if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags)) 2927 continue; 2928 if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) { 2929 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id; 2930 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; 2931 *typep = CE_XDIAG_PTNR_REMOTE; 2932 return (ptnr); 2933 } 2934 if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) { 2935 if (sibptnr == NULL) 2936 sibptnr = ptnr; 2937 continue; 2938 } 2939 if (locptnr == NULL) 2940 locptnr = ptnr; 2941 } while ((ptnr = ptnr->cpu_next_part) != sp); 2942 2943 /* 2944 * A foreign partner has already been returned if one was available. 2945 * 2946 * If locptnr is not NULL it is a cpu in the same lgroup as the 2947 * detector, is active, and is not a sibling of the detector. 2948 * 2949 * If sibptnr is not NULL it is a sibling of the detector, and is 2950 * active. 2951 * 2952 * If we have to resort to using the detector itself we have already 2953 * checked that it is active. 2954 */ 2955 if (locptnr) { 2956 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id; 2957 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; 2958 *typep = CE_XDIAG_PTNR_LOCAL; 2959 return (locptnr); 2960 } else if (sibptnr && flags & PTNR_SIBLINGOK) { 2961 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id; 2962 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; 2963 *typep = CE_XDIAG_PTNR_SIBLING; 2964 return (sibptnr); 2965 } else if (flags & PTNR_SELFOK) { 2966 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id; 2967 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; 2968 *typep = CE_XDIAG_PTNR_SELF; 2969 return (dtcr); 2970 } 2971 2972 return (NULL); 2973 } 2974 2975 /* 2976 * Cross call handler that is requested to run on the designated partner of 2977 * a cpu that experienced a possibly sticky or possibly persistnet CE. 2978 */ 2979 static void 2980 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp) 2981 { 2982 *dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE); 2983 } 2984 2985 /* 2986 * The associated errorqs are never destroyed so we do not need to deal with 2987 * them disappearing before this timeout fires. If the affected memory 2988 * has been DR'd out since the original event the scrub algrithm will catch 2989 * any errors and return null disposition info. If the original detecting 2990 * cpu has been DR'd out then ereport detector info will not be able to 2991 * lookup CPU type; with a small timeout this is unlikely. 
2992 */ 2993 static void 2994 ce_lkychk_cb(ce_lkychk_cb_t *cbarg) 2995 { 2996 struct async_flt *aflt = cbarg->lkycb_aflt; 2997 uchar_t disp; 2998 cpu_t *cp; 2999 int ptnrtype; 3000 3001 kpreempt_disable(); 3002 if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK, 3003 &ptnrtype)) { 3004 xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt, 3005 (uint64_t)&disp); 3006 CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp); 3007 CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id); 3008 CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype); 3009 } else { 3010 ce_xdiag_lkydrops++; 3011 if (ncpus > 1) 3012 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3013 CE_XDIAG_SKIP_NOPTNR); 3014 } 3015 kpreempt_enable(); 3016 3017 errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC); 3018 kmem_free(cbarg, sizeof (ce_lkychk_cb_t)); 3019 } 3020 3021 /* 3022 * Called from errorq drain code when processing a CE error, both from 3023 * CPU and PCI drain functions. Decide what further classification actions, 3024 * if any, we will perform. Perform immediate actions now, and schedule 3025 * delayed actions as required. Note that we are no longer necessarily running 3026 * on the detecting cpu, and that the async_flt structure will not persist on 3027 * return from this function. 3028 * 3029 * Calls to this function should aim to be self-throtlling in some way. With 3030 * the delayed re-enable of CEEN the absolute rate of calls should not 3031 * be excessive. Callers should also avoid performing in-depth classification 3032 * for events in pages that are already known to be suspect. 3033 * 3034 * We return nonzero to indicate that the event has been copied and 3035 * recirculated for further testing. The caller should not log the event 3036 * in this case - it will be logged when further test results are available. 3037 * 3038 * Our possible contexts are that of errorq_drain: below lock level or from 3039 * panic context. We can assume that the cpu we are running on is online. 3040 */ 3041 3042 3043 #ifdef DEBUG 3044 static int ce_xdiag_forceaction; 3045 #endif 3046 3047 int 3048 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp, 3049 errorq_elem_t *eqep, size_t afltoffset) 3050 { 3051 ce_dispact_t dispact, action; 3052 cpu_t *cp; 3053 uchar_t dtcrinfo, disp; 3054 int ptnrtype; 3055 3056 if (!ce_disp_inited || panicstr || ce_xdiag_off) { 3057 ce_xdiag_drops++; 3058 return (0); 3059 } else if (!aflt->flt_in_memory) { 3060 ce_xdiag_drops++; 3061 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM); 3062 return (0); 3063 } 3064 3065 dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp); 3066 3067 /* 3068 * Some correctable events are not scrubbed/classified, such as those 3069 * noticed at the tail of cpu_deferred_error. So if there is no 3070 * initial detector classification go no further. 
3071 */ 3072 if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) { 3073 ce_xdiag_drops++; 3074 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB); 3075 return (0); 3076 } 3077 3078 dispact = CE_DISPACT(ce_disp_table, 3079 CE_XDIAG_AFARMATCHED(dtcrinfo), 3080 CE_XDIAG_STATE(dtcrinfo), 3081 CE_XDIAG_CE1SEEN(dtcrinfo), 3082 CE_XDIAG_CE2SEEN(dtcrinfo)); 3083 3084 3085 action = CE_ACT(dispact); /* bad lookup caught below */ 3086 #ifdef DEBUG 3087 if (ce_xdiag_forceaction != 0) 3088 action = ce_xdiag_forceaction; 3089 #endif 3090 3091 switch (action) { 3092 case CE_ACT_LKYCHK: { 3093 caddr_t ndata; 3094 errorq_elem_t *neqep; 3095 struct async_flt *ecc; 3096 ce_lkychk_cb_t *cbargp; 3097 3098 if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) { 3099 ce_xdiag_lkydrops++; 3100 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3101 CE_XDIAG_SKIP_DUPFAIL); 3102 break; 3103 } 3104 ecc = (struct async_flt *)(ndata + afltoffset); 3105 3106 ASSERT(ecc->flt_class == CPU_FAULT || 3107 ecc->flt_class == BUS_FAULT); 3108 ecc->flt_class = (ecc->flt_class == CPU_FAULT) ? 3109 RECIRC_CPU_FAULT : RECIRC_BUS_FAULT; 3110 3111 cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP); 3112 cbargp->lkycb_aflt = ecc; 3113 cbargp->lkycb_eqp = eqp; 3114 cbargp->lkycb_eqep = neqep; 3115 3116 (void) timeout((void (*)(void *))ce_lkychk_cb, 3117 (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec)); 3118 return (1); 3119 } 3120 3121 case CE_ACT_PTNRCHK: 3122 kpreempt_disable(); /* stop cpu list changing */ 3123 if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) { 3124 xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, 3125 (uint64_t)aflt, (uint64_t)&disp); 3126 CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp); 3127 CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id); 3128 CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype); 3129 } else if (ncpus > 1) { 3130 ce_xdiag_ptnrdrops++; 3131 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3132 CE_XDIAG_SKIP_NOPTNR); 3133 } else { 3134 ce_xdiag_ptnrdrops++; 3135 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3136 CE_XDIAG_SKIP_UNIPROC); 3137 } 3138 kpreempt_enable(); 3139 break; 3140 3141 case CE_ACT_DONE: 3142 break; 3143 3144 case CE_ACT(CE_DISP_BAD): 3145 default: 3146 #ifdef DEBUG 3147 cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action); 3148 #endif 3149 ce_xdiag_bad++; 3150 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD); 3151 break; 3152 } 3153 3154 return (0); 3155 } 3156 3157 /* 3158 * We route all errors through a single switch statement. 3159 */ 3160 void 3161 cpu_ue_log_err(struct async_flt *aflt) 3162 { 3163 switch (aflt->flt_class) { 3164 case CPU_FAULT: 3165 cpu_ereport_init(aflt); 3166 if (cpu_async_log_err(aflt, NULL)) 3167 cpu_ereport_post(aflt); 3168 break; 3169 3170 case BUS_FAULT: 3171 bus_async_log_err(aflt); 3172 break; 3173 3174 default: 3175 cmn_err(CE_WARN, "discarding async error %p with invalid " 3176 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 3177 return; 3178 } 3179 } 3180 3181 /* 3182 * Routine for panic hook callback from panic_idle(). 
3183 */ 3184 void 3185 cpu_async_panic_callb(void) 3186 { 3187 ch_async_flt_t ch_flt; 3188 struct async_flt *aflt; 3189 ch_cpu_errors_t cpu_error_regs; 3190 uint64_t afsr_errs; 3191 3192 get_cpu_error_state(&cpu_error_regs); 3193 3194 afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | 3195 (cpu_error_regs.afsr_ext & C_AFSR_EXT_L3_ERRS); 3196 3197 if (afsr_errs) { 3198 3199 bzero(&ch_flt, sizeof (ch_async_flt_t)); 3200 aflt = (struct async_flt *)&ch_flt; 3201 aflt->flt_id = gethrtime_waitfree(); 3202 aflt->flt_bus_id = getprocessorid(); 3203 aflt->flt_inst = CPU->cpu_id; 3204 aflt->flt_stat = cpu_error_regs.afsr; 3205 aflt->flt_addr = cpu_error_regs.afar; 3206 aflt->flt_prot = AFLT_PROT_NONE; 3207 aflt->flt_class = CPU_FAULT; 3208 aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0); 3209 aflt->flt_panic = 1; 3210 ch_flt.afsr_ext = cpu_error_regs.afsr_ext; 3211 ch_flt.afsr_errs = afsr_errs; 3212 #if defined(SERRANO) 3213 ch_flt.afar2 = cpu_error_regs.afar2; 3214 #endif /* SERRANO */ 3215 (void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL); 3216 } 3217 } 3218 3219 /* 3220 * Routine to convert a syndrome into a syndrome code. 3221 */ 3222 static int 3223 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit) 3224 { 3225 if (synd_status == AFLT_STAT_INVALID) 3226 return (-1); 3227 3228 /* 3229 * Use the syndrome to index the appropriate syndrome table, 3230 * to get the code indicating which bit(s) is(are) bad. 3231 */ 3232 if (afsr_bit & 3233 (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { 3234 if (afsr_bit & C_AFSR_MSYND_ERRS) { 3235 #if defined(JALAPENO) || defined(SERRANO) 3236 if ((synd == 0) || (synd >= BSYND_TBL_SIZE)) 3237 return (-1); 3238 else 3239 return (BPAR0 + synd); 3240 #else /* JALAPENO || SERRANO */ 3241 if ((synd == 0) || (synd >= MSYND_TBL_SIZE)) 3242 return (-1); 3243 else 3244 return (mtag_syndrome_tab[synd]); 3245 #endif /* JALAPENO || SERRANO */ 3246 } else { 3247 if ((synd == 0) || (synd >= ESYND_TBL_SIZE)) 3248 return (-1); 3249 else 3250 return (ecc_syndrome_tab[synd]); 3251 } 3252 } else { 3253 return (-1); 3254 } 3255 } 3256 3257 /* 3258 * Routine to return a string identifying the physical name 3259 * associated with a memory/cache error. 3260 */ 3261 int 3262 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, 3263 uint64_t flt_addr, int flt_bus_id, int flt_in_memory, 3264 ushort_t flt_status, char *buf, int buflen, int *lenp) 3265 { 3266 int synd_code; 3267 int ret; 3268 3269 /* 3270 * An AFSR of -1 defaults to a memory syndrome. 3271 */ 3272 if (flt_stat == (uint64_t)-1) 3273 flt_stat = C_AFSR_CE; 3274 3275 synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat); 3276 3277 /* 3278 * Syndrome code must be either a single-bit error code 3279 * (0...143) or -1 for unum lookup. 3280 */ 3281 if (synd_code < 0 || synd_code >= M2) 3282 synd_code = -1; 3283 if (&plat_get_mem_unum) { 3284 if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id, 3285 flt_in_memory, flt_status, buf, buflen, lenp)) != 0) { 3286 buf[0] = '\0'; 3287 *lenp = 0; 3288 } 3289 3290 return (ret); 3291 } 3292 3293 return (ENOTSUP); 3294 } 3295 3296 /* 3297 * Wrapper for cpu_get_mem_unum() routine that takes an 3298 * async_flt struct rather than explicit arguments. 
3299 */ 3300 int 3301 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 3302 char *buf, int buflen, int *lenp) 3303 { 3304 /* 3305 * If we come thru here for an IO bus error aflt->flt_stat will 3306 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum() 3307 * so it will interpret this as a memory error. 3308 */ 3309 return (cpu_get_mem_unum(synd_status, aflt->flt_synd, 3310 (aflt->flt_class == BUS_FAULT) ? 3311 (uint64_t)-1 : ((ch_async_flt_t *)(aflt))->afsr_errs, 3312 aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, 3313 aflt->flt_status, buf, buflen, lenp)); 3314 } 3315 3316 /* 3317 * This routine is a more generic interface to cpu_get_mem_unum() 3318 * that may be used by other modules (e.g. mm). 3319 */ 3320 int 3321 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 3322 char *buf, int buflen, int *lenp) 3323 { 3324 int synd_status, flt_in_memory, ret; 3325 ushort_t flt_status = 0; 3326 char unum[UNUM_NAMLEN]; 3327 3328 /* 3329 * Check for an invalid address. 3330 */ 3331 if (afar == (uint64_t)-1) 3332 return (ENXIO); 3333 3334 if (synd == (uint64_t)-1) 3335 synd_status = AFLT_STAT_INVALID; 3336 else 3337 synd_status = AFLT_STAT_VALID; 3338 3339 flt_in_memory = (*afsr & C_AFSR_MEMORY) && 3340 pf_is_memory(afar >> MMU_PAGESHIFT); 3341 3342 /* 3343 * Need to turn on ECC_ECACHE for plat_get_mem_unum(). 3344 * For Panther, L2$ is not external, so we don't want to 3345 * generate an E$ unum for those errors. 3346 */ 3347 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 3348 if (*(afsr + 1) & C_AFSR_EXT_L3_ERRS) 3349 flt_status |= ECC_ECACHE; 3350 } else { 3351 if (*afsr & C_AFSR_ECACHE) 3352 flt_status |= ECC_ECACHE; 3353 } 3354 3355 ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 3356 CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp); 3357 if (ret != 0) 3358 return (ret); 3359 3360 if (*lenp >= buflen) 3361 return (ENAMETOOLONG); 3362 3363 (void) strncpy(buf, unum, buflen); 3364 3365 return (0); 3366 } 3367 3368 /* 3369 * Routine to return memory information associated 3370 * with a physical address and syndrome. 3371 */ 3372 int 3373 cpu_get_mem_info(uint64_t synd, uint64_t afar, 3374 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 3375 int *segsp, int *banksp, int *mcidp) 3376 { 3377 int synd_status, synd_code; 3378 3379 if (afar == (uint64_t)-1) 3380 return (ENXIO); 3381 3382 if (synd == (uint64_t)-1) 3383 synd_status = AFLT_STAT_INVALID; 3384 else 3385 synd_status = AFLT_STAT_VALID; 3386 3387 synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE); 3388 3389 if (p2get_mem_info != NULL) 3390 return ((p2get_mem_info)(synd_code, afar, 3391 mem_sizep, seg_sizep, bank_sizep, 3392 segsp, banksp, mcidp)); 3393 else 3394 return (ENOTSUP); 3395 } 3396 3397 /* 3398 * Routine to return a string identifying the physical 3399 * name associated with a cpuid. 3400 */ 3401 int 3402 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 3403 { 3404 int ret; 3405 char unum[UNUM_NAMLEN]; 3406 3407 if (&plat_get_cpu_unum) { 3408 if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp)) 3409 != 0) 3410 return (ret); 3411 } else { 3412 return (ENOTSUP); 3413 } 3414 3415 if (*lenp >= buflen) 3416 return (ENAMETOOLONG); 3417 3418 (void) strncpy(buf, unum, buflen); 3419 3420 return (0); 3421 } 3422 3423 /* 3424 * This routine exports the name buffer size. 
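 *
 * Hedged usage sketch only: a consumer such as mm is expected to size its
 * buffer from this export rather than hard-coding UNUM_NAMLEN. The cpuid
 * variable and the cmn_err() call are illustrative assumptions, not quoted
 * from any caller:
 *
 *	int len;
 *	size_t sz = cpu_get_name_bufsize();
 *	char *buf = kmem_alloc(sz, KM_SLEEP);
 *
 *	if (cpu_get_cpu_unum(cpuid, buf, (int)sz, &len) == 0)
 *		cmn_err(CE_NOTE, "cpu %d is %s", cpuid, buf);
 *	kmem_free(buf, sz);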
3425 */ 3426 size_t 3427 cpu_get_name_bufsize() 3428 { 3429 return (UNUM_NAMLEN); 3430 } 3431 3432 /* 3433 * Historical function, apparantly not used. 3434 */ 3435 /* ARGSUSED */ 3436 void 3437 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err) 3438 {} 3439 3440 /* 3441 * Historical function only called for SBus errors in debugging. 3442 */ 3443 /*ARGSUSED*/ 3444 void 3445 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err) 3446 {} 3447 3448 /* 3449 * Clear the AFSR sticky bits. The routine returns a non-zero value if 3450 * any of the AFSR's sticky errors are detected. If a non-null pointer to 3451 * an async fault structure argument is passed in, the captured error state 3452 * (AFSR, AFAR) info will be returned in the structure. 3453 */ 3454 int 3455 clear_errors(ch_async_flt_t *ch_flt) 3456 { 3457 struct async_flt *aflt = (struct async_flt *)ch_flt; 3458 ch_cpu_errors_t cpu_error_regs; 3459 3460 get_cpu_error_state(&cpu_error_regs); 3461 3462 if (ch_flt != NULL) { 3463 aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK; 3464 aflt->flt_addr = cpu_error_regs.afar; 3465 ch_flt->afsr_ext = cpu_error_regs.afsr_ext; 3466 ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | 3467 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS); 3468 #if defined(SERRANO) 3469 ch_flt->afar2 = cpu_error_regs.afar2; 3470 #endif /* SERRANO */ 3471 } 3472 3473 set_cpu_error_state(&cpu_error_regs); 3474 3475 return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | 3476 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0); 3477 } 3478 3479 /* 3480 * Clear any AFSR error bits, and check for persistence. 3481 * 3482 * It would be desirable to also insist that syndrome match. PCI handling 3483 * has already filled flt_synd. For errors trapped by CPU we only fill 3484 * flt_synd when we queue the event, so we do not have a valid flt_synd 3485 * during initial classification (it is valid if we're called as part of 3486 * subsequent low-pil additional classification attempts). We could try 3487 * to determine which syndrome to use: we know we're only called for 3488 * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use 3489 * would be esynd/none and esynd/msynd, respectively. If that is 3490 * implemented then what do we do in the case that we do experience an 3491 * error on the same afar but with different syndrome? At the very least 3492 * we should count such occurences. Anyway, for now, we'll leave it as 3493 * it has been for ages. 3494 */ 3495 static int 3496 clear_ecc(struct async_flt *aflt) 3497 { 3498 ch_cpu_errors_t cpu_error_regs; 3499 3500 /* 3501 * Snapshot the AFSR and AFAR and clear any errors 3502 */ 3503 get_cpu_error_state(&cpu_error_regs); 3504 set_cpu_error_state(&cpu_error_regs); 3505 3506 /* 3507 * If any of the same memory access error bits are still on and 3508 * the AFAR matches, return that the error is persistent. 3509 */ 3510 return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 && 3511 cpu_error_regs.afar == aflt->flt_addr); 3512 } 3513 3514 /* 3515 * Turn off all cpu error detection, normally only used for panics. 3516 */ 3517 void 3518 cpu_disable_errors(void) 3519 { 3520 xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE); 3521 } 3522 3523 /* 3524 * Enable errors. 
3525 */ 3526 void 3527 cpu_enable_errors(void) 3528 { 3529 xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE); 3530 } 3531 3532 /* 3533 * Flush the entire ecache using displacement flush by reading through a 3534 * physical address range twice as large as the Ecache. 3535 */ 3536 void 3537 cpu_flush_ecache(void) 3538 { 3539 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size, 3540 cpunodes[CPU->cpu_id].ecache_linesize); 3541 } 3542 3543 /* 3544 * Return CPU E$ set size - E$ size divided by the associativity. 3545 * We use this function in places where the CPU_PRIVATE ptr may not be 3546 * initialized yet. Note that for send_mondo and in the Ecache scrubber, 3547 * we're guaranteed that CPU_PRIVATE is initialized. Also, cpunodes is set 3548 * up before the kernel switches from OBP's to the kernel's trap table, so 3549 * we don't have to worry about cpunodes being unitialized. 3550 */ 3551 int 3552 cpu_ecache_set_size(struct cpu *cp) 3553 { 3554 if (CPU_PRIVATE(cp)) 3555 return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size)); 3556 3557 return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway()); 3558 } 3559 3560 /* 3561 * Flush Ecache line. 3562 * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno. 3563 * Uses normal displacement flush for Cheetah. 3564 */ 3565 static void 3566 cpu_flush_ecache_line(ch_async_flt_t *ch_flt) 3567 { 3568 struct async_flt *aflt = (struct async_flt *)ch_flt; 3569 int ec_set_size = cpu_ecache_set_size(CPU); 3570 3571 ecache_flush_line(aflt->flt_addr, ec_set_size); 3572 } 3573 3574 /* 3575 * Scrub physical address. 3576 * Scrub code is different depending upon whether this a Cheetah+ with 2-way 3577 * Ecache or direct-mapped Ecache. 3578 */ 3579 static void 3580 cpu_scrubphys(struct async_flt *aflt) 3581 { 3582 int ec_set_size = cpu_ecache_set_size(CPU); 3583 3584 scrubphys(aflt->flt_addr, ec_set_size); 3585 } 3586 3587 /* 3588 * Clear physical address. 3589 * Scrub code is different depending upon whether this a Cheetah+ with 2-way 3590 * Ecache or direct-mapped Ecache. 3591 */ 3592 void 3593 cpu_clearphys(struct async_flt *aflt) 3594 { 3595 int lsize = cpunodes[CPU->cpu_id].ecache_linesize; 3596 int ec_set_size = cpu_ecache_set_size(CPU); 3597 3598 3599 clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize); 3600 } 3601 3602 #if defined(CPU_IMP_ECACHE_ASSOC) 3603 /* 3604 * Check for a matching valid line in all the sets. 3605 * If found, return set# + 1. Otherwise return 0. 3606 */ 3607 static int 3608 cpu_ecache_line_valid(ch_async_flt_t *ch_flt) 3609 { 3610 struct async_flt *aflt = (struct async_flt *)ch_flt; 3611 int totalsize = cpunodes[CPU->cpu_id].ecache_size; 3612 int ec_set_size = cpu_ecache_set_size(CPU); 3613 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; 3614 int nway = cpu_ecache_nway(); 3615 int i; 3616 3617 for (i = 0; i < nway; i++, ecp++) { 3618 if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) && 3619 (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) == 3620 cpu_ectag_to_pa(ec_set_size, ecp->ec_tag)) 3621 return (i+1); 3622 } 3623 return (0); 3624 } 3625 #endif /* CPU_IMP_ECACHE_ASSOC */ 3626 3627 /* 3628 * Check whether a line in the given logout info matches the specified 3629 * fault address. If reqval is set then the line must not be Invalid. 3630 * Returns 0 on failure; on success (way + 1) is returned an *level is 3631 * set to 2 for l2$ or 3 for l3$. 
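 *
 * Illustrative caller fragment only (the real caller is
 * cpu_ce_scrub_mem_err_common() earlier in this file; aflt and clop are
 * assumed from that context). The return value is one greater than the
 * matching way, so a caller decrements it, and level reports whether the
 * hit was in the l2$ (2) or the l3$ (3):
 *
 *	int level, way;
 *
 *	if ((way = cpu_matching_ecache_line(aflt->flt_addr, &clop->clo_data,
 *	    0, &level)) != 0) {
 *		way--;
 *	}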
3632 */ 3633 static int 3634 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level) 3635 { 3636 ch_diag_data_t *cdp = data; 3637 ch_ec_data_t *ecp; 3638 int totalsize, ec_set_size; 3639 int i, ways; 3640 int match = 0; 3641 int tagvalid; 3642 uint64_t addr, tagpa; 3643 int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation); 3644 3645 /* 3646 * Check the l2$ logout data 3647 */ 3648 if (ispanther) { 3649 ecp = &cdp->chd_l2_data[0]; 3650 ec_set_size = PN_L2_SET_SIZE; 3651 ways = PN_L2_NWAYS; 3652 } else { 3653 ecp = &cdp->chd_ec_data[0]; 3654 ec_set_size = cpu_ecache_set_size(CPU); 3655 ways = cpu_ecache_nway(); 3656 totalsize = cpunodes[CPU->cpu_id].ecache_size; 3657 } 3658 /* remove low order PA bits from fault address not used in PA tag */ 3659 addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size); 3660 for (i = 0; i < ways; i++, ecp++) { 3661 if (ispanther) { 3662 tagpa = PN_L2TAG_TO_PA(ecp->ec_tag); 3663 tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag); 3664 } else { 3665 tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag); 3666 tagvalid = !cpu_ectag_line_invalid(totalsize, 3667 ecp->ec_tag); 3668 } 3669 if (tagpa == addr && (!reqval || tagvalid)) { 3670 match = i + 1; 3671 *level = 2; 3672 break; 3673 } 3674 } 3675 3676 if (match || !ispanther) 3677 return (match); 3678 3679 /* For Panther we also check the l3$ */ 3680 ecp = &cdp->chd_ec_data[0]; 3681 ec_set_size = PN_L3_SET_SIZE; 3682 ways = PN_L3_NWAYS; 3683 addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size); 3684 3685 for (i = 0; i < ways; i++, ecp++) { 3686 if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval || 3687 !PN_L3_LINE_INVALID(ecp->ec_tag))) { 3688 match = i + 1; 3689 *level = 3; 3690 break; 3691 } 3692 } 3693 3694 return (match); 3695 } 3696 3697 #if defined(CPU_IMP_L1_CACHE_PARITY) 3698 /* 3699 * Record information related to the source of an Dcache Parity Error. 3700 */ 3701 static void 3702 cpu_dcache_parity_info(ch_async_flt_t *ch_flt) 3703 { 3704 int dc_set_size = dcache_size / CH_DCACHE_NWAY; 3705 int index; 3706 3707 /* 3708 * Since instruction decode cannot be done at high PIL 3709 * just examine the entire Dcache to locate the error. 3710 */ 3711 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3712 ch_flt->parity_data.dpe.cpl_way = -1; 3713 ch_flt->parity_data.dpe.cpl_off = -1; 3714 } 3715 for (index = 0; index < dc_set_size; index += dcache_linesize) 3716 cpu_dcache_parity_check(ch_flt, index); 3717 } 3718 3719 /* 3720 * Check all ways of the Dcache at a specified index for good parity. 3721 */ 3722 static void 3723 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index) 3724 { 3725 int dc_set_size = dcache_size / CH_DCACHE_NWAY; 3726 uint64_t parity_bits, pbits, data_word; 3727 static int parity_bits_popc[] = { 0, 1, 1, 0 }; 3728 int way, word, data_byte; 3729 ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0]; 3730 ch_dc_data_t tmp_dcp; 3731 3732 for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) { 3733 /* 3734 * Perform diagnostic read. 3735 */ 3736 get_dcache_dtag(index + way * dc_set_size, 3737 (uint64_t *)&tmp_dcp); 3738 3739 /* 3740 * Check tag for even parity. 3741 * Sum of 1 bits (including parity bit) should be even. 3742 */ 3743 if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) { 3744 /* 3745 * If this is the first error log detailed information 3746 * about it and check the snoop tag. Otherwise just 3747 * record the fact that we found another error. 
3748 */ 3749 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3750 ch_flt->parity_data.dpe.cpl_way = way; 3751 ch_flt->parity_data.dpe.cpl_cache = 3752 CPU_DC_PARITY; 3753 ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG; 3754 3755 if (popc64(tmp_dcp.dc_sntag & 3756 CHP_DCSNTAG_PARMASK) & 1) { 3757 ch_flt->parity_data.dpe.cpl_tag |= 3758 CHP_DC_SNTAG; 3759 ch_flt->parity_data.dpe.cpl_lcnt++; 3760 } 3761 3762 bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t)); 3763 } 3764 3765 ch_flt->parity_data.dpe.cpl_lcnt++; 3766 } 3767 3768 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 3769 /* 3770 * Panther has more parity bits than the other 3771 * processors for covering dcache data and so each 3772 * byte of data in each word has its own parity bit. 3773 */ 3774 parity_bits = tmp_dcp.dc_pn_data_parity; 3775 for (word = 0; word < 4; word++) { 3776 data_word = tmp_dcp.dc_data[word]; 3777 pbits = parity_bits & PN_DC_DATA_PARITY_MASK; 3778 for (data_byte = 0; data_byte < 8; 3779 data_byte++) { 3780 if (((popc64(data_word & 3781 PN_DC_DATA_PARITY_MASK)) & 1) ^ 3782 (pbits & 1)) { 3783 cpu_record_dc_data_parity( 3784 ch_flt, dcp, &tmp_dcp, way, 3785 word); 3786 } 3787 pbits >>= 1; 3788 data_word >>= 8; 3789 } 3790 parity_bits >>= 8; 3791 } 3792 } else { 3793 /* 3794 * Check data array for even parity. 3795 * The 8 parity bits are grouped into 4 pairs each 3796 * of which covers a 64-bit word. The endianness is 3797 * reversed -- the low-order parity bits cover the 3798 * high-order data words. 3799 */ 3800 parity_bits = tmp_dcp.dc_utag >> 8; 3801 for (word = 0; word < 4; word++) { 3802 pbits = (parity_bits >> (6 - word * 2)) & 3; 3803 if ((popc64(tmp_dcp.dc_data[word]) + 3804 parity_bits_popc[pbits]) & 1) { 3805 cpu_record_dc_data_parity(ch_flt, dcp, 3806 &tmp_dcp, way, word); 3807 } 3808 } 3809 } 3810 } 3811 } 3812 3813 static void 3814 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt, 3815 ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word) 3816 { 3817 /* 3818 * If this is the first error log detailed information about it. 3819 * Otherwise just record the fact that we found another error. 3820 */ 3821 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3822 ch_flt->parity_data.dpe.cpl_way = way; 3823 ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY; 3824 ch_flt->parity_data.dpe.cpl_off = word * 8; 3825 bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t)); 3826 } 3827 ch_flt->parity_data.dpe.cpl_lcnt++; 3828 } 3829 3830 /* 3831 * Record information related to the source of an Icache Parity Error. 3832 * 3833 * Called with the Icache disabled so any diagnostic accesses are safe. 3834 */ 3835 static void 3836 cpu_icache_parity_info(ch_async_flt_t *ch_flt) 3837 { 3838 int ic_set_size; 3839 int ic_linesize; 3840 int index; 3841 3842 if (CPU_PRIVATE(CPU)) { 3843 ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) / 3844 CH_ICACHE_NWAY; 3845 ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize); 3846 } else { 3847 ic_set_size = icache_size / CH_ICACHE_NWAY; 3848 ic_linesize = icache_linesize; 3849 } 3850 3851 ch_flt->parity_data.ipe.cpl_way = -1; 3852 ch_flt->parity_data.ipe.cpl_off = -1; 3853 3854 for (index = 0; index < ic_set_size; index += ic_linesize) 3855 cpu_icache_parity_check(ch_flt, index); 3856 } 3857 3858 /* 3859 * Check all ways of the Icache at a specified index for good parity. 
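 * Each way is checked for even parity in the physical address tag, the
 * snoop tag and every instruction word; the parity mask applied to an
 * instruction depends on whether it was predecoded as PC-relative
 * (CH_ICDATA_PRED_ISPCREL), as the loop below shows.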
3860 */ 3861 static void 3862 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index) 3863 { 3864 uint64_t parmask, pn_inst_parity; 3865 int ic_set_size; 3866 int ic_linesize; 3867 int flt_index, way, instr, num_instr; 3868 struct async_flt *aflt = (struct async_flt *)ch_flt; 3869 ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0]; 3870 ch_ic_data_t tmp_icp; 3871 3872 if (CPU_PRIVATE(CPU)) { 3873 ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) / 3874 CH_ICACHE_NWAY; 3875 ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize); 3876 } else { 3877 ic_set_size = icache_size / CH_ICACHE_NWAY; 3878 ic_linesize = icache_linesize; 3879 } 3880 3881 /* 3882 * Panther has twice as many instructions per icache line and the 3883 * instruction parity bit is in a different location. 3884 */ 3885 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 3886 num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t); 3887 pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK; 3888 } else { 3889 num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t); 3890 pn_inst_parity = 0; 3891 } 3892 3893 /* 3894 * Index at which we expect to find the parity error. 3895 */ 3896 flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize); 3897 3898 for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) { 3899 /* 3900 * Diagnostic reads expect address argument in ASI format. 3901 */ 3902 get_icache_dtag(2 * (index + way * ic_set_size), 3903 (uint64_t *)&tmp_icp); 3904 3905 /* 3906 * If this is the index in which we expect to find the 3907 * error log detailed information about each of the ways. 3908 * This information will be displayed later if we can't 3909 * determine the exact way in which the error is located. 3910 */ 3911 if (flt_index == index) 3912 bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t)); 3913 3914 /* 3915 * Check tag for even parity. 3916 * Sum of 1 bits (including parity bit) should be even. 3917 */ 3918 if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) { 3919 /* 3920 * If this way is the one in which we expected 3921 * to find the error record the way and check the 3922 * snoop tag. Otherwise just record the fact we 3923 * found another error. 3924 */ 3925 if (flt_index == index) { 3926 ch_flt->parity_data.ipe.cpl_way = way; 3927 ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG; 3928 3929 if (popc64(tmp_icp.ic_sntag & 3930 CHP_ICSNTAG_PARMASK) & 1) { 3931 ch_flt->parity_data.ipe.cpl_tag |= 3932 CHP_IC_SNTAG; 3933 ch_flt->parity_data.ipe.cpl_lcnt++; 3934 } 3935 3936 } 3937 ch_flt->parity_data.ipe.cpl_lcnt++; 3938 continue; 3939 } 3940 3941 /* 3942 * Check instruction data for even parity. 3943 * Bits participating in parity differ for PC-relative 3944 * versus non-PC-relative instructions. 3945 */ 3946 for (instr = 0; instr < num_instr; instr++) { 3947 parmask = (tmp_icp.ic_data[instr] & 3948 CH_ICDATA_PRED_ISPCREL) ? 3949 (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) : 3950 (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity); 3951 if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) { 3952 /* 3953 * If this way is the one in which we expected 3954 * to find the error record the way and offset. 3955 * Otherwise just log the fact we found another 3956 * error. 3957 */ 3958 if (flt_index == index) { 3959 ch_flt->parity_data.ipe.cpl_way = way; 3960 ch_flt->parity_data.ipe.cpl_off = 3961 instr * 4; 3962 } 3963 ch_flt->parity_data.ipe.cpl_lcnt++; 3964 continue; 3965 } 3966 } 3967 } 3968 } 3969 3970 /* 3971 * Record information related to the source of an Pcache Parity Error. 
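 * Note that the P$ shares the dpe parity log with the D$, which is why
 * the cpl_cache field (CPU_DC_PARITY vs. CPU_PC_PARITY) records which
 * cache the detailed capture came from.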
3972 */ 3973 static void 3974 cpu_pcache_parity_info(ch_async_flt_t *ch_flt) 3975 { 3976 int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY; 3977 int index; 3978 3979 /* 3980 * Since instruction decode cannot be done at high PIL just 3981 * examine the entire Pcache to check for any parity errors. 3982 */ 3983 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3984 ch_flt->parity_data.dpe.cpl_way = -1; 3985 ch_flt->parity_data.dpe.cpl_off = -1; 3986 } 3987 for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE) 3988 cpu_pcache_parity_check(ch_flt, index); 3989 } 3990 3991 /* 3992 * Check all ways of the Pcache at a specified index for good parity. 3993 */ 3994 static void 3995 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index) 3996 { 3997 int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY; 3998 int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t); 3999 int way, word, pbit, parity_bits; 4000 ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0]; 4001 ch_pc_data_t tmp_pcp; 4002 4003 for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) { 4004 /* 4005 * Perform diagnostic read. 4006 */ 4007 get_pcache_dtag(index + way * pc_set_size, 4008 (uint64_t *)&tmp_pcp); 4009 /* 4010 * Check data array for odd parity. There are 8 parity 4011 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each 4012 * of those bits covers exactly 8 bytes of the data 4013 * array: 4014 * 4015 * parity bit P$ data bytes covered 4016 * ---------- --------------------- 4017 * 50 63:56 4018 * 51 55:48 4019 * 52 47:40 4020 * 53 39:32 4021 * 54 31:24 4022 * 55 23:16 4023 * 56 15:8 4024 * 57 7:0 4025 */ 4026 parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status); 4027 for (word = 0; word < pc_data_words; word++) { 4028 pbit = (parity_bits >> (pc_data_words - word - 1)) & 1; 4029 if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) { 4030 /* 4031 * If this is the first error log detailed 4032 * information about it. Otherwise just record 4033 * the fact that we found another error. 4034 */ 4035 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 4036 ch_flt->parity_data.dpe.cpl_way = way; 4037 ch_flt->parity_data.dpe.cpl_cache = 4038 CPU_PC_PARITY; 4039 ch_flt->parity_data.dpe.cpl_off = 4040 word * sizeof (uint64_t); 4041 bcopy(&tmp_pcp, pcp, 4042 sizeof (ch_pc_data_t)); 4043 } 4044 ch_flt->parity_data.dpe.cpl_lcnt++; 4045 } 4046 } 4047 } 4048 } 4049 4050 4051 /* 4052 * Add L1 Data cache data to the ereport payload. 4053 */ 4054 static void 4055 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl) 4056 { 4057 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4058 ch_dc_data_t *dcp; 4059 ch_dc_data_t dcdata[CH_DCACHE_NWAY]; 4060 uint_t nelem; 4061 int i, ways_to_check, ways_logged = 0; 4062 4063 /* 4064 * If this is an D$ fault then there may be multiple 4065 * ways captured in the ch_parity_log_t structure. 4066 * Otherwise, there will be at most one way captured 4067 * in the ch_diag_data_t struct. 4068 * Check each way to see if it should be encoded. 4069 */ 4070 if (ch_flt->flt_type == CPU_DC_PARITY) 4071 ways_to_check = CH_DCACHE_NWAY; 4072 else 4073 ways_to_check = 1; 4074 for (i = 0; i < ways_to_check; i++) { 4075 if (ch_flt->flt_type == CPU_DC_PARITY) 4076 dcp = &ch_flt->parity_data.dpe.cpl_dc[i]; 4077 else 4078 dcp = &ch_flt->flt_diag_data.chd_dc_data; 4079 if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) { 4080 bcopy(dcp, &dcdata[ways_logged], 4081 sizeof (ch_dc_data_t)); 4082 ways_logged++; 4083 } 4084 } 4085 4086 /* 4087 * Add the dcache data to the payload. 
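 * The payload carries a UINT8 count of the ways logged, followed by
 * the logged ch_dc_data_t structures flattened into a UINT64 array of
 * nelem words.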
4088 */ 4089 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS, 4090 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4091 if (ways_logged != 0) { 4092 nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged; 4093 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA, 4094 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL); 4095 } 4096 } 4097 4098 /* 4099 * Add L1 Instruction cache data to the ereport payload. 4100 */ 4101 static void 4102 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl) 4103 { 4104 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4105 ch_ic_data_t *icp; 4106 ch_ic_data_t icdata[CH_ICACHE_NWAY]; 4107 uint_t nelem; 4108 int i, ways_to_check, ways_logged = 0; 4109 4110 /* 4111 * If this is an I$ fault then there may be multiple 4112 * ways captured in the ch_parity_log_t structure. 4113 * Otherwise, there will be at most one way captured 4114 * in the ch_diag_data_t struct. 4115 * Check each way to see if it should be encoded. 4116 */ 4117 if (ch_flt->flt_type == CPU_IC_PARITY) 4118 ways_to_check = CH_ICACHE_NWAY; 4119 else 4120 ways_to_check = 1; 4121 for (i = 0; i < ways_to_check; i++) { 4122 if (ch_flt->flt_type == CPU_IC_PARITY) 4123 icp = &ch_flt->parity_data.ipe.cpl_ic[i]; 4124 else 4125 icp = &ch_flt->flt_diag_data.chd_ic_data; 4126 if (icp->ic_logflag == IC_LOGFLAG_MAGIC) { 4127 bcopy(icp, &icdata[ways_logged], 4128 sizeof (ch_ic_data_t)); 4129 ways_logged++; 4130 } 4131 } 4132 4133 /* 4134 * Add the icache data to the payload. 4135 */ 4136 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS, 4137 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4138 if (ways_logged != 0) { 4139 nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged; 4140 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA, 4141 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL); 4142 } 4143 } 4144 4145 #endif /* CPU_IMP_L1_CACHE_PARITY */ 4146 4147 /* 4148 * Add ecache data to payload. 4149 */ 4150 static void 4151 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl) 4152 { 4153 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4154 ch_ec_data_t *ecp; 4155 ch_ec_data_t ecdata[CHD_EC_DATA_SETS]; 4156 uint_t nelem; 4157 int i, ways_logged = 0; 4158 4159 /* 4160 * Check each way to see if it should be encoded 4161 * and concatinate it into a temporary buffer. 4162 */ 4163 for (i = 0; i < CHD_EC_DATA_SETS; i++) { 4164 ecp = &ch_flt->flt_diag_data.chd_ec_data[i]; 4165 if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) { 4166 bcopy(ecp, &ecdata[ways_logged], 4167 sizeof (ch_ec_data_t)); 4168 ways_logged++; 4169 } 4170 } 4171 4172 /* 4173 * Panther CPUs have an additional level of cache and so 4174 * what we just collected was the L3 (ecache) and not the 4175 * L2 cache. 4176 */ 4177 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { 4178 /* 4179 * Add the L3 (ecache) data to the payload. 4180 */ 4181 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS, 4182 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4183 if (ways_logged != 0) { 4184 nelem = sizeof (ch_ec_data_t) / 4185 sizeof (uint64_t) * ways_logged; 4186 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA, 4187 DATA_TYPE_UINT64_ARRAY, nelem, 4188 (uint64_t *)ecdata, NULL); 4189 } 4190 4191 /* 4192 * Now collect the L2 cache. 
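 * (On Panther the chd_ec_data[] ways gathered at the top of this
 * function describe the L3, so the L2 ways are re-gathered from
 * chd_l2_data[] here; on other CPUs the ways gathered at the top are
 * what end up reported under the L2 payload members below.)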
4193 */ 4194 ways_logged = 0; 4195 for (i = 0; i < PN_L2_NWAYS; i++) { 4196 ecp = &ch_flt->flt_diag_data.chd_l2_data[i]; 4197 if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) { 4198 bcopy(ecp, &ecdata[ways_logged], 4199 sizeof (ch_ec_data_t)); 4200 ways_logged++; 4201 } 4202 } 4203 } 4204 4205 /* 4206 * Add the L2 cache data to the payload. 4207 */ 4208 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS, 4209 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4210 if (ways_logged != 0) { 4211 nelem = sizeof (ch_ec_data_t) / 4212 sizeof (uint64_t) * ways_logged; 4213 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA, 4214 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)ecdata, NULL); 4215 } 4216 } 4217 4218 /* 4219 * Encode the data saved in the ch_async_flt_t struct into 4220 * the FM ereport payload. 4221 */ 4222 static void 4223 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload, 4224 nvlist_t *resource, int *afar_status, int *synd_status) 4225 { 4226 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4227 *synd_status = AFLT_STAT_INVALID; 4228 *afar_status = AFLT_STAT_INVALID; 4229 4230 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) { 4231 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR, 4232 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 4233 } 4234 4235 if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) && 4236 IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { 4237 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT, 4238 DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL); 4239 } 4240 4241 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) { 4242 *afar_status = afsr_to_afar_status(ch_flt->afsr_errs, 4243 ch_flt->flt_bit); 4244 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS, 4245 DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL); 4246 } 4247 4248 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) { 4249 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR, 4250 DATA_TYPE_UINT64, aflt->flt_addr, NULL); 4251 } 4252 4253 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) { 4254 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC, 4255 DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL); 4256 } 4257 4258 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) { 4259 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL, 4260 DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL); 4261 } 4262 4263 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) { 4264 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT, 4265 DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL); 4266 } 4267 4268 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) { 4269 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV, 4270 DATA_TYPE_BOOLEAN_VALUE, 4271 (aflt->flt_priv ? B_TRUE : B_FALSE), NULL); 4272 } 4273 4274 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) { 4275 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME, 4276 DATA_TYPE_BOOLEAN_VALUE, 4277 (aflt->flt_stat & C_AFSR_ME) ? 
B_TRUE : B_FALSE, NULL); 4278 } 4279 4280 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) { 4281 *synd_status = afsr_to_synd_status(aflt->flt_inst, 4282 ch_flt->afsr_errs, ch_flt->flt_bit); 4283 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS, 4284 DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL); 4285 } 4286 4287 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) { 4288 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND, 4289 DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL); 4290 } 4291 4292 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) { 4293 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, 4294 DATA_TYPE_STRING, flt_to_error_type(aflt), NULL); 4295 } 4296 4297 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) { 4298 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP, 4299 DATA_TYPE_UINT64, aflt->flt_disp, NULL); 4300 } 4301 4302 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2) 4303 cpu_payload_add_ecache(aflt, payload); 4304 4305 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) { 4306 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION, 4307 DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL); 4308 } 4309 4310 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) { 4311 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED, 4312 DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL); 4313 } 4314 4315 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) { 4316 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK, 4317 DATA_TYPE_UINT32_ARRAY, 16, 4318 (uint32_t *)&ch_flt->flt_fpdata, NULL); 4319 } 4320 4321 #if defined(CPU_IMP_L1_CACHE_PARITY) 4322 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D) 4323 cpu_payload_add_dcache(aflt, payload); 4324 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I) 4325 cpu_payload_add_icache(aflt, payload); 4326 #endif /* CPU_IMP_L1_CACHE_PARITY */ 4327 4328 #if defined(CHEETAH_PLUS) 4329 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P) 4330 cpu_payload_add_pcache(aflt, payload); 4331 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB) 4332 cpu_payload_add_tlb(aflt, payload); 4333 #endif /* CHEETAH_PLUS */ 4334 /* 4335 * Create the FMRI that goes into the payload 4336 * and contains the unum info if necessary. 4337 */ 4338 if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) && 4339 (*afar_status == AFLT_STAT_VALID)) { 4340 char unum[UNUM_NAMLEN]; 4341 int len; 4342 4343 if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum, 4344 UNUM_NAMLEN, &len) == 0) { 4345 fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, 4346 NULL, unum, NULL); 4347 fm_payload_set(payload, 4348 FM_EREPORT_PAYLOAD_NAME_RESOURCE, 4349 DATA_TYPE_NVLIST, resource, NULL); 4350 } 4351 } 4352 } 4353 4354 /* 4355 * Initialize the way info if necessary. 4356 */ 4357 void 4358 cpu_ereport_init(struct async_flt *aflt) 4359 { 4360 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4361 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; 4362 ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0]; 4363 int i; 4364 4365 /* 4366 * Initialize the info in the CPU logout structure. 4367 * The I$/D$ way information is not initialized here 4368 * since it is captured in the logout assembly code. 
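 * Pre-setting ec_way on each E$/L2 entry records which way every
 * logged ch_ec_data_t slot corresponds to, so consumers of the
 * ereport payload do not have to infer it from array position.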
4369 */ 4370 for (i = 0; i < CHD_EC_DATA_SETS; i++) 4371 (ecp + i)->ec_way = i; 4372 4373 for (i = 0; i < PN_L2_NWAYS; i++) 4374 (l2p + i)->ec_way = i; 4375 } 4376 4377 /* 4378 * Returns whether fault address is valid for this error bit and 4379 * whether the address is "in memory" (i.e. pf_is_memory returns 1). 4380 */ 4381 int 4382 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit) 4383 { 4384 struct async_flt *aflt = (struct async_flt *)ch_flt; 4385 4386 return ((aflt->flt_stat & C_AFSR_MEMORY) && 4387 afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) == 4388 AFLT_STAT_VALID && 4389 pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)); 4390 } 4391 4392 static void 4393 cpu_log_diag_info(ch_async_flt_t *ch_flt) 4394 { 4395 struct async_flt *aflt = (struct async_flt *)ch_flt; 4396 ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data; 4397 ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data; 4398 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; 4399 #if defined(CPU_IMP_ECACHE_ASSOC) 4400 int i, nway; 4401 #endif /* CPU_IMP_ECACHE_ASSOC */ 4402 4403 /* 4404 * Check if the CPU log out captured was valid. 4405 */ 4406 if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID || 4407 ch_flt->flt_data_incomplete) 4408 return; 4409 4410 #if defined(CPU_IMP_ECACHE_ASSOC) 4411 nway = cpu_ecache_nway(); 4412 i = cpu_ecache_line_valid(ch_flt); 4413 if (i == 0 || i > nway) { 4414 for (i = 0; i < nway; i++) 4415 ecp[i].ec_logflag = EC_LOGFLAG_MAGIC; 4416 } else 4417 ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC; 4418 #else /* CPU_IMP_ECACHE_ASSOC */ 4419 ecp->ec_logflag = EC_LOGFLAG_MAGIC; 4420 #endif /* CPU_IMP_ECACHE_ASSOC */ 4421 4422 #if defined(CHEETAH_PLUS) 4423 pn_cpu_log_diag_l2_info(ch_flt); 4424 #endif /* CHEETAH_PLUS */ 4425 4426 if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) { 4427 dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx); 4428 dcp->dc_logflag = DC_LOGFLAG_MAGIC; 4429 } 4430 4431 if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) { 4432 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 4433 icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx); 4434 else 4435 icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx); 4436 icp->ic_logflag = IC_LOGFLAG_MAGIC; 4437 } 4438 } 4439 4440 /* 4441 * Cheetah ECC calculation. 4442 * 4443 * We only need to do the calculation on the data bits and can ignore check 4444 * bit and Mtag bit terms in the calculation. 4445 */ 4446 static uint64_t ch_ecc_table[9][2] = { 4447 /* 4448 * low order 64-bits high-order 64-bits 4449 */ 4450 { 0x46bffffeccd1177f, 0x488800022100014c }, 4451 { 0x42fccc81331ff77f, 0x14424f1010249184 }, 4452 { 0x8898827c222f1ffe, 0x22c1222808184aaf }, 4453 { 0xf7632203e131ccf1, 0xe1241121848292b8 }, 4454 { 0x7f5511421b113809, 0x901c88d84288aafe }, 4455 { 0x1d49412184882487, 0x8f338c87c044c6ef }, 4456 { 0xf552181014448344, 0x7ff8f4443e411911 }, 4457 { 0x2189240808f24228, 0xfeeff8cc81333f42 }, 4458 { 0x3280008440001112, 0xfee88b337ffffd62 }, 4459 }; 4460 4461 /* 4462 * 64-bit population count, use well-known popcnt trick. 4463 * We could use the UltraSPARC V9 POPC instruction, but some 4464 * CPUs including Cheetahplus and Jaguar do not support that 4465 * instruction. 4466 */ 4467 int 4468 popc64(uint64_t val) 4469 { 4470 int cnt; 4471 4472 for (cnt = 0; val != 0; val &= val - 1) 4473 cnt++; 4474 return (cnt); 4475 } 4476 4477 /* 4478 * Generate the 9 ECC bits for the 128-bit chunk based on the table above. 
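 * Each table row selects which of the 128 data bits participate in the
 * corresponding check bit; that check bit is simply the XOR of the
 * selected bits.  As a worked example, for data_low == 1 and
 * data_high == 0, bit 0 is selected by rows 0, 1, 3, 4 and 5 of the
 * table, so the returned syndrome would be 0x03b.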
4479 * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number 4480 * of 1 bits == 0, so we can just use the least significant bit of the popcnt 4481 * instead of doing all the xor's. 4482 */ 4483 uint32_t 4484 us3_gen_ecc(uint64_t data_low, uint64_t data_high) 4485 { 4486 int bitno, s; 4487 int synd = 0; 4488 4489 for (bitno = 0; bitno < 9; bitno++) { 4490 s = (popc64(data_low & ch_ecc_table[bitno][0]) + 4491 popc64(data_high & ch_ecc_table[bitno][1])) & 1; 4492 synd |= (s << bitno); 4493 } 4494 return (synd); 4495 4496 } 4497 4498 /* 4499 * Queue one event based on ecc_type_to_info entry. If the event has an AFT1 4500 * tag associated with it or is a fatal event (aflt_panic set), it is sent to 4501 * the UE event queue. Otherwise it is dispatched to the CE event queue. 4502 */ 4503 static void 4504 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason, 4505 ecc_type_to_info_t *eccp, ch_diag_data_t *cdp) 4506 { 4507 struct async_flt *aflt = (struct async_flt *)ch_flt; 4508 4509 if (reason && 4510 strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { 4511 (void) strcat(reason, eccp->ec_reason); 4512 } 4513 4514 ch_flt->flt_bit = eccp->ec_afsr_bit; 4515 ch_flt->flt_type = eccp->ec_flt_type; 4516 if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID) 4517 ch_flt->flt_diag_data = *cdp; 4518 else 4519 ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID; 4520 aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit); 4521 4522 if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS) 4523 aflt->flt_synd = GET_M_SYND(aflt->flt_stat); 4524 else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) 4525 aflt->flt_synd = GET_E_SYND(aflt->flt_stat); 4526 else 4527 aflt->flt_synd = 0; 4528 4529 aflt->flt_payload = eccp->ec_err_payload; 4530 4531 if (aflt->flt_panic || (eccp->ec_afsr_bit & 4532 (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1))) 4533 cpu_errorq_dispatch(eccp->ec_err_class, 4534 (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue, 4535 aflt->flt_panic); 4536 else 4537 cpu_errorq_dispatch(eccp->ec_err_class, 4538 (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue, 4539 aflt->flt_panic); 4540 } 4541 4542 /* 4543 * Queue events on async event queue one event per error bit. First we 4544 * queue the events that we "expect" for the given trap, then we queue events 4545 * that we may not expect. Return number of events queued. 4546 */ 4547 int 4548 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs, 4549 ch_cpu_logout_t *clop) 4550 { 4551 struct async_flt *aflt = (struct async_flt *)ch_flt; 4552 ecc_type_to_info_t *eccp; 4553 int nevents = 0; 4554 uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat; 4555 #if defined(CHEETAH_PLUS) 4556 uint64_t orig_t_afsr_errs; 4557 #endif 4558 uint64_t primary_afsr_ext = ch_flt->afsr_ext; 4559 uint64_t primary_afsr_errs = ch_flt->afsr_errs; 4560 ch_diag_data_t *cdp = NULL; 4561 4562 t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS); 4563 4564 #if defined(CHEETAH_PLUS) 4565 orig_t_afsr_errs = t_afsr_errs; 4566 4567 /* 4568 * For Cheetah+, log the shadow AFSR/AFAR bits first. 4569 */ 4570 if (clop != NULL) { 4571 /* 4572 * Set the AFSR and AFAR fields to the shadow registers. The 4573 * flt_addr and flt_stat fields will be reset to the primaries 4574 * below, but the sdw_addr and sdw_stat will stay as the 4575 * secondaries. 
4576 */ 4577 cdp = &clop->clo_sdw_data; 4578 aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar; 4579 aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr; 4580 ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext; 4581 ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 4582 (cdp->chd_afsr & C_AFSR_ALL_ERRS); 4583 4584 /* 4585 * If the primary and shadow AFSR differ, tag the shadow as 4586 * the first fault. 4587 */ 4588 if ((primary_afar != cdp->chd_afar) || 4589 (primary_afsr_errs != ch_flt->afsr_errs)) { 4590 aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT); 4591 } 4592 4593 /* 4594 * Check AFSR bits as well as AFSR_EXT bits in order of 4595 * the AFAR overwrite priority. Our stored AFSR_EXT value 4596 * is expected to be zero for those CPUs which do not have 4597 * an AFSR_EXT register. 4598 */ 4599 for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) { 4600 if ((eccp->ec_afsr_bit & 4601 (ch_flt->afsr_errs & t_afsr_errs)) && 4602 ((eccp->ec_flags & aflt->flt_status) != 0)) { 4603 cpu_queue_one_event(ch_flt, reason, eccp, cdp); 4604 cdp = NULL; 4605 t_afsr_errs &= ~eccp->ec_afsr_bit; 4606 nevents++; 4607 } 4608 } 4609 4610 /* 4611 * If the ME bit is on in the primary AFSR turn all the 4612 * error bits on again that may set the ME bit to make 4613 * sure we see the ME AFSR error logs. 4614 */ 4615 if ((primary_afsr & C_AFSR_ME) != 0) 4616 t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS); 4617 } 4618 #endif /* CHEETAH_PLUS */ 4619 4620 if (clop != NULL) 4621 cdp = &clop->clo_data; 4622 4623 /* 4624 * Queue expected errors, error bit and fault type must match 4625 * in the ecc_type_to_info table. 4626 */ 4627 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 4628 eccp++) { 4629 if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && 4630 (eccp->ec_flags & aflt->flt_status) != 0) { 4631 #if defined(SERRANO) 4632 /* 4633 * For FRC/FRU errors on Serrano the afar2 captures 4634 * the address and the associated data is 4635 * in the shadow logout area. 4636 */ 4637 if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) { 4638 if (clop != NULL) 4639 cdp = &clop->clo_sdw_data; 4640 aflt->flt_addr = ch_flt->afar2; 4641 } else { 4642 if (clop != NULL) 4643 cdp = &clop->clo_data; 4644 aflt->flt_addr = primary_afar; 4645 } 4646 #else /* SERRANO */ 4647 aflt->flt_addr = primary_afar; 4648 #endif /* SERRANO */ 4649 aflt->flt_stat = primary_afsr; 4650 ch_flt->afsr_ext = primary_afsr_ext; 4651 ch_flt->afsr_errs = primary_afsr_errs; 4652 cpu_queue_one_event(ch_flt, reason, eccp, cdp); 4653 cdp = NULL; 4654 t_afsr_errs &= ~eccp->ec_afsr_bit; 4655 nevents++; 4656 } 4657 } 4658 4659 /* 4660 * Queue unexpected errors, error bit only match. 4661 */ 4662 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 4663 eccp++) { 4664 if (eccp->ec_afsr_bit & t_afsr_errs) { 4665 #if defined(SERRANO) 4666 /* 4667 * For FRC/FRU errors on Serrano the afar2 captures 4668 * the address and the associated data is 4669 * in the shadow logout area. 
4670 */ 4671 if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) { 4672 if (clop != NULL) 4673 cdp = &clop->clo_sdw_data; 4674 aflt->flt_addr = ch_flt->afar2; 4675 } else { 4676 if (clop != NULL) 4677 cdp = &clop->clo_data; 4678 aflt->flt_addr = primary_afar; 4679 } 4680 #else /* SERRANO */ 4681 aflt->flt_addr = primary_afar; 4682 #endif /* SERRANO */ 4683 aflt->flt_stat = primary_afsr; 4684 ch_flt->afsr_ext = primary_afsr_ext; 4685 ch_flt->afsr_errs = primary_afsr_errs; 4686 cpu_queue_one_event(ch_flt, reason, eccp, cdp); 4687 cdp = NULL; 4688 t_afsr_errs &= ~eccp->ec_afsr_bit; 4689 nevents++; 4690 } 4691 } 4692 return (nevents); 4693 } 4694 4695 /* 4696 * Return trap type number. 4697 */ 4698 uint8_t 4699 flt_to_trap_type(struct async_flt *aflt) 4700 { 4701 if (aflt->flt_status & ECC_I_TRAP) 4702 return (TRAP_TYPE_ECC_I); 4703 if (aflt->flt_status & ECC_D_TRAP) 4704 return (TRAP_TYPE_ECC_D); 4705 if (aflt->flt_status & ECC_F_TRAP) 4706 return (TRAP_TYPE_ECC_F); 4707 if (aflt->flt_status & ECC_C_TRAP) 4708 return (TRAP_TYPE_ECC_C); 4709 if (aflt->flt_status & ECC_DP_TRAP) 4710 return (TRAP_TYPE_ECC_DP); 4711 if (aflt->flt_status & ECC_IP_TRAP) 4712 return (TRAP_TYPE_ECC_IP); 4713 if (aflt->flt_status & ECC_ITLB_TRAP) 4714 return (TRAP_TYPE_ECC_ITLB); 4715 if (aflt->flt_status & ECC_DTLB_TRAP) 4716 return (TRAP_TYPE_ECC_DTLB); 4717 return (TRAP_TYPE_UNKNOWN); 4718 } 4719 4720 /* 4721 * Decide an error type based on detector and leaky/partner tests. 4722 * The following array is used for quick translation - it must 4723 * stay in sync with ce_dispact_t. 4724 */ 4725 4726 static char *cetypes[] = { 4727 CE_DISP_DESC_U, 4728 CE_DISP_DESC_I, 4729 CE_DISP_DESC_PP, 4730 CE_DISP_DESC_P, 4731 CE_DISP_DESC_L, 4732 CE_DISP_DESC_PS, 4733 CE_DISP_DESC_S 4734 }; 4735 4736 char * 4737 flt_to_error_type(struct async_flt *aflt) 4738 { 4739 ce_dispact_t dispact, disp; 4740 uchar_t dtcrinfo, ptnrinfo, lkyinfo; 4741 4742 /* 4743 * The memory payload bundle is shared by some events that do 4744 * not perform any classification. For those flt_disp will be 4745 * 0 and we will return "unknown". 4746 */ 4747 if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0) 4748 return (cetypes[CE_DISP_UNKNOWN]); 4749 4750 dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp); 4751 4752 /* 4753 * It is also possible that no scrub/classification was performed 4754 * by the detector, for instance where a disrupting error logged 4755 * in the AFSR while CEEN was off in cpu_deferred_error. 4756 */ 4757 if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) 4758 return (cetypes[CE_DISP_UNKNOWN]); 4759 4760 /* 4761 * Lookup type in initial classification/action table 4762 */ 4763 dispact = CE_DISPACT(ce_disp_table, 4764 CE_XDIAG_AFARMATCHED(dtcrinfo), 4765 CE_XDIAG_STATE(dtcrinfo), 4766 CE_XDIAG_CE1SEEN(dtcrinfo), 4767 CE_XDIAG_CE2SEEN(dtcrinfo)); 4768 4769 /* 4770 * A bad lookup is not something to panic production systems for. 4771 */ 4772 ASSERT(dispact != CE_DISP_BAD); 4773 if (dispact == CE_DISP_BAD) 4774 return (cetypes[CE_DISP_UNKNOWN]); 4775 4776 disp = CE_DISP(dispact); 4777 4778 switch (disp) { 4779 case CE_DISP_UNKNOWN: 4780 case CE_DISP_INTERMITTENT: 4781 break; 4782 4783 case CE_DISP_POSS_PERS: 4784 /* 4785 * "Possible persistent" errors to which we have applied a valid 4786 * leaky test can be separated into "persistent" or "leaky". 
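 * (A "leaky" disposition roughly means the corrected error came back
 * after the scrub rewrote the data, i.e. the leaky-test CE1SEEN or
 * CE2SEEN bits are set; a valid test with no recurrence is classified
 * "persistent".)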
4787 */ 4788 lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp); 4789 if (CE_XDIAG_TESTVALID(lkyinfo)) { 4790 if (CE_XDIAG_CE1SEEN(lkyinfo) || 4791 CE_XDIAG_CE2SEEN(lkyinfo)) 4792 disp = CE_DISP_LEAKY; 4793 else 4794 disp = CE_DISP_PERS; 4795 } 4796 break; 4797 4798 case CE_DISP_POSS_STICKY: 4799 /* 4800 * Promote "possible sticky" results that have been 4801 * confirmed by a partner test to "sticky". Unconfirmed 4802 * "possible sticky" events are left at that status - we do not 4803 * guess at any bad reader/writer etc status here. 4804 */ 4805 ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp); 4806 if (CE_XDIAG_TESTVALID(ptnrinfo) && 4807 CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo)) 4808 disp = CE_DISP_STICKY; 4809 4810 /* 4811 * Promote "possible sticky" results on a uniprocessor 4812 * to "sticky" 4813 */ 4814 if (disp == CE_DISP_POSS_STICKY && 4815 CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC) 4816 disp = CE_DISP_STICKY; 4817 break; 4818 4819 default: 4820 disp = CE_DISP_UNKNOWN; 4821 break; 4822 } 4823 4824 return (cetypes[disp]); 4825 } 4826 4827 /* 4828 * Given the entire afsr, the specific bit to check and a prioritized list of 4829 * error bits, determine the validity of the various overwrite priority 4830 * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have 4831 * different overwrite priorities. 4832 * 4833 * Given a specific afsr error bit and the entire afsr, there are three cases: 4834 * INVALID: The specified bit is lower overwrite priority than some other 4835 * error bit which is on in the afsr (or IVU/IVC). 4836 * VALID: The specified bit is higher priority than all other error bits 4837 * which are on in the afsr. 4838 * AMBIGUOUS: Another error bit (or bits) of equal priority to the specified 4839 * bit is on in the afsr. 4840 */ 4841 int 4842 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits) 4843 { 4844 uint64_t afsr_ow; 4845 4846 while ((afsr_ow = *ow_bits++) != 0) { 4847 /* 4848 * If bit is in the priority class, check to see if another 4849 * bit in the same class is on => ambiguous. Otherwise, 4850 * the value is valid. If the bit is not on at this priority 4851 * class, but a higher priority bit is on, then the value is 4852 * invalid. 4853 */ 4854 if (afsr_ow & afsr_bit) { 4855 /* 4856 * If equal pri bit is on, ambiguous. 4857 */ 4858 if (afsr & (afsr_ow & ~afsr_bit)) 4859 return (AFLT_STAT_AMBIGUOUS); 4860 return (AFLT_STAT_VALID); 4861 } else if (afsr & afsr_ow) 4862 break; 4863 } 4864 4865 /* 4866 * We didn't find a match or a higher priority bit was on. Not 4867 * finding a match handles the case of invalid AFAR for IVC, IVU. 
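 * To illustrate the walk above: ow_bits lists the priority classes from
 * highest to lowest, so if afsr_bit's own class is reached first the
 * result is VALID (or AMBIGUOUS when another bit of that same class is
 * also set in the afsr), while a set bit in a higher-priority class
 * makes us break out early and report INVALID here.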
4868 */ 4869 return (AFLT_STAT_INVALID); 4870 } 4871 4872 static int 4873 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit) 4874 { 4875 #if defined(SERRANO) 4876 if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) 4877 return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite)); 4878 else 4879 #endif /* SERRANO */ 4880 return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite)); 4881 } 4882 4883 static int 4884 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit) 4885 { 4886 return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite)); 4887 } 4888 4889 static int 4890 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit) 4891 { 4892 return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite)); 4893 } 4894 4895 static int 4896 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit) 4897 { 4898 #ifdef lint 4899 cpuid = cpuid; 4900 #endif 4901 if (afsr_bit & C_AFSR_MSYND_ERRS) { 4902 return (afsr_to_msynd_status(afsr, afsr_bit)); 4903 } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { 4904 #if defined(CHEETAH_PLUS) 4905 /* 4906 * The E_SYND overwrite policy is slightly different 4907 * for Panther CPUs. 4908 */ 4909 if (IS_PANTHER(cpunodes[cpuid].implementation)) 4910 return (afsr_to_pn_esynd_status(afsr, afsr_bit)); 4911 else 4912 return (afsr_to_esynd_status(afsr, afsr_bit)); 4913 #else /* CHEETAH_PLUS */ 4914 return (afsr_to_esynd_status(afsr, afsr_bit)); 4915 #endif /* CHEETAH_PLUS */ 4916 } else { 4917 return (AFLT_STAT_INVALID); 4918 } 4919 } 4920 4921 /* 4922 * Slave CPU stick synchronization. 4923 */ 4924 void 4925 sticksync_slave(void) 4926 { 4927 int i; 4928 int tries = 0; 4929 int64_t tskew; 4930 int64_t av_tskew; 4931 4932 kpreempt_disable(); 4933 /* wait for the master side */ 4934 while (stick_sync_cmd != SLAVE_START) 4935 ; 4936 /* 4937 * Synchronization should only take a few tries at most. But in the 4938 * odd case where the cpu isn't cooperating we'll keep trying. A cpu 4939 * without it's stick synchronized wouldn't be a good citizen. 4940 */ 4941 while (slave_done == 0) { 4942 /* 4943 * Time skew calculation. 4944 */ 4945 av_tskew = tskew = 0; 4946 4947 for (i = 0; i < stick_iter; i++) { 4948 /* make location hot */ 4949 timestamp[EV_A_START] = 0; 4950 stick_timestamp(×tamp[EV_A_START]); 4951 4952 /* tell the master we're ready */ 4953 stick_sync_cmd = MASTER_START; 4954 4955 /* and wait */ 4956 while (stick_sync_cmd != SLAVE_CONT) 4957 ; 4958 /* Event B end */ 4959 stick_timestamp(×tamp[EV_B_END]); 4960 4961 /* calculate time skew */ 4962 tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START]) 4963 - (timestamp[EV_A_END] - 4964 timestamp[EV_A_START])) / 2; 4965 4966 /* keep running count */ 4967 av_tskew += tskew; 4968 } /* for */ 4969 4970 /* 4971 * Adjust stick for time skew if not within the max allowed; 4972 * otherwise we're all done. 4973 */ 4974 if (stick_iter != 0) 4975 av_tskew = av_tskew/stick_iter; 4976 if (ABS(av_tskew) > stick_tsk) { 4977 /* 4978 * If the skew is 1 (the slave's STICK register 4979 * is 1 STICK ahead of the master's), stick_adj 4980 * could fail to adjust the slave's STICK register 4981 * if the STICK read on the slave happens to 4982 * align with the increment of the STICK. 4983 * Therefore, we increment the skew to 2. 
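 * (For reference, the skew math above assumes a symmetric one-way
 * latency d: with the slave's STICK ahead of the master's by o, event
 * A measures d - o and event B measures d + o on the respective
 * clocks, so half their difference recovers o, which the
 * stick_adj(-av_tskew) below then removes.)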
4984 */ 4985 if (av_tskew == 1) 4986 av_tskew++; 4987 stick_adj(-av_tskew); 4988 } else 4989 slave_done = 1; 4990 #ifdef DEBUG 4991 if (tries < DSYNC_ATTEMPTS) 4992 stick_sync_stats[CPU->cpu_id].skew_val[tries] = 4993 av_tskew; 4994 ++tries; 4995 #endif /* DEBUG */ 4996 #ifdef lint 4997 tries = tries; 4998 #endif 4999 5000 } /* while */ 5001 5002 /* allow the master to finish */ 5003 stick_sync_cmd = EVENT_NULL; 5004 kpreempt_enable(); 5005 } 5006 5007 /* 5008 * Master CPU side of stick synchronization. 5009 * - timestamp end of Event A 5010 * - timestamp beginning of Event B 5011 */ 5012 void 5013 sticksync_master(void) 5014 { 5015 int i; 5016 5017 kpreempt_disable(); 5018 /* tell the slave we've started */ 5019 slave_done = 0; 5020 stick_sync_cmd = SLAVE_START; 5021 5022 while (slave_done == 0) { 5023 for (i = 0; i < stick_iter; i++) { 5024 /* wait for the slave */ 5025 while (stick_sync_cmd != MASTER_START) 5026 ; 5027 /* Event A end */ 5028 stick_timestamp(×tamp[EV_A_END]); 5029 5030 /* make location hot */ 5031 timestamp[EV_B_START] = 0; 5032 stick_timestamp(×tamp[EV_B_START]); 5033 5034 /* tell the slave to continue */ 5035 stick_sync_cmd = SLAVE_CONT; 5036 } /* for */ 5037 5038 /* wait while slave calculates time skew */ 5039 while (stick_sync_cmd == SLAVE_CONT) 5040 ; 5041 } /* while */ 5042 kpreempt_enable(); 5043 } 5044 5045 /* 5046 * Cheetah/Cheetah+ have disrupting error for copyback's, so we don't need to 5047 * do Spitfire hack of xcall'ing all the cpus to ask to check for them. Also, 5048 * in cpu_async_panic_callb, each cpu checks for CPU events on its way to 5049 * panic idle. 5050 */ 5051 /*ARGSUSED*/ 5052 void 5053 cpu_check_allcpus(struct async_flt *aflt) 5054 {} 5055 5056 struct kmem_cache *ch_private_cache; 5057 5058 /* 5059 * Cpu private unitialization. Uninitialize the Ecache scrubber and 5060 * deallocate the scrubber data structures and cpu_private data structure. 5061 */ 5062 void 5063 cpu_uninit_private(struct cpu *cp) 5064 { 5065 cheetah_private_t *chprp = CPU_PRIVATE(cp); 5066 5067 ASSERT(chprp); 5068 cpu_uninit_ecache_scrub_dr(cp); 5069 CPU_PRIVATE(cp) = NULL; 5070 ch_err_tl1_paddrs[cp->cpu_id] = NULL; 5071 kmem_cache_free(ch_private_cache, chprp); 5072 cmp_delete_cpu(cp->cpu_id); 5073 5074 } 5075 5076 /* 5077 * Cheetah Cache Scrubbing 5078 * 5079 * The primary purpose of Cheetah cache scrubbing is to reduce the exposure 5080 * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not 5081 * protected by either parity or ECC. 5082 * 5083 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the 5084 * cache per second). Due to the the specifics of how the I$ control 5085 * logic works with respect to the ASI used to scrub I$ lines, the entire 5086 * I$ is scanned at once. 5087 */ 5088 5089 /* 5090 * Tuneables to enable and disable the scrubbing of the caches, and to tune 5091 * scrubbing behavior. These may be changed via /etc/system or using mdb 5092 * on a running system. 5093 */ 5094 int dcache_scrub_enable = 1; /* D$ scrubbing is on by default */ 5095 5096 /* 5097 * The following are the PIL levels that the softints/cross traps will fire at. 5098 */ 5099 uint_t ecache_scrub_pil = PIL_9; /* E$ scrub PIL for cross traps */ 5100 uint_t dcache_scrub_pil = PIL_9; /* D$ scrub PIL for cross traps */ 5101 uint_t icache_scrub_pil = PIL_9; /* I$ scrub PIL for cross traps */ 5102 5103 #if defined(JALAPENO) 5104 5105 /* 5106 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber 5107 * on Jalapeno. 
5108 */ 5109 int ecache_scrub_enable = 0; 5110 5111 #else /* JALAPENO */ 5112 5113 /* 5114 * With all other cpu types, E$ scrubbing is on by default 5115 */ 5116 int ecache_scrub_enable = 1; 5117 5118 #endif /* JALAPENO */ 5119 5120 5121 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO) 5122 5123 /* 5124 * The I$ scrubber tends to cause latency problems for real-time SW, so it 5125 * is disabled by default on non-Cheetah systems 5126 */ 5127 int icache_scrub_enable = 0; 5128 5129 /* 5130 * Tuneables specifying the scrub calls per second and the scan rate 5131 * for each cache 5132 * 5133 * The cyclic times are set during boot based on the following values. 5134 * Changing these values in mdb after this time will have no effect. If 5135 * a different value is desired, it must be set in /etc/system before a 5136 * reboot. 5137 */ 5138 int ecache_calls_a_sec = 1; 5139 int dcache_calls_a_sec = 2; 5140 int icache_calls_a_sec = 2; 5141 5142 int ecache_scan_rate_idle = 1; 5143 int ecache_scan_rate_busy = 1; 5144 int dcache_scan_rate_idle = 1; 5145 int dcache_scan_rate_busy = 1; 5146 int icache_scan_rate_idle = 1; 5147 int icache_scan_rate_busy = 1; 5148 5149 #else /* CHEETAH_PLUS || JALAPENO || SERRANO */ 5150 5151 int icache_scrub_enable = 1; /* I$ scrubbing is on by default */ 5152 5153 int ecache_calls_a_sec = 100; /* E$ scrub calls per seconds */ 5154 int dcache_calls_a_sec = 100; /* D$ scrub calls per seconds */ 5155 int icache_calls_a_sec = 100; /* I$ scrub calls per seconds */ 5156 5157 int ecache_scan_rate_idle = 100; /* E$ scan rate (in tenths of a %) */ 5158 int ecache_scan_rate_busy = 100; /* E$ scan rate (in tenths of a %) */ 5159 int dcache_scan_rate_idle = 100; /* D$ scan rate (in tenths of a %) */ 5160 int dcache_scan_rate_busy = 100; /* D$ scan rate (in tenths of a %) */ 5161 int icache_scan_rate_idle = 100; /* I$ scan rate (in tenths of a %) */ 5162 int icache_scan_rate_busy = 100; /* I$ scan rate (in tenths of a %) */ 5163 5164 #endif /* CHEETAH_PLUS || JALAPENO || SERRANO */ 5165 5166 /* 5167 * In order to scrub on offline cpus, a cross trap is sent. The handler will 5168 * increment the outstanding request counter and schedule a softint to run 5169 * the scrubber. 5170 */ 5171 extern xcfunc_t cache_scrubreq_tl1; 5172 5173 /* 5174 * These are the softint functions for each cache scrubber 5175 */ 5176 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2); 5177 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2); 5178 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2); 5179 5180 /* 5181 * The cache scrub info table contains cache specific information 5182 * and allows for some of the scrub code to be table driven, reducing 5183 * duplication of cache similar code. 5184 * 5185 * This table keeps a copy of the value in the calls per second variable 5186 * (?cache_calls_a_sec). This makes it much more difficult for someone 5187 * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in 5188 * mdb in a misguided attempt to disable the scrubber). 
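 * The csi_freq copy is also what cpu_init_cache_scrub() clamps to the
 * range [1, hz] before the cyclics are created, so changing the
 * ?cache_calls_a_sec variables on a running system has no effect.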
5189 */ 5190 struct scrub_info { 5191 int *csi_enable; /* scrubber enable flag */ 5192 int csi_freq; /* scrubber calls per second */ 5193 int csi_index; /* index to chsm_outstanding[] */ 5194 uint_t csi_inum; /* scrubber interrupt number */ 5195 cyclic_id_t csi_omni_cyc_id; /* omni cyclic ID */ 5196 cyclic_id_t csi_offline_cyc_id; /* offline cyclic ID */ 5197 char csi_name[3]; /* cache name for this scrub entry */ 5198 } cache_scrub_info[] = { 5199 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"}, 5200 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"}, 5201 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"} 5202 }; 5203 5204 /* 5205 * If scrubbing is enabled, increment the outstanding request counter. If it 5206 * is 1 (meaning there were no previous requests outstanding), call 5207 * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing 5208 * a self trap. 5209 */ 5210 static void 5211 do_scrub(struct scrub_info *csi) 5212 { 5213 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5214 int index = csi->csi_index; 5215 uint32_t *outstanding = &csmp->chsm_outstanding[index]; 5216 5217 if (*(csi->csi_enable) && (csmp->chsm_enable[index])) { 5218 if (atomic_add_32_nv(outstanding, 1) == 1) { 5219 xt_one_unchecked(CPU->cpu_id, setsoftint_tl1, 5220 csi->csi_inum, 0); 5221 } 5222 } 5223 } 5224 5225 /* 5226 * Omni cyclics don't fire on offline cpus, so we use another cyclic to 5227 * cross-trap the offline cpus. 5228 */ 5229 static void 5230 do_scrub_offline(struct scrub_info *csi) 5231 { 5232 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5233 5234 if (CPUSET_ISNULL(cpu_offline_set)) { 5235 /* 5236 * No offline cpus - nothing to do 5237 */ 5238 return; 5239 } 5240 5241 if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) { 5242 xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum, 5243 csi->csi_index); 5244 } 5245 } 5246 5247 /* 5248 * This is the initial setup for the scrubber cyclics - it sets the 5249 * interrupt level, frequency, and function to call. 5250 */ 5251 /*ARGSUSED*/ 5252 static void 5253 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, 5254 cyc_time_t *when) 5255 { 5256 struct scrub_info *csi = (struct scrub_info *)arg; 5257 5258 ASSERT(csi != NULL); 5259 hdlr->cyh_func = (cyc_func_t)do_scrub; 5260 hdlr->cyh_level = CY_LOW_LEVEL; 5261 hdlr->cyh_arg = arg; 5262 5263 when->cyt_when = 0; /* Start immediately */ 5264 when->cyt_interval = NANOSEC / csi->csi_freq; 5265 } 5266 5267 /* 5268 * Initialization for cache scrubbing. 5269 * This routine is called AFTER all cpus have had cpu_init_private called 5270 * to initialize their private data areas. 
5271 */ 5272 void 5273 cpu_init_cache_scrub(void) 5274 { 5275 int i; 5276 struct scrub_info *csi; 5277 cyc_omni_handler_t omni_hdlr; 5278 cyc_handler_t offline_hdlr; 5279 cyc_time_t when; 5280 5281 /* 5282 * save away the maximum number of lines for the D$ 5283 */ 5284 dcache_nlines = dcache_size / dcache_linesize; 5285 5286 /* 5287 * register the softints for the cache scrubbing 5288 */ 5289 cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum = 5290 add_softintr(ecache_scrub_pil, scrub_ecache_line_intr, 5291 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]); 5292 cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec; 5293 5294 cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum = 5295 add_softintr(dcache_scrub_pil, scrub_dcache_line_intr, 5296 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]); 5297 cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec; 5298 5299 cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum = 5300 add_softintr(icache_scrub_pil, scrub_icache_line_intr, 5301 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]); 5302 cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec; 5303 5304 /* 5305 * start the scrubbing for all the caches 5306 */ 5307 mutex_enter(&cpu_lock); 5308 for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) { 5309 5310 csi = &cache_scrub_info[i]; 5311 5312 if (!(*csi->csi_enable)) 5313 continue; 5314 5315 /* 5316 * force the following to be true: 5317 * 1 <= calls_a_sec <= hz 5318 */ 5319 if (csi->csi_freq > hz) { 5320 cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high " 5321 "(%d); resetting to hz (%d)", csi->csi_name, 5322 csi->csi_freq, hz); 5323 csi->csi_freq = hz; 5324 } else if (csi->csi_freq < 1) { 5325 cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low " 5326 "(%d); resetting to 1", csi->csi_name, 5327 csi->csi_freq); 5328 csi->csi_freq = 1; 5329 } 5330 5331 omni_hdlr.cyo_online = cpu_scrub_cyclic_setup; 5332 omni_hdlr.cyo_offline = NULL; 5333 omni_hdlr.cyo_arg = (void *)csi; 5334 5335 offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline; 5336 offline_hdlr.cyh_arg = (void *)csi; 5337 offline_hdlr.cyh_level = CY_LOW_LEVEL; 5338 5339 when.cyt_when = 0; /* Start immediately */ 5340 when.cyt_interval = NANOSEC / csi->csi_freq; 5341 5342 csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr); 5343 csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when); 5344 } 5345 register_cpu_setup_func(cpu_scrub_cpu_setup, NULL); 5346 mutex_exit(&cpu_lock); 5347 } 5348 5349 /* 5350 * Indicate that the specified cpu is idle. 5351 */ 5352 void 5353 cpu_idle_ecache_scrub(struct cpu *cp) 5354 { 5355 if (CPU_PRIVATE(cp) != NULL) { 5356 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5357 csmp->chsm_ecache_busy = ECACHE_CPU_IDLE; 5358 } 5359 } 5360 5361 /* 5362 * Indicate that the specified cpu is busy. 5363 */ 5364 void 5365 cpu_busy_ecache_scrub(struct cpu *cp) 5366 { 5367 if (CPU_PRIVATE(cp) != NULL) { 5368 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5369 csmp->chsm_ecache_busy = ECACHE_CPU_BUSY; 5370 } 5371 } 5372 5373 /* 5374 * Initialization for cache scrubbing for the specified cpu. 
5375 */ 5376 void 5377 cpu_init_ecache_scrub_dr(struct cpu *cp) 5378 { 5379 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5380 int cpuid = cp->cpu_id; 5381 5382 /* initialize the number of lines in the caches */ 5383 csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size / 5384 cpunodes[cpuid].ecache_linesize; 5385 csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) / 5386 CPU_PRIVATE_VAL(cp, chpr_icache_linesize); 5387 5388 /* 5389 * do_scrub() and do_scrub_offline() check both the global 5390 * ?cache_scrub_enable and this per-cpu enable variable. All scrubbers 5391 * check this value before scrubbing. Currently, we use it to 5392 * disable the E$ scrubber on multi-core cpus or while running at 5393 * slowed speed. For now, just turn everything on and allow 5394 * cpu_init_private() to change it if necessary. 5395 */ 5396 csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1; 5397 csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1; 5398 csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1; 5399 5400 cpu_busy_ecache_scrub(cp); 5401 } 5402 5403 /* 5404 * Un-initialization for cache scrubbing for the specified cpu. 5405 */ 5406 static void 5407 cpu_uninit_ecache_scrub_dr(struct cpu *cp) 5408 { 5409 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5410 5411 /* 5412 * un-initialize bookkeeping for cache scrubbing 5413 */ 5414 bzero(csmp, sizeof (ch_scrub_misc_t)); 5415 5416 cpu_idle_ecache_scrub(cp); 5417 } 5418 5419 /* 5420 * Called periodically on each CPU to scrub the D$. 5421 */ 5422 static void 5423 scrub_dcache(int how_many) 5424 { 5425 int i; 5426 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5427 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D]; 5428 5429 /* 5430 * scrub the desired number of lines 5431 */ 5432 for (i = 0; i < how_many; i++) { 5433 /* 5434 * scrub a D$ line 5435 */ 5436 dcache_inval_line(index); 5437 5438 /* 5439 * calculate the next D$ line to scrub, assumes 5440 * that dcache_nlines is a power of 2 5441 */ 5442 index = (index + 1) & (dcache_nlines - 1); 5443 } 5444 5445 /* 5446 * set the scrub index for the next visit 5447 */ 5448 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index; 5449 } 5450 5451 /* 5452 * Handler for D$ scrub inum softint. Call scrub_dcache until 5453 * we decrement the outstanding request count to zero. 5454 */ 5455 /*ARGSUSED*/ 5456 static uint_t 5457 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2) 5458 { 5459 int i; 5460 int how_many; 5461 int outstanding; 5462 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5463 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D]; 5464 struct scrub_info *csi = (struct scrub_info *)arg1; 5465 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 5466 dcache_scan_rate_idle : dcache_scan_rate_busy; 5467 5468 /* 5469 * The scan rates are expressed in units of tenths of a 5470 * percent. A scan rate of 1000 (100%) means the whole 5471 * cache is scanned every second. 5472 */ 5473 how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq); 5474 5475 do { 5476 outstanding = *countp; 5477 for (i = 0; i < outstanding; i++) { 5478 scrub_dcache(how_many); 5479 } 5480 } while (atomic_add_32_nv(countp, -outstanding)); 5481 5482 return (DDI_INTR_CLAIMED); 5483 } 5484 5485 /* 5486 * Called periodically on each CPU to scrub the I$. The I$ is scrubbed 5487 * by invalidating lines. Due to the characteristics of the ASI which 5488 * is used to invalidate an I$ line, the entire I$ must be invalidated 5489 * vs. an individual I$ line. 
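 * The scrub index still advances one line per call below, so
 * icache_inval_all() only fires when the index wraps back to zero,
 * i.e. roughly once per full pass over the I$, keeping the effective
 * scan rate comparable to the other caches.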
5490 */ 5491 static void 5492 scrub_icache(int how_many) 5493 { 5494 int i; 5495 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5496 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I]; 5497 int icache_nlines = csmp->chsm_icache_nlines; 5498 5499 /* 5500 * scrub the desired number of lines 5501 */ 5502 for (i = 0; i < how_many; i++) { 5503 /* 5504 * since the entire I$ must be scrubbed at once, 5505 * wait until the index wraps to zero to invalidate 5506 * the entire I$ 5507 */ 5508 if (index == 0) { 5509 icache_inval_all(); 5510 } 5511 5512 /* 5513 * calculate the next I$ line to scrub, assumes 5514 * that chsm_icache_nlines is a power of 2 5515 */ 5516 index = (index + 1) & (icache_nlines - 1); 5517 } 5518 5519 /* 5520 * set the scrub index for the next visit 5521 */ 5522 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index; 5523 } 5524 5525 /* 5526 * Handler for I$ scrub inum softint. Call scrub_icache until 5527 * we decrement the outstanding request count to zero. 5528 */ 5529 /*ARGSUSED*/ 5530 static uint_t 5531 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2) 5532 { 5533 int i; 5534 int how_many; 5535 int outstanding; 5536 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5537 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I]; 5538 struct scrub_info *csi = (struct scrub_info *)arg1; 5539 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 5540 icache_scan_rate_idle : icache_scan_rate_busy; 5541 int icache_nlines = csmp->chsm_icache_nlines; 5542 5543 /* 5544 * The scan rates are expressed in units of tenths of a 5545 * percent. A scan rate of 1000 (100%) means the whole 5546 * cache is scanned every second. 5547 */ 5548 how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq); 5549 5550 do { 5551 outstanding = *countp; 5552 for (i = 0; i < outstanding; i++) { 5553 scrub_icache(how_many); 5554 } 5555 } while (atomic_add_32_nv(countp, -outstanding)); 5556 5557 return (DDI_INTR_CLAIMED); 5558 } 5559 5560 /* 5561 * Called periodically on each CPU to scrub the E$. 5562 */ 5563 static void 5564 scrub_ecache(int how_many) 5565 { 5566 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5567 int i; 5568 int cpuid = CPU->cpu_id; 5569 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E]; 5570 int nlines = csmp->chsm_ecache_nlines; 5571 int linesize = cpunodes[cpuid].ecache_linesize; 5572 int ec_set_size = cpu_ecache_set_size(CPU); 5573 5574 /* 5575 * scrub the desired number of lines 5576 */ 5577 for (i = 0; i < how_many; i++) { 5578 /* 5579 * scrub the E$ line 5580 */ 5581 ecache_flush_line(ecache_flushaddr + (index * linesize), 5582 ec_set_size); 5583 5584 /* 5585 * calculate the next E$ line to scrub based on twice 5586 * the number of E$ lines (to displace lines containing 5587 * flush area data), assumes that the number of lines 5588 * is a power of 2 5589 */ 5590 index = (index + 1) & ((nlines << 1) - 1); 5591 } 5592 5593 /* 5594 * set the ecache scrub index for the next visit 5595 */ 5596 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index; 5597 } 5598 5599 /* 5600 * Handler for E$ scrub inum softint. Call the E$ scrubber until 5601 * we decrement the outstanding request count to zero. 5602 * 5603 * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may 5604 * become negative after the atomic_add_32_nv(). This is not a problem, as 5605 * the next trip around the loop won't scrub anything, and the next add will 5606 * reset the count back to zero. 
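 * The loop below snapshots *countp, services that many requests, then
 * atomically subtracts the snapshot; a non-zero result means new
 * requests arrived in the meantime and sends us around again.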
5607 */ 5608 /*ARGSUSED*/ 5609 static uint_t 5610 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) 5611 { 5612 int i; 5613 int how_many; 5614 int outstanding; 5615 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5616 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E]; 5617 struct scrub_info *csi = (struct scrub_info *)arg1; 5618 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 5619 ecache_scan_rate_idle : ecache_scan_rate_busy; 5620 int ecache_nlines = csmp->chsm_ecache_nlines; 5621 5622 /* 5623 * The scan rates are expressed in units of tenths of a 5624 * percent. A scan rate of 1000 (100%) means the whole 5625 * cache is scanned every second. 5626 */ 5627 how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq); 5628 5629 do { 5630 outstanding = *countp; 5631 for (i = 0; i < outstanding; i++) { 5632 scrub_ecache(how_many); 5633 } 5634 } while (atomic_add_32_nv(countp, -outstanding)); 5635 5636 return (DDI_INTR_CLAIMED); 5637 } 5638 5639 /* 5640 * Timeout function to reenable CE 5641 */ 5642 static void 5643 cpu_delayed_check_ce_errors(void *arg) 5644 { 5645 if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg, 5646 TQ_NOSLEEP)) { 5647 (void) timeout(cpu_delayed_check_ce_errors, arg, 5648 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); 5649 } 5650 } 5651 5652 /* 5653 * CE Deferred Re-enable after trap. 5654 * 5655 * When the CPU gets a disrupting trap for any of the errors 5656 * controlled by the CEEN bit, CEEN is disabled in the trap handler 5657 * immediately. To eliminate the possibility of multiple CEs causing 5658 * recursive stack overflow in the trap handler, we cannot 5659 * reenable CEEN while still running in the trap handler. Instead, 5660 * after a CE is logged on a CPU, we schedule a timeout function, 5661 * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs 5662 * seconds. This function will check whether any further CEs 5663 * have occurred on that CPU, and if none have, will reenable CEEN. 5664 * 5665 * If further CEs have occurred while CEEN is disabled, another 5666 * timeout will be scheduled. This is to ensure that the CPU can 5667 * make progress in the face of CE 'storms', and that it does not 5668 * spend all its time logging CE errors. 5669 */ 5670 static void 5671 cpu_check_ce_errors(void *arg) 5672 { 5673 int cpuid = (int)arg; 5674 cpu_t *cp; 5675 5676 /* 5677 * We acquire cpu_lock. 5678 */ 5679 ASSERT(curthread->t_pil == 0); 5680 5681 /* 5682 * verify that the cpu is still around, DR 5683 * could have got there first ... 5684 */ 5685 mutex_enter(&cpu_lock); 5686 cp = cpu_get(cpuid); 5687 if (cp == NULL) { 5688 mutex_exit(&cpu_lock); 5689 return; 5690 } 5691 /* 5692 * make sure we don't migrate across CPUs 5693 * while checking our CE status. 5694 */ 5695 kpreempt_disable(); 5696 5697 /* 5698 * If we are running on the CPU that got the 5699 * CE, we can do the checks directly. 5700 */ 5701 if (cp->cpu_id == CPU->cpu_id) { 5702 mutex_exit(&cpu_lock); 5703 cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0); 5704 kpreempt_enable(); 5705 return; 5706 } 5707 kpreempt_enable(); 5708 5709 /* 5710 * send an x-call to get the CPU that originally 5711 * got the CE to do the necessary checks. If we can't 5712 * send the x-call, reschedule the timeout, otherwise we 5713 * lose CEEN forever on that CPU. 
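 * (cpu_check_ce() requires the flag as the first x-call argument,
 * which is why TIMEOUT_CEEN_CHECK is passed directly to xc_one()
 * below.)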
5714 */ 5715 if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) { 5716 xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce, 5717 TIMEOUT_CEEN_CHECK, 0); 5718 mutex_exit(&cpu_lock); 5719 } else { 5720 /* 5721 * When the CPU is not accepting xcalls, or 5722 * the processor is offlined, we don't want to 5723 * incur the extra overhead of trying to schedule the 5724 * CE timeout indefinitely. However, we don't want to lose 5725 * CE checking forever. 5726 * 5727 * Keep rescheduling the timeout, accepting the additional 5728 * overhead as the cost of correctness in the case where we get 5729 * a CE, disable CEEN, offline the CPU during 5730 * the timeout interval, and then online it at some 5731 * point in the future. This is unlikely given the short 5732 * cpu_ceen_delay_secs. 5733 */ 5734 mutex_exit(&cpu_lock); 5735 (void) timeout(cpu_delayed_check_ce_errors, (void *)cp->cpu_id, 5736 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); 5737 } 5738 } 5739 5740 /* 5741 * This routine will check whether CEs have occurred while 5742 * CEEN is disabled. Any CEs detected will be logged and, if 5743 * possible, scrubbed. 5744 * 5745 * The memscrubber will also use this routine to clear any errors 5746 * caused by its scrubbing with CEEN disabled. 5747 * 5748 * flag == SCRUBBER_CEEN_CHECK 5749 * called from memscrubber, just check/scrub, no reset 5750 * paddr physical addr. for start of scrub pages 5751 * vaddr virtual addr. for scrub area 5752 * psz page size of area to be scrubbed 5753 * 5754 * flag == TIMEOUT_CEEN_CHECK 5755 * timeout function has triggered, reset timeout or CEEN 5756 * 5757 * Note: We must not migrate cpus during this function. This can be 5758 * achieved by one of: 5759 * - invoking as target of an x-call in which case we're at XCALL_PIL. 5760 * The flag value must be the first xcall argument. 5761 * - disabling kernel preemption. This should be done for very short 5762 * periods, so it is not suitable for SCRUBBER_CEEN_CHECK where we might 5763 * scrub an extended area with cpu_check_block. The call for 5764 * TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept 5765 * brief for this case. 5766 * - binding to a cpu, e.g. with thread_affinity_set(). This is used 5767 * in the SCRUBBER_CEEN_CHECK case, but is not practical for 5768 * the TIMEOUT_CEEN_CHECK because both need cpu_lock. 5769 */ 5770 void 5771 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz) 5772 { 5773 ch_cpu_errors_t cpu_error_regs; 5774 uint64_t ec_err_enable; 5775 uint64_t page_offset; 5776 5777 /* Read AFSR */ 5778 get_cpu_error_state(&cpu_error_regs); 5779 5780 /* 5781 * If no CEEN errors have occurred during the timeout 5782 * interval, it is safe to re-enable CEEN and exit. 5783 */ 5784 if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) { 5785 if (flag == TIMEOUT_CEEN_CHECK && 5786 !((ec_err_enable = get_error_enable()) & EN_REG_CEEN)) 5787 set_error_enable(ec_err_enable | EN_REG_CEEN); 5788 return; 5789 } 5790 5791 /* 5792 * Ensure that CEEN was not reenabled (maybe by DR) before 5793 * we log/clear the error. 5794 */ 5795 if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN) 5796 set_error_enable(ec_err_enable & ~EN_REG_CEEN); 5797 5798 /* 5799 * log/clear the CE. If CE_CEEN_DEFER is passed, the 5800 * timeout will be rescheduled when the error is logged.
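 * CE_CEEN_DEFER is passed only when none of the outstanding AFSR bits
 * are in cpu_ce_not_deferred; otherwise, in the TIMEOUT_CEEN_CHECK
 * case, CEEN is simply re-enabled at the bottom of this function once
 * the error has been logged and cleared.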
5801 */ 5802 if (!(cpu_error_regs.afsr & cpu_ce_not_deferred)) 5803 cpu_ce_detected(&cpu_error_regs, 5804 CE_CEEN_DEFER | CE_CEEN_TIMEOUT); 5805 else 5806 cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT); 5807 5808 /* 5809 * If the memory scrubber runs while CEEN is 5810 * disabled, (or if CEEN is disabled during the 5811 * scrub as a result of a CE being triggered by 5812 * it), the range being scrubbed will not be 5813 * completely cleaned. If there are multiple CEs 5814 * in the range at most two of these will be dealt 5815 * with, (one by the trap handler and one by the 5816 * timeout). It is also possible that none are dealt 5817 * with, (CEEN disabled and another CE occurs before 5818 * the timeout triggers). So to ensure that the 5819 * memory is actually scrubbed, we have to access each 5820 * memory location in the range and then check whether 5821 * that access causes a CE. 5822 */ 5823 if (flag == SCRUBBER_CEEN_CHECK && va) { 5824 if ((cpu_error_regs.afar >= pa) && 5825 (cpu_error_regs.afar < (pa + psz))) { 5826 /* 5827 * Force a load from physical memory for each 5828 * 64-byte block, then check AFSR to determine 5829 * whether this access caused an error. 5830 * 5831 * This is a slow way to do a scrub, but as it will 5832 * only be invoked when the memory scrubber actually 5833 * triggered a CE, it should not happen too 5834 * frequently. 5835 * 5836 * cut down what we need to check as the scrubber 5837 * has verified up to AFAR, so get its offset 5838 * into the page and start there. 5839 */ 5840 page_offset = (uint64_t)(cpu_error_regs.afar & 5841 (psz - 1)); 5842 va = (caddr_t)(va + (P2ALIGN(page_offset, 64))); 5843 psz -= (uint_t)(P2ALIGN(page_offset, 64)); 5844 cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)), 5845 psz); 5846 } 5847 } 5848 5849 /* 5850 * Reset error enable if this CE is not masked. 5851 */ 5852 if ((flag == TIMEOUT_CEEN_CHECK) && 5853 (cpu_error_regs.afsr & cpu_ce_not_deferred)) 5854 set_error_enable(ec_err_enable | EN_REG_CEEN); 5855 5856 } 5857 5858 /* 5859 * Attempt a cpu logout for an error that we did not trap for, such 5860 * as a CE noticed with CEEN off. It is assumed that we are still running 5861 * on the cpu that took the error and that we cannot migrate. Returns 5862 * 1 on success, otherwise 0. 5863 */ 5864 static int 5865 cpu_ce_delayed_ec_logout(uint64_t afar) 5866 { 5867 ch_cpu_logout_t *clop; 5868 5869 if (CPU_PRIVATE(CPU) == NULL) 5870 return (0); 5871 5872 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); 5873 if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) != 5874 LOGOUT_INVALID) 5875 return (0); 5876 5877 cpu_delayed_logout(afar, clop); 5878 return (1); 5879 } 5880 5881 /* 5882 * We got an error while CEEN was disabled. We 5883 * need to clean up after it and log whatever 5884 * information we have on the CE.
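 * The flag describes how the CE was noticed; when it includes
 * CE_CEEN_TIMEOUT, cpu_log_and_clear_ce() below refreshes the per-CPU
 * logout area from the live error registers before logging.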
5885 */ 5886 void 5887 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag) 5888 { 5889 ch_async_flt_t ch_flt; 5890 struct async_flt *aflt; 5891 char pr_reason[MAX_REASON_STRING]; 5892 5893 bzero(&ch_flt, sizeof (ch_async_flt_t)); 5894 ch_flt.flt_trapped_ce = flag; 5895 aflt = (struct async_flt *)&ch_flt; 5896 aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK; 5897 ch_flt.afsr_ext = cpu_error_regs->afsr_ext; 5898 ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) | 5899 (cpu_error_regs->afsr & C_AFSR_ALL_ERRS); 5900 aflt->flt_addr = cpu_error_regs->afar; 5901 #if defined(SERRANO) 5902 ch_flt.afar2 = cpu_error_regs->afar2; 5903 #endif /* SERRANO */ 5904 aflt->flt_pc = NULL; 5905 aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0); 5906 aflt->flt_tl = 0; 5907 aflt->flt_panic = 0; 5908 cpu_log_and_clear_ce(&ch_flt); 5909 5910 /* 5911 * check if we caused any errors during cleanup 5912 */ 5913 if (clear_errors(&ch_flt)) { 5914 pr_reason[0] = '\0'; 5915 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 5916 NULL); 5917 } 5918 } 5919 5920 /* 5921 * Log/clear CEEN-controlled disrupting errors 5922 */ 5923 static void 5924 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt) 5925 { 5926 struct async_flt *aflt; 5927 uint64_t afsr, afsr_errs; 5928 ch_cpu_logout_t *clop; 5929 char pr_reason[MAX_REASON_STRING]; 5930 on_trap_data_t *otp = curthread->t_ontrap; 5931 5932 aflt = (struct async_flt *)ch_flt; 5933 afsr = aflt->flt_stat; 5934 afsr_errs = ch_flt->afsr_errs; 5935 aflt->flt_id = gethrtime_waitfree(); 5936 aflt->flt_bus_id = getprocessorid(); 5937 aflt->flt_inst = CPU->cpu_id; 5938 aflt->flt_prot = AFLT_PROT_NONE; 5939 aflt->flt_class = CPU_FAULT; 5940 aflt->flt_status = ECC_C_TRAP; 5941 5942 pr_reason[0] = '\0'; 5943 /* 5944 * Get the CPU log out info for Disrupting Trap. 5945 */ 5946 if (CPU_PRIVATE(CPU) == NULL) { 5947 clop = NULL; 5948 ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID; 5949 } else { 5950 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); 5951 } 5952 5953 if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) { 5954 ch_cpu_errors_t cpu_error_regs; 5955 5956 get_cpu_error_state(&cpu_error_regs); 5957 (void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar); 5958 clop->clo_data.chd_afsr = cpu_error_regs.afsr; 5959 clop->clo_data.chd_afar = cpu_error_regs.afar; 5960 clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext; 5961 clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr; 5962 clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar; 5963 clop->clo_sdw_data.chd_afsr_ext = 5964 cpu_error_regs.shadow_afsr_ext; 5965 #if defined(SERRANO) 5966 clop->clo_data.chd_afar2 = cpu_error_regs.afar2; 5967 #endif /* SERRANO */ 5968 ch_flt->flt_data_incomplete = 1; 5969 5970 /* 5971 * The logging/clear code expects AFSR/AFAR to be cleared. 5972 * The trap handler does it for CEEN enabled errors 5973 * so we need to do it here. 5974 */ 5975 set_cpu_error_state(&cpu_error_regs); 5976 } 5977 5978 #if defined(JALAPENO) || defined(SERRANO) 5979 /* 5980 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno. 5981 * For Serrano, even though we do have the AFAR, we still do the 5982 * scrub on the RCE side since that's where the error type can 5983 * be properly classified as intermittent, persistent, etc. 5984 * 5985 * CE/RCE: If error is in memory and AFAR is valid, scrub the memory. 5986 * Must scrub memory before cpu_queue_events, as scrubbing memory sets 5987 * the flt_status bits.
5988 */ 5989 if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) && 5990 (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) || 5991 cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) { 5992 cpu_ce_scrub_mem_err(aflt, B_TRUE); 5993 } 5994 #else /* JALAPENO || SERRANO */ 5995 /* 5996 * CE/EMC: If error is in memory and AFAR is valid, scrub the memory. 5997 * Must scrub memory before cpu_queue_events, as scrubbing memory sets 5998 * the flt_status bits. 5999 */ 6000 if (afsr & (C_AFSR_CE|C_AFSR_EMC)) { 6001 if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) || 6002 cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) { 6003 cpu_ce_scrub_mem_err(aflt, B_TRUE); 6004 } 6005 } 6006 6007 #endif /* JALAPENO || SERRANO */ 6008 6009 /* 6010 * Update flt_prot if this error occurred under on_trap protection. 6011 */ 6012 if (otp != NULL && (otp->ot_prot & OT_DATA_EC)) 6013 aflt->flt_prot = AFLT_PROT_EC; 6014 6015 /* 6016 * Queue events on the async event queue, one event per error bit. 6017 */ 6018 if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 || 6019 (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) { 6020 ch_flt->flt_type = CPU_INV_AFSR; 6021 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, 6022 (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue, 6023 aflt->flt_panic); 6024 } 6025 6026 /* 6027 * Zero out + invalidate CPU logout. 6028 */ 6029 if (clop) { 6030 bzero(clop, sizeof (ch_cpu_logout_t)); 6031 clop->clo_data.chd_afar = LOGOUT_INVALID; 6032 } 6033 6034 /* 6035 * If either a CPC, WDC or EDC error has occurred while CEEN 6036 * was disabled, we need to flush either the entire 6037 * E$ or an E$ line. 6038 */ 6039 #if defined(JALAPENO) || defined(SERRANO) 6040 if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC)) 6041 #else /* JALAPENO || SERRANO */ 6042 if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC | 6043 C_AFSR_L3_CPC | C_AFSR_L3_WDC)) 6044 #endif /* JALAPENO || SERRANO */ 6045 cpu_error_ecache_flush(ch_flt); 6046 6047 } 6048 6049 /* 6050 * depending on the error type, we determine whether we 6051 * need to flush the entire ecache or just a line. 6052 */ 6053 static int 6054 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt) 6055 { 6056 struct async_flt *aflt; 6057 uint64_t afsr; 6058 uint64_t afsr_errs = ch_flt->afsr_errs; 6059 6060 aflt = (struct async_flt *)ch_flt; 6061 afsr = aflt->flt_stat; 6062 6063 /* 6064 * If we got multiple errors, no point in trying 6065 * the individual cases, just flush the whole cache 6066 */ 6067 if (afsr & C_AFSR_ME) { 6068 return (ECACHE_FLUSH_ALL); 6069 } 6070 6071 /* 6072 * If either a CPC, WDC or EDC error has occurred while CEEN 6073 * was disabled, we need to flush entire E$. We can't just 6074 * flush the cache line affected as the ME bit 6075 * is not set when multiple correctable errors of the same 6076 * type occur, so we might have multiple CPC or EDC errors, 6077 * with only the first recorded. 6078 */ 6079 #if defined(JALAPENO) || defined(SERRANO) 6080 if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) { 6081 #else /* JALAPENO || SERRANO */ 6082 if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC | 6083 C_AFSR_L3_EDC | C_AFSR_L3_WDC)) { 6084 #endif /* JALAPENO || SERRANO */ 6085 return (ECACHE_FLUSH_ALL); 6086 } 6087 6088 #if defined(JALAPENO) || defined(SERRANO) 6089 /* 6090 * If only UE or RUE is set, flush the Ecache line, otherwise 6091 * flush the entire Ecache. 
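 * For example, an AFSR recording UE and nothing else maps to
 * ECACHE_FLUSH_LINE, while UE together with, say, CE fails the
 * equality test below and maps to ECACHE_FLUSH_ALL.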
6092 */ 6093 if (afsr & (C_AFSR_UE|C_AFSR_RUE)) { 6094 if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE || 6095 (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) { 6096 return (ECACHE_FLUSH_LINE); 6097 } else { 6098 return (ECACHE_FLUSH_ALL); 6099 } 6100 } 6101 #else /* JALAPENO || SERRANO */ 6102 /* 6103 * If UE only is set, flush the Ecache line, otherwise 6104 * flush the entire Ecache. 6105 */ 6106 if (afsr_errs & C_AFSR_UE) { 6107 if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == 6108 C_AFSR_UE) { 6109 return (ECACHE_FLUSH_LINE); 6110 } else { 6111 return (ECACHE_FLUSH_ALL); 6112 } 6113 } 6114 #endif /* JALAPENO || SERRANO */ 6115 6116 /* 6117 * EDU: If EDU only is set, flush the ecache line, otherwise 6118 * flush the entire Ecache. 6119 */ 6120 if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) { 6121 if (((afsr_errs & ~C_AFSR_EDU) == 0) || 6122 ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) { 6123 return (ECACHE_FLUSH_LINE); 6124 } else { 6125 return (ECACHE_FLUSH_ALL); 6126 } 6127 } 6128 6129 /* 6130 * BERR: If BERR only is set, flush the Ecache line, otherwise 6131 * flush the entire Ecache. 6132 */ 6133 if (afsr_errs & C_AFSR_BERR) { 6134 if ((afsr_errs & ~C_AFSR_BERR) == 0) { 6135 return (ECACHE_FLUSH_LINE); 6136 } else { 6137 return (ECACHE_FLUSH_ALL); 6138 } 6139 } 6140 6141 return (0); 6142 } 6143 6144 void 6145 cpu_error_ecache_flush(ch_async_flt_t *ch_flt) 6146 { 6147 int ecache_flush_flag = 6148 cpu_error_ecache_flush_required(ch_flt); 6149 6150 /* 6151 * Flush Ecache line or entire Ecache based on above checks. 6152 */ 6153 if (ecache_flush_flag == ECACHE_FLUSH_ALL) 6154 cpu_flush_ecache(); 6155 else if (ecache_flush_flag == ECACHE_FLUSH_LINE) { 6156 cpu_flush_ecache_line(ch_flt); 6157 } 6158 6159 } 6160 6161 /* 6162 * Extract the PA portion from the E$ tag. 6163 */ 6164 uint64_t 6165 cpu_ectag_to_pa(int setsize, uint64_t tag) 6166 { 6167 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6168 return (JG_ECTAG_TO_PA(setsize, tag)); 6169 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6170 return (PN_L3TAG_TO_PA(tag)); 6171 else 6172 return (CH_ECTAG_TO_PA(setsize, tag)); 6173 } 6174 6175 /* 6176 * Convert the E$ tag PA into an E$ subblock index. 6177 */ 6178 static int 6179 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr) 6180 { 6181 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6182 return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr)); 6183 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6184 /* Panther has only one subblock per line */ 6185 return (0); 6186 else 6187 return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr)); 6188 } 6189 6190 /* 6191 * All subblocks in an E$ line must be invalid for 6192 * the line to be invalid. 6193 */ 6194 int 6195 cpu_ectag_line_invalid(int cachesize, uint64_t tag) 6196 { 6197 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6198 return (JG_ECTAG_LINE_INVALID(cachesize, tag)); 6199 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6200 return (PN_L3_LINE_INVALID(tag)); 6201 else 6202 return (CH_ECTAG_LINE_INVALID(cachesize, tag)); 6203 } 6204 6205 /* 6206 * Extract state bits for a subblock given the tag. Note that for Panther 6207 * this works on both l2 and l3 tags. 
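 * For Panther the subblock address is ignored and the state comes
 * straight from the low tag bits (tag & CH_ECSTATE_MASK), consistent
 * with Panther lines having a single subblock (see
 * cpu_ectag_pa_to_subblk() above).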
6208 */ 6209 static int 6210 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag) 6211 { 6212 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6213 return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag)); 6214 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6215 return (tag & CH_ECSTATE_MASK); 6216 else 6217 return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag)); 6218 } 6219 6220 /* 6221 * Cpu specific initialization. 6222 */ 6223 void 6224 cpu_mp_init(void) 6225 { 6226 #ifdef CHEETAHPLUS_ERRATUM_25 6227 if (cheetah_sendmondo_recover) { 6228 cheetah_nudge_init(); 6229 } 6230 #endif 6231 } 6232 6233 void 6234 cpu_ereport_post(struct async_flt *aflt) 6235 { 6236 char *cpu_type, buf[FM_MAX_CLASS]; 6237 nv_alloc_t *nva = NULL; 6238 nvlist_t *ereport, *detector, *resource; 6239 errorq_elem_t *eqep; 6240 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 6241 char unum[UNUM_NAMLEN]; 6242 int len = 0; 6243 uint8_t msg_type; 6244 plat_ecc_ch_async_flt_t plat_ecc_ch_flt; 6245 6246 if (aflt->flt_panic || panicstr) { 6247 eqep = errorq_reserve(ereport_errorq); 6248 if (eqep == NULL) 6249 return; 6250 ereport = errorq_elem_nvl(ereport_errorq, eqep); 6251 nva = errorq_elem_nva(ereport_errorq, eqep); 6252 } else { 6253 ereport = fm_nvlist_create(nva); 6254 } 6255 6256 /* 6257 * Create the scheme "cpu" FMRI. 6258 */ 6259 detector = fm_nvlist_create(nva); 6260 resource = fm_nvlist_create(nva); 6261 switch (cpunodes[aflt->flt_inst].implementation) { 6262 case CHEETAH_IMPL: 6263 cpu_type = FM_EREPORT_CPU_USIII; 6264 break; 6265 case CHEETAH_PLUS_IMPL: 6266 cpu_type = FM_EREPORT_CPU_USIIIplus; 6267 break; 6268 case JALAPENO_IMPL: 6269 cpu_type = FM_EREPORT_CPU_USIIIi; 6270 break; 6271 case SERRANO_IMPL: 6272 cpu_type = FM_EREPORT_CPU_USIIIiplus; 6273 break; 6274 case JAGUAR_IMPL: 6275 cpu_type = FM_EREPORT_CPU_USIV; 6276 break; 6277 case PANTHER_IMPL: 6278 cpu_type = FM_EREPORT_CPU_USIVplus; 6279 break; 6280 default: 6281 cpu_type = FM_EREPORT_CPU_UNSUPPORTED; 6282 break; 6283 } 6284 (void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL, 6285 aflt->flt_inst, (uint8_t)cpunodes[aflt->flt_inst].version, 6286 cpunodes[aflt->flt_inst].device_id); 6287 6288 /* 6289 * Encode all the common data into the ereport. 6290 */ 6291 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", 6292 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class); 6293 6294 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 6295 fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1), 6296 detector, NULL); 6297 6298 /* 6299 * Encode the error specific data that was saved in 6300 * the async_flt structure into the ereport. 6301 */ 6302 cpu_payload_add_aflt(aflt, ereport, resource, 6303 &plat_ecc_ch_flt.ecaf_afar_status, 6304 &plat_ecc_ch_flt.ecaf_synd_status); 6305 6306 if (aflt->flt_panic || panicstr) { 6307 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 6308 } else { 6309 (void) fm_ereport_post(ereport, EVCH_TRYHARD); 6310 fm_nvlist_destroy(ereport, FM_NVA_FREE); 6311 fm_nvlist_destroy(detector, FM_NVA_FREE); 6312 fm_nvlist_destroy(resource, FM_NVA_FREE); 6313 } 6314 /* 6315 * Send the enhanced error information (plat_ecc_error2_data_t) 6316 * to the SC only if it can process it. 6317 */ 6318 6319 if (&plat_ecc_capability_sc_get && 6320 plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) { 6321 msg_type = cpu_flt_bit_to_plat_error(aflt); 6322 if (msg_type != PLAT_ECC_ERROR2_NONE) { 6323 /* 6324 * If afar status is not invalid, do a unum lookup.
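 * (When the AFAR status is invalid the unum is simply left empty;
 * the message is still sent below with the shadow AFSR/AFAR data.)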
6325 */ 6326 if (plat_ecc_ch_flt.ecaf_afar_status != 6327 AFLT_STAT_INVALID) { 6328 (void) cpu_get_mem_unum_aflt( 6329 plat_ecc_ch_flt.ecaf_synd_status, aflt, 6330 unum, UNUM_NAMLEN, &len); 6331 } else { 6332 unum[0] = '\0'; 6333 } 6334 plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar; 6335 plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr; 6336 plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext; 6337 plat_ecc_ch_flt.ecaf_sdw_afsr_ext = 6338 ch_flt->flt_sdw_afsr_ext; 6339 6340 if (&plat_log_fruid_error2) 6341 plat_log_fruid_error2(msg_type, unum, aflt, 6342 &plat_ecc_ch_flt); 6343 } 6344 } 6345 } 6346 6347 void 6348 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 6349 { 6350 int status; 6351 ddi_fm_error_t de; 6352 6353 bzero(&de, sizeof (ddi_fm_error_t)); 6354 6355 de.fme_version = DDI_FME_VERSION; 6356 de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, 6357 FM_ENA_FMT1); 6358 de.fme_flag = expected; 6359 de.fme_bus_specific = (void *)aflt->flt_addr; 6360 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 6361 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 6362 aflt->flt_panic = 1; 6363 } 6364 6365 void 6366 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 6367 errorq_t *eqp, uint_t flag) 6368 { 6369 struct async_flt *aflt = (struct async_flt *)payload; 6370 6371 aflt->flt_erpt_class = error_class; 6372 errorq_dispatch(eqp, payload, payload_sz, flag); 6373 } 6374 6375 /* 6376 * This routine may be called by the IO module, but does not do 6377 * anything in this cpu module. The SERD algorithm is handled by 6378 * the cpumem-diagnosis engine instead. 6379 */ 6380 /*ARGSUSED*/ 6381 void 6382 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 6383 {} 6384 6385 void 6386 adjust_hw_copy_limits(int ecache_size) 6387 { 6388 /* 6389 * Set hw copy limits. 6390 * 6391 * /etc/system will be parsed later and can override one or more 6392 * of these settings. 6393 * 6394 * At this time, ecache size seems only mildly relevant. 6395 * We seem to run into issues with the d-cache and stalls 6396 * we see on misses. 6397 * 6398 * Cycle measurement indicates that 2 byte aligned copies fare 6399 * little better than doing things with VIS at around 512 bytes. 6400 * 4 byte aligned shows promise until around 1024 bytes. 8 byte 6401 * aligned is faster whenever the source and destination data are 6402 * in cache and the total size is less than 2 Kbytes. The 2K 6403 * limit seems to be driven by the 2K write cache. 6404 * When more than 2K of copies are done in non-VIS mode, stores 6405 * back up in the write cache. In VIS mode, the write cache is 6406 * bypassed, allowing faster cache-line writes aligned on cache 6407 * boundaries. 6408 * 6409 * In addition, in non-VIS mode, there is no prefetching, so 6410 * for larger copies, the advantage of prefetching to avoid even 6411 * occasional cache misses is enough to justify using the VIS code. 6412 * 6413 * During testing, it was discovered that netbench ran 3% slower 6414 * when hw_copy_limit_8 was 2K or larger. Apparently for server 6415 * applications, data is only used once (copied to the output 6416 * buffer, then copied by the network device off the system). Using 6417 * the VIS copy saves more L2 cache state. Network copies are 6418 * around 1.3K to 1.5K in size for historical reasons. 6419 * 6420 * Therefore, a limit of 1K bytes will be used for the 8 byte 6421 * aligned copy even for large caches and 8 MB ecache.
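 * (Assuming VIS_COPY_THRESHOLD is 256 bytes, the 4 * VIS_COPY_THRESHOLD
 * default set below is that 1K limit.)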
The 6422 * infrastructure to allow different limits for different sized 6423 * caches is kept to allow further tuning in later releases. 6424 */ 6425 6426 if (min_ecache_size == 0 && use_hw_bcopy) { 6427 /* 6428 * First time through - should be before /etc/system 6429 * is read. 6430 * Could skip the checks for zero but this lets us 6431 * preserve any debugger rewrites. 6432 */ 6433 if (hw_copy_limit_1 == 0) { 6434 hw_copy_limit_1 = VIS_COPY_THRESHOLD; 6435 priv_hcl_1 = hw_copy_limit_1; 6436 } 6437 if (hw_copy_limit_2 == 0) { 6438 hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD; 6439 priv_hcl_2 = hw_copy_limit_2; 6440 } 6441 if (hw_copy_limit_4 == 0) { 6442 hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD; 6443 priv_hcl_4 = hw_copy_limit_4; 6444 } 6445 if (hw_copy_limit_8 == 0) { 6446 hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; 6447 priv_hcl_8 = hw_copy_limit_8; 6448 } 6449 min_ecache_size = ecache_size; 6450 } else { 6451 /* 6452 * MP initialization. Called *after* /etc/system has 6453 * been parsed. One CPU has already been initialized. 6454 * Need to cater for /etc/system having scragged one 6455 * of our values. 6456 */ 6457 if (ecache_size == min_ecache_size) { 6458 /* 6459 * Same size ecache. We do nothing unless we 6460 * have a pessimistic ecache setting. In that 6461 * case we become more optimistic (if the cache is 6462 * large enough). 6463 */ 6464 if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) { 6465 /* 6466 * Need to adjust hw_copy_limit* from our 6467 * pessimistic uniprocessor value to a more 6468 * optimistic UP value *iff* it hasn't been 6469 * reset. 6470 */ 6471 if ((ecache_size > 1048576) && 6472 (priv_hcl_8 == hw_copy_limit_8)) { 6473 if (ecache_size <= 2097152) 6474 hw_copy_limit_8 = 4 * 6475 VIS_COPY_THRESHOLD; 6476 else if (ecache_size <= 4194304) 6477 hw_copy_limit_8 = 4 * 6478 VIS_COPY_THRESHOLD; 6479 else 6480 hw_copy_limit_8 = 4 * 6481 VIS_COPY_THRESHOLD; 6482 priv_hcl_8 = hw_copy_limit_8; 6483 } 6484 } 6485 } else if (ecache_size < min_ecache_size) { 6486 /* 6487 * A different ecache size. Can this even happen? 6488 */ 6489 if (priv_hcl_8 == hw_copy_limit_8) { 6490 /* 6491 * The previous value that we set 6492 * is unchanged (i.e., it hasn't been 6493 * scragged by /etc/system). Rewrite it. 6494 */ 6495 if (ecache_size <= 1048576) 6496 hw_copy_limit_8 = 8 * 6497 VIS_COPY_THRESHOLD; 6498 else if (ecache_size <= 2097152) 6499 hw_copy_limit_8 = 8 * 6500 VIS_COPY_THRESHOLD; 6501 else if (ecache_size <= 4194304) 6502 hw_copy_limit_8 = 8 * 6503 VIS_COPY_THRESHOLD; 6504 else 6505 hw_copy_limit_8 = 10 * 6506 VIS_COPY_THRESHOLD; 6507 priv_hcl_8 = hw_copy_limit_8; 6508 min_ecache_size = ecache_size; 6509 } 6510 } 6511 } 6512 } 6513 6514 /* 6515 * Called from illegal instruction trap handler to see if we can attribute 6516 * the trap to a fpras check. 6517 */ 6518 int 6519 fpras_chktrap(struct regs *rp) 6520 { 6521 int op; 6522 struct fpras_chkfngrp *cgp; 6523 uintptr_t tpc = (uintptr_t)rp->r_pc; 6524 6525 if (fpras_chkfngrps == NULL) 6526 return (0); 6527 6528 cgp = &fpras_chkfngrps[CPU->cpu_id]; 6529 for (op = 0; op < FPRAS_NCOPYOPS; ++op) { 6530 if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 && 6531 tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult) 6532 break; 6533 } 6534 if (op == FPRAS_NCOPYOPS) 6535 return (0); 6536 6537 /* 6538 * This is an fpRAS failure caught through an illegal 6539 * instruction - trampoline. 
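 * Redirecting pc/npc to the per-operation trampoline and returning 1
 * tells the trap handler that the illegal instruction has been
 * attributed to an fpras check; fpras_failure() below handles the
 * reporting for that case.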
6540 */ 6541 rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline; 6542 rp->r_npc = rp->r_pc + 4; 6543 return (1); 6544 } 6545 6546 /* 6547 * fpras_failure is called when a fpras check detects a bad calculation 6548 * result or an illegal instruction trap is attributed to an fpras 6549 * check. In all cases we are still bound to the CPU. 6550 */ 6551 int 6552 fpras_failure(int op, int how) 6553 { 6554 int use_hw_bcopy_orig, use_hw_bzero_orig; 6555 uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig; 6556 ch_async_flt_t ch_flt; 6557 struct async_flt *aflt = (struct async_flt *)&ch_flt; 6558 struct fpras_chkfn *sfp, *cfp; 6559 uint32_t *sip, *cip; 6560 int i; 6561 6562 /* 6563 * We're running on a sick CPU. Avoid further FPU use at least for 6564 * the time in which we dispatch an ereport and (if applicable) panic. 6565 */ 6566 use_hw_bcopy_orig = use_hw_bcopy; 6567 use_hw_bzero_orig = use_hw_bzero; 6568 hcl1_orig = hw_copy_limit_1; 6569 hcl2_orig = hw_copy_limit_2; 6570 hcl4_orig = hw_copy_limit_4; 6571 hcl8_orig = hw_copy_limit_8; 6572 use_hw_bcopy = use_hw_bzero = 0; 6573 hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 = 6574 hw_copy_limit_8 = 0; 6575 6576 bzero(&ch_flt, sizeof (ch_async_flt_t)); 6577 aflt->flt_id = gethrtime_waitfree(); 6578 aflt->flt_class = CPU_FAULT; 6579 aflt->flt_inst = CPU->cpu_id; 6580 aflt->flt_status = (how << 8) | op; 6581 aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY; 6582 ch_flt.flt_type = CPU_FPUERR; 6583 6584 /* 6585 * We must panic if the copy operation had no lofault protection - 6586 * i.e., don't panic for copyin, copyout, kcopy and bcopy called 6587 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy. 6588 */ 6589 aflt->flt_panic = (curthread->t_lofault == NULL); 6590 6591 /* 6592 * XOR the source instruction block with the copied instruction 6593 * block - this will show us which bit(s) are corrupted. 6594 */ 6595 sfp = (struct fpras_chkfn *)fpras_chkfn_type1; 6596 cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op]; 6597 if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) { 6598 sip = &sfp->fpras_blk0[0]; 6599 cip = &cfp->fpras_blk0[0]; 6600 } else { 6601 sip = &sfp->fpras_blk1[0]; 6602 cip = &cfp->fpras_blk1[0]; 6603 } 6604 for (i = 0; i < 16; ++i, ++sip, ++cip) 6605 ch_flt.flt_fpdata[i] = *sip ^ *cip; 6606 6607 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt, 6608 sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); 6609 6610 if (aflt->flt_panic) 6611 fm_panic("FPU failure on CPU %d", CPU->cpu_id); 6612 6613 /* 6614 * We get here for copyin/copyout and kcopy or bcopy where the 6615 * caller has used on_fault. We will flag the error so that 6616 * the process may be killed. The trap_async_hwerr mechanism will 6617 * take appropriate further action (such as a reboot, contract 6618 * notification, etc.). Since we may be continuing, we will 6619 * restore the global hardware copy acceleration switches. 6620 * 6621 * When we return from this function to the copy function we want to 6622 * avoid potentially bad data being used, i.e. we want the affected 6623 * copy function to return an error. The caller should therefore 6624 * invoke its lofault handler (which always exists for these functions) 6625 * which will return the appropriate error.
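 * (Setting ASYNC_HWERR in the pcb and posting an AST via aston() below
 * is what hands the thread to the trap_async_hwerr mechanism mentioned
 * above.)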
6626 */ 6627 ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR; 6628 aston(curthread); 6629 6630 use_hw_bcopy = use_hw_bcopy_orig; 6631 use_hw_bzero = use_hw_bzero_orig; 6632 hw_copy_limit_1 = hcl1_orig; 6633 hw_copy_limit_2 = hcl2_orig; 6634 hw_copy_limit_4 = hcl4_orig; 6635 hw_copy_limit_8 = hcl8_orig; 6636 6637 return (1); 6638 } 6639 6640 #define VIS_BLOCKSIZE 64 6641 6642 int 6643 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 6644 { 6645 int ret, watched; 6646 6647 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 6648 ret = dtrace_blksuword32(addr, data, 0); 6649 if (watched) 6650 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 6651 6652 return (ret); 6653 } 6654 6655 /* 6656 * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the 6657 * faulted cpu into that state). Cross-trap to the faulted cpu to clear 6658 * CEEN from the EER to disable traps for further disrupting error types 6659 * on that cpu. We could cross-call instead, but that has a larger 6660 * instruction and data footprint than cross-trapping, and the cpu is known 6661 * to be faulted. 6662 */ 6663 6664 void 6665 cpu_faulted_enter(struct cpu *cp) 6666 { 6667 xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS); 6668 } 6669 6670 /* 6671 * Called when a cpu leaves the CPU_FAULTED state to return to one of 6672 * offline, spare, or online (by the cpu requesting this state change). 6673 * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of 6674 * disrupting error bits that have accumulated without trapping, then 6675 * we cross-trap to re-enable CEEN controlled traps. 6676 */ 6677 void 6678 cpu_faulted_exit(struct cpu *cp) 6679 { 6680 ch_cpu_errors_t cpu_error_regs; 6681 6682 cpu_error_regs.afsr = C_AFSR_CECC_ERRS; 6683 if (IS_PANTHER(cpunodes[cp->cpu_id].implementation)) 6684 cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS; 6685 xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state, 6686 (uint64_t)&cpu_error_regs, 0); 6687 6688 xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS); 6689 } 6690 6691 /* 6692 * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by 6693 * the errors in the original AFSR, 0 otherwise. 6694 * 6695 * For all procs, if the initial error was a BERR or TO, then it is possible 6696 * that we may have caused a secondary BERR or TO in the process of logging the 6697 * initial error via cpu_run_bus_error_handlers(). If this is the case, then 6698 * if the request was protected a panic is still not necessary; if not 6699 * protected, aft_panic is already set - so either way there's no need 6700 * to set aft_panic for the secondary error. 6701 * 6702 * For Cheetah and Jalapeno, if the original error was a UE which occurred on 6703 * a store merge, then the error handling code will call cpu_deferred_error(). 6704 * When clear_errors() is called, it will determine that secondary errors have 6705 * occurred - in particular, the store merge also caused an EDU and WDU that 6706 * weren't discovered until this point. 6707 * 6708 * We do three checks to verify that we are in this case. If we pass all three 6709 * checks, we return 1 to indicate that we should not panic. If any unexpected 6710 * errors occur, we return 0. 6711 * 6712 * For Cheetah+ and derivative procs, the store merge causes a DUE, which is 6713 * handled in cpu_disrupting_errors(). Since this function is not even called 6714 * in the case we are interested in, we just return 0 for these processors.
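 *
 * Concretely, the three checks for the Cheetah/Jalapeno store-merge
 * case are: the original AFSR recorded a UE and only a UE, the new AFSR
 * records nothing beyond EDU and/or WDU, and both AFARs fall within the
 * same 64-byte line.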
6715 */ 6716 /*ARGSUSED*/ 6717 static int 6718 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs, 6719 uint64_t t_afar) 6720 { 6721 #if defined(CHEETAH_PLUS) 6722 #else /* CHEETAH_PLUS */ 6723 struct async_flt *aflt = (struct async_flt *)ch_flt; 6724 #endif /* CHEETAH_PLUS */ 6725 6726 /* 6727 * Was the original error a BERR or TO and only a BERR or TO 6728 * (multiple errors are also OK) 6729 */ 6730 if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) { 6731 /* 6732 * Is the new error a BERR or TO and only a BERR or TO 6733 * (multiple errors are also OK) 6734 */ 6735 if ((ch_flt->afsr_errs & 6736 ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) 6737 return (1); 6738 } 6739 6740 #if defined(CHEETAH_PLUS) 6741 return (0); 6742 #else /* CHEETAH_PLUS */ 6743 /* 6744 * Now look for secondary effects of a UE on cheetah/jalapeno 6745 * 6746 * Check the original error was a UE, and only a UE. Note that 6747 * the ME bit will cause us to fail this check. 6748 */ 6749 if (t_afsr_errs != C_AFSR_UE) 6750 return (0); 6751 6752 /* 6753 * Check the secondary errors were exclusively an EDU and/or WDU. 6754 */ 6755 if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0) 6756 return (0); 6757 6758 /* 6759 * Check the AFAR of the original error and secondary errors 6760 * match to the 64-byte boundary 6761 */ 6762 if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64)) 6763 return (0); 6764 6765 /* 6766 * We've passed all the checks, so it's a secondary error! 6767 */ 6768 return (1); 6769 #endif /* CHEETAH_PLUS */ 6770 } 6771 6772 /* 6773 * Translate the flt_bit or flt_type into an error type. First, flt_bit 6774 * is checked for any valid errors. If found, the error type is 6775 * returned. If not found, the flt_type is checked for L1$ parity errors. 
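 * (The flt_type fallback also covers I-TLB and D-TLB parity errors,
 * per the default case below.)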
6776 */ 6777 /*ARGSUSED*/ 6778 static uint8_t 6779 cpu_flt_bit_to_plat_error(struct async_flt *aflt) 6780 { 6781 #if defined(JALAPENO) 6782 /* 6783 * Currently, logging errors to the SC is not supported on Jalapeno 6784 */ 6785 return (PLAT_ECC_ERROR2_NONE); 6786 #else 6787 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 6788 6789 switch (ch_flt->flt_bit) { 6790 case C_AFSR_CE: 6791 return (PLAT_ECC_ERROR2_CE); 6792 case C_AFSR_UCC: 6793 case C_AFSR_EDC: 6794 case C_AFSR_WDC: 6795 case C_AFSR_CPC: 6796 return (PLAT_ECC_ERROR2_L2_CE); 6797 case C_AFSR_EMC: 6798 return (PLAT_ECC_ERROR2_EMC); 6799 case C_AFSR_IVC: 6800 return (PLAT_ECC_ERROR2_IVC); 6801 case C_AFSR_UE: 6802 return (PLAT_ECC_ERROR2_UE); 6803 case C_AFSR_UCU: 6804 case C_AFSR_EDU: 6805 case C_AFSR_WDU: 6806 case C_AFSR_CPU: 6807 return (PLAT_ECC_ERROR2_L2_UE); 6808 case C_AFSR_IVU: 6809 return (PLAT_ECC_ERROR2_IVU); 6810 case C_AFSR_TO: 6811 return (PLAT_ECC_ERROR2_TO); 6812 case C_AFSR_BERR: 6813 return (PLAT_ECC_ERROR2_BERR); 6814 #if defined(CHEETAH_PLUS) 6815 case C_AFSR_L3_EDC: 6816 case C_AFSR_L3_UCC: 6817 case C_AFSR_L3_CPC: 6818 case C_AFSR_L3_WDC: 6819 return (PLAT_ECC_ERROR2_L3_CE); 6820 case C_AFSR_IMC: 6821 return (PLAT_ECC_ERROR2_IMC); 6822 case C_AFSR_TSCE: 6823 return (PLAT_ECC_ERROR2_L2_TSCE); 6824 case C_AFSR_THCE: 6825 return (PLAT_ECC_ERROR2_L2_THCE); 6826 case C_AFSR_L3_MECC: 6827 return (PLAT_ECC_ERROR2_L3_MECC); 6828 case C_AFSR_L3_THCE: 6829 return (PLAT_ECC_ERROR2_L3_THCE); 6830 case C_AFSR_L3_CPU: 6831 case C_AFSR_L3_EDU: 6832 case C_AFSR_L3_UCU: 6833 case C_AFSR_L3_WDU: 6834 return (PLAT_ECC_ERROR2_L3_UE); 6835 case C_AFSR_DUE: 6836 return (PLAT_ECC_ERROR2_DUE); 6837 case C_AFSR_DTO: 6838 return (PLAT_ECC_ERROR2_DTO); 6839 case C_AFSR_DBERR: 6840 return (PLAT_ECC_ERROR2_DBERR); 6841 #endif /* CHEETAH_PLUS */ 6842 default: 6843 switch (ch_flt->flt_type) { 6844 #if defined(CPU_IMP_L1_CACHE_PARITY) 6845 case CPU_IC_PARITY: 6846 return (PLAT_ECC_ERROR2_IPE); 6847 case CPU_DC_PARITY: 6848 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 6849 if (ch_flt->parity_data.dpe.cpl_cache == 6850 CPU_PC_PARITY) { 6851 return (PLAT_ECC_ERROR2_PCACHE); 6852 } 6853 } 6854 return (PLAT_ECC_ERROR2_DPE); 6855 #endif /* CPU_IMP_L1_CACHE_PARITY */ 6856 case CPU_ITLB_PARITY: 6857 return (PLAT_ECC_ERROR2_ITLB); 6858 case CPU_DTLB_PARITY: 6859 return (PLAT_ECC_ERROR2_DTLB); 6860 default: 6861 return (PLAT_ECC_ERROR2_NONE); 6862 } 6863 } 6864 #endif /* JALAPENO */ 6865 } 6866