1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/ddi.h> 32 #include <sys/sysmacros.h> 33 #include <sys/archsystm.h> 34 #include <sys/vmsystm.h> 35 #include <sys/machparam.h> 36 #include <sys/machsystm.h> 37 #include <sys/machthread.h> 38 #include <sys/cpu.h> 39 #include <sys/cmp.h> 40 #include <sys/elf_SPARC.h> 41 #include <vm/vm_dep.h> 42 #include <vm/hat_sfmmu.h> 43 #include <vm/seg_kpm.h> 44 #include <sys/cpuvar.h> 45 #include <sys/cheetahregs.h> 46 #include <sys/us3_module.h> 47 #include <sys/async.h> 48 #include <sys/cmn_err.h> 49 #include <sys/debug.h> 50 #include <sys/dditypes.h> 51 #include <sys/prom_debug.h> 52 #include <sys/prom_plat.h> 53 #include <sys/cpu_module.h> 54 #include <sys/sysmacros.h> 55 #include <sys/intreg.h> 56 #include <sys/clock.h> 57 #include <sys/platform_module.h> 58 #include <sys/machtrap.h> 59 #include <sys/ontrap.h> 60 #include <sys/panic.h> 61 #include <sys/memlist.h> 62 #include <sys/bootconf.h> 63 #include <sys/ivintr.h> 64 #include <sys/atomic.h> 65 #include <sys/taskq.h> 66 #include <sys/note.h> 67 #include <sys/ndifm.h> 68 #include <sys/ddifm.h> 69 #include <sys/fm/protocol.h> 70 #include <sys/fm/util.h> 71 #include <sys/fm/cpu/UltraSPARC-III.h> 72 #include <sys/fpras_impl.h> 73 #include <sys/dtrace.h> 74 #include <sys/watchpoint.h> 75 #include <sys/plat_ecc_unum.h> 76 #include <sys/cyclic.h> 77 #include <sys/errorq.h> 78 #include <sys/errclassify.h> 79 80 #ifdef CHEETAHPLUS_ERRATUM_25 81 #include <sys/xc_impl.h> 82 #endif /* CHEETAHPLUS_ERRATUM_25 */ 83 84 /* 85 * Note that 'Cheetah PRM' refers to: 86 * SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III 87 */ 88 89 /* 90 * Per CPU pointers to physical address of TL>0 logout data areas. 91 * These pointers have to be in the kernel nucleus to avoid MMU 92 * misses. 93 */ 94 uint64_t ch_err_tl1_paddrs[NCPU]; 95 96 /* 97 * One statically allocated structure to use during startup/DR 98 * to prevent unnecessary panics. 99 */ 100 ch_err_tl1_data_t ch_err_tl1_data; 101 102 /* 103 * Per CPU pending error at TL>0, used by level15 softint handler 104 */ 105 uchar_t ch_err_tl1_pending[NCPU]; 106 107 /* 108 * For deferred CE re-enable after trap. 109 */ 110 taskq_t *ch_check_ce_tq; 111 112 /* 113 * Internal functions. 
114 */ 115 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep); 116 static void cpu_log_diag_info(ch_async_flt_t *ch_flt); 117 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason, 118 ecc_type_to_info_t *eccp, ch_diag_data_t *cdp); 119 static int clear_ecc(struct async_flt *ecc); 120 #if defined(CPU_IMP_ECACHE_ASSOC) 121 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt); 122 #endif 123 static int cpu_ecache_set_size(struct cpu *cp); 124 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag); 125 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr); 126 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag); 127 static int cpu_ectag_pa_to_subblk_state(int cachesize, 128 uint64_t subaddr, uint64_t tag); 129 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt); 130 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit); 131 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit); 132 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit); 133 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit); 134 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit); 135 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp); 136 static void cpu_scrubphys(struct async_flt *aflt); 137 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *, 138 int *, int *); 139 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *); 140 static void cpu_ereport_init(struct async_flt *aflt); 141 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t); 142 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt); 143 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen, 144 uint64_t nceen, ch_cpu_logout_t *clop); 145 static int cpu_ce_delayed_ec_logout(uint64_t); 146 static int cpu_matching_ecache_line(uint64_t, void *, int, int *); 147 148 #ifdef CHEETAHPLUS_ERRATUM_25 149 static int mondo_recover_proc(uint16_t, int); 150 static void cheetah_nudge_init(void); 151 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, 152 cyc_time_t *when); 153 static void cheetah_nudge_buddy(void); 154 #endif /* CHEETAHPLUS_ERRATUM_25 */ 155 156 #if defined(CPU_IMP_L1_CACHE_PARITY) 157 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt); 158 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index); 159 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt, 160 ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word); 161 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt); 162 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index); 163 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt); 164 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index); 165 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *); 166 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *); 167 #endif /* CPU_IMP_L1_CACHE_PARITY */ 168 169 int (*p2get_mem_info)(int synd_code, uint64_t paddr, 170 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 171 int *segsp, int *banksp, int *mcidp); 172 173 /* 174 * This table is used to determine which bit(s) is(are) bad when an ECC 175 * error occurs. The array is indexed by an 9-bit syndrome. The entries 176 * of this array have the following semantics: 177 * 178 * 00-127 The number of the bad bit, when only one bit is bad. 179 * 128 ECC bit C0 is bad. 
180 * 129 ECC bit C1 is bad. 181 * 130 ECC bit C2 is bad. 182 * 131 ECC bit C3 is bad. 183 * 132 ECC bit C4 is bad. 184 * 133 ECC bit C5 is bad. 185 * 134 ECC bit C6 is bad. 186 * 135 ECC bit C7 is bad. 187 * 136 ECC bit C8 is bad. 188 * 137-143 reserved for Mtag Data and ECC. 189 * 144(M2) Two bits are bad within a nibble. 190 * 145(M3) Three bits are bad within a nibble. 191 * 146(M3) Four bits are bad within a nibble. 192 * 147(M) Multiple bits (5 or more) are bad. 193 * 148 NO bits are bad. 194 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5. 195 */ 196 197 #define C0 128 198 #define C1 129 199 #define C2 130 200 #define C3 131 201 #define C4 132 202 #define C5 133 203 #define C6 134 204 #define C7 135 205 #define C8 136 206 #define MT0 137 /* Mtag Data bit 0 */ 207 #define MT1 138 208 #define MT2 139 209 #define MTC0 140 /* Mtag Check bit 0 */ 210 #define MTC1 141 211 #define MTC2 142 212 #define MTC3 143 213 #define M2 144 214 #define M3 145 215 #define M4 146 216 #define M 147 217 #define NA 148 218 #if defined(JALAPENO) || defined(SERRANO) 219 #define S003 149 /* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */ 220 #define S003MEM 150 /* Syndrome 0x003 => likely from WDU/WBP */ 221 #define SLAST S003MEM /* last special syndrome */ 222 #else /* JALAPENO || SERRANO */ 223 #define S003 149 /* Syndrome 0x003 => likely from EDU:ST */ 224 #define S071 150 /* Syndrome 0x071 => likely from WDU/CPU */ 225 #define S11C 151 /* Syndrome 0x11c => likely from BERR/DBERR */ 226 #define SLAST S11C /* last special syndrome */ 227 #endif /* JALAPENO || SERRANO */ 228 #if defined(JALAPENO) || defined(SERRANO) 229 #define BPAR0 152 /* syndrom 152 through 167 for bus parity */ 230 #define BPAR15 167 231 #endif /* JALAPENO || SERRANO */ 232 233 static uint8_t ecc_syndrome_tab[] = 234 { 235 NA, C0, C1, S003, C2, M2, M3, 47, C3, M2, M2, 53, M2, 41, 29, M, 236 C4, M, M, 50, M2, 38, 25, M2, M2, 33, 24, M2, 11, M, M2, 16, 237 C5, M, M, 46, M2, 37, 19, M2, M, 31, 32, M, 7, M2, M2, 10, 238 M2, 40, 13, M2, 59, M, M2, 66, M, M2, M2, 0, M2, 67, 71, M, 239 C6, M, M, 43, M, 36, 18, M, M2, 49, 15, M, 63, M2, M2, 6, 240 M2, 44, 28, M2, M, M2, M2, 52, 68, M2, M2, 62, M2, M3, M3, M4, 241 M2, 26, 106, M2, 64, M, M2, 2, 120, M, M2, M3, M, M3, M3, M4, 242 #if defined(JALAPENO) || defined(SERRANO) 243 116, M2, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3, 244 #else /* JALAPENO || SERRANO */ 245 116, S071, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3, 246 #endif /* JALAPENO || SERRANO */ 247 C7, M2, M, 42, M, 35, 17, M2, M, 45, 14, M2, 21, M2, M2, 5, 248 M, 27, M, M, 99, M, M, 3, 114, M2, M2, 20, M2, M3, M3, M, 249 M2, 23, 113, M2, 112, M2, M, 51, 95, M, M2, M3, M2, M3, M3, M2, 250 103, M, M2, M3, M2, M3, M3, M4, M2, 48, M, M, 73, M2, M, M3, 251 M2, 22, 110, M2, 109, M2, M, 9, 108, M2, M, M3, M2, M3, M3, M, 252 102, M2, M, M, M2, M3, M3, M, M2, M3, M3, M2, M, M4, M, M3, 253 98, M, M2, M3, M2, M, M3, M4, M2, M3, M3, M4, M3, M, M, M, 254 M2, M3, M3, M, M3, M, M, M, 56, M4, M, M3, M4, M, M, M, 255 C8, M, M2, 39, M, 34, 105, M2, M, 30, 104, M, 101, M, M, 4, 256 #if defined(JALAPENO) || defined(SERRANO) 257 M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, M2, M, M3, M, 258 #else /* JALAPENO || SERRANO */ 259 M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, S11C, M, M3, M, 260 #endif /* JALAPENO || SERRANO */ 261 M2, 97, 82, M2, 78, M2, M2, 1, 96, M, M, M, M, M, M3, M2, 262 94, M, M2, M3, M2, M, M3, M, M2, M, 79, M, 69, M, M4, M, 263 M2, 93, 92, M, 91, M, M2, 8, 90, M2, M2, M, M, M, M, M4, 264 
89, M, M, M3, M2, M3, M3, M, M, M, M3, M2, M3, M2, M, M3, 265 86, M, M2, M3, M2, M, M3, M, M2, M, M3, M, M3, M, M, M3, 266 M, M, M3, M2, M3, M2, M4, M, 60, M, M2, M3, M4, M, M, M2, 267 M2, 88, 85, M2, 84, M, M2, 55, 81, M2, M2, M3, M2, M3, M3, M4, 268 77, M, M, M, M2, M3, M, M, M2, M3, M3, M4, M3, M2, M, M, 269 74, M, M2, M3, M, M, M3, M, M, M, M3, M, M3, M, M4, M3, 270 M2, 70, 107, M4, 65, M2, M2, M, 127, M, M, M, M2, M3, M3, M, 271 80, M2, M2, 72, M, 119, 118, M, M2, 126, 76, M, 125, M, M4, M3, 272 M2, 115, 124, M, 75, M, M, M3, 61, M, M4, M, M4, M, M, M, 273 M, 123, 122, M4, 121, M4, M, M3, 117, M2, M2, M3, M4, M3, M, M, 274 111, M, M, M, M4, M3, M3, M, M, M, M3, M, M3, M2, M, M 275 }; 276 277 #define ESYND_TBL_SIZE (sizeof (ecc_syndrome_tab) / sizeof (uint8_t)) 278 279 #if !(defined(JALAPENO) || defined(SERRANO)) 280 /* 281 * This table is used to determine which bit(s) is(are) bad when a Mtag 282 * error occurs. The array is indexed by an 4-bit ECC syndrome. The entries 283 * of this array have the following semantics: 284 * 285 * -1 Invalid mtag syndrome. 286 * 137 Mtag Data 0 is bad. 287 * 138 Mtag Data 1 is bad. 288 * 139 Mtag Data 2 is bad. 289 * 140 Mtag ECC 0 is bad. 290 * 141 Mtag ECC 1 is bad. 291 * 142 Mtag ECC 2 is bad. 292 * 143 Mtag ECC 3 is bad. 293 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6. 294 */ 295 short mtag_syndrome_tab[] = 296 { 297 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2, MT1, M2, MT2, M2, M2 298 }; 299 300 #define MSYND_TBL_SIZE (sizeof (mtag_syndrome_tab) / sizeof (short)) 301 302 #else /* !(JALAPENO || SERRANO) */ 303 304 #define BSYND_TBL_SIZE 16 305 306 #endif /* !(JALAPENO || SERRANO) */ 307 308 /* 309 * CE initial classification and subsequent action lookup table 310 */ 311 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE]; 312 static int ce_disp_inited; 313 314 /* 315 * Set to disable leaky and partner check for memory correctables 316 */ 317 int ce_xdiag_off; 318 319 /* 320 * The following are not incremented atomically so are indicative only 321 */ 322 static int ce_xdiag_drops; 323 static int ce_xdiag_lkydrops; 324 static int ce_xdiag_ptnrdrops; 325 static int ce_xdiag_bad; 326 327 /* 328 * CE leaky check callback structure 329 */ 330 typedef struct { 331 struct async_flt *lkycb_aflt; 332 errorq_t *lkycb_eqp; 333 errorq_elem_t *lkycb_eqep; 334 } ce_lkychk_cb_t; 335 336 /* 337 * defines for various ecache_flush_flag's 338 */ 339 #define ECACHE_FLUSH_LINE 1 340 #define ECACHE_FLUSH_ALL 2 341 342 /* 343 * STICK sync 344 */ 345 #define STICK_ITERATION 10 346 #define MAX_TSKEW 1 347 #define EV_A_START 0 348 #define EV_A_END 1 349 #define EV_B_START 2 350 #define EV_B_END 3 351 #define EVENTS 4 352 353 static int64_t stick_iter = STICK_ITERATION; 354 static int64_t stick_tsk = MAX_TSKEW; 355 356 typedef enum { 357 EVENT_NULL = 0, 358 SLAVE_START, 359 SLAVE_CONT, 360 MASTER_START 361 } event_cmd_t; 362 363 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL; 364 static int64_t timestamp[EVENTS]; 365 static volatile int slave_done; 366 367 #ifdef DEBUG 368 #define DSYNC_ATTEMPTS 64 369 typedef struct { 370 int64_t skew_val[DSYNC_ATTEMPTS]; 371 } ss_t; 372 373 ss_t stick_sync_stats[NCPU]; 374 #endif /* DEBUG */ 375 376 /* 377 * Maximum number of contexts for Cheetah. 378 */ 379 #define MAX_NCTXS (1 << 13) 380 381 /* Will be set !NULL for Cheetah+ and derivatives. 
*/ 382 uchar_t *ctx_pgsz_array = NULL; 383 #if defined(CPU_IMP_DUAL_PAGESIZE) 384 static uchar_t ctx_pgsz_arr[MAX_NCTXS]; 385 uint_t disable_dual_pgsz = 0; 386 #endif /* CPU_IMP_DUAL_PAGESIZE */ 387 388 /* 389 * Save the cache bootup state for use when internal 390 * caches are to be re-enabled after an error occurs. 391 */ 392 uint64_t cache_boot_state; 393 394 /* 395 * PA[22:0] represent Displacement in Safari configuration space. 396 */ 397 uint_t root_phys_addr_lo_mask = 0x7fffffu; 398 399 bus_config_eclk_t bus_config_eclk[] = { 400 #if defined(JALAPENO) || defined(SERRANO) 401 {JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1}, 402 {JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2}, 403 {JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32}, 404 #else /* JALAPENO || SERRANO */ 405 {SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1}, 406 {SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2}, 407 {SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32}, 408 #endif /* JALAPENO || SERRANO */ 409 {0, 0} 410 }; 411 412 /* 413 * Interval for deferred CEEN reenable 414 */ 415 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS; 416 417 /* 418 * set in /etc/system to control logging of user BERR/TO's 419 */ 420 int cpu_berr_to_verbose = 0; 421 422 /* 423 * set to 0 in /etc/system to defer CEEN reenable for all CEs 424 */ 425 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED; 426 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT; 427 428 /* 429 * Set of all offline cpus 430 */ 431 cpuset_t cpu_offline_set; 432 433 static void cpu_delayed_check_ce_errors(void *); 434 static void cpu_check_ce_errors(void *); 435 void cpu_error_ecache_flush(ch_async_flt_t *); 436 static int cpu_error_ecache_flush_required(ch_async_flt_t *); 437 static void cpu_log_and_clear_ce(ch_async_flt_t *); 438 void cpu_ce_detected(ch_cpu_errors_t *, int); 439 440 /* 441 * CE Leaky check timeout in microseconds. This is chosen to be twice the 442 * memory refresh interval of current DIMMs (64ms). After initial fix that 443 * gives at least one full refresh cycle in which the cell can leak 444 * (whereafter further refreshes simply reinforce any incorrect bit value). 445 */ 446 clock_t cpu_ce_lkychk_timeout_usec = 128000; 447 448 /* 449 * CE partner check partner caching period in seconds 450 */ 451 int cpu_ce_ptnr_cachetime_sec = 60; 452 453 /* 454 * Sets trap table entry ttentry by overwriting eight instructions from ttlabel 455 */ 456 #define CH_SET_TRAP(ttentry, ttlabel) \ 457 bcopy((const void *)&ttlabel, &ttentry, 32); \ 458 flush_instr_mem((caddr_t)&ttentry, 32); 459 460 static int min_ecache_size; 461 static uint_t priv_hcl_1; 462 static uint_t priv_hcl_2; 463 static uint_t priv_hcl_4; 464 static uint_t priv_hcl_8; 465 466 void 467 cpu_setup(void) 468 { 469 extern int at_flags; 470 extern int disable_delay_tlb_flush, delay_tlb_flush; 471 extern int cpc_has_overflow_intr; 472 extern int disable_text_largepages; 473 extern int use_text_pgsz4m; 474 475 /* 476 * Setup chip-specific trap handlers. 477 */ 478 cpu_init_trap(); 479 480 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); 481 482 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3; 483 484 /* 485 * save the cache bootup state. 486 */ 487 cache_boot_state = get_dcu() & DCU_CACHE; 488 489 /* 490 * Use the maximum number of contexts available for Cheetah 491 * unless it has been tuned for debugging. 492 * We are checking against 0 here since this value can be patched 493 * while booting. 
It can not be patched via /etc/system since it 494 * will be patched too late and thus cause the system to panic. 495 */ 496 if (nctxs == 0) 497 nctxs = MAX_NCTXS; 498 499 /* 500 * Due to the number of entries in the fully-associative tlb 501 * this may have to be tuned lower than in spitfire. 502 */ 503 pp_slots = MIN(8, MAXPP_SLOTS); 504 505 /* 506 * Block stores do not invalidate all pages of the d$, pagecopy 507 * et. al. need virtual translations with virtual coloring taken 508 * into consideration. prefetch/ldd will pollute the d$ on the 509 * load side. 510 */ 511 pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE; 512 513 if (use_page_coloring) { 514 do_pg_coloring = 1; 515 if (use_virtual_coloring) 516 do_virtual_coloring = 1; 517 } 518 519 isa_list = 520 "sparcv9+vis2 sparcv9+vis sparcv9 " 521 "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus " 522 "sparcv8 sparcv8-fsmuld sparcv7 sparc"; 523 524 /* 525 * On Panther-based machines, this should 526 * also include AV_SPARC_POPC too 527 */ 528 cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2; 529 530 /* 531 * On cheetah, there's no hole in the virtual address space 532 */ 533 hole_start = hole_end = 0; 534 535 /* 536 * The kpm mapping window. 537 * kpm_size: 538 * The size of a single kpm range. 539 * The overall size will be: kpm_size * vac_colors. 540 * kpm_vbase: 541 * The virtual start address of the kpm range within the kernel 542 * virtual address space. kpm_vbase has to be kpm_size aligned. 543 */ 544 kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */ 545 kpm_size_shift = 43; 546 kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */ 547 kpm_smallpages = 1; 548 549 /* 550 * The traptrace code uses either %tick or %stick for 551 * timestamping. We have %stick so we can use it. 552 */ 553 traptrace_use_stick = 1; 554 555 /* 556 * Cheetah has a performance counter overflow interrupt 557 */ 558 cpc_has_overflow_intr = 1; 559 560 /* 561 * Use cheetah flush-all support 562 */ 563 if (!disable_delay_tlb_flush) 564 delay_tlb_flush = 1; 565 566 #if defined(CPU_IMP_DUAL_PAGESIZE) 567 /* 568 * Use Cheetah+ and later dual page size support. 569 */ 570 if (!disable_dual_pgsz) { 571 ctx_pgsz_array = ctx_pgsz_arr; 572 } 573 #endif /* CPU_IMP_DUAL_PAGESIZE */ 574 575 /* 576 * Declare that this architecture/cpu combination does fpRAS. 577 */ 578 fpras_implemented = 1; 579 580 /* 581 * Enable 4M pages to be used for mapping user text by default. Don't 582 * use large pages for initialized data segments since we may not know 583 * at exec() time what should be the preferred large page size for DTLB 584 * programming. 585 */ 586 use_text_pgsz4m = 1; 587 disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) | 588 (1 << TTE32M) | (1 << TTE256M); 589 590 /* 591 * Setup CE lookup table 592 */ 593 CE_INITDISPTBL_POPULATE(ce_disp_table); 594 ce_disp_inited = 1; 595 } 596 597 /* 598 * Called by setcpudelay 599 */ 600 void 601 cpu_init_tick_freq(void) 602 { 603 /* 604 * For UltraSPARC III and beyond we want to use the 605 * system clock rate as the basis for low level timing, 606 * due to support of mixed speed CPUs and power managment. 
607 */ 608 if (system_clock_freq == 0) 609 cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq"); 610 611 sys_tick_freq = system_clock_freq; 612 } 613 614 #ifdef CHEETAHPLUS_ERRATUM_25 615 /* 616 * Tunables 617 */ 618 int cheetah_bpe_off = 0; 619 int cheetah_sendmondo_recover = 1; 620 int cheetah_sendmondo_fullscan = 0; 621 int cheetah_sendmondo_recover_delay = 5; 622 623 #define CHEETAH_LIVELOCK_MIN_DELAY 1 624 625 /* 626 * Recovery Statistics 627 */ 628 typedef struct cheetah_livelock_entry { 629 int cpuid; /* fallen cpu */ 630 int buddy; /* cpu that ran recovery */ 631 clock_t lbolt; /* when recovery started */ 632 hrtime_t recovery_time; /* time spent in recovery */ 633 } cheetah_livelock_entry_t; 634 635 #define CHEETAH_LIVELOCK_NENTRY 32 636 637 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY]; 638 int cheetah_livelock_entry_nxt; 639 640 #define CHEETAH_LIVELOCK_ENTRY_NEXT(statp) { \ 641 statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt; \ 642 if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) { \ 643 cheetah_livelock_entry_nxt = 0; \ 644 } \ 645 } 646 647 #define CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val) statp->item = val 648 649 struct { 650 hrtime_t hrt; /* maximum recovery time */ 651 int recovery; /* recovered */ 652 int full_claimed; /* maximum pages claimed in full recovery */ 653 int proc_entry; /* attempted to claim TSB */ 654 int proc_tsb_scan; /* tsb scanned */ 655 int proc_tsb_partscan; /* tsb partially scanned */ 656 int proc_tsb_fullscan; /* whole tsb scanned */ 657 int proc_claimed; /* maximum pages claimed in tsb scan */ 658 int proc_user; /* user thread */ 659 int proc_kernel; /* kernel thread */ 660 int proc_onflt; /* bad stack */ 661 int proc_cpu; /* null cpu */ 662 int proc_thread; /* null thread */ 663 int proc_proc; /* null proc */ 664 int proc_as; /* null as */ 665 int proc_hat; /* null hat */ 666 int proc_hat_inval; /* hat contents don't make sense */ 667 int proc_hat_busy; /* hat is changing TSBs */ 668 int proc_tsb_reloc; /* TSB skipped because being relocated */ 669 int proc_cnum_bad; /* cnum out of range */ 670 int proc_cnum; /* last cnum processed */ 671 tte_t proc_tte; /* last tte processed */ 672 } cheetah_livelock_stat; 673 674 #define CHEETAH_LIVELOCK_STAT(item) cheetah_livelock_stat.item++ 675 676 #define CHEETAH_LIVELOCK_STATSET(item, value) \ 677 cheetah_livelock_stat.item = value 678 679 #define CHEETAH_LIVELOCK_MAXSTAT(item, value) { \ 680 if (value > cheetah_livelock_stat.item) \ 681 cheetah_livelock_stat.item = value; \ 682 } 683 684 /* 685 * Attempt to recover a cpu by claiming every cache line as saved 686 * in the TSB that the non-responsive cpu is using. Since we can't 687 * grab any adaptive lock, this is at best an attempt to do so. Because 688 * we don't grab any locks, we must operate under the protection of 689 * on_fault(). 690 * 691 * Return 1 if cpuid could be recovered, 0 if failed. 
692 */ 693 int 694 mondo_recover_proc(uint16_t cpuid, int bn) 695 { 696 label_t ljb; 697 cpu_t *cp; 698 kthread_t *t; 699 proc_t *p; 700 struct as *as; 701 struct hat *hat; 702 short cnum; 703 struct tsb_info *tsbinfop; 704 struct tsbe *tsbep; 705 caddr_t tsbp; 706 caddr_t end_tsbp; 707 uint64_t paddr; 708 uint64_t idsr; 709 u_longlong_t pahi, palo; 710 int pages_claimed = 0; 711 tte_t tsbe_tte; 712 int tried_kernel_tsb = 0; 713 714 CHEETAH_LIVELOCK_STAT(proc_entry); 715 716 if (on_fault(&ljb)) { 717 CHEETAH_LIVELOCK_STAT(proc_onflt); 718 goto badstruct; 719 } 720 721 if ((cp = cpu[cpuid]) == NULL) { 722 CHEETAH_LIVELOCK_STAT(proc_cpu); 723 goto badstruct; 724 } 725 726 if ((t = cp->cpu_thread) == NULL) { 727 CHEETAH_LIVELOCK_STAT(proc_thread); 728 goto badstruct; 729 } 730 731 if ((p = ttoproc(t)) == NULL) { 732 CHEETAH_LIVELOCK_STAT(proc_proc); 733 goto badstruct; 734 } 735 736 if ((as = p->p_as) == NULL) { 737 CHEETAH_LIVELOCK_STAT(proc_as); 738 goto badstruct; 739 } 740 741 if ((hat = as->a_hat) == NULL) { 742 CHEETAH_LIVELOCK_STAT(proc_hat); 743 goto badstruct; 744 } 745 746 if (hat != ksfmmup) { 747 CHEETAH_LIVELOCK_STAT(proc_user); 748 if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) { 749 CHEETAH_LIVELOCK_STAT(proc_hat_busy); 750 goto badstruct; 751 } 752 tsbinfop = hat->sfmmu_tsb; 753 if (tsbinfop == NULL) { 754 CHEETAH_LIVELOCK_STAT(proc_hat_inval); 755 goto badstruct; 756 } 757 tsbp = tsbinfop->tsb_va; 758 end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc); 759 } else { 760 CHEETAH_LIVELOCK_STAT(proc_kernel); 761 tsbinfop = NULL; 762 tsbp = ktsb_base; 763 end_tsbp = tsbp + TSB_BYTES(ktsb_sz); 764 } 765 766 /* Verify as */ 767 if (hat->sfmmu_as != as) { 768 CHEETAH_LIVELOCK_STAT(proc_hat_inval); 769 goto badstruct; 770 } 771 772 cnum = hat->sfmmu_cnum; 773 CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum); 774 775 if ((cnum < 0) || (cnum == INVALID_CONTEXT) || (cnum >= nctxs)) { 776 CHEETAH_LIVELOCK_STAT(proc_cnum_bad); 777 goto badstruct; 778 } 779 780 do { 781 CHEETAH_LIVELOCK_STAT(proc_tsb_scan); 782 783 /* 784 * Skip TSBs being relocated. This is important because 785 * we want to avoid the following deadlock scenario: 786 * 787 * 1) when we came in we set ourselves to "in recover" state. 788 * 2) when we try to touch TSB being relocated the mapping 789 * will be in the suspended state so we'll spin waiting 790 * for it to be unlocked. 791 * 3) when the CPU that holds the TSB mapping locked tries to 792 * unlock it it will send a xtrap which will fail to xcall 793 * us or the CPU we're trying to recover, and will in turn 794 * enter the mondo code. 795 * 4) since we are still spinning on the locked mapping 796 * no further progress will be made and the system will 797 * inevitably hard hang. 798 * 799 * A TSB not being relocated can't begin being relocated 800 * while we're accessing it because we check 801 * sendmondo_in_recover before relocating TSBs. 
802 */ 803 if (hat != ksfmmup && 804 (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) { 805 CHEETAH_LIVELOCK_STAT(proc_tsb_reloc); 806 goto next_tsbinfo; 807 } 808 809 for (tsbep = (struct tsbe *)tsbp; 810 tsbep < (struct tsbe *)end_tsbp; tsbep++) { 811 tsbe_tte = tsbep->tte_data; 812 813 if (tsbe_tte.tte_val == 0) { 814 /* 815 * Invalid tte 816 */ 817 continue; 818 } 819 if (tsbe_tte.tte_se) { 820 /* 821 * Don't want device registers 822 */ 823 continue; 824 } 825 if (tsbe_tte.tte_cp == 0) { 826 /* 827 * Must be cached in E$ 828 */ 829 continue; 830 } 831 CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte); 832 idsr = getidsr(); 833 if ((idsr & (IDSR_NACK_BIT(bn) | 834 IDSR_BUSY_BIT(bn))) == 0) { 835 CHEETAH_LIVELOCK_STAT(proc_tsb_partscan); 836 goto done; 837 } 838 pahi = tsbe_tte.tte_pahi; 839 palo = tsbe_tte.tte_palo; 840 paddr = (uint64_t)((pahi << 32) | 841 (palo << MMU_PAGESHIFT)); 842 claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)), 843 CH_ECACHE_SUBBLK_SIZE); 844 if ((idsr & IDSR_BUSY_BIT(bn)) == 0) { 845 shipit(cpuid, bn); 846 } 847 pages_claimed++; 848 } 849 next_tsbinfo: 850 if (tsbinfop != NULL) 851 tsbinfop = tsbinfop->tsb_next; 852 if (tsbinfop != NULL) { 853 tsbp = tsbinfop->tsb_va; 854 end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc); 855 } else if (tsbp == ktsb_base) { 856 tried_kernel_tsb = 1; 857 } else if (!tried_kernel_tsb) { 858 tsbp = ktsb_base; 859 end_tsbp = tsbp + TSB_BYTES(ktsb_sz); 860 hat = ksfmmup; 861 tsbinfop = NULL; 862 } 863 } while (tsbinfop != NULL || 864 ((tsbp == ktsb_base) && !tried_kernel_tsb)); 865 866 CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan); 867 CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed); 868 no_fault(); 869 idsr = getidsr(); 870 if ((idsr & (IDSR_NACK_BIT(bn) | 871 IDSR_BUSY_BIT(bn))) == 0) { 872 return (1); 873 } else { 874 return (0); 875 } 876 877 done: 878 no_fault(); 879 CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed); 880 return (1); 881 882 badstruct: 883 no_fault(); 884 return (0); 885 } 886 887 /* 888 * Attempt to claim ownership, temporarily, of every cache line that a 889 * non-responsive cpu might be using. This might kick that cpu out of 890 * this state. 891 * 892 * The return value indicates to the caller if we have exhausted all recovery 893 * techniques. If 1 is returned, it is useless to call this function again 894 * even for a different target CPU. 895 */ 896 int 897 mondo_recover(uint16_t cpuid, int bn) 898 { 899 struct memseg *seg; 900 uint64_t begin_pa, end_pa, cur_pa; 901 hrtime_t begin_hrt, end_hrt; 902 int retval = 0; 903 int pages_claimed = 0; 904 cheetah_livelock_entry_t *histp; 905 uint64_t idsr; 906 907 if (cas32(&sendmondo_in_recover, 0, 1) != 0) { 908 /* 909 * Wait while recovery takes place 910 */ 911 while (sendmondo_in_recover) { 912 drv_usecwait(1); 913 } 914 /* 915 * Assume we didn't claim the whole memory. If 916 * the target of this caller is not recovered, 917 * it will come back. 918 */ 919 return (retval); 920 } 921 922 CHEETAH_LIVELOCK_ENTRY_NEXT(histp) 923 CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt); 924 CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid); 925 CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id); 926 927 begin_hrt = gethrtime_waitfree(); 928 /* 929 * First try to claim the lines in the TSB the target 930 * may have been using. 931 */ 932 if (mondo_recover_proc(cpuid, bn) == 1) { 933 /* 934 * Didn't claim the whole memory 935 */ 936 goto done; 937 } 938 939 /* 940 * We tried using the TSB. The target is still 941 * not recovered. Check if complete memory scan is 942 * enabled. 
943 */ 944 if (cheetah_sendmondo_fullscan == 0) { 945 /* 946 * Full memory scan is disabled. 947 */ 948 retval = 1; 949 goto done; 950 } 951 952 /* 953 * Try claiming the whole memory. 954 */ 955 for (seg = memsegs; seg; seg = seg->next) { 956 begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT; 957 end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT; 958 for (cur_pa = begin_pa; cur_pa < end_pa; 959 cur_pa += MMU_PAGESIZE) { 960 idsr = getidsr(); 961 if ((idsr & (IDSR_NACK_BIT(bn) | 962 IDSR_BUSY_BIT(bn))) == 0) { 963 /* 964 * Didn't claim all memory 965 */ 966 goto done; 967 } 968 claimlines(cur_pa, MMU_PAGESIZE, 969 CH_ECACHE_SUBBLK_SIZE); 970 if ((idsr & IDSR_BUSY_BIT(bn)) == 0) { 971 shipit(cpuid, bn); 972 } 973 pages_claimed++; 974 } 975 } 976 977 /* 978 * We did all we could. 979 */ 980 retval = 1; 981 982 done: 983 /* 984 * Update statistics 985 */ 986 end_hrt = gethrtime_waitfree(); 987 CHEETAH_LIVELOCK_STAT(recovery); 988 CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt)); 989 CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed); 990 CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \ 991 (end_hrt - begin_hrt)); 992 993 while (cas32(&sendmondo_in_recover, 1, 0) != 1); 994 995 return (retval); 996 } 997 998 /* 999 * This is called by the cyclic framework when this CPU becomes online 1000 */ 1001 /*ARGSUSED*/ 1002 static void 1003 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) 1004 { 1005 1006 hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy; 1007 hdlr->cyh_level = CY_LOW_LEVEL; 1008 hdlr->cyh_arg = NULL; 1009 1010 /* 1011 * Stagger the start time 1012 */ 1013 when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU); 1014 if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) { 1015 cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY; 1016 } 1017 when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC; 1018 } 1019 1020 /* 1021 * Create a low level cyclic to send a xtrap to the next cpu online. 1022 * However, there's no need to have this running on a uniprocessor system. 1023 */ 1024 static void 1025 cheetah_nudge_init(void) 1026 { 1027 cyc_omni_handler_t hdlr; 1028 1029 if (max_ncpus == 1) { 1030 return; 1031 } 1032 1033 hdlr.cyo_online = cheetah_nudge_onln; 1034 hdlr.cyo_offline = NULL; 1035 hdlr.cyo_arg = NULL; 1036 1037 mutex_enter(&cpu_lock); 1038 (void) cyclic_add_omni(&hdlr); 1039 mutex_exit(&cpu_lock); 1040 } 1041 1042 /* 1043 * Cyclic handler to wake up buddy 1044 */ 1045 void 1046 cheetah_nudge_buddy(void) 1047 { 1048 /* 1049 * Disable kernel preemption to protect the cpu list 1050 */ 1051 kpreempt_disable(); 1052 if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) { 1053 xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1, 1054 0, 0); 1055 } 1056 kpreempt_enable(); 1057 } 1058 1059 #endif /* CHEETAHPLUS_ERRATUM_25 */ 1060 1061 #ifdef SEND_MONDO_STATS 1062 uint32_t x_one_stimes[64]; 1063 uint32_t x_one_ltimes[16]; 1064 uint32_t x_set_stimes[64]; 1065 uint32_t x_set_ltimes[16]; 1066 uint32_t x_set_cpus[NCPU]; 1067 uint32_t x_nack_stimes[64]; 1068 #endif 1069 1070 /* 1071 * Note: A version of this function is used by the debugger via the KDI, 1072 * and must be kept in sync with this version. Any changes made to this 1073 * function to support new chips or to accomodate errata must also be included 1074 * in the KDI-specific version. See us3_kdi.c. 
1075 */ 1076 void 1077 send_one_mondo(int cpuid) 1078 { 1079 int busy, nack; 1080 uint64_t idsr, starttick, endtick, tick, lasttick; 1081 uint64_t busymask; 1082 #ifdef CHEETAHPLUS_ERRATUM_25 1083 int recovered = 0; 1084 #endif 1085 1086 CPU_STATS_ADDQ(CPU, sys, xcalls, 1); 1087 starttick = lasttick = gettick(); 1088 shipit(cpuid, 0); 1089 endtick = starttick + xc_tick_limit; 1090 busy = nack = 0; 1091 #if defined(JALAPENO) || defined(SERRANO) 1092 /* 1093 * Lower 2 bits of the agent ID determine which BUSY/NACK pair 1094 * will be used for dispatching interrupt. For now, assume 1095 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing 1096 * issues with respect to BUSY/NACK pair usage. 1097 */ 1098 busymask = IDSR_BUSY_BIT(cpuid); 1099 #else /* JALAPENO || SERRANO */ 1100 busymask = IDSR_BUSY; 1101 #endif /* JALAPENO || SERRANO */ 1102 for (;;) { 1103 idsr = getidsr(); 1104 if (idsr == 0) 1105 break; 1106 1107 tick = gettick(); 1108 /* 1109 * If there is a big jump between the current tick 1110 * count and lasttick, we have probably hit a break 1111 * point. Adjust endtick accordingly to avoid panic. 1112 */ 1113 if (tick > (lasttick + xc_tick_jump_limit)) 1114 endtick += (tick - lasttick); 1115 lasttick = tick; 1116 if (tick > endtick) { 1117 if (panic_quiesce) 1118 return; 1119 #ifdef CHEETAHPLUS_ERRATUM_25 1120 if (cheetah_sendmondo_recover && recovered == 0) { 1121 if (mondo_recover(cpuid, 0)) { 1122 /* 1123 * We claimed the whole memory or 1124 * full scan is disabled. 1125 */ 1126 recovered++; 1127 } 1128 tick = gettick(); 1129 endtick = tick + xc_tick_limit; 1130 lasttick = tick; 1131 /* 1132 * Recheck idsr 1133 */ 1134 continue; 1135 } else 1136 #endif /* CHEETAHPLUS_ERRATUM_25 */ 1137 { 1138 cmn_err(CE_PANIC, "send mondo timeout " 1139 "(target 0x%x) [%d NACK %d BUSY]", 1140 cpuid, nack, busy); 1141 } 1142 } 1143 1144 if (idsr & busymask) { 1145 busy++; 1146 continue; 1147 } 1148 drv_usecwait(1); 1149 shipit(cpuid, 0); 1150 nack++; 1151 busy = 0; 1152 } 1153 #ifdef SEND_MONDO_STATS 1154 { 1155 int n = gettick() - starttick; 1156 if (n < 8192) 1157 x_one_stimes[n >> 7]++; 1158 else 1159 x_one_ltimes[(n >> 13) & 0xf]++; 1160 } 1161 #endif 1162 } 1163 1164 void 1165 syncfpu(void) 1166 { 1167 } 1168 1169 /* 1170 * Return processor specific async error structure 1171 * size used. 1172 */ 1173 int 1174 cpu_aflt_size(void) 1175 { 1176 return (sizeof (ch_async_flt_t)); 1177 } 1178 1179 /* 1180 * The fast_ecc_err handler transfers control here for UCU, UCC events. 1181 * Note that we flush Ecache twice, once in the fast_ecc_err handler to 1182 * flush the error that caused the UCU/UCC, then again here at the end to 1183 * flush the TL=1 trap handler code out of the Ecache, so we can minimize 1184 * the probability of getting a TL>1 Fast ECC trap when we're fielding 1185 * another Fast ECC trap. 1186 * 1187 * Cheetah+ also handles: TSCE: No additional processing required. 1188 * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT. 1189 * 1190 * Note that the p_clo_flags input is only valid in cases where the 1191 * cpu_private struct is not yet initialized (since that is the only 1192 * time that information cannot be obtained from the logout struct.) 1193 */ 1194 /*ARGSUSED*/ 1195 void 1196 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags) 1197 { 1198 ch_cpu_logout_t *clop; 1199 uint64_t ceen, nceen; 1200 1201 /* 1202 * Get the CPU log out info. 
If we can't find our CPU private 1203 * pointer, then we will have to make due without any detailed 1204 * logout information. 1205 */ 1206 if (CPU_PRIVATE(CPU) == NULL) { 1207 clop = NULL; 1208 ceen = p_clo_flags & EN_REG_CEEN; 1209 nceen = p_clo_flags & EN_REG_NCEEN; 1210 } else { 1211 clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout); 1212 ceen = clop->clo_flags & EN_REG_CEEN; 1213 nceen = clop->clo_flags & EN_REG_NCEEN; 1214 } 1215 1216 cpu_log_fast_ecc_error((caddr_t)rp->r_pc, 1217 (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop); 1218 } 1219 1220 /* 1221 * Log fast ecc error, called from either Fast ECC at TL=0 or Fast 1222 * ECC at TL>0. Need to supply either a error register pointer or a 1223 * cpu logout structure pointer. 1224 */ 1225 static void 1226 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen, 1227 uint64_t nceen, ch_cpu_logout_t *clop) 1228 { 1229 struct async_flt *aflt; 1230 ch_async_flt_t ch_flt; 1231 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; 1232 char pr_reason[MAX_REASON_STRING]; 1233 ch_cpu_errors_t cpu_error_regs; 1234 1235 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1236 /* 1237 * If no cpu logout data, then we will have to make due without 1238 * any detailed logout information. 1239 */ 1240 if (clop == NULL) { 1241 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; 1242 get_cpu_error_state(&cpu_error_regs); 1243 set_cpu_error_state(&cpu_error_regs); 1244 t_afar = cpu_error_regs.afar; 1245 t_afsr = cpu_error_regs.afsr; 1246 t_afsr_ext = cpu_error_regs.afsr_ext; 1247 #if defined(SERRANO) 1248 ch_flt.afar2 = cpu_error_regs.afar2; 1249 #endif /* SERRANO */ 1250 } else { 1251 t_afar = clop->clo_data.chd_afar; 1252 t_afsr = clop->clo_data.chd_afsr; 1253 t_afsr_ext = clop->clo_data.chd_afsr_ext; 1254 #if defined(SERRANO) 1255 ch_flt.afar2 = clop->clo_data.chd_afar2; 1256 #endif /* SERRANO */ 1257 } 1258 1259 /* 1260 * In order to simplify code, we maintain this afsr_errs 1261 * variable which holds the aggregate of AFSR and AFSR_EXT 1262 * sticky bits. 1263 */ 1264 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 1265 (t_afsr & C_AFSR_ALL_ERRS); 1266 pr_reason[0] = '\0'; 1267 1268 /* Setup the async fault structure */ 1269 aflt = (struct async_flt *)&ch_flt; 1270 aflt->flt_id = gethrtime_waitfree(); 1271 ch_flt.afsr_ext = t_afsr_ext; 1272 ch_flt.afsr_errs = t_afsr_errs; 1273 aflt->flt_stat = t_afsr; 1274 aflt->flt_addr = t_afar; 1275 aflt->flt_bus_id = getprocessorid(); 1276 aflt->flt_inst = CPU->cpu_id; 1277 aflt->flt_pc = tpc; 1278 aflt->flt_prot = AFLT_PROT_NONE; 1279 aflt->flt_class = CPU_FAULT; 1280 aflt->flt_priv = priv; 1281 aflt->flt_tl = tl; 1282 aflt->flt_status = ECC_F_TRAP; 1283 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs); 1284 1285 /* 1286 * XXXX - Phenomenal hack to get around Solaris not getting all the 1287 * cmn_err messages out to the console. The situation is a UCU (in 1288 * priv mode) which causes a WDU which causes a UE (on the retry). 1289 * The messages for the UCU and WDU are enqueued and then pulled off 1290 * the async queue via softint and syslogd starts to process them 1291 * but doesn't get them to the console. The UE causes a panic, but 1292 * since the UCU/WDU messages are already in transit, those aren't 1293 * on the async queue. The hack is to check if we have a matching 1294 * WDU event for the UCU, and if it matches, we're more than likely 1295 * going to panic with a UE, unless we're under protection. So, we 1296 * check to see if we got a matching WDU event and if we're under 1297 * protection. 
1298 * 1299 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about 1300 * looks like this: 1301 * UCU->WDU->UE 1302 * For Panther, it could look like either of these: 1303 * UCU---->WDU->L3_WDU->UE 1304 * L3_UCU->WDU->L3_WDU->UE 1305 */ 1306 if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) && 1307 aflt->flt_panic == 0 && aflt->flt_priv != 0 && 1308 curthread->t_ontrap == NULL && curthread->t_lofault == NULL) { 1309 get_cpu_error_state(&cpu_error_regs); 1310 aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) && 1311 (cpu_error_regs.afar == t_afar)); 1312 aflt->flt_panic |= ((clop == NULL) && 1313 (t_afsr_errs & C_AFSR_WDU)); 1314 } 1315 1316 /* 1317 * Queue events on the async event queue, one event per error bit. 1318 * If no events are queued or no Fast ECC events are on in the AFSR, 1319 * queue an event to complain. 1320 */ 1321 if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 || 1322 ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) { 1323 ch_flt.flt_type = CPU_INV_AFSR; 1324 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, 1325 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue, 1326 aflt->flt_panic); 1327 } 1328 1329 /* 1330 * Zero out + invalidate CPU logout. 1331 */ 1332 if (clop) { 1333 bzero(clop, sizeof (ch_cpu_logout_t)); 1334 clop->clo_data.chd_afar = LOGOUT_INVALID; 1335 } 1336 1337 /* 1338 * We carefully re-enable NCEEN and CEEN and then check if any deferred 1339 * or disrupting errors have happened. We do this because if a 1340 * deferred or disrupting error had occurred with NCEEN/CEEN off, the 1341 * trap will not be taken when NCEEN/CEEN is re-enabled. Note that 1342 * CEEN works differently on Cheetah than on Spitfire. Also, we enable 1343 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a 1344 * deferred or disrupting error happening between checking the AFSR and 1345 * enabling NCEEN/CEEN. 1346 * 1347 * Note: CEEN and NCEEN are only reenabled if they were on when trap 1348 * taken. 1349 */ 1350 set_error_enable(get_error_enable() | (nceen | ceen)); 1351 if (clear_errors(&ch_flt)) { 1352 aflt->flt_panic |= ((ch_flt.afsr_errs & 1353 (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0); 1354 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 1355 NULL); 1356 } 1357 1358 /* 1359 * Panic here if aflt->flt_panic has been set. Enqueued errors will 1360 * be logged as part of the panic flow. 1361 */ 1362 if (aflt->flt_panic) 1363 fm_panic("%sError(s)", pr_reason); 1364 1365 /* 1366 * Flushing the Ecache here gets the part of the trap handler that 1367 * is run at TL=1 out of the Ecache. 1368 */ 1369 cpu_flush_ecache(); 1370 } 1371 1372 /* 1373 * This is called via sys_trap from pil15_interrupt code if the 1374 * corresponding entry in ch_err_tl1_pending is set. Checks the 1375 * various ch_err_tl1_data structures for valid entries based on the bit 1376 * settings in the ch_err_tl1_flags entry of the structure. 
1377 */ 1378 /*ARGSUSED*/ 1379 void 1380 cpu_tl1_error(struct regs *rp, int panic) 1381 { 1382 ch_err_tl1_data_t *cl1p, cl1; 1383 int i, ncl1ps; 1384 uint64_t me_flags; 1385 uint64_t ceen, nceen; 1386 1387 if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) { 1388 cl1p = &ch_err_tl1_data; 1389 ncl1ps = 1; 1390 } else if (CPU_PRIVATE(CPU) != NULL) { 1391 cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]); 1392 ncl1ps = CH_ERR_TL1_TLMAX; 1393 } else { 1394 ncl1ps = 0; 1395 } 1396 1397 for (i = 0; i < ncl1ps; i++, cl1p++) { 1398 if (cl1p->ch_err_tl1_flags == 0) 1399 continue; 1400 1401 /* 1402 * Grab a copy of the logout data and invalidate 1403 * the logout area. 1404 */ 1405 cl1 = *cl1p; 1406 bzero(cl1p, sizeof (ch_err_tl1_data_t)); 1407 cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID; 1408 me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags); 1409 1410 /* 1411 * Log "first error" in ch_err_tl1_data. 1412 */ 1413 if (cl1.ch_err_tl1_flags & CH_ERR_FECC) { 1414 ceen = get_error_enable() & EN_REG_CEEN; 1415 nceen = get_error_enable() & EN_REG_NCEEN; 1416 cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1, 1417 1, ceen, nceen, &cl1.ch_err_tl1_logout); 1418 } 1419 #if defined(CPU_IMP_L1_CACHE_PARITY) 1420 if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) { 1421 cpu_parity_error(rp, cl1.ch_err_tl1_flags, 1422 (caddr_t)cl1.ch_err_tl1_tpc); 1423 } 1424 #endif /* CPU_IMP_L1_CACHE_PARITY */ 1425 1426 /* 1427 * Log "multiple events" in ch_err_tl1_data. Note that 1428 * we don't read and clear the AFSR/AFAR in the TL>0 code 1429 * if the structure is busy, we just do the cache flushing 1430 * we have to do and then do the retry. So the AFSR/AFAR 1431 * at this point *should* have some relevant info. If there 1432 * are no valid errors in the AFSR, we'll assume they've 1433 * already been picked up and logged. For I$/D$ parity, 1434 * we just log an event with an "Unknown" (NULL) TPC. 1435 */ 1436 if (me_flags & CH_ERR_FECC) { 1437 ch_cpu_errors_t cpu_error_regs; 1438 uint64_t t_afsr_errs; 1439 1440 /* 1441 * Get the error registers and see if there's 1442 * a pending error. If not, don't bother 1443 * generating an "Invalid AFSR" error event. 1444 */ 1445 get_cpu_error_state(&cpu_error_regs); 1446 t_afsr_errs = (cpu_error_regs.afsr_ext & 1447 C_AFSR_EXT_ALL_ERRS) | 1448 (cpu_error_regs.afsr & C_AFSR_ALL_ERRS); 1449 if (t_afsr_errs != 0) { 1450 ceen = get_error_enable() & EN_REG_CEEN; 1451 nceen = get_error_enable() & EN_REG_NCEEN; 1452 cpu_log_fast_ecc_error((caddr_t)NULL, 1, 1453 1, ceen, nceen, NULL); 1454 } 1455 } 1456 #if defined(CPU_IMP_L1_CACHE_PARITY) 1457 if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) { 1458 cpu_parity_error(rp, me_flags, (caddr_t)NULL); 1459 } 1460 #endif /* CPU_IMP_L1_CACHE_PARITY */ 1461 } 1462 } 1463 1464 /* 1465 * Called from Fast ECC TL>0 handler in case of fatal error. 1466 * cpu_tl1_error should always find an associated ch_err_tl1_data structure, 1467 * but if we don't, we'll panic with something reasonable. 1468 */ 1469 /*ARGSUSED*/ 1470 void 1471 cpu_tl1_err_panic(struct regs *rp, ulong_t flags) 1472 { 1473 cpu_tl1_error(rp, 1); 1474 /* 1475 * Should never return, but just in case. 1476 */ 1477 fm_panic("Unsurvivable ECC Error at TL>0"); 1478 } 1479 1480 /* 1481 * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST, 1482 * EDC, WDU, WDC, CPU, CPC, IVU, IVC events. 
1483 * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU 1484 * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC 1485 * 1486 * Cheetah+ also handles (No additional processing required): 1487 * DUE, DTO, DBERR (NCEEN controlled) 1488 * THCE (CEEN and ET_ECC_en controlled) 1489 * TUE (ET_ECC_en controlled) 1490 * 1491 * Panther further adds: 1492 * IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled) 1493 * IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled) 1494 * TUE_SH, TUE (NCEEN and L2_tag_ECC_en controlled) 1495 * L3_TUE, L3_TUE_SH (NCEEN and ET_ECC_en controlled) 1496 * THCE (CEEN and L2_tag_ECC_en controlled) 1497 * L3_THCE (CEEN and ET_ECC_en controlled) 1498 * 1499 * Note that the p_clo_flags input is only valid in cases where the 1500 * cpu_private struct is not yet initialized (since that is the only 1501 * time that information cannot be obtained from the logout struct.) 1502 */ 1503 /*ARGSUSED*/ 1504 void 1505 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags) 1506 { 1507 struct async_flt *aflt; 1508 ch_async_flt_t ch_flt; 1509 char pr_reason[MAX_REASON_STRING]; 1510 ch_cpu_logout_t *clop; 1511 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; 1512 ch_cpu_errors_t cpu_error_regs; 1513 1514 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1515 /* 1516 * Get the CPU log out info. If we can't find our CPU private 1517 * pointer, then we will have to make due without any detailed 1518 * logout information. 1519 */ 1520 if (CPU_PRIVATE(CPU) == NULL) { 1521 clop = NULL; 1522 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; 1523 get_cpu_error_state(&cpu_error_regs); 1524 set_cpu_error_state(&cpu_error_regs); 1525 t_afar = cpu_error_regs.afar; 1526 t_afsr = cpu_error_regs.afsr; 1527 t_afsr_ext = cpu_error_regs.afsr_ext; 1528 #if defined(SERRANO) 1529 ch_flt.afar2 = cpu_error_regs.afar2; 1530 #endif /* SERRANO */ 1531 } else { 1532 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); 1533 t_afar = clop->clo_data.chd_afar; 1534 t_afsr = clop->clo_data.chd_afsr; 1535 t_afsr_ext = clop->clo_data.chd_afsr_ext; 1536 #if defined(SERRANO) 1537 ch_flt.afar2 = clop->clo_data.chd_afar2; 1538 #endif /* SERRANO */ 1539 } 1540 1541 /* 1542 * In order to simplify code, we maintain this afsr_errs 1543 * variable which holds the aggregate of AFSR and AFSR_EXT 1544 * sticky bits. 1545 */ 1546 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 1547 (t_afsr & C_AFSR_ALL_ERRS); 1548 1549 pr_reason[0] = '\0'; 1550 /* Setup the async fault structure */ 1551 aflt = (struct async_flt *)&ch_flt; 1552 ch_flt.afsr_ext = t_afsr_ext; 1553 ch_flt.afsr_errs = t_afsr_errs; 1554 aflt->flt_stat = t_afsr; 1555 aflt->flt_addr = t_afar; 1556 aflt->flt_pc = (caddr_t)rp->r_pc; 1557 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1558 aflt->flt_tl = 0; 1559 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs); 1560 1561 /* 1562 * If this trap is a result of one of the errors not masked 1563 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead 1564 * indicate that a timeout is to be set later. 1565 */ 1566 if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) && 1567 !aflt->flt_panic) 1568 ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED; 1569 else 1570 ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED; 1571 1572 /* 1573 * log the CE and clean up 1574 */ 1575 cpu_log_and_clear_ce(&ch_flt); 1576 1577 /* 1578 * We re-enable CEEN (if required) and check if any disrupting errors 1579 * have happened. 
We do this because if a disrupting error had occurred 1580 * with CEEN off, the trap will not be taken when CEEN is re-enabled. 1581 * Note that CEEN works differently on Cheetah than on Spitfire. Also, 1582 * we enable CEEN *before* checking the AFSR to avoid the small window 1583 * of a error happening between checking the AFSR and enabling CEEN. 1584 */ 1585 if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER) 1586 set_error_enable(get_error_enable() | EN_REG_CEEN); 1587 if (clear_errors(&ch_flt)) { 1588 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 1589 NULL); 1590 } 1591 1592 /* 1593 * Panic here if aflt->flt_panic has been set. Enqueued errors will 1594 * be logged as part of the panic flow. 1595 */ 1596 if (aflt->flt_panic) 1597 fm_panic("%sError(s)", pr_reason); 1598 } 1599 1600 /* 1601 * The async_err handler transfers control here for UE, EMU, EDU:BLD, 1602 * L3_EDU:BLD, TO, and BERR events. 1603 * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR 1604 * 1605 * Cheetah+: No additional errors handled. 1606 * 1607 * Note that the p_clo_flags input is only valid in cases where the 1608 * cpu_private struct is not yet initialized (since that is the only 1609 * time that information cannot be obtained from the logout struct.) 1610 */ 1611 /*ARGSUSED*/ 1612 void 1613 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags) 1614 { 1615 ushort_t ttype, tl; 1616 ch_async_flt_t ch_flt; 1617 struct async_flt *aflt; 1618 int trampolined = 0; 1619 char pr_reason[MAX_REASON_STRING]; 1620 ch_cpu_logout_t *clop; 1621 uint64_t ceen, clo_flags; 1622 uint64_t log_afsr; 1623 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; 1624 ch_cpu_errors_t cpu_error_regs; 1625 int expected = DDI_FM_ERR_UNEXPECTED; 1626 ddi_acc_hdl_t *hp; 1627 1628 /* 1629 * We need to look at p_flag to determine if the thread detected an 1630 * error while dumping core. We can't grab p_lock here, but it's ok 1631 * because we just need a consistent snapshot and we know that everyone 1632 * else will store a consistent set of bits while holding p_lock. We 1633 * don't have to worry about a race because SDOCORE is set once prior 1634 * to doing i/o from the process's address space and is never cleared. 1635 */ 1636 uint_t pflag = ttoproc(curthread)->p_flag; 1637 1638 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1639 /* 1640 * Get the CPU log out info. If we can't find our CPU private 1641 * pointer then we will have to make due without any detailed 1642 * logout information. 1643 */ 1644 if (CPU_PRIVATE(CPU) == NULL) { 1645 clop = NULL; 1646 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; 1647 get_cpu_error_state(&cpu_error_regs); 1648 set_cpu_error_state(&cpu_error_regs); 1649 t_afar = cpu_error_regs.afar; 1650 t_afsr = cpu_error_regs.afsr; 1651 t_afsr_ext = cpu_error_regs.afsr_ext; 1652 #if defined(SERRANO) 1653 ch_flt.afar2 = cpu_error_regs.afar2; 1654 #endif /* SERRANO */ 1655 clo_flags = p_clo_flags; 1656 } else { 1657 clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout); 1658 t_afar = clop->clo_data.chd_afar; 1659 t_afsr = clop->clo_data.chd_afsr; 1660 t_afsr_ext = clop->clo_data.chd_afsr_ext; 1661 #if defined(SERRANO) 1662 ch_flt.afar2 = clop->clo_data.chd_afar2; 1663 #endif /* SERRANO */ 1664 clo_flags = clop->clo_flags; 1665 } 1666 1667 /* 1668 * In order to simplify code, we maintain this afsr_errs 1669 * variable which holds the aggregate of AFSR and AFSR_EXT 1670 * sticky bits. 
1671 */ 1672 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 1673 (t_afsr & C_AFSR_ALL_ERRS); 1674 pr_reason[0] = '\0'; 1675 1676 /* 1677 * Grab information encoded into our clo_flags field. 1678 */ 1679 ceen = clo_flags & EN_REG_CEEN; 1680 tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT; 1681 ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT; 1682 1683 /* 1684 * handle the specific error 1685 */ 1686 aflt = (struct async_flt *)&ch_flt; 1687 aflt->flt_id = gethrtime_waitfree(); 1688 aflt->flt_bus_id = getprocessorid(); 1689 aflt->flt_inst = CPU->cpu_id; 1690 ch_flt.afsr_ext = t_afsr_ext; 1691 ch_flt.afsr_errs = t_afsr_errs; 1692 aflt->flt_stat = t_afsr; 1693 aflt->flt_addr = t_afar; 1694 aflt->flt_pc = (caddr_t)rp->r_pc; 1695 aflt->flt_prot = AFLT_PROT_NONE; 1696 aflt->flt_class = CPU_FAULT; 1697 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1698 aflt->flt_tl = (uchar_t)tl; 1699 aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) || 1700 C_AFSR_PANIC(t_afsr_errs)); 1701 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1702 aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP); 1703 1704 /* 1705 * If the trap occurred in privileged mode at TL=0, we need to check to 1706 * see if we were executing in the kernel under on_trap() or t_lofault 1707 * protection. If so, modify the saved registers so that we return 1708 * from the trap to the appropriate trampoline routine. 1709 */ 1710 if (aflt->flt_priv && tl == 0) { 1711 if (curthread->t_ontrap != NULL) { 1712 on_trap_data_t *otp = curthread->t_ontrap; 1713 1714 if (otp->ot_prot & OT_DATA_EC) { 1715 aflt->flt_prot = AFLT_PROT_EC; 1716 otp->ot_trap |= OT_DATA_EC; 1717 rp->r_pc = otp->ot_trampoline; 1718 rp->r_npc = rp->r_pc + 4; 1719 trampolined = 1; 1720 } 1721 1722 if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) && 1723 (otp->ot_prot & OT_DATA_ACCESS)) { 1724 aflt->flt_prot = AFLT_PROT_ACCESS; 1725 otp->ot_trap |= OT_DATA_ACCESS; 1726 rp->r_pc = otp->ot_trampoline; 1727 rp->r_npc = rp->r_pc + 4; 1728 trampolined = 1; 1729 /* 1730 * for peeks and caut_gets errors are expected 1731 */ 1732 hp = (ddi_acc_hdl_t *)otp->ot_handle; 1733 if (!hp) 1734 expected = DDI_FM_ERR_PEEK; 1735 else if (hp->ah_acc.devacc_attr_access == 1736 DDI_CAUTIOUS_ACC) 1737 expected = DDI_FM_ERR_EXPECTED; 1738 } 1739 1740 } else if (curthread->t_lofault) { 1741 aflt->flt_prot = AFLT_PROT_COPY; 1742 rp->r_g1 = EFAULT; 1743 rp->r_pc = curthread->t_lofault; 1744 rp->r_npc = rp->r_pc + 4; 1745 trampolined = 1; 1746 } 1747 } 1748 1749 /* 1750 * If we're in user mode or we're doing a protected copy, we either 1751 * want the ASTON code below to send a signal to the user process 1752 * or we want to panic if aft_panic is set. 1753 * 1754 * If we're in privileged mode and we're not doing a copy, then we 1755 * need to check if we've trampolined. If we haven't trampolined, 1756 * we should panic. 1757 */ 1758 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 1759 if (t_afsr_errs & 1760 ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) & 1761 ~(C_AFSR_BERR | C_AFSR_TO))) 1762 aflt->flt_panic |= aft_panic; 1763 } else if (!trampolined) { 1764 aflt->flt_panic = 1; 1765 } 1766 1767 /* 1768 * If we've trampolined due to a privileged TO or BERR, or if an 1769 * unprivileged TO or BERR occurred, we don't want to enqueue an 1770 * event for that TO or BERR. Queue all other events (if any) besides 1771 * the TO/BERR. Since we may not be enqueing any events, we need to 1772 * ignore the number of events queued. 
If we haven't trampolined due 1773 * to a TO or BERR, just enqueue events normally. 1774 */ 1775 log_afsr = t_afsr_errs; 1776 if (trampolined) { 1777 log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR); 1778 } else if (!aflt->flt_priv) { 1779 /* 1780 * User mode, suppress messages if 1781 * cpu_berr_to_verbose is not set. 1782 */ 1783 if (!cpu_berr_to_verbose) 1784 log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR); 1785 } 1786 1787 /* 1788 * Log any errors that occurred 1789 */ 1790 if (((log_afsr & 1791 ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) && 1792 cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) || 1793 (t_afsr_errs & 1794 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) { 1795 ch_flt.flt_type = CPU_INV_AFSR; 1796 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, 1797 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue, 1798 aflt->flt_panic); 1799 } 1800 1801 /* 1802 * Zero out + invalidate CPU logout. 1803 */ 1804 if (clop) { 1805 bzero(clop, sizeof (ch_cpu_logout_t)); 1806 clop->clo_data.chd_afar = LOGOUT_INVALID; 1807 } 1808 1809 #if defined(JALAPENO) || defined(SERRANO) 1810 /* 1811 * UE/RUE/BERR/TO: Call our bus nexus friends to check for 1812 * IO errors that may have resulted in this trap. 1813 */ 1814 if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) { 1815 cpu_run_bus_error_handlers(aflt, expected); 1816 } 1817 1818 /* 1819 * UE/RUE: If UE or RUE is in memory, we need to flush the bad 1820 * line from the Ecache. We also need to query the bus nexus for 1821 * fatal errors. Attempts to do diagnostic read on caches may 1822 * introduce more errors (especially when the module is bad). 1823 */ 1824 if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) { 1825 /* 1826 * Ask our bus nexus friends if they have any fatal errors. If 1827 * so, they will log appropriate error messages. 1828 */ 1829 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL) 1830 aflt->flt_panic = 1; 1831 1832 /* 1833 * We got a UE or RUE and are panicking, save the fault PA in 1834 * a known location so that the platform specific panic code 1835 * can check for copyback errors. 1836 */ 1837 if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) { 1838 panic_aflt = *aflt; 1839 } 1840 } 1841 1842 /* 1843 * Flush Ecache line or entire Ecache 1844 */ 1845 if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR)) 1846 cpu_error_ecache_flush(&ch_flt); 1847 #else /* JALAPENO || SERRANO */ 1848 /* 1849 * UE/BERR/TO: Call our bus nexus friends to check for 1850 * IO errors that may have resulted in this trap. 1851 */ 1852 if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) { 1853 cpu_run_bus_error_handlers(aflt, expected); 1854 } 1855 1856 /* 1857 * UE: If the UE is in memory, we need to flush the bad 1858 * line from the Ecache. We also need to query the bus nexus for 1859 * fatal errors. Attempts to do diagnostic read on caches may 1860 * introduce more errors (especially when the module is bad). 1861 */ 1862 if (t_afsr & C_AFSR_UE) { 1863 /* 1864 * Ask our legacy bus nexus friends if they have any fatal 1865 * errors. If so, they will log appropriate error messages. 1866 */ 1867 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL) 1868 aflt->flt_panic = 1; 1869 1870 /* 1871 * We got a UE and are panicking, save the fault PA in a known 1872 * location so that the platform specific panic code can check 1873 * for copyback errors. 
1874 */
1875 if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1876 panic_aflt = *aflt;
1877 }
1878 }
1879 
1880 /*
1881 * Flush Ecache line or entire Ecache
1882 */
1883 if (t_afsr_errs &
1884 (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
1885 cpu_error_ecache_flush(&ch_flt);
1886 #endif /* JALAPENO || SERRANO */
1887 
1888 /*
1889 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1890 * or disrupting errors have happened. We do this because if a
1891 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1892 * trap will not be taken when NCEEN/CEEN is re-enabled. Note that
1893 * CEEN works differently on Cheetah than on Spitfire. Also, we enable
1894 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1895 * deferred or disrupting error happening between checking the AFSR and
1896 * enabling NCEEN/CEEN.
1897 *
1898 * Note: CEEN reenabled only if it was on when trap taken.
1899 */
1900 set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
1901 if (clear_errors(&ch_flt)) {
1902 /*
1903 * Check for secondary errors, and avoid panicking if we
1904 * have them
1905 */
1906 if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
1907 t_afar) == 0) {
1908 aflt->flt_panic |= ((ch_flt.afsr_errs &
1909 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
1910 }
1911 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1912 NULL);
1913 }
1914 
1915 /*
1916 * Panic here if aflt->flt_panic has been set. Enqueued errors will
1917 * be logged as part of the panic flow.
1918 */
1919 if (aflt->flt_panic)
1920 fm_panic("%sError(s)", pr_reason);
1921 
1922 /*
1923 * If we queued an error and we are going to return from the trap and
1924 * the error was in user mode or inside of a copy routine, set AST flag
1925 * so the queue will be drained before returning to user mode. The
1926 * AST processing will also act on our failure policy.
1927 */
1928 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1929 int pcb_flag = 0;
1930 
1931 if (t_afsr_errs &
1932 ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1933 ~(C_AFSR_BERR | C_AFSR_TO)))
1934 pcb_flag |= ASYNC_HWERR;
1935 
1936 if (t_afsr & C_AFSR_BERR)
1937 pcb_flag |= ASYNC_BERR;
1938 
1939 if (t_afsr & C_AFSR_TO)
1940 pcb_flag |= ASYNC_BTO;
1941 
1942 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1943 aston(curthread);
1944 }
1945 }
1946 
1947 #if defined(CPU_IMP_L1_CACHE_PARITY)
1948 /*
1949 * Handling of data and instruction parity errors (traps 0x71, 0x72).
1950 *
1951 * For Panther, P$ data parity errors during floating point load hits
1952 * are also detected (reported as TT 0x71) and handled by this trap
1953 * handler.
1954 *
1955 * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
1956 * is available.
1957 */
1958 /*ARGSUSED*/
1959 void
1960 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
1961 {
1962 ch_async_flt_t ch_flt;
1963 struct async_flt *aflt;
1964 uchar_t tl = ((flags & CH_ERR_TL) != 0);
1965 uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
1966 uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
1967 char *error_class;
1968 
1969 /*
1970 * Log the error.
1971 * For icache parity errors the fault address is the trap PC.
1972 * For dcache/pcache parity errors the instruction would have to
1973 * be decoded to determine the address and that isn't possible
1974 * at high PIL.
1975 */ 1976 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1977 aflt = (struct async_flt *)&ch_flt; 1978 aflt->flt_id = gethrtime_waitfree(); 1979 aflt->flt_bus_id = getprocessorid(); 1980 aflt->flt_inst = CPU->cpu_id; 1981 aflt->flt_pc = tpc; 1982 aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR; 1983 aflt->flt_prot = AFLT_PROT_NONE; 1984 aflt->flt_class = CPU_FAULT; 1985 aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ? 1 : 0; 1986 aflt->flt_tl = tl; 1987 aflt->flt_panic = panic; 1988 aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP; 1989 ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY; 1990 1991 if (iparity) { 1992 cpu_icache_parity_info(&ch_flt); 1993 if (ch_flt.parity_data.ipe.cpl_off != -1) 1994 error_class = FM_EREPORT_CPU_USIII_IDSPE; 1995 else if (ch_flt.parity_data.ipe.cpl_way != -1) 1996 error_class = FM_EREPORT_CPU_USIII_ITSPE; 1997 else 1998 error_class = FM_EREPORT_CPU_USIII_IPE; 1999 aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE; 2000 } else { 2001 cpu_dcache_parity_info(&ch_flt); 2002 if (ch_flt.parity_data.dpe.cpl_off != -1) 2003 error_class = FM_EREPORT_CPU_USIII_DDSPE; 2004 else if (ch_flt.parity_data.dpe.cpl_way != -1) 2005 error_class = FM_EREPORT_CPU_USIII_DTSPE; 2006 else 2007 error_class = FM_EREPORT_CPU_USIII_DPE; 2008 aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE; 2009 /* 2010 * For panther we also need to check the P$ for parity errors. 2011 */ 2012 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2013 cpu_pcache_parity_info(&ch_flt); 2014 if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) { 2015 error_class = FM_EREPORT_CPU_USIII_PDSPE; 2016 aflt->flt_payload = 2017 FM_EREPORT_PAYLOAD_PCACHE_PE; 2018 } 2019 } 2020 } 2021 2022 cpu_errorq_dispatch(error_class, (void *)&ch_flt, 2023 sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); 2024 2025 if (iparity) { 2026 /* 2027 * Invalidate entire I$. 2028 * This is required due to the use of diagnostic ASI 2029 * accesses that may result in a loss of I$ coherency. 2030 */ 2031 if (cache_boot_state & DCU_IC) { 2032 flush_icache(); 2033 } 2034 /* 2035 * According to section P.3.1 of the Panther PRM, we 2036 * need to do a little more for recovery on those 2037 * CPUs after encountering an I$ parity error. 2038 */ 2039 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2040 flush_ipb(); 2041 correct_dcache_parity(dcache_size, 2042 dcache_linesize); 2043 flush_pcache(); 2044 } 2045 } else { 2046 /* 2047 * Since the valid bit is ignored when checking parity the 2048 * D$ data and tag must also be corrected. Set D$ data bits 2049 * to zero and set utag to 0, 1, 2, 3. 2050 */ 2051 correct_dcache_parity(dcache_size, dcache_linesize); 2052 2053 /* 2054 * According to section P.3.3 of the Panther PRM, we 2055 * need to do a little more for recovery on those 2056 * CPUs after encountering a D$ or P$ parity error. 2057 * 2058 * As far as clearing P$ parity errors, it is enough to 2059 * simply invalidate all entries in the P$ since P$ parity 2060 * error traps are only generated for floating point load 2061 * hits. 2062 */ 2063 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2064 flush_icache(); 2065 flush_ipb(); 2066 flush_pcache(); 2067 } 2068 } 2069 2070 /* 2071 * Invalidate entire D$ if it was enabled. 2072 * This is done to avoid stale data in the D$ which might 2073 * occur with the D$ disabled and the trap handler doing 2074 * stores affecting lines already in the D$. 
2075 */ 2076 if (cache_boot_state & DCU_DC) { 2077 flush_dcache(); 2078 } 2079 2080 /* 2081 * Restore caches to their bootup state. 2082 */ 2083 set_dcu(get_dcu() | cache_boot_state); 2084 2085 /* 2086 * Panic here if aflt->flt_panic has been set. Enqueued errors will 2087 * be logged as part of the panic flow. 2088 */ 2089 if (aflt->flt_panic) 2090 fm_panic("%sError(s)", iparity ? "IPE " : "DPE "); 2091 2092 /* 2093 * If this error occurred at TL>0 then flush the E$ here to reduce 2094 * the chance of getting an unrecoverable Fast ECC error. This 2095 * flush will evict the part of the parity trap handler that is run 2096 * at TL>1. 2097 */ 2098 if (tl) { 2099 cpu_flush_ecache(); 2100 } 2101 } 2102 2103 /* 2104 * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t 2105 * to indicate which portions of the captured data should be in the ereport. 2106 */ 2107 void 2108 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt) 2109 { 2110 int way = ch_flt->parity_data.ipe.cpl_way; 2111 int offset = ch_flt->parity_data.ipe.cpl_off; 2112 int tag_index; 2113 struct async_flt *aflt = (struct async_flt *)ch_flt; 2114 2115 2116 if ((offset != -1) || (way != -1)) { 2117 /* 2118 * Parity error in I$ tag or data 2119 */ 2120 tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx; 2121 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 2122 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2123 PN_ICIDX_TO_WAY(tag_index); 2124 else 2125 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2126 CH_ICIDX_TO_WAY(tag_index); 2127 ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag = 2128 IC_LOGFLAG_MAGIC; 2129 } else { 2130 /* 2131 * Parity error was not identified. 2132 * Log tags and data for all ways. 2133 */ 2134 for (way = 0; way < CH_ICACHE_NWAY; way++) { 2135 tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx; 2136 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 2137 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2138 PN_ICIDX_TO_WAY(tag_index); 2139 else 2140 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2141 CH_ICIDX_TO_WAY(tag_index); 2142 ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag = 2143 IC_LOGFLAG_MAGIC; 2144 } 2145 } 2146 } 2147 2148 /* 2149 * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t 2150 * to indicate which portions of the captured data should be in the ereport. 2151 */ 2152 void 2153 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt) 2154 { 2155 int way = ch_flt->parity_data.dpe.cpl_way; 2156 int offset = ch_flt->parity_data.dpe.cpl_off; 2157 int tag_index; 2158 2159 if (offset != -1) { 2160 /* 2161 * Parity error in D$ or P$ data array. 2162 * 2163 * First check to see whether the parity error is in D$ or P$ 2164 * since P$ data parity errors are reported in Panther using 2165 * the same trap. 2166 */ 2167 if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) { 2168 tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx; 2169 ch_flt->parity_data.dpe.cpl_pc[way].pc_way = 2170 CH_PCIDX_TO_WAY(tag_index); 2171 ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag = 2172 PC_LOGFLAG_MAGIC; 2173 } else { 2174 tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx; 2175 ch_flt->parity_data.dpe.cpl_dc[way].dc_way = 2176 CH_DCIDX_TO_WAY(tag_index); 2177 ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag = 2178 DC_LOGFLAG_MAGIC; 2179 } 2180 } else if (way != -1) { 2181 /* 2182 * Parity error in D$ tag. 
2183 */ 2184 tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx; 2185 ch_flt->parity_data.dpe.cpl_dc[way].dc_way = 2186 CH_DCIDX_TO_WAY(tag_index); 2187 ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag = 2188 DC_LOGFLAG_MAGIC; 2189 } 2190 } 2191 #endif /* CPU_IMP_L1_CACHE_PARITY */ 2192 2193 /* 2194 * The cpu_async_log_err() function is called via the [uc]e_drain() function to 2195 * post-process CPU events that are dequeued. As such, it can be invoked 2196 * from softint context, from AST processing in the trap() flow, or from the 2197 * panic flow. We decode the CPU-specific data, and take appropriate actions. 2198 * Historically this entry point was used to log the actual cmn_err(9F) text; 2199 * now with FMA it is used to prepare 'flt' to be converted into an ereport. 2200 * With FMA this function now also returns a flag which indicates to the 2201 * caller whether the ereport should be posted (1) or suppressed (0). 2202 */ 2203 static int 2204 cpu_async_log_err(void *flt, errorq_elem_t *eqep) 2205 { 2206 ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt; 2207 struct async_flt *aflt = (struct async_flt *)flt; 2208 page_t *pp; 2209 2210 switch (ch_flt->flt_type) { 2211 case CPU_INV_AFSR: 2212 /* 2213 * If it is a disrupting trap and the AFSR is zero, then 2214 * the event has probably already been noted. Do not post 2215 * an ereport. 2216 */ 2217 if ((aflt->flt_status & ECC_C_TRAP) && 2218 (!(aflt->flt_stat & C_AFSR_MASK))) 2219 return (0); 2220 else 2221 return (1); 2222 case CPU_TO: 2223 case CPU_BERR: 2224 case CPU_FATAL: 2225 case CPU_FPUERR: 2226 return (1); 2227 2228 case CPU_UE_ECACHE_RETIRE: 2229 cpu_log_err(aflt); 2230 cpu_page_retire(ch_flt); 2231 return (1); 2232 2233 /* 2234 * Cases where we may want to suppress logging or perform 2235 * extended diagnostics. 2236 */ 2237 case CPU_CE: 2238 case CPU_EMC: 2239 pp = page_numtopp_nolock((pfn_t) 2240 (aflt->flt_addr >> MMU_PAGESHIFT)); 2241 2242 /* 2243 * We want to skip logging and further classification 2244 * only if ALL the following conditions are true: 2245 * 2246 * 1. There is only one error 2247 * 2. That error is a correctable memory error 2248 * 3. The error is caused by the memory scrubber (in 2249 * which case the error will have occurred under 2250 * on_trap protection) 2251 * 4. The error is on a retired page 2252 * 2253 * Note: AFLT_PROT_EC is used places other than the memory 2254 * scrubber. However, none of those errors should occur 2255 * on a retired page. 2256 */ 2257 if ((ch_flt->afsr_errs & 2258 (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE && 2259 aflt->flt_prot == AFLT_PROT_EC) { 2260 2261 if (pp != NULL && page_isretired(pp)) { 2262 if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) { 2263 2264 /* 2265 * Since we're skipping logging, we'll need 2266 * to schedule the re-enabling of CEEN 2267 */ 2268 (void) timeout(cpu_delayed_check_ce_errors, 2269 (void *)aflt->flt_inst, drv_usectohz( 2270 (clock_t)cpu_ceen_delay_secs * MICROSEC)); 2271 } 2272 return (0); 2273 } 2274 } 2275 2276 /* 2277 * Perform/schedule further classification actions, but 2278 * only if the page is healthy (we don't want bad 2279 * pages inducing too much diagnostic activity). If we could 2280 * not find a page pointer then we also skip this. If 2281 * ce_scrub_xdiag_recirc returns nonzero then it has chosen 2282 * to copy and recirculate the event (for further diagnostics) 2283 * and we should not proceed to log it here. 
2284 * 2285 * This must be the last step here before the cpu_log_err() 2286 * below - if an event recirculates cpu_ce_log_err() will 2287 * not call the current function but just proceed directly 2288 * to cpu_ereport_post after the cpu_log_err() avoided below. 2289 * 2290 * Note: Check cpu_impl_async_log_err if changing this 2291 */ 2292 if (pp) { 2293 if (page_isretired(pp) || page_deteriorating(pp)) { 2294 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 2295 CE_XDIAG_SKIP_PAGEDET); 2296 } else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep, 2297 offsetof(ch_async_flt_t, cmn_asyncflt))) { 2298 return (0); 2299 } 2300 } else { 2301 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 2302 CE_XDIAG_SKIP_NOPP); 2303 } 2304 /*FALLTHRU*/ 2305 2306 /* 2307 * Cases where we just want to report the error and continue. 2308 */ 2309 case CPU_CE_ECACHE: 2310 case CPU_UE_ECACHE: 2311 case CPU_IV: 2312 case CPU_ORPH: 2313 cpu_log_err(aflt); 2314 return (1); 2315 2316 /* 2317 * Cases where we want to fall through to handle panicking. 2318 */ 2319 case CPU_UE: 2320 /* 2321 * We want to skip logging in the same conditions as the 2322 * CE case. In addition, we want to make sure we're not 2323 * panicking. 2324 */ 2325 if (!panicstr && (ch_flt->afsr_errs & 2326 (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE && 2327 aflt->flt_prot == AFLT_PROT_EC) { 2328 page_t *pp = page_numtopp_nolock((pfn_t) 2329 (aflt->flt_addr >> MMU_PAGESHIFT)); 2330 2331 if (pp != NULL && page_isretired(pp)) { 2332 2333 /* Zero the address to clear the error */ 2334 softcall(ecc_page_zero, (void *)aflt->flt_addr); 2335 return (0); 2336 } 2337 } 2338 cpu_log_err(aflt); 2339 break; 2340 2341 default: 2342 /* 2343 * If the us3_common.c code doesn't know the flt_type, it may 2344 * be an implementation-specific code. Call into the impldep 2345 * backend to find out what to do: if it tells us to continue, 2346 * break and handle as if falling through from a UE; if not, 2347 * the impldep backend has handled the error and we're done. 2348 */ 2349 switch (cpu_impl_async_log_err(flt, eqep)) { 2350 case CH_ASYNC_LOG_DONE: 2351 return (1); 2352 case CH_ASYNC_LOG_RECIRC: 2353 return (0); 2354 case CH_ASYNC_LOG_CONTINUE: 2355 break; /* continue on to handle UE-like error */ 2356 default: 2357 cmn_err(CE_WARN, "discarding error 0x%p with " 2358 "invalid fault type (0x%x)", 2359 (void *)aflt, ch_flt->flt_type); 2360 return (0); 2361 } 2362 } 2363 2364 /* ... fall through from the UE case */ 2365 2366 if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) { 2367 if (!panicstr) { 2368 cpu_page_retire(ch_flt); 2369 } else { 2370 /* 2371 * Clear UEs on panic so that we don't 2372 * get haunted by them during panic or 2373 * after reboot 2374 */ 2375 cpu_clearphys(aflt); 2376 (void) clear_errors(NULL); 2377 } 2378 } 2379 2380 return (1); 2381 } 2382 2383 /* 2384 * Retire the bad page that may contain the flushed error. 2385 */ 2386 void 2387 cpu_page_retire(ch_async_flt_t *ch_flt) 2388 { 2389 struct async_flt *aflt = (struct async_flt *)ch_flt; 2390 page_t *pp = page_numtopp_nolock(aflt->flt_addr >> MMU_PAGESHIFT); 2391 2392 if (pp != NULL) { 2393 page_settoxic(pp, PAGE_IS_FAULTY); 2394 (void) page_retire(pp, PAGE_IS_TOXIC); 2395 } 2396 } 2397 2398 /* 2399 * The cpu_log_err() function is called by cpu_async_log_err() to perform the 2400 * generic event post-processing for correctable and uncorrectable memory, 2401 * E$, and MTag errors. 
Historically this entry point was used to log bits of 2402 * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be 2403 * converted into an ereport. In addition, it transmits the error to any 2404 * platform-specific service-processor FRU logging routines, if available. 2405 */ 2406 void 2407 cpu_log_err(struct async_flt *aflt) 2408 { 2409 char unum[UNUM_NAMLEN]; 2410 int len = 0; 2411 int synd_status, synd_code, afar_status; 2412 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 2413 2414 /* 2415 * Need to turn on ECC_ECACHE for plat_get_mem_unum(). 2416 * For Panther, L2$ is not external, so we don't want to 2417 * generate an E$ unum for those errors. 2418 */ 2419 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { 2420 if (ch_flt->flt_bit & C_AFSR_EXT_L3_ERRS) 2421 aflt->flt_status |= ECC_ECACHE; 2422 } else { 2423 if (ch_flt->flt_bit & C_AFSR_ECACHE) 2424 aflt->flt_status |= ECC_ECACHE; 2425 } 2426 2427 /* 2428 * Determine syndrome status. 2429 */ 2430 synd_status = afsr_to_synd_status(aflt->flt_inst, 2431 ch_flt->afsr_errs, ch_flt->flt_bit); 2432 2433 /* 2434 * Determine afar status. 2435 */ 2436 if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)) 2437 afar_status = afsr_to_afar_status(ch_flt->afsr_errs, 2438 ch_flt->flt_bit); 2439 else 2440 afar_status = AFLT_STAT_INVALID; 2441 2442 /* 2443 * If afar status is not invalid do a unum lookup. 2444 */ 2445 if (afar_status != AFLT_STAT_INVALID) { 2446 (void) cpu_get_mem_unum_aflt(synd_status, aflt, unum, 2447 UNUM_NAMLEN, &len); 2448 } else { 2449 unum[0] = '\0'; 2450 } 2451 2452 synd_code = synd_to_synd_code(synd_status, 2453 aflt->flt_synd, ch_flt->flt_bit); 2454 2455 /* 2456 * Do not send the fruid message (plat_ecc_error_data_t) 2457 * to the SC if it can handle the enhanced error information 2458 * (plat_ecc_error2_data_t) or when the tunable 2459 * ecc_log_fruid_enable is set to 0. 2460 */ 2461 2462 if (&plat_ecc_capability_sc_get && 2463 plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) { 2464 if (&plat_log_fruid_error) 2465 plat_log_fruid_error(synd_code, aflt, unum, 2466 ch_flt->flt_bit); 2467 } 2468 2469 if (aflt->flt_func != NULL) 2470 aflt->flt_func(aflt, unum); 2471 2472 if (afar_status != AFLT_STAT_INVALID) 2473 cpu_log_diag_info(ch_flt); 2474 2475 /* 2476 * If we have a CEEN error , we do not reenable CEEN until after 2477 * we exit the trap handler. Otherwise, another error may 2478 * occur causing the handler to be entered recursively. 2479 * We set a timeout to trigger in cpu_ceen_delay_secs seconds, 2480 * to try and ensure that the CPU makes progress in the face 2481 * of a CE storm. 2482 */ 2483 if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) { 2484 (void) timeout(cpu_delayed_check_ce_errors, 2485 (void *)aflt->flt_inst, 2486 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); 2487 } 2488 } 2489 2490 /* 2491 * Invoked by error_init() early in startup and therefore before 2492 * startup_errorq() is called to drain any error Q - 2493 * 2494 * startup() 2495 * startup_end() 2496 * error_init() 2497 * cpu_error_init() 2498 * errorq_init() 2499 * errorq_drain() 2500 * start_other_cpus() 2501 * 2502 * The purpose of this routine is to create error-related taskqs. Taskqs 2503 * are used for this purpose because cpu_lock can't be grabbed from interrupt 2504 * context. 
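 *
 * The taskq created below is dispatched to from the deferred CEEN
 * re-enable path elsewhere in this module; a dispatch would look
 * roughly like the following sketch (the callback name here is
 * illustrative only):
 *
 *	(void) taskq_dispatch(ch_check_ce_tq, check_ce_and_reenable,
 *	    (void *)(uintptr_t)cpu_id, TQ_NOSLEEP);
 *
 * Running the callback in taskq context allows it to take cpu_lock,
 * which the interrupt-level detection code cannot do.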
2505 */ 2506 void 2507 cpu_error_init(int items) 2508 { 2509 /* 2510 * Create taskq(s) to reenable CE 2511 */ 2512 ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri, 2513 items, items, TASKQ_PREPOPULATE); 2514 } 2515 2516 void 2517 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep) 2518 { 2519 char unum[UNUM_NAMLEN]; 2520 int len; 2521 2522 switch (aflt->flt_class) { 2523 case CPU_FAULT: 2524 cpu_ereport_init(aflt); 2525 if (cpu_async_log_err(aflt, eqep)) 2526 cpu_ereport_post(aflt); 2527 break; 2528 2529 case BUS_FAULT: 2530 if (aflt->flt_func != NULL) { 2531 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, 2532 unum, UNUM_NAMLEN, &len); 2533 aflt->flt_func(aflt, unum); 2534 } 2535 break; 2536 2537 case RECIRC_CPU_FAULT: 2538 aflt->flt_class = CPU_FAULT; 2539 cpu_log_err(aflt); 2540 cpu_ereport_post(aflt); 2541 break; 2542 2543 case RECIRC_BUS_FAULT: 2544 ASSERT(aflt->flt_class != RECIRC_BUS_FAULT); 2545 /*FALLTHRU*/ 2546 default: 2547 cmn_err(CE_WARN, "discarding CE error 0x%p with invalid " 2548 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 2549 return; 2550 } 2551 } 2552 2553 /* 2554 * Scrub and classify a CE. This function must not modify the 2555 * fault structure passed to it but instead should return the classification 2556 * information. 2557 */ 2558 2559 static uchar_t 2560 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried) 2561 { 2562 uchar_t disp = CE_XDIAG_EXTALG; 2563 on_trap_data_t otd; 2564 uint64_t orig_err; 2565 ch_cpu_logout_t *clop; 2566 2567 /* 2568 * Clear CEEN. CPU CE TL > 0 trap handling will already have done 2569 * this, but our other callers have not. Disable preemption to 2570 * avoid CPU migration so that we restore CEEN on the correct 2571 * cpu later. 2572 * 2573 * CEEN is cleared so that further CEs that our instruction and 2574 * data footprint induce do not cause use to either creep down 2575 * kernel stack to the point of overflow, or do so much CE 2576 * notification as to make little real forward progress. 2577 * 2578 * NCEEN must not be cleared. However it is possible that 2579 * our accesses to the flt_addr may provoke a bus error or timeout 2580 * if the offending address has just been unconfigured as part of 2581 * a DR action. So we must operate under on_trap protection. 2582 */ 2583 kpreempt_disable(); 2584 orig_err = get_error_enable(); 2585 if (orig_err & EN_REG_CEEN) 2586 set_error_enable(orig_err & ~EN_REG_CEEN); 2587 2588 /* 2589 * Our classification algorithm includes the line state before 2590 * the scrub; we'd like this captured after the detection and 2591 * before the algorithm below - the earlier the better. 2592 * 2593 * If we've come from a cpu CE trap then this info already exists 2594 * in the cpu logout area. 2595 * 2596 * For a CE detected by memscrub for which there was no trap 2597 * (running with CEEN off) cpu_log_and_clear_ce has called 2598 * cpu_ce_delayed_ec_logout to capture some cache data, and 2599 * marked the fault structure as incomplete as a flag to later 2600 * logging code. 2601 * 2602 * If called directly from an IO detected CE there has been 2603 * no line data capture. In this case we logout to the cpu logout 2604 * area - that's appropriate since it's the cpu cache data we need 2605 * for classification. We thus borrow the cpu logout area for a 2606 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in 2607 * this time (we will invalidate it again below). 
2608 * 2609 * If called from the partner check xcall handler then this cpu 2610 * (the partner) has not necessarily experienced a CE at this 2611 * address. But we want to capture line state before its scrub 2612 * attempt since we use that in our classification. 2613 */ 2614 if (logout_tried == B_FALSE) { 2615 if (!cpu_ce_delayed_ec_logout(ecc->flt_addr)) 2616 disp |= CE_XDIAG_NOLOGOUT; 2617 } 2618 2619 /* 2620 * Scrub memory, then check AFSR for errors. The AFAR we scrub may 2621 * no longer be valid (if DR'd since the initial event) so we 2622 * perform this scrub under on_trap protection. If this access is 2623 * ok then further accesses below will also be ok - DR cannot 2624 * proceed while this thread is active (preemption is disabled); 2625 * to be safe we'll nonetheless use on_trap again below. 2626 */ 2627 if (!on_trap(&otd, OT_DATA_ACCESS)) { 2628 cpu_scrubphys(ecc); 2629 } else { 2630 no_trap(); 2631 if (orig_err & EN_REG_CEEN) 2632 set_error_enable(orig_err); 2633 kpreempt_enable(); 2634 return (disp); 2635 } 2636 no_trap(); 2637 2638 /* 2639 * Did the casx read of the scrub log a CE that matches the AFAR? 2640 * Note that it's quite possible that the read sourced the data from 2641 * another cpu. 2642 */ 2643 if (clear_ecc(ecc)) 2644 disp |= CE_XDIAG_CE1; 2645 2646 /* 2647 * Read the data again. This time the read is very likely to 2648 * come from memory since the scrub induced a writeback to memory. 2649 */ 2650 if (!on_trap(&otd, OT_DATA_ACCESS)) { 2651 (void) lddphys(P2ALIGN(ecc->flt_addr, 8)); 2652 } else { 2653 no_trap(); 2654 if (orig_err & EN_REG_CEEN) 2655 set_error_enable(orig_err); 2656 kpreempt_enable(); 2657 return (disp); 2658 } 2659 no_trap(); 2660 2661 /* Did that read induce a CE that matches the AFAR? */ 2662 if (clear_ecc(ecc)) 2663 disp |= CE_XDIAG_CE2; 2664 2665 /* 2666 * Look at the logout information and record whether we found the 2667 * line in l2/l3 cache. For Panther we are interested in whether 2668 * we found it in either cache (it won't reside in both but 2669 * it is possible to read it that way given the moving target). 2670 */ 2671 clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL; 2672 if (!(disp & CE_XDIAG_NOLOGOUT) && clop && 2673 clop->clo_data.chd_afar != LOGOUT_INVALID) { 2674 int hit, level; 2675 int state; 2676 int totalsize; 2677 ch_ec_data_t *ecp; 2678 2679 /* 2680 * If hit is nonzero then a match was found and hit will 2681 * be one greater than the index which hit. For Panther we 2682 * also need to pay attention to level to see which of l2$ or 2683 * l3$ it hit in. 2684 */ 2685 hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data, 2686 0, &level); 2687 2688 if (hit) { 2689 --hit; 2690 disp |= CE_XDIAG_AFARMATCH; 2691 2692 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2693 if (level == 2) 2694 ecp = &clop->clo_data.chd_l2_data[hit]; 2695 else 2696 ecp = &clop->clo_data.chd_ec_data[hit]; 2697 } else { 2698 ASSERT(level == 2); 2699 ecp = &clop->clo_data.chd_ec_data[hit]; 2700 } 2701 totalsize = cpunodes[CPU->cpu_id].ecache_size; 2702 state = cpu_ectag_pa_to_subblk_state(totalsize, 2703 ecc->flt_addr, ecp->ec_tag); 2704 2705 /* 2706 * Cheetah variants use different state encodings - 2707 * the CH_ECSTATE_* defines vary depending on the 2708 * module we're compiled for. Translate into our 2709 * one true version. Conflate Owner-Shared state 2710 * of SSM mode with Owner as victimisation of such 2711 * lines may cause a writeback. 
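 *
 * As a worked example of the disposition word assembled by this
 * function (illustrative values only): a scrub whose casx read saw a
 * matching CE and whose logout found the line Modified in the E$
 * would leave
 *
 *	disp == CE_XDIAG_EXTALG | CE_XDIAG_CE1 | CE_XDIAG_AFARMATCH |
 *	    EC_STATE_M;
 *
 * where the EC_STATE_* contribution is added by the switch below.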
2712 */ 2713 switch (state) { 2714 case CH_ECSTATE_MOD: 2715 disp |= EC_STATE_M; 2716 break; 2717 2718 case CH_ECSTATE_OWN: 2719 case CH_ECSTATE_OWS: 2720 disp |= EC_STATE_O; 2721 break; 2722 2723 case CH_ECSTATE_EXL: 2724 disp |= EC_STATE_E; 2725 break; 2726 2727 case CH_ECSTATE_SHR: 2728 disp |= EC_STATE_S; 2729 break; 2730 2731 default: 2732 disp |= EC_STATE_I; 2733 break; 2734 } 2735 } 2736 2737 /* 2738 * If we initiated the delayed logout then we are responsible 2739 * for invalidating the logout area. 2740 */ 2741 if (logout_tried == B_FALSE) { 2742 bzero(clop, sizeof (ch_cpu_logout_t)); 2743 clop->clo_data.chd_afar = LOGOUT_INVALID; 2744 } 2745 } 2746 2747 /* 2748 * Re-enable CEEN if we turned it off. 2749 */ 2750 if (orig_err & EN_REG_CEEN) 2751 set_error_enable(orig_err); 2752 kpreempt_enable(); 2753 2754 return (disp); 2755 } 2756 2757 /* 2758 * Scrub a correctable memory error and collect data for classification 2759 * of CE type. This function is called in the detection path, ie tl0 handling 2760 * of a correctable error trap (cpus) or interrupt (IO) at high PIL. 2761 */ 2762 void 2763 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried) 2764 { 2765 /* 2766 * Cheetah CE classification does not set any bits in flt_status. 2767 * Instead we will record classification datapoints in flt_disp. 2768 */ 2769 ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY); 2770 2771 /* 2772 * To check if the error detected by IO is persistent, sticky or 2773 * intermittent. This is noticed by clear_ecc(). 2774 */ 2775 if (ecc->flt_status & ECC_IOBUS) 2776 ecc->flt_stat = C_AFSR_MEMORY; 2777 2778 /* 2779 * Record information from this first part of the algorithm in 2780 * flt_disp. 2781 */ 2782 ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried); 2783 } 2784 2785 /* 2786 * Select a partner to perform a further CE classification check from. 2787 * Must be called with kernel preemption disabled (to stop the cpu list 2788 * from changing). The detecting cpu we are partnering has cpuid 2789 * aflt->flt_inst; we might not be running on the detecting cpu. 2790 * 2791 * Restrict choice to active cpus in the same cpu partition as ourselves in 2792 * an effort to stop bad cpus in one partition causing other partitions to 2793 * perform excessive diagnostic activity. Actually since the errorq drain 2794 * is run from a softint most of the time and that is a global mechanism 2795 * this isolation is only partial. Return NULL if we fail to find a 2796 * suitable partner. 2797 * 2798 * We prefer a partner that is in a different latency group to ourselves as 2799 * we will share fewer datapaths. If such a partner is unavailable then 2800 * choose one in the same lgroup but prefer a different chip and only allow 2801 * a sibling core if flags includes PTNR_SIBLINGOK. If all else fails and 2802 * flags includes PTNR_SELFOK then permit selection of the original detector. 2803 * 2804 * We keep a cache of the last partner selected for a cpu, and we'll try to 2805 * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds 2806 * have passed since that selection was made. This provides the benefit 2807 * of the point-of-view of different partners over time but without 2808 * requiring frequent cpu list traversals. 
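 *
 * In short, the selection preference implemented below is, in order:
 * . an active cpu in a different lgroup (a remote partner)
 * . an active cpu in the same lgroup but on a different chip
 * . a sibling core on the same chip, only if flags includes PTNR_SIBLINGOK
 * . the detector itself, only if flags includes PTNR_SELFOK
 * and NULL if none of these is available.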
2809 */ 2810 2811 #define PTNR_SIBLINGOK 0x1 /* Allow selection of sibling core */ 2812 #define PTNR_SELFOK 0x2 /* Allow selection of cpu to "partner" itself */ 2813 2814 static cpu_t * 2815 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep) 2816 { 2817 cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr; 2818 hrtime_t lasttime, thistime; 2819 2820 ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL); 2821 2822 dtcr = cpu[aflt->flt_inst]; 2823 2824 /* 2825 * Short-circuit for the following cases: 2826 * . the dtcr is not flagged active 2827 * . there is just one cpu present 2828 * . the detector has disappeared 2829 * . we were given a bad flt_inst cpuid; this should not happen 2830 * (eg PCI code now fills flt_inst) but if it does it is no 2831 * reason to panic. 2832 * . there is just one cpu left online in the cpu partition 2833 * 2834 * If we return NULL after this point then we do not update the 2835 * chpr_ceptnr_seltime which will cause us to perform a full lookup 2836 * again next time; this is the case where the only other cpu online 2837 * in the detector's partition is on the same chip as the detector 2838 * and since CEEN re-enable is throttled even that case should not 2839 * hurt performance. 2840 */ 2841 if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) { 2842 return (NULL); 2843 } 2844 if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) { 2845 if (flags & PTNR_SELFOK) { 2846 *typep = CE_XDIAG_PTNR_SELF; 2847 return (dtcr); 2848 } else { 2849 return (NULL); 2850 } 2851 } 2852 2853 thistime = gethrtime(); 2854 lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime); 2855 2856 /* 2857 * Select a starting point. 2858 */ 2859 if (!lasttime) { 2860 /* 2861 * We've never selected a partner for this detector before. 2862 * Start the scan at the next online cpu in the same cpu 2863 * partition. 2864 */ 2865 sp = dtcr->cpu_next_part; 2866 } else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) { 2867 /* 2868 * Our last selection has not aged yet. If this partner: 2869 * . is still a valid cpu, 2870 * . is still in the same partition as the detector 2871 * . is still marked active 2872 * . satisfies the 'flags' argument criteria 2873 * then select it again without updating the timestamp. 2874 */ 2875 sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)]; 2876 if (sp == NULL || sp->cpu_part != dtcr->cpu_part || 2877 !cpu_flagged_active(sp->cpu_flags) || 2878 (sp == dtcr && !(flags & PTNR_SELFOK)) || 2879 (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id && 2880 !(flags & PTNR_SIBLINGOK))) { 2881 sp = dtcr->cpu_next_part; 2882 } else { 2883 if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) { 2884 *typep = CE_XDIAG_PTNR_REMOTE; 2885 } else if (sp == dtcr) { 2886 *typep = CE_XDIAG_PTNR_SELF; 2887 } else if (sp->cpu_chip->chip_id == 2888 dtcr->cpu_chip->chip_id) { 2889 *typep = CE_XDIAG_PTNR_SIBLING; 2890 } else { 2891 *typep = CE_XDIAG_PTNR_LOCAL; 2892 } 2893 return (sp); 2894 } 2895 } else { 2896 /* 2897 * Our last selection has aged. If it is nonetheless still a 2898 * valid cpu then start the scan at the next cpu in the 2899 * partition after our last partner. If the last selection 2900 * is no longer a valid cpu then go with our default. In 2901 * this way we slowly cycle through possible partners to 2902 * obtain multiple viewpoints over time. 
2903 */
2904 sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2905 if (sp == NULL) {
2906 sp = dtcr->cpu_next_part;
2907 } else {
2908 sp = sp->cpu_next_part; /* may be dtcr */
2909 if (sp->cpu_part != dtcr->cpu_part)
2910 sp = dtcr;
2911 }
2912 }
2913 
2914 /*
2915 * We have a proposed starting point for our search, but if this
2916 * cpu is offline then its cpu_next_part will point to itself
2917 * so we can't use that to iterate over cpus in this partition in
2918 * the loop below. We still want to avoid iterating over cpus not
2919 * in our partition, so in the case that our starting point is offline
2920 * we will repoint it to be the detector itself; and if the detector
2921 * happens to be offline we'll return NULL from the following loop.
2922 */
2923 if (!cpu_flagged_active(sp->cpu_flags)) {
2924 sp = dtcr;
2925 }
2926 
2927 ptnr = sp;
2928 locptnr = NULL;
2929 sibptnr = NULL;
2930 do {
2931 if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
2932 continue;
2933 if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2934 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
2935 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2936 *typep = CE_XDIAG_PTNR_REMOTE;
2937 return (ptnr);
2938 }
2939 if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) {
2940 if (sibptnr == NULL)
2941 sibptnr = ptnr;
2942 continue;
2943 }
2944 if (locptnr == NULL)
2945 locptnr = ptnr;
2946 } while ((ptnr = ptnr->cpu_next_part) != sp);
2947 
2948 /*
2949 * A foreign partner has already been returned if one was available.
2950 *
2951 * If locptnr is not NULL it is a cpu in the same lgroup as the
2952 * detector, is active, and is not a sibling of the detector.
2953 *
2954 * If sibptnr is not NULL it is a sibling of the detector, and is
2955 * active.
2956 *
2957 * If we have to resort to using the detector itself we have already
2958 * checked that it is active.
2959 */
2960 if (locptnr) {
2961 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
2962 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2963 *typep = CE_XDIAG_PTNR_LOCAL;
2964 return (locptnr);
2965 } else if (sibptnr && flags & PTNR_SIBLINGOK) {
2966 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
2967 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2968 *typep = CE_XDIAG_PTNR_SIBLING;
2969 return (sibptnr);
2970 } else if (flags & PTNR_SELFOK) {
2971 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
2972 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2973 *typep = CE_XDIAG_PTNR_SELF;
2974 return (dtcr);
2975 }
2976 
2977 return (NULL);
2978 }
2979 
2980 /*
2981 * Cross call handler that is requested to run on the designated partner of
2982 * a cpu that experienced a possibly sticky or possibly persistent CE.
2983 */
2984 static void
2985 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
2986 {
2987 *dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
2988 }
2989 
2990 /*
2991 * The associated errorqs are never destroyed so we do not need to deal with
2992 * them disappearing before this timeout fires. If the affected memory
2993 * has been DR'd out since the original event the scrub algorithm will catch
2994 * any errors and return null disposition info. If the original detecting
2995 * cpu has been DR'd out then ereport detector info will not be able to
2996 * look up CPU type; with a small timeout this is unlikely.
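 *
 * For reference, this callback is armed from ce_scrub_xdiag_recirc()
 * below via
 *
 *	(void) timeout((void (*)(void *))ce_lkychk_cb,
 *	    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
 *
 * so by the time it runs we are well out of the original trap context.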
2997 */ 2998 static void 2999 ce_lkychk_cb(ce_lkychk_cb_t *cbarg) 3000 { 3001 struct async_flt *aflt = cbarg->lkycb_aflt; 3002 uchar_t disp; 3003 cpu_t *cp; 3004 int ptnrtype; 3005 3006 kpreempt_disable(); 3007 if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK, 3008 &ptnrtype)) { 3009 xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt, 3010 (uint64_t)&disp); 3011 CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp); 3012 CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id); 3013 CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype); 3014 } else { 3015 ce_xdiag_lkydrops++; 3016 if (ncpus > 1) 3017 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3018 CE_XDIAG_SKIP_NOPTNR); 3019 } 3020 kpreempt_enable(); 3021 3022 errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC); 3023 kmem_free(cbarg, sizeof (ce_lkychk_cb_t)); 3024 } 3025 3026 /* 3027 * Called from errorq drain code when processing a CE error, both from 3028 * CPU and PCI drain functions. Decide what further classification actions, 3029 * if any, we will perform. Perform immediate actions now, and schedule 3030 * delayed actions as required. Note that we are no longer necessarily running 3031 * on the detecting cpu, and that the async_flt structure will not persist on 3032 * return from this function. 3033 * 3034 * Calls to this function should aim to be self-throtlling in some way. With 3035 * the delayed re-enable of CEEN the absolute rate of calls should not 3036 * be excessive. Callers should also avoid performing in-depth classification 3037 * for events in pages that are already known to be suspect. 3038 * 3039 * We return nonzero to indicate that the event has been copied and 3040 * recirculated for further testing. The caller should not log the event 3041 * in this case - it will be logged when further test results are available. 3042 * 3043 * Our possible contexts are that of errorq_drain: below lock level or from 3044 * panic context. We can assume that the cpu we are running on is online. 3045 */ 3046 3047 3048 #ifdef DEBUG 3049 static int ce_xdiag_forceaction; 3050 #endif 3051 3052 int 3053 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp, 3054 errorq_elem_t *eqep, size_t afltoffset) 3055 { 3056 ce_dispact_t dispact, action; 3057 cpu_t *cp; 3058 uchar_t dtcrinfo, disp; 3059 int ptnrtype; 3060 3061 if (!ce_disp_inited || panicstr || ce_xdiag_off) { 3062 ce_xdiag_drops++; 3063 return (0); 3064 } else if (!aflt->flt_in_memory) { 3065 ce_xdiag_drops++; 3066 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM); 3067 return (0); 3068 } 3069 3070 dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp); 3071 3072 /* 3073 * Some correctable events are not scrubbed/classified, such as those 3074 * noticed at the tail of cpu_deferred_error. So if there is no 3075 * initial detector classification go no further. 
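 *
 * For reference, the call site in cpu_async_log_err() above relies on
 * the return contract of this function as follows:
 *
 *	} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
 *	    offsetof(ch_async_flt_t, cmn_asyncflt))) {
 *		return (0);
 *	}
 *
 * i.e. a nonzero return means the event was duplicated and recirculated
 * for further testing, so the caller suppresses the ereport for now.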
3076 */ 3077 if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) { 3078 ce_xdiag_drops++; 3079 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB); 3080 return (0); 3081 } 3082 3083 dispact = CE_DISPACT(ce_disp_table, 3084 CE_XDIAG_AFARMATCHED(dtcrinfo), 3085 CE_XDIAG_STATE(dtcrinfo), 3086 CE_XDIAG_CE1SEEN(dtcrinfo), 3087 CE_XDIAG_CE2SEEN(dtcrinfo)); 3088 3089 3090 action = CE_ACT(dispact); /* bad lookup caught below */ 3091 #ifdef DEBUG 3092 if (ce_xdiag_forceaction != 0) 3093 action = ce_xdiag_forceaction; 3094 #endif 3095 3096 switch (action) { 3097 case CE_ACT_LKYCHK: { 3098 caddr_t ndata; 3099 errorq_elem_t *neqep; 3100 struct async_flt *ecc; 3101 ce_lkychk_cb_t *cbargp; 3102 3103 if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) { 3104 ce_xdiag_lkydrops++; 3105 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3106 CE_XDIAG_SKIP_DUPFAIL); 3107 break; 3108 } 3109 ecc = (struct async_flt *)(ndata + afltoffset); 3110 3111 ASSERT(ecc->flt_class == CPU_FAULT || 3112 ecc->flt_class == BUS_FAULT); 3113 ecc->flt_class = (ecc->flt_class == CPU_FAULT) ? 3114 RECIRC_CPU_FAULT : RECIRC_BUS_FAULT; 3115 3116 cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP); 3117 cbargp->lkycb_aflt = ecc; 3118 cbargp->lkycb_eqp = eqp; 3119 cbargp->lkycb_eqep = neqep; 3120 3121 (void) timeout((void (*)(void *))ce_lkychk_cb, 3122 (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec)); 3123 return (1); 3124 } 3125 3126 case CE_ACT_PTNRCHK: 3127 kpreempt_disable(); /* stop cpu list changing */ 3128 if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) { 3129 xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, 3130 (uint64_t)aflt, (uint64_t)&disp); 3131 CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp); 3132 CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id); 3133 CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype); 3134 } else if (ncpus > 1) { 3135 ce_xdiag_ptnrdrops++; 3136 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3137 CE_XDIAG_SKIP_NOPTNR); 3138 } else { 3139 ce_xdiag_ptnrdrops++; 3140 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3141 CE_XDIAG_SKIP_UNIPROC); 3142 } 3143 kpreempt_enable(); 3144 break; 3145 3146 case CE_ACT_DONE: 3147 break; 3148 3149 case CE_ACT(CE_DISP_BAD): 3150 default: 3151 #ifdef DEBUG 3152 cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action); 3153 #endif 3154 ce_xdiag_bad++; 3155 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD); 3156 break; 3157 } 3158 3159 return (0); 3160 } 3161 3162 /* 3163 * We route all errors through a single switch statement. 3164 */ 3165 void 3166 cpu_ue_log_err(struct async_flt *aflt) 3167 { 3168 switch (aflt->flt_class) { 3169 case CPU_FAULT: 3170 cpu_ereport_init(aflt); 3171 if (cpu_async_log_err(aflt, NULL)) 3172 cpu_ereport_post(aflt); 3173 break; 3174 3175 case BUS_FAULT: 3176 bus_async_log_err(aflt); 3177 break; 3178 3179 default: 3180 cmn_err(CE_WARN, "discarding async error %p with invalid " 3181 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 3182 return; 3183 } 3184 } 3185 3186 /* 3187 * Routine for panic hook callback from panic_idle(). 
3188 */ 3189 void 3190 cpu_async_panic_callb(void) 3191 { 3192 ch_async_flt_t ch_flt; 3193 struct async_flt *aflt; 3194 ch_cpu_errors_t cpu_error_regs; 3195 uint64_t afsr_errs; 3196 3197 get_cpu_error_state(&cpu_error_regs); 3198 3199 afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | 3200 (cpu_error_regs.afsr_ext & C_AFSR_EXT_L3_ERRS); 3201 3202 if (afsr_errs) { 3203 3204 bzero(&ch_flt, sizeof (ch_async_flt_t)); 3205 aflt = (struct async_flt *)&ch_flt; 3206 aflt->flt_id = gethrtime_waitfree(); 3207 aflt->flt_bus_id = getprocessorid(); 3208 aflt->flt_inst = CPU->cpu_id; 3209 aflt->flt_stat = cpu_error_regs.afsr; 3210 aflt->flt_addr = cpu_error_regs.afar; 3211 aflt->flt_prot = AFLT_PROT_NONE; 3212 aflt->flt_class = CPU_FAULT; 3213 aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0); 3214 aflt->flt_panic = 1; 3215 ch_flt.afsr_ext = cpu_error_regs.afsr_ext; 3216 ch_flt.afsr_errs = afsr_errs; 3217 #if defined(SERRANO) 3218 ch_flt.afar2 = cpu_error_regs.afar2; 3219 #endif /* SERRANO */ 3220 (void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL); 3221 } 3222 } 3223 3224 /* 3225 * Routine to convert a syndrome into a syndrome code. 3226 */ 3227 static int 3228 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit) 3229 { 3230 if (synd_status == AFLT_STAT_INVALID) 3231 return (-1); 3232 3233 /* 3234 * Use the syndrome to index the appropriate syndrome table, 3235 * to get the code indicating which bit(s) is(are) bad. 3236 */ 3237 if (afsr_bit & 3238 (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { 3239 if (afsr_bit & C_AFSR_MSYND_ERRS) { 3240 #if defined(JALAPENO) || defined(SERRANO) 3241 if ((synd == 0) || (synd >= BSYND_TBL_SIZE)) 3242 return (-1); 3243 else 3244 return (BPAR0 + synd); 3245 #else /* JALAPENO || SERRANO */ 3246 if ((synd == 0) || (synd >= MSYND_TBL_SIZE)) 3247 return (-1); 3248 else 3249 return (mtag_syndrome_tab[synd]); 3250 #endif /* JALAPENO || SERRANO */ 3251 } else { 3252 if ((synd == 0) || (synd >= ESYND_TBL_SIZE)) 3253 return (-1); 3254 else 3255 return (ecc_syndrome_tab[synd]); 3256 } 3257 } else { 3258 return (-1); 3259 } 3260 } 3261 3262 /* 3263 * Routine to return a string identifying the physical name 3264 * associated with a memory/cache error. 3265 */ 3266 int 3267 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, 3268 uint64_t flt_addr, int flt_bus_id, int flt_in_memory, 3269 ushort_t flt_status, char *buf, int buflen, int *lenp) 3270 { 3271 int synd_code; 3272 int ret; 3273 3274 /* 3275 * An AFSR of -1 defaults to a memory syndrome. 3276 */ 3277 if (flt_stat == (uint64_t)-1) 3278 flt_stat = C_AFSR_CE; 3279 3280 synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat); 3281 3282 /* 3283 * Syndrome code must be either a single-bit error code 3284 * (0...143) or -1 for unum lookup. 3285 */ 3286 if (synd_code < 0 || synd_code >= M2) 3287 synd_code = -1; 3288 if (&plat_get_mem_unum) { 3289 if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id, 3290 flt_in_memory, flt_status, buf, buflen, lenp)) != 0) { 3291 buf[0] = '\0'; 3292 *lenp = 0; 3293 } 3294 3295 return (ret); 3296 } 3297 3298 return (ENOTSUP); 3299 } 3300 3301 /* 3302 * Wrapper for cpu_get_mem_unum() routine that takes an 3303 * async_flt struct rather than explicit arguments. 
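 *
 * Typical use within this file (see cpu_log_err() above):
 *
 *	(void) cpu_get_mem_unum_aflt(synd_status, aflt, unum,
 *	    UNUM_NAMLEN, &len);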
3304 */ 3305 int 3306 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 3307 char *buf, int buflen, int *lenp) 3308 { 3309 /* 3310 * If we come thru here for an IO bus error aflt->flt_stat will 3311 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum() 3312 * so it will interpret this as a memory error. 3313 */ 3314 return (cpu_get_mem_unum(synd_status, aflt->flt_synd, 3315 (aflt->flt_class == BUS_FAULT) ? 3316 (uint64_t)-1 : ((ch_async_flt_t *)(aflt))->afsr_errs, 3317 aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, 3318 aflt->flt_status, buf, buflen, lenp)); 3319 } 3320 3321 /* 3322 * This routine is a more generic interface to cpu_get_mem_unum() 3323 * that may be used by other modules (e.g. mm). 3324 */ 3325 int 3326 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 3327 char *buf, int buflen, int *lenp) 3328 { 3329 int synd_status, flt_in_memory, ret; 3330 ushort_t flt_status = 0; 3331 char unum[UNUM_NAMLEN]; 3332 3333 /* 3334 * Check for an invalid address. 3335 */ 3336 if (afar == (uint64_t)-1) 3337 return (ENXIO); 3338 3339 if (synd == (uint64_t)-1) 3340 synd_status = AFLT_STAT_INVALID; 3341 else 3342 synd_status = AFLT_STAT_VALID; 3343 3344 flt_in_memory = (*afsr & C_AFSR_MEMORY) && 3345 pf_is_memory(afar >> MMU_PAGESHIFT); 3346 3347 /* 3348 * Need to turn on ECC_ECACHE for plat_get_mem_unum(). 3349 * For Panther, L2$ is not external, so we don't want to 3350 * generate an E$ unum for those errors. 3351 */ 3352 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 3353 if (*(afsr + 1) & C_AFSR_EXT_L3_ERRS) 3354 flt_status |= ECC_ECACHE; 3355 } else { 3356 if (*afsr & C_AFSR_ECACHE) 3357 flt_status |= ECC_ECACHE; 3358 } 3359 3360 ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 3361 CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp); 3362 if (ret != 0) 3363 return (ret); 3364 3365 if (*lenp >= buflen) 3366 return (ENAMETOOLONG); 3367 3368 (void) strncpy(buf, unum, buflen); 3369 3370 return (0); 3371 } 3372 3373 /* 3374 * Routine to return memory information associated 3375 * with a physical address and syndrome. 3376 */ 3377 int 3378 cpu_get_mem_info(uint64_t synd, uint64_t afar, 3379 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 3380 int *segsp, int *banksp, int *mcidp) 3381 { 3382 int synd_status, synd_code; 3383 3384 if (afar == (uint64_t)-1) 3385 return (ENXIO); 3386 3387 if (synd == (uint64_t)-1) 3388 synd_status = AFLT_STAT_INVALID; 3389 else 3390 synd_status = AFLT_STAT_VALID; 3391 3392 synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE); 3393 3394 if (p2get_mem_info != NULL) 3395 return ((p2get_mem_info)(synd_code, afar, 3396 mem_sizep, seg_sizep, bank_sizep, 3397 segsp, banksp, mcidp)); 3398 else 3399 return (ENOTSUP); 3400 } 3401 3402 /* 3403 * Routine to return a string identifying the physical 3404 * name associated with a cpuid. 3405 */ 3406 int 3407 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 3408 { 3409 int ret; 3410 char unum[UNUM_NAMLEN]; 3411 3412 if (&plat_get_cpu_unum) { 3413 if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp)) 3414 != 0) 3415 return (ret); 3416 } else { 3417 return (ENOTSUP); 3418 } 3419 3420 if (*lenp >= buflen) 3421 return (ENAMETOOLONG); 3422 3423 (void) strncpy(buf, unum, buflen); 3424 3425 return (0); 3426 } 3427 3428 /* 3429 * This routine exports the name buffer size. 
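 *
 * A hypothetical out-of-module caller would size its buffer with this,
 * for example (sketch only):
 *
 *	int len;
 *	char *buf = kmem_alloc(cpu_get_name_bufsize(), KM_SLEEP);
 *	(void) cpu_get_cpu_unum(cpuid, buf, cpu_get_name_bufsize(), &len);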
3430 */
3431 size_t
3432 cpu_get_name_bufsize()
3433 {
3434 return (UNUM_NAMLEN);
3435 }
3436 
3437 /*
3438 * Historical function, apparently not used.
3439 */
3440 /* ARGSUSED */
3441 void
3442 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
3443 {}
3444 
3445 /*
3446 * Historical function only called for SBus errors in debugging.
3447 */
3448 /*ARGSUSED*/
3449 void
3450 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
3451 {}
3452 
3453 /*
3454 * Clear the AFSR sticky bits. The routine returns a non-zero value if
3455 * any of the AFSR's sticky errors are detected. If a non-null pointer to
3456 * an async fault structure argument is passed in, the captured error state
3457 * (AFSR, AFAR) info will be returned in the structure.
3458 */
3459 int
3460 clear_errors(ch_async_flt_t *ch_flt)
3461 {
3462 struct async_flt *aflt = (struct async_flt *)ch_flt;
3463 ch_cpu_errors_t cpu_error_regs;
3464 
3465 get_cpu_error_state(&cpu_error_regs);
3466 
3467 if (ch_flt != NULL) {
3468 aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3469 aflt->flt_addr = cpu_error_regs.afar;
3470 ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3471 ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3472 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3473 #if defined(SERRANO)
3474 ch_flt->afar2 = cpu_error_regs.afar2;
3475 #endif /* SERRANO */
3476 }
3477 
3478 set_cpu_error_state(&cpu_error_regs);
3479 
3480 return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3481 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3482 }
3483 
3484 /*
3485 * Clear any AFSR error bits, and check for persistence.
3486 *
3487 * It would be desirable to also insist that syndrome match. PCI handling
3488 * has already filled flt_synd. For errors trapped by CPU we only fill
3489 * flt_synd when we queue the event, so we do not have a valid flt_synd
3490 * during initial classification (it is valid if we're called as part of
3491 * subsequent low-pil additional classification attempts). We could try
3492 * to determine which syndrome to use: we know we're only called for
3493 * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3494 * would be esynd/none and esynd/msynd, respectively. If that is
3495 * implemented then what do we do in the case that we do experience an
3496 * error on the same afar but with different syndrome? At the very least
3497 * we should count such occurrences. Anyway, for now, we'll leave it as
3498 * it has been for ages.
3499 */
3500 static int
3501 clear_ecc(struct async_flt *aflt)
3502 {
3503 ch_cpu_errors_t cpu_error_regs;
3504 
3505 /*
3506 * Snapshot the AFSR and AFAR and clear any errors
3507 */
3508 get_cpu_error_state(&cpu_error_regs);
3509 set_cpu_error_state(&cpu_error_regs);
3510 
3511 /*
3512 * If any of the same memory access error bits are still on and
3513 * the AFAR matches, return that the error is persistent.
3514 */
3515 return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3516 cpu_error_regs.afar == aflt->flt_addr);
3517 }
3518 
3519 /*
3520 * Turn off all cpu error detection, normally only used for panics.
3521 */
3522 void
3523 cpu_disable_errors(void)
3524 {
3525 xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3526 }
3527 
3528 /*
3529 * Enable errors.
3530 */
3531 void
3532 cpu_enable_errors(void)
3533 {
3534 xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3535 }
3536 
3537 /*
3538 * Flush the entire ecache using displacement flush by reading through a
3539 * physical address range twice as large as the Ecache.
3540 */
3541 void
3542 cpu_flush_ecache(void)
3543 {
3544 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3545 cpunodes[CPU->cpu_id].ecache_linesize);
3546 }
3547 
3548 /*
3549 * Return CPU E$ set size - E$ size divided by the associativity.
3550 * We use this function in places where the CPU_PRIVATE ptr may not be
3551 * initialized yet. Note that for send_mondo and in the Ecache scrubber,
3552 * we're guaranteed that CPU_PRIVATE is initialized. Also, cpunodes is set
3553 * up before the kernel switches from OBP's to the kernel's trap table, so
3554 * we don't have to worry about cpunodes being uninitialized.
3555 */
3556 int
3557 cpu_ecache_set_size(struct cpu *cp)
3558 {
3559 if (CPU_PRIVATE(cp))
3560 return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3561 
3562 return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3563 }
3564 
3565 /*
3566 * Flush Ecache line.
3567 * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3568 * Uses normal displacement flush for Cheetah.
3569 */
3570 static void
3571 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3572 {
3573 struct async_flt *aflt = (struct async_flt *)ch_flt;
3574 int ec_set_size = cpu_ecache_set_size(CPU);
3575 
3576 ecache_flush_line(aflt->flt_addr, ec_set_size);
3577 }
3578 
3579 /*
3580 * Scrub physical address.
3581 * Scrub code is different depending upon whether this is a Cheetah+ with 2-way
3582 * Ecache or direct-mapped Ecache.
3583 */
3584 static void
3585 cpu_scrubphys(struct async_flt *aflt)
3586 {
3587 int ec_set_size = cpu_ecache_set_size(CPU);
3588 
3589 scrubphys(aflt->flt_addr, ec_set_size);
3590 }
3591 
3592 /*
3593 * Clear physical address.
3594 * Scrub code is different depending upon whether this is a Cheetah+ with 2-way
3595 * Ecache or direct-mapped Ecache.
3596 */
3597 void
3598 cpu_clearphys(struct async_flt *aflt)
3599 {
3600 int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3601 int ec_set_size = cpu_ecache_set_size(CPU);
3602 
3603 
3604 clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize);
3605 }
3606 
3607 #if defined(CPU_IMP_ECACHE_ASSOC)
3608 /*
3609 * Check for a matching valid line in all the sets.
3610 * If found, return set# + 1. Otherwise return 0.
3611 */
3612 static int
3613 cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3614 {
3615 struct async_flt *aflt = (struct async_flt *)ch_flt;
3616 int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3617 int ec_set_size = cpu_ecache_set_size(CPU);
3618 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3619 int nway = cpu_ecache_nway();
3620 int i;
3621 
3622 for (i = 0; i < nway; i++, ecp++) {
3623 if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3624 (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3625 cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3626 return (i+1);
3627 }
3628 return (0);
3629 }
3630 #endif /* CPU_IMP_ECACHE_ASSOC */
3631 
3632 /*
3633 * Check whether a line in the given logout info matches the specified
3634 * fault address. If reqval is set then the line must not be Invalid.
3635 * Returns 0 on failure; on success (way + 1) is returned and *level is
3636 * set to 2 for l2$ or 3 for l3$.
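 *
 * Caller pattern (see cpu_ce_scrub_mem_err_common() above), where the
 * returned value is decremented back to a zero-based way index:
 *
 *	hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
 *	    0, &level);
 *	if (hit) {
 *		--hit;
 *		...
 *	}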
3637 */ 3638 static int 3639 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level) 3640 { 3641 ch_diag_data_t *cdp = data; 3642 ch_ec_data_t *ecp; 3643 int totalsize, ec_set_size; 3644 int i, ways; 3645 int match = 0; 3646 int tagvalid; 3647 uint64_t addr, tagpa; 3648 int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation); 3649 3650 /* 3651 * Check the l2$ logout data 3652 */ 3653 if (ispanther) { 3654 ecp = &cdp->chd_l2_data[0]; 3655 ec_set_size = PN_L2_SET_SIZE; 3656 ways = PN_L2_NWAYS; 3657 } else { 3658 ecp = &cdp->chd_ec_data[0]; 3659 ec_set_size = cpu_ecache_set_size(CPU); 3660 ways = cpu_ecache_nway(); 3661 totalsize = cpunodes[CPU->cpu_id].ecache_size; 3662 } 3663 /* remove low order PA bits from fault address not used in PA tag */ 3664 addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size); 3665 for (i = 0; i < ways; i++, ecp++) { 3666 if (ispanther) { 3667 tagpa = PN_L2TAG_TO_PA(ecp->ec_tag); 3668 tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag); 3669 } else { 3670 tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag); 3671 tagvalid = !cpu_ectag_line_invalid(totalsize, 3672 ecp->ec_tag); 3673 } 3674 if (tagpa == addr && (!reqval || tagvalid)) { 3675 match = i + 1; 3676 *level = 2; 3677 break; 3678 } 3679 } 3680 3681 if (match || !ispanther) 3682 return (match); 3683 3684 /* For Panther we also check the l3$ */ 3685 ecp = &cdp->chd_ec_data[0]; 3686 ec_set_size = PN_L3_SET_SIZE; 3687 ways = PN_L3_NWAYS; 3688 addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size); 3689 3690 for (i = 0; i < ways; i++, ecp++) { 3691 if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval || 3692 !PN_L3_LINE_INVALID(ecp->ec_tag))) { 3693 match = i + 1; 3694 *level = 3; 3695 break; 3696 } 3697 } 3698 3699 return (match); 3700 } 3701 3702 #if defined(CPU_IMP_L1_CACHE_PARITY) 3703 /* 3704 * Record information related to the source of a Dcache Parity Error. 3705 */ 3706 static void 3707 cpu_dcache_parity_info(ch_async_flt_t *ch_flt) 3708 { 3709 int dc_set_size = dcache_size / CH_DCACHE_NWAY; 3710 int index; 3711 3712 /* 3713 * Since instruction decode cannot be done at high PIL 3714 * just examine the entire Dcache to locate the error. 3715 */ 3716 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3717 ch_flt->parity_data.dpe.cpl_way = -1; 3718 ch_flt->parity_data.dpe.cpl_off = -1; 3719 } 3720 for (index = 0; index < dc_set_size; index += dcache_linesize) 3721 cpu_dcache_parity_check(ch_flt, index); 3722 } 3723 3724 /* 3725 * Check all ways of the Dcache at a specified index for good parity. 3726 */ 3727 static void 3728 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index) 3729 { 3730 int dc_set_size = dcache_size / CH_DCACHE_NWAY; 3731 uint64_t parity_bits, pbits, data_word; 3732 static int parity_bits_popc[] = { 0, 1, 1, 0 }; 3733 int way, word, data_byte; 3734 ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0]; 3735 ch_dc_data_t tmp_dcp; 3736 3737 for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) { 3738 /* 3739 * Perform diagnostic read. 3740 */ 3741 get_dcache_dtag(index + way * dc_set_size, 3742 (uint64_t *)&tmp_dcp); 3743 3744 /* 3745 * Check tag for even parity. 3746 * Sum of 1 bits (including parity bit) should be even. 3747 */ 3748 if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) { 3749 /* 3750 * If this is the first error, log detailed information 3751 * about it and check the snoop tag. Otherwise just 3752 * record the fact that we found another error.
3753 */ 3754 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3755 ch_flt->parity_data.dpe.cpl_way = way; 3756 ch_flt->parity_data.dpe.cpl_cache = 3757 CPU_DC_PARITY; 3758 ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG; 3759 3760 if (popc64(tmp_dcp.dc_sntag & 3761 CHP_DCSNTAG_PARMASK) & 1) { 3762 ch_flt->parity_data.dpe.cpl_tag |= 3763 CHP_DC_SNTAG; 3764 ch_flt->parity_data.dpe.cpl_lcnt++; 3765 } 3766 3767 bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t)); 3768 } 3769 3770 ch_flt->parity_data.dpe.cpl_lcnt++; 3771 } 3772 3773 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 3774 /* 3775 * Panther has more parity bits than the other 3776 * processors for covering dcache data and so each 3777 * byte of data in each word has its own parity bit. 3778 */ 3779 parity_bits = tmp_dcp.dc_pn_data_parity; 3780 for (word = 0; word < 4; word++) { 3781 data_word = tmp_dcp.dc_data[word]; 3782 pbits = parity_bits & PN_DC_DATA_PARITY_MASK; 3783 for (data_byte = 0; data_byte < 8; 3784 data_byte++) { 3785 if (((popc64(data_word & 3786 PN_DC_DATA_PARITY_MASK)) & 1) ^ 3787 (pbits & 1)) { 3788 cpu_record_dc_data_parity( 3789 ch_flt, dcp, &tmp_dcp, way, 3790 word); 3791 } 3792 pbits >>= 1; 3793 data_word >>= 8; 3794 } 3795 parity_bits >>= 8; 3796 } 3797 } else { 3798 /* 3799 * Check data array for even parity. 3800 * The 8 parity bits are grouped into 4 pairs each 3801 * of which covers a 64-bit word. The endianness is 3802 * reversed -- the low-order parity bits cover the 3803 * high-order data words. 3804 */ 3805 parity_bits = tmp_dcp.dc_utag >> 8; 3806 for (word = 0; word < 4; word++) { 3807 pbits = (parity_bits >> (6 - word * 2)) & 3; 3808 if ((popc64(tmp_dcp.dc_data[word]) + 3809 parity_bits_popc[pbits]) & 1) { 3810 cpu_record_dc_data_parity(ch_flt, dcp, 3811 &tmp_dcp, way, word); 3812 } 3813 } 3814 } 3815 } 3816 } 3817 3818 static void 3819 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt, 3820 ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word) 3821 { 3822 /* 3823 * If this is the first error log detailed information about it. 3824 * Otherwise just record the fact that we found another error. 3825 */ 3826 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3827 ch_flt->parity_data.dpe.cpl_way = way; 3828 ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY; 3829 ch_flt->parity_data.dpe.cpl_off = word * 8; 3830 bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t)); 3831 } 3832 ch_flt->parity_data.dpe.cpl_lcnt++; 3833 } 3834 3835 /* 3836 * Record information related to the source of an Icache Parity Error. 3837 * 3838 * Called with the Icache disabled so any diagnostic accesses are safe. 3839 */ 3840 static void 3841 cpu_icache_parity_info(ch_async_flt_t *ch_flt) 3842 { 3843 int ic_set_size; 3844 int ic_linesize; 3845 int index; 3846 3847 if (CPU_PRIVATE(CPU)) { 3848 ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) / 3849 CH_ICACHE_NWAY; 3850 ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize); 3851 } else { 3852 ic_set_size = icache_size / CH_ICACHE_NWAY; 3853 ic_linesize = icache_linesize; 3854 } 3855 3856 ch_flt->parity_data.ipe.cpl_way = -1; 3857 ch_flt->parity_data.ipe.cpl_off = -1; 3858 3859 for (index = 0; index < ic_set_size; index += ic_linesize) 3860 cpu_icache_parity_check(ch_flt, index); 3861 } 3862 3863 /* 3864 * Check all ways of the Icache at a specified index for good parity. 
3865 */ 3866 static void 3867 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index) 3868 { 3869 uint64_t parmask, pn_inst_parity; 3870 int ic_set_size; 3871 int ic_linesize; 3872 int flt_index, way, instr, num_instr; 3873 struct async_flt *aflt = (struct async_flt *)ch_flt; 3874 ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0]; 3875 ch_ic_data_t tmp_icp; 3876 3877 if (CPU_PRIVATE(CPU)) { 3878 ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) / 3879 CH_ICACHE_NWAY; 3880 ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize); 3881 } else { 3882 ic_set_size = icache_size / CH_ICACHE_NWAY; 3883 ic_linesize = icache_linesize; 3884 } 3885 3886 /* 3887 * Panther has twice as many instructions per icache line and the 3888 * instruction parity bit is in a different location. 3889 */ 3890 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 3891 num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t); 3892 pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK; 3893 } else { 3894 num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t); 3895 pn_inst_parity = 0; 3896 } 3897 3898 /* 3899 * Index at which we expect to find the parity error. 3900 */ 3901 flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize); 3902 3903 for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) { 3904 /* 3905 * Diagnostic reads expect address argument in ASI format. 3906 */ 3907 get_icache_dtag(2 * (index + way * ic_set_size), 3908 (uint64_t *)&tmp_icp); 3909 3910 /* 3911 * If this is the index in which we expect to find the 3912 * error, log detailed information about each of the ways. 3913 * This information will be displayed later if we can't 3914 * determine the exact way in which the error is located. 3915 */ 3916 if (flt_index == index) 3917 bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t)); 3918 3919 /* 3920 * Check tag for even parity. 3921 * Sum of 1 bits (including parity bit) should be even. 3922 */ 3923 if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) { 3924 /* 3925 * If this way is the one in which we expected 3926 * to find the error, record the way and check the 3927 * snoop tag. Otherwise just record the fact we 3928 * found another error. 3929 */ 3930 if (flt_index == index) { 3931 ch_flt->parity_data.ipe.cpl_way = way; 3932 ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG; 3933 3934 if (popc64(tmp_icp.ic_sntag & 3935 CHP_ICSNTAG_PARMASK) & 1) { 3936 ch_flt->parity_data.ipe.cpl_tag |= 3937 CHP_IC_SNTAG; 3938 ch_flt->parity_data.ipe.cpl_lcnt++; 3939 } 3940 3941 } 3942 ch_flt->parity_data.ipe.cpl_lcnt++; 3943 continue; 3944 } 3945 3946 /* 3947 * Check instruction data for even parity. 3948 * Bits participating in parity differ for PC-relative 3949 * versus non-PC-relative instructions. 3950 */ 3951 for (instr = 0; instr < num_instr; instr++) { 3952 parmask = (tmp_icp.ic_data[instr] & 3953 CH_ICDATA_PRED_ISPCREL) ? 3954 (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) : 3955 (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity); 3956 if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) { 3957 /* 3958 * If this way is the one in which we expected 3959 * to find the error, record the way and offset. 3960 * Otherwise just log the fact we found another 3961 * error. 3962 */ 3963 if (flt_index == index) { 3964 ch_flt->parity_data.ipe.cpl_way = way; 3965 ch_flt->parity_data.ipe.cpl_off = 3966 instr * 4; 3967 } 3968 ch_flt->parity_data.ipe.cpl_lcnt++; 3969 continue; 3970 } 3971 } 3972 } 3973 } 3974 3975 /* 3976 * Record information related to the source of a Pcache Parity Error.
3977 */ 3978 static void 3979 cpu_pcache_parity_info(ch_async_flt_t *ch_flt) 3980 { 3981 int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY; 3982 int index; 3983 3984 /* 3985 * Since instruction decode cannot be done at high PIL just 3986 * examine the entire Pcache to check for any parity errors. 3987 */ 3988 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3989 ch_flt->parity_data.dpe.cpl_way = -1; 3990 ch_flt->parity_data.dpe.cpl_off = -1; 3991 } 3992 for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE) 3993 cpu_pcache_parity_check(ch_flt, index); 3994 } 3995 3996 /* 3997 * Check all ways of the Pcache at a specified index for good parity. 3998 */ 3999 static void 4000 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index) 4001 { 4002 int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY; 4003 int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t); 4004 int way, word, pbit, parity_bits; 4005 ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0]; 4006 ch_pc_data_t tmp_pcp; 4007 4008 for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) { 4009 /* 4010 * Perform diagnostic read. 4011 */ 4012 get_pcache_dtag(index + way * pc_set_size, 4013 (uint64_t *)&tmp_pcp); 4014 /* 4015 * Check data array for odd parity. There are 8 parity 4016 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each 4017 * of those bits covers exactly 8 bytes of the data 4018 * array: 4019 * 4020 * parity bit P$ data bytes covered 4021 * ---------- --------------------- 4022 * 50 63:56 4023 * 51 55:48 4024 * 52 47:40 4025 * 53 39:32 4026 * 54 31:24 4027 * 55 23:16 4028 * 56 15:8 4029 * 57 7:0 4030 */ 4031 parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status); 4032 for (word = 0; word < pc_data_words; word++) { 4033 pbit = (parity_bits >> (pc_data_words - word - 1)) & 1; 4034 if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) { 4035 /* 4036 * If this is the first error, log detailed 4037 * information about it. Otherwise just record 4038 * the fact that we found another error. 4039 */ 4040 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 4041 ch_flt->parity_data.dpe.cpl_way = way; 4042 ch_flt->parity_data.dpe.cpl_cache = 4043 CPU_PC_PARITY; 4044 ch_flt->parity_data.dpe.cpl_off = 4045 word * sizeof (uint64_t); 4046 bcopy(&tmp_pcp, pcp, 4047 sizeof (ch_pc_data_t)); 4048 } 4049 ch_flt->parity_data.dpe.cpl_lcnt++; 4050 } 4051 } 4052 } 4053 } 4054 4055 4056 /* 4057 * Add L1 Data cache data to the ereport payload. 4058 */ 4059 static void 4060 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl) 4061 { 4062 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4063 ch_dc_data_t *dcp; 4064 ch_dc_data_t dcdata[CH_DCACHE_NWAY]; 4065 uint_t nelem; 4066 int i, ways_to_check, ways_logged = 0; 4067 4068 /* 4069 * If this is a D$ fault then there may be multiple 4070 * ways captured in the ch_parity_log_t structure. 4071 * Otherwise, there will be at most one way captured 4072 * in the ch_diag_data_t struct. 4073 * Check each way to see if it should be encoded. 4074 */ 4075 if (ch_flt->flt_type == CPU_DC_PARITY) 4076 ways_to_check = CH_DCACHE_NWAY; 4077 else 4078 ways_to_check = 1; 4079 for (i = 0; i < ways_to_check; i++) { 4080 if (ch_flt->flt_type == CPU_DC_PARITY) 4081 dcp = &ch_flt->parity_data.dpe.cpl_dc[i]; 4082 else 4083 dcp = &ch_flt->flt_diag_data.chd_dc_data; 4084 if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) { 4085 bcopy(dcp, &dcdata[ways_logged], 4086 sizeof (ch_dc_data_t)); 4087 ways_logged++; 4088 } 4089 } 4090 4091 /* 4092 * Add the dcache data to the payload.
4093 */ 4094 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS, 4095 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4096 if (ways_logged != 0) { 4097 nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged; 4098 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA, 4099 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL); 4100 } 4101 } 4102 4103 /* 4104 * Add L1 Instruction cache data to the ereport payload. 4105 */ 4106 static void 4107 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl) 4108 { 4109 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4110 ch_ic_data_t *icp; 4111 ch_ic_data_t icdata[CH_ICACHE_NWAY]; 4112 uint_t nelem; 4113 int i, ways_to_check, ways_logged = 0; 4114 4115 /* 4116 * If this is an I$ fault then there may be multiple 4117 * ways captured in the ch_parity_log_t structure. 4118 * Otherwise, there will be at most one way captured 4119 * in the ch_diag_data_t struct. 4120 * Check each way to see if it should be encoded. 4121 */ 4122 if (ch_flt->flt_type == CPU_IC_PARITY) 4123 ways_to_check = CH_ICACHE_NWAY; 4124 else 4125 ways_to_check = 1; 4126 for (i = 0; i < ways_to_check; i++) { 4127 if (ch_flt->flt_type == CPU_IC_PARITY) 4128 icp = &ch_flt->parity_data.ipe.cpl_ic[i]; 4129 else 4130 icp = &ch_flt->flt_diag_data.chd_ic_data; 4131 if (icp->ic_logflag == IC_LOGFLAG_MAGIC) { 4132 bcopy(icp, &icdata[ways_logged], 4133 sizeof (ch_ic_data_t)); 4134 ways_logged++; 4135 } 4136 } 4137 4138 /* 4139 * Add the icache data to the payload. 4140 */ 4141 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS, 4142 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4143 if (ways_logged != 0) { 4144 nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged; 4145 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA, 4146 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL); 4147 } 4148 } 4149 4150 #endif /* CPU_IMP_L1_CACHE_PARITY */ 4151 4152 /* 4153 * Add ecache data to payload. 4154 */ 4155 static void 4156 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl) 4157 { 4158 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4159 ch_ec_data_t *ecp; 4160 ch_ec_data_t ecdata[CHD_EC_DATA_SETS]; 4161 uint_t nelem; 4162 int i, ways_logged = 0; 4163 4164 /* 4165 * Check each way to see if it should be encoded 4166 * and concatenate it into a temporary buffer. 4167 */ 4168 for (i = 0; i < CHD_EC_DATA_SETS; i++) { 4169 ecp = &ch_flt->flt_diag_data.chd_ec_data[i]; 4170 if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) { 4171 bcopy(ecp, &ecdata[ways_logged], 4172 sizeof (ch_ec_data_t)); 4173 ways_logged++; 4174 } 4175 } 4176 4177 /* 4178 * Panther CPUs have an additional level of cache and so 4179 * what we just collected was the L3 (ecache) and not the 4180 * L2 cache. 4181 */ 4182 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { 4183 /* 4184 * Add the L3 (ecache) data to the payload. 4185 */ 4186 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS, 4187 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4188 if (ways_logged != 0) { 4189 nelem = sizeof (ch_ec_data_t) / 4190 sizeof (uint64_t) * ways_logged; 4191 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA, 4192 DATA_TYPE_UINT64_ARRAY, nelem, 4193 (uint64_t *)ecdata, NULL); 4194 } 4195 4196 /* 4197 * Now collect the L2 cache.
4198 */ 4199 ways_logged = 0; 4200 for (i = 0; i < PN_L2_NWAYS; i++) { 4201 ecp = &ch_flt->flt_diag_data.chd_l2_data[i]; 4202 if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) { 4203 bcopy(ecp, &ecdata[ways_logged], 4204 sizeof (ch_ec_data_t)); 4205 ways_logged++; 4206 } 4207 } 4208 } 4209 4210 /* 4211 * Add the L2 cache data to the payload. 4212 */ 4213 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS, 4214 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4215 if (ways_logged != 0) { 4216 nelem = sizeof (ch_ec_data_t) / 4217 sizeof (uint64_t) * ways_logged; 4218 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA, 4219 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)ecdata, NULL); 4220 } 4221 } 4222 4223 /* 4224 * Encode the data saved in the ch_async_flt_t struct into 4225 * the FM ereport payload. 4226 */ 4227 static void 4228 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload, 4229 nvlist_t *resource, int *afar_status, int *synd_status) 4230 { 4231 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4232 *synd_status = AFLT_STAT_INVALID; 4233 *afar_status = AFLT_STAT_INVALID; 4234 4235 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) { 4236 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR, 4237 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 4238 } 4239 4240 if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) && 4241 IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { 4242 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT, 4243 DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL); 4244 } 4245 4246 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) { 4247 *afar_status = afsr_to_afar_status(ch_flt->afsr_errs, 4248 ch_flt->flt_bit); 4249 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS, 4250 DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL); 4251 } 4252 4253 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) { 4254 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR, 4255 DATA_TYPE_UINT64, aflt->flt_addr, NULL); 4256 } 4257 4258 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) { 4259 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC, 4260 DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL); 4261 } 4262 4263 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) { 4264 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL, 4265 DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL); 4266 } 4267 4268 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) { 4269 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT, 4270 DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL); 4271 } 4272 4273 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) { 4274 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV, 4275 DATA_TYPE_BOOLEAN_VALUE, 4276 (aflt->flt_priv ? B_TRUE : B_FALSE), NULL); 4277 } 4278 4279 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) { 4280 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME, 4281 DATA_TYPE_BOOLEAN_VALUE, 4282 (aflt->flt_stat & C_AFSR_ME) ? 
B_TRUE : B_FALSE, NULL); 4283 } 4284 4285 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) { 4286 *synd_status = afsr_to_synd_status(aflt->flt_inst, 4287 ch_flt->afsr_errs, ch_flt->flt_bit); 4288 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS, 4289 DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL); 4290 } 4291 4292 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) { 4293 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND, 4294 DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL); 4295 } 4296 4297 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) { 4298 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, 4299 DATA_TYPE_STRING, flt_to_error_type(aflt), NULL); 4300 } 4301 4302 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) { 4303 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP, 4304 DATA_TYPE_UINT64, aflt->flt_disp, NULL); 4305 } 4306 4307 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2) 4308 cpu_payload_add_ecache(aflt, payload); 4309 4310 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) { 4311 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION, 4312 DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL); 4313 } 4314 4315 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) { 4316 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED, 4317 DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL); 4318 } 4319 4320 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) { 4321 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK, 4322 DATA_TYPE_UINT32_ARRAY, 16, 4323 (uint32_t *)&ch_flt->flt_fpdata, NULL); 4324 } 4325 4326 #if defined(CPU_IMP_L1_CACHE_PARITY) 4327 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D) 4328 cpu_payload_add_dcache(aflt, payload); 4329 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I) 4330 cpu_payload_add_icache(aflt, payload); 4331 #endif /* CPU_IMP_L1_CACHE_PARITY */ 4332 4333 #if defined(CHEETAH_PLUS) 4334 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P) 4335 cpu_payload_add_pcache(aflt, payload); 4336 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB) 4337 cpu_payload_add_tlb(aflt, payload); 4338 #endif /* CHEETAH_PLUS */ 4339 /* 4340 * Create the FMRI that goes into the payload 4341 * and contains the unum info if necessary. 4342 */ 4343 if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) && 4344 (*afar_status == AFLT_STAT_VALID)) { 4345 char unum[UNUM_NAMLEN]; 4346 int len; 4347 4348 if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum, 4349 UNUM_NAMLEN, &len) == 0) { 4350 fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, 4351 NULL, unum, NULL); 4352 fm_payload_set(payload, 4353 FM_EREPORT_PAYLOAD_NAME_RESOURCE, 4354 DATA_TYPE_NVLIST, resource, NULL); 4355 } 4356 } 4357 } 4358 4359 /* 4360 * Initialize the way info if necessary. 4361 */ 4362 void 4363 cpu_ereport_init(struct async_flt *aflt) 4364 { 4365 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4366 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; 4367 ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0]; 4368 int i; 4369 4370 /* 4371 * Initialize the info in the CPU logout structure. 4372 * The I$/D$ way information is not initialized here 4373 * since it is captured in the logout assembly code. 
4374 */ 4375 for (i = 0; i < CHD_EC_DATA_SETS; i++) 4376 (ecp + i)->ec_way = i; 4377 4378 for (i = 0; i < PN_L2_NWAYS; i++) 4379 (l2p + i)->ec_way = i; 4380 } 4381 4382 /* 4383 * Returns whether fault address is valid for this error bit and 4384 * whether the address is "in memory" (i.e. pf_is_memory returns 1). 4385 */ 4386 int 4387 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit) 4388 { 4389 struct async_flt *aflt = (struct async_flt *)ch_flt; 4390 4391 return ((aflt->flt_stat & C_AFSR_MEMORY) && 4392 afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) == 4393 AFLT_STAT_VALID && 4394 pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)); 4395 } 4396 4397 static void 4398 cpu_log_diag_info(ch_async_flt_t *ch_flt) 4399 { 4400 struct async_flt *aflt = (struct async_flt *)ch_flt; 4401 ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data; 4402 ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data; 4403 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; 4404 #if defined(CPU_IMP_ECACHE_ASSOC) 4405 int i, nway; 4406 #endif /* CPU_IMP_ECACHE_ASSOC */ 4407 4408 /* 4409 * Check if the CPU log out captured was valid. 4410 */ 4411 if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID || 4412 ch_flt->flt_data_incomplete) 4413 return; 4414 4415 #if defined(CPU_IMP_ECACHE_ASSOC) 4416 nway = cpu_ecache_nway(); 4417 i = cpu_ecache_line_valid(ch_flt); 4418 if (i == 0 || i > nway) { 4419 for (i = 0; i < nway; i++) 4420 ecp[i].ec_logflag = EC_LOGFLAG_MAGIC; 4421 } else 4422 ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC; 4423 #else /* CPU_IMP_ECACHE_ASSOC */ 4424 ecp->ec_logflag = EC_LOGFLAG_MAGIC; 4425 #endif /* CPU_IMP_ECACHE_ASSOC */ 4426 4427 #if defined(CHEETAH_PLUS) 4428 pn_cpu_log_diag_l2_info(ch_flt); 4429 #endif /* CHEETAH_PLUS */ 4430 4431 if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) { 4432 dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx); 4433 dcp->dc_logflag = DC_LOGFLAG_MAGIC; 4434 } 4435 4436 if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) { 4437 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 4438 icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx); 4439 else 4440 icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx); 4441 icp->ic_logflag = IC_LOGFLAG_MAGIC; 4442 } 4443 } 4444 4445 /* 4446 * Cheetah ECC calculation. 4447 * 4448 * We only need to do the calculation on the data bits and can ignore check 4449 * bit and Mtag bit terms in the calculation. 4450 */ 4451 static uint64_t ch_ecc_table[9][2] = { 4452 /* 4453 * low order 64-bits high-order 64-bits 4454 */ 4455 { 0x46bffffeccd1177f, 0x488800022100014c }, 4456 { 0x42fccc81331ff77f, 0x14424f1010249184 }, 4457 { 0x8898827c222f1ffe, 0x22c1222808184aaf }, 4458 { 0xf7632203e131ccf1, 0xe1241121848292b8 }, 4459 { 0x7f5511421b113809, 0x901c88d84288aafe }, 4460 { 0x1d49412184882487, 0x8f338c87c044c6ef }, 4461 { 0xf552181014448344, 0x7ff8f4443e411911 }, 4462 { 0x2189240808f24228, 0xfeeff8cc81333f42 }, 4463 { 0x3280008440001112, 0xfee88b337ffffd62 }, 4464 }; 4465 4466 /* 4467 * 64-bit population count, use well-known popcnt trick. 4468 * We could use the UltraSPARC V9 POPC instruction, but some 4469 * CPUs including Cheetahplus and Jaguar do not support that 4470 * instruction. 4471 */ 4472 int 4473 popc64(uint64_t val) 4474 { 4475 int cnt; 4476 4477 for (cnt = 0; val != 0; val &= val - 1) 4478 cnt++; 4479 return (cnt); 4480 } 4481 4482 /* 4483 * Generate the 9 ECC bits for the 128-bit chunk based on the table above. 
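 * Each of the 9 check bits is the XOR (even parity) of the data bits selected by the corresponding row of masks, one mask for the low-order 64 bits and one for the high-order 64 bits.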
4484 * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number 4485 * of 1 bits == 0, so we can just use the least significant bit of the popcnt 4486 * instead of doing all the xor's. 4487 */ 4488 uint32_t 4489 us3_gen_ecc(uint64_t data_low, uint64_t data_high) 4490 { 4491 int bitno, s; 4492 int synd = 0; 4493 4494 for (bitno = 0; bitno < 9; bitno++) { 4495 s = (popc64(data_low & ch_ecc_table[bitno][0]) + 4496 popc64(data_high & ch_ecc_table[bitno][1])) & 1; 4497 synd |= (s << bitno); 4498 } 4499 return (synd); 4500 4501 } 4502 4503 /* 4504 * Queue one event based on ecc_type_to_info entry. If the event has an AFT1 4505 * tag associated with it or is a fatal event (aflt_panic set), it is sent to 4506 * the UE event queue. Otherwise it is dispatched to the CE event queue. 4507 */ 4508 static void 4509 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason, 4510 ecc_type_to_info_t *eccp, ch_diag_data_t *cdp) 4511 { 4512 struct async_flt *aflt = (struct async_flt *)ch_flt; 4513 4514 if (reason && 4515 strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { 4516 (void) strcat(reason, eccp->ec_reason); 4517 } 4518 4519 ch_flt->flt_bit = eccp->ec_afsr_bit; 4520 ch_flt->flt_type = eccp->ec_flt_type; 4521 if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID) 4522 ch_flt->flt_diag_data = *cdp; 4523 else 4524 ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID; 4525 aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit); 4526 4527 if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS) 4528 aflt->flt_synd = GET_M_SYND(aflt->flt_stat); 4529 else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) 4530 aflt->flt_synd = GET_E_SYND(aflt->flt_stat); 4531 else 4532 aflt->flt_synd = 0; 4533 4534 aflt->flt_payload = eccp->ec_err_payload; 4535 4536 if (aflt->flt_panic || (eccp->ec_afsr_bit & 4537 (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1))) 4538 cpu_errorq_dispatch(eccp->ec_err_class, 4539 (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue, 4540 aflt->flt_panic); 4541 else 4542 cpu_errorq_dispatch(eccp->ec_err_class, 4543 (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue, 4544 aflt->flt_panic); 4545 } 4546 4547 /* 4548 * Queue events on async event queue one event per error bit. First we 4549 * queue the events that we "expect" for the given trap, then we queue events 4550 * that we may not expect. Return number of events queued. 4551 */ 4552 int 4553 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs, 4554 ch_cpu_logout_t *clop) 4555 { 4556 struct async_flt *aflt = (struct async_flt *)ch_flt; 4557 ecc_type_to_info_t *eccp; 4558 int nevents = 0; 4559 uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat; 4560 #if defined(CHEETAH_PLUS) 4561 uint64_t orig_t_afsr_errs; 4562 #endif 4563 uint64_t primary_afsr_ext = ch_flt->afsr_ext; 4564 uint64_t primary_afsr_errs = ch_flt->afsr_errs; 4565 ch_diag_data_t *cdp = NULL; 4566 4567 t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS); 4568 4569 #if defined(CHEETAH_PLUS) 4570 orig_t_afsr_errs = t_afsr_errs; 4571 4572 /* 4573 * For Cheetah+, log the shadow AFSR/AFAR bits first. 4574 */ 4575 if (clop != NULL) { 4576 /* 4577 * Set the AFSR and AFAR fields to the shadow registers. The 4578 * flt_addr and flt_stat fields will be reset to the primaries 4579 * below, but the sdw_addr and sdw_stat will stay as the 4580 * secondaries. 
4581 */ 4582 cdp = &clop->clo_sdw_data; 4583 aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar; 4584 aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr; 4585 ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext; 4586 ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 4587 (cdp->chd_afsr & C_AFSR_ALL_ERRS); 4588 4589 /* 4590 * If the primary and shadow AFSR differ, tag the shadow as 4591 * the first fault. 4592 */ 4593 if ((primary_afar != cdp->chd_afar) || 4594 (primary_afsr_errs != ch_flt->afsr_errs)) { 4595 aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT); 4596 } 4597 4598 /* 4599 * Check AFSR bits as well as AFSR_EXT bits in order of 4600 * the AFAR overwrite priority. Our stored AFSR_EXT value 4601 * is expected to be zero for those CPUs which do not have 4602 * an AFSR_EXT register. 4603 */ 4604 for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) { 4605 if ((eccp->ec_afsr_bit & 4606 (ch_flt->afsr_errs & t_afsr_errs)) && 4607 ((eccp->ec_flags & aflt->flt_status) != 0)) { 4608 cpu_queue_one_event(ch_flt, reason, eccp, cdp); 4609 cdp = NULL; 4610 t_afsr_errs &= ~eccp->ec_afsr_bit; 4611 nevents++; 4612 } 4613 } 4614 4615 /* 4616 * If the ME bit is on in the primary AFSR turn all the 4617 * error bits on again that may set the ME bit to make 4618 * sure we see the ME AFSR error logs. 4619 */ 4620 if ((primary_afsr & C_AFSR_ME) != 0) 4621 t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS); 4622 } 4623 #endif /* CHEETAH_PLUS */ 4624 4625 if (clop != NULL) 4626 cdp = &clop->clo_data; 4627 4628 /* 4629 * Queue expected errors, error bit and fault type must match 4630 * in the ecc_type_to_info table. 4631 */ 4632 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 4633 eccp++) { 4634 if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && 4635 (eccp->ec_flags & aflt->flt_status) != 0) { 4636 #if defined(SERRANO) 4637 /* 4638 * For FRC/FRU errors on Serrano the afar2 captures 4639 * the address and the associated data is 4640 * in the shadow logout area. 4641 */ 4642 if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) { 4643 if (clop != NULL) 4644 cdp = &clop->clo_sdw_data; 4645 aflt->flt_addr = ch_flt->afar2; 4646 } else { 4647 if (clop != NULL) 4648 cdp = &clop->clo_data; 4649 aflt->flt_addr = primary_afar; 4650 } 4651 #else /* SERRANO */ 4652 aflt->flt_addr = primary_afar; 4653 #endif /* SERRANO */ 4654 aflt->flt_stat = primary_afsr; 4655 ch_flt->afsr_ext = primary_afsr_ext; 4656 ch_flt->afsr_errs = primary_afsr_errs; 4657 cpu_queue_one_event(ch_flt, reason, eccp, cdp); 4658 cdp = NULL; 4659 t_afsr_errs &= ~eccp->ec_afsr_bit; 4660 nevents++; 4661 } 4662 } 4663 4664 /* 4665 * Queue unexpected errors, error bit only match. 4666 */ 4667 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 4668 eccp++) { 4669 if (eccp->ec_afsr_bit & t_afsr_errs) { 4670 #if defined(SERRANO) 4671 /* 4672 * For FRC/FRU errors on Serrano the afar2 captures 4673 * the address and the associated data is 4674 * in the shadow logout area. 
4675 */ 4676 if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) { 4677 if (clop != NULL) 4678 cdp = &clop->clo_sdw_data; 4679 aflt->flt_addr = ch_flt->afar2; 4680 } else { 4681 if (clop != NULL) 4682 cdp = &clop->clo_data; 4683 aflt->flt_addr = primary_afar; 4684 } 4685 #else /* SERRANO */ 4686 aflt->flt_addr = primary_afar; 4687 #endif /* SERRANO */ 4688 aflt->flt_stat = primary_afsr; 4689 ch_flt->afsr_ext = primary_afsr_ext; 4690 ch_flt->afsr_errs = primary_afsr_errs; 4691 cpu_queue_one_event(ch_flt, reason, eccp, cdp); 4692 cdp = NULL; 4693 t_afsr_errs &= ~eccp->ec_afsr_bit; 4694 nevents++; 4695 } 4696 } 4697 return (nevents); 4698 } 4699 4700 /* 4701 * Return trap type number. 4702 */ 4703 uint8_t 4704 flt_to_trap_type(struct async_flt *aflt) 4705 { 4706 if (aflt->flt_status & ECC_I_TRAP) 4707 return (TRAP_TYPE_ECC_I); 4708 if (aflt->flt_status & ECC_D_TRAP) 4709 return (TRAP_TYPE_ECC_D); 4710 if (aflt->flt_status & ECC_F_TRAP) 4711 return (TRAP_TYPE_ECC_F); 4712 if (aflt->flt_status & ECC_C_TRAP) 4713 return (TRAP_TYPE_ECC_C); 4714 if (aflt->flt_status & ECC_DP_TRAP) 4715 return (TRAP_TYPE_ECC_DP); 4716 if (aflt->flt_status & ECC_IP_TRAP) 4717 return (TRAP_TYPE_ECC_IP); 4718 if (aflt->flt_status & ECC_ITLB_TRAP) 4719 return (TRAP_TYPE_ECC_ITLB); 4720 if (aflt->flt_status & ECC_DTLB_TRAP) 4721 return (TRAP_TYPE_ECC_DTLB); 4722 return (TRAP_TYPE_UNKNOWN); 4723 } 4724 4725 /* 4726 * Decide an error type based on detector and leaky/partner tests. 4727 * The following array is used for quick translation - it must 4728 * stay in sync with ce_dispact_t. 4729 */ 4730 4731 static char *cetypes[] = { 4732 CE_DISP_DESC_U, 4733 CE_DISP_DESC_I, 4734 CE_DISP_DESC_PP, 4735 CE_DISP_DESC_P, 4736 CE_DISP_DESC_L, 4737 CE_DISP_DESC_PS, 4738 CE_DISP_DESC_S 4739 }; 4740 4741 char * 4742 flt_to_error_type(struct async_flt *aflt) 4743 { 4744 ce_dispact_t dispact, disp; 4745 uchar_t dtcrinfo, ptnrinfo, lkyinfo; 4746 4747 /* 4748 * The memory payload bundle is shared by some events that do 4749 * not perform any classification. For those flt_disp will be 4750 * 0 and we will return "unknown". 4751 */ 4752 if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0) 4753 return (cetypes[CE_DISP_UNKNOWN]); 4754 4755 dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp); 4756 4757 /* 4758 * It is also possible that no scrub/classification was performed 4759 * by the detector, for instance where a disrupting error logged 4760 * in the AFSR while CEEN was off in cpu_deferred_error. 4761 */ 4762 if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) 4763 return (cetypes[CE_DISP_UNKNOWN]); 4764 4765 /* 4766 * Lookup type in initial classification/action table 4767 */ 4768 dispact = CE_DISPACT(ce_disp_table, 4769 CE_XDIAG_AFARMATCHED(dtcrinfo), 4770 CE_XDIAG_STATE(dtcrinfo), 4771 CE_XDIAG_CE1SEEN(dtcrinfo), 4772 CE_XDIAG_CE2SEEN(dtcrinfo)); 4773 4774 /* 4775 * A bad lookup is not something to panic production systems for. 4776 */ 4777 ASSERT(dispact != CE_DISP_BAD); 4778 if (dispact == CE_DISP_BAD) 4779 return (cetypes[CE_DISP_UNKNOWN]); 4780 4781 disp = CE_DISP(dispact); 4782 4783 switch (disp) { 4784 case CE_DISP_UNKNOWN: 4785 case CE_DISP_INTERMITTENT: 4786 break; 4787 4788 case CE_DISP_POSS_PERS: 4789 /* 4790 * "Possible persistent" errors to which we have applied a valid 4791 * leaky test can be separated into "persistent" or "leaky". 
4792 */ 4793 lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp); 4794 if (CE_XDIAG_TESTVALID(lkyinfo)) { 4795 if (CE_XDIAG_CE1SEEN(lkyinfo) || 4796 CE_XDIAG_CE2SEEN(lkyinfo)) 4797 disp = CE_DISP_LEAKY; 4798 else 4799 disp = CE_DISP_PERS; 4800 } 4801 break; 4802 4803 case CE_DISP_POSS_STICKY: 4804 /* 4805 * Promote "possible sticky" results that have been 4806 * confirmed by a partner test to "sticky". Unconfirmed 4807 * "possible sticky" events are left at that status - we do not 4808 * guess at any bad reader/writer etc status here. 4809 */ 4810 ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp); 4811 if (CE_XDIAG_TESTVALID(ptnrinfo) && 4812 CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo)) 4813 disp = CE_DISP_STICKY; 4814 4815 /* 4816 * Promote "possible sticky" results on a uniprocessor 4817 * to "sticky" 4818 */ 4819 if (disp == CE_DISP_POSS_STICKY && 4820 CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC) 4821 disp = CE_DISP_STICKY; 4822 break; 4823 4824 default: 4825 disp = CE_DISP_UNKNOWN; 4826 break; 4827 } 4828 4829 return (cetypes[disp]); 4830 } 4831 4832 /* 4833 * Given the entire afsr, the specific bit to check and a prioritized list of 4834 * error bits, determine the validity of the various overwrite priority 4835 * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have 4836 * different overwrite priorities. 4837 * 4838 * Given a specific afsr error bit and the entire afsr, there are three cases: 4839 * INVALID: The specified bit is lower overwrite priority than some other 4840 * error bit which is on in the afsr (or IVU/IVC). 4841 * VALID: The specified bit is higher priority than all other error bits 4842 * which are on in the afsr. 4843 * AMBIGUOUS: Another error bit (or bits) of equal priority to the specified 4844 * bit is on in the afsr. 4845 */ 4846 int 4847 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits) 4848 { 4849 uint64_t afsr_ow; 4850 4851 while ((afsr_ow = *ow_bits++) != 0) { 4852 /* 4853 * If bit is in the priority class, check to see if another 4854 * bit in the same class is on => ambiguous. Otherwise, 4855 * the value is valid. If the bit is not on at this priority 4856 * class, but a higher priority bit is on, then the value is 4857 * invalid. 4858 */ 4859 if (afsr_ow & afsr_bit) { 4860 /* 4861 * If equal pri bit is on, ambiguous. 4862 */ 4863 if (afsr & (afsr_ow & ~afsr_bit)) 4864 return (AFLT_STAT_AMBIGUOUS); 4865 return (AFLT_STAT_VALID); 4866 } else if (afsr & afsr_ow) 4867 break; 4868 } 4869 4870 /* 4871 * We didn't find a match or a higher priority bit was on. Not 4872 * finding a match handles the case of invalid AFAR for IVC, IVU. 
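 * As an illustration (assuming, hypothetically, a priority ordering in which uncorrectable-error bits precede correctable-error bits): if both such bits are on in the afsr, a query for the correctable bit returns INVALID because the higher-priority bit owns the AFAR, while a query for the uncorrectable bit returns VALID provided no equal-priority bit is also on.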
4873 */ 4874 return (AFLT_STAT_INVALID); 4875 } 4876 4877 static int 4878 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit) 4879 { 4880 #if defined(SERRANO) 4881 if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) 4882 return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite)); 4883 else 4884 #endif /* SERRANO */ 4885 return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite)); 4886 } 4887 4888 static int 4889 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit) 4890 { 4891 return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite)); 4892 } 4893 4894 static int 4895 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit) 4896 { 4897 return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite)); 4898 } 4899 4900 static int 4901 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit) 4902 { 4903 #ifdef lint 4904 cpuid = cpuid; 4905 #endif 4906 if (afsr_bit & C_AFSR_MSYND_ERRS) { 4907 return (afsr_to_msynd_status(afsr, afsr_bit)); 4908 } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { 4909 #if defined(CHEETAH_PLUS) 4910 /* 4911 * The E_SYND overwrite policy is slightly different 4912 * for Panther CPUs. 4913 */ 4914 if (IS_PANTHER(cpunodes[cpuid].implementation)) 4915 return (afsr_to_pn_esynd_status(afsr, afsr_bit)); 4916 else 4917 return (afsr_to_esynd_status(afsr, afsr_bit)); 4918 #else /* CHEETAH_PLUS */ 4919 return (afsr_to_esynd_status(afsr, afsr_bit)); 4920 #endif /* CHEETAH_PLUS */ 4921 } else { 4922 return (AFLT_STAT_INVALID); 4923 } 4924 } 4925 4926 /* 4927 * Slave CPU stick synchronization. 4928 */ 4929 void 4930 sticksync_slave(void) 4931 { 4932 int i; 4933 int tries = 0; 4934 int64_t tskew; 4935 int64_t av_tskew; 4936 4937 kpreempt_disable(); 4938 /* wait for the master side */ 4939 while (stick_sync_cmd != SLAVE_START) 4940 ; 4941 /* 4942 * Synchronization should only take a few tries at most. But in the 4943 * odd case where the cpu isn't cooperating we'll keep trying. A cpu 4944 * without its stick synchronized wouldn't be a good citizen. 4945 */ 4946 while (slave_done == 0) { 4947 /* 4948 * Time skew calculation. 4949 */ 4950 av_tskew = tskew = 0; 4951 4952 for (i = 0; i < stick_iter; i++) { 4953 /* make location hot */ 4954 timestamp[EV_A_START] = 0; 4955 stick_timestamp(&timestamp[EV_A_START]); 4956 4957 /* tell the master we're ready */ 4958 stick_sync_cmd = MASTER_START; 4959 4960 /* and wait */ 4961 while (stick_sync_cmd != SLAVE_CONT) 4962 ; 4963 /* Event B end */ 4964 stick_timestamp(&timestamp[EV_B_END]); 4965 4966 /* calculate time skew */ 4967 tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START]) 4968 - (timestamp[EV_A_END] - 4969 timestamp[EV_A_START])) / 2; 4970 4971 /* keep running count */ 4972 av_tskew += tskew; 4973 } /* for */ 4974 4975 /* 4976 * Adjust stick for time skew if not within the max allowed; 4977 * otherwise we're all done. 4978 */ 4979 if (stick_iter != 0) 4980 av_tskew = av_tskew/stick_iter; 4981 if (ABS(av_tskew) > stick_tsk) { 4982 /* 4983 * If the skew is 1 (the slave's STICK register 4984 * is 1 STICK ahead of the master's), stick_adj 4985 * could fail to adjust the slave's STICK register 4986 * if the STICK read on the slave happens to 4987 * align with the increment of the STICK. 4988 * Therefore, we increment the skew to 2.
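 * (For the skew calculation itself, an illustrative example with made-up numbers: if the slave's STICK runs 30 ticks ahead of the master's and the one-way latency of the handshake is 10 ticks, Event B spans 10 + 30 = 40 ticks while Event A spans 10 - 30 = -20 ticks, so tskew works out to (40 - (-20)) / 2 = 30 and stick_adj(-30) pulls the slave back into line.)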
4989 */ 4990 if (av_tskew == 1) 4991 av_tskew++; 4992 stick_adj(-av_tskew); 4993 } else 4994 slave_done = 1; 4995 #ifdef DEBUG 4996 if (tries < DSYNC_ATTEMPTS) 4997 stick_sync_stats[CPU->cpu_id].skew_val[tries] = 4998 av_tskew; 4999 ++tries; 5000 #endif /* DEBUG */ 5001 #ifdef lint 5002 tries = tries; 5003 #endif 5004 5005 } /* while */ 5006 5007 /* allow the master to finish */ 5008 stick_sync_cmd = EVENT_NULL; 5009 kpreempt_enable(); 5010 } 5011 5012 /* 5013 * Master CPU side of stick synchronization. 5014 * - timestamp end of Event A 5015 * - timestamp beginning of Event B 5016 */ 5017 void 5018 sticksync_master(void) 5019 { 5020 int i; 5021 5022 kpreempt_disable(); 5023 /* tell the slave we've started */ 5024 slave_done = 0; 5025 stick_sync_cmd = SLAVE_START; 5026 5027 while (slave_done == 0) { 5028 for (i = 0; i < stick_iter; i++) { 5029 /* wait for the slave */ 5030 while (stick_sync_cmd != MASTER_START) 5031 ; 5032 /* Event A end */ 5033 stick_timestamp(&timestamp[EV_A_END]); 5034 5035 /* make location hot */ 5036 timestamp[EV_B_START] = 0; 5037 stick_timestamp(&timestamp[EV_B_START]); 5038 5039 /* tell the slave to continue */ 5040 stick_sync_cmd = SLAVE_CONT; 5041 } /* for */ 5042 5043 /* wait while slave calculates time skew */ 5044 while (stick_sync_cmd == SLAVE_CONT) 5045 ; 5046 } /* while */ 5047 kpreempt_enable(); 5048 } 5049 5050 /* 5051 * Cheetah/Cheetah+ have a disrupting error for copybacks, so we don't need 5052 * to do the Spitfire hack of xcall'ing all the cpus to ask them to check for 5053 * such errors. Also, in cpu_async_panic_callb, each cpu checks for CPU 5054 * events on its way to panic idle. 5055 */ 5056 /*ARGSUSED*/ 5057 void 5058 cpu_check_allcpus(struct async_flt *aflt) 5059 {} 5060 5061 struct kmem_cache *ch_private_cache; 5062 5063 /* 5064 * CPU private uninitialization. Uninitialize the Ecache scrubber and 5065 * deallocate the scrubber data structures and cpu_private data structure. 5066 */ 5067 void 5068 cpu_uninit_private(struct cpu *cp) 5069 { 5070 cheetah_private_t *chprp = CPU_PRIVATE(cp); 5071 5072 ASSERT(chprp); 5073 cpu_uninit_ecache_scrub_dr(cp); 5074 CPU_PRIVATE(cp) = NULL; 5075 ch_err_tl1_paddrs[cp->cpu_id] = NULL; 5076 kmem_cache_free(ch_private_cache, chprp); 5077 cmp_delete_cpu(cp->cpu_id); 5078 5079 } 5080 5081 /* 5082 * Cheetah Cache Scrubbing 5083 * 5084 * The primary purpose of Cheetah cache scrubbing is to reduce the exposure 5085 * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not 5086 * protected by either parity or ECC. 5087 * 5088 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the 5089 * cache per second). Due to the specifics of how the I$ control 5090 * logic works with respect to the ASI used to scrub I$ lines, the entire 5091 * I$ is scanned at once. 5092 */ 5093 5094 /* 5095 * Tuneables to enable and disable the scrubbing of the caches, and to tune 5096 * scrubbing behavior. These may be changed via /etc/system or using mdb 5097 * on a running system. 5098 */ 5099 int dcache_scrub_enable = 1; /* D$ scrubbing is on by default */ 5100 5101 /* 5102 * The following are the PIL levels that the softints/cross traps will fire at. 5103 */ 5104 uint_t ecache_scrub_pil = PIL_9; /* E$ scrub PIL for cross traps */ 5105 uint_t dcache_scrub_pil = PIL_9; /* D$ scrub PIL for cross traps */ 5106 uint_t icache_scrub_pil = PIL_9; /* I$ scrub PIL for cross traps */ 5107 5108 #if defined(JALAPENO) 5109 5110 /* 5111 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber 5112 * on Jalapeno.
5113 */ 5114 int ecache_scrub_enable = 0; 5115 5116 #else /* JALAPENO */ 5117 5118 /* 5119 * With all other cpu types, E$ scrubbing is on by default 5120 */ 5121 int ecache_scrub_enable = 1; 5122 5123 #endif /* JALAPENO */ 5124 5125 5126 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO) 5127 5128 /* 5129 * The I$ scrubber tends to cause latency problems for real-time SW, so it 5130 * is disabled by default on non-Cheetah systems 5131 */ 5132 int icache_scrub_enable = 0; 5133 5134 /* 5135 * Tuneables specifying the scrub calls per second and the scan rate 5136 * for each cache 5137 * 5138 * The cyclic times are set during boot based on the following values. 5139 * Changing these values in mdb after this time will have no effect. If 5140 * a different value is desired, it must be set in /etc/system before a 5141 * reboot. 5142 */ 5143 int ecache_calls_a_sec = 1; 5144 int dcache_calls_a_sec = 2; 5145 int icache_calls_a_sec = 2; 5146 5147 int ecache_scan_rate_idle = 1; 5148 int ecache_scan_rate_busy = 1; 5149 int dcache_scan_rate_idle = 1; 5150 int dcache_scan_rate_busy = 1; 5151 int icache_scan_rate_idle = 1; 5152 int icache_scan_rate_busy = 1; 5153 5154 #else /* CHEETAH_PLUS || JALAPENO || SERRANO */ 5155 5156 int icache_scrub_enable = 1; /* I$ scrubbing is on by default */ 5157 5158 int ecache_calls_a_sec = 100; /* E$ scrub calls per second */ 5159 int dcache_calls_a_sec = 100; /* D$ scrub calls per second */ 5160 int icache_calls_a_sec = 100; /* I$ scrub calls per second */ 5161 5162 int ecache_scan_rate_idle = 100; /* E$ scan rate (in tenths of a %) */ 5163 int ecache_scan_rate_busy = 100; /* E$ scan rate (in tenths of a %) */ 5164 int dcache_scan_rate_idle = 100; /* D$ scan rate (in tenths of a %) */ 5165 int dcache_scan_rate_busy = 100; /* D$ scan rate (in tenths of a %) */ 5166 int icache_scan_rate_idle = 100; /* I$ scan rate (in tenths of a %) */ 5167 int icache_scan_rate_busy = 100; /* I$ scan rate (in tenths of a %) */ 5168 5169 #endif /* CHEETAH_PLUS || JALAPENO || SERRANO */ 5170 5171 /* 5172 * In order to scrub on offline cpus, a cross trap is sent. The handler will 5173 * increment the outstanding request counter and schedule a softint to run 5174 * the scrubber. 5175 */ 5176 extern xcfunc_t cache_scrubreq_tl1; 5177 5178 /* 5179 * These are the softint functions for each cache scrubber 5180 */ 5181 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2); 5182 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2); 5183 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2); 5184 5185 /* 5186 * The cache scrub info table contains cache specific information 5187 * and allows for some of the scrub code to be table driven, reducing 5188 * duplication of similar code across the caches. 5189 * 5190 * This table keeps a copy of the value in the calls per second variable 5191 * (?cache_calls_a_sec). This makes it much more difficult for someone 5192 * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in 5193 * mdb in a misguided attempt to disable the scrubber).
5194 */ 5195 struct scrub_info { 5196 int *csi_enable; /* scrubber enable flag */ 5197 int csi_freq; /* scrubber calls per second */ 5198 int csi_index; /* index to chsm_outstanding[] */ 5199 uint_t csi_inum; /* scrubber interrupt number */ 5200 cyclic_id_t csi_omni_cyc_id; /* omni cyclic ID */ 5201 cyclic_id_t csi_offline_cyc_id; /* offline cyclic ID */ 5202 char csi_name[3]; /* cache name for this scrub entry */ 5203 } cache_scrub_info[] = { 5204 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"}, 5205 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"}, 5206 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"} 5207 }; 5208 5209 /* 5210 * If scrubbing is enabled, increment the outstanding request counter. If it 5211 * is 1 (meaning there were no previous requests outstanding), call 5212 * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing 5213 * a self trap. 5214 */ 5215 static void 5216 do_scrub(struct scrub_info *csi) 5217 { 5218 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5219 int index = csi->csi_index; 5220 uint32_t *outstanding = &csmp->chsm_outstanding[index]; 5221 5222 if (*(csi->csi_enable) && (csmp->chsm_enable[index])) { 5223 if (atomic_add_32_nv(outstanding, 1) == 1) { 5224 xt_one_unchecked(CPU->cpu_id, setsoftint_tl1, 5225 csi->csi_inum, 0); 5226 } 5227 } 5228 } 5229 5230 /* 5231 * Omni cyclics don't fire on offline cpus, so we use another cyclic to 5232 * cross-trap the offline cpus. 5233 */ 5234 static void 5235 do_scrub_offline(struct scrub_info *csi) 5236 { 5237 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5238 5239 if (CPUSET_ISNULL(cpu_offline_set)) { 5240 /* 5241 * No offline cpus - nothing to do 5242 */ 5243 return; 5244 } 5245 5246 if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) { 5247 xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum, 5248 csi->csi_index); 5249 } 5250 } 5251 5252 /* 5253 * This is the initial setup for the scrubber cyclics - it sets the 5254 * interrupt level, frequency, and function to call. 5255 */ 5256 /*ARGSUSED*/ 5257 static void 5258 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, 5259 cyc_time_t *when) 5260 { 5261 struct scrub_info *csi = (struct scrub_info *)arg; 5262 5263 ASSERT(csi != NULL); 5264 hdlr->cyh_func = (cyc_func_t)do_scrub; 5265 hdlr->cyh_level = CY_LOW_LEVEL; 5266 hdlr->cyh_arg = arg; 5267 5268 when->cyt_when = 0; /* Start immediately */ 5269 when->cyt_interval = NANOSEC / csi->csi_freq; 5270 } 5271 5272 /* 5273 * Initialization for cache scrubbing. 5274 * This routine is called AFTER all cpus have had cpu_init_private called 5275 * to initialize their private data areas. 
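 * As a simple illustration of the cyclic arithmetic set up above: with the non-Cheetah+ default of 100 calls per second the scrub cyclic fires every NANOSEC / 100 = 10 ms, while the Cheetah+/Jalapeno/Serrano defaults of 1-2 calls per second give intervals of 0.5-1 second.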
5276 */ 5277 void 5278 cpu_init_cache_scrub(void) 5279 { 5280 int i; 5281 struct scrub_info *csi; 5282 cyc_omni_handler_t omni_hdlr; 5283 cyc_handler_t offline_hdlr; 5284 cyc_time_t when; 5285 5286 /* 5287 * save away the maximum number of lines for the D$ 5288 */ 5289 dcache_nlines = dcache_size / dcache_linesize; 5290 5291 /* 5292 * register the softints for the cache scrubbing 5293 */ 5294 cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum = 5295 add_softintr(ecache_scrub_pil, scrub_ecache_line_intr, 5296 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]); 5297 cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec; 5298 5299 cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum = 5300 add_softintr(dcache_scrub_pil, scrub_dcache_line_intr, 5301 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]); 5302 cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec; 5303 5304 cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum = 5305 add_softintr(icache_scrub_pil, scrub_icache_line_intr, 5306 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]); 5307 cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec; 5308 5309 /* 5310 * start the scrubbing for all the caches 5311 */ 5312 mutex_enter(&cpu_lock); 5313 for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) { 5314 5315 csi = &cache_scrub_info[i]; 5316 5317 if (!(*csi->csi_enable)) 5318 continue; 5319 5320 /* 5321 * force the following to be true: 5322 * 1 <= calls_a_sec <= hz 5323 */ 5324 if (csi->csi_freq > hz) { 5325 cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high " 5326 "(%d); resetting to hz (%d)", csi->csi_name, 5327 csi->csi_freq, hz); 5328 csi->csi_freq = hz; 5329 } else if (csi->csi_freq < 1) { 5330 cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low " 5331 "(%d); resetting to 1", csi->csi_name, 5332 csi->csi_freq); 5333 csi->csi_freq = 1; 5334 } 5335 5336 omni_hdlr.cyo_online = cpu_scrub_cyclic_setup; 5337 omni_hdlr.cyo_offline = NULL; 5338 omni_hdlr.cyo_arg = (void *)csi; 5339 5340 offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline; 5341 offline_hdlr.cyh_arg = (void *)csi; 5342 offline_hdlr.cyh_level = CY_LOW_LEVEL; 5343 5344 when.cyt_when = 0; /* Start immediately */ 5345 when.cyt_interval = NANOSEC / csi->csi_freq; 5346 5347 csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr); 5348 csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when); 5349 } 5350 register_cpu_setup_func(cpu_scrub_cpu_setup, NULL); 5351 mutex_exit(&cpu_lock); 5352 } 5353 5354 /* 5355 * Indicate that the specified cpu is idle. 5356 */ 5357 void 5358 cpu_idle_ecache_scrub(struct cpu *cp) 5359 { 5360 if (CPU_PRIVATE(cp) != NULL) { 5361 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5362 csmp->chsm_ecache_busy = ECACHE_CPU_IDLE; 5363 } 5364 } 5365 5366 /* 5367 * Indicate that the specified cpu is busy. 5368 */ 5369 void 5370 cpu_busy_ecache_scrub(struct cpu *cp) 5371 { 5372 if (CPU_PRIVATE(cp) != NULL) { 5373 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5374 csmp->chsm_ecache_busy = ECACHE_CPU_BUSY; 5375 } 5376 } 5377 5378 /* 5379 * Initialization for cache scrubbing for the specified cpu. 
5380 */ 5381 void 5382 cpu_init_ecache_scrub_dr(struct cpu *cp) 5383 { 5384 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5385 int cpuid = cp->cpu_id; 5386 5387 /* initialize the number of lines in the caches */ 5388 csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size / 5389 cpunodes[cpuid].ecache_linesize; 5390 csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) / 5391 CPU_PRIVATE_VAL(cp, chpr_icache_linesize); 5392 5393 /* 5394 * do_scrub() and do_scrub_offline() check both the global 5395 * ?cache_scrub_enable and this per-cpu enable variable. All scrubbers 5396 * check this value before scrubbing. Currently, we use it to 5397 * disable the E$ scrubber on multi-core cpus or while running at 5398 * slowed speed. For now, just turn everything on and allow 5399 * cpu_init_private() to change it if necessary. 5400 */ 5401 csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1; 5402 csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1; 5403 csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1; 5404 5405 cpu_busy_ecache_scrub(cp); 5406 } 5407 5408 /* 5409 * Un-initialization for cache scrubbing for the specified cpu. 5410 */ 5411 static void 5412 cpu_uninit_ecache_scrub_dr(struct cpu *cp) 5413 { 5414 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5415 5416 /* 5417 * un-initialize bookkeeping for cache scrubbing 5418 */ 5419 bzero(csmp, sizeof (ch_scrub_misc_t)); 5420 5421 cpu_idle_ecache_scrub(cp); 5422 } 5423 5424 /* 5425 * Called periodically on each CPU to scrub the D$. 5426 */ 5427 static void 5428 scrub_dcache(int how_many) 5429 { 5430 int i; 5431 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5432 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D]; 5433 5434 /* 5435 * scrub the desired number of lines 5436 */ 5437 for (i = 0; i < how_many; i++) { 5438 /* 5439 * scrub a D$ line 5440 */ 5441 dcache_inval_line(index); 5442 5443 /* 5444 * calculate the next D$ line to scrub, assumes 5445 * that dcache_nlines is a power of 2 5446 */ 5447 index = (index + 1) & (dcache_nlines - 1); 5448 } 5449 5450 /* 5451 * set the scrub index for the next visit 5452 */ 5453 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index; 5454 } 5455 5456 /* 5457 * Handler for D$ scrub inum softint. Call scrub_dcache until 5458 * we decrement the outstanding request count to zero. 5459 */ 5460 /*ARGSUSED*/ 5461 static uint_t 5462 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2) 5463 { 5464 int i; 5465 int how_many; 5466 int outstanding; 5467 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5468 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D]; 5469 struct scrub_info *csi = (struct scrub_info *)arg1; 5470 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 5471 dcache_scan_rate_idle : dcache_scan_rate_busy; 5472 5473 /* 5474 * The scan rates are expressed in units of tenths of a 5475 * percent. A scan rate of 1000 (100%) means the whole 5476 * cache is scanned every second. 5477 */ 5478 how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq); 5479 5480 do { 5481 outstanding = *countp; 5482 for (i = 0; i < outstanding; i++) { 5483 scrub_dcache(how_many); 5484 } 5485 } while (atomic_add_32_nv(countp, -outstanding)); 5486 5487 return (DDI_INTR_CLAIMED); 5488 } 5489 5490 /* 5491 * Called periodically on each CPU to scrub the I$. The I$ is scrubbed 5492 * by invalidating lines. Due to the characteristics of the ASI which 5493 * is used to invalidate an I$ line, the entire I$ must be invalidated 5494 * vs. an individual I$ line. 
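 * The per-line index maintained below therefore only paces the work; the actual invalidation of the entire I$ happens once each time the index wraps back to zero.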
5495 */ 5496 static void 5497 scrub_icache(int how_many) 5498 { 5499 int i; 5500 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5501 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I]; 5502 int icache_nlines = csmp->chsm_icache_nlines; 5503 5504 /* 5505 * scrub the desired number of lines 5506 */ 5507 for (i = 0; i < how_many; i++) { 5508 /* 5509 * since the entire I$ must be scrubbed at once, 5510 * wait until the index wraps to zero to invalidate 5511 * the entire I$ 5512 */ 5513 if (index == 0) { 5514 icache_inval_all(); 5515 } 5516 5517 /* 5518 * calculate the next I$ line to scrub, assumes 5519 * that chsm_icache_nlines is a power of 2 5520 */ 5521 index = (index + 1) & (icache_nlines - 1); 5522 } 5523 5524 /* 5525 * set the scrub index for the next visit 5526 */ 5527 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index; 5528 } 5529 5530 /* 5531 * Handler for I$ scrub inum softint. Call scrub_icache until 5532 * we decrement the outstanding request count to zero. 5533 */ 5534 /*ARGSUSED*/ 5535 static uint_t 5536 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2) 5537 { 5538 int i; 5539 int how_many; 5540 int outstanding; 5541 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5542 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I]; 5543 struct scrub_info *csi = (struct scrub_info *)arg1; 5544 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 5545 icache_scan_rate_idle : icache_scan_rate_busy; 5546 int icache_nlines = csmp->chsm_icache_nlines; 5547 5548 /* 5549 * The scan rates are expressed in units of tenths of a 5550 * percent. A scan rate of 1000 (100%) means the whole 5551 * cache is scanned every second. 5552 */ 5553 how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq); 5554 5555 do { 5556 outstanding = *countp; 5557 for (i = 0; i < outstanding; i++) { 5558 scrub_icache(how_many); 5559 } 5560 } while (atomic_add_32_nv(countp, -outstanding)); 5561 5562 return (DDI_INTR_CLAIMED); 5563 } 5564 5565 /* 5566 * Called periodically on each CPU to scrub the E$. 5567 */ 5568 static void 5569 scrub_ecache(int how_many) 5570 { 5571 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5572 int i; 5573 int cpuid = CPU->cpu_id; 5574 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E]; 5575 int nlines = csmp->chsm_ecache_nlines; 5576 int linesize = cpunodes[cpuid].ecache_linesize; 5577 int ec_set_size = cpu_ecache_set_size(CPU); 5578 5579 /* 5580 * scrub the desired number of lines 5581 */ 5582 for (i = 0; i < how_many; i++) { 5583 /* 5584 * scrub the E$ line 5585 */ 5586 ecache_flush_line(ecache_flushaddr + (index * linesize), 5587 ec_set_size); 5588 5589 /* 5590 * calculate the next E$ line to scrub based on twice 5591 * the number of E$ lines (to displace lines containing 5592 * flush area data), assumes that the number of lines 5593 * is a power of 2 5594 */ 5595 index = (index + 1) & ((nlines << 1) - 1); 5596 } 5597 5598 /* 5599 * set the ecache scrub index for the next visit 5600 */ 5601 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index; 5602 } 5603 5604 /* 5605 * Handler for E$ scrub inum softint. Call the E$ scrubber until 5606 * we decrement the outstanding request count to zero. 5607 * 5608 * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may 5609 * become negative after the atomic_add_32_nv(). This is not a problem, as 5610 * the next trip around the loop won't scrub anything, and the next add will 5611 * reset the count back to zero. 
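 *
 * As a worked example of the scan-rate arithmetic used by all three
 * scrub handlers (the numbers here are illustrative only): with 65536
 * E$ lines, a busy scan rate of 100 (i.e. 10%) and a csi_freq of 100
 * calls per second,
 *
 *	how_many = (65536 * 100) / (1000 * 100) = 65
 *
 * lines are scrubbed per softint, or roughly 6,500 lines - about 10%
 * of the cache - every second.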
5612 */ 5613 /*ARGSUSED*/ 5614 static uint_t 5615 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) 5616 { 5617 int i; 5618 int how_many; 5619 int outstanding; 5620 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5621 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E]; 5622 struct scrub_info *csi = (struct scrub_info *)arg1; 5623 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 5624 ecache_scan_rate_idle : ecache_scan_rate_busy; 5625 int ecache_nlines = csmp->chsm_ecache_nlines; 5626 5627 /* 5628 * The scan rates are expressed in units of tenths of a 5629 * percent. A scan rate of 1000 (100%) means the whole 5630 * cache is scanned every second. 5631 */ 5632 how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq); 5633 5634 do { 5635 outstanding = *countp; 5636 for (i = 0; i < outstanding; i++) { 5637 scrub_ecache(how_many); 5638 } 5639 } while (atomic_add_32_nv(countp, -outstanding)); 5640 5641 return (DDI_INTR_CLAIMED); 5642 } 5643 5644 /* 5645 * Timeout function to reenable CE 5646 */ 5647 static void 5648 cpu_delayed_check_ce_errors(void *arg) 5649 { 5650 if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg, 5651 TQ_NOSLEEP)) { 5652 (void) timeout(cpu_delayed_check_ce_errors, arg, 5653 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); 5654 } 5655 } 5656 5657 /* 5658 * CE Deferred Re-enable after trap. 5659 * 5660 * When the CPU gets a disrupting trap for any of the errors 5661 * controlled by the CEEN bit, CEEN is disabled in the trap handler 5662 * immediately. To eliminate the possibility of multiple CEs causing 5663 * recursive stack overflow in the trap handler, we cannot 5664 * reenable CEEN while still running in the trap handler. Instead, 5665 * after a CE is logged on a CPU, we schedule a timeout function, 5666 * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs 5667 * seconds. This function will check whether any further CEs 5668 * have occurred on that CPU, and if none have, will reenable CEEN. 5669 * 5670 * If further CEs have occurred while CEEN is disabled, another 5671 * timeout will be scheduled. This is to ensure that the CPU can 5672 * make progress in the face of CE 'storms', and that it does not 5673 * spend all its time logging CE errors. 5674 */ 5675 static void 5676 cpu_check_ce_errors(void *arg) 5677 { 5678 int cpuid = (int)arg; 5679 cpu_t *cp; 5680 5681 /* 5682 * We acquire cpu_lock. 5683 */ 5684 ASSERT(curthread->t_pil == 0); 5685 5686 /* 5687 * verify that the cpu is still around, DR 5688 * could have got there first ... 5689 */ 5690 mutex_enter(&cpu_lock); 5691 cp = cpu_get(cpuid); 5692 if (cp == NULL) { 5693 mutex_exit(&cpu_lock); 5694 return; 5695 } 5696 /* 5697 * make sure we don't migrate across CPUs 5698 * while checking our CE status. 5699 */ 5700 kpreempt_disable(); 5701 5702 /* 5703 * If we are running on the CPU that got the 5704 * CE, we can do the checks directly. 5705 */ 5706 if (cp->cpu_id == CPU->cpu_id) { 5707 mutex_exit(&cpu_lock); 5708 cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0); 5709 kpreempt_enable(); 5710 return; 5711 } 5712 kpreempt_enable(); 5713 5714 /* 5715 * send an x-call to get the CPU that originally 5716 * got the CE to do the necessary checks. If we can't 5717 * send the x-call, reschedule the timeout, otherwise we 5718 * lose CEEN forever on that CPU. 
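 *
 * For example, with a hypothetical cpu_ceen_delay_secs of 6 and an hz
 * of 100, the reschedule below waits drv_usectohz(6 * MICROSEC), i.e.
 * 600 ticks or another six seconds, before trying again.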
 */
	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
		    TIMEOUT_CEEN_CHECK, 0);
		mutex_exit(&cpu_lock);
	} else {
		/*
		 * When the CPU is not accepting xcalls, or
		 * the processor is offlined, we don't want to
		 * incur the extra overhead of trying to schedule the
		 * CE timeout indefinitely. However, we don't want to lose
		 * CE checking forever.
		 *
		 * Keep rescheduling the timeout, accepting the additional
		 * overhead as the cost of correctness in the case where we get
		 * a CE, disable CEEN, offline the CPU during the
		 * timeout interval, and then online it at some
		 * point in the future. This is unlikely given the short
		 * cpu_ceen_delay_secs.
		 */
		mutex_exit(&cpu_lock);
		(void) timeout(cpu_delayed_check_ce_errors, (void *)cp->cpu_id,
		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
	}
}

/*
 * This routine will check whether CEs have occurred while
 * CEEN is disabled. Any CEs detected will be logged and, if
 * possible, scrubbed.
 *
 * The memscrubber will also use this routine to clear any errors
 * caused by its scrubbing with CEEN disabled.
 *
 * flag == SCRUBBER_CEEN_CHECK
 *	called from memscrubber, just check/scrub, no reset
 *	paddr	physical addr. for start of scrub pages
 *	vaddr	virtual addr. for scrub area
 *	psz	page size of area to be scrubbed
 *
 * flag == TIMEOUT_CEEN_CHECK
 *	timeout function has triggered, reset timeout or CEEN
 *
 * Note: We must not migrate cpus during this function.  This can be
 * achieved by one of:
 *    - invoking as target of an x-call in which case we're at XCALL_PIL
 *	The flag value must be first xcall argument.
 *    - disabling kernel preemption.  This should be done for very short
 *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
 *	scrub an extended area with cpu_check_block.  The call for
 *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
 *	brief for this case.
 *    - binding to a cpu, eg with thread_affinity_set().  This is used
 *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
 *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
 */
void
cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
{
	ch_cpu_errors_t cpu_error_regs;
	uint64_t ec_err_enable;
	uint64_t page_offset;

	/* Read AFSR */
	get_cpu_error_state(&cpu_error_regs);

	/*
	 * If no CEEN errors have occurred during the timeout
	 * interval, it is safe to re-enable CEEN and exit.
	 */
	if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) {
		if (flag == TIMEOUT_CEEN_CHECK &&
		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
			set_error_enable(ec_err_enable | EN_REG_CEEN);
		return;
	}

	/*
	 * Ensure that CEEN was not reenabled (maybe by DR) before
	 * we log/clear the error.
	 */
	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
		set_error_enable(ec_err_enable & ~EN_REG_CEEN);

	/*
	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
	 * timeout will be rescheduled when the error is logged.
	 */
	if (!(cpu_error_regs.afsr & cpu_ce_not_deferred))
		cpu_ce_detected(&cpu_error_regs,
		    CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
	else
		cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);

	/*
	 * If the memory scrubber runs while CEEN is
	 * disabled, (or if CEEN is disabled during the
	 * scrub as a result of a CE being triggered by
	 * it), the range being scrubbed will not be
	 * completely cleaned. If there are multiple CEs
	 * in the range at most two of these will be dealt
	 * with, (one by the trap handler and one by the
	 * timeout). It is also possible that none are dealt
	 * with, (CEEN disabled and another CE occurs before
	 * the timeout triggers). So to ensure that the
	 * memory is actually scrubbed, we have to access each
	 * memory location in the range and then check whether
	 * that access causes a CE.
	 */
	if (flag == SCRUBBER_CEEN_CHECK && va) {
		if ((cpu_error_regs.afar >= pa) &&
		    (cpu_error_regs.afar < (pa + psz))) {
			/*
			 * Force a load from physical memory for each
			 * 64-byte block, then check AFSR to determine
			 * whether this access caused an error.
			 *
			 * This is a slow way to do a scrub, but as it will
			 * only be invoked when the memory scrubber actually
			 * triggered a CE, it should not happen too
			 * frequently.
			 *
			 * Cut down what we need to check as the scrubber
			 * has verified up to AFAR, so get its offset
			 * into the page and start there.
			 */
			page_offset = (uint64_t)(cpu_error_regs.afar &
			    (psz - 1));
			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
			psz -= (uint_t)(P2ALIGN(page_offset, 64));
			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
			    psz);
		}
	}

	/*
	 * Reset error enable if this CE is not masked.
	 */
	if ((flag == TIMEOUT_CEEN_CHECK) &&
	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
		set_error_enable(ec_err_enable | EN_REG_CEEN);
}

/*
 * Attempt a cpu logout for an error that we did not trap for, such
 * as a CE noticed with CEEN off.  It is assumed that we are still running
 * on the cpu that took the error and that we cannot migrate.  Returns
 * 1 on success, otherwise 0.
 */
static int
cpu_ce_delayed_ec_logout(uint64_t afar)
{
	ch_cpu_logout_t *clop;

	if (CPU_PRIVATE(CPU) == NULL)
		return (0);

	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
	    LOGOUT_INVALID)
		return (0);

	cpu_delayed_logout(afar, clop);
	return (1);
}

/*
 * We got an error while CEEN was disabled. We
 * need to clean up after it and log whatever
 * information we have on the CE.
 */
void
cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
{
	ch_async_flt_t ch_flt;
	struct async_flt *aflt;
	char pr_reason[MAX_REASON_STRING];

	bzero(&ch_flt, sizeof (ch_async_flt_t));
	ch_flt.flt_trapped_ce = flag;
	aflt = (struct async_flt *)&ch_flt;
	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
	aflt->flt_addr = cpu_error_regs->afar;
#if defined(SERRANO)
	ch_flt.afar2 = cpu_error_regs->afar2;
#endif	/* SERRANO */
	aflt->flt_pc = NULL;
	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
	aflt->flt_tl = 0;
	aflt->flt_panic = 0;
	cpu_log_and_clear_ce(&ch_flt);

	/*
	 * check if we caused any errors during cleanup
	 */
	if (clear_errors(&ch_flt)) {
		pr_reason[0] = '\0';
		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
		    NULL);
	}
}

/*
 * Log/clear CEEN-controlled disrupting errors
 */
static void
cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt;
	uint64_t afsr, afsr_errs;
	ch_cpu_logout_t *clop;
	char pr_reason[MAX_REASON_STRING];
	on_trap_data_t *otp = curthread->t_ontrap;

	aflt = (struct async_flt *)ch_flt;
	afsr = aflt->flt_stat;
	afsr_errs = ch_flt->afsr_errs;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_status = ECC_C_TRAP;

	pr_reason[0] = '\0';
	/*
	 * Get the CPU log out info for Disrupting Trap.
	 */
	if (CPU_PRIVATE(CPU) == NULL) {
		clop = NULL;
		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
	} else {
		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
	}

	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
		ch_cpu_errors_t cpu_error_regs;

		get_cpu_error_state(&cpu_error_regs);
		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
		clop->clo_data.chd_afar = cpu_error_regs.afar;
		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
		clop->clo_sdw_data.chd_afsr_ext =
		    cpu_error_regs.shadow_afsr_ext;
#if defined(SERRANO)
		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
		ch_flt->flt_data_incomplete = 1;

		/*
		 * The logging/clear code expects AFSR/AFAR to be cleared.
		 * The trap handler does it for CEEN enabled errors
		 * so we need to do it here.
		 */
		set_cpu_error_state(&cpu_error_regs);
	}

#if defined(JALAPENO) || defined(SERRANO)
	/*
	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
	 * For Serrano, even though we do have the AFAR, we still do the
	 * scrub on the RCE side since that's where the error type can
	 * be properly classified as intermittent, persistent, etc.
	 *
	 * CE/RCE: If error is in memory and AFAR is valid, scrub the memory.
	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
	 * the flt_status bits.
5993 */ 5994 if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) && 5995 (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) || 5996 cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) { 5997 cpu_ce_scrub_mem_err(aflt, B_TRUE); 5998 } 5999 #else /* JALAPENO || SERRANO */ 6000 /* 6001 * CE/EMC: If error is in memory and AFAR is valid, scrub the memory. 6002 * Must scrub memory before cpu_queue_events, as scrubbing memory sets 6003 * the flt_status bits. 6004 */ 6005 if (afsr & (C_AFSR_CE|C_AFSR_EMC)) { 6006 if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) || 6007 cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) { 6008 cpu_ce_scrub_mem_err(aflt, B_TRUE); 6009 } 6010 } 6011 6012 #endif /* JALAPENO || SERRANO */ 6013 6014 /* 6015 * Update flt_prot if this error occurred under on_trap protection. 6016 */ 6017 if (otp != NULL && (otp->ot_prot & OT_DATA_EC)) 6018 aflt->flt_prot = AFLT_PROT_EC; 6019 6020 /* 6021 * Queue events on the async event queue, one event per error bit. 6022 */ 6023 if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 || 6024 (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) { 6025 ch_flt->flt_type = CPU_INV_AFSR; 6026 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, 6027 (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue, 6028 aflt->flt_panic); 6029 } 6030 6031 /* 6032 * Zero out + invalidate CPU logout. 6033 */ 6034 if (clop) { 6035 bzero(clop, sizeof (ch_cpu_logout_t)); 6036 clop->clo_data.chd_afar = LOGOUT_INVALID; 6037 } 6038 6039 /* 6040 * If either a CPC, WDC or EDC error has occurred while CEEN 6041 * was disabled, we need to flush either the entire 6042 * E$ or an E$ line. 6043 */ 6044 #if defined(JALAPENO) || defined(SERRANO) 6045 if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC)) 6046 #else /* JALAPENO || SERRANO */ 6047 if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC | 6048 C_AFSR_L3_CPC | C_AFSR_L3_WDC)) 6049 #endif /* JALAPENO || SERRANO */ 6050 cpu_error_ecache_flush(ch_flt); 6051 6052 } 6053 6054 /* 6055 * depending on the error type, we determine whether we 6056 * need to flush the entire ecache or just a line. 6057 */ 6058 static int 6059 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt) 6060 { 6061 struct async_flt *aflt; 6062 uint64_t afsr; 6063 uint64_t afsr_errs = ch_flt->afsr_errs; 6064 6065 aflt = (struct async_flt *)ch_flt; 6066 afsr = aflt->flt_stat; 6067 6068 /* 6069 * If we got multiple errors, no point in trying 6070 * the individual cases, just flush the whole cache 6071 */ 6072 if (afsr & C_AFSR_ME) { 6073 return (ECACHE_FLUSH_ALL); 6074 } 6075 6076 /* 6077 * If either a CPC, WDC or EDC error has occurred while CEEN 6078 * was disabled, we need to flush entire E$. We can't just 6079 * flush the cache line affected as the ME bit 6080 * is not set when multiple correctable errors of the same 6081 * type occur, so we might have multiple CPC or EDC errors, 6082 * with only the first recorded. 6083 */ 6084 #if defined(JALAPENO) || defined(SERRANO) 6085 if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) { 6086 #else /* JALAPENO || SERRANO */ 6087 if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC | 6088 C_AFSR_L3_EDC | C_AFSR_L3_WDC)) { 6089 #endif /* JALAPENO || SERRANO */ 6090 return (ECACHE_FLUSH_ALL); 6091 } 6092 6093 #if defined(JALAPENO) || defined(SERRANO) 6094 /* 6095 * If only UE or RUE is set, flush the Ecache line, otherwise 6096 * flush the entire Ecache. 
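 *
 * For example (purely illustrative): a pure UE, i.e. an AFSR for which
 *
 *	(afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE
 *
 * holds, gets ECACHE_FLUSH_LINE, while a UE accompanied by, say, a
 * latched CE fails the equality test and gets ECACHE_FLUSH_ALL.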
6097 */ 6098 if (afsr & (C_AFSR_UE|C_AFSR_RUE)) { 6099 if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE || 6100 (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) { 6101 return (ECACHE_FLUSH_LINE); 6102 } else { 6103 return (ECACHE_FLUSH_ALL); 6104 } 6105 } 6106 #else /* JALAPENO || SERRANO */ 6107 /* 6108 * If UE only is set, flush the Ecache line, otherwise 6109 * flush the entire Ecache. 6110 */ 6111 if (afsr_errs & C_AFSR_UE) { 6112 if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == 6113 C_AFSR_UE) { 6114 return (ECACHE_FLUSH_LINE); 6115 } else { 6116 return (ECACHE_FLUSH_ALL); 6117 } 6118 } 6119 #endif /* JALAPENO || SERRANO */ 6120 6121 /* 6122 * EDU: If EDU only is set, flush the ecache line, otherwise 6123 * flush the entire Ecache. 6124 */ 6125 if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) { 6126 if (((afsr_errs & ~C_AFSR_EDU) == 0) || 6127 ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) { 6128 return (ECACHE_FLUSH_LINE); 6129 } else { 6130 return (ECACHE_FLUSH_ALL); 6131 } 6132 } 6133 6134 /* 6135 * BERR: If BERR only is set, flush the Ecache line, otherwise 6136 * flush the entire Ecache. 6137 */ 6138 if (afsr_errs & C_AFSR_BERR) { 6139 if ((afsr_errs & ~C_AFSR_BERR) == 0) { 6140 return (ECACHE_FLUSH_LINE); 6141 } else { 6142 return (ECACHE_FLUSH_ALL); 6143 } 6144 } 6145 6146 return (0); 6147 } 6148 6149 void 6150 cpu_error_ecache_flush(ch_async_flt_t *ch_flt) 6151 { 6152 int ecache_flush_flag = 6153 cpu_error_ecache_flush_required(ch_flt); 6154 6155 /* 6156 * Flush Ecache line or entire Ecache based on above checks. 6157 */ 6158 if (ecache_flush_flag == ECACHE_FLUSH_ALL) 6159 cpu_flush_ecache(); 6160 else if (ecache_flush_flag == ECACHE_FLUSH_LINE) { 6161 cpu_flush_ecache_line(ch_flt); 6162 } 6163 6164 } 6165 6166 /* 6167 * Extract the PA portion from the E$ tag. 6168 */ 6169 uint64_t 6170 cpu_ectag_to_pa(int setsize, uint64_t tag) 6171 { 6172 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6173 return (JG_ECTAG_TO_PA(setsize, tag)); 6174 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6175 return (PN_L3TAG_TO_PA(tag)); 6176 else 6177 return (CH_ECTAG_TO_PA(setsize, tag)); 6178 } 6179 6180 /* 6181 * Convert the E$ tag PA into an E$ subblock index. 6182 */ 6183 static int 6184 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr) 6185 { 6186 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6187 return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr)); 6188 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6189 /* Panther has only one subblock per line */ 6190 return (0); 6191 else 6192 return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr)); 6193 } 6194 6195 /* 6196 * All subblocks in an E$ line must be invalid for 6197 * the line to be invalid. 6198 */ 6199 int 6200 cpu_ectag_line_invalid(int cachesize, uint64_t tag) 6201 { 6202 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6203 return (JG_ECTAG_LINE_INVALID(cachesize, tag)); 6204 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6205 return (PN_L3_LINE_INVALID(tag)); 6206 else 6207 return (CH_ECTAG_LINE_INVALID(cachesize, tag)); 6208 } 6209 6210 /* 6211 * Extract state bits for a subblock given the tag. Note that for Panther 6212 * this works on both l2 and l3 tags. 
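 *
 * A minimal sketch of how the E$ tag helpers in this file combine when
 * examining a captured line (variable names here are illustrative only):
 *
 *	ec_set_size = cpu_ecache_set_size(CPU);
 *	if (!cpu_ectag_line_invalid(ec_set_size, tag)) {
 *		line_pa = cpu_ectag_to_pa(ec_set_size, tag);
 *		state = cpu_ectag_pa_to_subblk_state(ec_set_size,
 *		    fault_pa, tag);
 *	}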
 */
static int
cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
{
	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
		return (tag & CH_ECSTATE_MASK);
	else
		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
}

/*
 * Cpu specific initialization.
 */
void
cpu_mp_init(void)
{
#ifdef CHEETAHPLUS_ERRATUM_25
	if (cheetah_sendmondo_recover) {
		cheetah_nudge_init();
	}
#endif
}

void
cpu_ereport_post(struct async_flt *aflt)
{
	char *cpu_type, buf[FM_MAX_CLASS];
	nv_alloc_t *nva = NULL;
	nvlist_t *ereport, *detector, *resource;
	errorq_elem_t *eqep;
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	char unum[UNUM_NAMLEN];
	int len = 0;
	uint8_t msg_type;
	plat_ecc_ch_async_flt_t plat_ecc_ch_flt;

	if (aflt->flt_panic || panicstr) {
		eqep = errorq_reserve(ereport_errorq);
		if (eqep == NULL)
			return;
		ereport = errorq_elem_nvl(ereport_errorq, eqep);
		nva = errorq_elem_nva(ereport_errorq, eqep);
	} else {
		ereport = fm_nvlist_create(nva);
	}

	/*
	 * Create the scheme "cpu" FMRI.
	 */
	detector = fm_nvlist_create(nva);
	resource = fm_nvlist_create(nva);
	switch (cpunodes[aflt->flt_inst].implementation) {
	case CHEETAH_IMPL:
		cpu_type = FM_EREPORT_CPU_USIII;
		break;
	case CHEETAH_PLUS_IMPL:
		cpu_type = FM_EREPORT_CPU_USIIIplus;
		break;
	case JALAPENO_IMPL:
		cpu_type = FM_EREPORT_CPU_USIIIi;
		break;
	case SERRANO_IMPL:
		cpu_type = FM_EREPORT_CPU_USIIIiplus;
		break;
	case JAGUAR_IMPL:
		cpu_type = FM_EREPORT_CPU_USIV;
		break;
	case PANTHER_IMPL:
		cpu_type = FM_EREPORT_CPU_USIVplus;
		break;
	default:
		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
		break;
	}
	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
	    aflt->flt_inst, (uint8_t)cpunodes[aflt->flt_inst].version,
	    cpunodes[aflt->flt_inst].device_id);

	/*
	 * Encode all the common data into the ereport.
	 */
	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);

	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
	    detector, NULL);

	/*
	 * Encode the error specific data that was saved in
	 * the async_flt structure into the ereport.
	 */
	cpu_payload_add_aflt(aflt, ereport, resource,
	    &plat_ecc_ch_flt.ecaf_afar_status,
	    &plat_ecc_ch_flt.ecaf_synd_status);

	if (aflt->flt_panic || panicstr) {
		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
	} else {
		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
		fm_nvlist_destroy(ereport, FM_NVA_FREE);
		fm_nvlist_destroy(detector, FM_NVA_FREE);
		fm_nvlist_destroy(resource, FM_NVA_FREE);
	}

	/*
	 * Send the enhanced error information (plat_ecc_error2_data_t)
	 * to the SC only if it can process it.
	 */
	if (&plat_ecc_capability_sc_get &&
	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
		msg_type = cpu_flt_bit_to_plat_error(aflt);
		if (msg_type != PLAT_ECC_ERROR2_NONE) {
			/*
			 * If afar status is not invalid, do a unum lookup.
6330 */ 6331 if (plat_ecc_ch_flt.ecaf_afar_status != 6332 AFLT_STAT_INVALID) { 6333 (void) cpu_get_mem_unum_aflt( 6334 plat_ecc_ch_flt.ecaf_synd_status, aflt, 6335 unum, UNUM_NAMLEN, &len); 6336 } else { 6337 unum[0] = '\0'; 6338 } 6339 plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar; 6340 plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr; 6341 plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext; 6342 plat_ecc_ch_flt.ecaf_sdw_afsr_ext = 6343 ch_flt->flt_sdw_afsr_ext; 6344 6345 if (&plat_log_fruid_error2) 6346 plat_log_fruid_error2(msg_type, unum, aflt, 6347 &plat_ecc_ch_flt); 6348 } 6349 } 6350 } 6351 6352 void 6353 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 6354 { 6355 int status; 6356 ddi_fm_error_t de; 6357 6358 bzero(&de, sizeof (ddi_fm_error_t)); 6359 6360 de.fme_version = DDI_FME_VERSION; 6361 de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, 6362 FM_ENA_FMT1); 6363 de.fme_flag = expected; 6364 de.fme_bus_specific = (void *)aflt->flt_addr; 6365 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 6366 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 6367 aflt->flt_panic = 1; 6368 } 6369 6370 void 6371 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 6372 errorq_t *eqp, uint_t flag) 6373 { 6374 struct async_flt *aflt = (struct async_flt *)payload; 6375 6376 aflt->flt_erpt_class = error_class; 6377 errorq_dispatch(eqp, payload, payload_sz, flag); 6378 } 6379 6380 /* 6381 * This routine may be called by the IO module, but does not do 6382 * anything in this cpu module. The SERD algorithm is handled by 6383 * cpumem-diagnosis engine instead. 6384 */ 6385 /*ARGSUSED*/ 6386 void 6387 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 6388 {} 6389 6390 void 6391 adjust_hw_copy_limits(int ecache_size) 6392 { 6393 /* 6394 * Set hw copy limits. 6395 * 6396 * /etc/system will be parsed later and can override one or more 6397 * of these settings. 6398 * 6399 * At this time, ecache size seems only mildly relevant. 6400 * We seem to run into issues with the d-cache and stalls 6401 * we see on misses. 6402 * 6403 * Cycle measurement indicates that 2 byte aligned copies fare 6404 * little better than doing things with VIS at around 512 bytes. 6405 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte 6406 * aligned is faster whenever the source and destination data 6407 * in cache and the total size is less than 2 Kbytes. The 2K 6408 * limit seems to be driven by the 2K write cache. 6409 * When more than 2K of copies are done in non-VIS mode, stores 6410 * backup in the write cache. In VIS mode, the write cache is 6411 * bypassed, allowing faster cache-line writes aligned on cache 6412 * boundaries. 6413 * 6414 * In addition, in non-VIS mode, there is no prefetching, so 6415 * for larger copies, the advantage of prefetching to avoid even 6416 * occasional cache misses is enough to justify using the VIS code. 6417 * 6418 * During testing, it was discovered that netbench ran 3% slower 6419 * when hw_copy_limit_8 was 2K or larger. Apparently for server 6420 * applications, data is only used once (copied to the output 6421 * buffer, then copied by the network device off the system). Using 6422 * the VIS copy saves more L2 cache state. Network copies are 6423 * around 1.3K to 1.5K in size for historical reasons. 6424 * 6425 * Therefore, a limit of 1K bytes will be used for the 8 byte 6426 * aligned copy even for large caches and 8 MB ecache. 
The 6427 * infrastructure to allow different limits for different sized 6428 * caches is kept to allow further tuning in later releases. 6429 */ 6430 6431 if (min_ecache_size == 0 && use_hw_bcopy) { 6432 /* 6433 * First time through - should be before /etc/system 6434 * is read. 6435 * Could skip the checks for zero but this lets us 6436 * preserve any debugger rewrites. 6437 */ 6438 if (hw_copy_limit_1 == 0) { 6439 hw_copy_limit_1 = VIS_COPY_THRESHOLD; 6440 priv_hcl_1 = hw_copy_limit_1; 6441 } 6442 if (hw_copy_limit_2 == 0) { 6443 hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD; 6444 priv_hcl_2 = hw_copy_limit_2; 6445 } 6446 if (hw_copy_limit_4 == 0) { 6447 hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD; 6448 priv_hcl_4 = hw_copy_limit_4; 6449 } 6450 if (hw_copy_limit_8 == 0) { 6451 hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; 6452 priv_hcl_8 = hw_copy_limit_8; 6453 } 6454 min_ecache_size = ecache_size; 6455 } else { 6456 /* 6457 * MP initialization. Called *after* /etc/system has 6458 * been parsed. One CPU has already been initialized. 6459 * Need to cater for /etc/system having scragged one 6460 * of our values. 6461 */ 6462 if (ecache_size == min_ecache_size) { 6463 /* 6464 * Same size ecache. We do nothing unless we 6465 * have a pessimistic ecache setting. In that 6466 * case we become more optimistic (if the cache is 6467 * large enough). 6468 */ 6469 if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) { 6470 /* 6471 * Need to adjust hw_copy_limit* from our 6472 * pessimistic uniprocessor value to a more 6473 * optimistic UP value *iff* it hasn't been 6474 * reset. 6475 */ 6476 if ((ecache_size > 1048576) && 6477 (priv_hcl_8 == hw_copy_limit_8)) { 6478 if (ecache_size <= 2097152) 6479 hw_copy_limit_8 = 4 * 6480 VIS_COPY_THRESHOLD; 6481 else if (ecache_size <= 4194304) 6482 hw_copy_limit_8 = 4 * 6483 VIS_COPY_THRESHOLD; 6484 else 6485 hw_copy_limit_8 = 4 * 6486 VIS_COPY_THRESHOLD; 6487 priv_hcl_8 = hw_copy_limit_8; 6488 } 6489 } 6490 } else if (ecache_size < min_ecache_size) { 6491 /* 6492 * A different ecache size. Can this even happen? 6493 */ 6494 if (priv_hcl_8 == hw_copy_limit_8) { 6495 /* 6496 * The previous value that we set 6497 * is unchanged (i.e., it hasn't been 6498 * scragged by /etc/system). Rewrite it. 6499 */ 6500 if (ecache_size <= 1048576) 6501 hw_copy_limit_8 = 8 * 6502 VIS_COPY_THRESHOLD; 6503 else if (ecache_size <= 2097152) 6504 hw_copy_limit_8 = 8 * 6505 VIS_COPY_THRESHOLD; 6506 else if (ecache_size <= 4194304) 6507 hw_copy_limit_8 = 8 * 6508 VIS_COPY_THRESHOLD; 6509 else 6510 hw_copy_limit_8 = 10 * 6511 VIS_COPY_THRESHOLD; 6512 priv_hcl_8 = hw_copy_limit_8; 6513 min_ecache_size = ecache_size; 6514 } 6515 } 6516 } 6517 } 6518 6519 /* 6520 * Called from illegal instruction trap handler to see if we can attribute 6521 * the trap to a fpras check. 6522 */ 6523 int 6524 fpras_chktrap(struct regs *rp) 6525 { 6526 int op; 6527 struct fpras_chkfngrp *cgp; 6528 uintptr_t tpc = (uintptr_t)rp->r_pc; 6529 6530 if (fpras_chkfngrps == NULL) 6531 return (0); 6532 6533 cgp = &fpras_chkfngrps[CPU->cpu_id]; 6534 for (op = 0; op < FPRAS_NCOPYOPS; ++op) { 6535 if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 && 6536 tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult) 6537 break; 6538 } 6539 if (op == FPRAS_NCOPYOPS) 6540 return (0); 6541 6542 /* 6543 * This is an fpRAS failure caught through an illegal 6544 * instruction - trampoline. 
 */
	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
	rp->r_npc = rp->r_pc + 4;
	return (1);
}

/*
 * fpras_failure is called when a fpras check detects a bad calculation
 * result or an illegal instruction trap is attributed to an fpras
 * check. In all cases we are still bound to CPU.
 */
int
fpras_failure(int op, int how)
{
	int use_hw_bcopy_orig, use_hw_bzero_orig;
	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
	ch_async_flt_t ch_flt;
	struct async_flt *aflt = (struct async_flt *)&ch_flt;
	struct fpras_chkfn *sfp, *cfp;
	uint32_t *sip, *cip;
	int i;

	/*
	 * We're running on a sick CPU. Avoid further FPU use at least for
	 * the time in which we dispatch an ereport and (if applicable) panic.
	 */
	use_hw_bcopy_orig = use_hw_bcopy;
	use_hw_bzero_orig = use_hw_bzero;
	hcl1_orig = hw_copy_limit_1;
	hcl2_orig = hw_copy_limit_2;
	hcl4_orig = hw_copy_limit_4;
	hcl8_orig = hw_copy_limit_8;
	use_hw_bcopy = use_hw_bzero = 0;
	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
	    hw_copy_limit_8 = 0;

	bzero(&ch_flt, sizeof (ch_async_flt_t));
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_class = CPU_FAULT;
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_status = (how << 8) | op;
	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
	ch_flt.flt_type = CPU_FPUERR;

	/*
	 * We must panic if the copy operation had no lofault protection -
	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
	 */
	aflt->flt_panic = (curthread->t_lofault == NULL);

	/*
	 * XOR the source instruction block with the copied instruction
	 * block - this will show us which bit(s) are corrupted.
	 */
	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
		sip = &sfp->fpras_blk0[0];
		cip = &cfp->fpras_blk0[0];
	} else {
		sip = &sfp->fpras_blk1[0];
		cip = &cfp->fpras_blk1[0];
	}
	for (i = 0; i < 16; ++i, ++sip, ++cip)
		ch_flt.flt_fpdata[i] = *sip ^ *cip;

	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);

	if (aflt->flt_panic)
		fm_panic("FPU failure on CPU %d", CPU->cpu_id);

	/*
	 * We get here for copyin/copyout and kcopy or bcopy where the
	 * caller has used on_fault. We will flag the error so that
	 * the process may be killed. The trap_async_hwerr mechanism will
	 * take appropriate further action (such as a reboot, contract
	 * notification etc). Since we may be continuing we will
	 * restore the global hardware copy acceleration switches.
	 *
	 * When we return from this function to the copy function we want to
	 * avoid potentially bad data being used, ie we want the affected
	 * copy function to return an error. The caller should therefore
	 * invoke its lofault handler (which always exists for these functions)
	 * which will return the appropriate error.
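 *
 * A sketch of the overall calling pattern (illustrative only; the
 * copy-routine side lives outside this file): a checked copy routine
 * runs its per-cpu fpras check block and, on a bad result, does
 * roughly
 *
 *	(void) fpras_failure(op, how);
 *	... branch to its lofault handler to return the error ...
 *
 * so by the time control returns from here the affected operation is
 * already destined to fail back to its caller.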
 */
	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
	aston(curthread);

	use_hw_bcopy = use_hw_bcopy_orig;
	use_hw_bzero = use_hw_bzero_orig;
	hw_copy_limit_1 = hcl1_orig;
	hw_copy_limit_2 = hcl2_orig;
	hw_copy_limit_4 = hcl4_orig;
	hw_copy_limit_8 = hcl8_orig;

	return (1);
}

#define	VIS_BLOCKSIZE		64

int
dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
{
	int ret, watched;

	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
	ret = dtrace_blksuword32(addr, data, 0);
	if (watched)
		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);

	return (ret);
}

/*
 * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
 * faulted cpu into that state). Cross-trap to the faulted cpu to clear
 * CEEN from the EER to disable traps for further disrupting error types
 * on that cpu. We could cross-call instead, but that has a larger
 * instruction and data footprint than cross-trapping, and the cpu is known
 * to be faulted.
 */
void
cpu_faulted_enter(struct cpu *cp)
{
	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
}

/*
 * Called when a cpu leaves the CPU_FAULTED state to return to one of
 * offline, spare, or online (by the cpu requesting this state change).
 * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
 * disrupting error bits that have accumulated without trapping, then
 * we cross-trap to re-enable CEEN controlled traps.
 */
void
cpu_faulted_exit(struct cpu *cp)
{
	ch_cpu_errors_t cpu_error_regs;

	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
		cpu_error_regs.afsr_ext = C_AFSR_EXT_CECC_ERRS;
	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
	    (uint64_t)&cpu_error_regs, 0);

	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
}

/*
 * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
 * the errors in the original AFSR, 0 otherwise.
 *
 * For all procs if the initial error was a BERR or TO, then it is possible
 * that we may have caused a secondary BERR or TO in the process of logging the
 * initial error via cpu_run_bus_error_handlers().  If this is the case then,
 * if the request was protected, a panic is still not necessary; if not
 * protected then aft_panic is already set - so either way there's no need
 * to set aft_panic for the secondary error.
 *
 * For Cheetah and Jalapeno, if the original error was a UE which occurred on
 * a store merge, then the error handling code will call cpu_deferred_error().
 * When clear_errors() is called, it will determine that secondary errors have
 * occurred - in particular, the store merge also caused an EDU and WDU that
 * weren't discovered until this point.
 *
 * We do three checks to verify that we are in this case. If we pass all three
 * checks, we return 1 to indicate that we should not panic. If any unexpected
 * errors occur, we return 0.
 *
 * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
 * handled in cpu_disrupting_errors(). Since this function is not even called
 * in the case we are interested in, we just return 0 for these processors.
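 *
 * As a concrete (purely illustrative) store-merge example on Cheetah or
 * Jalapeno: the original trap reports t_afsr_errs == C_AFSR_UE with an
 * AFAR of, say, 0x1000040; clear_errors() then finds afsr_errs ==
 * (C_AFSR_EDU | C_AFSR_WDU) with an AFAR in the same 64-byte block.
 * All three checks below pass, so we return 1 and no panic is forced
 * for the secondary errors.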
6720 */ 6721 /*ARGSUSED*/ 6722 static int 6723 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs, 6724 uint64_t t_afar) 6725 { 6726 #if defined(CHEETAH_PLUS) 6727 #else /* CHEETAH_PLUS */ 6728 struct async_flt *aflt = (struct async_flt *)ch_flt; 6729 #endif /* CHEETAH_PLUS */ 6730 6731 /* 6732 * Was the original error a BERR or TO and only a BERR or TO 6733 * (multiple errors are also OK) 6734 */ 6735 if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) { 6736 /* 6737 * Is the new error a BERR or TO and only a BERR or TO 6738 * (multiple errors are also OK) 6739 */ 6740 if ((ch_flt->afsr_errs & 6741 ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) 6742 return (1); 6743 } 6744 6745 #if defined(CHEETAH_PLUS) 6746 return (0); 6747 #else /* CHEETAH_PLUS */ 6748 /* 6749 * Now look for secondary effects of a UE on cheetah/jalapeno 6750 * 6751 * Check the original error was a UE, and only a UE. Note that 6752 * the ME bit will cause us to fail this check. 6753 */ 6754 if (t_afsr_errs != C_AFSR_UE) 6755 return (0); 6756 6757 /* 6758 * Check the secondary errors were exclusively an EDU and/or WDU. 6759 */ 6760 if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0) 6761 return (0); 6762 6763 /* 6764 * Check the AFAR of the original error and secondary errors 6765 * match to the 64-byte boundary 6766 */ 6767 if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64)) 6768 return (0); 6769 6770 /* 6771 * We've passed all the checks, so it's a secondary error! 6772 */ 6773 return (1); 6774 #endif /* CHEETAH_PLUS */ 6775 } 6776 6777 /* 6778 * Translate the flt_bit or flt_type into an error type. First, flt_bit 6779 * is checked for any valid errors. If found, the error type is 6780 * returned. If not found, the flt_type is checked for L1$ parity errors. 
6781 */ 6782 /*ARGSUSED*/ 6783 static uint8_t 6784 cpu_flt_bit_to_plat_error(struct async_flt *aflt) 6785 { 6786 #if defined(JALAPENO) 6787 /* 6788 * Currently, logging errors to the SC is not supported on Jalapeno 6789 */ 6790 return (PLAT_ECC_ERROR2_NONE); 6791 #else 6792 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 6793 6794 switch (ch_flt->flt_bit) { 6795 case C_AFSR_CE: 6796 return (PLAT_ECC_ERROR2_CE); 6797 case C_AFSR_UCC: 6798 case C_AFSR_EDC: 6799 case C_AFSR_WDC: 6800 case C_AFSR_CPC: 6801 return (PLAT_ECC_ERROR2_L2_CE); 6802 case C_AFSR_EMC: 6803 return (PLAT_ECC_ERROR2_EMC); 6804 case C_AFSR_IVC: 6805 return (PLAT_ECC_ERROR2_IVC); 6806 case C_AFSR_UE: 6807 return (PLAT_ECC_ERROR2_UE); 6808 case C_AFSR_UCU: 6809 case C_AFSR_EDU: 6810 case C_AFSR_WDU: 6811 case C_AFSR_CPU: 6812 return (PLAT_ECC_ERROR2_L2_UE); 6813 case C_AFSR_IVU: 6814 return (PLAT_ECC_ERROR2_IVU); 6815 case C_AFSR_TO: 6816 return (PLAT_ECC_ERROR2_TO); 6817 case C_AFSR_BERR: 6818 return (PLAT_ECC_ERROR2_BERR); 6819 #if defined(CHEETAH_PLUS) 6820 case C_AFSR_L3_EDC: 6821 case C_AFSR_L3_UCC: 6822 case C_AFSR_L3_CPC: 6823 case C_AFSR_L3_WDC: 6824 return (PLAT_ECC_ERROR2_L3_CE); 6825 case C_AFSR_IMC: 6826 return (PLAT_ECC_ERROR2_IMC); 6827 case C_AFSR_TSCE: 6828 return (PLAT_ECC_ERROR2_L2_TSCE); 6829 case C_AFSR_THCE: 6830 return (PLAT_ECC_ERROR2_L2_THCE); 6831 case C_AFSR_L3_MECC: 6832 return (PLAT_ECC_ERROR2_L3_MECC); 6833 case C_AFSR_L3_THCE: 6834 return (PLAT_ECC_ERROR2_L3_THCE); 6835 case C_AFSR_L3_CPU: 6836 case C_AFSR_L3_EDU: 6837 case C_AFSR_L3_UCU: 6838 case C_AFSR_L3_WDU: 6839 return (PLAT_ECC_ERROR2_L3_UE); 6840 case C_AFSR_DUE: 6841 return (PLAT_ECC_ERROR2_DUE); 6842 case C_AFSR_DTO: 6843 return (PLAT_ECC_ERROR2_DTO); 6844 case C_AFSR_DBERR: 6845 return (PLAT_ECC_ERROR2_DBERR); 6846 #endif /* CHEETAH_PLUS */ 6847 default: 6848 switch (ch_flt->flt_type) { 6849 #if defined(CPU_IMP_L1_CACHE_PARITY) 6850 case CPU_IC_PARITY: 6851 return (PLAT_ECC_ERROR2_IPE); 6852 case CPU_DC_PARITY: 6853 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 6854 if (ch_flt->parity_data.dpe.cpl_cache == 6855 CPU_PC_PARITY) { 6856 return (PLAT_ECC_ERROR2_PCACHE); 6857 } 6858 } 6859 return (PLAT_ECC_ERROR2_DPE); 6860 #endif /* CPU_IMP_L1_CACHE_PARITY */ 6861 case CPU_ITLB_PARITY: 6862 return (PLAT_ECC_ERROR2_ITLB); 6863 case CPU_DTLB_PARITY: 6864 return (PLAT_ECC_ERROR2_DTLB); 6865 default: 6866 return (PLAT_ECC_ERROR2_NONE); 6867 } 6868 } 6869 #endif /* JALAPENO */ 6870 } 6871
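
/*
 * The fragment below is illustrative only and is never compiled (note
 * the #if 0); it sketches the CEEN toggle idiom used by cpu_check_ce()
 * above, with the same error-enable accessors and EN_REG_CEEN bit used
 * elsewhere in this file.  The function name is hypothetical.
 */
#if 0
static void
ceen_toggle_example(int enable)
{
	uint64_t eer = get_error_enable();

	if (enable)
		set_error_enable(eer | EN_REG_CEEN);
	else
		set_error_enable(eer & ~EN_REG_CEEN);
}
#endif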