1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/ddi.h> 32 #include <sys/sysmacros.h> 33 #include <sys/archsystm.h> 34 #include <sys/vmsystm.h> 35 #include <sys/machparam.h> 36 #include <sys/machsystm.h> 37 #include <sys/machthread.h> 38 #include <sys/cpu.h> 39 #include <sys/cmp.h> 40 #include <sys/elf_SPARC.h> 41 #include <vm/vm_dep.h> 42 #include <vm/hat_sfmmu.h> 43 #include <vm/seg_kpm.h> 44 #include <sys/cpuvar.h> 45 #include <sys/cheetahregs.h> 46 #include <sys/us3_module.h> 47 #include <sys/async.h> 48 #include <sys/cmn_err.h> 49 #include <sys/debug.h> 50 #include <sys/dditypes.h> 51 #include <sys/prom_debug.h> 52 #include <sys/prom_plat.h> 53 #include <sys/cpu_module.h> 54 #include <sys/sysmacros.h> 55 #include <sys/intreg.h> 56 #include <sys/clock.h> 57 #include <sys/platform_module.h> 58 #include <sys/machtrap.h> 59 #include <sys/ontrap.h> 60 #include <sys/panic.h> 61 #include <sys/memlist.h> 62 #include <sys/bootconf.h> 63 #include <sys/ivintr.h> 64 #include <sys/atomic.h> 65 #include <sys/taskq.h> 66 #include <sys/note.h> 67 #include <sys/ndifm.h> 68 #include <sys/ddifm.h> 69 #include <sys/fm/protocol.h> 70 #include <sys/fm/util.h> 71 #include <sys/fm/cpu/UltraSPARC-III.h> 72 #include <sys/fpras_impl.h> 73 #include <sys/dtrace.h> 74 #include <sys/watchpoint.h> 75 #include <sys/plat_ecc_unum.h> 76 #include <sys/cyclic.h> 77 #include <sys/errorq.h> 78 #include <sys/errclassify.h> 79 80 #ifdef CHEETAHPLUS_ERRATUM_25 81 #include <sys/xc_impl.h> 82 #endif /* CHEETAHPLUS_ERRATUM_25 */ 83 84 /* 85 * Note that 'Cheetah PRM' refers to: 86 * SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III 87 */ 88 89 /* 90 * Per CPU pointers to physical address of TL>0 logout data areas. 91 * These pointers have to be in the kernel nucleus to avoid MMU 92 * misses. 93 */ 94 uint64_t ch_err_tl1_paddrs[NCPU]; 95 96 /* 97 * One statically allocated structure to use during startup/DR 98 * to prevent unnecessary panics. 99 */ 100 ch_err_tl1_data_t ch_err_tl1_data; 101 102 /* 103 * Per CPU pending error at TL>0, used by level15 softint handler 104 */ 105 uchar_t ch_err_tl1_pending[NCPU]; 106 107 /* 108 * For deferred CE re-enable after trap. 109 */ 110 taskq_t *ch_check_ce_tq; 111 112 /* 113 * Internal functions. 114 */ 115 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep); 116 static void cpu_log_diag_info(ch_async_flt_t *ch_flt); 117 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason, 118 ecc_type_to_info_t *eccp, ch_diag_data_t *cdp); 119 static int clear_ecc(struct async_flt *ecc); 120 #if defined(CPU_IMP_ECACHE_ASSOC) 121 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt); 122 #endif 123 static int cpu_ecache_set_size(struct cpu *cp); 124 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag); 125 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr); 126 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag); 127 static int cpu_ectag_pa_to_subblk_state(int cachesize, 128 uint64_t subaddr, uint64_t tag); 129 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt); 130 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit); 131 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit); 132 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit); 133 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit); 134 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit); 135 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp); 136 static void cpu_scrubphys(struct async_flt *aflt); 137 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *, 138 int *, int *); 139 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *); 140 static void cpu_ereport_init(struct async_flt *aflt); 141 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t); 142 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt); 143 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen, 144 uint64_t nceen, ch_cpu_logout_t *clop); 145 static int cpu_ce_delayed_ec_logout(uint64_t); 146 static int cpu_matching_ecache_line(uint64_t, void *, int, int *); 147 148 #ifdef CHEETAHPLUS_ERRATUM_25 149 static int mondo_recover_proc(uint16_t, int); 150 static void cheetah_nudge_init(void); 151 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, 152 cyc_time_t *when); 153 static void cheetah_nudge_buddy(void); 154 #endif /* CHEETAHPLUS_ERRATUM_25 */ 155 156 #if defined(CPU_IMP_L1_CACHE_PARITY) 157 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt); 158 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index); 159 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt, 160 ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word); 161 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt); 162 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index); 163 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt); 164 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index); 165 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *); 166 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *); 167 #endif /* CPU_IMP_L1_CACHE_PARITY */ 168 169 int (*p2get_mem_info)(int synd_code, uint64_t paddr, 170 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 171 int *segsp, int *banksp, int *mcidp); 172 173 /* 174 * This table is used to determine which bit(s) is(are) bad when an ECC 175 * error occurs. The array is indexed by an 9-bit syndrome. The entries 176 * of this array have the following semantics: 177 * 178 * 00-127 The number of the bad bit, when only one bit is bad. 179 * 128 ECC bit C0 is bad. 180 * 129 ECC bit C1 is bad. 181 * 130 ECC bit C2 is bad. 182 * 131 ECC bit C3 is bad. 183 * 132 ECC bit C4 is bad. 184 * 133 ECC bit C5 is bad. 185 * 134 ECC bit C6 is bad. 186 * 135 ECC bit C7 is bad. 187 * 136 ECC bit C8 is bad. 188 * 137-143 reserved for Mtag Data and ECC. 189 * 144(M2) Two bits are bad within a nibble. 190 * 145(M3) Three bits are bad within a nibble. 191 * 146(M3) Four bits are bad within a nibble. 192 * 147(M) Multiple bits (5 or more) are bad. 193 * 148 NO bits are bad. 194 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5. 195 */ 196 197 #define C0 128 198 #define C1 129 199 #define C2 130 200 #define C3 131 201 #define C4 132 202 #define C5 133 203 #define C6 134 204 #define C7 135 205 #define C8 136 206 #define MT0 137 /* Mtag Data bit 0 */ 207 #define MT1 138 208 #define MT2 139 209 #define MTC0 140 /* Mtag Check bit 0 */ 210 #define MTC1 141 211 #define MTC2 142 212 #define MTC3 143 213 #define M2 144 214 #define M3 145 215 #define M4 146 216 #define M 147 217 #define NA 148 218 #if defined(JALAPENO) || defined(SERRANO) 219 #define S003 149 /* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */ 220 #define S003MEM 150 /* Syndrome 0x003 => likely from WDU/WBP */ 221 #define SLAST S003MEM /* last special syndrome */ 222 #else /* JALAPENO || SERRANO */ 223 #define S003 149 /* Syndrome 0x003 => likely from EDU:ST */ 224 #define S071 150 /* Syndrome 0x071 => likely from WDU/CPU */ 225 #define S11C 151 /* Syndrome 0x11c => likely from BERR/DBERR */ 226 #define SLAST S11C /* last special syndrome */ 227 #endif /* JALAPENO || SERRANO */ 228 #if defined(JALAPENO) || defined(SERRANO) 229 #define BPAR0 152 /* syndrom 152 through 167 for bus parity */ 230 #define BPAR15 167 231 #endif /* JALAPENO || SERRANO */ 232 233 static uint8_t ecc_syndrome_tab[] = 234 { 235 NA, C0, C1, S003, C2, M2, M3, 47, C3, M2, M2, 53, M2, 41, 29, M, 236 C4, M, M, 50, M2, 38, 25, M2, M2, 33, 24, M2, 11, M, M2, 16, 237 C5, M, M, 46, M2, 37, 19, M2, M, 31, 32, M, 7, M2, M2, 10, 238 M2, 40, 13, M2, 59, M, M2, 66, M, M2, M2, 0, M2, 67, 71, M, 239 C6, M, M, 43, M, 36, 18, M, M2, 49, 15, M, 63, M2, M2, 6, 240 M2, 44, 28, M2, M, M2, M2, 52, 68, M2, M2, 62, M2, M3, M3, M4, 241 M2, 26, 106, M2, 64, M, M2, 2, 120, M, M2, M3, M, M3, M3, M4, 242 #if defined(JALAPENO) || defined(SERRANO) 243 116, M2, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3, 244 #else /* JALAPENO || SERRANO */ 245 116, S071, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3, 246 #endif /* JALAPENO || SERRANO */ 247 C7, M2, M, 42, M, 35, 17, M2, M, 45, 14, M2, 21, M2, M2, 5, 248 M, 27, M, M, 99, M, M, 3, 114, M2, M2, 20, M2, M3, M3, M, 249 M2, 23, 113, M2, 112, M2, M, 51, 95, M, M2, M3, M2, M3, M3, M2, 250 103, M, M2, M3, M2, M3, M3, M4, M2, 48, M, M, 73, M2, M, M3, 251 M2, 22, 110, M2, 109, M2, M, 9, 108, M2, M, M3, M2, M3, M3, M, 252 102, M2, M, M, M2, M3, M3, M, M2, M3, M3, M2, M, M4, M, M3, 253 98, M, M2, M3, M2, M, M3, M4, M2, M3, M3, M4, M3, M, M, M, 254 M2, M3, M3, M, M3, M, M, M, 56, M4, M, M3, M4, M, M, M, 255 C8, M, M2, 39, M, 34, 105, M2, M, 30, 104, M, 101, M, M, 4, 256 #if defined(JALAPENO) || defined(SERRANO) 257 M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, M2, M, M3, M, 258 #else /* JALAPENO || SERRANO */ 259 M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, S11C, M, M3, M, 260 #endif /* JALAPENO || SERRANO */ 261 M2, 97, 82, M2, 78, M2, M2, 1, 96, M, M, M, M, M, M3, M2, 262 94, M, M2, M3, M2, M, M3, M, M2, M, 79, M, 69, M, M4, M, 263 M2, 93, 92, M, 91, M, M2, 8, 90, M2, M2, M, M, M, M, M4, 264 89, M, M, M3, M2, M3, M3, M, M, M, M3, M2, M3, M2, M, M3, 265 86, M, M2, M3, M2, M, M3, M, M2, M, M3, M, M3, M, M, M3, 266 M, M, M3, M2, M3, M2, M4, M, 60, M, M2, M3, M4, M, M, M2, 267 M2, 88, 85, M2, 84, M, M2, 55, 81, M2, M2, M3, M2, M3, M3, M4, 268 77, M, M, M, M2, M3, M, M, M2, M3, M3, M4, M3, M2, M, M, 269 74, M, M2, M3, M, M, M3, M, M, M, M3, M, M3, M, M4, M3, 270 M2, 70, 107, M4, 65, M2, M2, M, 127, M, M, M, M2, M3, M3, M, 271 80, M2, M2, 72, M, 119, 118, M, M2, 126, 76, M, 125, M, M4, M3, 272 M2, 115, 124, M, 75, M, M, M3, 61, M, M4, M, M4, M, M, M, 273 M, 123, 122, M4, 121, M4, M, M3, 117, M2, M2, M3, M4, M3, M, M, 274 111, M, M, M, M4, M3, M3, M, M, M, M3, M, M3, M2, M, M 275 }; 276 277 #define ESYND_TBL_SIZE (sizeof (ecc_syndrome_tab) / sizeof (uint8_t)) 278 279 #if !(defined(JALAPENO) || defined(SERRANO)) 280 /* 281 * This table is used to determine which bit(s) is(are) bad when a Mtag 282 * error occurs. The array is indexed by an 4-bit ECC syndrome. The entries 283 * of this array have the following semantics: 284 * 285 * -1 Invalid mtag syndrome. 286 * 137 Mtag Data 0 is bad. 287 * 138 Mtag Data 1 is bad. 288 * 139 Mtag Data 2 is bad. 289 * 140 Mtag ECC 0 is bad. 290 * 141 Mtag ECC 1 is bad. 291 * 142 Mtag ECC 2 is bad. 292 * 143 Mtag ECC 3 is bad. 293 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6. 294 */ 295 short mtag_syndrome_tab[] = 296 { 297 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2, MT1, M2, MT2, M2, M2 298 }; 299 300 #define MSYND_TBL_SIZE (sizeof (mtag_syndrome_tab) / sizeof (short)) 301 302 #else /* !(JALAPENO || SERRANO) */ 303 304 #define BSYND_TBL_SIZE 16 305 306 #endif /* !(JALAPENO || SERRANO) */ 307 308 /* 309 * CE initial classification and subsequent action lookup table 310 */ 311 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE]; 312 static int ce_disp_inited; 313 314 /* 315 * Set to disable leaky and partner check for memory correctables 316 */ 317 int ce_xdiag_off; 318 319 /* 320 * The following are not incremented atomically so are indicative only 321 */ 322 static int ce_xdiag_drops; 323 static int ce_xdiag_lkydrops; 324 static int ce_xdiag_ptnrdrops; 325 static int ce_xdiag_bad; 326 327 /* 328 * CE leaky check callback structure 329 */ 330 typedef struct { 331 struct async_flt *lkycb_aflt; 332 errorq_t *lkycb_eqp; 333 errorq_elem_t *lkycb_eqep; 334 } ce_lkychk_cb_t; 335 336 /* 337 * defines for various ecache_flush_flag's 338 */ 339 #define ECACHE_FLUSH_LINE 1 340 #define ECACHE_FLUSH_ALL 2 341 342 /* 343 * STICK sync 344 */ 345 #define STICK_ITERATION 10 346 #define MAX_TSKEW 1 347 #define EV_A_START 0 348 #define EV_A_END 1 349 #define EV_B_START 2 350 #define EV_B_END 3 351 #define EVENTS 4 352 353 static int64_t stick_iter = STICK_ITERATION; 354 static int64_t stick_tsk = MAX_TSKEW; 355 356 typedef enum { 357 EVENT_NULL = 0, 358 SLAVE_START, 359 SLAVE_CONT, 360 MASTER_START 361 } event_cmd_t; 362 363 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL; 364 static int64_t timestamp[EVENTS]; 365 static volatile int slave_done; 366 367 #ifdef DEBUG 368 #define DSYNC_ATTEMPTS 64 369 typedef struct { 370 int64_t skew_val[DSYNC_ATTEMPTS]; 371 } ss_t; 372 373 ss_t stick_sync_stats[NCPU]; 374 #endif /* DEBUG */ 375 376 /* 377 * Maximum number of contexts for Cheetah. 378 */ 379 #define MAX_NCTXS (1 << 13) 380 381 /* Will be set !NULL for Cheetah+ and derivatives. */ 382 uchar_t *ctx_pgsz_array = NULL; 383 #if defined(CPU_IMP_DUAL_PAGESIZE) 384 static uchar_t ctx_pgsz_arr[MAX_NCTXS]; 385 uint_t disable_dual_pgsz = 0; 386 #endif /* CPU_IMP_DUAL_PAGESIZE */ 387 388 /* 389 * Save the cache bootup state for use when internal 390 * caches are to be re-enabled after an error occurs. 391 */ 392 uint64_t cache_boot_state; 393 394 /* 395 * PA[22:0] represent Displacement in Safari configuration space. 396 */ 397 uint_t root_phys_addr_lo_mask = 0x7fffffu; 398 399 bus_config_eclk_t bus_config_eclk[] = { 400 #if defined(JALAPENO) || defined(SERRANO) 401 {JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1}, 402 {JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2}, 403 {JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32}, 404 #else /* JALAPENO || SERRANO */ 405 {SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1}, 406 {SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2}, 407 {SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32}, 408 #endif /* JALAPENO || SERRANO */ 409 {0, 0} 410 }; 411 412 /* 413 * Interval for deferred CEEN reenable 414 */ 415 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS; 416 417 /* 418 * set in /etc/system to control logging of user BERR/TO's 419 */ 420 int cpu_berr_to_verbose = 0; 421 422 /* 423 * set to 0 in /etc/system to defer CEEN reenable for all CEs 424 */ 425 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED; 426 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT; 427 428 /* 429 * Set of all offline cpus 430 */ 431 cpuset_t cpu_offline_set; 432 433 static void cpu_delayed_check_ce_errors(void *); 434 static void cpu_check_ce_errors(void *); 435 void cpu_error_ecache_flush(ch_async_flt_t *); 436 static int cpu_error_ecache_flush_required(ch_async_flt_t *); 437 static void cpu_log_and_clear_ce(ch_async_flt_t *); 438 void cpu_ce_detected(ch_cpu_errors_t *, int); 439 440 /* 441 * CE Leaky check timeout in microseconds. This is chosen to be twice the 442 * memory refresh interval of current DIMMs (64ms). After initial fix that 443 * gives at least one full refresh cycle in which the cell can leak 444 * (whereafter further refreshes simply reinforce any incorrect bit value). 445 */ 446 clock_t cpu_ce_lkychk_timeout_usec = 128000; 447 448 /* 449 * CE partner check partner caching period in seconds 450 */ 451 int cpu_ce_ptnr_cachetime_sec = 60; 452 453 /* 454 * Sets trap table entry ttentry by overwriting eight instructions from ttlabel 455 */ 456 #define CH_SET_TRAP(ttentry, ttlabel) \ 457 bcopy((const void *)&ttlabel, &ttentry, 32); \ 458 flush_instr_mem((caddr_t)&ttentry, 32); 459 460 static int min_ecache_size; 461 static uint_t priv_hcl_1; 462 static uint_t priv_hcl_2; 463 static uint_t priv_hcl_4; 464 static uint_t priv_hcl_8; 465 466 void 467 cpu_setup(void) 468 { 469 extern int at_flags; 470 extern int disable_delay_tlb_flush, delay_tlb_flush; 471 extern int cpc_has_overflow_intr; 472 extern int disable_text_largepages; 473 extern int use_text_pgsz4m; 474 475 /* 476 * Setup chip-specific trap handlers. 477 */ 478 cpu_init_trap(); 479 480 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); 481 482 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3; 483 484 /* 485 * save the cache bootup state. 486 */ 487 cache_boot_state = get_dcu() & DCU_CACHE; 488 489 /* 490 * Use the maximum number of contexts available for Cheetah 491 * unless it has been tuned for debugging. 492 * We are checking against 0 here since this value can be patched 493 * while booting. It can not be patched via /etc/system since it 494 * will be patched too late and thus cause the system to panic. 495 */ 496 if (nctxs == 0) 497 nctxs = MAX_NCTXS; 498 499 /* 500 * Due to the number of entries in the fully-associative tlb 501 * this may have to be tuned lower than in spitfire. 502 */ 503 pp_slots = MIN(8, MAXPP_SLOTS); 504 505 /* 506 * Block stores do not invalidate all pages of the d$, pagecopy 507 * et. al. need virtual translations with virtual coloring taken 508 * into consideration. prefetch/ldd will pollute the d$ on the 509 * load side. 510 */ 511 pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE; 512 513 if (use_page_coloring) { 514 do_pg_coloring = 1; 515 if (use_virtual_coloring) 516 do_virtual_coloring = 1; 517 } 518 519 isa_list = 520 "sparcv9+vis2 sparcv9+vis sparcv9 " 521 "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus " 522 "sparcv8 sparcv8-fsmuld sparcv7 sparc"; 523 524 /* 525 * On Panther-based machines, this should 526 * also include AV_SPARC_POPC too 527 */ 528 cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2; 529 530 /* 531 * On cheetah, there's no hole in the virtual address space 532 */ 533 hole_start = hole_end = 0; 534 535 /* 536 * The kpm mapping window. 537 * kpm_size: 538 * The size of a single kpm range. 539 * The overall size will be: kpm_size * vac_colors. 540 * kpm_vbase: 541 * The virtual start address of the kpm range within the kernel 542 * virtual address space. kpm_vbase has to be kpm_size aligned. 543 */ 544 kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */ 545 kpm_size_shift = 43; 546 kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */ 547 kpm_smallpages = 1; 548 549 /* 550 * The traptrace code uses either %tick or %stick for 551 * timestamping. We have %stick so we can use it. 552 */ 553 traptrace_use_stick = 1; 554 555 /* 556 * Cheetah has a performance counter overflow interrupt 557 */ 558 cpc_has_overflow_intr = 1; 559 560 /* 561 * Use cheetah flush-all support 562 */ 563 if (!disable_delay_tlb_flush) 564 delay_tlb_flush = 1; 565 566 #if defined(CPU_IMP_DUAL_PAGESIZE) 567 /* 568 * Use Cheetah+ and later dual page size support. 569 */ 570 if (!disable_dual_pgsz) { 571 ctx_pgsz_array = ctx_pgsz_arr; 572 } 573 #endif /* CPU_IMP_DUAL_PAGESIZE */ 574 575 /* 576 * Declare that this architecture/cpu combination does fpRAS. 577 */ 578 fpras_implemented = 1; 579 580 /* 581 * Enable 4M pages to be used for mapping user text by default. Don't 582 * use large pages for initialized data segments since we may not know 583 * at exec() time what should be the preferred large page size for DTLB 584 * programming. 585 */ 586 use_text_pgsz4m = 1; 587 disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) | 588 (1 << TTE32M) | (1 << TTE256M); 589 590 /* 591 * Setup CE lookup table 592 */ 593 CE_INITDISPTBL_POPULATE(ce_disp_table); 594 ce_disp_inited = 1; 595 } 596 597 /* 598 * Called by setcpudelay 599 */ 600 void 601 cpu_init_tick_freq(void) 602 { 603 /* 604 * For UltraSPARC III and beyond we want to use the 605 * system clock rate as the basis for low level timing, 606 * due to support of mixed speed CPUs and power managment. 607 */ 608 if (system_clock_freq == 0) 609 cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq"); 610 611 sys_tick_freq = system_clock_freq; 612 } 613 614 #ifdef CHEETAHPLUS_ERRATUM_25 615 /* 616 * Tunables 617 */ 618 int cheetah_bpe_off = 0; 619 int cheetah_sendmondo_recover = 1; 620 int cheetah_sendmondo_fullscan = 0; 621 int cheetah_sendmondo_recover_delay = 5; 622 623 #define CHEETAH_LIVELOCK_MIN_DELAY 1 624 625 /* 626 * Recovery Statistics 627 */ 628 typedef struct cheetah_livelock_entry { 629 int cpuid; /* fallen cpu */ 630 int buddy; /* cpu that ran recovery */ 631 clock_t lbolt; /* when recovery started */ 632 hrtime_t recovery_time; /* time spent in recovery */ 633 } cheetah_livelock_entry_t; 634 635 #define CHEETAH_LIVELOCK_NENTRY 32 636 637 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY]; 638 int cheetah_livelock_entry_nxt; 639 640 #define CHEETAH_LIVELOCK_ENTRY_NEXT(statp) { \ 641 statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt; \ 642 if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) { \ 643 cheetah_livelock_entry_nxt = 0; \ 644 } \ 645 } 646 647 #define CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val) statp->item = val 648 649 struct { 650 hrtime_t hrt; /* maximum recovery time */ 651 int recovery; /* recovered */ 652 int full_claimed; /* maximum pages claimed in full recovery */ 653 int proc_entry; /* attempted to claim TSB */ 654 int proc_tsb_scan; /* tsb scanned */ 655 int proc_tsb_partscan; /* tsb partially scanned */ 656 int proc_tsb_fullscan; /* whole tsb scanned */ 657 int proc_claimed; /* maximum pages claimed in tsb scan */ 658 int proc_user; /* user thread */ 659 int proc_kernel; /* kernel thread */ 660 int proc_onflt; /* bad stack */ 661 int proc_cpu; /* null cpu */ 662 int proc_thread; /* null thread */ 663 int proc_proc; /* null proc */ 664 int proc_as; /* null as */ 665 int proc_hat; /* null hat */ 666 int proc_hat_inval; /* hat contents don't make sense */ 667 int proc_hat_busy; /* hat is changing TSBs */ 668 int proc_tsb_reloc; /* TSB skipped because being relocated */ 669 int proc_cnum_bad; /* cnum out of range */ 670 int proc_cnum; /* last cnum processed */ 671 tte_t proc_tte; /* last tte processed */ 672 } cheetah_livelock_stat; 673 674 #define CHEETAH_LIVELOCK_STAT(item) cheetah_livelock_stat.item++ 675 676 #define CHEETAH_LIVELOCK_STATSET(item, value) \ 677 cheetah_livelock_stat.item = value 678 679 #define CHEETAH_LIVELOCK_MAXSTAT(item, value) { \ 680 if (value > cheetah_livelock_stat.item) \ 681 cheetah_livelock_stat.item = value; \ 682 } 683 684 /* 685 * Attempt to recover a cpu by claiming every cache line as saved 686 * in the TSB that the non-responsive cpu is using. Since we can't 687 * grab any adaptive lock, this is at best an attempt to do so. Because 688 * we don't grab any locks, we must operate under the protection of 689 * on_fault(). 690 * 691 * Return 1 if cpuid could be recovered, 0 if failed. 692 */ 693 int 694 mondo_recover_proc(uint16_t cpuid, int bn) 695 { 696 label_t ljb; 697 cpu_t *cp; 698 kthread_t *t; 699 proc_t *p; 700 struct as *as; 701 struct hat *hat; 702 short cnum; 703 struct tsb_info *tsbinfop; 704 struct tsbe *tsbep; 705 caddr_t tsbp; 706 caddr_t end_tsbp; 707 uint64_t paddr; 708 uint64_t idsr; 709 u_longlong_t pahi, palo; 710 int pages_claimed = 0; 711 tte_t tsbe_tte; 712 int tried_kernel_tsb = 0; 713 714 CHEETAH_LIVELOCK_STAT(proc_entry); 715 716 if (on_fault(&ljb)) { 717 CHEETAH_LIVELOCK_STAT(proc_onflt); 718 goto badstruct; 719 } 720 721 if ((cp = cpu[cpuid]) == NULL) { 722 CHEETAH_LIVELOCK_STAT(proc_cpu); 723 goto badstruct; 724 } 725 726 if ((t = cp->cpu_thread) == NULL) { 727 CHEETAH_LIVELOCK_STAT(proc_thread); 728 goto badstruct; 729 } 730 731 if ((p = ttoproc(t)) == NULL) { 732 CHEETAH_LIVELOCK_STAT(proc_proc); 733 goto badstruct; 734 } 735 736 if ((as = p->p_as) == NULL) { 737 CHEETAH_LIVELOCK_STAT(proc_as); 738 goto badstruct; 739 } 740 741 if ((hat = as->a_hat) == NULL) { 742 CHEETAH_LIVELOCK_STAT(proc_hat); 743 goto badstruct; 744 } 745 746 if (hat != ksfmmup) { 747 CHEETAH_LIVELOCK_STAT(proc_user); 748 if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) { 749 CHEETAH_LIVELOCK_STAT(proc_hat_busy); 750 goto badstruct; 751 } 752 tsbinfop = hat->sfmmu_tsb; 753 if (tsbinfop == NULL) { 754 CHEETAH_LIVELOCK_STAT(proc_hat_inval); 755 goto badstruct; 756 } 757 tsbp = tsbinfop->tsb_va; 758 end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc); 759 } else { 760 CHEETAH_LIVELOCK_STAT(proc_kernel); 761 tsbinfop = NULL; 762 tsbp = ktsb_base; 763 end_tsbp = tsbp + TSB_BYTES(ktsb_sz); 764 } 765 766 /* Verify as */ 767 if (hat->sfmmu_as != as) { 768 CHEETAH_LIVELOCK_STAT(proc_hat_inval); 769 goto badstruct; 770 } 771 772 cnum = hat->sfmmu_cnum; 773 CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum); 774 775 if ((cnum < 0) || (cnum == INVALID_CONTEXT) || (cnum >= nctxs)) { 776 CHEETAH_LIVELOCK_STAT(proc_cnum_bad); 777 goto badstruct; 778 } 779 780 do { 781 CHEETAH_LIVELOCK_STAT(proc_tsb_scan); 782 783 /* 784 * Skip TSBs being relocated. This is important because 785 * we want to avoid the following deadlock scenario: 786 * 787 * 1) when we came in we set ourselves to "in recover" state. 788 * 2) when we try to touch TSB being relocated the mapping 789 * will be in the suspended state so we'll spin waiting 790 * for it to be unlocked. 791 * 3) when the CPU that holds the TSB mapping locked tries to 792 * unlock it it will send a xtrap which will fail to xcall 793 * us or the CPU we're trying to recover, and will in turn 794 * enter the mondo code. 795 * 4) since we are still spinning on the locked mapping 796 * no further progress will be made and the system will 797 * inevitably hard hang. 798 * 799 * A TSB not being relocated can't begin being relocated 800 * while we're accessing it because we check 801 * sendmondo_in_recover before relocating TSBs. 802 */ 803 if (hat != ksfmmup && 804 (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) { 805 CHEETAH_LIVELOCK_STAT(proc_tsb_reloc); 806 goto next_tsbinfo; 807 } 808 809 for (tsbep = (struct tsbe *)tsbp; 810 tsbep < (struct tsbe *)end_tsbp; tsbep++) { 811 tsbe_tte = tsbep->tte_data; 812 813 if (tsbe_tte.tte_val == 0) { 814 /* 815 * Invalid tte 816 */ 817 continue; 818 } 819 if (tsbe_tte.tte_se) { 820 /* 821 * Don't want device registers 822 */ 823 continue; 824 } 825 if (tsbe_tte.tte_cp == 0) { 826 /* 827 * Must be cached in E$ 828 */ 829 continue; 830 } 831 CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte); 832 idsr = getidsr(); 833 if ((idsr & (IDSR_NACK_BIT(bn) | 834 IDSR_BUSY_BIT(bn))) == 0) { 835 CHEETAH_LIVELOCK_STAT(proc_tsb_partscan); 836 goto done; 837 } 838 pahi = tsbe_tte.tte_pahi; 839 palo = tsbe_tte.tte_palo; 840 paddr = (uint64_t)((pahi << 32) | 841 (palo << MMU_PAGESHIFT)); 842 claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)), 843 CH_ECACHE_SUBBLK_SIZE); 844 if ((idsr & IDSR_BUSY_BIT(bn)) == 0) { 845 shipit(cpuid, bn); 846 } 847 pages_claimed++; 848 } 849 next_tsbinfo: 850 if (tsbinfop != NULL) 851 tsbinfop = tsbinfop->tsb_next; 852 if (tsbinfop != NULL) { 853 tsbp = tsbinfop->tsb_va; 854 end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc); 855 } else if (tsbp == ktsb_base) { 856 tried_kernel_tsb = 1; 857 } else if (!tried_kernel_tsb) { 858 tsbp = ktsb_base; 859 end_tsbp = tsbp + TSB_BYTES(ktsb_sz); 860 hat = ksfmmup; 861 tsbinfop = NULL; 862 } 863 } while (tsbinfop != NULL || 864 ((tsbp == ktsb_base) && !tried_kernel_tsb)); 865 866 CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan); 867 CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed); 868 no_fault(); 869 idsr = getidsr(); 870 if ((idsr & (IDSR_NACK_BIT(bn) | 871 IDSR_BUSY_BIT(bn))) == 0) { 872 return (1); 873 } else { 874 return (0); 875 } 876 877 done: 878 no_fault(); 879 CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed); 880 return (1); 881 882 badstruct: 883 no_fault(); 884 return (0); 885 } 886 887 /* 888 * Attempt to claim ownership, temporarily, of every cache line that a 889 * non-responsive cpu might be using. This might kick that cpu out of 890 * this state. 891 * 892 * The return value indicates to the caller if we have exhausted all recovery 893 * techniques. If 1 is returned, it is useless to call this function again 894 * even for a different target CPU. 895 */ 896 int 897 mondo_recover(uint16_t cpuid, int bn) 898 { 899 struct memseg *seg; 900 uint64_t begin_pa, end_pa, cur_pa; 901 hrtime_t begin_hrt, end_hrt; 902 int retval = 0; 903 int pages_claimed = 0; 904 cheetah_livelock_entry_t *histp; 905 uint64_t idsr; 906 907 if (cas32(&sendmondo_in_recover, 0, 1) != 0) { 908 /* 909 * Wait while recovery takes place 910 */ 911 while (sendmondo_in_recover) { 912 drv_usecwait(1); 913 } 914 /* 915 * Assume we didn't claim the whole memory. If 916 * the target of this caller is not recovered, 917 * it will come back. 918 */ 919 return (retval); 920 } 921 922 CHEETAH_LIVELOCK_ENTRY_NEXT(histp) 923 CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt); 924 CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid); 925 CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id); 926 927 begin_hrt = gethrtime_waitfree(); 928 /* 929 * First try to claim the lines in the TSB the target 930 * may have been using. 931 */ 932 if (mondo_recover_proc(cpuid, bn) == 1) { 933 /* 934 * Didn't claim the whole memory 935 */ 936 goto done; 937 } 938 939 /* 940 * We tried using the TSB. The target is still 941 * not recovered. Check if complete memory scan is 942 * enabled. 943 */ 944 if (cheetah_sendmondo_fullscan == 0) { 945 /* 946 * Full memory scan is disabled. 947 */ 948 retval = 1; 949 goto done; 950 } 951 952 /* 953 * Try claiming the whole memory. 954 */ 955 for (seg = memsegs; seg; seg = seg->next) { 956 begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT; 957 end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT; 958 for (cur_pa = begin_pa; cur_pa < end_pa; 959 cur_pa += MMU_PAGESIZE) { 960 idsr = getidsr(); 961 if ((idsr & (IDSR_NACK_BIT(bn) | 962 IDSR_BUSY_BIT(bn))) == 0) { 963 /* 964 * Didn't claim all memory 965 */ 966 goto done; 967 } 968 claimlines(cur_pa, MMU_PAGESIZE, 969 CH_ECACHE_SUBBLK_SIZE); 970 if ((idsr & IDSR_BUSY_BIT(bn)) == 0) { 971 shipit(cpuid, bn); 972 } 973 pages_claimed++; 974 } 975 } 976 977 /* 978 * We did all we could. 979 */ 980 retval = 1; 981 982 done: 983 /* 984 * Update statistics 985 */ 986 end_hrt = gethrtime_waitfree(); 987 CHEETAH_LIVELOCK_STAT(recovery); 988 CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt)); 989 CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed); 990 CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \ 991 (end_hrt - begin_hrt)); 992 993 while (cas32(&sendmondo_in_recover, 1, 0) != 1); 994 995 return (retval); 996 } 997 998 /* 999 * This is called by the cyclic framework when this CPU becomes online 1000 */ 1001 /*ARGSUSED*/ 1002 static void 1003 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) 1004 { 1005 1006 hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy; 1007 hdlr->cyh_level = CY_LOW_LEVEL; 1008 hdlr->cyh_arg = NULL; 1009 1010 /* 1011 * Stagger the start time 1012 */ 1013 when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU); 1014 if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) { 1015 cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY; 1016 } 1017 when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC; 1018 } 1019 1020 /* 1021 * Create a low level cyclic to send a xtrap to the next cpu online. 1022 * However, there's no need to have this running on a uniprocessor system. 1023 */ 1024 static void 1025 cheetah_nudge_init(void) 1026 { 1027 cyc_omni_handler_t hdlr; 1028 1029 if (max_ncpus == 1) { 1030 return; 1031 } 1032 1033 hdlr.cyo_online = cheetah_nudge_onln; 1034 hdlr.cyo_offline = NULL; 1035 hdlr.cyo_arg = NULL; 1036 1037 mutex_enter(&cpu_lock); 1038 (void) cyclic_add_omni(&hdlr); 1039 mutex_exit(&cpu_lock); 1040 } 1041 1042 /* 1043 * Cyclic handler to wake up buddy 1044 */ 1045 void 1046 cheetah_nudge_buddy(void) 1047 { 1048 /* 1049 * Disable kernel preemption to protect the cpu list 1050 */ 1051 kpreempt_disable(); 1052 if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) { 1053 xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1, 1054 0, 0); 1055 } 1056 kpreempt_enable(); 1057 } 1058 1059 #endif /* CHEETAHPLUS_ERRATUM_25 */ 1060 1061 #ifdef SEND_MONDO_STATS 1062 uint32_t x_one_stimes[64]; 1063 uint32_t x_one_ltimes[16]; 1064 uint32_t x_set_stimes[64]; 1065 uint32_t x_set_ltimes[16]; 1066 uint32_t x_set_cpus[NCPU]; 1067 uint32_t x_nack_stimes[64]; 1068 #endif 1069 1070 /* 1071 * Note: A version of this function is used by the debugger via the KDI, 1072 * and must be kept in sync with this version. Any changes made to this 1073 * function to support new chips or to accomodate errata must also be included 1074 * in the KDI-specific version. See us3_kdi.c. 1075 */ 1076 void 1077 send_one_mondo(int cpuid) 1078 { 1079 int busy, nack; 1080 uint64_t idsr, starttick, endtick, tick, lasttick; 1081 uint64_t busymask; 1082 #ifdef CHEETAHPLUS_ERRATUM_25 1083 int recovered = 0; 1084 #endif 1085 1086 CPU_STATS_ADDQ(CPU, sys, xcalls, 1); 1087 starttick = lasttick = gettick(); 1088 shipit(cpuid, 0); 1089 endtick = starttick + xc_tick_limit; 1090 busy = nack = 0; 1091 #if defined(JALAPENO) || defined(SERRANO) 1092 /* 1093 * Lower 2 bits of the agent ID determine which BUSY/NACK pair 1094 * will be used for dispatching interrupt. For now, assume 1095 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing 1096 * issues with respect to BUSY/NACK pair usage. 1097 */ 1098 busymask = IDSR_BUSY_BIT(cpuid); 1099 #else /* JALAPENO || SERRANO */ 1100 busymask = IDSR_BUSY; 1101 #endif /* JALAPENO || SERRANO */ 1102 for (;;) { 1103 idsr = getidsr(); 1104 if (idsr == 0) 1105 break; 1106 1107 tick = gettick(); 1108 /* 1109 * If there is a big jump between the current tick 1110 * count and lasttick, we have probably hit a break 1111 * point. Adjust endtick accordingly to avoid panic. 1112 */ 1113 if (tick > (lasttick + xc_tick_jump_limit)) 1114 endtick += (tick - lasttick); 1115 lasttick = tick; 1116 if (tick > endtick) { 1117 if (panic_quiesce) 1118 return; 1119 #ifdef CHEETAHPLUS_ERRATUM_25 1120 if (cheetah_sendmondo_recover && recovered == 0) { 1121 if (mondo_recover(cpuid, 0)) { 1122 /* 1123 * We claimed the whole memory or 1124 * full scan is disabled. 1125 */ 1126 recovered++; 1127 } 1128 tick = gettick(); 1129 endtick = tick + xc_tick_limit; 1130 lasttick = tick; 1131 /* 1132 * Recheck idsr 1133 */ 1134 continue; 1135 } else 1136 #endif /* CHEETAHPLUS_ERRATUM_25 */ 1137 { 1138 cmn_err(CE_PANIC, "send mondo timeout " 1139 "(target 0x%x) [%d NACK %d BUSY]", 1140 cpuid, nack, busy); 1141 } 1142 } 1143 1144 if (idsr & busymask) { 1145 busy++; 1146 continue; 1147 } 1148 drv_usecwait(1); 1149 shipit(cpuid, 0); 1150 nack++; 1151 busy = 0; 1152 } 1153 #ifdef SEND_MONDO_STATS 1154 { 1155 int n = gettick() - starttick; 1156 if (n < 8192) 1157 x_one_stimes[n >> 7]++; 1158 else 1159 x_one_ltimes[(n >> 13) & 0xf]++; 1160 } 1161 #endif 1162 } 1163 1164 void 1165 syncfpu(void) 1166 { 1167 } 1168 1169 /* 1170 * Return processor specific async error structure 1171 * size used. 1172 */ 1173 int 1174 cpu_aflt_size(void) 1175 { 1176 return (sizeof (ch_async_flt_t)); 1177 } 1178 1179 /* 1180 * The fast_ecc_err handler transfers control here for UCU, UCC events. 1181 * Note that we flush Ecache twice, once in the fast_ecc_err handler to 1182 * flush the error that caused the UCU/UCC, then again here at the end to 1183 * flush the TL=1 trap handler code out of the Ecache, so we can minimize 1184 * the probability of getting a TL>1 Fast ECC trap when we're fielding 1185 * another Fast ECC trap. 1186 * 1187 * Cheetah+ also handles: TSCE: No additional processing required. 1188 * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT. 1189 * 1190 * Note that the p_clo_flags input is only valid in cases where the 1191 * cpu_private struct is not yet initialized (since that is the only 1192 * time that information cannot be obtained from the logout struct.) 1193 */ 1194 /*ARGSUSED*/ 1195 void 1196 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags) 1197 { 1198 ch_cpu_logout_t *clop; 1199 uint64_t ceen, nceen; 1200 1201 /* 1202 * Get the CPU log out info. If we can't find our CPU private 1203 * pointer, then we will have to make due without any detailed 1204 * logout information. 1205 */ 1206 if (CPU_PRIVATE(CPU) == NULL) { 1207 clop = NULL; 1208 ceen = p_clo_flags & EN_REG_CEEN; 1209 nceen = p_clo_flags & EN_REG_NCEEN; 1210 } else { 1211 clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout); 1212 ceen = clop->clo_flags & EN_REG_CEEN; 1213 nceen = clop->clo_flags & EN_REG_NCEEN; 1214 } 1215 1216 cpu_log_fast_ecc_error((caddr_t)rp->r_pc, 1217 (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop); 1218 } 1219 1220 /* 1221 * Log fast ecc error, called from either Fast ECC at TL=0 or Fast 1222 * ECC at TL>0. Need to supply either a error register pointer or a 1223 * cpu logout structure pointer. 1224 */ 1225 static void 1226 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen, 1227 uint64_t nceen, ch_cpu_logout_t *clop) 1228 { 1229 struct async_flt *aflt; 1230 ch_async_flt_t ch_flt; 1231 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; 1232 char pr_reason[MAX_REASON_STRING]; 1233 ch_cpu_errors_t cpu_error_regs; 1234 1235 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1236 /* 1237 * If no cpu logout data, then we will have to make due without 1238 * any detailed logout information. 1239 */ 1240 if (clop == NULL) { 1241 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; 1242 get_cpu_error_state(&cpu_error_regs); 1243 set_cpu_error_state(&cpu_error_regs); 1244 t_afar = cpu_error_regs.afar; 1245 t_afsr = cpu_error_regs.afsr; 1246 t_afsr_ext = cpu_error_regs.afsr_ext; 1247 #if defined(SERRANO) 1248 ch_flt.afar2 = cpu_error_regs.afar2; 1249 #endif /* SERRANO */ 1250 } else { 1251 t_afar = clop->clo_data.chd_afar; 1252 t_afsr = clop->clo_data.chd_afsr; 1253 t_afsr_ext = clop->clo_data.chd_afsr_ext; 1254 #if defined(SERRANO) 1255 ch_flt.afar2 = clop->clo_data.chd_afar2; 1256 #endif /* SERRANO */ 1257 } 1258 1259 /* 1260 * In order to simplify code, we maintain this afsr_errs 1261 * variable which holds the aggregate of AFSR and AFSR_EXT 1262 * sticky bits. 1263 */ 1264 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 1265 (t_afsr & C_AFSR_ALL_ERRS); 1266 pr_reason[0] = '\0'; 1267 1268 /* Setup the async fault structure */ 1269 aflt = (struct async_flt *)&ch_flt; 1270 aflt->flt_id = gethrtime_waitfree(); 1271 ch_flt.afsr_ext = t_afsr_ext; 1272 ch_flt.afsr_errs = t_afsr_errs; 1273 aflt->flt_stat = t_afsr; 1274 aflt->flt_addr = t_afar; 1275 aflt->flt_bus_id = getprocessorid(); 1276 aflt->flt_inst = CPU->cpu_id; 1277 aflt->flt_pc = tpc; 1278 aflt->flt_prot = AFLT_PROT_NONE; 1279 aflt->flt_class = CPU_FAULT; 1280 aflt->flt_priv = priv; 1281 aflt->flt_tl = tl; 1282 aflt->flt_status = ECC_F_TRAP; 1283 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs); 1284 1285 /* 1286 * XXXX - Phenomenal hack to get around Solaris not getting all the 1287 * cmn_err messages out to the console. The situation is a UCU (in 1288 * priv mode) which causes a WDU which causes a UE (on the retry). 1289 * The messages for the UCU and WDU are enqueued and then pulled off 1290 * the async queue via softint and syslogd starts to process them 1291 * but doesn't get them to the console. The UE causes a panic, but 1292 * since the UCU/WDU messages are already in transit, those aren't 1293 * on the async queue. The hack is to check if we have a matching 1294 * WDU event for the UCU, and if it matches, we're more than likely 1295 * going to panic with a UE, unless we're under protection. So, we 1296 * check to see if we got a matching WDU event and if we're under 1297 * protection. 1298 * 1299 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about 1300 * looks like this: 1301 * UCU->WDU->UE 1302 * For Panther, it could look like either of these: 1303 * UCU---->WDU->L3_WDU->UE 1304 * L3_UCU->WDU->L3_WDU->UE 1305 */ 1306 if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) && 1307 aflt->flt_panic == 0 && aflt->flt_priv != 0 && 1308 curthread->t_ontrap == NULL && curthread->t_lofault == NULL) { 1309 get_cpu_error_state(&cpu_error_regs); 1310 aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) && 1311 (cpu_error_regs.afar == t_afar)); 1312 aflt->flt_panic |= ((clop == NULL) && 1313 (t_afsr_errs & C_AFSR_WDU)); 1314 } 1315 1316 /* 1317 * Queue events on the async event queue, one event per error bit. 1318 * If no events are queued or no Fast ECC events are on in the AFSR, 1319 * queue an event to complain. 1320 */ 1321 if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 || 1322 ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) { 1323 ch_flt.flt_type = CPU_INV_AFSR; 1324 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, 1325 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue, 1326 aflt->flt_panic); 1327 } 1328 1329 /* 1330 * Zero out + invalidate CPU logout. 1331 */ 1332 if (clop) { 1333 bzero(clop, sizeof (ch_cpu_logout_t)); 1334 clop->clo_data.chd_afar = LOGOUT_INVALID; 1335 } 1336 1337 /* 1338 * We carefully re-enable NCEEN and CEEN and then check if any deferred 1339 * or disrupting errors have happened. We do this because if a 1340 * deferred or disrupting error had occurred with NCEEN/CEEN off, the 1341 * trap will not be taken when NCEEN/CEEN is re-enabled. Note that 1342 * CEEN works differently on Cheetah than on Spitfire. Also, we enable 1343 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a 1344 * deferred or disrupting error happening between checking the AFSR and 1345 * enabling NCEEN/CEEN. 1346 * 1347 * Note: CEEN and NCEEN are only reenabled if they were on when trap 1348 * taken. 1349 */ 1350 set_error_enable(get_error_enable() | (nceen | ceen)); 1351 if (clear_errors(&ch_flt)) { 1352 aflt->flt_panic |= ((ch_flt.afsr_errs & 1353 (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0); 1354 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 1355 NULL); 1356 } 1357 1358 /* 1359 * Panic here if aflt->flt_panic has been set. Enqueued errors will 1360 * be logged as part of the panic flow. 1361 */ 1362 if (aflt->flt_panic) 1363 fm_panic("%sError(s)", pr_reason); 1364 1365 /* 1366 * Flushing the Ecache here gets the part of the trap handler that 1367 * is run at TL=1 out of the Ecache. 1368 */ 1369 cpu_flush_ecache(); 1370 } 1371 1372 /* 1373 * This is called via sys_trap from pil15_interrupt code if the 1374 * corresponding entry in ch_err_tl1_pending is set. Checks the 1375 * various ch_err_tl1_data structures for valid entries based on the bit 1376 * settings in the ch_err_tl1_flags entry of the structure. 1377 */ 1378 /*ARGSUSED*/ 1379 void 1380 cpu_tl1_error(struct regs *rp, int panic) 1381 { 1382 ch_err_tl1_data_t *cl1p, cl1; 1383 int i, ncl1ps; 1384 uint64_t me_flags; 1385 uint64_t ceen, nceen; 1386 1387 if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) { 1388 cl1p = &ch_err_tl1_data; 1389 ncl1ps = 1; 1390 } else if (CPU_PRIVATE(CPU) != NULL) { 1391 cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]); 1392 ncl1ps = CH_ERR_TL1_TLMAX; 1393 } else { 1394 ncl1ps = 0; 1395 } 1396 1397 for (i = 0; i < ncl1ps; i++, cl1p++) { 1398 if (cl1p->ch_err_tl1_flags == 0) 1399 continue; 1400 1401 /* 1402 * Grab a copy of the logout data and invalidate 1403 * the logout area. 1404 */ 1405 cl1 = *cl1p; 1406 bzero(cl1p, sizeof (ch_err_tl1_data_t)); 1407 cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID; 1408 me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags); 1409 1410 /* 1411 * Log "first error" in ch_err_tl1_data. 1412 */ 1413 if (cl1.ch_err_tl1_flags & CH_ERR_FECC) { 1414 ceen = get_error_enable() & EN_REG_CEEN; 1415 nceen = get_error_enable() & EN_REG_NCEEN; 1416 cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1, 1417 1, ceen, nceen, &cl1.ch_err_tl1_logout); 1418 } 1419 #if defined(CPU_IMP_L1_CACHE_PARITY) 1420 if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) { 1421 cpu_parity_error(rp, cl1.ch_err_tl1_flags, 1422 (caddr_t)cl1.ch_err_tl1_tpc); 1423 } 1424 #endif /* CPU_IMP_L1_CACHE_PARITY */ 1425 1426 /* 1427 * Log "multiple events" in ch_err_tl1_data. Note that 1428 * we don't read and clear the AFSR/AFAR in the TL>0 code 1429 * if the structure is busy, we just do the cache flushing 1430 * we have to do and then do the retry. So the AFSR/AFAR 1431 * at this point *should* have some relevant info. If there 1432 * are no valid errors in the AFSR, we'll assume they've 1433 * already been picked up and logged. For I$/D$ parity, 1434 * we just log an event with an "Unknown" (NULL) TPC. 1435 */ 1436 if (me_flags & CH_ERR_FECC) { 1437 ch_cpu_errors_t cpu_error_regs; 1438 uint64_t t_afsr_errs; 1439 1440 /* 1441 * Get the error registers and see if there's 1442 * a pending error. If not, don't bother 1443 * generating an "Invalid AFSR" error event. 1444 */ 1445 get_cpu_error_state(&cpu_error_regs); 1446 t_afsr_errs = (cpu_error_regs.afsr_ext & 1447 C_AFSR_EXT_ALL_ERRS) | 1448 (cpu_error_regs.afsr & C_AFSR_ALL_ERRS); 1449 if (t_afsr_errs != 0) { 1450 ceen = get_error_enable() & EN_REG_CEEN; 1451 nceen = get_error_enable() & EN_REG_NCEEN; 1452 cpu_log_fast_ecc_error((caddr_t)NULL, 1, 1453 1, ceen, nceen, NULL); 1454 } 1455 } 1456 #if defined(CPU_IMP_L1_CACHE_PARITY) 1457 if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) { 1458 cpu_parity_error(rp, me_flags, (caddr_t)NULL); 1459 } 1460 #endif /* CPU_IMP_L1_CACHE_PARITY */ 1461 } 1462 } 1463 1464 /* 1465 * Called from Fast ECC TL>0 handler in case of fatal error. 1466 * cpu_tl1_error should always find an associated ch_err_tl1_data structure, 1467 * but if we don't, we'll panic with something reasonable. 1468 */ 1469 /*ARGSUSED*/ 1470 void 1471 cpu_tl1_err_panic(struct regs *rp, ulong_t flags) 1472 { 1473 cpu_tl1_error(rp, 1); 1474 /* 1475 * Should never return, but just in case. 1476 */ 1477 fm_panic("Unsurvivable ECC Error at TL>0"); 1478 } 1479 1480 /* 1481 * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST, 1482 * EDC, WDU, WDC, CPU, CPC, IVU, IVC events. 1483 * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU 1484 * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC 1485 * 1486 * Cheetah+ also handles (No additional processing required): 1487 * DUE, DTO, DBERR (NCEEN controlled) 1488 * THCE (CEEN and ET_ECC_en controlled) 1489 * TUE (ET_ECC_en controlled) 1490 * 1491 * Panther further adds: 1492 * IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled) 1493 * IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled) 1494 * TUE_SH, TUE (NCEEN and L2_tag_ECC_en controlled) 1495 * L3_TUE, L3_TUE_SH (NCEEN and ET_ECC_en controlled) 1496 * THCE (CEEN and L2_tag_ECC_en controlled) 1497 * L3_THCE (CEEN and ET_ECC_en controlled) 1498 * 1499 * Note that the p_clo_flags input is only valid in cases where the 1500 * cpu_private struct is not yet initialized (since that is the only 1501 * time that information cannot be obtained from the logout struct.) 1502 */ 1503 /*ARGSUSED*/ 1504 void 1505 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags) 1506 { 1507 struct async_flt *aflt; 1508 ch_async_flt_t ch_flt; 1509 char pr_reason[MAX_REASON_STRING]; 1510 ch_cpu_logout_t *clop; 1511 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; 1512 ch_cpu_errors_t cpu_error_regs; 1513 1514 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1515 /* 1516 * Get the CPU log out info. If we can't find our CPU private 1517 * pointer, then we will have to make due without any detailed 1518 * logout information. 1519 */ 1520 if (CPU_PRIVATE(CPU) == NULL) { 1521 clop = NULL; 1522 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; 1523 get_cpu_error_state(&cpu_error_regs); 1524 set_cpu_error_state(&cpu_error_regs); 1525 t_afar = cpu_error_regs.afar; 1526 t_afsr = cpu_error_regs.afsr; 1527 t_afsr_ext = cpu_error_regs.afsr_ext; 1528 #if defined(SERRANO) 1529 ch_flt.afar2 = cpu_error_regs.afar2; 1530 #endif /* SERRANO */ 1531 } else { 1532 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); 1533 t_afar = clop->clo_data.chd_afar; 1534 t_afsr = clop->clo_data.chd_afsr; 1535 t_afsr_ext = clop->clo_data.chd_afsr_ext; 1536 #if defined(SERRANO) 1537 ch_flt.afar2 = clop->clo_data.chd_afar2; 1538 #endif /* SERRANO */ 1539 } 1540 1541 /* 1542 * In order to simplify code, we maintain this afsr_errs 1543 * variable which holds the aggregate of AFSR and AFSR_EXT 1544 * sticky bits. 1545 */ 1546 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 1547 (t_afsr & C_AFSR_ALL_ERRS); 1548 1549 pr_reason[0] = '\0'; 1550 /* Setup the async fault structure */ 1551 aflt = (struct async_flt *)&ch_flt; 1552 ch_flt.afsr_ext = t_afsr_ext; 1553 ch_flt.afsr_errs = t_afsr_errs; 1554 aflt->flt_stat = t_afsr; 1555 aflt->flt_addr = t_afar; 1556 aflt->flt_pc = (caddr_t)rp->r_pc; 1557 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1558 aflt->flt_tl = 0; 1559 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs); 1560 1561 /* 1562 * If this trap is a result of one of the errors not masked 1563 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead 1564 * indicate that a timeout is to be set later. 1565 */ 1566 if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) && 1567 !aflt->flt_panic) 1568 ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED; 1569 else 1570 ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED; 1571 1572 /* 1573 * log the CE and clean up 1574 */ 1575 cpu_log_and_clear_ce(&ch_flt); 1576 1577 /* 1578 * We re-enable CEEN (if required) and check if any disrupting errors 1579 * have happened. We do this because if a disrupting error had occurred 1580 * with CEEN off, the trap will not be taken when CEEN is re-enabled. 1581 * Note that CEEN works differently on Cheetah than on Spitfire. Also, 1582 * we enable CEEN *before* checking the AFSR to avoid the small window 1583 * of a error happening between checking the AFSR and enabling CEEN. 1584 */ 1585 if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER) 1586 set_error_enable(get_error_enable() | EN_REG_CEEN); 1587 if (clear_errors(&ch_flt)) { 1588 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 1589 NULL); 1590 } 1591 1592 /* 1593 * Panic here if aflt->flt_panic has been set. Enqueued errors will 1594 * be logged as part of the panic flow. 1595 */ 1596 if (aflt->flt_panic) 1597 fm_panic("%sError(s)", pr_reason); 1598 } 1599 1600 /* 1601 * The async_err handler transfers control here for UE, EMU, EDU:BLD, 1602 * L3_EDU:BLD, TO, and BERR events. 1603 * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR 1604 * 1605 * Cheetah+: No additional errors handled. 1606 * 1607 * Note that the p_clo_flags input is only valid in cases where the 1608 * cpu_private struct is not yet initialized (since that is the only 1609 * time that information cannot be obtained from the logout struct.) 1610 */ 1611 /*ARGSUSED*/ 1612 void 1613 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags) 1614 { 1615 ushort_t ttype, tl; 1616 ch_async_flt_t ch_flt; 1617 struct async_flt *aflt; 1618 int trampolined = 0; 1619 char pr_reason[MAX_REASON_STRING]; 1620 ch_cpu_logout_t *clop; 1621 uint64_t ceen, clo_flags; 1622 uint64_t log_afsr; 1623 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; 1624 ch_cpu_errors_t cpu_error_regs; 1625 int expected = DDI_FM_ERR_UNEXPECTED; 1626 ddi_acc_hdl_t *hp; 1627 1628 /* 1629 * We need to look at p_flag to determine if the thread detected an 1630 * error while dumping core. We can't grab p_lock here, but it's ok 1631 * because we just need a consistent snapshot and we know that everyone 1632 * else will store a consistent set of bits while holding p_lock. We 1633 * don't have to worry about a race because SDOCORE is set once prior 1634 * to doing i/o from the process's address space and is never cleared. 1635 */ 1636 uint_t pflag = ttoproc(curthread)->p_flag; 1637 1638 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1639 /* 1640 * Get the CPU log out info. If we can't find our CPU private 1641 * pointer then we will have to make due without any detailed 1642 * logout information. 1643 */ 1644 if (CPU_PRIVATE(CPU) == NULL) { 1645 clop = NULL; 1646 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; 1647 get_cpu_error_state(&cpu_error_regs); 1648 set_cpu_error_state(&cpu_error_regs); 1649 t_afar = cpu_error_regs.afar; 1650 t_afsr = cpu_error_regs.afsr; 1651 t_afsr_ext = cpu_error_regs.afsr_ext; 1652 #if defined(SERRANO) 1653 ch_flt.afar2 = cpu_error_regs.afar2; 1654 #endif /* SERRANO */ 1655 clo_flags = p_clo_flags; 1656 } else { 1657 clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout); 1658 t_afar = clop->clo_data.chd_afar; 1659 t_afsr = clop->clo_data.chd_afsr; 1660 t_afsr_ext = clop->clo_data.chd_afsr_ext; 1661 #if defined(SERRANO) 1662 ch_flt.afar2 = clop->clo_data.chd_afar2; 1663 #endif /* SERRANO */ 1664 clo_flags = clop->clo_flags; 1665 } 1666 1667 /* 1668 * In order to simplify code, we maintain this afsr_errs 1669 * variable which holds the aggregate of AFSR and AFSR_EXT 1670 * sticky bits. 1671 */ 1672 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 1673 (t_afsr & C_AFSR_ALL_ERRS); 1674 pr_reason[0] = '\0'; 1675 1676 /* 1677 * Grab information encoded into our clo_flags field. 1678 */ 1679 ceen = clo_flags & EN_REG_CEEN; 1680 tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT; 1681 ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT; 1682 1683 /* 1684 * handle the specific error 1685 */ 1686 aflt = (struct async_flt *)&ch_flt; 1687 aflt->flt_id = gethrtime_waitfree(); 1688 aflt->flt_bus_id = getprocessorid(); 1689 aflt->flt_inst = CPU->cpu_id; 1690 ch_flt.afsr_ext = t_afsr_ext; 1691 ch_flt.afsr_errs = t_afsr_errs; 1692 aflt->flt_stat = t_afsr; 1693 aflt->flt_addr = t_afar; 1694 aflt->flt_pc = (caddr_t)rp->r_pc; 1695 aflt->flt_prot = AFLT_PROT_NONE; 1696 aflt->flt_class = CPU_FAULT; 1697 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1698 aflt->flt_tl = (uchar_t)tl; 1699 aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) || 1700 C_AFSR_PANIC(t_afsr_errs)); 1701 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1702 aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP); 1703 1704 /* 1705 * If the trap occurred in privileged mode at TL=0, we need to check to 1706 * see if we were executing in the kernel under on_trap() or t_lofault 1707 * protection. If so, modify the saved registers so that we return 1708 * from the trap to the appropriate trampoline routine. 1709 */ 1710 if (aflt->flt_priv && tl == 0) { 1711 if (curthread->t_ontrap != NULL) { 1712 on_trap_data_t *otp = curthread->t_ontrap; 1713 1714 if (otp->ot_prot & OT_DATA_EC) { 1715 aflt->flt_prot = AFLT_PROT_EC; 1716 otp->ot_trap |= OT_DATA_EC; 1717 rp->r_pc = otp->ot_trampoline; 1718 rp->r_npc = rp->r_pc + 4; 1719 trampolined = 1; 1720 } 1721 1722 if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) && 1723 (otp->ot_prot & OT_DATA_ACCESS)) { 1724 aflt->flt_prot = AFLT_PROT_ACCESS; 1725 otp->ot_trap |= OT_DATA_ACCESS; 1726 rp->r_pc = otp->ot_trampoline; 1727 rp->r_npc = rp->r_pc + 4; 1728 trampolined = 1; 1729 /* 1730 * for peeks and caut_gets errors are expected 1731 */ 1732 hp = (ddi_acc_hdl_t *)otp->ot_handle; 1733 if (!hp) 1734 expected = DDI_FM_ERR_PEEK; 1735 else if (hp->ah_acc.devacc_attr_access == 1736 DDI_CAUTIOUS_ACC) 1737 expected = DDI_FM_ERR_EXPECTED; 1738 } 1739 1740 } else if (curthread->t_lofault) { 1741 aflt->flt_prot = AFLT_PROT_COPY; 1742 rp->r_g1 = EFAULT; 1743 rp->r_pc = curthread->t_lofault; 1744 rp->r_npc = rp->r_pc + 4; 1745 trampolined = 1; 1746 } 1747 } 1748 1749 /* 1750 * If we're in user mode or we're doing a protected copy, we either 1751 * want the ASTON code below to send a signal to the user process 1752 * or we want to panic if aft_panic is set. 1753 * 1754 * If we're in privileged mode and we're not doing a copy, then we 1755 * need to check if we've trampolined. If we haven't trampolined, 1756 * we should panic. 1757 */ 1758 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 1759 if (t_afsr_errs & 1760 ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) & 1761 ~(C_AFSR_BERR | C_AFSR_TO))) 1762 aflt->flt_panic |= aft_panic; 1763 } else if (!trampolined) { 1764 aflt->flt_panic = 1; 1765 } 1766 1767 /* 1768 * If we've trampolined due to a privileged TO or BERR, or if an 1769 * unprivileged TO or BERR occurred, we don't want to enqueue an 1770 * event for that TO or BERR. Queue all other events (if any) besides 1771 * the TO/BERR. Since we may not be enqueing any events, we need to 1772 * ignore the number of events queued. If we haven't trampolined due 1773 * to a TO or BERR, just enqueue events normally. 1774 */ 1775 log_afsr = t_afsr_errs; 1776 if (trampolined) { 1777 log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR); 1778 } else if (!aflt->flt_priv) { 1779 /* 1780 * User mode, suppress messages if 1781 * cpu_berr_to_verbose is not set. 1782 */ 1783 if (!cpu_berr_to_verbose) 1784 log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR); 1785 } 1786 1787 /* 1788 * Log any errors that occurred 1789 */ 1790 if (((log_afsr & 1791 ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) && 1792 cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) || 1793 (t_afsr_errs & 1794 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) { 1795 ch_flt.flt_type = CPU_INV_AFSR; 1796 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, 1797 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue, 1798 aflt->flt_panic); 1799 } 1800 1801 /* 1802 * Zero out + invalidate CPU logout. 1803 */ 1804 if (clop) { 1805 bzero(clop, sizeof (ch_cpu_logout_t)); 1806 clop->clo_data.chd_afar = LOGOUT_INVALID; 1807 } 1808 1809 #if defined(JALAPENO) || defined(SERRANO) 1810 /* 1811 * UE/RUE/BERR/TO: Call our bus nexus friends to check for 1812 * IO errors that may have resulted in this trap. 1813 */ 1814 if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) { 1815 cpu_run_bus_error_handlers(aflt, expected); 1816 } 1817 1818 /* 1819 * UE/RUE: If UE or RUE is in memory, we need to flush the bad 1820 * line from the Ecache. We also need to query the bus nexus for 1821 * fatal errors. Attempts to do diagnostic read on caches may 1822 * introduce more errors (especially when the module is bad). 1823 */ 1824 if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) { 1825 /* 1826 * Ask our bus nexus friends if they have any fatal errors. If 1827 * so, they will log appropriate error messages. 1828 */ 1829 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL) 1830 aflt->flt_panic = 1; 1831 1832 /* 1833 * We got a UE or RUE and are panicking, save the fault PA in 1834 * a known location so that the platform specific panic code 1835 * can check for copyback errors. 1836 */ 1837 if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) { 1838 panic_aflt = *aflt; 1839 } 1840 } 1841 1842 /* 1843 * Flush Ecache line or entire Ecache 1844 */ 1845 if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR)) 1846 cpu_error_ecache_flush(&ch_flt); 1847 #else /* JALAPENO || SERRANO */ 1848 /* 1849 * UE/BERR/TO: Call our bus nexus friends to check for 1850 * IO errors that may have resulted in this trap. 1851 */ 1852 if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) { 1853 cpu_run_bus_error_handlers(aflt, expected); 1854 } 1855 1856 /* 1857 * UE: If the UE is in memory, we need to flush the bad 1858 * line from the Ecache. We also need to query the bus nexus for 1859 * fatal errors. Attempts to do diagnostic read on caches may 1860 * introduce more errors (especially when the module is bad). 1861 */ 1862 if (t_afsr & C_AFSR_UE) { 1863 /* 1864 * Ask our legacy bus nexus friends if they have any fatal 1865 * errors. If so, they will log appropriate error messages. 1866 */ 1867 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL) 1868 aflt->flt_panic = 1; 1869 1870 /* 1871 * We got a UE and are panicking, save the fault PA in a known 1872 * location so that the platform specific panic code can check 1873 * for copyback errors. 1874 */ 1875 if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) { 1876 panic_aflt = *aflt; 1877 } 1878 } 1879 1880 /* 1881 * Flush Ecache line or entire Ecache 1882 */ 1883 if (t_afsr_errs & 1884 (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU)) 1885 cpu_error_ecache_flush(&ch_flt); 1886 #endif /* JALAPENO || SERRANO */ 1887 1888 /* 1889 * We carefully re-enable NCEEN and CEEN and then check if any deferred 1890 * or disrupting errors have happened. We do this because if a 1891 * deferred or disrupting error had occurred with NCEEN/CEEN off, the 1892 * trap will not be taken when NCEEN/CEEN is re-enabled. Note that 1893 * CEEN works differently on Cheetah than on Spitfire. Also, we enable 1894 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a 1895 * deferred or disrupting error happening between checking the AFSR and 1896 * enabling NCEEN/CEEN. 1897 * 1898 * Note: CEEN reenabled only if it was on when trap taken. 1899 */ 1900 set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen)); 1901 if (clear_errors(&ch_flt)) { 1902 /* 1903 * Check for secondary errors, and avoid panicking if we 1904 * have them 1905 */ 1906 if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs, 1907 t_afar) == 0) { 1908 aflt->flt_panic |= ((ch_flt.afsr_errs & 1909 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0); 1910 } 1911 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 1912 NULL); 1913 } 1914 1915 /* 1916 * Panic here if aflt->flt_panic has been set. Enqueued errors will 1917 * be logged as part of the panic flow. 1918 */ 1919 if (aflt->flt_panic) 1920 fm_panic("%sError(s)", pr_reason); 1921 1922 /* 1923 * If we queued an error and we are going to return from the trap and 1924 * the error was in user mode or inside of a copy routine, set AST flag 1925 * so the queue will be drained before returning to user mode. The 1926 * AST processing will also act on our failure policy. 1927 */ 1928 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 1929 int pcb_flag = 0; 1930 1931 if (t_afsr_errs & 1932 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS & 1933 ~(C_AFSR_BERR | C_AFSR_TO))) 1934 pcb_flag |= ASYNC_HWERR; 1935 1936 if (t_afsr & C_AFSR_BERR) 1937 pcb_flag |= ASYNC_BERR; 1938 1939 if (t_afsr & C_AFSR_TO) 1940 pcb_flag |= ASYNC_BTO; 1941 1942 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 1943 aston(curthread); 1944 } 1945 } 1946 1947 #if defined(CPU_IMP_L1_CACHE_PARITY) 1948 /* 1949 * Handling of data and instruction parity errors (traps 0x71, 0x72). 1950 * 1951 * For Panther, P$ data parity errors during floating point load hits 1952 * are also detected (reported as TT 0x71) and handled by this trap 1953 * handler. 1954 * 1955 * AFSR/AFAR are not set for parity errors, only TPC (a virtual address) 1956 * is available. 1957 */ 1958 /*ARGSUSED*/ 1959 void 1960 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc) 1961 { 1962 ch_async_flt_t ch_flt; 1963 struct async_flt *aflt; 1964 uchar_t tl = ((flags & CH_ERR_TL) != 0); 1965 uchar_t iparity = ((flags & CH_ERR_IPE) != 0); 1966 uchar_t panic = ((flags & CH_ERR_PANIC) != 0); 1967 char *error_class; 1968 1969 /* 1970 * Log the error. 1971 * For icache parity errors the fault address is the trap PC. 1972 * For dcache/pcache parity errors the instruction would have to 1973 * be decoded to determine the address and that isn't possible 1974 * at high PIL. 1975 */ 1976 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1977 aflt = (struct async_flt *)&ch_flt; 1978 aflt->flt_id = gethrtime_waitfree(); 1979 aflt->flt_bus_id = getprocessorid(); 1980 aflt->flt_inst = CPU->cpu_id; 1981 aflt->flt_pc = tpc; 1982 aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR; 1983 aflt->flt_prot = AFLT_PROT_NONE; 1984 aflt->flt_class = CPU_FAULT; 1985 aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ? 1 : 0; 1986 aflt->flt_tl = tl; 1987 aflt->flt_panic = panic; 1988 aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP; 1989 ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY; 1990 1991 if (iparity) { 1992 cpu_icache_parity_info(&ch_flt); 1993 if (ch_flt.parity_data.ipe.cpl_off != -1) 1994 error_class = FM_EREPORT_CPU_USIII_IDSPE; 1995 else if (ch_flt.parity_data.ipe.cpl_way != -1) 1996 error_class = FM_EREPORT_CPU_USIII_ITSPE; 1997 else 1998 error_class = FM_EREPORT_CPU_USIII_IPE; 1999 aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE; 2000 } else { 2001 cpu_dcache_parity_info(&ch_flt); 2002 if (ch_flt.parity_data.dpe.cpl_off != -1) 2003 error_class = FM_EREPORT_CPU_USIII_DDSPE; 2004 else if (ch_flt.parity_data.dpe.cpl_way != -1) 2005 error_class = FM_EREPORT_CPU_USIII_DTSPE; 2006 else 2007 error_class = FM_EREPORT_CPU_USIII_DPE; 2008 aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE; 2009 /* 2010 * For panther we also need to check the P$ for parity errors. 2011 */ 2012 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2013 cpu_pcache_parity_info(&ch_flt); 2014 if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) { 2015 error_class = FM_EREPORT_CPU_USIII_PDSPE; 2016 aflt->flt_payload = 2017 FM_EREPORT_PAYLOAD_PCACHE_PE; 2018 } 2019 } 2020 } 2021 2022 cpu_errorq_dispatch(error_class, (void *)&ch_flt, 2023 sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); 2024 2025 if (iparity) { 2026 /* 2027 * Invalidate entire I$. 2028 * This is required due to the use of diagnostic ASI 2029 * accesses that may result in a loss of I$ coherency. 2030 */ 2031 if (cache_boot_state & DCU_IC) { 2032 flush_icache(); 2033 } 2034 /* 2035 * According to section P.3.1 of the Panther PRM, we 2036 * need to do a little more for recovery on those 2037 * CPUs after encountering an I$ parity error. 2038 */ 2039 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2040 flush_ipb(); 2041 correct_dcache_parity(dcache_size, 2042 dcache_linesize); 2043 flush_pcache(); 2044 } 2045 } else { 2046 /* 2047 * Since the valid bit is ignored when checking parity the 2048 * D$ data and tag must also be corrected. Set D$ data bits 2049 * to zero and set utag to 0, 1, 2, 3. 2050 */ 2051 correct_dcache_parity(dcache_size, dcache_linesize); 2052 2053 /* 2054 * According to section P.3.3 of the Panther PRM, we 2055 * need to do a little more for recovery on those 2056 * CPUs after encountering a D$ or P$ parity error. 2057 * 2058 * As far as clearing P$ parity errors, it is enough to 2059 * simply invalidate all entries in the P$ since P$ parity 2060 * error traps are only generated for floating point load 2061 * hits. 2062 */ 2063 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2064 flush_icache(); 2065 flush_ipb(); 2066 flush_pcache(); 2067 } 2068 } 2069 2070 /* 2071 * Invalidate entire D$ if it was enabled. 2072 * This is done to avoid stale data in the D$ which might 2073 * occur with the D$ disabled and the trap handler doing 2074 * stores affecting lines already in the D$. 2075 */ 2076 if (cache_boot_state & DCU_DC) { 2077 flush_dcache(); 2078 } 2079 2080 /* 2081 * Restore caches to their bootup state. 2082 */ 2083 set_dcu(get_dcu() | cache_boot_state); 2084 2085 /* 2086 * Panic here if aflt->flt_panic has been set. Enqueued errors will 2087 * be logged as part of the panic flow. 2088 */ 2089 if (aflt->flt_panic) 2090 fm_panic("%sError(s)", iparity ? "IPE " : "DPE "); 2091 2092 /* 2093 * If this error occurred at TL>0 then flush the E$ here to reduce 2094 * the chance of getting an unrecoverable Fast ECC error. This 2095 * flush will evict the part of the parity trap handler that is run 2096 * at TL>1. 2097 */ 2098 if (tl) { 2099 cpu_flush_ecache(); 2100 } 2101 } 2102 2103 /* 2104 * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t 2105 * to indicate which portions of the captured data should be in the ereport. 2106 */ 2107 void 2108 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt) 2109 { 2110 int way = ch_flt->parity_data.ipe.cpl_way; 2111 int offset = ch_flt->parity_data.ipe.cpl_off; 2112 int tag_index; 2113 struct async_flt *aflt = (struct async_flt *)ch_flt; 2114 2115 2116 if ((offset != -1) || (way != -1)) { 2117 /* 2118 * Parity error in I$ tag or data 2119 */ 2120 tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx; 2121 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 2122 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2123 PN_ICIDX_TO_WAY(tag_index); 2124 else 2125 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2126 CH_ICIDX_TO_WAY(tag_index); 2127 ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag = 2128 IC_LOGFLAG_MAGIC; 2129 } else { 2130 /* 2131 * Parity error was not identified. 2132 * Log tags and data for all ways. 2133 */ 2134 for (way = 0; way < CH_ICACHE_NWAY; way++) { 2135 tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx; 2136 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 2137 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2138 PN_ICIDX_TO_WAY(tag_index); 2139 else 2140 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2141 CH_ICIDX_TO_WAY(tag_index); 2142 ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag = 2143 IC_LOGFLAG_MAGIC; 2144 } 2145 } 2146 } 2147 2148 /* 2149 * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t 2150 * to indicate which portions of the captured data should be in the ereport. 2151 */ 2152 void 2153 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt) 2154 { 2155 int way = ch_flt->parity_data.dpe.cpl_way; 2156 int offset = ch_flt->parity_data.dpe.cpl_off; 2157 int tag_index; 2158 2159 if (offset != -1) { 2160 /* 2161 * Parity error in D$ or P$ data array. 2162 * 2163 * First check to see whether the parity error is in D$ or P$ 2164 * since P$ data parity errors are reported in Panther using 2165 * the same trap. 2166 */ 2167 if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) { 2168 tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx; 2169 ch_flt->parity_data.dpe.cpl_pc[way].pc_way = 2170 CH_PCIDX_TO_WAY(tag_index); 2171 ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag = 2172 PC_LOGFLAG_MAGIC; 2173 } else { 2174 tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx; 2175 ch_flt->parity_data.dpe.cpl_dc[way].dc_way = 2176 CH_DCIDX_TO_WAY(tag_index); 2177 ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag = 2178 DC_LOGFLAG_MAGIC; 2179 } 2180 } else if (way != -1) { 2181 /* 2182 * Parity error in D$ tag. 2183 */ 2184 tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx; 2185 ch_flt->parity_data.dpe.cpl_dc[way].dc_way = 2186 CH_DCIDX_TO_WAY(tag_index); 2187 ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag = 2188 DC_LOGFLAG_MAGIC; 2189 } 2190 } 2191 #endif /* CPU_IMP_L1_CACHE_PARITY */ 2192 2193 /* 2194 * The cpu_async_log_err() function is called via the [uc]e_drain() function to 2195 * post-process CPU events that are dequeued. As such, it can be invoked 2196 * from softint context, from AST processing in the trap() flow, or from the 2197 * panic flow. We decode the CPU-specific data, and take appropriate actions. 2198 * Historically this entry point was used to log the actual cmn_err(9F) text; 2199 * now with FMA it is used to prepare 'flt' to be converted into an ereport. 2200 * With FMA this function now also returns a flag which indicates to the 2201 * caller whether the ereport should be posted (1) or suppressed (0). 2202 */ 2203 static int 2204 cpu_async_log_err(void *flt, errorq_elem_t *eqep) 2205 { 2206 ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt; 2207 struct async_flt *aflt = (struct async_flt *)flt; 2208 uint64_t errors; 2209 2210 switch (ch_flt->flt_type) { 2211 case CPU_INV_AFSR: 2212 /* 2213 * If it is a disrupting trap and the AFSR is zero, then 2214 * the event has probably already been noted. Do not post 2215 * an ereport. 2216 */ 2217 if ((aflt->flt_status & ECC_C_TRAP) && 2218 (!(aflt->flt_stat & C_AFSR_MASK))) 2219 return (0); 2220 else 2221 return (1); 2222 case CPU_TO: 2223 case CPU_BERR: 2224 case CPU_FATAL: 2225 case CPU_FPUERR: 2226 return (1); 2227 2228 case CPU_UE_ECACHE_RETIRE: 2229 cpu_log_err(aflt); 2230 cpu_page_retire(ch_flt); 2231 return (1); 2232 2233 /* 2234 * Cases where we may want to suppress logging or perform 2235 * extended diagnostics. 2236 */ 2237 case CPU_CE: 2238 case CPU_EMC: 2239 /* 2240 * We want to skip logging and further classification 2241 * only if ALL the following conditions are true: 2242 * 2243 * 1. There is only one error 2244 * 2. That error is a correctable memory error 2245 * 3. The error is caused by the memory scrubber (in 2246 * which case the error will have occurred under 2247 * on_trap protection) 2248 * 4. The error is on a retired page 2249 * 2250 * Note: AFLT_PROT_EC is used places other than the memory 2251 * scrubber. However, none of those errors should occur 2252 * on a retired page. 2253 */ 2254 if ((ch_flt->afsr_errs & 2255 (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE && 2256 aflt->flt_prot == AFLT_PROT_EC) { 2257 2258 if (page_retire_check(aflt->flt_addr, NULL) == 0) { 2259 if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) { 2260 2261 /* 2262 * Since we're skipping logging, we'll need 2263 * to schedule the re-enabling of CEEN 2264 */ 2265 (void) timeout(cpu_delayed_check_ce_errors, 2266 (void *)aflt->flt_inst, drv_usectohz( 2267 (clock_t)cpu_ceen_delay_secs * MICROSEC)); 2268 } 2269 return (0); 2270 } 2271 } 2272 2273 /* 2274 * Perform/schedule further classification actions, but 2275 * only if the page is healthy (we don't want bad 2276 * pages inducing too much diagnostic activity). If we could 2277 * not find a page pointer then we also skip this. If 2278 * ce_scrub_xdiag_recirc returns nonzero then it has chosen 2279 * to copy and recirculate the event (for further diagnostics) 2280 * and we should not proceed to log it here. 2281 * 2282 * This must be the last step here before the cpu_log_err() 2283 * below - if an event recirculates cpu_ce_log_err() will 2284 * not call the current function but just proceed directly 2285 * to cpu_ereport_post after the cpu_log_err() avoided below. 2286 * 2287 * Note: Check cpu_impl_async_log_err if changing this 2288 */ 2289 if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) { 2290 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 2291 CE_XDIAG_SKIP_NOPP); 2292 } else { 2293 if (errors != PR_OK) { 2294 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 2295 CE_XDIAG_SKIP_PAGEDET); 2296 } else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep, 2297 offsetof(ch_async_flt_t, cmn_asyncflt))) { 2298 return (0); 2299 } 2300 } 2301 /*FALLTHRU*/ 2302 2303 /* 2304 * Cases where we just want to report the error and continue. 2305 */ 2306 case CPU_CE_ECACHE: 2307 case CPU_UE_ECACHE: 2308 case CPU_IV: 2309 case CPU_ORPH: 2310 cpu_log_err(aflt); 2311 return (1); 2312 2313 /* 2314 * Cases where we want to fall through to handle panicking. 2315 */ 2316 case CPU_UE: 2317 /* 2318 * We want to skip logging in the same conditions as the 2319 * CE case. In addition, we want to make sure we're not 2320 * panicking. 2321 */ 2322 if (!panicstr && (ch_flt->afsr_errs & 2323 (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE && 2324 aflt->flt_prot == AFLT_PROT_EC) { 2325 if (page_retire_check(aflt->flt_addr, NULL) == 0) { 2326 /* Zero the address to clear the error */ 2327 softcall(ecc_page_zero, (void *)aflt->flt_addr); 2328 return (0); 2329 } 2330 } 2331 cpu_log_err(aflt); 2332 break; 2333 2334 default: 2335 /* 2336 * If the us3_common.c code doesn't know the flt_type, it may 2337 * be an implementation-specific code. Call into the impldep 2338 * backend to find out what to do: if it tells us to continue, 2339 * break and handle as if falling through from a UE; if not, 2340 * the impldep backend has handled the error and we're done. 2341 */ 2342 switch (cpu_impl_async_log_err(flt, eqep)) { 2343 case CH_ASYNC_LOG_DONE: 2344 return (1); 2345 case CH_ASYNC_LOG_RECIRC: 2346 return (0); 2347 case CH_ASYNC_LOG_CONTINUE: 2348 break; /* continue on to handle UE-like error */ 2349 default: 2350 cmn_err(CE_WARN, "discarding error 0x%p with " 2351 "invalid fault type (0x%x)", 2352 (void *)aflt, ch_flt->flt_type); 2353 return (0); 2354 } 2355 } 2356 2357 /* ... fall through from the UE case */ 2358 2359 if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) { 2360 if (!panicstr) { 2361 cpu_page_retire(ch_flt); 2362 } else { 2363 /* 2364 * Clear UEs on panic so that we don't 2365 * get haunted by them during panic or 2366 * after reboot 2367 */ 2368 cpu_clearphys(aflt); 2369 (void) clear_errors(NULL); 2370 } 2371 } 2372 2373 return (1); 2374 } 2375 2376 /* 2377 * Retire the bad page that may contain the flushed error. 2378 */ 2379 void 2380 cpu_page_retire(ch_async_flt_t *ch_flt) 2381 { 2382 struct async_flt *aflt = (struct async_flt *)ch_flt; 2383 (void) page_retire(aflt->flt_addr, PR_UE); 2384 } 2385 2386 /* 2387 * The cpu_log_err() function is called by cpu_async_log_err() to perform the 2388 * generic event post-processing for correctable and uncorrectable memory, 2389 * E$, and MTag errors. Historically this entry point was used to log bits of 2390 * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be 2391 * converted into an ereport. In addition, it transmits the error to any 2392 * platform-specific service-processor FRU logging routines, if available. 2393 */ 2394 void 2395 cpu_log_err(struct async_flt *aflt) 2396 { 2397 char unum[UNUM_NAMLEN]; 2398 int len = 0; 2399 int synd_status, synd_code, afar_status; 2400 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 2401 2402 /* 2403 * Need to turn on ECC_ECACHE for plat_get_mem_unum(). 2404 * For Panther, L2$ is not external, so we don't want to 2405 * generate an E$ unum for those errors. 2406 */ 2407 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { 2408 if (ch_flt->flt_bit & C_AFSR_EXT_L3_ERRS) 2409 aflt->flt_status |= ECC_ECACHE; 2410 } else { 2411 if (ch_flt->flt_bit & C_AFSR_ECACHE) 2412 aflt->flt_status |= ECC_ECACHE; 2413 } 2414 2415 /* 2416 * Determine syndrome status. 2417 */ 2418 synd_status = afsr_to_synd_status(aflt->flt_inst, 2419 ch_flt->afsr_errs, ch_flt->flt_bit); 2420 2421 /* 2422 * Determine afar status. 2423 */ 2424 if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)) 2425 afar_status = afsr_to_afar_status(ch_flt->afsr_errs, 2426 ch_flt->flt_bit); 2427 else 2428 afar_status = AFLT_STAT_INVALID; 2429 2430 /* 2431 * If afar status is not invalid do a unum lookup. 2432 */ 2433 if (afar_status != AFLT_STAT_INVALID) { 2434 (void) cpu_get_mem_unum_aflt(synd_status, aflt, unum, 2435 UNUM_NAMLEN, &len); 2436 } else { 2437 unum[0] = '\0'; 2438 } 2439 2440 synd_code = synd_to_synd_code(synd_status, 2441 aflt->flt_synd, ch_flt->flt_bit); 2442 2443 /* 2444 * Do not send the fruid message (plat_ecc_error_data_t) 2445 * to the SC if it can handle the enhanced error information 2446 * (plat_ecc_error2_data_t) or when the tunable 2447 * ecc_log_fruid_enable is set to 0. 2448 */ 2449 2450 if (&plat_ecc_capability_sc_get && 2451 plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) { 2452 if (&plat_log_fruid_error) 2453 plat_log_fruid_error(synd_code, aflt, unum, 2454 ch_flt->flt_bit); 2455 } 2456 2457 if (aflt->flt_func != NULL) 2458 aflt->flt_func(aflt, unum); 2459 2460 if (afar_status != AFLT_STAT_INVALID) 2461 cpu_log_diag_info(ch_flt); 2462 2463 /* 2464 * If we have a CEEN error , we do not reenable CEEN until after 2465 * we exit the trap handler. Otherwise, another error may 2466 * occur causing the handler to be entered recursively. 2467 * We set a timeout to trigger in cpu_ceen_delay_secs seconds, 2468 * to try and ensure that the CPU makes progress in the face 2469 * of a CE storm. 2470 */ 2471 if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) { 2472 (void) timeout(cpu_delayed_check_ce_errors, 2473 (void *)aflt->flt_inst, 2474 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); 2475 } 2476 } 2477 2478 /* 2479 * Invoked by error_init() early in startup and therefore before 2480 * startup_errorq() is called to drain any error Q - 2481 * 2482 * startup() 2483 * startup_end() 2484 * error_init() 2485 * cpu_error_init() 2486 * errorq_init() 2487 * errorq_drain() 2488 * start_other_cpus() 2489 * 2490 * The purpose of this routine is to create error-related taskqs. Taskqs 2491 * are used for this purpose because cpu_lock can't be grabbed from interrupt 2492 * context. 2493 */ 2494 void 2495 cpu_error_init(int items) 2496 { 2497 /* 2498 * Create taskq(s) to reenable CE 2499 */ 2500 ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri, 2501 items, items, TASKQ_PREPOPULATE); 2502 } 2503 2504 void 2505 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep) 2506 { 2507 char unum[UNUM_NAMLEN]; 2508 int len; 2509 2510 switch (aflt->flt_class) { 2511 case CPU_FAULT: 2512 cpu_ereport_init(aflt); 2513 if (cpu_async_log_err(aflt, eqep)) 2514 cpu_ereport_post(aflt); 2515 break; 2516 2517 case BUS_FAULT: 2518 if (aflt->flt_func != NULL) { 2519 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, 2520 unum, UNUM_NAMLEN, &len); 2521 aflt->flt_func(aflt, unum); 2522 } 2523 break; 2524 2525 case RECIRC_CPU_FAULT: 2526 aflt->flt_class = CPU_FAULT; 2527 cpu_log_err(aflt); 2528 cpu_ereport_post(aflt); 2529 break; 2530 2531 case RECIRC_BUS_FAULT: 2532 ASSERT(aflt->flt_class != RECIRC_BUS_FAULT); 2533 /*FALLTHRU*/ 2534 default: 2535 cmn_err(CE_WARN, "discarding CE error 0x%p with invalid " 2536 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 2537 return; 2538 } 2539 } 2540 2541 /* 2542 * Scrub and classify a CE. This function must not modify the 2543 * fault structure passed to it but instead should return the classification 2544 * information. 2545 */ 2546 2547 static uchar_t 2548 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried) 2549 { 2550 uchar_t disp = CE_XDIAG_EXTALG; 2551 on_trap_data_t otd; 2552 uint64_t orig_err; 2553 ch_cpu_logout_t *clop; 2554 2555 /* 2556 * Clear CEEN. CPU CE TL > 0 trap handling will already have done 2557 * this, but our other callers have not. Disable preemption to 2558 * avoid CPU migration so that we restore CEEN on the correct 2559 * cpu later. 2560 * 2561 * CEEN is cleared so that further CEs that our instruction and 2562 * data footprint induce do not cause use to either creep down 2563 * kernel stack to the point of overflow, or do so much CE 2564 * notification as to make little real forward progress. 2565 * 2566 * NCEEN must not be cleared. However it is possible that 2567 * our accesses to the flt_addr may provoke a bus error or timeout 2568 * if the offending address has just been unconfigured as part of 2569 * a DR action. So we must operate under on_trap protection. 2570 */ 2571 kpreempt_disable(); 2572 orig_err = get_error_enable(); 2573 if (orig_err & EN_REG_CEEN) 2574 set_error_enable(orig_err & ~EN_REG_CEEN); 2575 2576 /* 2577 * Our classification algorithm includes the line state before 2578 * the scrub; we'd like this captured after the detection and 2579 * before the algorithm below - the earlier the better. 2580 * 2581 * If we've come from a cpu CE trap then this info already exists 2582 * in the cpu logout area. 2583 * 2584 * For a CE detected by memscrub for which there was no trap 2585 * (running with CEEN off) cpu_log_and_clear_ce has called 2586 * cpu_ce_delayed_ec_logout to capture some cache data, and 2587 * marked the fault structure as incomplete as a flag to later 2588 * logging code. 2589 * 2590 * If called directly from an IO detected CE there has been 2591 * no line data capture. In this case we logout to the cpu logout 2592 * area - that's appropriate since it's the cpu cache data we need 2593 * for classification. We thus borrow the cpu logout area for a 2594 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in 2595 * this time (we will invalidate it again below). 2596 * 2597 * If called from the partner check xcall handler then this cpu 2598 * (the partner) has not necessarily experienced a CE at this 2599 * address. But we want to capture line state before its scrub 2600 * attempt since we use that in our classification. 2601 */ 2602 if (logout_tried == B_FALSE) { 2603 if (!cpu_ce_delayed_ec_logout(ecc->flt_addr)) 2604 disp |= CE_XDIAG_NOLOGOUT; 2605 } 2606 2607 /* 2608 * Scrub memory, then check AFSR for errors. The AFAR we scrub may 2609 * no longer be valid (if DR'd since the initial event) so we 2610 * perform this scrub under on_trap protection. If this access is 2611 * ok then further accesses below will also be ok - DR cannot 2612 * proceed while this thread is active (preemption is disabled); 2613 * to be safe we'll nonetheless use on_trap again below. 2614 */ 2615 if (!on_trap(&otd, OT_DATA_ACCESS)) { 2616 cpu_scrubphys(ecc); 2617 } else { 2618 no_trap(); 2619 if (orig_err & EN_REG_CEEN) 2620 set_error_enable(orig_err); 2621 kpreempt_enable(); 2622 return (disp); 2623 } 2624 no_trap(); 2625 2626 /* 2627 * Did the casx read of the scrub log a CE that matches the AFAR? 2628 * Note that it's quite possible that the read sourced the data from 2629 * another cpu. 2630 */ 2631 if (clear_ecc(ecc)) 2632 disp |= CE_XDIAG_CE1; 2633 2634 /* 2635 * Read the data again. This time the read is very likely to 2636 * come from memory since the scrub induced a writeback to memory. 2637 */ 2638 if (!on_trap(&otd, OT_DATA_ACCESS)) { 2639 (void) lddphys(P2ALIGN(ecc->flt_addr, 8)); 2640 } else { 2641 no_trap(); 2642 if (orig_err & EN_REG_CEEN) 2643 set_error_enable(orig_err); 2644 kpreempt_enable(); 2645 return (disp); 2646 } 2647 no_trap(); 2648 2649 /* Did that read induce a CE that matches the AFAR? */ 2650 if (clear_ecc(ecc)) 2651 disp |= CE_XDIAG_CE2; 2652 2653 /* 2654 * Look at the logout information and record whether we found the 2655 * line in l2/l3 cache. For Panther we are interested in whether 2656 * we found it in either cache (it won't reside in both but 2657 * it is possible to read it that way given the moving target). 2658 */ 2659 clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL; 2660 if (!(disp & CE_XDIAG_NOLOGOUT) && clop && 2661 clop->clo_data.chd_afar != LOGOUT_INVALID) { 2662 int hit, level; 2663 int state; 2664 int totalsize; 2665 ch_ec_data_t *ecp; 2666 2667 /* 2668 * If hit is nonzero then a match was found and hit will 2669 * be one greater than the index which hit. For Panther we 2670 * also need to pay attention to level to see which of l2$ or 2671 * l3$ it hit in. 2672 */ 2673 hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data, 2674 0, &level); 2675 2676 if (hit) { 2677 --hit; 2678 disp |= CE_XDIAG_AFARMATCH; 2679 2680 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2681 if (level == 2) 2682 ecp = &clop->clo_data.chd_l2_data[hit]; 2683 else 2684 ecp = &clop->clo_data.chd_ec_data[hit]; 2685 } else { 2686 ASSERT(level == 2); 2687 ecp = &clop->clo_data.chd_ec_data[hit]; 2688 } 2689 totalsize = cpunodes[CPU->cpu_id].ecache_size; 2690 state = cpu_ectag_pa_to_subblk_state(totalsize, 2691 ecc->flt_addr, ecp->ec_tag); 2692 2693 /* 2694 * Cheetah variants use different state encodings - 2695 * the CH_ECSTATE_* defines vary depending on the 2696 * module we're compiled for. Translate into our 2697 * one true version. Conflate Owner-Shared state 2698 * of SSM mode with Owner as victimisation of such 2699 * lines may cause a writeback. 2700 */ 2701 switch (state) { 2702 case CH_ECSTATE_MOD: 2703 disp |= EC_STATE_M; 2704 break; 2705 2706 case CH_ECSTATE_OWN: 2707 case CH_ECSTATE_OWS: 2708 disp |= EC_STATE_O; 2709 break; 2710 2711 case CH_ECSTATE_EXL: 2712 disp |= EC_STATE_E; 2713 break; 2714 2715 case CH_ECSTATE_SHR: 2716 disp |= EC_STATE_S; 2717 break; 2718 2719 default: 2720 disp |= EC_STATE_I; 2721 break; 2722 } 2723 } 2724 2725 /* 2726 * If we initiated the delayed logout then we are responsible 2727 * for invalidating the logout area. 2728 */ 2729 if (logout_tried == B_FALSE) { 2730 bzero(clop, sizeof (ch_cpu_logout_t)); 2731 clop->clo_data.chd_afar = LOGOUT_INVALID; 2732 } 2733 } 2734 2735 /* 2736 * Re-enable CEEN if we turned it off. 2737 */ 2738 if (orig_err & EN_REG_CEEN) 2739 set_error_enable(orig_err); 2740 kpreempt_enable(); 2741 2742 return (disp); 2743 } 2744 2745 /* 2746 * Scrub a correctable memory error and collect data for classification 2747 * of CE type. This function is called in the detection path, ie tl0 handling 2748 * of a correctable error trap (cpus) or interrupt (IO) at high PIL. 2749 */ 2750 void 2751 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried) 2752 { 2753 /* 2754 * Cheetah CE classification does not set any bits in flt_status. 2755 * Instead we will record classification datapoints in flt_disp. 2756 */ 2757 ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY); 2758 2759 /* 2760 * To check if the error detected by IO is persistent, sticky or 2761 * intermittent. This is noticed by clear_ecc(). 2762 */ 2763 if (ecc->flt_status & ECC_IOBUS) 2764 ecc->flt_stat = C_AFSR_MEMORY; 2765 2766 /* 2767 * Record information from this first part of the algorithm in 2768 * flt_disp. 2769 */ 2770 ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried); 2771 } 2772 2773 /* 2774 * Select a partner to perform a further CE classification check from. 2775 * Must be called with kernel preemption disabled (to stop the cpu list 2776 * from changing). The detecting cpu we are partnering has cpuid 2777 * aflt->flt_inst; we might not be running on the detecting cpu. 2778 * 2779 * Restrict choice to active cpus in the same cpu partition as ourselves in 2780 * an effort to stop bad cpus in one partition causing other partitions to 2781 * perform excessive diagnostic activity. Actually since the errorq drain 2782 * is run from a softint most of the time and that is a global mechanism 2783 * this isolation is only partial. Return NULL if we fail to find a 2784 * suitable partner. 2785 * 2786 * We prefer a partner that is in a different latency group to ourselves as 2787 * we will share fewer datapaths. If such a partner is unavailable then 2788 * choose one in the same lgroup but prefer a different chip and only allow 2789 * a sibling core if flags includes PTNR_SIBLINGOK. If all else fails and 2790 * flags includes PTNR_SELFOK then permit selection of the original detector. 2791 * 2792 * We keep a cache of the last partner selected for a cpu, and we'll try to 2793 * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds 2794 * have passed since that selection was made. This provides the benefit 2795 * of the point-of-view of different partners over time but without 2796 * requiring frequent cpu list traversals. 2797 */ 2798 2799 #define PTNR_SIBLINGOK 0x1 /* Allow selection of sibling core */ 2800 #define PTNR_SELFOK 0x2 /* Allow selection of cpu to "partner" itself */ 2801 2802 static cpu_t * 2803 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep) 2804 { 2805 cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr; 2806 hrtime_t lasttime, thistime; 2807 2808 ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL); 2809 2810 dtcr = cpu[aflt->flt_inst]; 2811 2812 /* 2813 * Short-circuit for the following cases: 2814 * . the dtcr is not flagged active 2815 * . there is just one cpu present 2816 * . the detector has disappeared 2817 * . we were given a bad flt_inst cpuid; this should not happen 2818 * (eg PCI code now fills flt_inst) but if it does it is no 2819 * reason to panic. 2820 * . there is just one cpu left online in the cpu partition 2821 * 2822 * If we return NULL after this point then we do not update the 2823 * chpr_ceptnr_seltime which will cause us to perform a full lookup 2824 * again next time; this is the case where the only other cpu online 2825 * in the detector's partition is on the same chip as the detector 2826 * and since CEEN re-enable is throttled even that case should not 2827 * hurt performance. 2828 */ 2829 if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) { 2830 return (NULL); 2831 } 2832 if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) { 2833 if (flags & PTNR_SELFOK) { 2834 *typep = CE_XDIAG_PTNR_SELF; 2835 return (dtcr); 2836 } else { 2837 return (NULL); 2838 } 2839 } 2840 2841 thistime = gethrtime(); 2842 lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime); 2843 2844 /* 2845 * Select a starting point. 2846 */ 2847 if (!lasttime) { 2848 /* 2849 * We've never selected a partner for this detector before. 2850 * Start the scan at the next online cpu in the same cpu 2851 * partition. 2852 */ 2853 sp = dtcr->cpu_next_part; 2854 } else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) { 2855 /* 2856 * Our last selection has not aged yet. If this partner: 2857 * . is still a valid cpu, 2858 * . is still in the same partition as the detector 2859 * . is still marked active 2860 * . satisfies the 'flags' argument criteria 2861 * then select it again without updating the timestamp. 2862 */ 2863 sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)]; 2864 if (sp == NULL || sp->cpu_part != dtcr->cpu_part || 2865 !cpu_flagged_active(sp->cpu_flags) || 2866 (sp == dtcr && !(flags & PTNR_SELFOK)) || 2867 (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id && 2868 !(flags & PTNR_SIBLINGOK))) { 2869 sp = dtcr->cpu_next_part; 2870 } else { 2871 if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) { 2872 *typep = CE_XDIAG_PTNR_REMOTE; 2873 } else if (sp == dtcr) { 2874 *typep = CE_XDIAG_PTNR_SELF; 2875 } else if (sp->cpu_chip->chip_id == 2876 dtcr->cpu_chip->chip_id) { 2877 *typep = CE_XDIAG_PTNR_SIBLING; 2878 } else { 2879 *typep = CE_XDIAG_PTNR_LOCAL; 2880 } 2881 return (sp); 2882 } 2883 } else { 2884 /* 2885 * Our last selection has aged. If it is nonetheless still a 2886 * valid cpu then start the scan at the next cpu in the 2887 * partition after our last partner. If the last selection 2888 * is no longer a valid cpu then go with our default. In 2889 * this way we slowly cycle through possible partners to 2890 * obtain multiple viewpoints over time. 2891 */ 2892 sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)]; 2893 if (sp == NULL) { 2894 sp = dtcr->cpu_next_part; 2895 } else { 2896 sp = sp->cpu_next_part; /* may be dtcr */ 2897 if (sp->cpu_part != dtcr->cpu_part) 2898 sp = dtcr; 2899 } 2900 } 2901 2902 /* 2903 * We have a proposed starting point for our search, but if this 2904 * cpu is offline then its cpu_next_part will point to itself 2905 * so we can't use that to iterate over cpus in this partition in 2906 * the loop below. We still want to avoid iterating over cpus not 2907 * in our partition, so in the case that our starting point is offline 2908 * we will repoint it to be the detector itself; and if the detector 2909 * happens to be offline we'll return NULL from the following loop. 2910 */ 2911 if (!cpu_flagged_active(sp->cpu_flags)) { 2912 sp = dtcr; 2913 } 2914 2915 ptnr = sp; 2916 locptnr = NULL; 2917 sibptnr = NULL; 2918 do { 2919 if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags)) 2920 continue; 2921 if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) { 2922 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id; 2923 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; 2924 *typep = CE_XDIAG_PTNR_REMOTE; 2925 return (ptnr); 2926 } 2927 if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) { 2928 if (sibptnr == NULL) 2929 sibptnr = ptnr; 2930 continue; 2931 } 2932 if (locptnr == NULL) 2933 locptnr = ptnr; 2934 } while ((ptnr = ptnr->cpu_next_part) != sp); 2935 2936 /* 2937 * A foreign partner has already been returned if one was available. 2938 * 2939 * If locptnr is not NULL it is a cpu in the same lgroup as the 2940 * detector, is active, and is not a sibling of the detector. 2941 * 2942 * If sibptnr is not NULL it is a sibling of the detector, and is 2943 * active. 2944 * 2945 * If we have to resort to using the detector itself we have already 2946 * checked that it is active. 2947 */ 2948 if (locptnr) { 2949 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id; 2950 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; 2951 *typep = CE_XDIAG_PTNR_LOCAL; 2952 return (locptnr); 2953 } else if (sibptnr && flags & PTNR_SIBLINGOK) { 2954 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id; 2955 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; 2956 *typep = CE_XDIAG_PTNR_SIBLING; 2957 return (sibptnr); 2958 } else if (flags & PTNR_SELFOK) { 2959 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id; 2960 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; 2961 *typep = CE_XDIAG_PTNR_SELF; 2962 return (dtcr); 2963 } 2964 2965 return (NULL); 2966 } 2967 2968 /* 2969 * Cross call handler that is requested to run on the designated partner of 2970 * a cpu that experienced a possibly sticky or possibly persistnet CE. 2971 */ 2972 static void 2973 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp) 2974 { 2975 *dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE); 2976 } 2977 2978 /* 2979 * The associated errorqs are never destroyed so we do not need to deal with 2980 * them disappearing before this timeout fires. If the affected memory 2981 * has been DR'd out since the original event the scrub algrithm will catch 2982 * any errors and return null disposition info. If the original detecting 2983 * cpu has been DR'd out then ereport detector info will not be able to 2984 * lookup CPU type; with a small timeout this is unlikely. 2985 */ 2986 static void 2987 ce_lkychk_cb(ce_lkychk_cb_t *cbarg) 2988 { 2989 struct async_flt *aflt = cbarg->lkycb_aflt; 2990 uchar_t disp; 2991 cpu_t *cp; 2992 int ptnrtype; 2993 2994 kpreempt_disable(); 2995 if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK, 2996 &ptnrtype)) { 2997 xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt, 2998 (uint64_t)&disp); 2999 CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp); 3000 CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id); 3001 CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype); 3002 } else { 3003 ce_xdiag_lkydrops++; 3004 if (ncpus > 1) 3005 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3006 CE_XDIAG_SKIP_NOPTNR); 3007 } 3008 kpreempt_enable(); 3009 3010 errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC); 3011 kmem_free(cbarg, sizeof (ce_lkychk_cb_t)); 3012 } 3013 3014 /* 3015 * Called from errorq drain code when processing a CE error, both from 3016 * CPU and PCI drain functions. Decide what further classification actions, 3017 * if any, we will perform. Perform immediate actions now, and schedule 3018 * delayed actions as required. Note that we are no longer necessarily running 3019 * on the detecting cpu, and that the async_flt structure will not persist on 3020 * return from this function. 3021 * 3022 * Calls to this function should aim to be self-throtlling in some way. With 3023 * the delayed re-enable of CEEN the absolute rate of calls should not 3024 * be excessive. Callers should also avoid performing in-depth classification 3025 * for events in pages that are already known to be suspect. 3026 * 3027 * We return nonzero to indicate that the event has been copied and 3028 * recirculated for further testing. The caller should not log the event 3029 * in this case - it will be logged when further test results are available. 3030 * 3031 * Our possible contexts are that of errorq_drain: below lock level or from 3032 * panic context. We can assume that the cpu we are running on is online. 3033 */ 3034 3035 3036 #ifdef DEBUG 3037 static int ce_xdiag_forceaction; 3038 #endif 3039 3040 int 3041 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp, 3042 errorq_elem_t *eqep, size_t afltoffset) 3043 { 3044 ce_dispact_t dispact, action; 3045 cpu_t *cp; 3046 uchar_t dtcrinfo, disp; 3047 int ptnrtype; 3048 3049 if (!ce_disp_inited || panicstr || ce_xdiag_off) { 3050 ce_xdiag_drops++; 3051 return (0); 3052 } else if (!aflt->flt_in_memory) { 3053 ce_xdiag_drops++; 3054 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM); 3055 return (0); 3056 } 3057 3058 dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp); 3059 3060 /* 3061 * Some correctable events are not scrubbed/classified, such as those 3062 * noticed at the tail of cpu_deferred_error. So if there is no 3063 * initial detector classification go no further. 3064 */ 3065 if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) { 3066 ce_xdiag_drops++; 3067 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB); 3068 return (0); 3069 } 3070 3071 dispact = CE_DISPACT(ce_disp_table, 3072 CE_XDIAG_AFARMATCHED(dtcrinfo), 3073 CE_XDIAG_STATE(dtcrinfo), 3074 CE_XDIAG_CE1SEEN(dtcrinfo), 3075 CE_XDIAG_CE2SEEN(dtcrinfo)); 3076 3077 3078 action = CE_ACT(dispact); /* bad lookup caught below */ 3079 #ifdef DEBUG 3080 if (ce_xdiag_forceaction != 0) 3081 action = ce_xdiag_forceaction; 3082 #endif 3083 3084 switch (action) { 3085 case CE_ACT_LKYCHK: { 3086 caddr_t ndata; 3087 errorq_elem_t *neqep; 3088 struct async_flt *ecc; 3089 ce_lkychk_cb_t *cbargp; 3090 3091 if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) { 3092 ce_xdiag_lkydrops++; 3093 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3094 CE_XDIAG_SKIP_DUPFAIL); 3095 break; 3096 } 3097 ecc = (struct async_flt *)(ndata + afltoffset); 3098 3099 ASSERT(ecc->flt_class == CPU_FAULT || 3100 ecc->flt_class == BUS_FAULT); 3101 ecc->flt_class = (ecc->flt_class == CPU_FAULT) ? 3102 RECIRC_CPU_FAULT : RECIRC_BUS_FAULT; 3103 3104 cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP); 3105 cbargp->lkycb_aflt = ecc; 3106 cbargp->lkycb_eqp = eqp; 3107 cbargp->lkycb_eqep = neqep; 3108 3109 (void) timeout((void (*)(void *))ce_lkychk_cb, 3110 (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec)); 3111 return (1); 3112 } 3113 3114 case CE_ACT_PTNRCHK: 3115 kpreempt_disable(); /* stop cpu list changing */ 3116 if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) { 3117 xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, 3118 (uint64_t)aflt, (uint64_t)&disp); 3119 CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp); 3120 CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id); 3121 CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype); 3122 } else if (ncpus > 1) { 3123 ce_xdiag_ptnrdrops++; 3124 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3125 CE_XDIAG_SKIP_NOPTNR); 3126 } else { 3127 ce_xdiag_ptnrdrops++; 3128 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3129 CE_XDIAG_SKIP_UNIPROC); 3130 } 3131 kpreempt_enable(); 3132 break; 3133 3134 case CE_ACT_DONE: 3135 break; 3136 3137 case CE_ACT(CE_DISP_BAD): 3138 default: 3139 #ifdef DEBUG 3140 cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action); 3141 #endif 3142 ce_xdiag_bad++; 3143 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD); 3144 break; 3145 } 3146 3147 return (0); 3148 } 3149 3150 /* 3151 * We route all errors through a single switch statement. 3152 */ 3153 void 3154 cpu_ue_log_err(struct async_flt *aflt) 3155 { 3156 switch (aflt->flt_class) { 3157 case CPU_FAULT: 3158 cpu_ereport_init(aflt); 3159 if (cpu_async_log_err(aflt, NULL)) 3160 cpu_ereport_post(aflt); 3161 break; 3162 3163 case BUS_FAULT: 3164 bus_async_log_err(aflt); 3165 break; 3166 3167 default: 3168 cmn_err(CE_WARN, "discarding async error %p with invalid " 3169 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 3170 return; 3171 } 3172 } 3173 3174 /* 3175 * Routine for panic hook callback from panic_idle(). 3176 */ 3177 void 3178 cpu_async_panic_callb(void) 3179 { 3180 ch_async_flt_t ch_flt; 3181 struct async_flt *aflt; 3182 ch_cpu_errors_t cpu_error_regs; 3183 uint64_t afsr_errs; 3184 3185 get_cpu_error_state(&cpu_error_regs); 3186 3187 afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | 3188 (cpu_error_regs.afsr_ext & C_AFSR_EXT_L3_ERRS); 3189 3190 if (afsr_errs) { 3191 3192 bzero(&ch_flt, sizeof (ch_async_flt_t)); 3193 aflt = (struct async_flt *)&ch_flt; 3194 aflt->flt_id = gethrtime_waitfree(); 3195 aflt->flt_bus_id = getprocessorid(); 3196 aflt->flt_inst = CPU->cpu_id; 3197 aflt->flt_stat = cpu_error_regs.afsr; 3198 aflt->flt_addr = cpu_error_regs.afar; 3199 aflt->flt_prot = AFLT_PROT_NONE; 3200 aflt->flt_class = CPU_FAULT; 3201 aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0); 3202 aflt->flt_panic = 1; 3203 ch_flt.afsr_ext = cpu_error_regs.afsr_ext; 3204 ch_flt.afsr_errs = afsr_errs; 3205 #if defined(SERRANO) 3206 ch_flt.afar2 = cpu_error_regs.afar2; 3207 #endif /* SERRANO */ 3208 (void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL); 3209 } 3210 } 3211 3212 /* 3213 * Routine to convert a syndrome into a syndrome code. 3214 */ 3215 static int 3216 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit) 3217 { 3218 if (synd_status == AFLT_STAT_INVALID) 3219 return (-1); 3220 3221 /* 3222 * Use the syndrome to index the appropriate syndrome table, 3223 * to get the code indicating which bit(s) is(are) bad. 3224 */ 3225 if (afsr_bit & 3226 (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { 3227 if (afsr_bit & C_AFSR_MSYND_ERRS) { 3228 #if defined(JALAPENO) || defined(SERRANO) 3229 if ((synd == 0) || (synd >= BSYND_TBL_SIZE)) 3230 return (-1); 3231 else 3232 return (BPAR0 + synd); 3233 #else /* JALAPENO || SERRANO */ 3234 if ((synd == 0) || (synd >= MSYND_TBL_SIZE)) 3235 return (-1); 3236 else 3237 return (mtag_syndrome_tab[synd]); 3238 #endif /* JALAPENO || SERRANO */ 3239 } else { 3240 if ((synd == 0) || (synd >= ESYND_TBL_SIZE)) 3241 return (-1); 3242 else 3243 return (ecc_syndrome_tab[synd]); 3244 } 3245 } else { 3246 return (-1); 3247 } 3248 } 3249 3250 /* 3251 * Routine to return a string identifying the physical name 3252 * associated with a memory/cache error. 3253 */ 3254 int 3255 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, 3256 uint64_t flt_addr, int flt_bus_id, int flt_in_memory, 3257 ushort_t flt_status, char *buf, int buflen, int *lenp) 3258 { 3259 int synd_code; 3260 int ret; 3261 3262 /* 3263 * An AFSR of -1 defaults to a memory syndrome. 3264 */ 3265 if (flt_stat == (uint64_t)-1) 3266 flt_stat = C_AFSR_CE; 3267 3268 synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat); 3269 3270 /* 3271 * Syndrome code must be either a single-bit error code 3272 * (0...143) or -1 for unum lookup. 3273 */ 3274 if (synd_code < 0 || synd_code >= M2) 3275 synd_code = -1; 3276 if (&plat_get_mem_unum) { 3277 if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id, 3278 flt_in_memory, flt_status, buf, buflen, lenp)) != 0) { 3279 buf[0] = '\0'; 3280 *lenp = 0; 3281 } 3282 3283 return (ret); 3284 } 3285 3286 return (ENOTSUP); 3287 } 3288 3289 /* 3290 * Wrapper for cpu_get_mem_unum() routine that takes an 3291 * async_flt struct rather than explicit arguments. 3292 */ 3293 int 3294 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 3295 char *buf, int buflen, int *lenp) 3296 { 3297 /* 3298 * If we come thru here for an IO bus error aflt->flt_stat will 3299 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum() 3300 * so it will interpret this as a memory error. 3301 */ 3302 return (cpu_get_mem_unum(synd_status, aflt->flt_synd, 3303 (aflt->flt_class == BUS_FAULT) ? 3304 (uint64_t)-1 : ((ch_async_flt_t *)(aflt))->afsr_errs, 3305 aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, 3306 aflt->flt_status, buf, buflen, lenp)); 3307 } 3308 3309 /* 3310 * This routine is a more generic interface to cpu_get_mem_unum() 3311 * that may be used by other modules (e.g. mm). 3312 */ 3313 int 3314 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 3315 char *buf, int buflen, int *lenp) 3316 { 3317 int synd_status, flt_in_memory, ret; 3318 ushort_t flt_status = 0; 3319 char unum[UNUM_NAMLEN]; 3320 3321 /* 3322 * Check for an invalid address. 3323 */ 3324 if (afar == (uint64_t)-1) 3325 return (ENXIO); 3326 3327 if (synd == (uint64_t)-1) 3328 synd_status = AFLT_STAT_INVALID; 3329 else 3330 synd_status = AFLT_STAT_VALID; 3331 3332 flt_in_memory = (*afsr & C_AFSR_MEMORY) && 3333 pf_is_memory(afar >> MMU_PAGESHIFT); 3334 3335 /* 3336 * Need to turn on ECC_ECACHE for plat_get_mem_unum(). 3337 * For Panther, L2$ is not external, so we don't want to 3338 * generate an E$ unum for those errors. 3339 */ 3340 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 3341 if (*(afsr + 1) & C_AFSR_EXT_L3_ERRS) 3342 flt_status |= ECC_ECACHE; 3343 } else { 3344 if (*afsr & C_AFSR_ECACHE) 3345 flt_status |= ECC_ECACHE; 3346 } 3347 3348 ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 3349 CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp); 3350 if (ret != 0) 3351 return (ret); 3352 3353 if (*lenp >= buflen) 3354 return (ENAMETOOLONG); 3355 3356 (void) strncpy(buf, unum, buflen); 3357 3358 return (0); 3359 } 3360 3361 /* 3362 * Routine to return memory information associated 3363 * with a physical address and syndrome. 3364 */ 3365 int 3366 cpu_get_mem_info(uint64_t synd, uint64_t afar, 3367 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 3368 int *segsp, int *banksp, int *mcidp) 3369 { 3370 int synd_status, synd_code; 3371 3372 if (afar == (uint64_t)-1) 3373 return (ENXIO); 3374 3375 if (synd == (uint64_t)-1) 3376 synd_status = AFLT_STAT_INVALID; 3377 else 3378 synd_status = AFLT_STAT_VALID; 3379 3380 synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE); 3381 3382 if (p2get_mem_info != NULL) 3383 return ((p2get_mem_info)(synd_code, afar, 3384 mem_sizep, seg_sizep, bank_sizep, 3385 segsp, banksp, mcidp)); 3386 else 3387 return (ENOTSUP); 3388 } 3389 3390 /* 3391 * Routine to return a string identifying the physical 3392 * name associated with a cpuid. 3393 */ 3394 int 3395 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 3396 { 3397 int ret; 3398 char unum[UNUM_NAMLEN]; 3399 3400 if (&plat_get_cpu_unum) { 3401 if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp)) 3402 != 0) 3403 return (ret); 3404 } else { 3405 return (ENOTSUP); 3406 } 3407 3408 if (*lenp >= buflen) 3409 return (ENAMETOOLONG); 3410 3411 (void) strncpy(buf, unum, buflen); 3412 3413 return (0); 3414 } 3415 3416 /* 3417 * This routine exports the name buffer size. 3418 */ 3419 size_t 3420 cpu_get_name_bufsize() 3421 { 3422 return (UNUM_NAMLEN); 3423 } 3424 3425 /* 3426 * Historical function, apparantly not used. 3427 */ 3428 /* ARGSUSED */ 3429 void 3430 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err) 3431 {} 3432 3433 /* 3434 * Historical function only called for SBus errors in debugging. 3435 */ 3436 /*ARGSUSED*/ 3437 void 3438 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err) 3439 {} 3440 3441 /* 3442 * Clear the AFSR sticky bits. The routine returns a non-zero value if 3443 * any of the AFSR's sticky errors are detected. If a non-null pointer to 3444 * an async fault structure argument is passed in, the captured error state 3445 * (AFSR, AFAR) info will be returned in the structure. 3446 */ 3447 int 3448 clear_errors(ch_async_flt_t *ch_flt) 3449 { 3450 struct async_flt *aflt = (struct async_flt *)ch_flt; 3451 ch_cpu_errors_t cpu_error_regs; 3452 3453 get_cpu_error_state(&cpu_error_regs); 3454 3455 if (ch_flt != NULL) { 3456 aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK; 3457 aflt->flt_addr = cpu_error_regs.afar; 3458 ch_flt->afsr_ext = cpu_error_regs.afsr_ext; 3459 ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | 3460 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS); 3461 #if defined(SERRANO) 3462 ch_flt->afar2 = cpu_error_regs.afar2; 3463 #endif /* SERRANO */ 3464 } 3465 3466 set_cpu_error_state(&cpu_error_regs); 3467 3468 return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | 3469 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0); 3470 } 3471 3472 /* 3473 * Clear any AFSR error bits, and check for persistence. 3474 * 3475 * It would be desirable to also insist that syndrome match. PCI handling 3476 * has already filled flt_synd. For errors trapped by CPU we only fill 3477 * flt_synd when we queue the event, so we do not have a valid flt_synd 3478 * during initial classification (it is valid if we're called as part of 3479 * subsequent low-pil additional classification attempts). We could try 3480 * to determine which syndrome to use: we know we're only called for 3481 * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use 3482 * would be esynd/none and esynd/msynd, respectively. If that is 3483 * implemented then what do we do in the case that we do experience an 3484 * error on the same afar but with different syndrome? At the very least 3485 * we should count such occurences. Anyway, for now, we'll leave it as 3486 * it has been for ages. 3487 */ 3488 static int 3489 clear_ecc(struct async_flt *aflt) 3490 { 3491 ch_cpu_errors_t cpu_error_regs; 3492 3493 /* 3494 * Snapshot the AFSR and AFAR and clear any errors 3495 */ 3496 get_cpu_error_state(&cpu_error_regs); 3497 set_cpu_error_state(&cpu_error_regs); 3498 3499 /* 3500 * If any of the same memory access error bits are still on and 3501 * the AFAR matches, return that the error is persistent. 3502 */ 3503 return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 && 3504 cpu_error_regs.afar == aflt->flt_addr); 3505 } 3506 3507 /* 3508 * Turn off all cpu error detection, normally only used for panics. 3509 */ 3510 void 3511 cpu_disable_errors(void) 3512 { 3513 xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE); 3514 } 3515 3516 /* 3517 * Enable errors. 3518 */ 3519 void 3520 cpu_enable_errors(void) 3521 { 3522 xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE); 3523 } 3524 3525 /* 3526 * Flush the entire ecache using displacement flush by reading through a 3527 * physical address range twice as large as the Ecache. 3528 */ 3529 void 3530 cpu_flush_ecache(void) 3531 { 3532 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size, 3533 cpunodes[CPU->cpu_id].ecache_linesize); 3534 } 3535 3536 /* 3537 * Return CPU E$ set size - E$ size divided by the associativity. 3538 * We use this function in places where the CPU_PRIVATE ptr may not be 3539 * initialized yet. Note that for send_mondo and in the Ecache scrubber, 3540 * we're guaranteed that CPU_PRIVATE is initialized. Also, cpunodes is set 3541 * up before the kernel switches from OBP's to the kernel's trap table, so 3542 * we don't have to worry about cpunodes being unitialized. 3543 */ 3544 int 3545 cpu_ecache_set_size(struct cpu *cp) 3546 { 3547 if (CPU_PRIVATE(cp)) 3548 return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size)); 3549 3550 return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway()); 3551 } 3552 3553 /* 3554 * Flush Ecache line. 3555 * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno. 3556 * Uses normal displacement flush for Cheetah. 3557 */ 3558 static void 3559 cpu_flush_ecache_line(ch_async_flt_t *ch_flt) 3560 { 3561 struct async_flt *aflt = (struct async_flt *)ch_flt; 3562 int ec_set_size = cpu_ecache_set_size(CPU); 3563 3564 ecache_flush_line(aflt->flt_addr, ec_set_size); 3565 } 3566 3567 /* 3568 * Scrub physical address. 3569 * Scrub code is different depending upon whether this a Cheetah+ with 2-way 3570 * Ecache or direct-mapped Ecache. 3571 */ 3572 static void 3573 cpu_scrubphys(struct async_flt *aflt) 3574 { 3575 int ec_set_size = cpu_ecache_set_size(CPU); 3576 3577 scrubphys(aflt->flt_addr, ec_set_size); 3578 } 3579 3580 /* 3581 * Clear physical address. 3582 * Scrub code is different depending upon whether this a Cheetah+ with 2-way 3583 * Ecache or direct-mapped Ecache. 3584 */ 3585 void 3586 cpu_clearphys(struct async_flt *aflt) 3587 { 3588 int lsize = cpunodes[CPU->cpu_id].ecache_linesize; 3589 int ec_set_size = cpu_ecache_set_size(CPU); 3590 3591 3592 clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize); 3593 } 3594 3595 #if defined(CPU_IMP_ECACHE_ASSOC) 3596 /* 3597 * Check for a matching valid line in all the sets. 3598 * If found, return set# + 1. Otherwise return 0. 3599 */ 3600 static int 3601 cpu_ecache_line_valid(ch_async_flt_t *ch_flt) 3602 { 3603 struct async_flt *aflt = (struct async_flt *)ch_flt; 3604 int totalsize = cpunodes[CPU->cpu_id].ecache_size; 3605 int ec_set_size = cpu_ecache_set_size(CPU); 3606 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; 3607 int nway = cpu_ecache_nway(); 3608 int i; 3609 3610 for (i = 0; i < nway; i++, ecp++) { 3611 if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) && 3612 (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) == 3613 cpu_ectag_to_pa(ec_set_size, ecp->ec_tag)) 3614 return (i+1); 3615 } 3616 return (0); 3617 } 3618 #endif /* CPU_IMP_ECACHE_ASSOC */ 3619 3620 /* 3621 * Check whether a line in the given logout info matches the specified 3622 * fault address. If reqval is set then the line must not be Invalid. 3623 * Returns 0 on failure; on success (way + 1) is returned an *level is 3624 * set to 2 for l2$ or 3 for l3$. 3625 */ 3626 static int 3627 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level) 3628 { 3629 ch_diag_data_t *cdp = data; 3630 ch_ec_data_t *ecp; 3631 int totalsize, ec_set_size; 3632 int i, ways; 3633 int match = 0; 3634 int tagvalid; 3635 uint64_t addr, tagpa; 3636 int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation); 3637 3638 /* 3639 * Check the l2$ logout data 3640 */ 3641 if (ispanther) { 3642 ecp = &cdp->chd_l2_data[0]; 3643 ec_set_size = PN_L2_SET_SIZE; 3644 ways = PN_L2_NWAYS; 3645 } else { 3646 ecp = &cdp->chd_ec_data[0]; 3647 ec_set_size = cpu_ecache_set_size(CPU); 3648 ways = cpu_ecache_nway(); 3649 totalsize = cpunodes[CPU->cpu_id].ecache_size; 3650 } 3651 /* remove low order PA bits from fault address not used in PA tag */ 3652 addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size); 3653 for (i = 0; i < ways; i++, ecp++) { 3654 if (ispanther) { 3655 tagpa = PN_L2TAG_TO_PA(ecp->ec_tag); 3656 tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag); 3657 } else { 3658 tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag); 3659 tagvalid = !cpu_ectag_line_invalid(totalsize, 3660 ecp->ec_tag); 3661 } 3662 if (tagpa == addr && (!reqval || tagvalid)) { 3663 match = i + 1; 3664 *level = 2; 3665 break; 3666 } 3667 } 3668 3669 if (match || !ispanther) 3670 return (match); 3671 3672 /* For Panther we also check the l3$ */ 3673 ecp = &cdp->chd_ec_data[0]; 3674 ec_set_size = PN_L3_SET_SIZE; 3675 ways = PN_L3_NWAYS; 3676 addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size); 3677 3678 for (i = 0; i < ways; i++, ecp++) { 3679 if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval || 3680 !PN_L3_LINE_INVALID(ecp->ec_tag))) { 3681 match = i + 1; 3682 *level = 3; 3683 break; 3684 } 3685 } 3686 3687 return (match); 3688 } 3689 3690 #if defined(CPU_IMP_L1_CACHE_PARITY) 3691 /* 3692 * Record information related to the source of an Dcache Parity Error. 3693 */ 3694 static void 3695 cpu_dcache_parity_info(ch_async_flt_t *ch_flt) 3696 { 3697 int dc_set_size = dcache_size / CH_DCACHE_NWAY; 3698 int index; 3699 3700 /* 3701 * Since instruction decode cannot be done at high PIL 3702 * just examine the entire Dcache to locate the error. 3703 */ 3704 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3705 ch_flt->parity_data.dpe.cpl_way = -1; 3706 ch_flt->parity_data.dpe.cpl_off = -1; 3707 } 3708 for (index = 0; index < dc_set_size; index += dcache_linesize) 3709 cpu_dcache_parity_check(ch_flt, index); 3710 } 3711 3712 /* 3713 * Check all ways of the Dcache at a specified index for good parity. 3714 */ 3715 static void 3716 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index) 3717 { 3718 int dc_set_size = dcache_size / CH_DCACHE_NWAY; 3719 uint64_t parity_bits, pbits, data_word; 3720 static int parity_bits_popc[] = { 0, 1, 1, 0 }; 3721 int way, word, data_byte; 3722 ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0]; 3723 ch_dc_data_t tmp_dcp; 3724 3725 for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) { 3726 /* 3727 * Perform diagnostic read. 3728 */ 3729 get_dcache_dtag(index + way * dc_set_size, 3730 (uint64_t *)&tmp_dcp); 3731 3732 /* 3733 * Check tag for even parity. 3734 * Sum of 1 bits (including parity bit) should be even. 3735 */ 3736 if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) { 3737 /* 3738 * If this is the first error log detailed information 3739 * about it and check the snoop tag. Otherwise just 3740 * record the fact that we found another error. 3741 */ 3742 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3743 ch_flt->parity_data.dpe.cpl_way = way; 3744 ch_flt->parity_data.dpe.cpl_cache = 3745 CPU_DC_PARITY; 3746 ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG; 3747 3748 if (popc64(tmp_dcp.dc_sntag & 3749 CHP_DCSNTAG_PARMASK) & 1) { 3750 ch_flt->parity_data.dpe.cpl_tag |= 3751 CHP_DC_SNTAG; 3752 ch_flt->parity_data.dpe.cpl_lcnt++; 3753 } 3754 3755 bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t)); 3756 } 3757 3758 ch_flt->parity_data.dpe.cpl_lcnt++; 3759 } 3760 3761 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 3762 /* 3763 * Panther has more parity bits than the other 3764 * processors for covering dcache data and so each 3765 * byte of data in each word has its own parity bit. 3766 */ 3767 parity_bits = tmp_dcp.dc_pn_data_parity; 3768 for (word = 0; word < 4; word++) { 3769 data_word = tmp_dcp.dc_data[word]; 3770 pbits = parity_bits & PN_DC_DATA_PARITY_MASK; 3771 for (data_byte = 0; data_byte < 8; 3772 data_byte++) { 3773 if (((popc64(data_word & 3774 PN_DC_DATA_PARITY_MASK)) & 1) ^ 3775 (pbits & 1)) { 3776 cpu_record_dc_data_parity( 3777 ch_flt, dcp, &tmp_dcp, way, 3778 word); 3779 } 3780 pbits >>= 1; 3781 data_word >>= 8; 3782 } 3783 parity_bits >>= 8; 3784 } 3785 } else { 3786 /* 3787 * Check data array for even parity. 3788 * The 8 parity bits are grouped into 4 pairs each 3789 * of which covers a 64-bit word. The endianness is 3790 * reversed -- the low-order parity bits cover the 3791 * high-order data words. 3792 */ 3793 parity_bits = tmp_dcp.dc_utag >> 8; 3794 for (word = 0; word < 4; word++) { 3795 pbits = (parity_bits >> (6 - word * 2)) & 3; 3796 if ((popc64(tmp_dcp.dc_data[word]) + 3797 parity_bits_popc[pbits]) & 1) { 3798 cpu_record_dc_data_parity(ch_flt, dcp, 3799 &tmp_dcp, way, word); 3800 } 3801 } 3802 } 3803 } 3804 } 3805 3806 static void 3807 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt, 3808 ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word) 3809 { 3810 /* 3811 * If this is the first error log detailed information about it. 3812 * Otherwise just record the fact that we found another error. 3813 */ 3814 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3815 ch_flt->parity_data.dpe.cpl_way = way; 3816 ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY; 3817 ch_flt->parity_data.dpe.cpl_off = word * 8; 3818 bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t)); 3819 } 3820 ch_flt->parity_data.dpe.cpl_lcnt++; 3821 } 3822 3823 /* 3824 * Record information related to the source of an Icache Parity Error. 3825 * 3826 * Called with the Icache disabled so any diagnostic accesses are safe. 3827 */ 3828 static void 3829 cpu_icache_parity_info(ch_async_flt_t *ch_flt) 3830 { 3831 int ic_set_size; 3832 int ic_linesize; 3833 int index; 3834 3835 if (CPU_PRIVATE(CPU)) { 3836 ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) / 3837 CH_ICACHE_NWAY; 3838 ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize); 3839 } else { 3840 ic_set_size = icache_size / CH_ICACHE_NWAY; 3841 ic_linesize = icache_linesize; 3842 } 3843 3844 ch_flt->parity_data.ipe.cpl_way = -1; 3845 ch_flt->parity_data.ipe.cpl_off = -1; 3846 3847 for (index = 0; index < ic_set_size; index += ic_linesize) 3848 cpu_icache_parity_check(ch_flt, index); 3849 } 3850 3851 /* 3852 * Check all ways of the Icache at a specified index for good parity. 3853 */ 3854 static void 3855 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index) 3856 { 3857 uint64_t parmask, pn_inst_parity; 3858 int ic_set_size; 3859 int ic_linesize; 3860 int flt_index, way, instr, num_instr; 3861 struct async_flt *aflt = (struct async_flt *)ch_flt; 3862 ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0]; 3863 ch_ic_data_t tmp_icp; 3864 3865 if (CPU_PRIVATE(CPU)) { 3866 ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) / 3867 CH_ICACHE_NWAY; 3868 ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize); 3869 } else { 3870 ic_set_size = icache_size / CH_ICACHE_NWAY; 3871 ic_linesize = icache_linesize; 3872 } 3873 3874 /* 3875 * Panther has twice as many instructions per icache line and the 3876 * instruction parity bit is in a different location. 3877 */ 3878 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 3879 num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t); 3880 pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK; 3881 } else { 3882 num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t); 3883 pn_inst_parity = 0; 3884 } 3885 3886 /* 3887 * Index at which we expect to find the parity error. 3888 */ 3889 flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize); 3890 3891 for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) { 3892 /* 3893 * Diagnostic reads expect address argument in ASI format. 3894 */ 3895 get_icache_dtag(2 * (index + way * ic_set_size), 3896 (uint64_t *)&tmp_icp); 3897 3898 /* 3899 * If this is the index in which we expect to find the 3900 * error log detailed information about each of the ways. 3901 * This information will be displayed later if we can't 3902 * determine the exact way in which the error is located. 3903 */ 3904 if (flt_index == index) 3905 bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t)); 3906 3907 /* 3908 * Check tag for even parity. 3909 * Sum of 1 bits (including parity bit) should be even. 3910 */ 3911 if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) { 3912 /* 3913 * If this way is the one in which we expected 3914 * to find the error record the way and check the 3915 * snoop tag. Otherwise just record the fact we 3916 * found another error. 3917 */ 3918 if (flt_index == index) { 3919 ch_flt->parity_data.ipe.cpl_way = way; 3920 ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG; 3921 3922 if (popc64(tmp_icp.ic_sntag & 3923 CHP_ICSNTAG_PARMASK) & 1) { 3924 ch_flt->parity_data.ipe.cpl_tag |= 3925 CHP_IC_SNTAG; 3926 ch_flt->parity_data.ipe.cpl_lcnt++; 3927 } 3928 3929 } 3930 ch_flt->parity_data.ipe.cpl_lcnt++; 3931 continue; 3932 } 3933 3934 /* 3935 * Check instruction data for even parity. 3936 * Bits participating in parity differ for PC-relative 3937 * versus non-PC-relative instructions. 3938 */ 3939 for (instr = 0; instr < num_instr; instr++) { 3940 parmask = (tmp_icp.ic_data[instr] & 3941 CH_ICDATA_PRED_ISPCREL) ? 3942 (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) : 3943 (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity); 3944 if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) { 3945 /* 3946 * If this way is the one in which we expected 3947 * to find the error record the way and offset. 3948 * Otherwise just log the fact we found another 3949 * error. 3950 */ 3951 if (flt_index == index) { 3952 ch_flt->parity_data.ipe.cpl_way = way; 3953 ch_flt->parity_data.ipe.cpl_off = 3954 instr * 4; 3955 } 3956 ch_flt->parity_data.ipe.cpl_lcnt++; 3957 continue; 3958 } 3959 } 3960 } 3961 } 3962 3963 /* 3964 * Record information related to the source of an Pcache Parity Error. 3965 */ 3966 static void 3967 cpu_pcache_parity_info(ch_async_flt_t *ch_flt) 3968 { 3969 int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY; 3970 int index; 3971 3972 /* 3973 * Since instruction decode cannot be done at high PIL just 3974 * examine the entire Pcache to check for any parity errors. 3975 */ 3976 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3977 ch_flt->parity_data.dpe.cpl_way = -1; 3978 ch_flt->parity_data.dpe.cpl_off = -1; 3979 } 3980 for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE) 3981 cpu_pcache_parity_check(ch_flt, index); 3982 } 3983 3984 /* 3985 * Check all ways of the Pcache at a specified index for good parity. 3986 */ 3987 static void 3988 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index) 3989 { 3990 int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY; 3991 int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t); 3992 int way, word, pbit, parity_bits; 3993 ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0]; 3994 ch_pc_data_t tmp_pcp; 3995 3996 for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) { 3997 /* 3998 * Perform diagnostic read. 3999 */ 4000 get_pcache_dtag(index + way * pc_set_size, 4001 (uint64_t *)&tmp_pcp); 4002 /* 4003 * Check data array for odd parity. There are 8 parity 4004 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each 4005 * of those bits covers exactly 8 bytes of the data 4006 * array: 4007 * 4008 * parity bit P$ data bytes covered 4009 * ---------- --------------------- 4010 * 50 63:56 4011 * 51 55:48 4012 * 52 47:40 4013 * 53 39:32 4014 * 54 31:24 4015 * 55 23:16 4016 * 56 15:8 4017 * 57 7:0 4018 */ 4019 parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status); 4020 for (word = 0; word < pc_data_words; word++) { 4021 pbit = (parity_bits >> (pc_data_words - word - 1)) & 1; 4022 if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) { 4023 /* 4024 * If this is the first error log detailed 4025 * information about it. Otherwise just record 4026 * the fact that we found another error. 4027 */ 4028 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 4029 ch_flt->parity_data.dpe.cpl_way = way; 4030 ch_flt->parity_data.dpe.cpl_cache = 4031 CPU_PC_PARITY; 4032 ch_flt->parity_data.dpe.cpl_off = 4033 word * sizeof (uint64_t); 4034 bcopy(&tmp_pcp, pcp, 4035 sizeof (ch_pc_data_t)); 4036 } 4037 ch_flt->parity_data.dpe.cpl_lcnt++; 4038 } 4039 } 4040 } 4041 } 4042 4043 4044 /* 4045 * Add L1 Data cache data to the ereport payload. 4046 */ 4047 static void 4048 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl) 4049 { 4050 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4051 ch_dc_data_t *dcp; 4052 ch_dc_data_t dcdata[CH_DCACHE_NWAY]; 4053 uint_t nelem; 4054 int i, ways_to_check, ways_logged = 0; 4055 4056 /* 4057 * If this is an D$ fault then there may be multiple 4058 * ways captured in the ch_parity_log_t structure. 4059 * Otherwise, there will be at most one way captured 4060 * in the ch_diag_data_t struct. 4061 * Check each way to see if it should be encoded. 4062 */ 4063 if (ch_flt->flt_type == CPU_DC_PARITY) 4064 ways_to_check = CH_DCACHE_NWAY; 4065 else 4066 ways_to_check = 1; 4067 for (i = 0; i < ways_to_check; i++) { 4068 if (ch_flt->flt_type == CPU_DC_PARITY) 4069 dcp = &ch_flt->parity_data.dpe.cpl_dc[i]; 4070 else 4071 dcp = &ch_flt->flt_diag_data.chd_dc_data; 4072 if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) { 4073 bcopy(dcp, &dcdata[ways_logged], 4074 sizeof (ch_dc_data_t)); 4075 ways_logged++; 4076 } 4077 } 4078 4079 /* 4080 * Add the dcache data to the payload. 4081 */ 4082 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS, 4083 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4084 if (ways_logged != 0) { 4085 nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged; 4086 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA, 4087 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL); 4088 } 4089 } 4090 4091 /* 4092 * Add L1 Instruction cache data to the ereport payload. 4093 */ 4094 static void 4095 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl) 4096 { 4097 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4098 ch_ic_data_t *icp; 4099 ch_ic_data_t icdata[CH_ICACHE_NWAY]; 4100 uint_t nelem; 4101 int i, ways_to_check, ways_logged = 0; 4102 4103 /* 4104 * If this is an I$ fault then there may be multiple 4105 * ways captured in the ch_parity_log_t structure. 4106 * Otherwise, there will be at most one way captured 4107 * in the ch_diag_data_t struct. 4108 * Check each way to see if it should be encoded. 4109 */ 4110 if (ch_flt->flt_type == CPU_IC_PARITY) 4111 ways_to_check = CH_ICACHE_NWAY; 4112 else 4113 ways_to_check = 1; 4114 for (i = 0; i < ways_to_check; i++) { 4115 if (ch_flt->flt_type == CPU_IC_PARITY) 4116 icp = &ch_flt->parity_data.ipe.cpl_ic[i]; 4117 else 4118 icp = &ch_flt->flt_diag_data.chd_ic_data; 4119 if (icp->ic_logflag == IC_LOGFLAG_MAGIC) { 4120 bcopy(icp, &icdata[ways_logged], 4121 sizeof (ch_ic_data_t)); 4122 ways_logged++; 4123 } 4124 } 4125 4126 /* 4127 * Add the icache data to the payload. 4128 */ 4129 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS, 4130 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4131 if (ways_logged != 0) { 4132 nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged; 4133 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA, 4134 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL); 4135 } 4136 } 4137 4138 #endif /* CPU_IMP_L1_CACHE_PARITY */ 4139 4140 /* 4141 * Add ecache data to payload. 4142 */ 4143 static void 4144 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl) 4145 { 4146 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4147 ch_ec_data_t *ecp; 4148 ch_ec_data_t ecdata[CHD_EC_DATA_SETS]; 4149 uint_t nelem; 4150 int i, ways_logged = 0; 4151 4152 /* 4153 * Check each way to see if it should be encoded 4154 * and concatinate it into a temporary buffer. 4155 */ 4156 for (i = 0; i < CHD_EC_DATA_SETS; i++) { 4157 ecp = &ch_flt->flt_diag_data.chd_ec_data[i]; 4158 if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) { 4159 bcopy(ecp, &ecdata[ways_logged], 4160 sizeof (ch_ec_data_t)); 4161 ways_logged++; 4162 } 4163 } 4164 4165 /* 4166 * Panther CPUs have an additional level of cache and so 4167 * what we just collected was the L3 (ecache) and not the 4168 * L2 cache. 4169 */ 4170 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { 4171 /* 4172 * Add the L3 (ecache) data to the payload. 4173 */ 4174 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS, 4175 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4176 if (ways_logged != 0) { 4177 nelem = sizeof (ch_ec_data_t) / 4178 sizeof (uint64_t) * ways_logged; 4179 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA, 4180 DATA_TYPE_UINT64_ARRAY, nelem, 4181 (uint64_t *)ecdata, NULL); 4182 } 4183 4184 /* 4185 * Now collect the L2 cache. 4186 */ 4187 ways_logged = 0; 4188 for (i = 0; i < PN_L2_NWAYS; i++) { 4189 ecp = &ch_flt->flt_diag_data.chd_l2_data[i]; 4190 if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) { 4191 bcopy(ecp, &ecdata[ways_logged], 4192 sizeof (ch_ec_data_t)); 4193 ways_logged++; 4194 } 4195 } 4196 } 4197 4198 /* 4199 * Add the L2 cache data to the payload. 4200 */ 4201 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS, 4202 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4203 if (ways_logged != 0) { 4204 nelem = sizeof (ch_ec_data_t) / 4205 sizeof (uint64_t) * ways_logged; 4206 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA, 4207 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)ecdata, NULL); 4208 } 4209 } 4210 4211 /* 4212 * Encode the data saved in the ch_async_flt_t struct into 4213 * the FM ereport payload. 4214 */ 4215 static void 4216 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload, 4217 nvlist_t *resource, int *afar_status, int *synd_status) 4218 { 4219 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4220 *synd_status = AFLT_STAT_INVALID; 4221 *afar_status = AFLT_STAT_INVALID; 4222 4223 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) { 4224 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR, 4225 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 4226 } 4227 4228 if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) && 4229 IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { 4230 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT, 4231 DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL); 4232 } 4233 4234 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) { 4235 *afar_status = afsr_to_afar_status(ch_flt->afsr_errs, 4236 ch_flt->flt_bit); 4237 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS, 4238 DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL); 4239 } 4240 4241 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) { 4242 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR, 4243 DATA_TYPE_UINT64, aflt->flt_addr, NULL); 4244 } 4245 4246 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) { 4247 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC, 4248 DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL); 4249 } 4250 4251 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) { 4252 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL, 4253 DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL); 4254 } 4255 4256 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) { 4257 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT, 4258 DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL); 4259 } 4260 4261 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) { 4262 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV, 4263 DATA_TYPE_BOOLEAN_VALUE, 4264 (aflt->flt_priv ? B_TRUE : B_FALSE), NULL); 4265 } 4266 4267 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) { 4268 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME, 4269 DATA_TYPE_BOOLEAN_VALUE, 4270 (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL); 4271 } 4272 4273 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) { 4274 *synd_status = afsr_to_synd_status(aflt->flt_inst, 4275 ch_flt->afsr_errs, ch_flt->flt_bit); 4276 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS, 4277 DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL); 4278 } 4279 4280 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) { 4281 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND, 4282 DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL); 4283 } 4284 4285 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) { 4286 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, 4287 DATA_TYPE_STRING, flt_to_error_type(aflt), NULL); 4288 } 4289 4290 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) { 4291 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP, 4292 DATA_TYPE_UINT64, aflt->flt_disp, NULL); 4293 } 4294 4295 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2) 4296 cpu_payload_add_ecache(aflt, payload); 4297 4298 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) { 4299 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION, 4300 DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL); 4301 } 4302 4303 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) { 4304 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED, 4305 DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL); 4306 } 4307 4308 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) { 4309 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK, 4310 DATA_TYPE_UINT32_ARRAY, 16, 4311 (uint32_t *)&ch_flt->flt_fpdata, NULL); 4312 } 4313 4314 #if defined(CPU_IMP_L1_CACHE_PARITY) 4315 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D) 4316 cpu_payload_add_dcache(aflt, payload); 4317 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I) 4318 cpu_payload_add_icache(aflt, payload); 4319 #endif /* CPU_IMP_L1_CACHE_PARITY */ 4320 4321 #if defined(CHEETAH_PLUS) 4322 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P) 4323 cpu_payload_add_pcache(aflt, payload); 4324 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB) 4325 cpu_payload_add_tlb(aflt, payload); 4326 #endif /* CHEETAH_PLUS */ 4327 /* 4328 * Create the FMRI that goes into the payload 4329 * and contains the unum info if necessary. 4330 */ 4331 if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) && 4332 (*afar_status == AFLT_STAT_VALID)) { 4333 char unum[UNUM_NAMLEN]; 4334 int len; 4335 4336 if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum, 4337 UNUM_NAMLEN, &len) == 0) { 4338 fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, 4339 NULL, unum, NULL); 4340 fm_payload_set(payload, 4341 FM_EREPORT_PAYLOAD_NAME_RESOURCE, 4342 DATA_TYPE_NVLIST, resource, NULL); 4343 } 4344 } 4345 } 4346 4347 /* 4348 * Initialize the way info if necessary. 4349 */ 4350 void 4351 cpu_ereport_init(struct async_flt *aflt) 4352 { 4353 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4354 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; 4355 ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0]; 4356 int i; 4357 4358 /* 4359 * Initialize the info in the CPU logout structure. 4360 * The I$/D$ way information is not initialized here 4361 * since it is captured in the logout assembly code. 4362 */ 4363 for (i = 0; i < CHD_EC_DATA_SETS; i++) 4364 (ecp + i)->ec_way = i; 4365 4366 for (i = 0; i < PN_L2_NWAYS; i++) 4367 (l2p + i)->ec_way = i; 4368 } 4369 4370 /* 4371 * Returns whether fault address is valid for this error bit and 4372 * whether the address is "in memory" (i.e. pf_is_memory returns 1). 4373 */ 4374 int 4375 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit) 4376 { 4377 struct async_flt *aflt = (struct async_flt *)ch_flt; 4378 4379 return ((aflt->flt_stat & C_AFSR_MEMORY) && 4380 afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) == 4381 AFLT_STAT_VALID && 4382 pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)); 4383 } 4384 4385 static void 4386 cpu_log_diag_info(ch_async_flt_t *ch_flt) 4387 { 4388 struct async_flt *aflt = (struct async_flt *)ch_flt; 4389 ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data; 4390 ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data; 4391 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; 4392 #if defined(CPU_IMP_ECACHE_ASSOC) 4393 int i, nway; 4394 #endif /* CPU_IMP_ECACHE_ASSOC */ 4395 4396 /* 4397 * Check if the CPU log out captured was valid. 4398 */ 4399 if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID || 4400 ch_flt->flt_data_incomplete) 4401 return; 4402 4403 #if defined(CPU_IMP_ECACHE_ASSOC) 4404 nway = cpu_ecache_nway(); 4405 i = cpu_ecache_line_valid(ch_flt); 4406 if (i == 0 || i > nway) { 4407 for (i = 0; i < nway; i++) 4408 ecp[i].ec_logflag = EC_LOGFLAG_MAGIC; 4409 } else 4410 ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC; 4411 #else /* CPU_IMP_ECACHE_ASSOC */ 4412 ecp->ec_logflag = EC_LOGFLAG_MAGIC; 4413 #endif /* CPU_IMP_ECACHE_ASSOC */ 4414 4415 #if defined(CHEETAH_PLUS) 4416 pn_cpu_log_diag_l2_info(ch_flt); 4417 #endif /* CHEETAH_PLUS */ 4418 4419 if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) { 4420 dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx); 4421 dcp->dc_logflag = DC_LOGFLAG_MAGIC; 4422 } 4423 4424 if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) { 4425 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 4426 icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx); 4427 else 4428 icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx); 4429 icp->ic_logflag = IC_LOGFLAG_MAGIC; 4430 } 4431 } 4432 4433 /* 4434 * Cheetah ECC calculation. 4435 * 4436 * We only need to do the calculation on the data bits and can ignore check 4437 * bit and Mtag bit terms in the calculation. 4438 */ 4439 static uint64_t ch_ecc_table[9][2] = { 4440 /* 4441 * low order 64-bits high-order 64-bits 4442 */ 4443 { 0x46bffffeccd1177f, 0x488800022100014c }, 4444 { 0x42fccc81331ff77f, 0x14424f1010249184 }, 4445 { 0x8898827c222f1ffe, 0x22c1222808184aaf }, 4446 { 0xf7632203e131ccf1, 0xe1241121848292b8 }, 4447 { 0x7f5511421b113809, 0x901c88d84288aafe }, 4448 { 0x1d49412184882487, 0x8f338c87c044c6ef }, 4449 { 0xf552181014448344, 0x7ff8f4443e411911 }, 4450 { 0x2189240808f24228, 0xfeeff8cc81333f42 }, 4451 { 0x3280008440001112, 0xfee88b337ffffd62 }, 4452 }; 4453 4454 /* 4455 * 64-bit population count, use well-known popcnt trick. 4456 * We could use the UltraSPARC V9 POPC instruction, but some 4457 * CPUs including Cheetahplus and Jaguar do not support that 4458 * instruction. 4459 */ 4460 int 4461 popc64(uint64_t val) 4462 { 4463 int cnt; 4464 4465 for (cnt = 0; val != 0; val &= val - 1) 4466 cnt++; 4467 return (cnt); 4468 } 4469 4470 /* 4471 * Generate the 9 ECC bits for the 128-bit chunk based on the table above. 4472 * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number 4473 * of 1 bits == 0, so we can just use the least significant bit of the popcnt 4474 * instead of doing all the xor's. 4475 */ 4476 uint32_t 4477 us3_gen_ecc(uint64_t data_low, uint64_t data_high) 4478 { 4479 int bitno, s; 4480 int synd = 0; 4481 4482 for (bitno = 0; bitno < 9; bitno++) { 4483 s = (popc64(data_low & ch_ecc_table[bitno][0]) + 4484 popc64(data_high & ch_ecc_table[bitno][1])) & 1; 4485 synd |= (s << bitno); 4486 } 4487 return (synd); 4488 4489 } 4490 4491 /* 4492 * Queue one event based on ecc_type_to_info entry. If the event has an AFT1 4493 * tag associated with it or is a fatal event (aflt_panic set), it is sent to 4494 * the UE event queue. Otherwise it is dispatched to the CE event queue. 4495 */ 4496 static void 4497 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason, 4498 ecc_type_to_info_t *eccp, ch_diag_data_t *cdp) 4499 { 4500 struct async_flt *aflt = (struct async_flt *)ch_flt; 4501 4502 if (reason && 4503 strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { 4504 (void) strcat(reason, eccp->ec_reason); 4505 } 4506 4507 ch_flt->flt_bit = eccp->ec_afsr_bit; 4508 ch_flt->flt_type = eccp->ec_flt_type; 4509 if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID) 4510 ch_flt->flt_diag_data = *cdp; 4511 else 4512 ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID; 4513 aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit); 4514 4515 if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS) 4516 aflt->flt_synd = GET_M_SYND(aflt->flt_stat); 4517 else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) 4518 aflt->flt_synd = GET_E_SYND(aflt->flt_stat); 4519 else 4520 aflt->flt_synd = 0; 4521 4522 aflt->flt_payload = eccp->ec_err_payload; 4523 4524 if (aflt->flt_panic || (eccp->ec_afsr_bit & 4525 (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1))) 4526 cpu_errorq_dispatch(eccp->ec_err_class, 4527 (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue, 4528 aflt->flt_panic); 4529 else 4530 cpu_errorq_dispatch(eccp->ec_err_class, 4531 (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue, 4532 aflt->flt_panic); 4533 } 4534 4535 /* 4536 * Queue events on async event queue one event per error bit. First we 4537 * queue the events that we "expect" for the given trap, then we queue events 4538 * that we may not expect. Return number of events queued. 4539 */ 4540 int 4541 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs, 4542 ch_cpu_logout_t *clop) 4543 { 4544 struct async_flt *aflt = (struct async_flt *)ch_flt; 4545 ecc_type_to_info_t *eccp; 4546 int nevents = 0; 4547 uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat; 4548 #if defined(CHEETAH_PLUS) 4549 uint64_t orig_t_afsr_errs; 4550 #endif 4551 uint64_t primary_afsr_ext = ch_flt->afsr_ext; 4552 uint64_t primary_afsr_errs = ch_flt->afsr_errs; 4553 ch_diag_data_t *cdp = NULL; 4554 4555 t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS); 4556 4557 #if defined(CHEETAH_PLUS) 4558 orig_t_afsr_errs = t_afsr_errs; 4559 4560 /* 4561 * For Cheetah+, log the shadow AFSR/AFAR bits first. 4562 */ 4563 if (clop != NULL) { 4564 /* 4565 * Set the AFSR and AFAR fields to the shadow registers. The 4566 * flt_addr and flt_stat fields will be reset to the primaries 4567 * below, but the sdw_addr and sdw_stat will stay as the 4568 * secondaries. 4569 */ 4570 cdp = &clop->clo_sdw_data; 4571 aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar; 4572 aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr; 4573 ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext; 4574 ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 4575 (cdp->chd_afsr & C_AFSR_ALL_ERRS); 4576 4577 /* 4578 * If the primary and shadow AFSR differ, tag the shadow as 4579 * the first fault. 4580 */ 4581 if ((primary_afar != cdp->chd_afar) || 4582 (primary_afsr_errs != ch_flt->afsr_errs)) { 4583 aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT); 4584 } 4585 4586 /* 4587 * Check AFSR bits as well as AFSR_EXT bits in order of 4588 * the AFAR overwrite priority. Our stored AFSR_EXT value 4589 * is expected to be zero for those CPUs which do not have 4590 * an AFSR_EXT register. 4591 */ 4592 for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) { 4593 if ((eccp->ec_afsr_bit & 4594 (ch_flt->afsr_errs & t_afsr_errs)) && 4595 ((eccp->ec_flags & aflt->flt_status) != 0)) { 4596 cpu_queue_one_event(ch_flt, reason, eccp, cdp); 4597 cdp = NULL; 4598 t_afsr_errs &= ~eccp->ec_afsr_bit; 4599 nevents++; 4600 } 4601 } 4602 4603 /* 4604 * If the ME bit is on in the primary AFSR turn all the 4605 * error bits on again that may set the ME bit to make 4606 * sure we see the ME AFSR error logs. 4607 */ 4608 if ((primary_afsr & C_AFSR_ME) != 0) 4609 t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS); 4610 } 4611 #endif /* CHEETAH_PLUS */ 4612 4613 if (clop != NULL) 4614 cdp = &clop->clo_data; 4615 4616 /* 4617 * Queue expected errors, error bit and fault type must match 4618 * in the ecc_type_to_info table. 4619 */ 4620 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 4621 eccp++) { 4622 if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && 4623 (eccp->ec_flags & aflt->flt_status) != 0) { 4624 #if defined(SERRANO) 4625 /* 4626 * For FRC/FRU errors on Serrano the afar2 captures 4627 * the address and the associated data is 4628 * in the shadow logout area. 4629 */ 4630 if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) { 4631 if (clop != NULL) 4632 cdp = &clop->clo_sdw_data; 4633 aflt->flt_addr = ch_flt->afar2; 4634 } else { 4635 if (clop != NULL) 4636 cdp = &clop->clo_data; 4637 aflt->flt_addr = primary_afar; 4638 } 4639 #else /* SERRANO */ 4640 aflt->flt_addr = primary_afar; 4641 #endif /* SERRANO */ 4642 aflt->flt_stat = primary_afsr; 4643 ch_flt->afsr_ext = primary_afsr_ext; 4644 ch_flt->afsr_errs = primary_afsr_errs; 4645 cpu_queue_one_event(ch_flt, reason, eccp, cdp); 4646 cdp = NULL; 4647 t_afsr_errs &= ~eccp->ec_afsr_bit; 4648 nevents++; 4649 } 4650 } 4651 4652 /* 4653 * Queue unexpected errors, error bit only match. 4654 */ 4655 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 4656 eccp++) { 4657 if (eccp->ec_afsr_bit & t_afsr_errs) { 4658 #if defined(SERRANO) 4659 /* 4660 * For FRC/FRU errors on Serrano the afar2 captures 4661 * the address and the associated data is 4662 * in the shadow logout area. 4663 */ 4664 if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) { 4665 if (clop != NULL) 4666 cdp = &clop->clo_sdw_data; 4667 aflt->flt_addr = ch_flt->afar2; 4668 } else { 4669 if (clop != NULL) 4670 cdp = &clop->clo_data; 4671 aflt->flt_addr = primary_afar; 4672 } 4673 #else /* SERRANO */ 4674 aflt->flt_addr = primary_afar; 4675 #endif /* SERRANO */ 4676 aflt->flt_stat = primary_afsr; 4677 ch_flt->afsr_ext = primary_afsr_ext; 4678 ch_flt->afsr_errs = primary_afsr_errs; 4679 cpu_queue_one_event(ch_flt, reason, eccp, cdp); 4680 cdp = NULL; 4681 t_afsr_errs &= ~eccp->ec_afsr_bit; 4682 nevents++; 4683 } 4684 } 4685 return (nevents); 4686 } 4687 4688 /* 4689 * Return trap type number. 4690 */ 4691 uint8_t 4692 flt_to_trap_type(struct async_flt *aflt) 4693 { 4694 if (aflt->flt_status & ECC_I_TRAP) 4695 return (TRAP_TYPE_ECC_I); 4696 if (aflt->flt_status & ECC_D_TRAP) 4697 return (TRAP_TYPE_ECC_D); 4698 if (aflt->flt_status & ECC_F_TRAP) 4699 return (TRAP_TYPE_ECC_F); 4700 if (aflt->flt_status & ECC_C_TRAP) 4701 return (TRAP_TYPE_ECC_C); 4702 if (aflt->flt_status & ECC_DP_TRAP) 4703 return (TRAP_TYPE_ECC_DP); 4704 if (aflt->flt_status & ECC_IP_TRAP) 4705 return (TRAP_TYPE_ECC_IP); 4706 if (aflt->flt_status & ECC_ITLB_TRAP) 4707 return (TRAP_TYPE_ECC_ITLB); 4708 if (aflt->flt_status & ECC_DTLB_TRAP) 4709 return (TRAP_TYPE_ECC_DTLB); 4710 return (TRAP_TYPE_UNKNOWN); 4711 } 4712 4713 /* 4714 * Decide an error type based on detector and leaky/partner tests. 4715 * The following array is used for quick translation - it must 4716 * stay in sync with ce_dispact_t. 4717 */ 4718 4719 static char *cetypes[] = { 4720 CE_DISP_DESC_U, 4721 CE_DISP_DESC_I, 4722 CE_DISP_DESC_PP, 4723 CE_DISP_DESC_P, 4724 CE_DISP_DESC_L, 4725 CE_DISP_DESC_PS, 4726 CE_DISP_DESC_S 4727 }; 4728 4729 char * 4730 flt_to_error_type(struct async_flt *aflt) 4731 { 4732 ce_dispact_t dispact, disp; 4733 uchar_t dtcrinfo, ptnrinfo, lkyinfo; 4734 4735 /* 4736 * The memory payload bundle is shared by some events that do 4737 * not perform any classification. For those flt_disp will be 4738 * 0 and we will return "unknown". 4739 */ 4740 if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0) 4741 return (cetypes[CE_DISP_UNKNOWN]); 4742 4743 dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp); 4744 4745 /* 4746 * It is also possible that no scrub/classification was performed 4747 * by the detector, for instance where a disrupting error logged 4748 * in the AFSR while CEEN was off in cpu_deferred_error. 4749 */ 4750 if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) 4751 return (cetypes[CE_DISP_UNKNOWN]); 4752 4753 /* 4754 * Lookup type in initial classification/action table 4755 */ 4756 dispact = CE_DISPACT(ce_disp_table, 4757 CE_XDIAG_AFARMATCHED(dtcrinfo), 4758 CE_XDIAG_STATE(dtcrinfo), 4759 CE_XDIAG_CE1SEEN(dtcrinfo), 4760 CE_XDIAG_CE2SEEN(dtcrinfo)); 4761 4762 /* 4763 * A bad lookup is not something to panic production systems for. 4764 */ 4765 ASSERT(dispact != CE_DISP_BAD); 4766 if (dispact == CE_DISP_BAD) 4767 return (cetypes[CE_DISP_UNKNOWN]); 4768 4769 disp = CE_DISP(dispact); 4770 4771 switch (disp) { 4772 case CE_DISP_UNKNOWN: 4773 case CE_DISP_INTERMITTENT: 4774 break; 4775 4776 case CE_DISP_POSS_PERS: 4777 /* 4778 * "Possible persistent" errors to which we have applied a valid 4779 * leaky test can be separated into "persistent" or "leaky". 4780 */ 4781 lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp); 4782 if (CE_XDIAG_TESTVALID(lkyinfo)) { 4783 if (CE_XDIAG_CE1SEEN(lkyinfo) || 4784 CE_XDIAG_CE2SEEN(lkyinfo)) 4785 disp = CE_DISP_LEAKY; 4786 else 4787 disp = CE_DISP_PERS; 4788 } 4789 break; 4790 4791 case CE_DISP_POSS_STICKY: 4792 /* 4793 * Promote "possible sticky" results that have been 4794 * confirmed by a partner test to "sticky". Unconfirmed 4795 * "possible sticky" events are left at that status - we do not 4796 * guess at any bad reader/writer etc status here. 4797 */ 4798 ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp); 4799 if (CE_XDIAG_TESTVALID(ptnrinfo) && 4800 CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo)) 4801 disp = CE_DISP_STICKY; 4802 4803 /* 4804 * Promote "possible sticky" results on a uniprocessor 4805 * to "sticky" 4806 */ 4807 if (disp == CE_DISP_POSS_STICKY && 4808 CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC) 4809 disp = CE_DISP_STICKY; 4810 break; 4811 4812 default: 4813 disp = CE_DISP_UNKNOWN; 4814 break; 4815 } 4816 4817 return (cetypes[disp]); 4818 } 4819 4820 /* 4821 * Given the entire afsr, the specific bit to check and a prioritized list of 4822 * error bits, determine the validity of the various overwrite priority 4823 * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have 4824 * different overwrite priorities. 4825 * 4826 * Given a specific afsr error bit and the entire afsr, there are three cases: 4827 * INVALID: The specified bit is lower overwrite priority than some other 4828 * error bit which is on in the afsr (or IVU/IVC). 4829 * VALID: The specified bit is higher priority than all other error bits 4830 * which are on in the afsr. 4831 * AMBIGUOUS: Another error bit (or bits) of equal priority to the specified 4832 * bit is on in the afsr. 4833 */ 4834 int 4835 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits) 4836 { 4837 uint64_t afsr_ow; 4838 4839 while ((afsr_ow = *ow_bits++) != 0) { 4840 /* 4841 * If bit is in the priority class, check to see if another 4842 * bit in the same class is on => ambiguous. Otherwise, 4843 * the value is valid. If the bit is not on at this priority 4844 * class, but a higher priority bit is on, then the value is 4845 * invalid. 4846 */ 4847 if (afsr_ow & afsr_bit) { 4848 /* 4849 * If equal pri bit is on, ambiguous. 4850 */ 4851 if (afsr & (afsr_ow & ~afsr_bit)) 4852 return (AFLT_STAT_AMBIGUOUS); 4853 return (AFLT_STAT_VALID); 4854 } else if (afsr & afsr_ow) 4855 break; 4856 } 4857 4858 /* 4859 * We didn't find a match or a higher priority bit was on. Not 4860 * finding a match handles the case of invalid AFAR for IVC, IVU. 4861 */ 4862 return (AFLT_STAT_INVALID); 4863 } 4864 4865 static int 4866 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit) 4867 { 4868 #if defined(SERRANO) 4869 if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) 4870 return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite)); 4871 else 4872 #endif /* SERRANO */ 4873 return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite)); 4874 } 4875 4876 static int 4877 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit) 4878 { 4879 return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite)); 4880 } 4881 4882 static int 4883 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit) 4884 { 4885 return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite)); 4886 } 4887 4888 static int 4889 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit) 4890 { 4891 #ifdef lint 4892 cpuid = cpuid; 4893 #endif 4894 if (afsr_bit & C_AFSR_MSYND_ERRS) { 4895 return (afsr_to_msynd_status(afsr, afsr_bit)); 4896 } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { 4897 #if defined(CHEETAH_PLUS) 4898 /* 4899 * The E_SYND overwrite policy is slightly different 4900 * for Panther CPUs. 4901 */ 4902 if (IS_PANTHER(cpunodes[cpuid].implementation)) 4903 return (afsr_to_pn_esynd_status(afsr, afsr_bit)); 4904 else 4905 return (afsr_to_esynd_status(afsr, afsr_bit)); 4906 #else /* CHEETAH_PLUS */ 4907 return (afsr_to_esynd_status(afsr, afsr_bit)); 4908 #endif /* CHEETAH_PLUS */ 4909 } else { 4910 return (AFLT_STAT_INVALID); 4911 } 4912 } 4913 4914 /* 4915 * Slave CPU stick synchronization. 4916 */ 4917 void 4918 sticksync_slave(void) 4919 { 4920 int i; 4921 int tries = 0; 4922 int64_t tskew; 4923 int64_t av_tskew; 4924 4925 kpreempt_disable(); 4926 /* wait for the master side */ 4927 while (stick_sync_cmd != SLAVE_START) 4928 ; 4929 /* 4930 * Synchronization should only take a few tries at most. But in the 4931 * odd case where the cpu isn't cooperating we'll keep trying. A cpu 4932 * without it's stick synchronized wouldn't be a good citizen. 4933 */ 4934 while (slave_done == 0) { 4935 /* 4936 * Time skew calculation. 4937 */ 4938 av_tskew = tskew = 0; 4939 4940 for (i = 0; i < stick_iter; i++) { 4941 /* make location hot */ 4942 timestamp[EV_A_START] = 0; 4943 stick_timestamp(×tamp[EV_A_START]); 4944 4945 /* tell the master we're ready */ 4946 stick_sync_cmd = MASTER_START; 4947 4948 /* and wait */ 4949 while (stick_sync_cmd != SLAVE_CONT) 4950 ; 4951 /* Event B end */ 4952 stick_timestamp(×tamp[EV_B_END]); 4953 4954 /* calculate time skew */ 4955 tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START]) 4956 - (timestamp[EV_A_END] - 4957 timestamp[EV_A_START])) / 2; 4958 4959 /* keep running count */ 4960 av_tskew += tskew; 4961 } /* for */ 4962 4963 /* 4964 * Adjust stick for time skew if not within the max allowed; 4965 * otherwise we're all done. 4966 */ 4967 if (stick_iter != 0) 4968 av_tskew = av_tskew/stick_iter; 4969 if (ABS(av_tskew) > stick_tsk) { 4970 /* 4971 * If the skew is 1 (the slave's STICK register 4972 * is 1 STICK ahead of the master's), stick_adj 4973 * could fail to adjust the slave's STICK register 4974 * if the STICK read on the slave happens to 4975 * align with the increment of the STICK. 4976 * Therefore, we increment the skew to 2. 4977 */ 4978 if (av_tskew == 1) 4979 av_tskew++; 4980 stick_adj(-av_tskew); 4981 } else 4982 slave_done = 1; 4983 #ifdef DEBUG 4984 if (tries < DSYNC_ATTEMPTS) 4985 stick_sync_stats[CPU->cpu_id].skew_val[tries] = 4986 av_tskew; 4987 ++tries; 4988 #endif /* DEBUG */ 4989 #ifdef lint 4990 tries = tries; 4991 #endif 4992 4993 } /* while */ 4994 4995 /* allow the master to finish */ 4996 stick_sync_cmd = EVENT_NULL; 4997 kpreempt_enable(); 4998 } 4999 5000 /* 5001 * Master CPU side of stick synchronization. 5002 * - timestamp end of Event A 5003 * - timestamp beginning of Event B 5004 */ 5005 void 5006 sticksync_master(void) 5007 { 5008 int i; 5009 5010 kpreempt_disable(); 5011 /* tell the slave we've started */ 5012 slave_done = 0; 5013 stick_sync_cmd = SLAVE_START; 5014 5015 while (slave_done == 0) { 5016 for (i = 0; i < stick_iter; i++) { 5017 /* wait for the slave */ 5018 while (stick_sync_cmd != MASTER_START) 5019 ; 5020 /* Event A end */ 5021 stick_timestamp(×tamp[EV_A_END]); 5022 5023 /* make location hot */ 5024 timestamp[EV_B_START] = 0; 5025 stick_timestamp(×tamp[EV_B_START]); 5026 5027 /* tell the slave to continue */ 5028 stick_sync_cmd = SLAVE_CONT; 5029 } /* for */ 5030 5031 /* wait while slave calculates time skew */ 5032 while (stick_sync_cmd == SLAVE_CONT) 5033 ; 5034 } /* while */ 5035 kpreempt_enable(); 5036 } 5037 5038 /* 5039 * Cheetah/Cheetah+ have disrupting error for copyback's, so we don't need to 5040 * do Spitfire hack of xcall'ing all the cpus to ask to check for them. Also, 5041 * in cpu_async_panic_callb, each cpu checks for CPU events on its way to 5042 * panic idle. 5043 */ 5044 /*ARGSUSED*/ 5045 void 5046 cpu_check_allcpus(struct async_flt *aflt) 5047 {} 5048 5049 struct kmem_cache *ch_private_cache; 5050 5051 /* 5052 * Cpu private unitialization. Uninitialize the Ecache scrubber and 5053 * deallocate the scrubber data structures and cpu_private data structure. 5054 */ 5055 void 5056 cpu_uninit_private(struct cpu *cp) 5057 { 5058 cheetah_private_t *chprp = CPU_PRIVATE(cp); 5059 5060 ASSERT(chprp); 5061 cpu_uninit_ecache_scrub_dr(cp); 5062 CPU_PRIVATE(cp) = NULL; 5063 ch_err_tl1_paddrs[cp->cpu_id] = NULL; 5064 kmem_cache_free(ch_private_cache, chprp); 5065 cmp_delete_cpu(cp->cpu_id); 5066 5067 } 5068 5069 /* 5070 * Cheetah Cache Scrubbing 5071 * 5072 * The primary purpose of Cheetah cache scrubbing is to reduce the exposure 5073 * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not 5074 * protected by either parity or ECC. 5075 * 5076 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the 5077 * cache per second). Due to the the specifics of how the I$ control 5078 * logic works with respect to the ASI used to scrub I$ lines, the entire 5079 * I$ is scanned at once. 5080 */ 5081 5082 /* 5083 * Tuneables to enable and disable the scrubbing of the caches, and to tune 5084 * scrubbing behavior. These may be changed via /etc/system or using mdb 5085 * on a running system. 5086 */ 5087 int dcache_scrub_enable = 1; /* D$ scrubbing is on by default */ 5088 5089 /* 5090 * The following are the PIL levels that the softints/cross traps will fire at. 5091 */ 5092 uint_t ecache_scrub_pil = PIL_9; /* E$ scrub PIL for cross traps */ 5093 uint_t dcache_scrub_pil = PIL_9; /* D$ scrub PIL for cross traps */ 5094 uint_t icache_scrub_pil = PIL_9; /* I$ scrub PIL for cross traps */ 5095 5096 #if defined(JALAPENO) 5097 5098 /* 5099 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber 5100 * on Jalapeno. 5101 */ 5102 int ecache_scrub_enable = 0; 5103 5104 #else /* JALAPENO */ 5105 5106 /* 5107 * With all other cpu types, E$ scrubbing is on by default 5108 */ 5109 int ecache_scrub_enable = 1; 5110 5111 #endif /* JALAPENO */ 5112 5113 5114 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO) 5115 5116 /* 5117 * The I$ scrubber tends to cause latency problems for real-time SW, so it 5118 * is disabled by default on non-Cheetah systems 5119 */ 5120 int icache_scrub_enable = 0; 5121 5122 /* 5123 * Tuneables specifying the scrub calls per second and the scan rate 5124 * for each cache 5125 * 5126 * The cyclic times are set during boot based on the following values. 5127 * Changing these values in mdb after this time will have no effect. If 5128 * a different value is desired, it must be set in /etc/system before a 5129 * reboot. 5130 */ 5131 int ecache_calls_a_sec = 1; 5132 int dcache_calls_a_sec = 2; 5133 int icache_calls_a_sec = 2; 5134 5135 int ecache_scan_rate_idle = 1; 5136 int ecache_scan_rate_busy = 1; 5137 int dcache_scan_rate_idle = 1; 5138 int dcache_scan_rate_busy = 1; 5139 int icache_scan_rate_idle = 1; 5140 int icache_scan_rate_busy = 1; 5141 5142 #else /* CHEETAH_PLUS || JALAPENO || SERRANO */ 5143 5144 int icache_scrub_enable = 1; /* I$ scrubbing is on by default */ 5145 5146 int ecache_calls_a_sec = 100; /* E$ scrub calls per seconds */ 5147 int dcache_calls_a_sec = 100; /* D$ scrub calls per seconds */ 5148 int icache_calls_a_sec = 100; /* I$ scrub calls per seconds */ 5149 5150 int ecache_scan_rate_idle = 100; /* E$ scan rate (in tenths of a %) */ 5151 int ecache_scan_rate_busy = 100; /* E$ scan rate (in tenths of a %) */ 5152 int dcache_scan_rate_idle = 100; /* D$ scan rate (in tenths of a %) */ 5153 int dcache_scan_rate_busy = 100; /* D$ scan rate (in tenths of a %) */ 5154 int icache_scan_rate_idle = 100; /* I$ scan rate (in tenths of a %) */ 5155 int icache_scan_rate_busy = 100; /* I$ scan rate (in tenths of a %) */ 5156 5157 #endif /* CHEETAH_PLUS || JALAPENO || SERRANO */ 5158 5159 /* 5160 * In order to scrub on offline cpus, a cross trap is sent. The handler will 5161 * increment the outstanding request counter and schedule a softint to run 5162 * the scrubber. 5163 */ 5164 extern xcfunc_t cache_scrubreq_tl1; 5165 5166 /* 5167 * These are the softint functions for each cache scrubber 5168 */ 5169 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2); 5170 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2); 5171 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2); 5172 5173 /* 5174 * The cache scrub info table contains cache specific information 5175 * and allows for some of the scrub code to be table driven, reducing 5176 * duplication of cache similar code. 5177 * 5178 * This table keeps a copy of the value in the calls per second variable 5179 * (?cache_calls_a_sec). This makes it much more difficult for someone 5180 * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in 5181 * mdb in a misguided attempt to disable the scrubber). 5182 */ 5183 struct scrub_info { 5184 int *csi_enable; /* scrubber enable flag */ 5185 int csi_freq; /* scrubber calls per second */ 5186 int csi_index; /* index to chsm_outstanding[] */ 5187 uint_t csi_inum; /* scrubber interrupt number */ 5188 cyclic_id_t csi_omni_cyc_id; /* omni cyclic ID */ 5189 cyclic_id_t csi_offline_cyc_id; /* offline cyclic ID */ 5190 char csi_name[3]; /* cache name for this scrub entry */ 5191 } cache_scrub_info[] = { 5192 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"}, 5193 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"}, 5194 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"} 5195 }; 5196 5197 /* 5198 * If scrubbing is enabled, increment the outstanding request counter. If it 5199 * is 1 (meaning there were no previous requests outstanding), call 5200 * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing 5201 * a self trap. 5202 */ 5203 static void 5204 do_scrub(struct scrub_info *csi) 5205 { 5206 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5207 int index = csi->csi_index; 5208 uint32_t *outstanding = &csmp->chsm_outstanding[index]; 5209 5210 if (*(csi->csi_enable) && (csmp->chsm_enable[index])) { 5211 if (atomic_add_32_nv(outstanding, 1) == 1) { 5212 xt_one_unchecked(CPU->cpu_id, setsoftint_tl1, 5213 csi->csi_inum, 0); 5214 } 5215 } 5216 } 5217 5218 /* 5219 * Omni cyclics don't fire on offline cpus, so we use another cyclic to 5220 * cross-trap the offline cpus. 5221 */ 5222 static void 5223 do_scrub_offline(struct scrub_info *csi) 5224 { 5225 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5226 5227 if (CPUSET_ISNULL(cpu_offline_set)) { 5228 /* 5229 * No offline cpus - nothing to do 5230 */ 5231 return; 5232 } 5233 5234 if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) { 5235 xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum, 5236 csi->csi_index); 5237 } 5238 } 5239 5240 /* 5241 * This is the initial setup for the scrubber cyclics - it sets the 5242 * interrupt level, frequency, and function to call. 5243 */ 5244 /*ARGSUSED*/ 5245 static void 5246 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, 5247 cyc_time_t *when) 5248 { 5249 struct scrub_info *csi = (struct scrub_info *)arg; 5250 5251 ASSERT(csi != NULL); 5252 hdlr->cyh_func = (cyc_func_t)do_scrub; 5253 hdlr->cyh_level = CY_LOW_LEVEL; 5254 hdlr->cyh_arg = arg; 5255 5256 when->cyt_when = 0; /* Start immediately */ 5257 when->cyt_interval = NANOSEC / csi->csi_freq; 5258 } 5259 5260 /* 5261 * Initialization for cache scrubbing. 5262 * This routine is called AFTER all cpus have had cpu_init_private called 5263 * to initialize their private data areas. 5264 */ 5265 void 5266 cpu_init_cache_scrub(void) 5267 { 5268 int i; 5269 struct scrub_info *csi; 5270 cyc_omni_handler_t omni_hdlr; 5271 cyc_handler_t offline_hdlr; 5272 cyc_time_t when; 5273 5274 /* 5275 * save away the maximum number of lines for the D$ 5276 */ 5277 dcache_nlines = dcache_size / dcache_linesize; 5278 5279 /* 5280 * register the softints for the cache scrubbing 5281 */ 5282 cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum = 5283 add_softintr(ecache_scrub_pil, scrub_ecache_line_intr, 5284 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]); 5285 cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec; 5286 5287 cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum = 5288 add_softintr(dcache_scrub_pil, scrub_dcache_line_intr, 5289 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]); 5290 cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec; 5291 5292 cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum = 5293 add_softintr(icache_scrub_pil, scrub_icache_line_intr, 5294 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]); 5295 cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec; 5296 5297 /* 5298 * start the scrubbing for all the caches 5299 */ 5300 mutex_enter(&cpu_lock); 5301 for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) { 5302 5303 csi = &cache_scrub_info[i]; 5304 5305 if (!(*csi->csi_enable)) 5306 continue; 5307 5308 /* 5309 * force the following to be true: 5310 * 1 <= calls_a_sec <= hz 5311 */ 5312 if (csi->csi_freq > hz) { 5313 cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high " 5314 "(%d); resetting to hz (%d)", csi->csi_name, 5315 csi->csi_freq, hz); 5316 csi->csi_freq = hz; 5317 } else if (csi->csi_freq < 1) { 5318 cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low " 5319 "(%d); resetting to 1", csi->csi_name, 5320 csi->csi_freq); 5321 csi->csi_freq = 1; 5322 } 5323 5324 omni_hdlr.cyo_online = cpu_scrub_cyclic_setup; 5325 omni_hdlr.cyo_offline = NULL; 5326 omni_hdlr.cyo_arg = (void *)csi; 5327 5328 offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline; 5329 offline_hdlr.cyh_arg = (void *)csi; 5330 offline_hdlr.cyh_level = CY_LOW_LEVEL; 5331 5332 when.cyt_when = 0; /* Start immediately */ 5333 when.cyt_interval = NANOSEC / csi->csi_freq; 5334 5335 csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr); 5336 csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when); 5337 } 5338 register_cpu_setup_func(cpu_scrub_cpu_setup, NULL); 5339 mutex_exit(&cpu_lock); 5340 } 5341 5342 /* 5343 * Indicate that the specified cpu is idle. 5344 */ 5345 void 5346 cpu_idle_ecache_scrub(struct cpu *cp) 5347 { 5348 if (CPU_PRIVATE(cp) != NULL) { 5349 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5350 csmp->chsm_ecache_busy = ECACHE_CPU_IDLE; 5351 } 5352 } 5353 5354 /* 5355 * Indicate that the specified cpu is busy. 5356 */ 5357 void 5358 cpu_busy_ecache_scrub(struct cpu *cp) 5359 { 5360 if (CPU_PRIVATE(cp) != NULL) { 5361 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5362 csmp->chsm_ecache_busy = ECACHE_CPU_BUSY; 5363 } 5364 } 5365 5366 /* 5367 * Initialization for cache scrubbing for the specified cpu. 5368 */ 5369 void 5370 cpu_init_ecache_scrub_dr(struct cpu *cp) 5371 { 5372 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5373 int cpuid = cp->cpu_id; 5374 5375 /* initialize the number of lines in the caches */ 5376 csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size / 5377 cpunodes[cpuid].ecache_linesize; 5378 csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) / 5379 CPU_PRIVATE_VAL(cp, chpr_icache_linesize); 5380 5381 /* 5382 * do_scrub() and do_scrub_offline() check both the global 5383 * ?cache_scrub_enable and this per-cpu enable variable. All scrubbers 5384 * check this value before scrubbing. Currently, we use it to 5385 * disable the E$ scrubber on multi-core cpus or while running at 5386 * slowed speed. For now, just turn everything on and allow 5387 * cpu_init_private() to change it if necessary. 5388 */ 5389 csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1; 5390 csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1; 5391 csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1; 5392 5393 cpu_busy_ecache_scrub(cp); 5394 } 5395 5396 /* 5397 * Un-initialization for cache scrubbing for the specified cpu. 5398 */ 5399 static void 5400 cpu_uninit_ecache_scrub_dr(struct cpu *cp) 5401 { 5402 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5403 5404 /* 5405 * un-initialize bookkeeping for cache scrubbing 5406 */ 5407 bzero(csmp, sizeof (ch_scrub_misc_t)); 5408 5409 cpu_idle_ecache_scrub(cp); 5410 } 5411 5412 /* 5413 * Called periodically on each CPU to scrub the D$. 5414 */ 5415 static void 5416 scrub_dcache(int how_many) 5417 { 5418 int i; 5419 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5420 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D]; 5421 5422 /* 5423 * scrub the desired number of lines 5424 */ 5425 for (i = 0; i < how_many; i++) { 5426 /* 5427 * scrub a D$ line 5428 */ 5429 dcache_inval_line(index); 5430 5431 /* 5432 * calculate the next D$ line to scrub, assumes 5433 * that dcache_nlines is a power of 2 5434 */ 5435 index = (index + 1) & (dcache_nlines - 1); 5436 } 5437 5438 /* 5439 * set the scrub index for the next visit 5440 */ 5441 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index; 5442 } 5443 5444 /* 5445 * Handler for D$ scrub inum softint. Call scrub_dcache until 5446 * we decrement the outstanding request count to zero. 5447 */ 5448 /*ARGSUSED*/ 5449 static uint_t 5450 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2) 5451 { 5452 int i; 5453 int how_many; 5454 int outstanding; 5455 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5456 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D]; 5457 struct scrub_info *csi = (struct scrub_info *)arg1; 5458 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 5459 dcache_scan_rate_idle : dcache_scan_rate_busy; 5460 5461 /* 5462 * The scan rates are expressed in units of tenths of a 5463 * percent. A scan rate of 1000 (100%) means the whole 5464 * cache is scanned every second. 5465 */ 5466 how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq); 5467 5468 do { 5469 outstanding = *countp; 5470 for (i = 0; i < outstanding; i++) { 5471 scrub_dcache(how_many); 5472 } 5473 } while (atomic_add_32_nv(countp, -outstanding)); 5474 5475 return (DDI_INTR_CLAIMED); 5476 } 5477 5478 /* 5479 * Called periodically on each CPU to scrub the I$. The I$ is scrubbed 5480 * by invalidating lines. Due to the characteristics of the ASI which 5481 * is used to invalidate an I$ line, the entire I$ must be invalidated 5482 * vs. an individual I$ line. 5483 */ 5484 static void 5485 scrub_icache(int how_many) 5486 { 5487 int i; 5488 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5489 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I]; 5490 int icache_nlines = csmp->chsm_icache_nlines; 5491 5492 /* 5493 * scrub the desired number of lines 5494 */ 5495 for (i = 0; i < how_many; i++) { 5496 /* 5497 * since the entire I$ must be scrubbed at once, 5498 * wait until the index wraps to zero to invalidate 5499 * the entire I$ 5500 */ 5501 if (index == 0) { 5502 icache_inval_all(); 5503 } 5504 5505 /* 5506 * calculate the next I$ line to scrub, assumes 5507 * that chsm_icache_nlines is a power of 2 5508 */ 5509 index = (index + 1) & (icache_nlines - 1); 5510 } 5511 5512 /* 5513 * set the scrub index for the next visit 5514 */ 5515 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index; 5516 } 5517 5518 /* 5519 * Handler for I$ scrub inum softint. Call scrub_icache until 5520 * we decrement the outstanding request count to zero. 5521 */ 5522 /*ARGSUSED*/ 5523 static uint_t 5524 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2) 5525 { 5526 int i; 5527 int how_many; 5528 int outstanding; 5529 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5530 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I]; 5531 struct scrub_info *csi = (struct scrub_info *)arg1; 5532 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 5533 icache_scan_rate_idle : icache_scan_rate_busy; 5534 int icache_nlines = csmp->chsm_icache_nlines; 5535 5536 /* 5537 * The scan rates are expressed in units of tenths of a 5538 * percent. A scan rate of 1000 (100%) means the whole 5539 * cache is scanned every second. 5540 */ 5541 how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq); 5542 5543 do { 5544 outstanding = *countp; 5545 for (i = 0; i < outstanding; i++) { 5546 scrub_icache(how_many); 5547 } 5548 } while (atomic_add_32_nv(countp, -outstanding)); 5549 5550 return (DDI_INTR_CLAIMED); 5551 } 5552 5553 /* 5554 * Called periodically on each CPU to scrub the E$. 5555 */ 5556 static void 5557 scrub_ecache(int how_many) 5558 { 5559 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5560 int i; 5561 int cpuid = CPU->cpu_id; 5562 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E]; 5563 int nlines = csmp->chsm_ecache_nlines; 5564 int linesize = cpunodes[cpuid].ecache_linesize; 5565 int ec_set_size = cpu_ecache_set_size(CPU); 5566 5567 /* 5568 * scrub the desired number of lines 5569 */ 5570 for (i = 0; i < how_many; i++) { 5571 /* 5572 * scrub the E$ line 5573 */ 5574 ecache_flush_line(ecache_flushaddr + (index * linesize), 5575 ec_set_size); 5576 5577 /* 5578 * calculate the next E$ line to scrub based on twice 5579 * the number of E$ lines (to displace lines containing 5580 * flush area data), assumes that the number of lines 5581 * is a power of 2 5582 */ 5583 index = (index + 1) & ((nlines << 1) - 1); 5584 } 5585 5586 /* 5587 * set the ecache scrub index for the next visit 5588 */ 5589 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index; 5590 } 5591 5592 /* 5593 * Handler for E$ scrub inum softint. Call the E$ scrubber until 5594 * we decrement the outstanding request count to zero. 5595 * 5596 * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may 5597 * become negative after the atomic_add_32_nv(). This is not a problem, as 5598 * the next trip around the loop won't scrub anything, and the next add will 5599 * reset the count back to zero. 5600 */ 5601 /*ARGSUSED*/ 5602 static uint_t 5603 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) 5604 { 5605 int i; 5606 int how_many; 5607 int outstanding; 5608 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5609 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E]; 5610 struct scrub_info *csi = (struct scrub_info *)arg1; 5611 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 5612 ecache_scan_rate_idle : ecache_scan_rate_busy; 5613 int ecache_nlines = csmp->chsm_ecache_nlines; 5614 5615 /* 5616 * The scan rates are expressed in units of tenths of a 5617 * percent. A scan rate of 1000 (100%) means the whole 5618 * cache is scanned every second. 5619 */ 5620 how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq); 5621 5622 do { 5623 outstanding = *countp; 5624 for (i = 0; i < outstanding; i++) { 5625 scrub_ecache(how_many); 5626 } 5627 } while (atomic_add_32_nv(countp, -outstanding)); 5628 5629 return (DDI_INTR_CLAIMED); 5630 } 5631 5632 /* 5633 * Timeout function to reenable CE 5634 */ 5635 static void 5636 cpu_delayed_check_ce_errors(void *arg) 5637 { 5638 if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg, 5639 TQ_NOSLEEP)) { 5640 (void) timeout(cpu_delayed_check_ce_errors, arg, 5641 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); 5642 } 5643 } 5644 5645 /* 5646 * CE Deferred Re-enable after trap. 5647 * 5648 * When the CPU gets a disrupting trap for any of the errors 5649 * controlled by the CEEN bit, CEEN is disabled in the trap handler 5650 * immediately. To eliminate the possibility of multiple CEs causing 5651 * recursive stack overflow in the trap handler, we cannot 5652 * reenable CEEN while still running in the trap handler. Instead, 5653 * after a CE is logged on a CPU, we schedule a timeout function, 5654 * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs 5655 * seconds. This function will check whether any further CEs 5656 * have occurred on that CPU, and if none have, will reenable CEEN. 5657 * 5658 * If further CEs have occurred while CEEN is disabled, another 5659 * timeout will be scheduled. This is to ensure that the CPU can 5660 * make progress in the face of CE 'storms', and that it does not 5661 * spend all its time logging CE errors. 5662 */ 5663 static void 5664 cpu_check_ce_errors(void *arg) 5665 { 5666 int cpuid = (int)arg; 5667 cpu_t *cp; 5668 5669 /* 5670 * We acquire cpu_lock. 5671 */ 5672 ASSERT(curthread->t_pil == 0); 5673 5674 /* 5675 * verify that the cpu is still around, DR 5676 * could have got there first ... 5677 */ 5678 mutex_enter(&cpu_lock); 5679 cp = cpu_get(cpuid); 5680 if (cp == NULL) { 5681 mutex_exit(&cpu_lock); 5682 return; 5683 } 5684 /* 5685 * make sure we don't migrate across CPUs 5686 * while checking our CE status. 5687 */ 5688 kpreempt_disable(); 5689 5690 /* 5691 * If we are running on the CPU that got the 5692 * CE, we can do the checks directly. 5693 */ 5694 if (cp->cpu_id == CPU->cpu_id) { 5695 mutex_exit(&cpu_lock); 5696 cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0); 5697 kpreempt_enable(); 5698 return; 5699 } 5700 kpreempt_enable(); 5701 5702 /* 5703 * send an x-call to get the CPU that originally 5704 * got the CE to do the necessary checks. If we can't 5705 * send the x-call, reschedule the timeout, otherwise we 5706 * lose CEEN forever on that CPU. 5707 */ 5708 if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) { 5709 xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce, 5710 TIMEOUT_CEEN_CHECK, 0); 5711 mutex_exit(&cpu_lock); 5712 } else { 5713 /* 5714 * When the CPU is not accepting xcalls, or 5715 * the processor is offlined, we don't want to 5716 * incur the extra overhead of trying to schedule the 5717 * CE timeout indefinitely. However, we don't want to lose 5718 * CE checking forever. 5719 * 5720 * Keep rescheduling the timeout, accepting the additional 5721 * overhead as the cost of correctness in the case where we get 5722 * a CE, disable CEEN, offline the CPU during the 5723 * the timeout interval, and then online it at some 5724 * point in the future. This is unlikely given the short 5725 * cpu_ceen_delay_secs. 5726 */ 5727 mutex_exit(&cpu_lock); 5728 (void) timeout(cpu_delayed_check_ce_errors, (void *)cp->cpu_id, 5729 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); 5730 } 5731 } 5732 5733 /* 5734 * This routine will check whether CEs have occurred while 5735 * CEEN is disabled. Any CEs detected will be logged and, if 5736 * possible, scrubbed. 5737 * 5738 * The memscrubber will also use this routine to clear any errors 5739 * caused by its scrubbing with CEEN disabled. 5740 * 5741 * flag == SCRUBBER_CEEN_CHECK 5742 * called from memscrubber, just check/scrub, no reset 5743 * paddr physical addr. for start of scrub pages 5744 * vaddr virtual addr. for scrub area 5745 * psz page size of area to be scrubbed 5746 * 5747 * flag == TIMEOUT_CEEN_CHECK 5748 * timeout function has triggered, reset timeout or CEEN 5749 * 5750 * Note: We must not migrate cpus during this function. This can be 5751 * achieved by one of: 5752 * - invoking as target of an x-call in which case we're at XCALL_PIL 5753 * The flag value must be first xcall argument. 5754 * - disabling kernel preemption. This should be done for very short 5755 * periods so is not suitable for SCRUBBER_CEEN_CHECK where we might 5756 * scrub an extended area with cpu_check_block. The call for 5757 * TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept 5758 * brief for this case. 5759 * - binding to a cpu, eg with thread_affinity_set(). This is used 5760 * in the SCRUBBER_CEEN_CHECK case, but is not practical for 5761 * the TIMEOUT_CEEN_CHECK because both need cpu_lock. 5762 */ 5763 void 5764 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz) 5765 { 5766 ch_cpu_errors_t cpu_error_regs; 5767 uint64_t ec_err_enable; 5768 uint64_t page_offset; 5769 5770 /* Read AFSR */ 5771 get_cpu_error_state(&cpu_error_regs); 5772 5773 /* 5774 * If no CEEN errors have occurred during the timeout 5775 * interval, it is safe to re-enable CEEN and exit. 5776 */ 5777 if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) { 5778 if (flag == TIMEOUT_CEEN_CHECK && 5779 !((ec_err_enable = get_error_enable()) & EN_REG_CEEN)) 5780 set_error_enable(ec_err_enable | EN_REG_CEEN); 5781 return; 5782 } 5783 5784 /* 5785 * Ensure that CEEN was not reenabled (maybe by DR) before 5786 * we log/clear the error. 5787 */ 5788 if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN) 5789 set_error_enable(ec_err_enable & ~EN_REG_CEEN); 5790 5791 /* 5792 * log/clear the CE. If CE_CEEN_DEFER is passed, the 5793 * timeout will be rescheduled when the error is logged. 5794 */ 5795 if (!(cpu_error_regs.afsr & cpu_ce_not_deferred)) 5796 cpu_ce_detected(&cpu_error_regs, 5797 CE_CEEN_DEFER | CE_CEEN_TIMEOUT); 5798 else 5799 cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT); 5800 5801 /* 5802 * If the memory scrubber runs while CEEN is 5803 * disabled, (or if CEEN is disabled during the 5804 * scrub as a result of a CE being triggered by 5805 * it), the range being scrubbed will not be 5806 * completely cleaned. If there are multiple CEs 5807 * in the range at most two of these will be dealt 5808 * with, (one by the trap handler and one by the 5809 * timeout). It is also possible that none are dealt 5810 * with, (CEEN disabled and another CE occurs before 5811 * the timeout triggers). So to ensure that the 5812 * memory is actually scrubbed, we have to access each 5813 * memory location in the range and then check whether 5814 * that access causes a CE. 5815 */ 5816 if (flag == SCRUBBER_CEEN_CHECK && va) { 5817 if ((cpu_error_regs.afar >= pa) && 5818 (cpu_error_regs.afar < (pa + psz))) { 5819 /* 5820 * Force a load from physical memory for each 5821 * 64-byte block, then check AFSR to determine 5822 * whether this access caused an error. 5823 * 5824 * This is a slow way to do a scrub, but as it will 5825 * only be invoked when the memory scrubber actually 5826 * triggered a CE, it should not happen too 5827 * frequently. 5828 * 5829 * cut down what we need to check as the scrubber 5830 * has verified up to AFAR, so get it's offset 5831 * into the page and start there. 5832 */ 5833 page_offset = (uint64_t)(cpu_error_regs.afar & 5834 (psz - 1)); 5835 va = (caddr_t)(va + (P2ALIGN(page_offset, 64))); 5836 psz -= (uint_t)(P2ALIGN(page_offset, 64)); 5837 cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)), 5838 psz); 5839 } 5840 } 5841 5842 /* 5843 * Reset error enable if this CE is not masked. 5844 */ 5845 if ((flag == TIMEOUT_CEEN_CHECK) && 5846 (cpu_error_regs.afsr & cpu_ce_not_deferred)) 5847 set_error_enable(ec_err_enable | EN_REG_CEEN); 5848 5849 } 5850 5851 /* 5852 * Attempt a cpu logout for an error that we did not trap for, such 5853 * as a CE noticed with CEEN off. It is assumed that we are still running 5854 * on the cpu that took the error and that we cannot migrate. Returns 5855 * 0 on success, otherwise nonzero. 5856 */ 5857 static int 5858 cpu_ce_delayed_ec_logout(uint64_t afar) 5859 { 5860 ch_cpu_logout_t *clop; 5861 5862 if (CPU_PRIVATE(CPU) == NULL) 5863 return (0); 5864 5865 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); 5866 if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) != 5867 LOGOUT_INVALID) 5868 return (0); 5869 5870 cpu_delayed_logout(afar, clop); 5871 return (1); 5872 } 5873 5874 /* 5875 * We got an error while CEEN was disabled. We 5876 * need to clean up after it and log whatever 5877 * information we have on the CE. 5878 */ 5879 void 5880 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag) 5881 { 5882 ch_async_flt_t ch_flt; 5883 struct async_flt *aflt; 5884 char pr_reason[MAX_REASON_STRING]; 5885 5886 bzero(&ch_flt, sizeof (ch_async_flt_t)); 5887 ch_flt.flt_trapped_ce = flag; 5888 aflt = (struct async_flt *)&ch_flt; 5889 aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK; 5890 ch_flt.afsr_ext = cpu_error_regs->afsr_ext; 5891 ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) | 5892 (cpu_error_regs->afsr & C_AFSR_ALL_ERRS); 5893 aflt->flt_addr = cpu_error_regs->afar; 5894 #if defined(SERRANO) 5895 ch_flt.afar2 = cpu_error_regs->afar2; 5896 #endif /* SERRANO */ 5897 aflt->flt_pc = NULL; 5898 aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0); 5899 aflt->flt_tl = 0; 5900 aflt->flt_panic = 0; 5901 cpu_log_and_clear_ce(&ch_flt); 5902 5903 /* 5904 * check if we caused any errors during cleanup 5905 */ 5906 if (clear_errors(&ch_flt)) { 5907 pr_reason[0] = '\0'; 5908 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 5909 NULL); 5910 } 5911 } 5912 5913 /* 5914 * Log/clear CEEN-controlled disrupting errors 5915 */ 5916 static void 5917 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt) 5918 { 5919 struct async_flt *aflt; 5920 uint64_t afsr, afsr_errs; 5921 ch_cpu_logout_t *clop; 5922 char pr_reason[MAX_REASON_STRING]; 5923 on_trap_data_t *otp = curthread->t_ontrap; 5924 5925 aflt = (struct async_flt *)ch_flt; 5926 afsr = aflt->flt_stat; 5927 afsr_errs = ch_flt->afsr_errs; 5928 aflt->flt_id = gethrtime_waitfree(); 5929 aflt->flt_bus_id = getprocessorid(); 5930 aflt->flt_inst = CPU->cpu_id; 5931 aflt->flt_prot = AFLT_PROT_NONE; 5932 aflt->flt_class = CPU_FAULT; 5933 aflt->flt_status = ECC_C_TRAP; 5934 5935 pr_reason[0] = '\0'; 5936 /* 5937 * Get the CPU log out info for Disrupting Trap. 5938 */ 5939 if (CPU_PRIVATE(CPU) == NULL) { 5940 clop = NULL; 5941 ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID; 5942 } else { 5943 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); 5944 } 5945 5946 if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) { 5947 ch_cpu_errors_t cpu_error_regs; 5948 5949 get_cpu_error_state(&cpu_error_regs); 5950 (void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar); 5951 clop->clo_data.chd_afsr = cpu_error_regs.afsr; 5952 clop->clo_data.chd_afar = cpu_error_regs.afar; 5953 clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext; 5954 clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr; 5955 clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar; 5956 clop->clo_sdw_data.chd_afsr_ext = 5957 cpu_error_regs.shadow_afsr_ext; 5958 #if defined(SERRANO) 5959 clop->clo_data.chd_afar2 = cpu_error_regs.afar2; 5960 #endif /* SERRANO */ 5961 ch_flt->flt_data_incomplete = 1; 5962 5963 /* 5964 * The logging/clear code expects AFSR/AFAR to be cleared. 5965 * The trap handler does it for CEEN enabled errors 5966 * so we need to do it here. 5967 */ 5968 set_cpu_error_state(&cpu_error_regs); 5969 } 5970 5971 #if defined(JALAPENO) || defined(SERRANO) 5972 /* 5973 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno. 5974 * For Serrano, even thou we do have the AFAR, we still do the 5975 * scrub on the RCE side since that's where the error type can 5976 * be properly classified as intermittent, persistent, etc. 5977 * 5978 * CE/RCE: If error is in memory and AFAR is valid, scrub the memory. 5979 * Must scrub memory before cpu_queue_events, as scrubbing memory sets 5980 * the flt_status bits. 5981 */ 5982 if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) && 5983 (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) || 5984 cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) { 5985 cpu_ce_scrub_mem_err(aflt, B_TRUE); 5986 } 5987 #else /* JALAPENO || SERRANO */ 5988 /* 5989 * CE/EMC: If error is in memory and AFAR is valid, scrub the memory. 5990 * Must scrub memory before cpu_queue_events, as scrubbing memory sets 5991 * the flt_status bits. 5992 */ 5993 if (afsr & (C_AFSR_CE|C_AFSR_EMC)) { 5994 if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) || 5995 cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) { 5996 cpu_ce_scrub_mem_err(aflt, B_TRUE); 5997 } 5998 } 5999 6000 #endif /* JALAPENO || SERRANO */ 6001 6002 /* 6003 * Update flt_prot if this error occurred under on_trap protection. 6004 */ 6005 if (otp != NULL && (otp->ot_prot & OT_DATA_EC)) 6006 aflt->flt_prot = AFLT_PROT_EC; 6007 6008 /* 6009 * Queue events on the async event queue, one event per error bit. 6010 */ 6011 if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 || 6012 (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) { 6013 ch_flt->flt_type = CPU_INV_AFSR; 6014 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, 6015 (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue, 6016 aflt->flt_panic); 6017 } 6018 6019 /* 6020 * Zero out + invalidate CPU logout. 6021 */ 6022 if (clop) { 6023 bzero(clop, sizeof (ch_cpu_logout_t)); 6024 clop->clo_data.chd_afar = LOGOUT_INVALID; 6025 } 6026 6027 /* 6028 * If either a CPC, WDC or EDC error has occurred while CEEN 6029 * was disabled, we need to flush either the entire 6030 * E$ or an E$ line. 6031 */ 6032 #if defined(JALAPENO) || defined(SERRANO) 6033 if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC)) 6034 #else /* JALAPENO || SERRANO */ 6035 if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC | 6036 C_AFSR_L3_CPC | C_AFSR_L3_WDC)) 6037 #endif /* JALAPENO || SERRANO */ 6038 cpu_error_ecache_flush(ch_flt); 6039 6040 } 6041 6042 /* 6043 * depending on the error type, we determine whether we 6044 * need to flush the entire ecache or just a line. 6045 */ 6046 static int 6047 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt) 6048 { 6049 struct async_flt *aflt; 6050 uint64_t afsr; 6051 uint64_t afsr_errs = ch_flt->afsr_errs; 6052 6053 aflt = (struct async_flt *)ch_flt; 6054 afsr = aflt->flt_stat; 6055 6056 /* 6057 * If we got multiple errors, no point in trying 6058 * the individual cases, just flush the whole cache 6059 */ 6060 if (afsr & C_AFSR_ME) { 6061 return (ECACHE_FLUSH_ALL); 6062 } 6063 6064 /* 6065 * If either a CPC, WDC or EDC error has occurred while CEEN 6066 * was disabled, we need to flush entire E$. We can't just 6067 * flush the cache line affected as the ME bit 6068 * is not set when multiple correctable errors of the same 6069 * type occur, so we might have multiple CPC or EDC errors, 6070 * with only the first recorded. 6071 */ 6072 #if defined(JALAPENO) || defined(SERRANO) 6073 if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) { 6074 #else /* JALAPENO || SERRANO */ 6075 if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC | 6076 C_AFSR_L3_EDC | C_AFSR_L3_WDC)) { 6077 #endif /* JALAPENO || SERRANO */ 6078 return (ECACHE_FLUSH_ALL); 6079 } 6080 6081 #if defined(JALAPENO) || defined(SERRANO) 6082 /* 6083 * If only UE or RUE is set, flush the Ecache line, otherwise 6084 * flush the entire Ecache. 6085 */ 6086 if (afsr & (C_AFSR_UE|C_AFSR_RUE)) { 6087 if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE || 6088 (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) { 6089 return (ECACHE_FLUSH_LINE); 6090 } else { 6091 return (ECACHE_FLUSH_ALL); 6092 } 6093 } 6094 #else /* JALAPENO || SERRANO */ 6095 /* 6096 * If UE only is set, flush the Ecache line, otherwise 6097 * flush the entire Ecache. 6098 */ 6099 if (afsr_errs & C_AFSR_UE) { 6100 if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == 6101 C_AFSR_UE) { 6102 return (ECACHE_FLUSH_LINE); 6103 } else { 6104 return (ECACHE_FLUSH_ALL); 6105 } 6106 } 6107 #endif /* JALAPENO || SERRANO */ 6108 6109 /* 6110 * EDU: If EDU only is set, flush the ecache line, otherwise 6111 * flush the entire Ecache. 6112 */ 6113 if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) { 6114 if (((afsr_errs & ~C_AFSR_EDU) == 0) || 6115 ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) { 6116 return (ECACHE_FLUSH_LINE); 6117 } else { 6118 return (ECACHE_FLUSH_ALL); 6119 } 6120 } 6121 6122 /* 6123 * BERR: If BERR only is set, flush the Ecache line, otherwise 6124 * flush the entire Ecache. 6125 */ 6126 if (afsr_errs & C_AFSR_BERR) { 6127 if ((afsr_errs & ~C_AFSR_BERR) == 0) { 6128 return (ECACHE_FLUSH_LINE); 6129 } else { 6130 return (ECACHE_FLUSH_ALL); 6131 } 6132 } 6133 6134 return (0); 6135 } 6136 6137 void 6138 cpu_error_ecache_flush(ch_async_flt_t *ch_flt) 6139 { 6140 int ecache_flush_flag = 6141 cpu_error_ecache_flush_required(ch_flt); 6142 6143 /* 6144 * Flush Ecache line or entire Ecache based on above checks. 6145 */ 6146 if (ecache_flush_flag == ECACHE_FLUSH_ALL) 6147 cpu_flush_ecache(); 6148 else if (ecache_flush_flag == ECACHE_FLUSH_LINE) { 6149 cpu_flush_ecache_line(ch_flt); 6150 } 6151 6152 } 6153 6154 /* 6155 * Extract the PA portion from the E$ tag. 6156 */ 6157 uint64_t 6158 cpu_ectag_to_pa(int setsize, uint64_t tag) 6159 { 6160 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6161 return (JG_ECTAG_TO_PA(setsize, tag)); 6162 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6163 return (PN_L3TAG_TO_PA(tag)); 6164 else 6165 return (CH_ECTAG_TO_PA(setsize, tag)); 6166 } 6167 6168 /* 6169 * Convert the E$ tag PA into an E$ subblock index. 6170 */ 6171 static int 6172 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr) 6173 { 6174 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6175 return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr)); 6176 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6177 /* Panther has only one subblock per line */ 6178 return (0); 6179 else 6180 return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr)); 6181 } 6182 6183 /* 6184 * All subblocks in an E$ line must be invalid for 6185 * the line to be invalid. 6186 */ 6187 int 6188 cpu_ectag_line_invalid(int cachesize, uint64_t tag) 6189 { 6190 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6191 return (JG_ECTAG_LINE_INVALID(cachesize, tag)); 6192 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6193 return (PN_L3_LINE_INVALID(tag)); 6194 else 6195 return (CH_ECTAG_LINE_INVALID(cachesize, tag)); 6196 } 6197 6198 /* 6199 * Extract state bits for a subblock given the tag. Note that for Panther 6200 * this works on both l2 and l3 tags. 6201 */ 6202 static int 6203 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag) 6204 { 6205 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6206 return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag)); 6207 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6208 return (tag & CH_ECSTATE_MASK); 6209 else 6210 return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag)); 6211 } 6212 6213 /* 6214 * Cpu specific initialization. 6215 */ 6216 void 6217 cpu_mp_init(void) 6218 { 6219 #ifdef CHEETAHPLUS_ERRATUM_25 6220 if (cheetah_sendmondo_recover) { 6221 cheetah_nudge_init(); 6222 } 6223 #endif 6224 } 6225 6226 void 6227 cpu_ereport_post(struct async_flt *aflt) 6228 { 6229 char *cpu_type, buf[FM_MAX_CLASS]; 6230 nv_alloc_t *nva = NULL; 6231 nvlist_t *ereport, *detector, *resource; 6232 errorq_elem_t *eqep; 6233 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 6234 char unum[UNUM_NAMLEN]; 6235 int len = 0; 6236 uint8_t msg_type; 6237 plat_ecc_ch_async_flt_t plat_ecc_ch_flt; 6238 6239 if (aflt->flt_panic || panicstr) { 6240 eqep = errorq_reserve(ereport_errorq); 6241 if (eqep == NULL) 6242 return; 6243 ereport = errorq_elem_nvl(ereport_errorq, eqep); 6244 nva = errorq_elem_nva(ereport_errorq, eqep); 6245 } else { 6246 ereport = fm_nvlist_create(nva); 6247 } 6248 6249 /* 6250 * Create the scheme "cpu" FMRI. 6251 */ 6252 detector = fm_nvlist_create(nva); 6253 resource = fm_nvlist_create(nva); 6254 switch (cpunodes[aflt->flt_inst].implementation) { 6255 case CHEETAH_IMPL: 6256 cpu_type = FM_EREPORT_CPU_USIII; 6257 break; 6258 case CHEETAH_PLUS_IMPL: 6259 cpu_type = FM_EREPORT_CPU_USIIIplus; 6260 break; 6261 case JALAPENO_IMPL: 6262 cpu_type = FM_EREPORT_CPU_USIIIi; 6263 break; 6264 case SERRANO_IMPL: 6265 cpu_type = FM_EREPORT_CPU_USIIIiplus; 6266 break; 6267 case JAGUAR_IMPL: 6268 cpu_type = FM_EREPORT_CPU_USIV; 6269 break; 6270 case PANTHER_IMPL: 6271 cpu_type = FM_EREPORT_CPU_USIVplus; 6272 break; 6273 default: 6274 cpu_type = FM_EREPORT_CPU_UNSUPPORTED; 6275 break; 6276 } 6277 (void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL, 6278 aflt->flt_inst, (uint8_t)cpunodes[aflt->flt_inst].version, 6279 cpunodes[aflt->flt_inst].device_id); 6280 6281 /* 6282 * Encode all the common data into the ereport. 6283 */ 6284 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", 6285 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class); 6286 6287 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 6288 fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1), 6289 detector, NULL); 6290 6291 /* 6292 * Encode the error specific data that was saved in 6293 * the async_flt structure into the ereport. 6294 */ 6295 cpu_payload_add_aflt(aflt, ereport, resource, 6296 &plat_ecc_ch_flt.ecaf_afar_status, 6297 &plat_ecc_ch_flt.ecaf_synd_status); 6298 6299 if (aflt->flt_panic || panicstr) { 6300 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 6301 } else { 6302 (void) fm_ereport_post(ereport, EVCH_TRYHARD); 6303 fm_nvlist_destroy(ereport, FM_NVA_FREE); 6304 fm_nvlist_destroy(detector, FM_NVA_FREE); 6305 fm_nvlist_destroy(resource, FM_NVA_FREE); 6306 } 6307 /* 6308 * Send the enhanced error information (plat_ecc_error2_data_t) 6309 * to the SC olny if it can process it. 6310 */ 6311 6312 if (&plat_ecc_capability_sc_get && 6313 plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) { 6314 msg_type = cpu_flt_bit_to_plat_error(aflt); 6315 if (msg_type != PLAT_ECC_ERROR2_NONE) { 6316 /* 6317 * If afar status is not invalid do a unum lookup. 6318 */ 6319 if (plat_ecc_ch_flt.ecaf_afar_status != 6320 AFLT_STAT_INVALID) { 6321 (void) cpu_get_mem_unum_aflt( 6322 plat_ecc_ch_flt.ecaf_synd_status, aflt, 6323 unum, UNUM_NAMLEN, &len); 6324 } else { 6325 unum[0] = '\0'; 6326 } 6327 plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar; 6328 plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr; 6329 plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext; 6330 plat_ecc_ch_flt.ecaf_sdw_afsr_ext = 6331 ch_flt->flt_sdw_afsr_ext; 6332 6333 if (&plat_log_fruid_error2) 6334 plat_log_fruid_error2(msg_type, unum, aflt, 6335 &plat_ecc_ch_flt); 6336 } 6337 } 6338 } 6339 6340 void 6341 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 6342 { 6343 int status; 6344 ddi_fm_error_t de; 6345 6346 bzero(&de, sizeof (ddi_fm_error_t)); 6347 6348 de.fme_version = DDI_FME_VERSION; 6349 de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, 6350 FM_ENA_FMT1); 6351 de.fme_flag = expected; 6352 de.fme_bus_specific = (void *)aflt->flt_addr; 6353 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 6354 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 6355 aflt->flt_panic = 1; 6356 } 6357 6358 void 6359 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 6360 errorq_t *eqp, uint_t flag) 6361 { 6362 struct async_flt *aflt = (struct async_flt *)payload; 6363 6364 aflt->flt_erpt_class = error_class; 6365 errorq_dispatch(eqp, payload, payload_sz, flag); 6366 } 6367 6368 /* 6369 * This routine may be called by the IO module, but does not do 6370 * anything in this cpu module. The SERD algorithm is handled by 6371 * cpumem-diagnosis engine instead. 6372 */ 6373 /*ARGSUSED*/ 6374 void 6375 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 6376 {} 6377 6378 void 6379 adjust_hw_copy_limits(int ecache_size) 6380 { 6381 /* 6382 * Set hw copy limits. 6383 * 6384 * /etc/system will be parsed later and can override one or more 6385 * of these settings. 6386 * 6387 * At this time, ecache size seems only mildly relevant. 6388 * We seem to run into issues with the d-cache and stalls 6389 * we see on misses. 6390 * 6391 * Cycle measurement indicates that 2 byte aligned copies fare 6392 * little better than doing things with VIS at around 512 bytes. 6393 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte 6394 * aligned is faster whenever the source and destination data 6395 * in cache and the total size is less than 2 Kbytes. The 2K 6396 * limit seems to be driven by the 2K write cache. 6397 * When more than 2K of copies are done in non-VIS mode, stores 6398 * backup in the write cache. In VIS mode, the write cache is 6399 * bypassed, allowing faster cache-line writes aligned on cache 6400 * boundaries. 6401 * 6402 * In addition, in non-VIS mode, there is no prefetching, so 6403 * for larger copies, the advantage of prefetching to avoid even 6404 * occasional cache misses is enough to justify using the VIS code. 6405 * 6406 * During testing, it was discovered that netbench ran 3% slower 6407 * when hw_copy_limit_8 was 2K or larger. Apparently for server 6408 * applications, data is only used once (copied to the output 6409 * buffer, then copied by the network device off the system). Using 6410 * the VIS copy saves more L2 cache state. Network copies are 6411 * around 1.3K to 1.5K in size for historical reasons. 6412 * 6413 * Therefore, a limit of 1K bytes will be used for the 8 byte 6414 * aligned copy even for large caches and 8 MB ecache. The 6415 * infrastructure to allow different limits for different sized 6416 * caches is kept to allow further tuning in later releases. 6417 */ 6418 6419 if (min_ecache_size == 0 && use_hw_bcopy) { 6420 /* 6421 * First time through - should be before /etc/system 6422 * is read. 6423 * Could skip the checks for zero but this lets us 6424 * preserve any debugger rewrites. 6425 */ 6426 if (hw_copy_limit_1 == 0) { 6427 hw_copy_limit_1 = VIS_COPY_THRESHOLD; 6428 priv_hcl_1 = hw_copy_limit_1; 6429 } 6430 if (hw_copy_limit_2 == 0) { 6431 hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD; 6432 priv_hcl_2 = hw_copy_limit_2; 6433 } 6434 if (hw_copy_limit_4 == 0) { 6435 hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD; 6436 priv_hcl_4 = hw_copy_limit_4; 6437 } 6438 if (hw_copy_limit_8 == 0) { 6439 hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; 6440 priv_hcl_8 = hw_copy_limit_8; 6441 } 6442 min_ecache_size = ecache_size; 6443 } else { 6444 /* 6445 * MP initialization. Called *after* /etc/system has 6446 * been parsed. One CPU has already been initialized. 6447 * Need to cater for /etc/system having scragged one 6448 * of our values. 6449 */ 6450 if (ecache_size == min_ecache_size) { 6451 /* 6452 * Same size ecache. We do nothing unless we 6453 * have a pessimistic ecache setting. In that 6454 * case we become more optimistic (if the cache is 6455 * large enough). 6456 */ 6457 if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) { 6458 /* 6459 * Need to adjust hw_copy_limit* from our 6460 * pessimistic uniprocessor value to a more 6461 * optimistic UP value *iff* it hasn't been 6462 * reset. 6463 */ 6464 if ((ecache_size > 1048576) && 6465 (priv_hcl_8 == hw_copy_limit_8)) { 6466 if (ecache_size <= 2097152) 6467 hw_copy_limit_8 = 4 * 6468 VIS_COPY_THRESHOLD; 6469 else if (ecache_size <= 4194304) 6470 hw_copy_limit_8 = 4 * 6471 VIS_COPY_THRESHOLD; 6472 else 6473 hw_copy_limit_8 = 4 * 6474 VIS_COPY_THRESHOLD; 6475 priv_hcl_8 = hw_copy_limit_8; 6476 } 6477 } 6478 } else if (ecache_size < min_ecache_size) { 6479 /* 6480 * A different ecache size. Can this even happen? 6481 */ 6482 if (priv_hcl_8 == hw_copy_limit_8) { 6483 /* 6484 * The previous value that we set 6485 * is unchanged (i.e., it hasn't been 6486 * scragged by /etc/system). Rewrite it. 6487 */ 6488 if (ecache_size <= 1048576) 6489 hw_copy_limit_8 = 8 * 6490 VIS_COPY_THRESHOLD; 6491 else if (ecache_size <= 2097152) 6492 hw_copy_limit_8 = 8 * 6493 VIS_COPY_THRESHOLD; 6494 else if (ecache_size <= 4194304) 6495 hw_copy_limit_8 = 8 * 6496 VIS_COPY_THRESHOLD; 6497 else 6498 hw_copy_limit_8 = 10 * 6499 VIS_COPY_THRESHOLD; 6500 priv_hcl_8 = hw_copy_limit_8; 6501 min_ecache_size = ecache_size; 6502 } 6503 } 6504 } 6505 } 6506 6507 /* 6508 * Called from illegal instruction trap handler to see if we can attribute 6509 * the trap to a fpras check. 6510 */ 6511 int 6512 fpras_chktrap(struct regs *rp) 6513 { 6514 int op; 6515 struct fpras_chkfngrp *cgp; 6516 uintptr_t tpc = (uintptr_t)rp->r_pc; 6517 6518 if (fpras_chkfngrps == NULL) 6519 return (0); 6520 6521 cgp = &fpras_chkfngrps[CPU->cpu_id]; 6522 for (op = 0; op < FPRAS_NCOPYOPS; ++op) { 6523 if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 && 6524 tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult) 6525 break; 6526 } 6527 if (op == FPRAS_NCOPYOPS) 6528 return (0); 6529 6530 /* 6531 * This is an fpRAS failure caught through an illegal 6532 * instruction - trampoline. 6533 */ 6534 rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline; 6535 rp->r_npc = rp->r_pc + 4; 6536 return (1); 6537 } 6538 6539 /* 6540 * fpras_failure is called when a fpras check detects a bad calculation 6541 * result or an illegal instruction trap is attributed to an fpras 6542 * check. In all cases we are still bound to CPU. 6543 */ 6544 int 6545 fpras_failure(int op, int how) 6546 { 6547 int use_hw_bcopy_orig, use_hw_bzero_orig; 6548 uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig; 6549 ch_async_flt_t ch_flt; 6550 struct async_flt *aflt = (struct async_flt *)&ch_flt; 6551 struct fpras_chkfn *sfp, *cfp; 6552 uint32_t *sip, *cip; 6553 int i; 6554 6555 /* 6556 * We're running on a sick CPU. Avoid further FPU use at least for 6557 * the time in which we dispatch an ereport and (if applicable) panic. 6558 */ 6559 use_hw_bcopy_orig = use_hw_bcopy; 6560 use_hw_bzero_orig = use_hw_bzero; 6561 hcl1_orig = hw_copy_limit_1; 6562 hcl2_orig = hw_copy_limit_2; 6563 hcl4_orig = hw_copy_limit_4; 6564 hcl8_orig = hw_copy_limit_8; 6565 use_hw_bcopy = use_hw_bzero = 0; 6566 hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 = 6567 hw_copy_limit_8 = 0; 6568 6569 bzero(&ch_flt, sizeof (ch_async_flt_t)); 6570 aflt->flt_id = gethrtime_waitfree(); 6571 aflt->flt_class = CPU_FAULT; 6572 aflt->flt_inst = CPU->cpu_id; 6573 aflt->flt_status = (how << 8) | op; 6574 aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY; 6575 ch_flt.flt_type = CPU_FPUERR; 6576 6577 /* 6578 * We must panic if the copy operation had no lofault protection - 6579 * ie, don't panic for copyin, copyout, kcopy and bcopy called 6580 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy. 6581 */ 6582 aflt->flt_panic = (curthread->t_lofault == NULL); 6583 6584 /* 6585 * XOR the source instruction block with the copied instruction 6586 * block - this will show us which bit(s) are corrupted. 6587 */ 6588 sfp = (struct fpras_chkfn *)fpras_chkfn_type1; 6589 cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op]; 6590 if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) { 6591 sip = &sfp->fpras_blk0[0]; 6592 cip = &cfp->fpras_blk0[0]; 6593 } else { 6594 sip = &sfp->fpras_blk1[0]; 6595 cip = &cfp->fpras_blk1[0]; 6596 } 6597 for (i = 0; i < 16; ++i, ++sip, ++cip) 6598 ch_flt.flt_fpdata[i] = *sip ^ *cip; 6599 6600 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt, 6601 sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); 6602 6603 if (aflt->flt_panic) 6604 fm_panic("FPU failure on CPU %d", CPU->cpu_id); 6605 6606 /* 6607 * We get here for copyin/copyout and kcopy or bcopy where the 6608 * caller has used on_fault. We will flag the error so that 6609 * the process may be killed The trap_async_hwerr mechanism will 6610 * take appropriate further action (such as a reboot, contract 6611 * notification etc). Since we may be continuing we will 6612 * restore the global hardware copy acceleration switches. 6613 * 6614 * When we return from this function to the copy function we want to 6615 * avoid potentially bad data being used, ie we want the affected 6616 * copy function to return an error. The caller should therefore 6617 * invoke its lofault handler (which always exists for these functions) 6618 * which will return the appropriate error. 6619 */ 6620 ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR; 6621 aston(curthread); 6622 6623 use_hw_bcopy = use_hw_bcopy_orig; 6624 use_hw_bzero = use_hw_bzero_orig; 6625 hw_copy_limit_1 = hcl1_orig; 6626 hw_copy_limit_2 = hcl2_orig; 6627 hw_copy_limit_4 = hcl4_orig; 6628 hw_copy_limit_8 = hcl8_orig; 6629 6630 return (1); 6631 } 6632 6633 #define VIS_BLOCKSIZE 64 6634 6635 int 6636 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 6637 { 6638 int ret, watched; 6639 6640 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 6641 ret = dtrace_blksuword32(addr, data, 0); 6642 if (watched) 6643 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 6644 6645 return (ret); 6646 } 6647 6648 /* 6649 * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the 6650 * faulted cpu into that state). Cross-trap to the faulted cpu to clear 6651 * CEEN from the EER to disable traps for further disrupting error types 6652 * on that cpu. We could cross-call instead, but that has a larger 6653 * instruction and data footprint than cross-trapping, and the cpu is known 6654 * to be faulted. 6655 */ 6656 6657 void 6658 cpu_faulted_enter(struct cpu *cp) 6659 { 6660 xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS); 6661 } 6662 6663 /* 6664 * Called when a cpu leaves the CPU_FAULTED state to return to one of 6665 * offline, spare, or online (by the cpu requesting this state change). 6666 * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of 6667 * disrupting error bits that have accumulated without trapping, then 6668 * we cross-trap to re-enable CEEN controlled traps. 6669 */ 6670 void 6671 cpu_faulted_exit(struct cpu *cp) 6672 { 6673 ch_cpu_errors_t cpu_error_regs; 6674 6675 cpu_error_regs.afsr = C_AFSR_CECC_ERRS; 6676 if (IS_PANTHER(cpunodes[cp->cpu_id].implementation)) 6677 cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS; 6678 xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state, 6679 (uint64_t)&cpu_error_regs, 0); 6680 6681 xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS); 6682 } 6683 6684 /* 6685 * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by 6686 * the errors in the original AFSR, 0 otherwise. 6687 * 6688 * For all procs if the initial error was a BERR or TO, then it is possible 6689 * that we may have caused a secondary BERR or TO in the process of logging the 6690 * inital error via cpu_run_bus_error_handlers(). If this is the case then 6691 * if the request was protected then a panic is still not necessary, if not 6692 * protected then aft_panic is already set - so either way there's no need 6693 * to set aft_panic for the secondary error. 6694 * 6695 * For Cheetah and Jalapeno, if the original error was a UE which occurred on 6696 * a store merge, then the error handling code will call cpu_deferred_error(). 6697 * When clear_errors() is called, it will determine that secondary errors have 6698 * occurred - in particular, the store merge also caused a EDU and WDU that 6699 * weren't discovered until this point. 6700 * 6701 * We do three checks to verify that we are in this case. If we pass all three 6702 * checks, we return 1 to indicate that we should not panic. If any unexpected 6703 * errors occur, we return 0. 6704 * 6705 * For Cheetah+ and derivative procs, the store merge causes a DUE, which is 6706 * handled in cpu_disrupting_errors(). Since this function is not even called 6707 * in the case we are interested in, we just return 0 for these processors. 6708 */ 6709 /*ARGSUSED*/ 6710 static int 6711 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs, 6712 uint64_t t_afar) 6713 { 6714 #if defined(CHEETAH_PLUS) 6715 #else /* CHEETAH_PLUS */ 6716 struct async_flt *aflt = (struct async_flt *)ch_flt; 6717 #endif /* CHEETAH_PLUS */ 6718 6719 /* 6720 * Was the original error a BERR or TO and only a BERR or TO 6721 * (multiple errors are also OK) 6722 */ 6723 if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) { 6724 /* 6725 * Is the new error a BERR or TO and only a BERR or TO 6726 * (multiple errors are also OK) 6727 */ 6728 if ((ch_flt->afsr_errs & 6729 ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) 6730 return (1); 6731 } 6732 6733 #if defined(CHEETAH_PLUS) 6734 return (0); 6735 #else /* CHEETAH_PLUS */ 6736 /* 6737 * Now look for secondary effects of a UE on cheetah/jalapeno 6738 * 6739 * Check the original error was a UE, and only a UE. Note that 6740 * the ME bit will cause us to fail this check. 6741 */ 6742 if (t_afsr_errs != C_AFSR_UE) 6743 return (0); 6744 6745 /* 6746 * Check the secondary errors were exclusively an EDU and/or WDU. 6747 */ 6748 if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0) 6749 return (0); 6750 6751 /* 6752 * Check the AFAR of the original error and secondary errors 6753 * match to the 64-byte boundary 6754 */ 6755 if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64)) 6756 return (0); 6757 6758 /* 6759 * We've passed all the checks, so it's a secondary error! 6760 */ 6761 return (1); 6762 #endif /* CHEETAH_PLUS */ 6763 } 6764 6765 /* 6766 * Translate the flt_bit or flt_type into an error type. First, flt_bit 6767 * is checked for any valid errors. If found, the error type is 6768 * returned. If not found, the flt_type is checked for L1$ parity errors. 6769 */ 6770 /*ARGSUSED*/ 6771 static uint8_t 6772 cpu_flt_bit_to_plat_error(struct async_flt *aflt) 6773 { 6774 #if defined(JALAPENO) 6775 /* 6776 * Currently, logging errors to the SC is not supported on Jalapeno 6777 */ 6778 return (PLAT_ECC_ERROR2_NONE); 6779 #else 6780 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 6781 6782 switch (ch_flt->flt_bit) { 6783 case C_AFSR_CE: 6784 return (PLAT_ECC_ERROR2_CE); 6785 case C_AFSR_UCC: 6786 case C_AFSR_EDC: 6787 case C_AFSR_WDC: 6788 case C_AFSR_CPC: 6789 return (PLAT_ECC_ERROR2_L2_CE); 6790 case C_AFSR_EMC: 6791 return (PLAT_ECC_ERROR2_EMC); 6792 case C_AFSR_IVC: 6793 return (PLAT_ECC_ERROR2_IVC); 6794 case C_AFSR_UE: 6795 return (PLAT_ECC_ERROR2_UE); 6796 case C_AFSR_UCU: 6797 case C_AFSR_EDU: 6798 case C_AFSR_WDU: 6799 case C_AFSR_CPU: 6800 return (PLAT_ECC_ERROR2_L2_UE); 6801 case C_AFSR_IVU: 6802 return (PLAT_ECC_ERROR2_IVU); 6803 case C_AFSR_TO: 6804 return (PLAT_ECC_ERROR2_TO); 6805 case C_AFSR_BERR: 6806 return (PLAT_ECC_ERROR2_BERR); 6807 #if defined(CHEETAH_PLUS) 6808 case C_AFSR_L3_EDC: 6809 case C_AFSR_L3_UCC: 6810 case C_AFSR_L3_CPC: 6811 case C_AFSR_L3_WDC: 6812 return (PLAT_ECC_ERROR2_L3_CE); 6813 case C_AFSR_IMC: 6814 return (PLAT_ECC_ERROR2_IMC); 6815 case C_AFSR_TSCE: 6816 return (PLAT_ECC_ERROR2_L2_TSCE); 6817 case C_AFSR_THCE: 6818 return (PLAT_ECC_ERROR2_L2_THCE); 6819 case C_AFSR_L3_MECC: 6820 return (PLAT_ECC_ERROR2_L3_MECC); 6821 case C_AFSR_L3_THCE: 6822 return (PLAT_ECC_ERROR2_L3_THCE); 6823 case C_AFSR_L3_CPU: 6824 case C_AFSR_L3_EDU: 6825 case C_AFSR_L3_UCU: 6826 case C_AFSR_L3_WDU: 6827 return (PLAT_ECC_ERROR2_L3_UE); 6828 case C_AFSR_DUE: 6829 return (PLAT_ECC_ERROR2_DUE); 6830 case C_AFSR_DTO: 6831 return (PLAT_ECC_ERROR2_DTO); 6832 case C_AFSR_DBERR: 6833 return (PLAT_ECC_ERROR2_DBERR); 6834 #endif /* CHEETAH_PLUS */ 6835 default: 6836 switch (ch_flt->flt_type) { 6837 #if defined(CPU_IMP_L1_CACHE_PARITY) 6838 case CPU_IC_PARITY: 6839 return (PLAT_ECC_ERROR2_IPE); 6840 case CPU_DC_PARITY: 6841 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 6842 if (ch_flt->parity_data.dpe.cpl_cache == 6843 CPU_PC_PARITY) { 6844 return (PLAT_ECC_ERROR2_PCACHE); 6845 } 6846 } 6847 return (PLAT_ECC_ERROR2_DPE); 6848 #endif /* CPU_IMP_L1_CACHE_PARITY */ 6849 case CPU_ITLB_PARITY: 6850 return (PLAT_ECC_ERROR2_ITLB); 6851 case CPU_DTLB_PARITY: 6852 return (PLAT_ECC_ERROR2_DTLB); 6853 default: 6854 return (PLAT_ECC_ERROR2_NONE); 6855 } 6856 } 6857 #endif /* JALAPENO */ 6858 } 6859