1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/ddi.h> 32 #include <sys/sysmacros.h> 33 #include <sys/archsystm.h> 34 #include <sys/vmsystm.h> 35 #include <sys/machparam.h> 36 #include <sys/machsystm.h> 37 #include <sys/machthread.h> 38 #include <sys/cpu.h> 39 #include <sys/cmp.h> 40 #include <sys/elf_SPARC.h> 41 #include <vm/vm_dep.h> 42 #include <vm/hat_sfmmu.h> 43 #include <vm/seg_kpm.h> 44 #include <sys/cpuvar.h> 45 #include <sys/cheetahregs.h> 46 #include <sys/us3_module.h> 47 #include <sys/async.h> 48 #include <sys/cmn_err.h> 49 #include <sys/debug.h> 50 #include <sys/dditypes.h> 51 #include <sys/prom_debug.h> 52 #include <sys/prom_plat.h> 53 #include <sys/cpu_module.h> 54 #include <sys/sysmacros.h> 55 #include <sys/intreg.h> 56 #include <sys/clock.h> 57 #include <sys/platform_module.h> 58 #include <sys/machtrap.h> 59 #include <sys/ontrap.h> 60 #include <sys/panic.h> 61 #include <sys/memlist.h> 62 #include <sys/bootconf.h> 63 #include <sys/ivintr.h> 64 #include <sys/atomic.h> 65 #include <sys/taskq.h> 66 #include <sys/note.h> 67 #include <sys/ndifm.h> 68 #include <sys/ddifm.h> 69 #include <sys/fm/protocol.h> 70 #include <sys/fm/util.h> 71 #include <sys/fm/cpu/UltraSPARC-III.h> 72 #include <sys/fpras_impl.h> 73 #include <sys/dtrace.h> 74 #include <sys/watchpoint.h> 75 #include <sys/plat_ecc_unum.h> 76 #include <sys/cyclic.h> 77 #include <sys/errorq.h> 78 #include <sys/errclassify.h> 79 80 #ifdef CHEETAHPLUS_ERRATUM_25 81 #include <sys/xc_impl.h> 82 #endif /* CHEETAHPLUS_ERRATUM_25 */ 83 84 /* 85 * Note that 'Cheetah PRM' refers to: 86 * SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III 87 */ 88 89 /* 90 * Per CPU pointers to physical address of TL>0 logout data areas. 91 * These pointers have to be in the kernel nucleus to avoid MMU 92 * misses. 93 */ 94 uint64_t ch_err_tl1_paddrs[NCPU]; 95 96 /* 97 * One statically allocated structure to use during startup/DR 98 * to prevent unnecessary panics. 99 */ 100 ch_err_tl1_data_t ch_err_tl1_data; 101 102 /* 103 * Per CPU pending error at TL>0, used by level15 softint handler 104 */ 105 uchar_t ch_err_tl1_pending[NCPU]; 106 107 /* 108 * For deferred CE re-enable after trap. 109 */ 110 taskq_t *ch_check_ce_tq; 111 112 /* 113 * Internal functions. 
114 */ 115 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep); 116 static void cpu_log_diag_info(ch_async_flt_t *ch_flt); 117 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason, 118 ecc_type_to_info_t *eccp, ch_diag_data_t *cdp); 119 static int clear_ecc(struct async_flt *ecc); 120 #if defined(CPU_IMP_ECACHE_ASSOC) 121 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt); 122 #endif 123 static int cpu_ecache_set_size(struct cpu *cp); 124 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag); 125 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr); 126 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag); 127 static int cpu_ectag_pa_to_subblk_state(int cachesize, 128 uint64_t subaddr, uint64_t tag); 129 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt); 130 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit); 131 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit); 132 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit); 133 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit); 134 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit); 135 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp); 136 static void cpu_scrubphys(struct async_flt *aflt); 137 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *, 138 int *, int *); 139 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *); 140 static void cpu_ereport_init(struct async_flt *aflt); 141 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t); 142 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt); 143 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen, 144 uint64_t nceen, ch_cpu_logout_t *clop); 145 static int cpu_ce_delayed_ec_logout(uint64_t); 146 static int cpu_matching_ecache_line(uint64_t, void *, int, int *); 147 148 #ifdef CHEETAHPLUS_ERRATUM_25 149 static int mondo_recover_proc(uint16_t, int); 150 static void cheetah_nudge_init(void); 151 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, 152 cyc_time_t *when); 153 static void cheetah_nudge_buddy(void); 154 #endif /* CHEETAHPLUS_ERRATUM_25 */ 155 156 #if defined(CPU_IMP_L1_CACHE_PARITY) 157 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt); 158 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index); 159 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt, 160 ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word); 161 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt); 162 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index); 163 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt); 164 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index); 165 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *); 166 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *); 167 #endif /* CPU_IMP_L1_CACHE_PARITY */ 168 169 int (*p2get_mem_info)(int synd_code, uint64_t paddr, 170 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 171 int *segsp, int *banksp, int *mcidp); 172 173 /* 174 * This table is used to determine which bit(s) is(are) bad when an ECC 175 * error occurs. The array is indexed by an 9-bit syndrome. The entries 176 * of this array have the following semantics: 177 * 178 * 00-127 The number of the bad bit, when only one bit is bad. 179 * 128 ECC bit C0 is bad. 
180 * 129 ECC bit C1 is bad. 181 * 130 ECC bit C2 is bad. 182 * 131 ECC bit C3 is bad. 183 * 132 ECC bit C4 is bad. 184 * 133 ECC bit C5 is bad. 185 * 134 ECC bit C6 is bad. 186 * 135 ECC bit C7 is bad. 187 * 136 ECC bit C8 is bad. 188 * 137-143 reserved for Mtag Data and ECC. 189 * 144(M2) Two bits are bad within a nibble. 190 * 145(M3) Three bits are bad within a nibble. 191 * 146(M3) Four bits are bad within a nibble. 192 * 147(M) Multiple bits (5 or more) are bad. 193 * 148 NO bits are bad. 194 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5. 195 */ 196 197 #define C0 128 198 #define C1 129 199 #define C2 130 200 #define C3 131 201 #define C4 132 202 #define C5 133 203 #define C6 134 204 #define C7 135 205 #define C8 136 206 #define MT0 137 /* Mtag Data bit 0 */ 207 #define MT1 138 208 #define MT2 139 209 #define MTC0 140 /* Mtag Check bit 0 */ 210 #define MTC1 141 211 #define MTC2 142 212 #define MTC3 143 213 #define M2 144 214 #define M3 145 215 #define M4 146 216 #define M 147 217 #define NA 148 218 #if defined(JALAPENO) || defined(SERRANO) 219 #define S003 149 /* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */ 220 #define S003MEM 150 /* Syndrome 0x003 => likely from WDU/WBP */ 221 #define SLAST S003MEM /* last special syndrome */ 222 #else /* JALAPENO || SERRANO */ 223 #define S003 149 /* Syndrome 0x003 => likely from EDU:ST */ 224 #define S071 150 /* Syndrome 0x071 => likely from WDU/CPU */ 225 #define S11C 151 /* Syndrome 0x11c => likely from BERR/DBERR */ 226 #define SLAST S11C /* last special syndrome */ 227 #endif /* JALAPENO || SERRANO */ 228 #if defined(JALAPENO) || defined(SERRANO) 229 #define BPAR0 152 /* syndrom 152 through 167 for bus parity */ 230 #define BPAR15 167 231 #endif /* JALAPENO || SERRANO */ 232 233 static uint8_t ecc_syndrome_tab[] = 234 { 235 NA, C0, C1, S003, C2, M2, M3, 47, C3, M2, M2, 53, M2, 41, 29, M, 236 C4, M, M, 50, M2, 38, 25, M2, M2, 33, 24, M2, 11, M, M2, 16, 237 C5, M, M, 46, M2, 37, 19, M2, M, 31, 32, M, 7, M2, M2, 10, 238 M2, 40, 13, M2, 59, M, M2, 66, M, M2, M2, 0, M2, 67, 71, M, 239 C6, M, M, 43, M, 36, 18, M, M2, 49, 15, M, 63, M2, M2, 6, 240 M2, 44, 28, M2, M, M2, M2, 52, 68, M2, M2, 62, M2, M3, M3, M4, 241 M2, 26, 106, M2, 64, M, M2, 2, 120, M, M2, M3, M, M3, M3, M4, 242 #if defined(JALAPENO) || defined(SERRANO) 243 116, M2, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3, 244 #else /* JALAPENO || SERRANO */ 245 116, S071, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3, 246 #endif /* JALAPENO || SERRANO */ 247 C7, M2, M, 42, M, 35, 17, M2, M, 45, 14, M2, 21, M2, M2, 5, 248 M, 27, M, M, 99, M, M, 3, 114, M2, M2, 20, M2, M3, M3, M, 249 M2, 23, 113, M2, 112, M2, M, 51, 95, M, M2, M3, M2, M3, M3, M2, 250 103, M, M2, M3, M2, M3, M3, M4, M2, 48, M, M, 73, M2, M, M3, 251 M2, 22, 110, M2, 109, M2, M, 9, 108, M2, M, M3, M2, M3, M3, M, 252 102, M2, M, M, M2, M3, M3, M, M2, M3, M3, M2, M, M4, M, M3, 253 98, M, M2, M3, M2, M, M3, M4, M2, M3, M3, M4, M3, M, M, M, 254 M2, M3, M3, M, M3, M, M, M, 56, M4, M, M3, M4, M, M, M, 255 C8, M, M2, 39, M, 34, 105, M2, M, 30, 104, M, 101, M, M, 4, 256 #if defined(JALAPENO) || defined(SERRANO) 257 M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, M2, M, M3, M, 258 #else /* JALAPENO || SERRANO */ 259 M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, S11C, M, M3, M, 260 #endif /* JALAPENO || SERRANO */ 261 M2, 97, 82, M2, 78, M2, M2, 1, 96, M, M, M, M, M, M3, M2, 262 94, M, M2, M3, M2, M, M3, M, M2, M, 79, M, 69, M, M4, M, 263 M2, 93, 92, M, 91, M, M2, 8, 90, M2, M2, M, M, M, M, M4, 264 
89, M, M, M3, M2, M3, M3, M, M, M, M3, M2, M3, M2, M, M3, 265 86, M, M2, M3, M2, M, M3, M, M2, M, M3, M, M3, M, M, M3, 266 M, M, M3, M2, M3, M2, M4, M, 60, M, M2, M3, M4, M, M, M2, 267 M2, 88, 85, M2, 84, M, M2, 55, 81, M2, M2, M3, M2, M3, M3, M4, 268 77, M, M, M, M2, M3, M, M, M2, M3, M3, M4, M3, M2, M, M, 269 74, M, M2, M3, M, M, M3, M, M, M, M3, M, M3, M, M4, M3, 270 M2, 70, 107, M4, 65, M2, M2, M, 127, M, M, M, M2, M3, M3, M, 271 80, M2, M2, 72, M, 119, 118, M, M2, 126, 76, M, 125, M, M4, M3, 272 M2, 115, 124, M, 75, M, M, M3, 61, M, M4, M, M4, M, M, M, 273 M, 123, 122, M4, 121, M4, M, M3, 117, M2, M2, M3, M4, M3, M, M, 274 111, M, M, M, M4, M3, M3, M, M, M, M3, M, M3, M2, M, M 275 }; 276 277 #define ESYND_TBL_SIZE (sizeof (ecc_syndrome_tab) / sizeof (uint8_t)) 278 279 #if !(defined(JALAPENO) || defined(SERRANO)) 280 /* 281 * This table is used to determine which bit(s) is(are) bad when a Mtag 282 * error occurs. The array is indexed by an 4-bit ECC syndrome. The entries 283 * of this array have the following semantics: 284 * 285 * -1 Invalid mtag syndrome. 286 * 137 Mtag Data 0 is bad. 287 * 138 Mtag Data 1 is bad. 288 * 139 Mtag Data 2 is bad. 289 * 140 Mtag ECC 0 is bad. 290 * 141 Mtag ECC 1 is bad. 291 * 142 Mtag ECC 2 is bad. 292 * 143 Mtag ECC 3 is bad. 293 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6. 294 */ 295 short mtag_syndrome_tab[] = 296 { 297 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2, MT1, M2, MT2, M2, M2 298 }; 299 300 #define MSYND_TBL_SIZE (sizeof (mtag_syndrome_tab) / sizeof (short)) 301 302 #else /* !(JALAPENO || SERRANO) */ 303 304 #define BSYND_TBL_SIZE 16 305 306 #endif /* !(JALAPENO || SERRANO) */ 307 308 /* 309 * CE initial classification and subsequent action lookup table 310 */ 311 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE]; 312 static int ce_disp_inited; 313 314 /* 315 * Set to disable leaky and partner check for memory correctables 316 */ 317 int ce_xdiag_off; 318 319 /* 320 * The following are not incremented atomically so are indicative only 321 */ 322 static int ce_xdiag_drops; 323 static int ce_xdiag_lkydrops; 324 static int ce_xdiag_ptnrdrops; 325 static int ce_xdiag_bad; 326 327 /* 328 * CE leaky check callback structure 329 */ 330 typedef struct { 331 struct async_flt *lkycb_aflt; 332 errorq_t *lkycb_eqp; 333 errorq_elem_t *lkycb_eqep; 334 } ce_lkychk_cb_t; 335 336 /* 337 * defines for various ecache_flush_flag's 338 */ 339 #define ECACHE_FLUSH_LINE 1 340 #define ECACHE_FLUSH_ALL 2 341 342 /* 343 * STICK sync 344 */ 345 #define STICK_ITERATION 10 346 #define MAX_TSKEW 1 347 #define EV_A_START 0 348 #define EV_A_END 1 349 #define EV_B_START 2 350 #define EV_B_END 3 351 #define EVENTS 4 352 353 static int64_t stick_iter = STICK_ITERATION; 354 static int64_t stick_tsk = MAX_TSKEW; 355 356 typedef enum { 357 EVENT_NULL = 0, 358 SLAVE_START, 359 SLAVE_CONT, 360 MASTER_START 361 } event_cmd_t; 362 363 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL; 364 static int64_t timestamp[EVENTS]; 365 static volatile int slave_done; 366 367 #ifdef DEBUG 368 #define DSYNC_ATTEMPTS 64 369 typedef struct { 370 int64_t skew_val[DSYNC_ATTEMPTS]; 371 } ss_t; 372 373 ss_t stick_sync_stats[NCPU]; 374 #endif /* DEBUG */ 375 376 /* 377 * Maximum number of contexts for Cheetah. 378 */ 379 #define MAX_NCTXS (1 << 13) 380 381 /* Will be set !NULL for Cheetah+ and derivatives. 
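 * (cpu_setup() points this at ctx_pgsz_arr when CPU_IMP_DUAL_PAGESIZE is
 * compiled in and disable_dual_pgsz is left clear; see below.)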
*/ 382 uchar_t *ctx_pgsz_array = NULL; 383 #if defined(CPU_IMP_DUAL_PAGESIZE) 384 static uchar_t ctx_pgsz_arr[MAX_NCTXS]; 385 uint_t disable_dual_pgsz = 0; 386 #endif /* CPU_IMP_DUAL_PAGESIZE */ 387 388 /* 389 * Save the cache bootup state for use when internal 390 * caches are to be re-enabled after an error occurs. 391 */ 392 uint64_t cache_boot_state; 393 394 /* 395 * PA[22:0] represent Displacement in Safari configuration space. 396 */ 397 uint_t root_phys_addr_lo_mask = 0x7fffffu; 398 399 bus_config_eclk_t bus_config_eclk[] = { 400 #if defined(JALAPENO) || defined(SERRANO) 401 {JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1}, 402 {JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2}, 403 {JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32}, 404 #else /* JALAPENO || SERRANO */ 405 {SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1}, 406 {SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2}, 407 {SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32}, 408 #endif /* JALAPENO || SERRANO */ 409 {0, 0} 410 }; 411 412 /* 413 * Interval for deferred CEEN reenable 414 */ 415 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS; 416 417 /* 418 * set in /etc/system to control logging of user BERR/TO's 419 */ 420 int cpu_berr_to_verbose = 0; 421 422 /* 423 * set to 0 in /etc/system to defer CEEN reenable for all CEs 424 */ 425 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED; 426 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT; 427 428 /* 429 * Set of all offline cpus 430 */ 431 cpuset_t cpu_offline_set; 432 433 static void cpu_delayed_check_ce_errors(void *); 434 static void cpu_check_ce_errors(void *); 435 void cpu_error_ecache_flush(ch_async_flt_t *); 436 static int cpu_error_ecache_flush_required(ch_async_flt_t *); 437 static void cpu_log_and_clear_ce(ch_async_flt_t *); 438 void cpu_ce_detected(ch_cpu_errors_t *, int); 439 440 /* 441 * CE Leaky check timeout in microseconds. This is chosen to be twice the 442 * memory refresh interval of current DIMMs (64ms). After initial fix that 443 * gives at least one full refresh cycle in which the cell can leak 444 * (whereafter further refreshes simply reinforce any incorrect bit value). 445 */ 446 clock_t cpu_ce_lkychk_timeout_usec = 128000; 447 448 /* 449 * CE partner check partner caching period in seconds 450 */ 451 int cpu_ce_ptnr_cachetime_sec = 60; 452 453 /* 454 * Sets trap table entry ttentry by overwriting eight instructions from ttlabel 455 */ 456 #define CH_SET_TRAP(ttentry, ttlabel) \ 457 bcopy((const void *)&ttlabel, &ttentry, 32); \ 458 flush_instr_mem((caddr_t)&ttentry, 32); 459 460 static int min_ecache_size; 461 static uint_t priv_hcl_1; 462 static uint_t priv_hcl_2; 463 static uint_t priv_hcl_4; 464 static uint_t priv_hcl_8; 465 466 void 467 cpu_setup(void) 468 { 469 extern int at_flags; 470 extern int disable_delay_tlb_flush, delay_tlb_flush; 471 extern int cpc_has_overflow_intr; 472 extern int disable_text_largepages; 473 extern int use_text_pgsz4m; 474 475 /* 476 * Setup chip-specific trap handlers. 477 */ 478 cpu_init_trap(); 479 480 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); 481 482 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3; 483 484 /* 485 * save the cache bootup state. 486 */ 487 cache_boot_state = get_dcu() & DCU_CACHE; 488 489 /* 490 * Use the maximum number of contexts available for Cheetah 491 * unless it has been tuned for debugging. 492 * We are checking against 0 here since this value can be patched 493 * while booting. 
It can not be patched via /etc/system since it 494 * will be patched too late and thus cause the system to panic. 495 */ 496 if (nctxs == 0) 497 nctxs = MAX_NCTXS; 498 499 /* 500 * Due to the number of entries in the fully-associative tlb 501 * this may have to be tuned lower than in spitfire. 502 */ 503 pp_slots = MIN(8, MAXPP_SLOTS); 504 505 /* 506 * Block stores do not invalidate all pages of the d$, pagecopy 507 * et. al. need virtual translations with virtual coloring taken 508 * into consideration. prefetch/ldd will pollute the d$ on the 509 * load side. 510 */ 511 pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE; 512 513 if (use_page_coloring) { 514 do_pg_coloring = 1; 515 if (use_virtual_coloring) 516 do_virtual_coloring = 1; 517 } 518 519 isa_list = 520 "sparcv9+vis2 sparcv9+vis sparcv9 " 521 "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus " 522 "sparcv8 sparcv8-fsmuld sparcv7 sparc"; 523 524 /* 525 * On Panther-based machines, this should 526 * also include AV_SPARC_POPC too 527 */ 528 cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2; 529 530 /* 531 * On cheetah, there's no hole in the virtual address space 532 */ 533 hole_start = hole_end = 0; 534 535 /* 536 * The kpm mapping window. 537 * kpm_size: 538 * The size of a single kpm range. 539 * The overall size will be: kpm_size * vac_colors. 540 * kpm_vbase: 541 * The virtual start address of the kpm range within the kernel 542 * virtual address space. kpm_vbase has to be kpm_size aligned. 543 */ 544 kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */ 545 kpm_size_shift = 43; 546 kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */ 547 kpm_smallpages = 1; 548 549 /* 550 * The traptrace code uses either %tick or %stick for 551 * timestamping. We have %stick so we can use it. 552 */ 553 traptrace_use_stick = 1; 554 555 /* 556 * Cheetah has a performance counter overflow interrupt 557 */ 558 cpc_has_overflow_intr = 1; 559 560 /* 561 * Use cheetah flush-all support 562 */ 563 if (!disable_delay_tlb_flush) 564 delay_tlb_flush = 1; 565 566 #if defined(CPU_IMP_DUAL_PAGESIZE) 567 /* 568 * Use Cheetah+ and later dual page size support. 569 */ 570 if (!disable_dual_pgsz) { 571 ctx_pgsz_array = ctx_pgsz_arr; 572 } 573 #endif /* CPU_IMP_DUAL_PAGESIZE */ 574 575 /* 576 * Declare that this architecture/cpu combination does fpRAS. 577 */ 578 fpras_implemented = 1; 579 580 /* 581 * Enable 4M pages to be used for mapping user text by default. Don't 582 * use large pages for initialized data segments since we may not know 583 * at exec() time what should be the preferred large page size for DTLB 584 * programming. 585 */ 586 use_text_pgsz4m = 1; 587 disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) | 588 (1 << TTE32M) | (1 << TTE256M); 589 590 /* 591 * Setup CE lookup table 592 */ 593 CE_INITDISPTBL_POPULATE(ce_disp_table); 594 ce_disp_inited = 1; 595 } 596 597 /* 598 * Called by setcpudelay 599 */ 600 void 601 cpu_init_tick_freq(void) 602 { 603 /* 604 * For UltraSPARC III and beyond we want to use the 605 * system clock rate as the basis for low level timing, 606 * due to support of mixed speed CPUs and power managment. 
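 *
 * Concretely, the code below just copies system_clock_freq into
 * sys_tick_freq, and panics rather than guess if no system clock
 * rate has been recorded.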
607 */ 608 if (system_clock_freq == 0) 609 cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq"); 610 611 sys_tick_freq = system_clock_freq; 612 } 613 614 #ifdef CHEETAHPLUS_ERRATUM_25 615 /* 616 * Tunables 617 */ 618 int cheetah_bpe_off = 0; 619 int cheetah_sendmondo_recover = 1; 620 int cheetah_sendmondo_fullscan = 0; 621 int cheetah_sendmondo_recover_delay = 5; 622 623 #define CHEETAH_LIVELOCK_MIN_DELAY 1 624 625 /* 626 * Recovery Statistics 627 */ 628 typedef struct cheetah_livelock_entry { 629 int cpuid; /* fallen cpu */ 630 int buddy; /* cpu that ran recovery */ 631 clock_t lbolt; /* when recovery started */ 632 hrtime_t recovery_time; /* time spent in recovery */ 633 } cheetah_livelock_entry_t; 634 635 #define CHEETAH_LIVELOCK_NENTRY 32 636 637 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY]; 638 int cheetah_livelock_entry_nxt; 639 640 #define CHEETAH_LIVELOCK_ENTRY_NEXT(statp) { \ 641 statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt; \ 642 if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) { \ 643 cheetah_livelock_entry_nxt = 0; \ 644 } \ 645 } 646 647 #define CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val) statp->item = val 648 649 struct { 650 hrtime_t hrt; /* maximum recovery time */ 651 int recovery; /* recovered */ 652 int full_claimed; /* maximum pages claimed in full recovery */ 653 int proc_entry; /* attempted to claim TSB */ 654 int proc_tsb_scan; /* tsb scanned */ 655 int proc_tsb_partscan; /* tsb partially scanned */ 656 int proc_tsb_fullscan; /* whole tsb scanned */ 657 int proc_claimed; /* maximum pages claimed in tsb scan */ 658 int proc_user; /* user thread */ 659 int proc_kernel; /* kernel thread */ 660 int proc_onflt; /* bad stack */ 661 int proc_cpu; /* null cpu */ 662 int proc_thread; /* null thread */ 663 int proc_proc; /* null proc */ 664 int proc_as; /* null as */ 665 int proc_hat; /* null hat */ 666 int proc_hat_inval; /* hat contents don't make sense */ 667 int proc_hat_busy; /* hat is changing TSBs */ 668 int proc_tsb_reloc; /* TSB skipped because being relocated */ 669 int proc_cnum_bad; /* cnum out of range */ 670 int proc_cnum; /* last cnum processed */ 671 tte_t proc_tte; /* last tte processed */ 672 } cheetah_livelock_stat; 673 674 #define CHEETAH_LIVELOCK_STAT(item) cheetah_livelock_stat.item++ 675 676 #define CHEETAH_LIVELOCK_STATSET(item, value) \ 677 cheetah_livelock_stat.item = value 678 679 #define CHEETAH_LIVELOCK_MAXSTAT(item, value) { \ 680 if (value > cheetah_livelock_stat.item) \ 681 cheetah_livelock_stat.item = value; \ 682 } 683 684 /* 685 * Attempt to recover a cpu by claiming every cache line as saved 686 * in the TSB that the non-responsive cpu is using. Since we can't 687 * grab any adaptive lock, this is at best an attempt to do so. Because 688 * we don't grab any locks, we must operate under the protection of 689 * on_fault(). 690 * 691 * Return 1 if cpuid could be recovered, 0 if failed. 
692 */ 693 int 694 mondo_recover_proc(uint16_t cpuid, int bn) 695 { 696 label_t ljb; 697 cpu_t *cp; 698 kthread_t *t; 699 proc_t *p; 700 struct as *as; 701 struct hat *hat; 702 short cnum; 703 struct tsb_info *tsbinfop; 704 struct tsbe *tsbep; 705 caddr_t tsbp; 706 caddr_t end_tsbp; 707 uint64_t paddr; 708 uint64_t idsr; 709 u_longlong_t pahi, palo; 710 int pages_claimed = 0; 711 tte_t tsbe_tte; 712 int tried_kernel_tsb = 0; 713 714 CHEETAH_LIVELOCK_STAT(proc_entry); 715 716 if (on_fault(&ljb)) { 717 CHEETAH_LIVELOCK_STAT(proc_onflt); 718 goto badstruct; 719 } 720 721 if ((cp = cpu[cpuid]) == NULL) { 722 CHEETAH_LIVELOCK_STAT(proc_cpu); 723 goto badstruct; 724 } 725 726 if ((t = cp->cpu_thread) == NULL) { 727 CHEETAH_LIVELOCK_STAT(proc_thread); 728 goto badstruct; 729 } 730 731 if ((p = ttoproc(t)) == NULL) { 732 CHEETAH_LIVELOCK_STAT(proc_proc); 733 goto badstruct; 734 } 735 736 if ((as = p->p_as) == NULL) { 737 CHEETAH_LIVELOCK_STAT(proc_as); 738 goto badstruct; 739 } 740 741 if ((hat = as->a_hat) == NULL) { 742 CHEETAH_LIVELOCK_STAT(proc_hat); 743 goto badstruct; 744 } 745 746 if (hat != ksfmmup) { 747 CHEETAH_LIVELOCK_STAT(proc_user); 748 if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) { 749 CHEETAH_LIVELOCK_STAT(proc_hat_busy); 750 goto badstruct; 751 } 752 tsbinfop = hat->sfmmu_tsb; 753 if (tsbinfop == NULL) { 754 CHEETAH_LIVELOCK_STAT(proc_hat_inval); 755 goto badstruct; 756 } 757 tsbp = tsbinfop->tsb_va; 758 end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc); 759 } else { 760 CHEETAH_LIVELOCK_STAT(proc_kernel); 761 tsbinfop = NULL; 762 tsbp = ktsb_base; 763 end_tsbp = tsbp + TSB_BYTES(ktsb_sz); 764 } 765 766 /* Verify as */ 767 if (hat->sfmmu_as != as) { 768 CHEETAH_LIVELOCK_STAT(proc_hat_inval); 769 goto badstruct; 770 } 771 772 cnum = hat->sfmmu_cnum; 773 CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum); 774 775 if ((cnum < 0) || (cnum == INVALID_CONTEXT) || (cnum >= nctxs)) { 776 CHEETAH_LIVELOCK_STAT(proc_cnum_bad); 777 goto badstruct; 778 } 779 780 do { 781 CHEETAH_LIVELOCK_STAT(proc_tsb_scan); 782 783 /* 784 * Skip TSBs being relocated. This is important because 785 * we want to avoid the following deadlock scenario: 786 * 787 * 1) when we came in we set ourselves to "in recover" state. 788 * 2) when we try to touch TSB being relocated the mapping 789 * will be in the suspended state so we'll spin waiting 790 * for it to be unlocked. 791 * 3) when the CPU that holds the TSB mapping locked tries to 792 * unlock it it will send a xtrap which will fail to xcall 793 * us or the CPU we're trying to recover, and will in turn 794 * enter the mondo code. 795 * 4) since we are still spinning on the locked mapping 796 * no further progress will be made and the system will 797 * inevitably hard hang. 798 * 799 * A TSB not being relocated can't begin being relocated 800 * while we're accessing it because we check 801 * sendmondo_in_recover before relocating TSBs. 
802 */ 803 if (hat != ksfmmup && 804 (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) { 805 CHEETAH_LIVELOCK_STAT(proc_tsb_reloc); 806 goto next_tsbinfo; 807 } 808 809 for (tsbep = (struct tsbe *)tsbp; 810 tsbep < (struct tsbe *)end_tsbp; tsbep++) { 811 tsbe_tte = tsbep->tte_data; 812 813 if (tsbe_tte.tte_val == 0) { 814 /* 815 * Invalid tte 816 */ 817 continue; 818 } 819 if (tsbe_tte.tte_se) { 820 /* 821 * Don't want device registers 822 */ 823 continue; 824 } 825 if (tsbe_tte.tte_cp == 0) { 826 /* 827 * Must be cached in E$ 828 */ 829 continue; 830 } 831 CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte); 832 idsr = getidsr(); 833 if ((idsr & (IDSR_NACK_BIT(bn) | 834 IDSR_BUSY_BIT(bn))) == 0) { 835 CHEETAH_LIVELOCK_STAT(proc_tsb_partscan); 836 goto done; 837 } 838 pahi = tsbe_tte.tte_pahi; 839 palo = tsbe_tte.tte_palo; 840 paddr = (uint64_t)((pahi << 32) | 841 (palo << MMU_PAGESHIFT)); 842 claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)), 843 CH_ECACHE_SUBBLK_SIZE); 844 if ((idsr & IDSR_BUSY_BIT(bn)) == 0) { 845 shipit(cpuid, bn); 846 } 847 pages_claimed++; 848 } 849 next_tsbinfo: 850 if (tsbinfop != NULL) 851 tsbinfop = tsbinfop->tsb_next; 852 if (tsbinfop != NULL) { 853 tsbp = tsbinfop->tsb_va; 854 end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc); 855 } else if (tsbp == ktsb_base) { 856 tried_kernel_tsb = 1; 857 } else if (!tried_kernel_tsb) { 858 tsbp = ktsb_base; 859 end_tsbp = tsbp + TSB_BYTES(ktsb_sz); 860 hat = ksfmmup; 861 tsbinfop = NULL; 862 } 863 } while (tsbinfop != NULL || 864 ((tsbp == ktsb_base) && !tried_kernel_tsb)); 865 866 CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan); 867 CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed); 868 no_fault(); 869 idsr = getidsr(); 870 if ((idsr & (IDSR_NACK_BIT(bn) | 871 IDSR_BUSY_BIT(bn))) == 0) { 872 return (1); 873 } else { 874 return (0); 875 } 876 877 done: 878 no_fault(); 879 CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed); 880 return (1); 881 882 badstruct: 883 no_fault(); 884 return (0); 885 } 886 887 /* 888 * Attempt to claim ownership, temporarily, of every cache line that a 889 * non-responsive cpu might be using. This might kick that cpu out of 890 * this state. 891 * 892 * The return value indicates to the caller if we have exhausted all recovery 893 * techniques. If 1 is returned, it is useless to call this function again 894 * even for a different target CPU. 895 */ 896 int 897 mondo_recover(uint16_t cpuid, int bn) 898 { 899 struct memseg *seg; 900 uint64_t begin_pa, end_pa, cur_pa; 901 hrtime_t begin_hrt, end_hrt; 902 int retval = 0; 903 int pages_claimed = 0; 904 cheetah_livelock_entry_t *histp; 905 uint64_t idsr; 906 907 if (cas32(&sendmondo_in_recover, 0, 1) != 0) { 908 /* 909 * Wait while recovery takes place 910 */ 911 while (sendmondo_in_recover) { 912 drv_usecwait(1); 913 } 914 /* 915 * Assume we didn't claim the whole memory. If 916 * the target of this caller is not recovered, 917 * it will come back. 918 */ 919 return (retval); 920 } 921 922 CHEETAH_LIVELOCK_ENTRY_NEXT(histp) 923 CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt); 924 CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid); 925 CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id); 926 927 begin_hrt = gethrtime_waitfree(); 928 /* 929 * First try to claim the lines in the TSB the target 930 * may have been using. 931 */ 932 if (mondo_recover_proc(cpuid, bn) == 1) { 933 /* 934 * Didn't claim the whole memory 935 */ 936 goto done; 937 } 938 939 /* 940 * We tried using the TSB. The target is still 941 * not recovered. Check if complete memory scan is 942 * enabled. 
943 */ 944 if (cheetah_sendmondo_fullscan == 0) { 945 /* 946 * Full memory scan is disabled. 947 */ 948 retval = 1; 949 goto done; 950 } 951 952 /* 953 * Try claiming the whole memory. 954 */ 955 for (seg = memsegs; seg; seg = seg->next) { 956 begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT; 957 end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT; 958 for (cur_pa = begin_pa; cur_pa < end_pa; 959 cur_pa += MMU_PAGESIZE) { 960 idsr = getidsr(); 961 if ((idsr & (IDSR_NACK_BIT(bn) | 962 IDSR_BUSY_BIT(bn))) == 0) { 963 /* 964 * Didn't claim all memory 965 */ 966 goto done; 967 } 968 claimlines(cur_pa, MMU_PAGESIZE, 969 CH_ECACHE_SUBBLK_SIZE); 970 if ((idsr & IDSR_BUSY_BIT(bn)) == 0) { 971 shipit(cpuid, bn); 972 } 973 pages_claimed++; 974 } 975 } 976 977 /* 978 * We did all we could. 979 */ 980 retval = 1; 981 982 done: 983 /* 984 * Update statistics 985 */ 986 end_hrt = gethrtime_waitfree(); 987 CHEETAH_LIVELOCK_STAT(recovery); 988 CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt)); 989 CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed); 990 CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \ 991 (end_hrt - begin_hrt)); 992 993 while (cas32(&sendmondo_in_recover, 1, 0) != 1); 994 995 return (retval); 996 } 997 998 /* 999 * This is called by the cyclic framework when this CPU becomes online 1000 */ 1001 /*ARGSUSED*/ 1002 static void 1003 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) 1004 { 1005 1006 hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy; 1007 hdlr->cyh_level = CY_LOW_LEVEL; 1008 hdlr->cyh_arg = NULL; 1009 1010 /* 1011 * Stagger the start time 1012 */ 1013 when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU); 1014 if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) { 1015 cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY; 1016 } 1017 when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC; 1018 } 1019 1020 /* 1021 * Create a low level cyclic to send a xtrap to the next cpu online. 1022 * However, there's no need to have this running on a uniprocessor system. 1023 */ 1024 static void 1025 cheetah_nudge_init(void) 1026 { 1027 cyc_omni_handler_t hdlr; 1028 1029 if (max_ncpus == 1) { 1030 return; 1031 } 1032 1033 hdlr.cyo_online = cheetah_nudge_onln; 1034 hdlr.cyo_offline = NULL; 1035 hdlr.cyo_arg = NULL; 1036 1037 mutex_enter(&cpu_lock); 1038 (void) cyclic_add_omni(&hdlr); 1039 mutex_exit(&cpu_lock); 1040 } 1041 1042 /* 1043 * Cyclic handler to wake up buddy 1044 */ 1045 void 1046 cheetah_nudge_buddy(void) 1047 { 1048 /* 1049 * Disable kernel preemption to protect the cpu list 1050 */ 1051 kpreempt_disable(); 1052 if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) { 1053 xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1, 1054 0, 0); 1055 } 1056 kpreempt_enable(); 1057 } 1058 1059 #endif /* CHEETAHPLUS_ERRATUM_25 */ 1060 1061 #ifdef SEND_MONDO_STATS 1062 uint32_t x_one_stimes[64]; 1063 uint32_t x_one_ltimes[16]; 1064 uint32_t x_set_stimes[64]; 1065 uint32_t x_set_ltimes[16]; 1066 uint32_t x_set_cpus[NCPU]; 1067 uint32_t x_nack_stimes[64]; 1068 #endif 1069 1070 /* 1071 * Note: A version of this function is used by the debugger via the KDI, 1072 * and must be kept in sync with this version. Any changes made to this 1073 * function to support new chips or to accomodate errata must also be included 1074 * in the KDI-specific version. See us3_kdi.c. 
1075 */ 1076 void 1077 send_one_mondo(int cpuid) 1078 { 1079 int busy, nack; 1080 uint64_t idsr, starttick, endtick, tick, lasttick; 1081 uint64_t busymask; 1082 #ifdef CHEETAHPLUS_ERRATUM_25 1083 int recovered = 0; 1084 #endif 1085 1086 CPU_STATS_ADDQ(CPU, sys, xcalls, 1); 1087 starttick = lasttick = gettick(); 1088 shipit(cpuid, 0); 1089 endtick = starttick + xc_tick_limit; 1090 busy = nack = 0; 1091 #if defined(JALAPENO) || defined(SERRANO) 1092 /* 1093 * Lower 2 bits of the agent ID determine which BUSY/NACK pair 1094 * will be used for dispatching interrupt. For now, assume 1095 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing 1096 * issues with respect to BUSY/NACK pair usage. 1097 */ 1098 busymask = IDSR_BUSY_BIT(cpuid); 1099 #else /* JALAPENO || SERRANO */ 1100 busymask = IDSR_BUSY; 1101 #endif /* JALAPENO || SERRANO */ 1102 for (;;) { 1103 idsr = getidsr(); 1104 if (idsr == 0) 1105 break; 1106 1107 tick = gettick(); 1108 /* 1109 * If there is a big jump between the current tick 1110 * count and lasttick, we have probably hit a break 1111 * point. Adjust endtick accordingly to avoid panic. 1112 */ 1113 if (tick > (lasttick + xc_tick_jump_limit)) 1114 endtick += (tick - lasttick); 1115 lasttick = tick; 1116 if (tick > endtick) { 1117 if (panic_quiesce) 1118 return; 1119 #ifdef CHEETAHPLUS_ERRATUM_25 1120 if (cheetah_sendmondo_recover && recovered == 0) { 1121 if (mondo_recover(cpuid, 0)) { 1122 /* 1123 * We claimed the whole memory or 1124 * full scan is disabled. 1125 */ 1126 recovered++; 1127 } 1128 tick = gettick(); 1129 endtick = tick + xc_tick_limit; 1130 lasttick = tick; 1131 /* 1132 * Recheck idsr 1133 */ 1134 continue; 1135 } else 1136 #endif /* CHEETAHPLUS_ERRATUM_25 */ 1137 { 1138 cmn_err(CE_PANIC, "send mondo timeout " 1139 "(target 0x%x) [%d NACK %d BUSY]", 1140 cpuid, nack, busy); 1141 } 1142 } 1143 1144 if (idsr & busymask) { 1145 busy++; 1146 continue; 1147 } 1148 drv_usecwait(1); 1149 shipit(cpuid, 0); 1150 nack++; 1151 busy = 0; 1152 } 1153 #ifdef SEND_MONDO_STATS 1154 { 1155 int n = gettick() - starttick; 1156 if (n < 8192) 1157 x_one_stimes[n >> 7]++; 1158 else 1159 x_one_ltimes[(n >> 13) & 0xf]++; 1160 } 1161 #endif 1162 } 1163 1164 void 1165 syncfpu(void) 1166 { 1167 } 1168 1169 /* 1170 * Return processor specific async error structure 1171 * size used. 1172 */ 1173 int 1174 cpu_aflt_size(void) 1175 { 1176 return (sizeof (ch_async_flt_t)); 1177 } 1178 1179 /* 1180 * The fast_ecc_err handler transfers control here for UCU, UCC events. 1181 * Note that we flush Ecache twice, once in the fast_ecc_err handler to 1182 * flush the error that caused the UCU/UCC, then again here at the end to 1183 * flush the TL=1 trap handler code out of the Ecache, so we can minimize 1184 * the probability of getting a TL>1 Fast ECC trap when we're fielding 1185 * another Fast ECC trap. 1186 * 1187 * Cheetah+ also handles: TSCE: No additional processing required. 1188 * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT. 1189 * 1190 * Note that the p_clo_flags input is only valid in cases where the 1191 * cpu_private struct is not yet initialized (since that is the only 1192 * time that information cannot be obtained from the logout struct.) 1193 */ 1194 /*ARGSUSED*/ 1195 void 1196 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags) 1197 { 1198 ch_cpu_logout_t *clop; 1199 uint64_t ceen, nceen; 1200 1201 /* 1202 * Get the CPU log out info. 
If we can't find our CPU private 1203 * pointer, then we will have to make due without any detailed 1204 * logout information. 1205 */ 1206 if (CPU_PRIVATE(CPU) == NULL) { 1207 clop = NULL; 1208 ceen = p_clo_flags & EN_REG_CEEN; 1209 nceen = p_clo_flags & EN_REG_NCEEN; 1210 } else { 1211 clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout); 1212 ceen = clop->clo_flags & EN_REG_CEEN; 1213 nceen = clop->clo_flags & EN_REG_NCEEN; 1214 } 1215 1216 cpu_log_fast_ecc_error((caddr_t)rp->r_pc, 1217 (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop); 1218 } 1219 1220 /* 1221 * Log fast ecc error, called from either Fast ECC at TL=0 or Fast 1222 * ECC at TL>0. Need to supply either a error register pointer or a 1223 * cpu logout structure pointer. 1224 */ 1225 static void 1226 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen, 1227 uint64_t nceen, ch_cpu_logout_t *clop) 1228 { 1229 struct async_flt *aflt; 1230 ch_async_flt_t ch_flt; 1231 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; 1232 char pr_reason[MAX_REASON_STRING]; 1233 ch_cpu_errors_t cpu_error_regs; 1234 1235 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1236 /* 1237 * If no cpu logout data, then we will have to make due without 1238 * any detailed logout information. 1239 */ 1240 if (clop == NULL) { 1241 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; 1242 get_cpu_error_state(&cpu_error_regs); 1243 set_cpu_error_state(&cpu_error_regs); 1244 t_afar = cpu_error_regs.afar; 1245 t_afsr = cpu_error_regs.afsr; 1246 t_afsr_ext = cpu_error_regs.afsr_ext; 1247 #if defined(SERRANO) 1248 ch_flt.afar2 = cpu_error_regs.afar2; 1249 #endif /* SERRANO */ 1250 } else { 1251 t_afar = clop->clo_data.chd_afar; 1252 t_afsr = clop->clo_data.chd_afsr; 1253 t_afsr_ext = clop->clo_data.chd_afsr_ext; 1254 #if defined(SERRANO) 1255 ch_flt.afar2 = clop->clo_data.chd_afar2; 1256 #endif /* SERRANO */ 1257 } 1258 1259 /* 1260 * In order to simplify code, we maintain this afsr_errs 1261 * variable which holds the aggregate of AFSR and AFSR_EXT 1262 * sticky bits. 1263 */ 1264 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 1265 (t_afsr & C_AFSR_ALL_ERRS); 1266 pr_reason[0] = '\0'; 1267 1268 /* Setup the async fault structure */ 1269 aflt = (struct async_flt *)&ch_flt; 1270 aflt->flt_id = gethrtime_waitfree(); 1271 ch_flt.afsr_ext = t_afsr_ext; 1272 ch_flt.afsr_errs = t_afsr_errs; 1273 aflt->flt_stat = t_afsr; 1274 aflt->flt_addr = t_afar; 1275 aflt->flt_bus_id = getprocessorid(); 1276 aflt->flt_inst = CPU->cpu_id; 1277 aflt->flt_pc = tpc; 1278 aflt->flt_prot = AFLT_PROT_NONE; 1279 aflt->flt_class = CPU_FAULT; 1280 aflt->flt_priv = priv; 1281 aflt->flt_tl = tl; 1282 aflt->flt_status = ECC_F_TRAP; 1283 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs); 1284 1285 /* 1286 * XXXX - Phenomenal hack to get around Solaris not getting all the 1287 * cmn_err messages out to the console. The situation is a UCU (in 1288 * priv mode) which causes a WDU which causes a UE (on the retry). 1289 * The messages for the UCU and WDU are enqueued and then pulled off 1290 * the async queue via softint and syslogd starts to process them 1291 * but doesn't get them to the console. The UE causes a panic, but 1292 * since the UCU/WDU messages are already in transit, those aren't 1293 * on the async queue. The hack is to check if we have a matching 1294 * WDU event for the UCU, and if it matches, we're more than likely 1295 * going to panic with a UE, unless we're under protection. So, we 1296 * check to see if we got a matching WDU event and if we're under 1297 * protection. 
1298 * 1299 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about 1300 * looks like this: 1301 * UCU->WDU->UE 1302 * For Panther, it could look like either of these: 1303 * UCU---->WDU->L3_WDU->UE 1304 * L3_UCU->WDU->L3_WDU->UE 1305 */ 1306 if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) && 1307 aflt->flt_panic == 0 && aflt->flt_priv != 0 && 1308 curthread->t_ontrap == NULL && curthread->t_lofault == NULL) { 1309 get_cpu_error_state(&cpu_error_regs); 1310 aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) && 1311 (cpu_error_regs.afar == t_afar)); 1312 aflt->flt_panic |= ((clop == NULL) && 1313 (t_afsr_errs & C_AFSR_WDU)); 1314 } 1315 1316 /* 1317 * Queue events on the async event queue, one event per error bit. 1318 * If no events are queued or no Fast ECC events are on in the AFSR, 1319 * queue an event to complain. 1320 */ 1321 if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 || 1322 ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) { 1323 ch_flt.flt_type = CPU_INV_AFSR; 1324 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, 1325 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue, 1326 aflt->flt_panic); 1327 } 1328 1329 /* 1330 * Zero out + invalidate CPU logout. 1331 */ 1332 if (clop) { 1333 bzero(clop, sizeof (ch_cpu_logout_t)); 1334 clop->clo_data.chd_afar = LOGOUT_INVALID; 1335 } 1336 1337 /* 1338 * We carefully re-enable NCEEN and CEEN and then check if any deferred 1339 * or disrupting errors have happened. We do this because if a 1340 * deferred or disrupting error had occurred with NCEEN/CEEN off, the 1341 * trap will not be taken when NCEEN/CEEN is re-enabled. Note that 1342 * CEEN works differently on Cheetah than on Spitfire. Also, we enable 1343 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a 1344 * deferred or disrupting error happening between checking the AFSR and 1345 * enabling NCEEN/CEEN. 1346 * 1347 * Note: CEEN and NCEEN are only reenabled if they were on when trap 1348 * taken. 1349 */ 1350 set_error_enable(get_error_enable() | (nceen | ceen)); 1351 if (clear_errors(&ch_flt)) { 1352 aflt->flt_panic |= ((ch_flt.afsr_errs & 1353 (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0); 1354 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 1355 NULL); 1356 } 1357 1358 /* 1359 * Panic here if aflt->flt_panic has been set. Enqueued errors will 1360 * be logged as part of the panic flow. 1361 */ 1362 if (aflt->flt_panic) 1363 fm_panic("%sError(s)", pr_reason); 1364 1365 /* 1366 * Flushing the Ecache here gets the part of the trap handler that 1367 * is run at TL=1 out of the Ecache. 1368 */ 1369 cpu_flush_ecache(); 1370 } 1371 1372 /* 1373 * This is called via sys_trap from pil15_interrupt code if the 1374 * corresponding entry in ch_err_tl1_pending is set. Checks the 1375 * various ch_err_tl1_data structures for valid entries based on the bit 1376 * settings in the ch_err_tl1_flags entry of the structure. 
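 *
 * For each valid entry the handler below roughly does the following:
 *	- copy the logout data aside and invalidate the original,
 *	- log the "first error" (Fast ECC and/or I$/D$ parity), and
 *	- if the "multiple events" flags are set, read the current
 *	  AFSR/AFAR and log those as well, with an unknown (NULL) TPC.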
1377 */ 1378 /*ARGSUSED*/ 1379 void 1380 cpu_tl1_error(struct regs *rp, int panic) 1381 { 1382 ch_err_tl1_data_t *cl1p, cl1; 1383 int i, ncl1ps; 1384 uint64_t me_flags; 1385 uint64_t ceen, nceen; 1386 1387 if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) { 1388 cl1p = &ch_err_tl1_data; 1389 ncl1ps = 1; 1390 } else if (CPU_PRIVATE(CPU) != NULL) { 1391 cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]); 1392 ncl1ps = CH_ERR_TL1_TLMAX; 1393 } else { 1394 ncl1ps = 0; 1395 } 1396 1397 for (i = 0; i < ncl1ps; i++, cl1p++) { 1398 if (cl1p->ch_err_tl1_flags == 0) 1399 continue; 1400 1401 /* 1402 * Grab a copy of the logout data and invalidate 1403 * the logout area. 1404 */ 1405 cl1 = *cl1p; 1406 bzero(cl1p, sizeof (ch_err_tl1_data_t)); 1407 cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID; 1408 me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags); 1409 1410 /* 1411 * Log "first error" in ch_err_tl1_data. 1412 */ 1413 if (cl1.ch_err_tl1_flags & CH_ERR_FECC) { 1414 ceen = get_error_enable() & EN_REG_CEEN; 1415 nceen = get_error_enable() & EN_REG_NCEEN; 1416 cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1, 1417 1, ceen, nceen, &cl1.ch_err_tl1_logout); 1418 } 1419 #if defined(CPU_IMP_L1_CACHE_PARITY) 1420 if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) { 1421 cpu_parity_error(rp, cl1.ch_err_tl1_flags, 1422 (caddr_t)cl1.ch_err_tl1_tpc); 1423 } 1424 #endif /* CPU_IMP_L1_CACHE_PARITY */ 1425 1426 /* 1427 * Log "multiple events" in ch_err_tl1_data. Note that 1428 * we don't read and clear the AFSR/AFAR in the TL>0 code 1429 * if the structure is busy, we just do the cache flushing 1430 * we have to do and then do the retry. So the AFSR/AFAR 1431 * at this point *should* have some relevant info. If there 1432 * are no valid errors in the AFSR, we'll assume they've 1433 * already been picked up and logged. For I$/D$ parity, 1434 * we just log an event with an "Unknown" (NULL) TPC. 1435 */ 1436 if (me_flags & CH_ERR_FECC) { 1437 ch_cpu_errors_t cpu_error_regs; 1438 uint64_t t_afsr_errs; 1439 1440 /* 1441 * Get the error registers and see if there's 1442 * a pending error. If not, don't bother 1443 * generating an "Invalid AFSR" error event. 1444 */ 1445 get_cpu_error_state(&cpu_error_regs); 1446 t_afsr_errs = (cpu_error_regs.afsr_ext & 1447 C_AFSR_EXT_ALL_ERRS) | 1448 (cpu_error_regs.afsr & C_AFSR_ALL_ERRS); 1449 if (t_afsr_errs != 0) { 1450 ceen = get_error_enable() & EN_REG_CEEN; 1451 nceen = get_error_enable() & EN_REG_NCEEN; 1452 cpu_log_fast_ecc_error((caddr_t)NULL, 1, 1453 1, ceen, nceen, NULL); 1454 } 1455 } 1456 #if defined(CPU_IMP_L1_CACHE_PARITY) 1457 if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) { 1458 cpu_parity_error(rp, me_flags, (caddr_t)NULL); 1459 } 1460 #endif /* CPU_IMP_L1_CACHE_PARITY */ 1461 } 1462 } 1463 1464 /* 1465 * Called from Fast ECC TL>0 handler in case of fatal error. 1466 * cpu_tl1_error should always find an associated ch_err_tl1_data structure, 1467 * but if we don't, we'll panic with something reasonable. 1468 */ 1469 /*ARGSUSED*/ 1470 void 1471 cpu_tl1_err_panic(struct regs *rp, ulong_t flags) 1472 { 1473 cpu_tl1_error(rp, 1); 1474 /* 1475 * Should never return, but just in case. 1476 */ 1477 fm_panic("Unsurvivable ECC Error at TL>0"); 1478 } 1479 1480 /* 1481 * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST, 1482 * EDC, WDU, WDC, CPU, CPC, IVU, IVC events. 
1483 * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU 1484 * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC 1485 * 1486 * Cheetah+ also handles (No additional processing required): 1487 * DUE, DTO, DBERR (NCEEN controlled) 1488 * THCE (CEEN and ET_ECC_en controlled) 1489 * TUE (ET_ECC_en controlled) 1490 * 1491 * Panther further adds: 1492 * IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled) 1493 * IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled) 1494 * TUE_SH, TUE (NCEEN and L2_tag_ECC_en controlled) 1495 * L3_TUE, L3_TUE_SH (NCEEN and ET_ECC_en controlled) 1496 * THCE (CEEN and L2_tag_ECC_en controlled) 1497 * L3_THCE (CEEN and ET_ECC_en controlled) 1498 * 1499 * Note that the p_clo_flags input is only valid in cases where the 1500 * cpu_private struct is not yet initialized (since that is the only 1501 * time that information cannot be obtained from the logout struct.) 1502 */ 1503 /*ARGSUSED*/ 1504 void 1505 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags) 1506 { 1507 struct async_flt *aflt; 1508 ch_async_flt_t ch_flt; 1509 char pr_reason[MAX_REASON_STRING]; 1510 ch_cpu_logout_t *clop; 1511 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; 1512 ch_cpu_errors_t cpu_error_regs; 1513 1514 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1515 /* 1516 * Get the CPU log out info. If we can't find our CPU private 1517 * pointer, then we will have to make due without any detailed 1518 * logout information. 1519 */ 1520 if (CPU_PRIVATE(CPU) == NULL) { 1521 clop = NULL; 1522 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; 1523 get_cpu_error_state(&cpu_error_regs); 1524 set_cpu_error_state(&cpu_error_regs); 1525 t_afar = cpu_error_regs.afar; 1526 t_afsr = cpu_error_regs.afsr; 1527 t_afsr_ext = cpu_error_regs.afsr_ext; 1528 #if defined(SERRANO) 1529 ch_flt.afar2 = cpu_error_regs.afar2; 1530 #endif /* SERRANO */ 1531 } else { 1532 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); 1533 t_afar = clop->clo_data.chd_afar; 1534 t_afsr = clop->clo_data.chd_afsr; 1535 t_afsr_ext = clop->clo_data.chd_afsr_ext; 1536 #if defined(SERRANO) 1537 ch_flt.afar2 = clop->clo_data.chd_afar2; 1538 #endif /* SERRANO */ 1539 } 1540 1541 /* 1542 * In order to simplify code, we maintain this afsr_errs 1543 * variable which holds the aggregate of AFSR and AFSR_EXT 1544 * sticky bits. 1545 */ 1546 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 1547 (t_afsr & C_AFSR_ALL_ERRS); 1548 1549 pr_reason[0] = '\0'; 1550 /* Setup the async fault structure */ 1551 aflt = (struct async_flt *)&ch_flt; 1552 ch_flt.afsr_ext = t_afsr_ext; 1553 ch_flt.afsr_errs = t_afsr_errs; 1554 aflt->flt_stat = t_afsr; 1555 aflt->flt_addr = t_afar; 1556 aflt->flt_pc = (caddr_t)rp->r_pc; 1557 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1558 aflt->flt_tl = 0; 1559 aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs); 1560 1561 /* 1562 * If this trap is a result of one of the errors not masked 1563 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead 1564 * indicate that a timeout is to be set later. 1565 */ 1566 if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) && 1567 !aflt->flt_panic) 1568 ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED; 1569 else 1570 ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED; 1571 1572 /* 1573 * log the CE and clean up 1574 */ 1575 cpu_log_and_clear_ce(&ch_flt); 1576 1577 /* 1578 * We re-enable CEEN (if required) and check if any disrupting errors 1579 * have happened. 
We do this because if a disrupting error had occurred 1580 * with CEEN off, the trap will not be taken when CEEN is re-enabled. 1581 * Note that CEEN works differently on Cheetah than on Spitfire. Also, 1582 * we enable CEEN *before* checking the AFSR to avoid the small window 1583 * of a error happening between checking the AFSR and enabling CEEN. 1584 */ 1585 if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER) 1586 set_error_enable(get_error_enable() | EN_REG_CEEN); 1587 if (clear_errors(&ch_flt)) { 1588 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 1589 NULL); 1590 } 1591 1592 /* 1593 * Panic here if aflt->flt_panic has been set. Enqueued errors will 1594 * be logged as part of the panic flow. 1595 */ 1596 if (aflt->flt_panic) 1597 fm_panic("%sError(s)", pr_reason); 1598 } 1599 1600 /* 1601 * The async_err handler transfers control here for UE, EMU, EDU:BLD, 1602 * L3_EDU:BLD, TO, and BERR events. 1603 * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR 1604 * 1605 * Cheetah+: No additional errors handled. 1606 * 1607 * Note that the p_clo_flags input is only valid in cases where the 1608 * cpu_private struct is not yet initialized (since that is the only 1609 * time that information cannot be obtained from the logout struct.) 1610 */ 1611 /*ARGSUSED*/ 1612 void 1613 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags) 1614 { 1615 ushort_t ttype, tl; 1616 ch_async_flt_t ch_flt; 1617 struct async_flt *aflt; 1618 int trampolined = 0; 1619 char pr_reason[MAX_REASON_STRING]; 1620 ch_cpu_logout_t *clop; 1621 uint64_t ceen, clo_flags; 1622 uint64_t log_afsr; 1623 uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; 1624 ch_cpu_errors_t cpu_error_regs; 1625 int expected = DDI_FM_ERR_UNEXPECTED; 1626 ddi_acc_hdl_t *hp; 1627 1628 /* 1629 * We need to look at p_flag to determine if the thread detected an 1630 * error while dumping core. We can't grab p_lock here, but it's ok 1631 * because we just need a consistent snapshot and we know that everyone 1632 * else will store a consistent set of bits while holding p_lock. We 1633 * don't have to worry about a race because SDOCORE is set once prior 1634 * to doing i/o from the process's address space and is never cleared. 1635 */ 1636 uint_t pflag = ttoproc(curthread)->p_flag; 1637 1638 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1639 /* 1640 * Get the CPU log out info. If we can't find our CPU private 1641 * pointer then we will have to make due without any detailed 1642 * logout information. 1643 */ 1644 if (CPU_PRIVATE(CPU) == NULL) { 1645 clop = NULL; 1646 ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; 1647 get_cpu_error_state(&cpu_error_regs); 1648 set_cpu_error_state(&cpu_error_regs); 1649 t_afar = cpu_error_regs.afar; 1650 t_afsr = cpu_error_regs.afsr; 1651 t_afsr_ext = cpu_error_regs.afsr_ext; 1652 #if defined(SERRANO) 1653 ch_flt.afar2 = cpu_error_regs.afar2; 1654 #endif /* SERRANO */ 1655 clo_flags = p_clo_flags; 1656 } else { 1657 clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout); 1658 t_afar = clop->clo_data.chd_afar; 1659 t_afsr = clop->clo_data.chd_afsr; 1660 t_afsr_ext = clop->clo_data.chd_afsr_ext; 1661 #if defined(SERRANO) 1662 ch_flt.afar2 = clop->clo_data.chd_afar2; 1663 #endif /* SERRANO */ 1664 clo_flags = clop->clo_flags; 1665 } 1666 1667 /* 1668 * In order to simplify code, we maintain this afsr_errs 1669 * variable which holds the aggregate of AFSR and AFSR_EXT 1670 * sticky bits. 
1671 */ 1672 t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 1673 (t_afsr & C_AFSR_ALL_ERRS); 1674 pr_reason[0] = '\0'; 1675 1676 /* 1677 * Grab information encoded into our clo_flags field. 1678 */ 1679 ceen = clo_flags & EN_REG_CEEN; 1680 tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT; 1681 ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT; 1682 1683 /* 1684 * handle the specific error 1685 */ 1686 aflt = (struct async_flt *)&ch_flt; 1687 aflt->flt_id = gethrtime_waitfree(); 1688 aflt->flt_bus_id = getprocessorid(); 1689 aflt->flt_inst = CPU->cpu_id; 1690 ch_flt.afsr_ext = t_afsr_ext; 1691 ch_flt.afsr_errs = t_afsr_errs; 1692 aflt->flt_stat = t_afsr; 1693 aflt->flt_addr = t_afar; 1694 aflt->flt_pc = (caddr_t)rp->r_pc; 1695 aflt->flt_prot = AFLT_PROT_NONE; 1696 aflt->flt_class = CPU_FAULT; 1697 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1698 aflt->flt_tl = (uchar_t)tl; 1699 aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) || 1700 C_AFSR_PANIC(t_afsr_errs)); 1701 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1702 aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP); 1703 1704 /* 1705 * If the trap occurred in privileged mode at TL=0, we need to check to 1706 * see if we were executing in the kernel under on_trap() or t_lofault 1707 * protection. If so, modify the saved registers so that we return 1708 * from the trap to the appropriate trampoline routine. 1709 */ 1710 if (aflt->flt_priv && tl == 0) { 1711 if (curthread->t_ontrap != NULL) { 1712 on_trap_data_t *otp = curthread->t_ontrap; 1713 1714 if (otp->ot_prot & OT_DATA_EC) { 1715 aflt->flt_prot = AFLT_PROT_EC; 1716 otp->ot_trap |= OT_DATA_EC; 1717 rp->r_pc = otp->ot_trampoline; 1718 rp->r_npc = rp->r_pc + 4; 1719 trampolined = 1; 1720 } 1721 1722 if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) && 1723 (otp->ot_prot & OT_DATA_ACCESS)) { 1724 aflt->flt_prot = AFLT_PROT_ACCESS; 1725 otp->ot_trap |= OT_DATA_ACCESS; 1726 rp->r_pc = otp->ot_trampoline; 1727 rp->r_npc = rp->r_pc + 4; 1728 trampolined = 1; 1729 /* 1730 * for peeks and caut_gets errors are expected 1731 */ 1732 hp = (ddi_acc_hdl_t *)otp->ot_handle; 1733 if (!hp) 1734 expected = DDI_FM_ERR_PEEK; 1735 else if (hp->ah_acc.devacc_attr_access == 1736 DDI_CAUTIOUS_ACC) 1737 expected = DDI_FM_ERR_EXPECTED; 1738 } 1739 1740 } else if (curthread->t_lofault) { 1741 aflt->flt_prot = AFLT_PROT_COPY; 1742 rp->r_g1 = EFAULT; 1743 rp->r_pc = curthread->t_lofault; 1744 rp->r_npc = rp->r_pc + 4; 1745 trampolined = 1; 1746 } 1747 } 1748 1749 /* 1750 * If we're in user mode or we're doing a protected copy, we either 1751 * want the ASTON code below to send a signal to the user process 1752 * or we want to panic if aft_panic is set. 1753 * 1754 * If we're in privileged mode and we're not doing a copy, then we 1755 * need to check if we've trampolined. If we haven't trampolined, 1756 * we should panic. 1757 */ 1758 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 1759 if (t_afsr_errs & 1760 ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) & 1761 ~(C_AFSR_BERR | C_AFSR_TO))) 1762 aflt->flt_panic |= aft_panic; 1763 } else if (!trampolined) { 1764 aflt->flt_panic = 1; 1765 } 1766 1767 /* 1768 * If we've trampolined due to a privileged TO or BERR, or if an 1769 * unprivileged TO or BERR occurred, we don't want to enqueue an 1770 * event for that TO or BERR. Queue all other events (if any) besides 1771 * the TO/BERR. Since we may not be enqueing any events, we need to 1772 * ignore the number of events queued. 
If we haven't trampolined due 1773 * to a TO or BERR, just enqueue events normally. 1774 */ 1775 log_afsr = t_afsr_errs; 1776 if (trampolined) { 1777 log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR); 1778 } else if (!aflt->flt_priv) { 1779 /* 1780 * User mode, suppress messages if 1781 * cpu_berr_to_verbose is not set. 1782 */ 1783 if (!cpu_berr_to_verbose) 1784 log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR); 1785 } 1786 1787 /* 1788 * Log any errors that occurred 1789 */ 1790 if (((log_afsr & 1791 ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) && 1792 cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) || 1793 (t_afsr_errs & 1794 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) { 1795 ch_flt.flt_type = CPU_INV_AFSR; 1796 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, 1797 (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue, 1798 aflt->flt_panic); 1799 } 1800 1801 /* 1802 * Zero out + invalidate CPU logout. 1803 */ 1804 if (clop) { 1805 bzero(clop, sizeof (ch_cpu_logout_t)); 1806 clop->clo_data.chd_afar = LOGOUT_INVALID; 1807 } 1808 1809 #if defined(JALAPENO) || defined(SERRANO) 1810 /* 1811 * UE/RUE/BERR/TO: Call our bus nexus friends to check for 1812 * IO errors that may have resulted in this trap. 1813 */ 1814 if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) { 1815 cpu_run_bus_error_handlers(aflt, expected); 1816 } 1817 1818 /* 1819 * UE/RUE: If UE or RUE is in memory, we need to flush the bad 1820 * line from the Ecache. We also need to query the bus nexus for 1821 * fatal errors. Attempts to do diagnostic read on caches may 1822 * introduce more errors (especially when the module is bad). 1823 */ 1824 if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) { 1825 /* 1826 * Ask our bus nexus friends if they have any fatal errors. If 1827 * so, they will log appropriate error messages. 1828 */ 1829 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL) 1830 aflt->flt_panic = 1; 1831 1832 /* 1833 * We got a UE or RUE and are panicking, save the fault PA in 1834 * a known location so that the platform specific panic code 1835 * can check for copyback errors. 1836 */ 1837 if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) { 1838 panic_aflt = *aflt; 1839 } 1840 } 1841 1842 /* 1843 * Flush Ecache line or entire Ecache 1844 */ 1845 if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR)) 1846 cpu_error_ecache_flush(&ch_flt); 1847 #else /* JALAPENO || SERRANO */ 1848 /* 1849 * UE/BERR/TO: Call our bus nexus friends to check for 1850 * IO errors that may have resulted in this trap. 1851 */ 1852 if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) { 1853 cpu_run_bus_error_handlers(aflt, expected); 1854 } 1855 1856 /* 1857 * UE: If the UE is in memory, we need to flush the bad 1858 * line from the Ecache. We also need to query the bus nexus for 1859 * fatal errors. Attempts to do diagnostic read on caches may 1860 * introduce more errors (especially when the module is bad). 1861 */ 1862 if (t_afsr & C_AFSR_UE) { 1863 /* 1864 * Ask our legacy bus nexus friends if they have any fatal 1865 * errors. If so, they will log appropriate error messages. 1866 */ 1867 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL) 1868 aflt->flt_panic = 1; 1869 1870 /* 1871 * We got a UE and are panicking, save the fault PA in a known 1872 * location so that the platform specific panic code can check 1873 * for copyback errors. 
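 * (cpu_flt_in_memory() gates this, so panic_aflt is only recorded
 * when the AFAR refers to a memory location.)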
1874 */ 1875 if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) { 1876 panic_aflt = *aflt; 1877 } 1878 } 1879 1880 /* 1881 * Flush Ecache line or entire Ecache 1882 */ 1883 if (t_afsr_errs & 1884 (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU)) 1885 cpu_error_ecache_flush(&ch_flt); 1886 #endif /* JALAPENO || SERRANO */ 1887 1888 /* 1889 * We carefully re-enable NCEEN and CEEN and then check if any deferred 1890 * or disrupting errors have happened. We do this because if a 1891 * deferred or disrupting error had occurred with NCEEN/CEEN off, the 1892 * trap will not be taken when NCEEN/CEEN is re-enabled. Note that 1893 * CEEN works differently on Cheetah than on Spitfire. Also, we enable 1894 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a 1895 * deferred or disrupting error happening between checking the AFSR and 1896 * enabling NCEEN/CEEN. 1897 * 1898 * Note: CEEN reenabled only if it was on when trap taken. 1899 */ 1900 set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen)); 1901 if (clear_errors(&ch_flt)) { 1902 /* 1903 * Check for secondary errors, and avoid panicking if we 1904 * have them 1905 */ 1906 if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs, 1907 t_afar) == 0) { 1908 aflt->flt_panic |= ((ch_flt.afsr_errs & 1909 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0); 1910 } 1911 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 1912 NULL); 1913 } 1914 1915 /* 1916 * Panic here if aflt->flt_panic has been set. Enqueued errors will 1917 * be logged as part of the panic flow. 1918 */ 1919 if (aflt->flt_panic) 1920 fm_panic("%sError(s)", pr_reason); 1921 1922 /* 1923 * If we queued an error and we are going to return from the trap and 1924 * the error was in user mode or inside of a copy routine, set AST flag 1925 * so the queue will be drained before returning to user mode. The 1926 * AST processing will also act on our failure policy. 1927 */ 1928 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 1929 int pcb_flag = 0; 1930 1931 if (t_afsr_errs & 1932 (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS & 1933 ~(C_AFSR_BERR | C_AFSR_TO))) 1934 pcb_flag |= ASYNC_HWERR; 1935 1936 if (t_afsr & C_AFSR_BERR) 1937 pcb_flag |= ASYNC_BERR; 1938 1939 if (t_afsr & C_AFSR_TO) 1940 pcb_flag |= ASYNC_BTO; 1941 1942 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 1943 aston(curthread); 1944 } 1945 } 1946 1947 #if defined(CPU_IMP_L1_CACHE_PARITY) 1948 /* 1949 * Handling of data and instruction parity errors (traps 0x71, 0x72). 1950 * 1951 * For Panther, P$ data parity errors during floating point load hits 1952 * are also detected (reported as TT 0x71) and handled by this trap 1953 * handler. 1954 * 1955 * AFSR/AFAR are not set for parity errors, only TPC (a virtual address) 1956 * is available. 1957 */ 1958 /*ARGSUSED*/ 1959 void 1960 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc) 1961 { 1962 ch_async_flt_t ch_flt; 1963 struct async_flt *aflt; 1964 uchar_t tl = ((flags & CH_ERR_TL) != 0); 1965 uchar_t iparity = ((flags & CH_ERR_IPE) != 0); 1966 uchar_t panic = ((flags & CH_ERR_PANIC) != 0); 1967 char *error_class; 1968 1969 /* 1970 * Log the error. 1971 * For icache parity errors the fault address is the trap PC. 1972 * For dcache/pcache parity errors the instruction would have to 1973 * be decoded to determine the address and that isn't possible 1974 * at high PIL. 
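 * In the dcache/pcache case flt_addr is therefore set to
 * AFLT_INV_ADDR below.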
1975 */ 1976 bzero(&ch_flt, sizeof (ch_async_flt_t)); 1977 aflt = (struct async_flt *)&ch_flt; 1978 aflt->flt_id = gethrtime_waitfree(); 1979 aflt->flt_bus_id = getprocessorid(); 1980 aflt->flt_inst = CPU->cpu_id; 1981 aflt->flt_pc = tpc; 1982 aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR; 1983 aflt->flt_prot = AFLT_PROT_NONE; 1984 aflt->flt_class = CPU_FAULT; 1985 aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ? 1 : 0; 1986 aflt->flt_tl = tl; 1987 aflt->flt_panic = panic; 1988 aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP; 1989 ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY; 1990 1991 if (iparity) { 1992 cpu_icache_parity_info(&ch_flt); 1993 if (ch_flt.parity_data.ipe.cpl_off != -1) 1994 error_class = FM_EREPORT_CPU_USIII_IDSPE; 1995 else if (ch_flt.parity_data.ipe.cpl_way != -1) 1996 error_class = FM_EREPORT_CPU_USIII_ITSPE; 1997 else 1998 error_class = FM_EREPORT_CPU_USIII_IPE; 1999 aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE; 2000 } else { 2001 cpu_dcache_parity_info(&ch_flt); 2002 if (ch_flt.parity_data.dpe.cpl_off != -1) 2003 error_class = FM_EREPORT_CPU_USIII_DDSPE; 2004 else if (ch_flt.parity_data.dpe.cpl_way != -1) 2005 error_class = FM_EREPORT_CPU_USIII_DTSPE; 2006 else 2007 error_class = FM_EREPORT_CPU_USIII_DPE; 2008 aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE; 2009 /* 2010 * For panther we also need to check the P$ for parity errors. 2011 */ 2012 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2013 cpu_pcache_parity_info(&ch_flt); 2014 if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) { 2015 error_class = FM_EREPORT_CPU_USIII_PDSPE; 2016 aflt->flt_payload = 2017 FM_EREPORT_PAYLOAD_PCACHE_PE; 2018 } 2019 } 2020 } 2021 2022 cpu_errorq_dispatch(error_class, (void *)&ch_flt, 2023 sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); 2024 2025 if (iparity) { 2026 /* 2027 * Invalidate entire I$. 2028 * This is required due to the use of diagnostic ASI 2029 * accesses that may result in a loss of I$ coherency. 2030 */ 2031 if (cache_boot_state & DCU_IC) { 2032 flush_icache(); 2033 } 2034 /* 2035 * According to section P.3.1 of the Panther PRM, we 2036 * need to do a little more for recovery on those 2037 * CPUs after encountering an I$ parity error. 2038 */ 2039 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2040 flush_ipb(); 2041 correct_dcache_parity(dcache_size, 2042 dcache_linesize); 2043 flush_pcache(); 2044 } 2045 } else { 2046 /* 2047 * Since the valid bit is ignored when checking parity the 2048 * D$ data and tag must also be corrected. Set D$ data bits 2049 * to zero and set utag to 0, 1, 2, 3. 2050 */ 2051 correct_dcache_parity(dcache_size, dcache_linesize); 2052 2053 /* 2054 * According to section P.3.3 of the Panther PRM, we 2055 * need to do a little more for recovery on those 2056 * CPUs after encountering a D$ or P$ parity error. 2057 * 2058 * As far as clearing P$ parity errors, it is enough to 2059 * simply invalidate all entries in the P$ since P$ parity 2060 * error traps are only generated for floating point load 2061 * hits. 2062 */ 2063 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2064 flush_icache(); 2065 flush_ipb(); 2066 flush_pcache(); 2067 } 2068 } 2069 2070 /* 2071 * Invalidate entire D$ if it was enabled. 2072 * This is done to avoid stale data in the D$ which might 2073 * occur with the D$ disabled and the trap handler doing 2074 * stores affecting lines already in the D$. 
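 * (cache_boot_state holds the DCU cache-enable bits that were in
 * effect at boot; it gates the I$/D$ flushes above and below and is
 * OR'd back into the DCU by the set_dcu() call that follows.)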
2075 */ 2076 if (cache_boot_state & DCU_DC) { 2077 flush_dcache(); 2078 } 2079 2080 /* 2081 * Restore caches to their bootup state. 2082 */ 2083 set_dcu(get_dcu() | cache_boot_state); 2084 2085 /* 2086 * Panic here if aflt->flt_panic has been set. Enqueued errors will 2087 * be logged as part of the panic flow. 2088 */ 2089 if (aflt->flt_panic) 2090 fm_panic("%sError(s)", iparity ? "IPE " : "DPE "); 2091 2092 /* 2093 * If this error occurred at TL>0 then flush the E$ here to reduce 2094 * the chance of getting an unrecoverable Fast ECC error. This 2095 * flush will evict the part of the parity trap handler that is run 2096 * at TL>1. 2097 */ 2098 if (tl) { 2099 cpu_flush_ecache(); 2100 } 2101 } 2102 2103 /* 2104 * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t 2105 * to indicate which portions of the captured data should be in the ereport. 2106 */ 2107 void 2108 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt) 2109 { 2110 int way = ch_flt->parity_data.ipe.cpl_way; 2111 int offset = ch_flt->parity_data.ipe.cpl_off; 2112 int tag_index; 2113 struct async_flt *aflt = (struct async_flt *)ch_flt; 2114 2115 2116 if ((offset != -1) || (way != -1)) { 2117 /* 2118 * Parity error in I$ tag or data 2119 */ 2120 tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx; 2121 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 2122 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2123 PN_ICIDX_TO_WAY(tag_index); 2124 else 2125 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2126 CH_ICIDX_TO_WAY(tag_index); 2127 ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag = 2128 IC_LOGFLAG_MAGIC; 2129 } else { 2130 /* 2131 * Parity error was not identified. 2132 * Log tags and data for all ways. 2133 */ 2134 for (way = 0; way < CH_ICACHE_NWAY; way++) { 2135 tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx; 2136 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 2137 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2138 PN_ICIDX_TO_WAY(tag_index); 2139 else 2140 ch_flt->parity_data.ipe.cpl_ic[way].ic_way = 2141 CH_ICIDX_TO_WAY(tag_index); 2142 ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag = 2143 IC_LOGFLAG_MAGIC; 2144 } 2145 } 2146 } 2147 2148 /* 2149 * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t 2150 * to indicate which portions of the captured data should be in the ereport. 2151 */ 2152 void 2153 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt) 2154 { 2155 int way = ch_flt->parity_data.dpe.cpl_way; 2156 int offset = ch_flt->parity_data.dpe.cpl_off; 2157 int tag_index; 2158 2159 if (offset != -1) { 2160 /* 2161 * Parity error in D$ or P$ data array. 2162 * 2163 * First check to see whether the parity error is in D$ or P$ 2164 * since P$ data parity errors are reported in Panther using 2165 * the same trap. 2166 */ 2167 if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) { 2168 tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx; 2169 ch_flt->parity_data.dpe.cpl_pc[way].pc_way = 2170 CH_PCIDX_TO_WAY(tag_index); 2171 ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag = 2172 PC_LOGFLAG_MAGIC; 2173 } else { 2174 tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx; 2175 ch_flt->parity_data.dpe.cpl_dc[way].dc_way = 2176 CH_DCIDX_TO_WAY(tag_index); 2177 ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag = 2178 DC_LOGFLAG_MAGIC; 2179 } 2180 } else if (way != -1) { 2181 /* 2182 * Parity error in D$ tag. 
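 * Record the way (derived from the captured index) and set the
 * logflag so this way's D$ tag data is included in the ereport.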
2183 */ 2184 tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx; 2185 ch_flt->parity_data.dpe.cpl_dc[way].dc_way = 2186 CH_DCIDX_TO_WAY(tag_index); 2187 ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag = 2188 DC_LOGFLAG_MAGIC; 2189 } 2190 } 2191 #endif /* CPU_IMP_L1_CACHE_PARITY */ 2192 2193 /* 2194 * The cpu_async_log_err() function is called via the [uc]e_drain() function to 2195 * post-process CPU events that are dequeued. As such, it can be invoked 2196 * from softint context, from AST processing in the trap() flow, or from the 2197 * panic flow. We decode the CPU-specific data, and take appropriate actions. 2198 * Historically this entry point was used to log the actual cmn_err(9F) text; 2199 * now with FMA it is used to prepare 'flt' to be converted into an ereport. 2200 * With FMA this function now also returns a flag which indicates to the 2201 * caller whether the ereport should be posted (1) or suppressed (0). 2202 */ 2203 static int 2204 cpu_async_log_err(void *flt, errorq_elem_t *eqep) 2205 { 2206 ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt; 2207 struct async_flt *aflt = (struct async_flt *)flt; 2208 uint64_t errors; 2209 2210 switch (ch_flt->flt_type) { 2211 case CPU_INV_AFSR: 2212 /* 2213 * If it is a disrupting trap and the AFSR is zero, then 2214 * the event has probably already been noted. Do not post 2215 * an ereport. 2216 */ 2217 if ((aflt->flt_status & ECC_C_TRAP) && 2218 (!(aflt->flt_stat & C_AFSR_MASK))) 2219 return (0); 2220 else 2221 return (1); 2222 case CPU_TO: 2223 case CPU_BERR: 2224 case CPU_FATAL: 2225 case CPU_FPUERR: 2226 return (1); 2227 2228 case CPU_UE_ECACHE_RETIRE: 2229 cpu_log_err(aflt); 2230 cpu_page_retire(ch_flt); 2231 return (1); 2232 2233 /* 2234 * Cases where we may want to suppress logging or perform 2235 * extended diagnostics. 2236 */ 2237 case CPU_CE: 2238 case CPU_EMC: 2239 /* 2240 * We want to skip logging and further classification 2241 * only if ALL the following conditions are true: 2242 * 2243 * 1. There is only one error 2244 * 2. That error is a correctable memory error 2245 * 3. The error is caused by the memory scrubber (in 2246 * which case the error will have occurred under 2247 * on_trap protection) 2248 * 4. The error is on a retired page 2249 * 2250 * Note: AFLT_PROT_EC is used places other than the memory 2251 * scrubber. However, none of those errors should occur 2252 * on a retired page. 2253 */ 2254 if ((ch_flt->afsr_errs & 2255 (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE && 2256 aflt->flt_prot == AFLT_PROT_EC) { 2257 2258 if (page_retire_check(aflt->flt_addr, NULL) == 0) { 2259 if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) { 2260 2261 /* 2262 * Since we're skipping logging, we'll need 2263 * to schedule the re-enabling of CEEN 2264 */ 2265 (void) timeout(cpu_delayed_check_ce_errors, 2266 (void *)(uintptr_t)aflt->flt_inst, 2267 drv_usectohz((clock_t)cpu_ceen_delay_secs 2268 * MICROSEC)); 2269 } 2270 return (0); 2271 } 2272 } 2273 2274 /* 2275 * Perform/schedule further classification actions, but 2276 * only if the page is healthy (we don't want bad 2277 * pages inducing too much diagnostic activity). If we could 2278 * not find a page pointer then we also skip this. If 2279 * ce_scrub_xdiag_recirc returns nonzero then it has chosen 2280 * to copy and recirculate the event (for further diagnostics) 2281 * and we should not proceed to log it here. 
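 * Where classification is skipped the reason is recorded in flt_disp
 * via CE_XDIAG_SETSKIPCODE() so the disposition still shows why no
 * further action was taken.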
2282 * 2283 * This must be the last step here before the cpu_log_err() 2284 * below - if an event recirculates cpu_ce_log_err() will 2285 * not call the current function but just proceed directly 2286 * to cpu_ereport_post after the cpu_log_err() avoided below. 2287 * 2288 * Note: Check cpu_impl_async_log_err if changing this 2289 */ 2290 if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) { 2291 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 2292 CE_XDIAG_SKIP_NOPP); 2293 } else { 2294 if (errors != PR_OK) { 2295 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 2296 CE_XDIAG_SKIP_PAGEDET); 2297 } else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep, 2298 offsetof(ch_async_flt_t, cmn_asyncflt))) { 2299 return (0); 2300 } 2301 } 2302 /*FALLTHRU*/ 2303 2304 /* 2305 * Cases where we just want to report the error and continue. 2306 */ 2307 case CPU_CE_ECACHE: 2308 case CPU_UE_ECACHE: 2309 case CPU_IV: 2310 case CPU_ORPH: 2311 cpu_log_err(aflt); 2312 return (1); 2313 2314 /* 2315 * Cases where we want to fall through to handle panicking. 2316 */ 2317 case CPU_UE: 2318 /* 2319 * We want to skip logging in the same conditions as the 2320 * CE case. In addition, we want to make sure we're not 2321 * panicking. 2322 */ 2323 if (!panicstr && (ch_flt->afsr_errs & 2324 (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE && 2325 aflt->flt_prot == AFLT_PROT_EC) { 2326 if (page_retire_check(aflt->flt_addr, NULL) == 0) { 2327 /* Zero the address to clear the error */ 2328 softcall(ecc_page_zero, (void *)aflt->flt_addr); 2329 return (0); 2330 } 2331 } 2332 cpu_log_err(aflt); 2333 break; 2334 2335 default: 2336 /* 2337 * If the us3_common.c code doesn't know the flt_type, it may 2338 * be an implementation-specific code. Call into the impldep 2339 * backend to find out what to do: if it tells us to continue, 2340 * break and handle as if falling through from a UE; if not, 2341 * the impldep backend has handled the error and we're done. 2342 */ 2343 switch (cpu_impl_async_log_err(flt, eqep)) { 2344 case CH_ASYNC_LOG_DONE: 2345 return (1); 2346 case CH_ASYNC_LOG_RECIRC: 2347 return (0); 2348 case CH_ASYNC_LOG_CONTINUE: 2349 break; /* continue on to handle UE-like error */ 2350 default: 2351 cmn_err(CE_WARN, "discarding error 0x%p with " 2352 "invalid fault type (0x%x)", 2353 (void *)aflt, ch_flt->flt_type); 2354 return (0); 2355 } 2356 } 2357 2358 /* ... fall through from the UE case */ 2359 2360 if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) { 2361 if (!panicstr) { 2362 cpu_page_retire(ch_flt); 2363 } else { 2364 /* 2365 * Clear UEs on panic so that we don't 2366 * get haunted by them during panic or 2367 * after reboot 2368 */ 2369 cpu_clearphys(aflt); 2370 (void) clear_errors(NULL); 2371 } 2372 } 2373 2374 return (1); 2375 } 2376 2377 /* 2378 * Retire the bad page that may contain the flushed error. 2379 */ 2380 void 2381 cpu_page_retire(ch_async_flt_t *ch_flt) 2382 { 2383 struct async_flt *aflt = (struct async_flt *)ch_flt; 2384 (void) page_retire(aflt->flt_addr, PR_UE); 2385 } 2386 2387 /* 2388 * The cpu_log_err() function is called by cpu_async_log_err() to perform the 2389 * generic event post-processing for correctable and uncorrectable memory, 2390 * E$, and MTag errors. Historically this entry point was used to log bits of 2391 * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be 2392 * converted into an ereport. In addition, it transmits the error to any 2393 * platform-specific service-processor FRU logging routines, if available. 
2394 */ 2395 void 2396 cpu_log_err(struct async_flt *aflt) 2397 { 2398 char unum[UNUM_NAMLEN]; 2399 int len = 0; 2400 int synd_status, synd_code, afar_status; 2401 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 2402 2403 /* 2404 * Need to turn on ECC_ECACHE for plat_get_mem_unum(). 2405 * For Panther, L2$ is not external, so we don't want to 2406 * generate an E$ unum for those errors. 2407 */ 2408 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { 2409 if (ch_flt->flt_bit & C_AFSR_EXT_L3_ERRS) 2410 aflt->flt_status |= ECC_ECACHE; 2411 } else { 2412 if (ch_flt->flt_bit & C_AFSR_ECACHE) 2413 aflt->flt_status |= ECC_ECACHE; 2414 } 2415 2416 /* 2417 * Determine syndrome status. 2418 */ 2419 synd_status = afsr_to_synd_status(aflt->flt_inst, 2420 ch_flt->afsr_errs, ch_flt->flt_bit); 2421 2422 /* 2423 * Determine afar status. 2424 */ 2425 if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)) 2426 afar_status = afsr_to_afar_status(ch_flt->afsr_errs, 2427 ch_flt->flt_bit); 2428 else 2429 afar_status = AFLT_STAT_INVALID; 2430 2431 /* 2432 * If afar status is not invalid do a unum lookup. 2433 */ 2434 if (afar_status != AFLT_STAT_INVALID) { 2435 (void) cpu_get_mem_unum_aflt(synd_status, aflt, unum, 2436 UNUM_NAMLEN, &len); 2437 } else { 2438 unum[0] = '\0'; 2439 } 2440 2441 synd_code = synd_to_synd_code(synd_status, 2442 aflt->flt_synd, ch_flt->flt_bit); 2443 2444 /* 2445 * Do not send the fruid message (plat_ecc_error_data_t) 2446 * to the SC if it can handle the enhanced error information 2447 * (plat_ecc_error2_data_t) or when the tunable 2448 * ecc_log_fruid_enable is set to 0. 2449 */ 2450 2451 if (&plat_ecc_capability_sc_get && 2452 plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) { 2453 if (&plat_log_fruid_error) 2454 plat_log_fruid_error(synd_code, aflt, unum, 2455 ch_flt->flt_bit); 2456 } 2457 2458 if (aflt->flt_func != NULL) 2459 aflt->flt_func(aflt, unum); 2460 2461 if (afar_status != AFLT_STAT_INVALID) 2462 cpu_log_diag_info(ch_flt); 2463 2464 /* 2465 * If we have a CEEN error , we do not reenable CEEN until after 2466 * we exit the trap handler. Otherwise, another error may 2467 * occur causing the handler to be entered recursively. 2468 * We set a timeout to trigger in cpu_ceen_delay_secs seconds, 2469 * to try and ensure that the CPU makes progress in the face 2470 * of a CE storm. 2471 */ 2472 if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) { 2473 (void) timeout(cpu_delayed_check_ce_errors, 2474 (void *)(uintptr_t)aflt->flt_inst, 2475 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); 2476 } 2477 } 2478 2479 /* 2480 * Invoked by error_init() early in startup and therefore before 2481 * startup_errorq() is called to drain any error Q - 2482 * 2483 * startup() 2484 * startup_end() 2485 * error_init() 2486 * cpu_error_init() 2487 * errorq_init() 2488 * errorq_drain() 2489 * start_other_cpus() 2490 * 2491 * The purpose of this routine is to create error-related taskqs. Taskqs 2492 * are used for this purpose because cpu_lock can't be grabbed from interrupt 2493 * context. 
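 * At present a single taskq, ch_check_ce_tq, is created; it is used
 * to re-enable CEEN after deferred CE handling.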
2494 */ 2495 void 2496 cpu_error_init(int items) 2497 { 2498 /* 2499 * Create taskq(s) to reenable CE 2500 */ 2501 ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri, 2502 items, items, TASKQ_PREPOPULATE); 2503 } 2504 2505 void 2506 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep) 2507 { 2508 char unum[UNUM_NAMLEN]; 2509 int len; 2510 2511 switch (aflt->flt_class) { 2512 case CPU_FAULT: 2513 cpu_ereport_init(aflt); 2514 if (cpu_async_log_err(aflt, eqep)) 2515 cpu_ereport_post(aflt); 2516 break; 2517 2518 case BUS_FAULT: 2519 if (aflt->flt_func != NULL) { 2520 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, 2521 unum, UNUM_NAMLEN, &len); 2522 aflt->flt_func(aflt, unum); 2523 } 2524 break; 2525 2526 case RECIRC_CPU_FAULT: 2527 aflt->flt_class = CPU_FAULT; 2528 cpu_log_err(aflt); 2529 cpu_ereport_post(aflt); 2530 break; 2531 2532 case RECIRC_BUS_FAULT: 2533 ASSERT(aflt->flt_class != RECIRC_BUS_FAULT); 2534 /*FALLTHRU*/ 2535 default: 2536 cmn_err(CE_WARN, "discarding CE error 0x%p with invalid " 2537 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 2538 return; 2539 } 2540 } 2541 2542 /* 2543 * Scrub and classify a CE. This function must not modify the 2544 * fault structure passed to it but instead should return the classification 2545 * information. 2546 */ 2547 2548 static uchar_t 2549 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried) 2550 { 2551 uchar_t disp = CE_XDIAG_EXTALG; 2552 on_trap_data_t otd; 2553 uint64_t orig_err; 2554 ch_cpu_logout_t *clop; 2555 2556 /* 2557 * Clear CEEN. CPU CE TL > 0 trap handling will already have done 2558 * this, but our other callers have not. Disable preemption to 2559 * avoid CPU migration so that we restore CEEN on the correct 2560 * cpu later. 2561 * 2562 * CEEN is cleared so that further CEs that our instruction and 2563 * data footprint induce do not cause use to either creep down 2564 * kernel stack to the point of overflow, or do so much CE 2565 * notification as to make little real forward progress. 2566 * 2567 * NCEEN must not be cleared. However it is possible that 2568 * our accesses to the flt_addr may provoke a bus error or timeout 2569 * if the offending address has just been unconfigured as part of 2570 * a DR action. So we must operate under on_trap protection. 2571 */ 2572 kpreempt_disable(); 2573 orig_err = get_error_enable(); 2574 if (orig_err & EN_REG_CEEN) 2575 set_error_enable(orig_err & ~EN_REG_CEEN); 2576 2577 /* 2578 * Our classification algorithm includes the line state before 2579 * the scrub; we'd like this captured after the detection and 2580 * before the algorithm below - the earlier the better. 2581 * 2582 * If we've come from a cpu CE trap then this info already exists 2583 * in the cpu logout area. 2584 * 2585 * For a CE detected by memscrub for which there was no trap 2586 * (running with CEEN off) cpu_log_and_clear_ce has called 2587 * cpu_ce_delayed_ec_logout to capture some cache data, and 2588 * marked the fault structure as incomplete as a flag to later 2589 * logging code. 2590 * 2591 * If called directly from an IO detected CE there has been 2592 * no line data capture. In this case we logout to the cpu logout 2593 * area - that's appropriate since it's the cpu cache data we need 2594 * for classification. We thus borrow the cpu logout area for a 2595 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in 2596 * this time (we will invalidate it again below). 
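 * (In each case the logout_tried argument says whether line data was
 * already captured; when it is B_FALSE we attempt the delayed logout
 * below and must invalidate the logout area again afterwards.)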
2597 * 2598 * If called from the partner check xcall handler then this cpu 2599 * (the partner) has not necessarily experienced a CE at this 2600 * address. But we want to capture line state before its scrub 2601 * attempt since we use that in our classification. 2602 */ 2603 if (logout_tried == B_FALSE) { 2604 if (!cpu_ce_delayed_ec_logout(ecc->flt_addr)) 2605 disp |= CE_XDIAG_NOLOGOUT; 2606 } 2607 2608 /* 2609 * Scrub memory, then check AFSR for errors. The AFAR we scrub may 2610 * no longer be valid (if DR'd since the initial event) so we 2611 * perform this scrub under on_trap protection. If this access is 2612 * ok then further accesses below will also be ok - DR cannot 2613 * proceed while this thread is active (preemption is disabled); 2614 * to be safe we'll nonetheless use on_trap again below. 2615 */ 2616 if (!on_trap(&otd, OT_DATA_ACCESS)) { 2617 cpu_scrubphys(ecc); 2618 } else { 2619 no_trap(); 2620 if (orig_err & EN_REG_CEEN) 2621 set_error_enable(orig_err); 2622 kpreempt_enable(); 2623 return (disp); 2624 } 2625 no_trap(); 2626 2627 /* 2628 * Did the casx read of the scrub log a CE that matches the AFAR? 2629 * Note that it's quite possible that the read sourced the data from 2630 * another cpu. 2631 */ 2632 if (clear_ecc(ecc)) 2633 disp |= CE_XDIAG_CE1; 2634 2635 /* 2636 * Read the data again. This time the read is very likely to 2637 * come from memory since the scrub induced a writeback to memory. 2638 */ 2639 if (!on_trap(&otd, OT_DATA_ACCESS)) { 2640 (void) lddphys(P2ALIGN(ecc->flt_addr, 8)); 2641 } else { 2642 no_trap(); 2643 if (orig_err & EN_REG_CEEN) 2644 set_error_enable(orig_err); 2645 kpreempt_enable(); 2646 return (disp); 2647 } 2648 no_trap(); 2649 2650 /* Did that read induce a CE that matches the AFAR? */ 2651 if (clear_ecc(ecc)) 2652 disp |= CE_XDIAG_CE2; 2653 2654 /* 2655 * Look at the logout information and record whether we found the 2656 * line in l2/l3 cache. For Panther we are interested in whether 2657 * we found it in either cache (it won't reside in both but 2658 * it is possible to read it that way given the moving target). 2659 */ 2660 clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL; 2661 if (!(disp & CE_XDIAG_NOLOGOUT) && clop && 2662 clop->clo_data.chd_afar != LOGOUT_INVALID) { 2663 int hit, level; 2664 int state; 2665 int totalsize; 2666 ch_ec_data_t *ecp; 2667 2668 /* 2669 * If hit is nonzero then a match was found and hit will 2670 * be one greater than the index which hit. For Panther we 2671 * also need to pay attention to level to see which of l2$ or 2672 * l3$ it hit in. 2673 */ 2674 hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data, 2675 0, &level); 2676 2677 if (hit) { 2678 --hit; 2679 disp |= CE_XDIAG_AFARMATCH; 2680 2681 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 2682 if (level == 2) 2683 ecp = &clop->clo_data.chd_l2_data[hit]; 2684 else 2685 ecp = &clop->clo_data.chd_ec_data[hit]; 2686 } else { 2687 ASSERT(level == 2); 2688 ecp = &clop->clo_data.chd_ec_data[hit]; 2689 } 2690 totalsize = cpunodes[CPU->cpu_id].ecache_size; 2691 state = cpu_ectag_pa_to_subblk_state(totalsize, 2692 ecc->flt_addr, ecp->ec_tag); 2693 2694 /* 2695 * Cheetah variants use different state encodings - 2696 * the CH_ECSTATE_* defines vary depending on the 2697 * module we're compiled for. Translate into our 2698 * one true version. Conflate Owner-Shared state 2699 * of SSM mode with Owner as victimisation of such 2700 * lines may cause a writeback. 
2701 */ 2702 switch (state) { 2703 case CH_ECSTATE_MOD: 2704 disp |= EC_STATE_M; 2705 break; 2706 2707 case CH_ECSTATE_OWN: 2708 case CH_ECSTATE_OWS: 2709 disp |= EC_STATE_O; 2710 break; 2711 2712 case CH_ECSTATE_EXL: 2713 disp |= EC_STATE_E; 2714 break; 2715 2716 case CH_ECSTATE_SHR: 2717 disp |= EC_STATE_S; 2718 break; 2719 2720 default: 2721 disp |= EC_STATE_I; 2722 break; 2723 } 2724 } 2725 2726 /* 2727 * If we initiated the delayed logout then we are responsible 2728 * for invalidating the logout area. 2729 */ 2730 if (logout_tried == B_FALSE) { 2731 bzero(clop, sizeof (ch_cpu_logout_t)); 2732 clop->clo_data.chd_afar = LOGOUT_INVALID; 2733 } 2734 } 2735 2736 /* 2737 * Re-enable CEEN if we turned it off. 2738 */ 2739 if (orig_err & EN_REG_CEEN) 2740 set_error_enable(orig_err); 2741 kpreempt_enable(); 2742 2743 return (disp); 2744 } 2745 2746 /* 2747 * Scrub a correctable memory error and collect data for classification 2748 * of CE type. This function is called in the detection path, ie tl0 handling 2749 * of a correctable error trap (cpus) or interrupt (IO) at high PIL. 2750 */ 2751 void 2752 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried) 2753 { 2754 /* 2755 * Cheetah CE classification does not set any bits in flt_status. 2756 * Instead we will record classification datapoints in flt_disp. 2757 */ 2758 ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY); 2759 2760 /* 2761 * To check if the error detected by IO is persistent, sticky or 2762 * intermittent. This is noticed by clear_ecc(). 2763 */ 2764 if (ecc->flt_status & ECC_IOBUS) 2765 ecc->flt_stat = C_AFSR_MEMORY; 2766 2767 /* 2768 * Record information from this first part of the algorithm in 2769 * flt_disp. 2770 */ 2771 ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried); 2772 } 2773 2774 /* 2775 * Select a partner to perform a further CE classification check from. 2776 * Must be called with kernel preemption disabled (to stop the cpu list 2777 * from changing). The detecting cpu we are partnering has cpuid 2778 * aflt->flt_inst; we might not be running on the detecting cpu. 2779 * 2780 * Restrict choice to active cpus in the same cpu partition as ourselves in 2781 * an effort to stop bad cpus in one partition causing other partitions to 2782 * perform excessive diagnostic activity. Actually since the errorq drain 2783 * is run from a softint most of the time and that is a global mechanism 2784 * this isolation is only partial. Return NULL if we fail to find a 2785 * suitable partner. 2786 * 2787 * We prefer a partner that is in a different latency group to ourselves as 2788 * we will share fewer datapaths. If such a partner is unavailable then 2789 * choose one in the same lgroup but prefer a different chip and only allow 2790 * a sibling core if flags includes PTNR_SIBLINGOK. If all else fails and 2791 * flags includes PTNR_SELFOK then permit selection of the original detector. 2792 * 2793 * We keep a cache of the last partner selected for a cpu, and we'll try to 2794 * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds 2795 * have passed since that selection was made. This provides the benefit 2796 * of the point-of-view of different partners over time but without 2797 * requiring frequent cpu list traversals. 
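 * The cached selection is kept in the detector's CPU private data,
 * in chpr_ceptnr_id and chpr_ceptnr_seltime (see below).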
2798 */ 2799 2800 #define PTNR_SIBLINGOK 0x1 /* Allow selection of sibling core */ 2801 #define PTNR_SELFOK 0x2 /* Allow selection of cpu to "partner" itself */ 2802 2803 static cpu_t * 2804 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep) 2805 { 2806 cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr; 2807 hrtime_t lasttime, thistime; 2808 2809 ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL); 2810 2811 dtcr = cpu[aflt->flt_inst]; 2812 2813 /* 2814 * Short-circuit for the following cases: 2815 * . the dtcr is not flagged active 2816 * . there is just one cpu present 2817 * . the detector has disappeared 2818 * . we were given a bad flt_inst cpuid; this should not happen 2819 * (eg PCI code now fills flt_inst) but if it does it is no 2820 * reason to panic. 2821 * . there is just one cpu left online in the cpu partition 2822 * 2823 * If we return NULL after this point then we do not update the 2824 * chpr_ceptnr_seltime which will cause us to perform a full lookup 2825 * again next time; this is the case where the only other cpu online 2826 * in the detector's partition is on the same chip as the detector 2827 * and since CEEN re-enable is throttled even that case should not 2828 * hurt performance. 2829 */ 2830 if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) { 2831 return (NULL); 2832 } 2833 if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) { 2834 if (flags & PTNR_SELFOK) { 2835 *typep = CE_XDIAG_PTNR_SELF; 2836 return (dtcr); 2837 } else { 2838 return (NULL); 2839 } 2840 } 2841 2842 thistime = gethrtime(); 2843 lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime); 2844 2845 /* 2846 * Select a starting point. 2847 */ 2848 if (!lasttime) { 2849 /* 2850 * We've never selected a partner for this detector before. 2851 * Start the scan at the next online cpu in the same cpu 2852 * partition. 2853 */ 2854 sp = dtcr->cpu_next_part; 2855 } else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) { 2856 /* 2857 * Our last selection has not aged yet. If this partner: 2858 * . is still a valid cpu, 2859 * . is still in the same partition as the detector 2860 * . is still marked active 2861 * . satisfies the 'flags' argument criteria 2862 * then select it again without updating the timestamp. 2863 */ 2864 sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)]; 2865 if (sp == NULL || sp->cpu_part != dtcr->cpu_part || 2866 !cpu_flagged_active(sp->cpu_flags) || 2867 (sp == dtcr && !(flags & PTNR_SELFOK)) || 2868 (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id && 2869 !(flags & PTNR_SIBLINGOK))) { 2870 sp = dtcr->cpu_next_part; 2871 } else { 2872 if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) { 2873 *typep = CE_XDIAG_PTNR_REMOTE; 2874 } else if (sp == dtcr) { 2875 *typep = CE_XDIAG_PTNR_SELF; 2876 } else if (sp->cpu_chip->chip_id == 2877 dtcr->cpu_chip->chip_id) { 2878 *typep = CE_XDIAG_PTNR_SIBLING; 2879 } else { 2880 *typep = CE_XDIAG_PTNR_LOCAL; 2881 } 2882 return (sp); 2883 } 2884 } else { 2885 /* 2886 * Our last selection has aged. If it is nonetheless still a 2887 * valid cpu then start the scan at the next cpu in the 2888 * partition after our last partner. If the last selection 2889 * is no longer a valid cpu then go with our default. In 2890 * this way we slowly cycle through possible partners to 2891 * obtain multiple viewpoints over time. 
2892 */ 2893 sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)]; 2894 if (sp == NULL) { 2895 sp = dtcr->cpu_next_part; 2896 } else { 2897 sp = sp->cpu_next_part; /* may be dtcr */ 2898 if (sp->cpu_part != dtcr->cpu_part) 2899 sp = dtcr; 2900 } 2901 } 2902 2903 /* 2904 * We have a proposed starting point for our search, but if this 2905 * cpu is offline then its cpu_next_part will point to itself 2906 * so we can't use that to iterate over cpus in this partition in 2907 * the loop below. We still want to avoid iterating over cpus not 2908 * in our partition, so in the case that our starting point is offline 2909 * we will repoint it to be the detector itself; and if the detector 2910 * happens to be offline we'll return NULL from the following loop. 2911 */ 2912 if (!cpu_flagged_active(sp->cpu_flags)) { 2913 sp = dtcr; 2914 } 2915 2916 ptnr = sp; 2917 locptnr = NULL; 2918 sibptnr = NULL; 2919 do { 2920 if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags)) 2921 continue; 2922 if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) { 2923 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id; 2924 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; 2925 *typep = CE_XDIAG_PTNR_REMOTE; 2926 return (ptnr); 2927 } 2928 if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) { 2929 if (sibptnr == NULL) 2930 sibptnr = ptnr; 2931 continue; 2932 } 2933 if (locptnr == NULL) 2934 locptnr = ptnr; 2935 } while ((ptnr = ptnr->cpu_next_part) != sp); 2936 2937 /* 2938 * A foreign partner has already been returned if one was available. 2939 * 2940 * If locptnr is not NULL it is a cpu in the same lgroup as the 2941 * detector, is active, and is not a sibling of the detector. 2942 * 2943 * If sibptnr is not NULL it is a sibling of the detector, and is 2944 * active. 2945 * 2946 * If we have to resort to using the detector itself we have already 2947 * checked that it is active. 2948 */ 2949 if (locptnr) { 2950 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id; 2951 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; 2952 *typep = CE_XDIAG_PTNR_LOCAL; 2953 return (locptnr); 2954 } else if (sibptnr && flags & PTNR_SIBLINGOK) { 2955 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id; 2956 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; 2957 *typep = CE_XDIAG_PTNR_SIBLING; 2958 return (sibptnr); 2959 } else if (flags & PTNR_SELFOK) { 2960 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id; 2961 CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; 2962 *typep = CE_XDIAG_PTNR_SELF; 2963 return (dtcr); 2964 } 2965 2966 return (NULL); 2967 } 2968 2969 /* 2970 * Cross call handler that is requested to run on the designated partner of 2971 * a cpu that experienced a possibly sticky or possibly persistnet CE. 2972 */ 2973 static void 2974 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp) 2975 { 2976 *dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE); 2977 } 2978 2979 /* 2980 * The associated errorqs are never destroyed so we do not need to deal with 2981 * them disappearing before this timeout fires. If the affected memory 2982 * has been DR'd out since the original event the scrub algrithm will catch 2983 * any errors and return null disposition info. If the original detecting 2984 * cpu has been DR'd out then ereport detector info will not be able to 2985 * lookup CPU type; with a small timeout this is unlikely. 
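 * The callback cross-calls the selected partner to rescrub and
 * classify the address, records the partner's disposition and
 * identity in flt_disp, and then commits the recirculated errorq
 * element so that the event is finally logged.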
2986 */ 2987 static void 2988 ce_lkychk_cb(ce_lkychk_cb_t *cbarg) 2989 { 2990 struct async_flt *aflt = cbarg->lkycb_aflt; 2991 uchar_t disp; 2992 cpu_t *cp; 2993 int ptnrtype; 2994 2995 kpreempt_disable(); 2996 if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK, 2997 &ptnrtype)) { 2998 xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt, 2999 (uint64_t)&disp); 3000 CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp); 3001 CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id); 3002 CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype); 3003 } else { 3004 ce_xdiag_lkydrops++; 3005 if (ncpus > 1) 3006 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3007 CE_XDIAG_SKIP_NOPTNR); 3008 } 3009 kpreempt_enable(); 3010 3011 errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC); 3012 kmem_free(cbarg, sizeof (ce_lkychk_cb_t)); 3013 } 3014 3015 /* 3016 * Called from errorq drain code when processing a CE error, both from 3017 * CPU and PCI drain functions. Decide what further classification actions, 3018 * if any, we will perform. Perform immediate actions now, and schedule 3019 * delayed actions as required. Note that we are no longer necessarily running 3020 * on the detecting cpu, and that the async_flt structure will not persist on 3021 * return from this function. 3022 * 3023 * Calls to this function should aim to be self-throtlling in some way. With 3024 * the delayed re-enable of CEEN the absolute rate of calls should not 3025 * be excessive. Callers should also avoid performing in-depth classification 3026 * for events in pages that are already known to be suspect. 3027 * 3028 * We return nonzero to indicate that the event has been copied and 3029 * recirculated for further testing. The caller should not log the event 3030 * in this case - it will be logged when further test results are available. 3031 * 3032 * Our possible contexts are that of errorq_drain: below lock level or from 3033 * panic context. We can assume that the cpu we are running on is online. 3034 */ 3035 3036 3037 #ifdef DEBUG 3038 static int ce_xdiag_forceaction; 3039 #endif 3040 3041 int 3042 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp, 3043 errorq_elem_t *eqep, size_t afltoffset) 3044 { 3045 ce_dispact_t dispact, action; 3046 cpu_t *cp; 3047 uchar_t dtcrinfo, disp; 3048 int ptnrtype; 3049 3050 if (!ce_disp_inited || panicstr || ce_xdiag_off) { 3051 ce_xdiag_drops++; 3052 return (0); 3053 } else if (!aflt->flt_in_memory) { 3054 ce_xdiag_drops++; 3055 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM); 3056 return (0); 3057 } 3058 3059 dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp); 3060 3061 /* 3062 * Some correctable events are not scrubbed/classified, such as those 3063 * noticed at the tail of cpu_deferred_error. So if there is no 3064 * initial detector classification go no further. 
3065 */ 3066 if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) { 3067 ce_xdiag_drops++; 3068 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB); 3069 return (0); 3070 } 3071 3072 dispact = CE_DISPACT(ce_disp_table, 3073 CE_XDIAG_AFARMATCHED(dtcrinfo), 3074 CE_XDIAG_STATE(dtcrinfo), 3075 CE_XDIAG_CE1SEEN(dtcrinfo), 3076 CE_XDIAG_CE2SEEN(dtcrinfo)); 3077 3078 3079 action = CE_ACT(dispact); /* bad lookup caught below */ 3080 #ifdef DEBUG 3081 if (ce_xdiag_forceaction != 0) 3082 action = ce_xdiag_forceaction; 3083 #endif 3084 3085 switch (action) { 3086 case CE_ACT_LKYCHK: { 3087 caddr_t ndata; 3088 errorq_elem_t *neqep; 3089 struct async_flt *ecc; 3090 ce_lkychk_cb_t *cbargp; 3091 3092 if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) { 3093 ce_xdiag_lkydrops++; 3094 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3095 CE_XDIAG_SKIP_DUPFAIL); 3096 break; 3097 } 3098 ecc = (struct async_flt *)(ndata + afltoffset); 3099 3100 ASSERT(ecc->flt_class == CPU_FAULT || 3101 ecc->flt_class == BUS_FAULT); 3102 ecc->flt_class = (ecc->flt_class == CPU_FAULT) ? 3103 RECIRC_CPU_FAULT : RECIRC_BUS_FAULT; 3104 3105 cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP); 3106 cbargp->lkycb_aflt = ecc; 3107 cbargp->lkycb_eqp = eqp; 3108 cbargp->lkycb_eqep = neqep; 3109 3110 (void) timeout((void (*)(void *))ce_lkychk_cb, 3111 (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec)); 3112 return (1); 3113 } 3114 3115 case CE_ACT_PTNRCHK: 3116 kpreempt_disable(); /* stop cpu list changing */ 3117 if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) { 3118 xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, 3119 (uint64_t)aflt, (uint64_t)&disp); 3120 CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp); 3121 CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id); 3122 CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype); 3123 } else if (ncpus > 1) { 3124 ce_xdiag_ptnrdrops++; 3125 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3126 CE_XDIAG_SKIP_NOPTNR); 3127 } else { 3128 ce_xdiag_ptnrdrops++; 3129 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, 3130 CE_XDIAG_SKIP_UNIPROC); 3131 } 3132 kpreempt_enable(); 3133 break; 3134 3135 case CE_ACT_DONE: 3136 break; 3137 3138 case CE_ACT(CE_DISP_BAD): 3139 default: 3140 #ifdef DEBUG 3141 cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action); 3142 #endif 3143 ce_xdiag_bad++; 3144 CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD); 3145 break; 3146 } 3147 3148 return (0); 3149 } 3150 3151 /* 3152 * We route all errors through a single switch statement. 3153 */ 3154 void 3155 cpu_ue_log_err(struct async_flt *aflt) 3156 { 3157 switch (aflt->flt_class) { 3158 case CPU_FAULT: 3159 cpu_ereport_init(aflt); 3160 if (cpu_async_log_err(aflt, NULL)) 3161 cpu_ereport_post(aflt); 3162 break; 3163 3164 case BUS_FAULT: 3165 bus_async_log_err(aflt); 3166 break; 3167 3168 default: 3169 cmn_err(CE_WARN, "discarding async error %p with invalid " 3170 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 3171 return; 3172 } 3173 } 3174 3175 /* 3176 * Routine for panic hook callback from panic_idle(). 
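 * It captures any error bits still pending in the AFSR/AFSR_EXT at
 * panic time and queues the corresponding events so that they are
 * logged as part of the panic flow.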
3177 */ 3178 void 3179 cpu_async_panic_callb(void) 3180 { 3181 ch_async_flt_t ch_flt; 3182 struct async_flt *aflt; 3183 ch_cpu_errors_t cpu_error_regs; 3184 uint64_t afsr_errs; 3185 3186 get_cpu_error_state(&cpu_error_regs); 3187 3188 afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | 3189 (cpu_error_regs.afsr_ext & C_AFSR_EXT_L3_ERRS); 3190 3191 if (afsr_errs) { 3192 3193 bzero(&ch_flt, sizeof (ch_async_flt_t)); 3194 aflt = (struct async_flt *)&ch_flt; 3195 aflt->flt_id = gethrtime_waitfree(); 3196 aflt->flt_bus_id = getprocessorid(); 3197 aflt->flt_inst = CPU->cpu_id; 3198 aflt->flt_stat = cpu_error_regs.afsr; 3199 aflt->flt_addr = cpu_error_regs.afar; 3200 aflt->flt_prot = AFLT_PROT_NONE; 3201 aflt->flt_class = CPU_FAULT; 3202 aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0); 3203 aflt->flt_panic = 1; 3204 ch_flt.afsr_ext = cpu_error_regs.afsr_ext; 3205 ch_flt.afsr_errs = afsr_errs; 3206 #if defined(SERRANO) 3207 ch_flt.afar2 = cpu_error_regs.afar2; 3208 #endif /* SERRANO */ 3209 (void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL); 3210 } 3211 } 3212 3213 /* 3214 * Routine to convert a syndrome into a syndrome code. 3215 */ 3216 static int 3217 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit) 3218 { 3219 if (synd_status == AFLT_STAT_INVALID) 3220 return (-1); 3221 3222 /* 3223 * Use the syndrome to index the appropriate syndrome table, 3224 * to get the code indicating which bit(s) is(are) bad. 3225 */ 3226 if (afsr_bit & 3227 (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { 3228 if (afsr_bit & C_AFSR_MSYND_ERRS) { 3229 #if defined(JALAPENO) || defined(SERRANO) 3230 if ((synd == 0) || (synd >= BSYND_TBL_SIZE)) 3231 return (-1); 3232 else 3233 return (BPAR0 + synd); 3234 #else /* JALAPENO || SERRANO */ 3235 if ((synd == 0) || (synd >= MSYND_TBL_SIZE)) 3236 return (-1); 3237 else 3238 return (mtag_syndrome_tab[synd]); 3239 #endif /* JALAPENO || SERRANO */ 3240 } else { 3241 if ((synd == 0) || (synd >= ESYND_TBL_SIZE)) 3242 return (-1); 3243 else 3244 return (ecc_syndrome_tab[synd]); 3245 } 3246 } else { 3247 return (-1); 3248 } 3249 } 3250 3251 /* 3252 * Routine to return a string identifying the physical name 3253 * associated with a memory/cache error. 3254 */ 3255 int 3256 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, 3257 uint64_t flt_addr, int flt_bus_id, int flt_in_memory, 3258 ushort_t flt_status, char *buf, int buflen, int *lenp) 3259 { 3260 int synd_code; 3261 int ret; 3262 3263 /* 3264 * An AFSR of -1 defaults to a memory syndrome. 3265 */ 3266 if (flt_stat == (uint64_t)-1) 3267 flt_stat = C_AFSR_CE; 3268 3269 synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat); 3270 3271 /* 3272 * Syndrome code must be either a single-bit error code 3273 * (0...143) or -1 for unum lookup. 3274 */ 3275 if (synd_code < 0 || synd_code >= M2) 3276 synd_code = -1; 3277 if (&plat_get_mem_unum) { 3278 if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id, 3279 flt_in_memory, flt_status, buf, buflen, lenp)) != 0) { 3280 buf[0] = '\0'; 3281 *lenp = 0; 3282 } 3283 3284 return (ret); 3285 } 3286 3287 return (ENOTSUP); 3288 } 3289 3290 /* 3291 * Wrapper for cpu_get_mem_unum() routine that takes an 3292 * async_flt struct rather than explicit arguments. 
3293 */ 3294 int 3295 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 3296 char *buf, int buflen, int *lenp) 3297 { 3298 /* 3299 * If we come thru here for an IO bus error aflt->flt_stat will 3300 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum() 3301 * so it will interpret this as a memory error. 3302 */ 3303 return (cpu_get_mem_unum(synd_status, aflt->flt_synd, 3304 (aflt->flt_class == BUS_FAULT) ? 3305 (uint64_t)-1 : ((ch_async_flt_t *)(aflt))->afsr_errs, 3306 aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, 3307 aflt->flt_status, buf, buflen, lenp)); 3308 } 3309 3310 /* 3311 * This routine is a more generic interface to cpu_get_mem_unum() 3312 * that may be used by other modules (e.g. mm). 3313 */ 3314 int 3315 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 3316 char *buf, int buflen, int *lenp) 3317 { 3318 int synd_status, flt_in_memory, ret; 3319 ushort_t flt_status = 0; 3320 char unum[UNUM_NAMLEN]; 3321 3322 /* 3323 * Check for an invalid address. 3324 */ 3325 if (afar == (uint64_t)-1) 3326 return (ENXIO); 3327 3328 if (synd == (uint64_t)-1) 3329 synd_status = AFLT_STAT_INVALID; 3330 else 3331 synd_status = AFLT_STAT_VALID; 3332 3333 flt_in_memory = (*afsr & C_AFSR_MEMORY) && 3334 pf_is_memory(afar >> MMU_PAGESHIFT); 3335 3336 /* 3337 * Need to turn on ECC_ECACHE for plat_get_mem_unum(). 3338 * For Panther, L2$ is not external, so we don't want to 3339 * generate an E$ unum for those errors. 3340 */ 3341 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 3342 if (*(afsr + 1) & C_AFSR_EXT_L3_ERRS) 3343 flt_status |= ECC_ECACHE; 3344 } else { 3345 if (*afsr & C_AFSR_ECACHE) 3346 flt_status |= ECC_ECACHE; 3347 } 3348 3349 ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 3350 CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp); 3351 if (ret != 0) 3352 return (ret); 3353 3354 if (*lenp >= buflen) 3355 return (ENAMETOOLONG); 3356 3357 (void) strncpy(buf, unum, buflen); 3358 3359 return (0); 3360 } 3361 3362 /* 3363 * Routine to return memory information associated 3364 * with a physical address and syndrome. 3365 */ 3366 int 3367 cpu_get_mem_info(uint64_t synd, uint64_t afar, 3368 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 3369 int *segsp, int *banksp, int *mcidp) 3370 { 3371 int synd_status, synd_code; 3372 3373 if (afar == (uint64_t)-1) 3374 return (ENXIO); 3375 3376 if (synd == (uint64_t)-1) 3377 synd_status = AFLT_STAT_INVALID; 3378 else 3379 synd_status = AFLT_STAT_VALID; 3380 3381 synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE); 3382 3383 if (p2get_mem_info != NULL) 3384 return ((p2get_mem_info)(synd_code, afar, 3385 mem_sizep, seg_sizep, bank_sizep, 3386 segsp, banksp, mcidp)); 3387 else 3388 return (ENOTSUP); 3389 } 3390 3391 /* 3392 * Routine to return a string identifying the physical 3393 * name associated with a cpuid. 3394 */ 3395 int 3396 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 3397 { 3398 int ret; 3399 char unum[UNUM_NAMLEN]; 3400 3401 if (&plat_get_cpu_unum) { 3402 if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp)) 3403 != 0) 3404 return (ret); 3405 } else { 3406 return (ENOTSUP); 3407 } 3408 3409 if (*lenp >= buflen) 3410 return (ENAMETOOLONG); 3411 3412 (void) strncpy(buf, unum, buflen); 3413 3414 return (0); 3415 } 3416 3417 /* 3418 * This routine exports the name buffer size. 
3419 */ 3420 size_t 3421 cpu_get_name_bufsize() 3422 { 3423 return (UNUM_NAMLEN); 3424 } 3425 3426 /* 3427 * Historical function, apparantly not used. 3428 */ 3429 /* ARGSUSED */ 3430 void 3431 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err) 3432 {} 3433 3434 /* 3435 * Historical function only called for SBus errors in debugging. 3436 */ 3437 /*ARGSUSED*/ 3438 void 3439 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err) 3440 {} 3441 3442 /* 3443 * Clear the AFSR sticky bits. The routine returns a non-zero value if 3444 * any of the AFSR's sticky errors are detected. If a non-null pointer to 3445 * an async fault structure argument is passed in, the captured error state 3446 * (AFSR, AFAR) info will be returned in the structure. 3447 */ 3448 int 3449 clear_errors(ch_async_flt_t *ch_flt) 3450 { 3451 struct async_flt *aflt = (struct async_flt *)ch_flt; 3452 ch_cpu_errors_t cpu_error_regs; 3453 3454 get_cpu_error_state(&cpu_error_regs); 3455 3456 if (ch_flt != NULL) { 3457 aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK; 3458 aflt->flt_addr = cpu_error_regs.afar; 3459 ch_flt->afsr_ext = cpu_error_regs.afsr_ext; 3460 ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | 3461 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS); 3462 #if defined(SERRANO) 3463 ch_flt->afar2 = cpu_error_regs.afar2; 3464 #endif /* SERRANO */ 3465 } 3466 3467 set_cpu_error_state(&cpu_error_regs); 3468 3469 return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | 3470 (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0); 3471 } 3472 3473 /* 3474 * Clear any AFSR error bits, and check for persistence. 3475 * 3476 * It would be desirable to also insist that syndrome match. PCI handling 3477 * has already filled flt_synd. For errors trapped by CPU we only fill 3478 * flt_synd when we queue the event, so we do not have a valid flt_synd 3479 * during initial classification (it is valid if we're called as part of 3480 * subsequent low-pil additional classification attempts). We could try 3481 * to determine which syndrome to use: we know we're only called for 3482 * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use 3483 * would be esynd/none and esynd/msynd, respectively. If that is 3484 * implemented then what do we do in the case that we do experience an 3485 * error on the same afar but with different syndrome? At the very least 3486 * we should count such occurences. Anyway, for now, we'll leave it as 3487 * it has been for ages. 3488 */ 3489 static int 3490 clear_ecc(struct async_flt *aflt) 3491 { 3492 ch_cpu_errors_t cpu_error_regs; 3493 3494 /* 3495 * Snapshot the AFSR and AFAR and clear any errors 3496 */ 3497 get_cpu_error_state(&cpu_error_regs); 3498 set_cpu_error_state(&cpu_error_regs); 3499 3500 /* 3501 * If any of the same memory access error bits are still on and 3502 * the AFAR matches, return that the error is persistent. 3503 */ 3504 return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 && 3505 cpu_error_regs.afar == aflt->flt_addr); 3506 } 3507 3508 /* 3509 * Turn off all cpu error detection, normally only used for panics. 3510 */ 3511 void 3512 cpu_disable_errors(void) 3513 { 3514 xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE); 3515 } 3516 3517 /* 3518 * Enable errors. 
3519 */ 3520 void 3521 cpu_enable_errors(void) 3522 { 3523 xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE); 3524 } 3525 3526 /* 3527 * Flush the entire ecache using displacement flush by reading through a 3528 * physical address range twice as large as the Ecache. 3529 */ 3530 void 3531 cpu_flush_ecache(void) 3532 { 3533 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size, 3534 cpunodes[CPU->cpu_id].ecache_linesize); 3535 } 3536 3537 /* 3538 * Return CPU E$ set size - E$ size divided by the associativity. 3539 * We use this function in places where the CPU_PRIVATE ptr may not be 3540 * initialized yet. Note that for send_mondo and in the Ecache scrubber, 3541 * we're guaranteed that CPU_PRIVATE is initialized. Also, cpunodes is set 3542 * up before the kernel switches from OBP's to the kernel's trap table, so 3543 * we don't have to worry about cpunodes being unitialized. 3544 */ 3545 int 3546 cpu_ecache_set_size(struct cpu *cp) 3547 { 3548 if (CPU_PRIVATE(cp)) 3549 return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size)); 3550 3551 return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway()); 3552 } 3553 3554 /* 3555 * Flush Ecache line. 3556 * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno. 3557 * Uses normal displacement flush for Cheetah. 3558 */ 3559 static void 3560 cpu_flush_ecache_line(ch_async_flt_t *ch_flt) 3561 { 3562 struct async_flt *aflt = (struct async_flt *)ch_flt; 3563 int ec_set_size = cpu_ecache_set_size(CPU); 3564 3565 ecache_flush_line(aflt->flt_addr, ec_set_size); 3566 } 3567 3568 /* 3569 * Scrub physical address. 3570 * Scrub code is different depending upon whether this a Cheetah+ with 2-way 3571 * Ecache or direct-mapped Ecache. 3572 */ 3573 static void 3574 cpu_scrubphys(struct async_flt *aflt) 3575 { 3576 int ec_set_size = cpu_ecache_set_size(CPU); 3577 3578 scrubphys(aflt->flt_addr, ec_set_size); 3579 } 3580 3581 /* 3582 * Clear physical address. 3583 * Scrub code is different depending upon whether this a Cheetah+ with 2-way 3584 * Ecache or direct-mapped Ecache. 3585 */ 3586 void 3587 cpu_clearphys(struct async_flt *aflt) 3588 { 3589 int lsize = cpunodes[CPU->cpu_id].ecache_linesize; 3590 int ec_set_size = cpu_ecache_set_size(CPU); 3591 3592 3593 clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize); 3594 } 3595 3596 #if defined(CPU_IMP_ECACHE_ASSOC) 3597 /* 3598 * Check for a matching valid line in all the sets. 3599 * If found, return set# + 1. Otherwise return 0. 3600 */ 3601 static int 3602 cpu_ecache_line_valid(ch_async_flt_t *ch_flt) 3603 { 3604 struct async_flt *aflt = (struct async_flt *)ch_flt; 3605 int totalsize = cpunodes[CPU->cpu_id].ecache_size; 3606 int ec_set_size = cpu_ecache_set_size(CPU); 3607 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; 3608 int nway = cpu_ecache_nway(); 3609 int i; 3610 3611 for (i = 0; i < nway; i++, ecp++) { 3612 if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) && 3613 (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) == 3614 cpu_ectag_to_pa(ec_set_size, ecp->ec_tag)) 3615 return (i+1); 3616 } 3617 return (0); 3618 } 3619 #endif /* CPU_IMP_ECACHE_ASSOC */ 3620 3621 /* 3622 * Check whether a line in the given logout info matches the specified 3623 * fault address. If reqval is set then the line must not be Invalid. 3624 * Returns 0 on failure; on success (way + 1) is returned an *level is 3625 * set to 2 for l2$ or 3 for l3$. 
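 * For Panther both the l2$ and l3$ logout data are searched; other
 * Cheetah variants have only the single set of E$ logout data.
 * Typical use (as in cpu_ce_scrub_mem_err_common() above), with
 * illustrative locals:
 *
 *	int level;
 *	int hit = cpu_matching_ecache_line(addr, &clop->clo_data,
 *	    0, &level);
 *	if (hit) {
 *		way = hit - 1;		/* level is now 2 or 3 */
 *		...
 *	}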
3626 */ 3627 static int 3628 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level) 3629 { 3630 ch_diag_data_t *cdp = data; 3631 ch_ec_data_t *ecp; 3632 int totalsize, ec_set_size; 3633 int i, ways; 3634 int match = 0; 3635 int tagvalid; 3636 uint64_t addr, tagpa; 3637 int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation); 3638 3639 /* 3640 * Check the l2$ logout data 3641 */ 3642 if (ispanther) { 3643 ecp = &cdp->chd_l2_data[0]; 3644 ec_set_size = PN_L2_SET_SIZE; 3645 ways = PN_L2_NWAYS; 3646 } else { 3647 ecp = &cdp->chd_ec_data[0]; 3648 ec_set_size = cpu_ecache_set_size(CPU); 3649 ways = cpu_ecache_nway(); 3650 totalsize = cpunodes[CPU->cpu_id].ecache_size; 3651 } 3652 /* remove low order PA bits from fault address not used in PA tag */ 3653 addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size); 3654 for (i = 0; i < ways; i++, ecp++) { 3655 if (ispanther) { 3656 tagpa = PN_L2TAG_TO_PA(ecp->ec_tag); 3657 tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag); 3658 } else { 3659 tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag); 3660 tagvalid = !cpu_ectag_line_invalid(totalsize, 3661 ecp->ec_tag); 3662 } 3663 if (tagpa == addr && (!reqval || tagvalid)) { 3664 match = i + 1; 3665 *level = 2; 3666 break; 3667 } 3668 } 3669 3670 if (match || !ispanther) 3671 return (match); 3672 3673 /* For Panther we also check the l3$ */ 3674 ecp = &cdp->chd_ec_data[0]; 3675 ec_set_size = PN_L3_SET_SIZE; 3676 ways = PN_L3_NWAYS; 3677 addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size); 3678 3679 for (i = 0; i < ways; i++, ecp++) { 3680 if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval || 3681 !PN_L3_LINE_INVALID(ecp->ec_tag))) { 3682 match = i + 1; 3683 *level = 3; 3684 break; 3685 } 3686 } 3687 3688 return (match); 3689 } 3690 3691 #if defined(CPU_IMP_L1_CACHE_PARITY) 3692 /* 3693 * Record information related to the source of an Dcache Parity Error. 3694 */ 3695 static void 3696 cpu_dcache_parity_info(ch_async_flt_t *ch_flt) 3697 { 3698 int dc_set_size = dcache_size / CH_DCACHE_NWAY; 3699 int index; 3700 3701 /* 3702 * Since instruction decode cannot be done at high PIL 3703 * just examine the entire Dcache to locate the error. 3704 */ 3705 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3706 ch_flt->parity_data.dpe.cpl_way = -1; 3707 ch_flt->parity_data.dpe.cpl_off = -1; 3708 } 3709 for (index = 0; index < dc_set_size; index += dcache_linesize) 3710 cpu_dcache_parity_check(ch_flt, index); 3711 } 3712 3713 /* 3714 * Check all ways of the Dcache at a specified index for good parity. 3715 */ 3716 static void 3717 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index) 3718 { 3719 int dc_set_size = dcache_size / CH_DCACHE_NWAY; 3720 uint64_t parity_bits, pbits, data_word; 3721 static int parity_bits_popc[] = { 0, 1, 1, 0 }; 3722 int way, word, data_byte; 3723 ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0]; 3724 ch_dc_data_t tmp_dcp; 3725 3726 for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) { 3727 /* 3728 * Perform diagnostic read. 3729 */ 3730 get_dcache_dtag(index + way * dc_set_size, 3731 (uint64_t *)&tmp_dcp); 3732 3733 /* 3734 * Check tag for even parity. 3735 * Sum of 1 bits (including parity bit) should be even. 3736 */ 3737 if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) { 3738 /* 3739 * If this is the first error log detailed information 3740 * about it and check the snoop tag. Otherwise just 3741 * record the fact that we found another error. 
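 * (Across the whole D$ check, cpl_lcnt ends up counting every parity
 * error found, while the detailed logout data is retained only for the
 * first one.)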
3742 */ 3743 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3744 ch_flt->parity_data.dpe.cpl_way = way; 3745 ch_flt->parity_data.dpe.cpl_cache = 3746 CPU_DC_PARITY; 3747 ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG; 3748 3749 if (popc64(tmp_dcp.dc_sntag & 3750 CHP_DCSNTAG_PARMASK) & 1) { 3751 ch_flt->parity_data.dpe.cpl_tag |= 3752 CHP_DC_SNTAG; 3753 ch_flt->parity_data.dpe.cpl_lcnt++; 3754 } 3755 3756 bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t)); 3757 } 3758 3759 ch_flt->parity_data.dpe.cpl_lcnt++; 3760 } 3761 3762 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 3763 /* 3764 * Panther has more parity bits than the other 3765 * processors for covering dcache data and so each 3766 * byte of data in each word has its own parity bit. 3767 */ 3768 parity_bits = tmp_dcp.dc_pn_data_parity; 3769 for (word = 0; word < 4; word++) { 3770 data_word = tmp_dcp.dc_data[word]; 3771 pbits = parity_bits & PN_DC_DATA_PARITY_MASK; 3772 for (data_byte = 0; data_byte < 8; 3773 data_byte++) { 3774 if (((popc64(data_word & 3775 PN_DC_DATA_PARITY_MASK)) & 1) ^ 3776 (pbits & 1)) { 3777 cpu_record_dc_data_parity( 3778 ch_flt, dcp, &tmp_dcp, way, 3779 word); 3780 } 3781 pbits >>= 1; 3782 data_word >>= 8; 3783 } 3784 parity_bits >>= 8; 3785 } 3786 } else { 3787 /* 3788 * Check data array for even parity. 3789 * The 8 parity bits are grouped into 4 pairs each 3790 * of which covers a 64-bit word. The endianness is 3791 * reversed -- the low-order parity bits cover the 3792 * high-order data words. 3793 */ 3794 parity_bits = tmp_dcp.dc_utag >> 8; 3795 for (word = 0; word < 4; word++) { 3796 pbits = (parity_bits >> (6 - word * 2)) & 3; 3797 if ((popc64(tmp_dcp.dc_data[word]) + 3798 parity_bits_popc[pbits]) & 1) { 3799 cpu_record_dc_data_parity(ch_flt, dcp, 3800 &tmp_dcp, way, word); 3801 } 3802 } 3803 } 3804 } 3805 } 3806 3807 static void 3808 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt, 3809 ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word) 3810 { 3811 /* 3812 * If this is the first error log detailed information about it. 3813 * Otherwise just record the fact that we found another error. 3814 */ 3815 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3816 ch_flt->parity_data.dpe.cpl_way = way; 3817 ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY; 3818 ch_flt->parity_data.dpe.cpl_off = word * 8; 3819 bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t)); 3820 } 3821 ch_flt->parity_data.dpe.cpl_lcnt++; 3822 } 3823 3824 /* 3825 * Record information related to the source of an Icache Parity Error. 3826 * 3827 * Called with the Icache disabled so any diagnostic accesses are safe. 3828 */ 3829 static void 3830 cpu_icache_parity_info(ch_async_flt_t *ch_flt) 3831 { 3832 int ic_set_size; 3833 int ic_linesize; 3834 int index; 3835 3836 if (CPU_PRIVATE(CPU)) { 3837 ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) / 3838 CH_ICACHE_NWAY; 3839 ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize); 3840 } else { 3841 ic_set_size = icache_size / CH_ICACHE_NWAY; 3842 ic_linesize = icache_linesize; 3843 } 3844 3845 ch_flt->parity_data.ipe.cpl_way = -1; 3846 ch_flt->parity_data.ipe.cpl_off = -1; 3847 3848 for (index = 0; index < ic_set_size; index += ic_linesize) 3849 cpu_icache_parity_check(ch_flt, index); 3850 } 3851 3852 /* 3853 * Check all ways of the Icache at a specified index for good parity. 
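 *
 * For each way the physical tag, snoop tag and instruction words are
 * read via diagnostic ASI accesses and checked for even parity; an
 * instruction word is flagged when popc64(ic_data[i] & parmask) is
 * odd, where parmask selects the bits that participate in parity for
 * PC-relative versus non-PC-relative instructions (plus the separate
 * Panther instruction parity bit where applicable).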
3854 */ 3855 static void 3856 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index) 3857 { 3858 uint64_t parmask, pn_inst_parity; 3859 int ic_set_size; 3860 int ic_linesize; 3861 int flt_index, way, instr, num_instr; 3862 struct async_flt *aflt = (struct async_flt *)ch_flt; 3863 ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0]; 3864 ch_ic_data_t tmp_icp; 3865 3866 if (CPU_PRIVATE(CPU)) { 3867 ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) / 3868 CH_ICACHE_NWAY; 3869 ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize); 3870 } else { 3871 ic_set_size = icache_size / CH_ICACHE_NWAY; 3872 ic_linesize = icache_linesize; 3873 } 3874 3875 /* 3876 * Panther has twice as many instructions per icache line and the 3877 * instruction parity bit is in a different location. 3878 */ 3879 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 3880 num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t); 3881 pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK; 3882 } else { 3883 num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t); 3884 pn_inst_parity = 0; 3885 } 3886 3887 /* 3888 * Index at which we expect to find the parity error. 3889 */ 3890 flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize); 3891 3892 for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) { 3893 /* 3894 * Diagnostic reads expect address argument in ASI format. 3895 */ 3896 get_icache_dtag(2 * (index + way * ic_set_size), 3897 (uint64_t *)&tmp_icp); 3898 3899 /* 3900 * If this is the index in which we expect to find the 3901 * error log detailed information about each of the ways. 3902 * This information will be displayed later if we can't 3903 * determine the exact way in which the error is located. 3904 */ 3905 if (flt_index == index) 3906 bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t)); 3907 3908 /* 3909 * Check tag for even parity. 3910 * Sum of 1 bits (including parity bit) should be even. 3911 */ 3912 if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) { 3913 /* 3914 * If this way is the one in which we expected 3915 * to find the error record the way and check the 3916 * snoop tag. Otherwise just record the fact we 3917 * found another error. 3918 */ 3919 if (flt_index == index) { 3920 ch_flt->parity_data.ipe.cpl_way = way; 3921 ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG; 3922 3923 if (popc64(tmp_icp.ic_sntag & 3924 CHP_ICSNTAG_PARMASK) & 1) { 3925 ch_flt->parity_data.ipe.cpl_tag |= 3926 CHP_IC_SNTAG; 3927 ch_flt->parity_data.ipe.cpl_lcnt++; 3928 } 3929 3930 } 3931 ch_flt->parity_data.ipe.cpl_lcnt++; 3932 continue; 3933 } 3934 3935 /* 3936 * Check instruction data for even parity. 3937 * Bits participating in parity differ for PC-relative 3938 * versus non-PC-relative instructions. 3939 */ 3940 for (instr = 0; instr < num_instr; instr++) { 3941 parmask = (tmp_icp.ic_data[instr] & 3942 CH_ICDATA_PRED_ISPCREL) ? 3943 (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) : 3944 (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity); 3945 if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) { 3946 /* 3947 * If this way is the one in which we expected 3948 * to find the error record the way and offset. 3949 * Otherwise just log the fact we found another 3950 * error. 3951 */ 3952 if (flt_index == index) { 3953 ch_flt->parity_data.ipe.cpl_way = way; 3954 ch_flt->parity_data.ipe.cpl_off = 3955 instr * 4; 3956 } 3957 ch_flt->parity_data.ipe.cpl_lcnt++; 3958 continue; 3959 } 3960 } 3961 } 3962 } 3963 3964 /* 3965 * Record information related to the source of an Pcache Parity Error. 
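 *
 * Every way and word of the P$ is examined; a data word is flagged
 * when the low bit of popc64(pc_data[word]) differs from the stored
 * parity bit for that word (bits 57:50 of ASI_PCACHE_STATUS_DATA).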
3966 */ 3967 static void 3968 cpu_pcache_parity_info(ch_async_flt_t *ch_flt) 3969 { 3970 int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY; 3971 int index; 3972 3973 /* 3974 * Since instruction decode cannot be done at high PIL just 3975 * examine the entire Pcache to check for any parity errors. 3976 */ 3977 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 3978 ch_flt->parity_data.dpe.cpl_way = -1; 3979 ch_flt->parity_data.dpe.cpl_off = -1; 3980 } 3981 for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE) 3982 cpu_pcache_parity_check(ch_flt, index); 3983 } 3984 3985 /* 3986 * Check all ways of the Pcache at a specified index for good parity. 3987 */ 3988 static void 3989 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index) 3990 { 3991 int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY; 3992 int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t); 3993 int way, word, pbit, parity_bits; 3994 ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0]; 3995 ch_pc_data_t tmp_pcp; 3996 3997 for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) { 3998 /* 3999 * Perform diagnostic read. 4000 */ 4001 get_pcache_dtag(index + way * pc_set_size, 4002 (uint64_t *)&tmp_pcp); 4003 /* 4004 * Check data array for odd parity. There are 8 parity 4005 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each 4006 * of those bits covers exactly 8 bytes of the data 4007 * array: 4008 * 4009 * parity bit P$ data bytes covered 4010 * ---------- --------------------- 4011 * 50 63:56 4012 * 51 55:48 4013 * 52 47:40 4014 * 53 39:32 4015 * 54 31:24 4016 * 55 23:16 4017 * 56 15:8 4018 * 57 7:0 4019 */ 4020 parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status); 4021 for (word = 0; word < pc_data_words; word++) { 4022 pbit = (parity_bits >> (pc_data_words - word - 1)) & 1; 4023 if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) { 4024 /* 4025 * If this is the first error log detailed 4026 * information about it. Otherwise just record 4027 * the fact that we found another error. 4028 */ 4029 if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { 4030 ch_flt->parity_data.dpe.cpl_way = way; 4031 ch_flt->parity_data.dpe.cpl_cache = 4032 CPU_PC_PARITY; 4033 ch_flt->parity_data.dpe.cpl_off = 4034 word * sizeof (uint64_t); 4035 bcopy(&tmp_pcp, pcp, 4036 sizeof (ch_pc_data_t)); 4037 } 4038 ch_flt->parity_data.dpe.cpl_lcnt++; 4039 } 4040 } 4041 } 4042 } 4043 4044 4045 /* 4046 * Add L1 Data cache data to the ereport payload. 4047 */ 4048 static void 4049 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl) 4050 { 4051 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4052 ch_dc_data_t *dcp; 4053 ch_dc_data_t dcdata[CH_DCACHE_NWAY]; 4054 uint_t nelem; 4055 int i, ways_to_check, ways_logged = 0; 4056 4057 /* 4058 * If this is an D$ fault then there may be multiple 4059 * ways captured in the ch_parity_log_t structure. 4060 * Otherwise, there will be at most one way captured 4061 * in the ch_diag_data_t struct. 4062 * Check each way to see if it should be encoded. 4063 */ 4064 if (ch_flt->flt_type == CPU_DC_PARITY) 4065 ways_to_check = CH_DCACHE_NWAY; 4066 else 4067 ways_to_check = 1; 4068 for (i = 0; i < ways_to_check; i++) { 4069 if (ch_flt->flt_type == CPU_DC_PARITY) 4070 dcp = &ch_flt->parity_data.dpe.cpl_dc[i]; 4071 else 4072 dcp = &ch_flt->flt_diag_data.chd_dc_data; 4073 if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) { 4074 bcopy(dcp, &dcdata[ways_logged], 4075 sizeof (ch_dc_data_t)); 4076 ways_logged++; 4077 } 4078 } 4079 4080 /* 4081 * Add the dcache data to the payload. 
4082 */ 4083 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS, 4084 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4085 if (ways_logged != 0) { 4086 nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged; 4087 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA, 4088 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL); 4089 } 4090 } 4091 4092 /* 4093 * Add L1 Instruction cache data to the ereport payload. 4094 */ 4095 static void 4096 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl) 4097 { 4098 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4099 ch_ic_data_t *icp; 4100 ch_ic_data_t icdata[CH_ICACHE_NWAY]; 4101 uint_t nelem; 4102 int i, ways_to_check, ways_logged = 0; 4103 4104 /* 4105 * If this is an I$ fault then there may be multiple 4106 * ways captured in the ch_parity_log_t structure. 4107 * Otherwise, there will be at most one way captured 4108 * in the ch_diag_data_t struct. 4109 * Check each way to see if it should be encoded. 4110 */ 4111 if (ch_flt->flt_type == CPU_IC_PARITY) 4112 ways_to_check = CH_ICACHE_NWAY; 4113 else 4114 ways_to_check = 1; 4115 for (i = 0; i < ways_to_check; i++) { 4116 if (ch_flt->flt_type == CPU_IC_PARITY) 4117 icp = &ch_flt->parity_data.ipe.cpl_ic[i]; 4118 else 4119 icp = &ch_flt->flt_diag_data.chd_ic_data; 4120 if (icp->ic_logflag == IC_LOGFLAG_MAGIC) { 4121 bcopy(icp, &icdata[ways_logged], 4122 sizeof (ch_ic_data_t)); 4123 ways_logged++; 4124 } 4125 } 4126 4127 /* 4128 * Add the icache data to the payload. 4129 */ 4130 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS, 4131 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4132 if (ways_logged != 0) { 4133 nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged; 4134 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA, 4135 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL); 4136 } 4137 } 4138 4139 #endif /* CPU_IMP_L1_CACHE_PARITY */ 4140 4141 /* 4142 * Add ecache data to payload. 4143 */ 4144 static void 4145 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl) 4146 { 4147 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4148 ch_ec_data_t *ecp; 4149 ch_ec_data_t ecdata[CHD_EC_DATA_SETS]; 4150 uint_t nelem; 4151 int i, ways_logged = 0; 4152 4153 /* 4154 * Check each way to see if it should be encoded 4155 * and concatinate it into a temporary buffer. 4156 */ 4157 for (i = 0; i < CHD_EC_DATA_SETS; i++) { 4158 ecp = &ch_flt->flt_diag_data.chd_ec_data[i]; 4159 if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) { 4160 bcopy(ecp, &ecdata[ways_logged], 4161 sizeof (ch_ec_data_t)); 4162 ways_logged++; 4163 } 4164 } 4165 4166 /* 4167 * Panther CPUs have an additional level of cache and so 4168 * what we just collected was the L3 (ecache) and not the 4169 * L2 cache. 4170 */ 4171 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { 4172 /* 4173 * Add the L3 (ecache) data to the payload. 4174 */ 4175 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS, 4176 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4177 if (ways_logged != 0) { 4178 nelem = sizeof (ch_ec_data_t) / 4179 sizeof (uint64_t) * ways_logged; 4180 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA, 4181 DATA_TYPE_UINT64_ARRAY, nelem, 4182 (uint64_t *)ecdata, NULL); 4183 } 4184 4185 /* 4186 * Now collect the L2 cache. 
4187 */ 4188 ways_logged = 0; 4189 for (i = 0; i < PN_L2_NWAYS; i++) { 4190 ecp = &ch_flt->flt_diag_data.chd_l2_data[i]; 4191 if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) { 4192 bcopy(ecp, &ecdata[ways_logged], 4193 sizeof (ch_ec_data_t)); 4194 ways_logged++; 4195 } 4196 } 4197 } 4198 4199 /* 4200 * Add the L2 cache data to the payload. 4201 */ 4202 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS, 4203 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 4204 if (ways_logged != 0) { 4205 nelem = sizeof (ch_ec_data_t) / 4206 sizeof (uint64_t) * ways_logged; 4207 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA, 4208 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)ecdata, NULL); 4209 } 4210 } 4211 4212 /* 4213 * Encode the data saved in the ch_async_flt_t struct into 4214 * the FM ereport payload. 4215 */ 4216 static void 4217 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload, 4218 nvlist_t *resource, int *afar_status, int *synd_status) 4219 { 4220 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4221 *synd_status = AFLT_STAT_INVALID; 4222 *afar_status = AFLT_STAT_INVALID; 4223 4224 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) { 4225 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR, 4226 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 4227 } 4228 4229 if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) && 4230 IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { 4231 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT, 4232 DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL); 4233 } 4234 4235 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) { 4236 *afar_status = afsr_to_afar_status(ch_flt->afsr_errs, 4237 ch_flt->flt_bit); 4238 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS, 4239 DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL); 4240 } 4241 4242 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) { 4243 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR, 4244 DATA_TYPE_UINT64, aflt->flt_addr, NULL); 4245 } 4246 4247 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) { 4248 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC, 4249 DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL); 4250 } 4251 4252 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) { 4253 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL, 4254 DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL); 4255 } 4256 4257 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) { 4258 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT, 4259 DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL); 4260 } 4261 4262 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) { 4263 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV, 4264 DATA_TYPE_BOOLEAN_VALUE, 4265 (aflt->flt_priv ? B_TRUE : B_FALSE), NULL); 4266 } 4267 4268 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) { 4269 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME, 4270 DATA_TYPE_BOOLEAN_VALUE, 4271 (aflt->flt_stat & C_AFSR_ME) ? 
B_TRUE : B_FALSE, NULL); 4272 } 4273 4274 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) { 4275 *synd_status = afsr_to_synd_status(aflt->flt_inst, 4276 ch_flt->afsr_errs, ch_flt->flt_bit); 4277 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS, 4278 DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL); 4279 } 4280 4281 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) { 4282 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND, 4283 DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL); 4284 } 4285 4286 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) { 4287 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, 4288 DATA_TYPE_STRING, flt_to_error_type(aflt), NULL); 4289 } 4290 4291 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) { 4292 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP, 4293 DATA_TYPE_UINT64, aflt->flt_disp, NULL); 4294 } 4295 4296 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2) 4297 cpu_payload_add_ecache(aflt, payload); 4298 4299 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) { 4300 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION, 4301 DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL); 4302 } 4303 4304 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) { 4305 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED, 4306 DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL); 4307 } 4308 4309 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) { 4310 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK, 4311 DATA_TYPE_UINT32_ARRAY, 16, 4312 (uint32_t *)&ch_flt->flt_fpdata, NULL); 4313 } 4314 4315 #if defined(CPU_IMP_L1_CACHE_PARITY) 4316 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D) 4317 cpu_payload_add_dcache(aflt, payload); 4318 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I) 4319 cpu_payload_add_icache(aflt, payload); 4320 #endif /* CPU_IMP_L1_CACHE_PARITY */ 4321 4322 #if defined(CHEETAH_PLUS) 4323 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P) 4324 cpu_payload_add_pcache(aflt, payload); 4325 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB) 4326 cpu_payload_add_tlb(aflt, payload); 4327 #endif /* CHEETAH_PLUS */ 4328 /* 4329 * Create the FMRI that goes into the payload 4330 * and contains the unum info if necessary. 4331 */ 4332 if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) && 4333 (*afar_status == AFLT_STAT_VALID)) { 4334 char unum[UNUM_NAMLEN]; 4335 int len; 4336 4337 if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum, 4338 UNUM_NAMLEN, &len) == 0) { 4339 fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, 4340 NULL, unum, NULL); 4341 fm_payload_set(payload, 4342 FM_EREPORT_PAYLOAD_NAME_RESOURCE, 4343 DATA_TYPE_NVLIST, resource, NULL); 4344 } 4345 } 4346 } 4347 4348 /* 4349 * Initialize the way info if necessary. 4350 */ 4351 void 4352 cpu_ereport_init(struct async_flt *aflt) 4353 { 4354 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 4355 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; 4356 ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0]; 4357 int i; 4358 4359 /* 4360 * Initialize the info in the CPU logout structure. 4361 * The I$/D$ way information is not initialized here 4362 * since it is captured in the logout assembly code. 
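 * Only the E$/L2 way numbers need to be stamped here; after this
 * routine returns, chd_ec_data[i].ec_way == i and
 * chd_l2_data[i].ec_way == i.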
4363 */ 4364 for (i = 0; i < CHD_EC_DATA_SETS; i++) 4365 (ecp + i)->ec_way = i; 4366 4367 for (i = 0; i < PN_L2_NWAYS; i++) 4368 (l2p + i)->ec_way = i; 4369 } 4370 4371 /* 4372 * Returns whether fault address is valid for this error bit and 4373 * whether the address is "in memory" (i.e. pf_is_memory returns 1). 4374 */ 4375 int 4376 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit) 4377 { 4378 struct async_flt *aflt = (struct async_flt *)ch_flt; 4379 4380 return ((aflt->flt_stat & C_AFSR_MEMORY) && 4381 afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) == 4382 AFLT_STAT_VALID && 4383 pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)); 4384 } 4385 4386 static void 4387 cpu_log_diag_info(ch_async_flt_t *ch_flt) 4388 { 4389 struct async_flt *aflt = (struct async_flt *)ch_flt; 4390 ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data; 4391 ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data; 4392 ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; 4393 #if defined(CPU_IMP_ECACHE_ASSOC) 4394 int i, nway; 4395 #endif /* CPU_IMP_ECACHE_ASSOC */ 4396 4397 /* 4398 * Check if the CPU log out captured was valid. 4399 */ 4400 if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID || 4401 ch_flt->flt_data_incomplete) 4402 return; 4403 4404 #if defined(CPU_IMP_ECACHE_ASSOC) 4405 nway = cpu_ecache_nway(); 4406 i = cpu_ecache_line_valid(ch_flt); 4407 if (i == 0 || i > nway) { 4408 for (i = 0; i < nway; i++) 4409 ecp[i].ec_logflag = EC_LOGFLAG_MAGIC; 4410 } else 4411 ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC; 4412 #else /* CPU_IMP_ECACHE_ASSOC */ 4413 ecp->ec_logflag = EC_LOGFLAG_MAGIC; 4414 #endif /* CPU_IMP_ECACHE_ASSOC */ 4415 4416 #if defined(CHEETAH_PLUS) 4417 pn_cpu_log_diag_l2_info(ch_flt); 4418 #endif /* CHEETAH_PLUS */ 4419 4420 if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) { 4421 dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx); 4422 dcp->dc_logflag = DC_LOGFLAG_MAGIC; 4423 } 4424 4425 if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) { 4426 if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 4427 icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx); 4428 else 4429 icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx); 4430 icp->ic_logflag = IC_LOGFLAG_MAGIC; 4431 } 4432 } 4433 4434 /* 4435 * Cheetah ECC calculation. 4436 * 4437 * We only need to do the calculation on the data bits and can ignore check 4438 * bit and Mtag bit terms in the calculation. 4439 */ 4440 static uint64_t ch_ecc_table[9][2] = { 4441 /* 4442 * low order 64-bits high-order 64-bits 4443 */ 4444 { 0x46bffffeccd1177f, 0x488800022100014c }, 4445 { 0x42fccc81331ff77f, 0x14424f1010249184 }, 4446 { 0x8898827c222f1ffe, 0x22c1222808184aaf }, 4447 { 0xf7632203e131ccf1, 0xe1241121848292b8 }, 4448 { 0x7f5511421b113809, 0x901c88d84288aafe }, 4449 { 0x1d49412184882487, 0x8f338c87c044c6ef }, 4450 { 0xf552181014448344, 0x7ff8f4443e411911 }, 4451 { 0x2189240808f24228, 0xfeeff8cc81333f42 }, 4452 { 0x3280008440001112, 0xfee88b337ffffd62 }, 4453 }; 4454 4455 /* 4456 * 64-bit population count, use well-known popcnt trick. 4457 * We could use the UltraSPARC V9 POPC instruction, but some 4458 * CPUs including Cheetahplus and Jaguar do not support that 4459 * instruction. 4460 */ 4461 int 4462 popc64(uint64_t val) 4463 { 4464 int cnt; 4465 4466 for (cnt = 0; val != 0; val &= val - 1) 4467 cnt++; 4468 return (cnt); 4469 } 4470 4471 /* 4472 * Generate the 9 ECC bits for the 128-bit chunk based on the table above. 
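 * Syndrome bit N is the XOR of the data bits selected by the two
 * 64-bit masks in ch_ecc_table[N] (one mask for the low data word,
 * one for the high data word).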
4473 * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number 4474 * of 1 bits == 0, so we can just use the least significant bit of the popcnt 4475 * instead of doing all the xor's. 4476 */ 4477 uint32_t 4478 us3_gen_ecc(uint64_t data_low, uint64_t data_high) 4479 { 4480 int bitno, s; 4481 int synd = 0; 4482 4483 for (bitno = 0; bitno < 9; bitno++) { 4484 s = (popc64(data_low & ch_ecc_table[bitno][0]) + 4485 popc64(data_high & ch_ecc_table[bitno][1])) & 1; 4486 synd |= (s << bitno); 4487 } 4488 return (synd); 4489 4490 } 4491 4492 /* 4493 * Queue one event based on ecc_type_to_info entry. If the event has an AFT1 4494 * tag associated with it or is a fatal event (aflt_panic set), it is sent to 4495 * the UE event queue. Otherwise it is dispatched to the CE event queue. 4496 */ 4497 static void 4498 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason, 4499 ecc_type_to_info_t *eccp, ch_diag_data_t *cdp) 4500 { 4501 struct async_flt *aflt = (struct async_flt *)ch_flt; 4502 4503 if (reason && 4504 strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { 4505 (void) strcat(reason, eccp->ec_reason); 4506 } 4507 4508 ch_flt->flt_bit = eccp->ec_afsr_bit; 4509 ch_flt->flt_type = eccp->ec_flt_type; 4510 if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID) 4511 ch_flt->flt_diag_data = *cdp; 4512 else 4513 ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID; 4514 aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit); 4515 4516 if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS) 4517 aflt->flt_synd = GET_M_SYND(aflt->flt_stat); 4518 else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) 4519 aflt->flt_synd = GET_E_SYND(aflt->flt_stat); 4520 else 4521 aflt->flt_synd = 0; 4522 4523 aflt->flt_payload = eccp->ec_err_payload; 4524 4525 if (aflt->flt_panic || (eccp->ec_afsr_bit & 4526 (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1))) 4527 cpu_errorq_dispatch(eccp->ec_err_class, 4528 (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue, 4529 aflt->flt_panic); 4530 else 4531 cpu_errorq_dispatch(eccp->ec_err_class, 4532 (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue, 4533 aflt->flt_panic); 4534 } 4535 4536 /* 4537 * Queue events on async event queue one event per error bit. First we 4538 * queue the events that we "expect" for the given trap, then we queue events 4539 * that we may not expect. Return number of events queued. 4540 */ 4541 int 4542 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs, 4543 ch_cpu_logout_t *clop) 4544 { 4545 struct async_flt *aflt = (struct async_flt *)ch_flt; 4546 ecc_type_to_info_t *eccp; 4547 int nevents = 0; 4548 uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat; 4549 #if defined(CHEETAH_PLUS) 4550 uint64_t orig_t_afsr_errs; 4551 #endif 4552 uint64_t primary_afsr_ext = ch_flt->afsr_ext; 4553 uint64_t primary_afsr_errs = ch_flt->afsr_errs; 4554 ch_diag_data_t *cdp = NULL; 4555 4556 t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS); 4557 4558 #if defined(CHEETAH_PLUS) 4559 orig_t_afsr_errs = t_afsr_errs; 4560 4561 /* 4562 * For Cheetah+, log the shadow AFSR/AFAR bits first. 4563 */ 4564 if (clop != NULL) { 4565 /* 4566 * Set the AFSR and AFAR fields to the shadow registers. The 4567 * flt_addr and flt_stat fields will be reset to the primaries 4568 * below, but the sdw_addr and sdw_stat will stay as the 4569 * secondaries. 
4570 */ 4571 cdp = &clop->clo_sdw_data; 4572 aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar; 4573 aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr; 4574 ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext; 4575 ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) | 4576 (cdp->chd_afsr & C_AFSR_ALL_ERRS); 4577 4578 /* 4579 * If the primary and shadow AFSR differ, tag the shadow as 4580 * the first fault. 4581 */ 4582 if ((primary_afar != cdp->chd_afar) || 4583 (primary_afsr_errs != ch_flt->afsr_errs)) { 4584 aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT); 4585 } 4586 4587 /* 4588 * Check AFSR bits as well as AFSR_EXT bits in order of 4589 * the AFAR overwrite priority. Our stored AFSR_EXT value 4590 * is expected to be zero for those CPUs which do not have 4591 * an AFSR_EXT register. 4592 */ 4593 for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) { 4594 if ((eccp->ec_afsr_bit & 4595 (ch_flt->afsr_errs & t_afsr_errs)) && 4596 ((eccp->ec_flags & aflt->flt_status) != 0)) { 4597 cpu_queue_one_event(ch_flt, reason, eccp, cdp); 4598 cdp = NULL; 4599 t_afsr_errs &= ~eccp->ec_afsr_bit; 4600 nevents++; 4601 } 4602 } 4603 4604 /* 4605 * If the ME bit is on in the primary AFSR turn all the 4606 * error bits on again that may set the ME bit to make 4607 * sure we see the ME AFSR error logs. 4608 */ 4609 if ((primary_afsr & C_AFSR_ME) != 0) 4610 t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS); 4611 } 4612 #endif /* CHEETAH_PLUS */ 4613 4614 if (clop != NULL) 4615 cdp = &clop->clo_data; 4616 4617 /* 4618 * Queue expected errors, error bit and fault type must match 4619 * in the ecc_type_to_info table. 4620 */ 4621 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 4622 eccp++) { 4623 if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && 4624 (eccp->ec_flags & aflt->flt_status) != 0) { 4625 #if defined(SERRANO) 4626 /* 4627 * For FRC/FRU errors on Serrano the afar2 captures 4628 * the address and the associated data is 4629 * in the shadow logout area. 4630 */ 4631 if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) { 4632 if (clop != NULL) 4633 cdp = &clop->clo_sdw_data; 4634 aflt->flt_addr = ch_flt->afar2; 4635 } else { 4636 if (clop != NULL) 4637 cdp = &clop->clo_data; 4638 aflt->flt_addr = primary_afar; 4639 } 4640 #else /* SERRANO */ 4641 aflt->flt_addr = primary_afar; 4642 #endif /* SERRANO */ 4643 aflt->flt_stat = primary_afsr; 4644 ch_flt->afsr_ext = primary_afsr_ext; 4645 ch_flt->afsr_errs = primary_afsr_errs; 4646 cpu_queue_one_event(ch_flt, reason, eccp, cdp); 4647 cdp = NULL; 4648 t_afsr_errs &= ~eccp->ec_afsr_bit; 4649 nevents++; 4650 } 4651 } 4652 4653 /* 4654 * Queue unexpected errors, error bit only match. 4655 */ 4656 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 4657 eccp++) { 4658 if (eccp->ec_afsr_bit & t_afsr_errs) { 4659 #if defined(SERRANO) 4660 /* 4661 * For FRC/FRU errors on Serrano the afar2 captures 4662 * the address and the associated data is 4663 * in the shadow logout area. 
4664 */ 4665 if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) { 4666 if (clop != NULL) 4667 cdp = &clop->clo_sdw_data; 4668 aflt->flt_addr = ch_flt->afar2; 4669 } else { 4670 if (clop != NULL) 4671 cdp = &clop->clo_data; 4672 aflt->flt_addr = primary_afar; 4673 } 4674 #else /* SERRANO */ 4675 aflt->flt_addr = primary_afar; 4676 #endif /* SERRANO */ 4677 aflt->flt_stat = primary_afsr; 4678 ch_flt->afsr_ext = primary_afsr_ext; 4679 ch_flt->afsr_errs = primary_afsr_errs; 4680 cpu_queue_one_event(ch_flt, reason, eccp, cdp); 4681 cdp = NULL; 4682 t_afsr_errs &= ~eccp->ec_afsr_bit; 4683 nevents++; 4684 } 4685 } 4686 return (nevents); 4687 } 4688 4689 /* 4690 * Return trap type number. 4691 */ 4692 uint8_t 4693 flt_to_trap_type(struct async_flt *aflt) 4694 { 4695 if (aflt->flt_status & ECC_I_TRAP) 4696 return (TRAP_TYPE_ECC_I); 4697 if (aflt->flt_status & ECC_D_TRAP) 4698 return (TRAP_TYPE_ECC_D); 4699 if (aflt->flt_status & ECC_F_TRAP) 4700 return (TRAP_TYPE_ECC_F); 4701 if (aflt->flt_status & ECC_C_TRAP) 4702 return (TRAP_TYPE_ECC_C); 4703 if (aflt->flt_status & ECC_DP_TRAP) 4704 return (TRAP_TYPE_ECC_DP); 4705 if (aflt->flt_status & ECC_IP_TRAP) 4706 return (TRAP_TYPE_ECC_IP); 4707 if (aflt->flt_status & ECC_ITLB_TRAP) 4708 return (TRAP_TYPE_ECC_ITLB); 4709 if (aflt->flt_status & ECC_DTLB_TRAP) 4710 return (TRAP_TYPE_ECC_DTLB); 4711 return (TRAP_TYPE_UNKNOWN); 4712 } 4713 4714 /* 4715 * Decide an error type based on detector and leaky/partner tests. 4716 * The following array is used for quick translation - it must 4717 * stay in sync with ce_dispact_t. 4718 */ 4719 4720 static char *cetypes[] = { 4721 CE_DISP_DESC_U, 4722 CE_DISP_DESC_I, 4723 CE_DISP_DESC_PP, 4724 CE_DISP_DESC_P, 4725 CE_DISP_DESC_L, 4726 CE_DISP_DESC_PS, 4727 CE_DISP_DESC_S 4728 }; 4729 4730 char * 4731 flt_to_error_type(struct async_flt *aflt) 4732 { 4733 ce_dispact_t dispact, disp; 4734 uchar_t dtcrinfo, ptnrinfo, lkyinfo; 4735 4736 /* 4737 * The memory payload bundle is shared by some events that do 4738 * not perform any classification. For those flt_disp will be 4739 * 0 and we will return "unknown". 4740 */ 4741 if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0) 4742 return (cetypes[CE_DISP_UNKNOWN]); 4743 4744 dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp); 4745 4746 /* 4747 * It is also possible that no scrub/classification was performed 4748 * by the detector, for instance where a disrupting error logged 4749 * in the AFSR while CEEN was off in cpu_deferred_error. 4750 */ 4751 if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) 4752 return (cetypes[CE_DISP_UNKNOWN]); 4753 4754 /* 4755 * Lookup type in initial classification/action table 4756 */ 4757 dispact = CE_DISPACT(ce_disp_table, 4758 CE_XDIAG_AFARMATCHED(dtcrinfo), 4759 CE_XDIAG_STATE(dtcrinfo), 4760 CE_XDIAG_CE1SEEN(dtcrinfo), 4761 CE_XDIAG_CE2SEEN(dtcrinfo)); 4762 4763 /* 4764 * A bad lookup is not something to panic production systems for. 4765 */ 4766 ASSERT(dispact != CE_DISP_BAD); 4767 if (dispact == CE_DISP_BAD) 4768 return (cetypes[CE_DISP_UNKNOWN]); 4769 4770 disp = CE_DISP(dispact); 4771 4772 switch (disp) { 4773 case CE_DISP_UNKNOWN: 4774 case CE_DISP_INTERMITTENT: 4775 break; 4776 4777 case CE_DISP_POSS_PERS: 4778 /* 4779 * "Possible persistent" errors to which we have applied a valid 4780 * leaky test can be separated into "persistent" or "leaky". 
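 * If either of the leaky-test rereads saw the CE again, the error is
 * classified as leaky; otherwise it is promoted to persistent.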
4781 */ 4782 lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp); 4783 if (CE_XDIAG_TESTVALID(lkyinfo)) { 4784 if (CE_XDIAG_CE1SEEN(lkyinfo) || 4785 CE_XDIAG_CE2SEEN(lkyinfo)) 4786 disp = CE_DISP_LEAKY; 4787 else 4788 disp = CE_DISP_PERS; 4789 } 4790 break; 4791 4792 case CE_DISP_POSS_STICKY: 4793 /* 4794 * Promote "possible sticky" results that have been 4795 * confirmed by a partner test to "sticky". Unconfirmed 4796 * "possible sticky" events are left at that status - we do not 4797 * guess at any bad reader/writer etc status here. 4798 */ 4799 ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp); 4800 if (CE_XDIAG_TESTVALID(ptnrinfo) && 4801 CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo)) 4802 disp = CE_DISP_STICKY; 4803 4804 /* 4805 * Promote "possible sticky" results on a uniprocessor 4806 * to "sticky" 4807 */ 4808 if (disp == CE_DISP_POSS_STICKY && 4809 CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC) 4810 disp = CE_DISP_STICKY; 4811 break; 4812 4813 default: 4814 disp = CE_DISP_UNKNOWN; 4815 break; 4816 } 4817 4818 return (cetypes[disp]); 4819 } 4820 4821 /* 4822 * Given the entire afsr, the specific bit to check and a prioritized list of 4823 * error bits, determine the validity of the various overwrite priority 4824 * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have 4825 * different overwrite priorities. 4826 * 4827 * Given a specific afsr error bit and the entire afsr, there are three cases: 4828 * INVALID: The specified bit is lower overwrite priority than some other 4829 * error bit which is on in the afsr (or IVU/IVC). 4830 * VALID: The specified bit is higher priority than all other error bits 4831 * which are on in the afsr. 4832 * AMBIGUOUS: Another error bit (or bits) of equal priority to the specified 4833 * bit is on in the afsr. 4834 */ 4835 int 4836 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits) 4837 { 4838 uint64_t afsr_ow; 4839 4840 while ((afsr_ow = *ow_bits++) != 0) { 4841 /* 4842 * If bit is in the priority class, check to see if another 4843 * bit in the same class is on => ambiguous. Otherwise, 4844 * the value is valid. If the bit is not on at this priority 4845 * class, but a higher priority bit is on, then the value is 4846 * invalid. 4847 */ 4848 if (afsr_ow & afsr_bit) { 4849 /* 4850 * If equal pri bit is on, ambiguous. 4851 */ 4852 if (afsr & (afsr_ow & ~afsr_bit)) 4853 return (AFLT_STAT_AMBIGUOUS); 4854 return (AFLT_STAT_VALID); 4855 } else if (afsr & afsr_ow) 4856 break; 4857 } 4858 4859 /* 4860 * We didn't find a match or a higher priority bit was on. Not 4861 * finding a match handles the case of invalid AFAR for IVC, IVU. 
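 *
 * For example (illustrative only): if ow_bits lists a UE-class entry
 * ahead of the CE-class entry containing afsr_bit, and a UE bit is set
 * in the afsr, the loop breaks at the UE class and AFLT_STAT_INVALID
 * is returned, because the higher priority error owns the AFAR.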
4862 */ 4863 return (AFLT_STAT_INVALID); 4864 } 4865 4866 static int 4867 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit) 4868 { 4869 #if defined(SERRANO) 4870 if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) 4871 return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite)); 4872 else 4873 #endif /* SERRANO */ 4874 return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite)); 4875 } 4876 4877 static int 4878 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit) 4879 { 4880 return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite)); 4881 } 4882 4883 static int 4884 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit) 4885 { 4886 return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite)); 4887 } 4888 4889 static int 4890 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit) 4891 { 4892 #ifdef lint 4893 cpuid = cpuid; 4894 #endif 4895 if (afsr_bit & C_AFSR_MSYND_ERRS) { 4896 return (afsr_to_msynd_status(afsr, afsr_bit)); 4897 } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { 4898 #if defined(CHEETAH_PLUS) 4899 /* 4900 * The E_SYND overwrite policy is slightly different 4901 * for Panther CPUs. 4902 */ 4903 if (IS_PANTHER(cpunodes[cpuid].implementation)) 4904 return (afsr_to_pn_esynd_status(afsr, afsr_bit)); 4905 else 4906 return (afsr_to_esynd_status(afsr, afsr_bit)); 4907 #else /* CHEETAH_PLUS */ 4908 return (afsr_to_esynd_status(afsr, afsr_bit)); 4909 #endif /* CHEETAH_PLUS */ 4910 } else { 4911 return (AFLT_STAT_INVALID); 4912 } 4913 } 4914 4915 /* 4916 * Slave CPU stick synchronization. 4917 */ 4918 void 4919 sticksync_slave(void) 4920 { 4921 int i; 4922 int tries = 0; 4923 int64_t tskew; 4924 int64_t av_tskew; 4925 4926 kpreempt_disable(); 4927 /* wait for the master side */ 4928 while (stick_sync_cmd != SLAVE_START) 4929 ; 4930 /* 4931 * Synchronization should only take a few tries at most. But in the 4932 * odd case where the cpu isn't cooperating we'll keep trying. A cpu 4933 * without its stick synchronized wouldn't be a good citizen. 4934 */ 4935 while (slave_done == 0) { 4936 /* 4937 * Time skew calculation. 4938 */ 4939 av_tskew = tskew = 0; 4940 4941 for (i = 0; i < stick_iter; i++) { 4942 /* make location hot */ 4943 timestamp[EV_A_START] = 0; 4944 stick_timestamp(&timestamp[EV_A_START]); 4945 4946 /* tell the master we're ready */ 4947 stick_sync_cmd = MASTER_START; 4948 4949 /* and wait */ 4950 while (stick_sync_cmd != SLAVE_CONT) 4951 ; 4952 /* Event B end */ 4953 stick_timestamp(&timestamp[EV_B_END]); 4954 4955 /* calculate time skew */ 4956 tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START]) 4957 - (timestamp[EV_A_END] - 4958 timestamp[EV_A_START])) / 2; 4959 4960 /* keep running count */ 4961 av_tskew += tskew; 4962 } /* for */ 4963 4964 /* 4965 * Adjust stick for time skew if not within the max allowed; 4966 * otherwise we're all done. 4967 */ 4968 if (stick_iter != 0) 4969 av_tskew = av_tskew/stick_iter; 4970 if (ABS(av_tskew) > stick_tsk) { 4971 /* 4972 * If the skew is 1 (the slave's STICK register 4973 * is 1 STICK ahead of the master's), stick_adj 4974 * could fail to adjust the slave's STICK register 4975 * if the STICK read on the slave happens to 4976 * align with the increment of the STICK. 4977 * Therefore, we increment the skew to 2.
4978 */ 4979 if (av_tskew == 1) 4980 av_tskew++; 4981 stick_adj(-av_tskew); 4982 } else 4983 slave_done = 1; 4984 #ifdef DEBUG 4985 if (tries < DSYNC_ATTEMPTS) 4986 stick_sync_stats[CPU->cpu_id].skew_val[tries] = 4987 av_tskew; 4988 ++tries; 4989 #endif /* DEBUG */ 4990 #ifdef lint 4991 tries = tries; 4992 #endif 4993 4994 } /* while */ 4995 4996 /* allow the master to finish */ 4997 stick_sync_cmd = EVENT_NULL; 4998 kpreempt_enable(); 4999 } 5000 5001 /* 5002 * Master CPU side of stick synchronization. 5003 * - timestamp end of Event A 5004 * - timestamp beginning of Event B 5005 */ 5006 void 5007 sticksync_master(void) 5008 { 5009 int i; 5010 5011 kpreempt_disable(); 5012 /* tell the slave we've started */ 5013 slave_done = 0; 5014 stick_sync_cmd = SLAVE_START; 5015 5016 while (slave_done == 0) { 5017 for (i = 0; i < stick_iter; i++) { 5018 /* wait for the slave */ 5019 while (stick_sync_cmd != MASTER_START) 5020 ; 5021 /* Event A end */ 5022 stick_timestamp(&timestamp[EV_A_END]); 5023 5024 /* make location hot */ 5025 timestamp[EV_B_START] = 0; 5026 stick_timestamp(&timestamp[EV_B_START]); 5027 5028 /* tell the slave to continue */ 5029 stick_sync_cmd = SLAVE_CONT; 5030 } /* for */ 5031 5032 /* wait while slave calculates time skew */ 5033 while (stick_sync_cmd == SLAVE_CONT) 5034 ; 5035 } /* while */ 5036 kpreempt_enable(); 5037 } 5038 5039 /* 5040 * Cheetah/Cheetah+ have disrupting error for copyback's, so we don't need to 5041 * do Spitfire hack of xcall'ing all the cpus to ask to check for them. Also, 5042 * in cpu_async_panic_callb, each cpu checks for CPU events on its way to 5043 * panic idle. 5044 */ 5045 /*ARGSUSED*/ 5046 void 5047 cpu_check_allcpus(struct async_flt *aflt) 5048 {} 5049 5050 struct kmem_cache *ch_private_cache; 5051 5052 /* 5053 * Cpu private uninitialization. Uninitialize the Ecache scrubber and 5054 * deallocate the scrubber data structures and cpu_private data structure. 5055 */ 5056 void 5057 cpu_uninit_private(struct cpu *cp) 5058 { 5059 cheetah_private_t *chprp = CPU_PRIVATE(cp); 5060 5061 ASSERT(chprp); 5062 cpu_uninit_ecache_scrub_dr(cp); 5063 CPU_PRIVATE(cp) = NULL; 5064 ch_err_tl1_paddrs[cp->cpu_id] = NULL; 5065 kmem_cache_free(ch_private_cache, chprp); 5066 cmp_delete_cpu(cp->cpu_id); 5067 5068 } 5069 5070 /* 5071 * Cheetah Cache Scrubbing 5072 * 5073 * The primary purpose of Cheetah cache scrubbing is to reduce the exposure 5074 * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not 5075 * protected by either parity or ECC. 5076 * 5077 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the 5078 * cache per second). Due to the specifics of how the I$ control 5079 * logic works with respect to the ASI used to scrub I$ lines, the entire 5080 * I$ is scanned at once. 5081 */ 5082 5083 /* 5084 * Tuneables to enable and disable the scrubbing of the caches, and to tune 5085 * scrubbing behavior. These may be changed via /etc/system or using mdb 5086 * on a running system. 5087 */ 5088 int dcache_scrub_enable = 1; /* D$ scrubbing is on by default */ 5089 5090 /* 5091 * The following are the PIL levels that the softints/cross traps will fire at. 5092 */ 5093 uint_t ecache_scrub_pil = PIL_9; /* E$ scrub PIL for cross traps */ 5094 uint_t dcache_scrub_pil = PIL_9; /* D$ scrub PIL for cross traps */ 5095 uint_t icache_scrub_pil = PIL_9; /* I$ scrub PIL for cross traps */ 5096 5097 #if defined(JALAPENO) 5098 5099 /* 5100 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber 5101 * on Jalapeno.
5102 */ 5103 int ecache_scrub_enable = 0; 5104 5105 #else /* JALAPENO */ 5106 5107 /* 5108 * With all other cpu types, E$ scrubbing is on by default 5109 */ 5110 int ecache_scrub_enable = 1; 5111 5112 #endif /* JALAPENO */ 5113 5114 5115 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO) 5116 5117 /* 5118 * The I$ scrubber tends to cause latency problems for real-time SW, so it 5119 * is disabled by default on non-Cheetah systems 5120 */ 5121 int icache_scrub_enable = 0; 5122 5123 /* 5124 * Tuneables specifying the scrub calls per second and the scan rate 5125 * for each cache 5126 * 5127 * The cyclic times are set during boot based on the following values. 5128 * Changing these values in mdb after this time will have no effect. If 5129 * a different value is desired, it must be set in /etc/system before a 5130 * reboot. 5131 */ 5132 int ecache_calls_a_sec = 1; 5133 int dcache_calls_a_sec = 2; 5134 int icache_calls_a_sec = 2; 5135 5136 int ecache_scan_rate_idle = 1; 5137 int ecache_scan_rate_busy = 1; 5138 int dcache_scan_rate_idle = 1; 5139 int dcache_scan_rate_busy = 1; 5140 int icache_scan_rate_idle = 1; 5141 int icache_scan_rate_busy = 1; 5142 5143 #else /* CHEETAH_PLUS || JALAPENO || SERRANO */ 5144 5145 int icache_scrub_enable = 1; /* I$ scrubbing is on by default */ 5146 5147 int ecache_calls_a_sec = 100; /* E$ scrub calls per seconds */ 5148 int dcache_calls_a_sec = 100; /* D$ scrub calls per seconds */ 5149 int icache_calls_a_sec = 100; /* I$ scrub calls per seconds */ 5150 5151 int ecache_scan_rate_idle = 100; /* E$ scan rate (in tenths of a %) */ 5152 int ecache_scan_rate_busy = 100; /* E$ scan rate (in tenths of a %) */ 5153 int dcache_scan_rate_idle = 100; /* D$ scan rate (in tenths of a %) */ 5154 int dcache_scan_rate_busy = 100; /* D$ scan rate (in tenths of a %) */ 5155 int icache_scan_rate_idle = 100; /* I$ scan rate (in tenths of a %) */ 5156 int icache_scan_rate_busy = 100; /* I$ scan rate (in tenths of a %) */ 5157 5158 #endif /* CHEETAH_PLUS || JALAPENO || SERRANO */ 5159 5160 /* 5161 * In order to scrub on offline cpus, a cross trap is sent. The handler will 5162 * increment the outstanding request counter and schedule a softint to run 5163 * the scrubber. 5164 */ 5165 extern xcfunc_t cache_scrubreq_tl1; 5166 5167 /* 5168 * These are the softint functions for each cache scrubber 5169 */ 5170 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2); 5171 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2); 5172 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2); 5173 5174 /* 5175 * The cache scrub info table contains cache specific information 5176 * and allows for some of the scrub code to be table driven, reducing 5177 * duplication of cache similar code. 5178 * 5179 * This table keeps a copy of the value in the calls per second variable 5180 * (?cache_calls_a_sec). This makes it much more difficult for someone 5181 * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in 5182 * mdb in a misguided attempt to disable the scrubber). 
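 * To disable a scrubber at runtime, clear the corresponding
 * ?cache_scrub_enable variable instead; do_scrub() and
 * do_scrub_offline() check that flag on every invocation.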
5183 */ 5184 struct scrub_info { 5185 int *csi_enable; /* scrubber enable flag */ 5186 int csi_freq; /* scrubber calls per second */ 5187 int csi_index; /* index to chsm_outstanding[] */ 5188 uint_t csi_inum; /* scrubber interrupt number */ 5189 cyclic_id_t csi_omni_cyc_id; /* omni cyclic ID */ 5190 cyclic_id_t csi_offline_cyc_id; /* offline cyclic ID */ 5191 char csi_name[3]; /* cache name for this scrub entry */ 5192 } cache_scrub_info[] = { 5193 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"}, 5194 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"}, 5195 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"} 5196 }; 5197 5198 /* 5199 * If scrubbing is enabled, increment the outstanding request counter. If it 5200 * is 1 (meaning there were no previous requests outstanding), call 5201 * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing 5202 * a self trap. 5203 */ 5204 static void 5205 do_scrub(struct scrub_info *csi) 5206 { 5207 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5208 int index = csi->csi_index; 5209 uint32_t *outstanding = &csmp->chsm_outstanding[index]; 5210 5211 if (*(csi->csi_enable) && (csmp->chsm_enable[index])) { 5212 if (atomic_add_32_nv(outstanding, 1) == 1) { 5213 xt_one_unchecked(CPU->cpu_id, setsoftint_tl1, 5214 csi->csi_inum, 0); 5215 } 5216 } 5217 } 5218 5219 /* 5220 * Omni cyclics don't fire on offline cpus, so we use another cyclic to 5221 * cross-trap the offline cpus. 5222 */ 5223 static void 5224 do_scrub_offline(struct scrub_info *csi) 5225 { 5226 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5227 5228 if (CPUSET_ISNULL(cpu_offline_set)) { 5229 /* 5230 * No offline cpus - nothing to do 5231 */ 5232 return; 5233 } 5234 5235 if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) { 5236 xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum, 5237 csi->csi_index); 5238 } 5239 } 5240 5241 /* 5242 * This is the initial setup for the scrubber cyclics - it sets the 5243 * interrupt level, frequency, and function to call. 5244 */ 5245 /*ARGSUSED*/ 5246 static void 5247 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, 5248 cyc_time_t *when) 5249 { 5250 struct scrub_info *csi = (struct scrub_info *)arg; 5251 5252 ASSERT(csi != NULL); 5253 hdlr->cyh_func = (cyc_func_t)do_scrub; 5254 hdlr->cyh_level = CY_LOW_LEVEL; 5255 hdlr->cyh_arg = arg; 5256 5257 when->cyt_when = 0; /* Start immediately */ 5258 when->cyt_interval = NANOSEC / csi->csi_freq; 5259 } 5260 5261 /* 5262 * Initialization for cache scrubbing. 5263 * This routine is called AFTER all cpus have had cpu_init_private called 5264 * to initialize their private data areas. 
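 *
 * Each enabled scrubber gets both an omni cyclic (for online cpus) and
 * a separate cyclic that cross-traps offline cpus; both fire every
 * NANOSEC / csi_freq nanoseconds, e.g. a csi_freq of 100 calls per
 * second yields a 10ms interval.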
5265 */ 5266 void 5267 cpu_init_cache_scrub(void) 5268 { 5269 int i; 5270 struct scrub_info *csi; 5271 cyc_omni_handler_t omni_hdlr; 5272 cyc_handler_t offline_hdlr; 5273 cyc_time_t when; 5274 5275 /* 5276 * save away the maximum number of lines for the D$ 5277 */ 5278 dcache_nlines = dcache_size / dcache_linesize; 5279 5280 /* 5281 * register the softints for the cache scrubbing 5282 */ 5283 cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum = 5284 add_softintr(ecache_scrub_pil, scrub_ecache_line_intr, 5285 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]); 5286 cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec; 5287 5288 cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum = 5289 add_softintr(dcache_scrub_pil, scrub_dcache_line_intr, 5290 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]); 5291 cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec; 5292 5293 cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum = 5294 add_softintr(icache_scrub_pil, scrub_icache_line_intr, 5295 (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]); 5296 cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec; 5297 5298 /* 5299 * start the scrubbing for all the caches 5300 */ 5301 mutex_enter(&cpu_lock); 5302 for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) { 5303 5304 csi = &cache_scrub_info[i]; 5305 5306 if (!(*csi->csi_enable)) 5307 continue; 5308 5309 /* 5310 * force the following to be true: 5311 * 1 <= calls_a_sec <= hz 5312 */ 5313 if (csi->csi_freq > hz) { 5314 cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high " 5315 "(%d); resetting to hz (%d)", csi->csi_name, 5316 csi->csi_freq, hz); 5317 csi->csi_freq = hz; 5318 } else if (csi->csi_freq < 1) { 5319 cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low " 5320 "(%d); resetting to 1", csi->csi_name, 5321 csi->csi_freq); 5322 csi->csi_freq = 1; 5323 } 5324 5325 omni_hdlr.cyo_online = cpu_scrub_cyclic_setup; 5326 omni_hdlr.cyo_offline = NULL; 5327 omni_hdlr.cyo_arg = (void *)csi; 5328 5329 offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline; 5330 offline_hdlr.cyh_arg = (void *)csi; 5331 offline_hdlr.cyh_level = CY_LOW_LEVEL; 5332 5333 when.cyt_when = 0; /* Start immediately */ 5334 when.cyt_interval = NANOSEC / csi->csi_freq; 5335 5336 csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr); 5337 csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when); 5338 } 5339 register_cpu_setup_func(cpu_scrub_cpu_setup, NULL); 5340 mutex_exit(&cpu_lock); 5341 } 5342 5343 /* 5344 * Indicate that the specified cpu is idle. 5345 */ 5346 void 5347 cpu_idle_ecache_scrub(struct cpu *cp) 5348 { 5349 if (CPU_PRIVATE(cp) != NULL) { 5350 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5351 csmp->chsm_ecache_busy = ECACHE_CPU_IDLE; 5352 } 5353 } 5354 5355 /* 5356 * Indicate that the specified cpu is busy. 5357 */ 5358 void 5359 cpu_busy_ecache_scrub(struct cpu *cp) 5360 { 5361 if (CPU_PRIVATE(cp) != NULL) { 5362 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5363 csmp->chsm_ecache_busy = ECACHE_CPU_BUSY; 5364 } 5365 } 5366 5367 /* 5368 * Initialization for cache scrubbing for the specified cpu. 
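 *
 * For example (illustrative numbers only), an 8MB E$ with 512-byte
 * lines gives chsm_ecache_nlines = 8MB / 512 = 16384 lines to walk.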
5369 */ 5370 void 5371 cpu_init_ecache_scrub_dr(struct cpu *cp) 5372 { 5373 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5374 int cpuid = cp->cpu_id; 5375 5376 /* initialize the number of lines in the caches */ 5377 csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size / 5378 cpunodes[cpuid].ecache_linesize; 5379 csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) / 5380 CPU_PRIVATE_VAL(cp, chpr_icache_linesize); 5381 5382 /* 5383 * do_scrub() and do_scrub_offline() check both the global 5384 * ?cache_scrub_enable and this per-cpu enable variable. All scrubbers 5385 * check this value before scrubbing. Currently, we use it to 5386 * disable the E$ scrubber on multi-core cpus or while running at 5387 * slowed speed. For now, just turn everything on and allow 5388 * cpu_init_private() to change it if necessary. 5389 */ 5390 csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1; 5391 csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1; 5392 csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1; 5393 5394 cpu_busy_ecache_scrub(cp); 5395 } 5396 5397 /* 5398 * Un-initialization for cache scrubbing for the specified cpu. 5399 */ 5400 static void 5401 cpu_uninit_ecache_scrub_dr(struct cpu *cp) 5402 { 5403 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); 5404 5405 /* 5406 * un-initialize bookkeeping for cache scrubbing 5407 */ 5408 bzero(csmp, sizeof (ch_scrub_misc_t)); 5409 5410 cpu_idle_ecache_scrub(cp); 5411 } 5412 5413 /* 5414 * Called periodically on each CPU to scrub the D$. 5415 */ 5416 static void 5417 scrub_dcache(int how_many) 5418 { 5419 int i; 5420 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5421 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D]; 5422 5423 /* 5424 * scrub the desired number of lines 5425 */ 5426 for (i = 0; i < how_many; i++) { 5427 /* 5428 * scrub a D$ line 5429 */ 5430 dcache_inval_line(index); 5431 5432 /* 5433 * calculate the next D$ line to scrub, assumes 5434 * that dcache_nlines is a power of 2 5435 */ 5436 index = (index + 1) & (dcache_nlines - 1); 5437 } 5438 5439 /* 5440 * set the scrub index for the next visit 5441 */ 5442 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index; 5443 } 5444 5445 /* 5446 * Handler for D$ scrub inum softint. Call scrub_dcache until 5447 * we decrement the outstanding request count to zero. 5448 */ 5449 /*ARGSUSED*/ 5450 static uint_t 5451 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2) 5452 { 5453 int i; 5454 int how_many; 5455 int outstanding; 5456 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5457 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D]; 5458 struct scrub_info *csi = (struct scrub_info *)arg1; 5459 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 5460 dcache_scan_rate_idle : dcache_scan_rate_busy; 5461 5462 /* 5463 * The scan rates are expressed in units of tenths of a 5464 * percent. A scan rate of 1000 (100%) means the whole 5465 * cache is scanned every second. 5466 */ 5467 how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq); 5468 5469 do { 5470 outstanding = *countp; 5471 for (i = 0; i < outstanding; i++) { 5472 scrub_dcache(how_many); 5473 } 5474 } while (atomic_add_32_nv(countp, -outstanding)); 5475 5476 return (DDI_INTR_CLAIMED); 5477 } 5478 5479 /* 5480 * Called periodically on each CPU to scrub the I$. The I$ is scrubbed 5481 * by invalidating lines. Due to the characteristics of the ASI which 5482 * is used to invalidate an I$ line, the entire I$ must be invalidated 5483 * vs. an individual I$ line. 
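 *
 * The scrub index still advances line by line so that, averaged over
 * calls, the configured scan rate is honored; icache_inval_all() is
 * issued only when the index wraps back to zero.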
5484 */ 5485 static void 5486 scrub_icache(int how_many) 5487 { 5488 int i; 5489 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5490 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I]; 5491 int icache_nlines = csmp->chsm_icache_nlines; 5492 5493 /* 5494 * scrub the desired number of lines 5495 */ 5496 for (i = 0; i < how_many; i++) { 5497 /* 5498 * since the entire I$ must be scrubbed at once, 5499 * wait until the index wraps to zero to invalidate 5500 * the entire I$ 5501 */ 5502 if (index == 0) { 5503 icache_inval_all(); 5504 } 5505 5506 /* 5507 * calculate the next I$ line to scrub, assumes 5508 * that chsm_icache_nlines is a power of 2 5509 */ 5510 index = (index + 1) & (icache_nlines - 1); 5511 } 5512 5513 /* 5514 * set the scrub index for the next visit 5515 */ 5516 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index; 5517 } 5518 5519 /* 5520 * Handler for I$ scrub inum softint. Call scrub_icache until 5521 * we decrement the outstanding request count to zero. 5522 */ 5523 /*ARGSUSED*/ 5524 static uint_t 5525 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2) 5526 { 5527 int i; 5528 int how_many; 5529 int outstanding; 5530 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5531 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I]; 5532 struct scrub_info *csi = (struct scrub_info *)arg1; 5533 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 5534 icache_scan_rate_idle : icache_scan_rate_busy; 5535 int icache_nlines = csmp->chsm_icache_nlines; 5536 5537 /* 5538 * The scan rates are expressed in units of tenths of a 5539 * percent. A scan rate of 1000 (100%) means the whole 5540 * cache is scanned every second. 5541 */ 5542 how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq); 5543 5544 do { 5545 outstanding = *countp; 5546 for (i = 0; i < outstanding; i++) { 5547 scrub_icache(how_many); 5548 } 5549 } while (atomic_add_32_nv(countp, -outstanding)); 5550 5551 return (DDI_INTR_CLAIMED); 5552 } 5553 5554 /* 5555 * Called periodically on each CPU to scrub the E$. 5556 */ 5557 static void 5558 scrub_ecache(int how_many) 5559 { 5560 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5561 int i; 5562 int cpuid = CPU->cpu_id; 5563 int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E]; 5564 int nlines = csmp->chsm_ecache_nlines; 5565 int linesize = cpunodes[cpuid].ecache_linesize; 5566 int ec_set_size = cpu_ecache_set_size(CPU); 5567 5568 /* 5569 * scrub the desired number of lines 5570 */ 5571 for (i = 0; i < how_many; i++) { 5572 /* 5573 * scrub the E$ line 5574 */ 5575 ecache_flush_line(ecache_flushaddr + (index * linesize), 5576 ec_set_size); 5577 5578 /* 5579 * calculate the next E$ line to scrub based on twice 5580 * the number of E$ lines (to displace lines containing 5581 * flush area data), assumes that the number of lines 5582 * is a power of 2 5583 */ 5584 index = (index + 1) & ((nlines << 1) - 1); 5585 } 5586 5587 /* 5588 * set the ecache scrub index for the next visit 5589 */ 5590 csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index; 5591 } 5592 5593 /* 5594 * Handler for E$ scrub inum softint. Call the E$ scrubber until 5595 * we decrement the outstanding request count to zero. 5596 * 5597 * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may 5598 * become negative after the atomic_add_32_nv(). This is not a problem, as 5599 * the next trip around the loop won't scrub anything, and the next add will 5600 * reset the count back to zero. 
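 *
 * Worked example of the scan-rate arithmetic below (illustrative
 * values only): with ecache_nlines = 16384, scan_rate = 100
 * (i.e. 10%) and csi_freq = 100 calls per second,
 * how_many = (16384 * 100) / (1000 * 100) = 16 lines per softint,
 * or about 1600 lines per second - roughly 10% of the cache each
 * second, which is what the rate units intend.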
5601 */ 5602 /*ARGSUSED*/ 5603 static uint_t 5604 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) 5605 { 5606 int i; 5607 int how_many; 5608 int outstanding; 5609 ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); 5610 uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E]; 5611 struct scrub_info *csi = (struct scrub_info *)arg1; 5612 int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 5613 ecache_scan_rate_idle : ecache_scan_rate_busy; 5614 int ecache_nlines = csmp->chsm_ecache_nlines; 5615 5616 /* 5617 * The scan rates are expressed in units of tenths of a 5618 * percent. A scan rate of 1000 (100%) means the whole 5619 * cache is scanned every second. 5620 */ 5621 how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq); 5622 5623 do { 5624 outstanding = *countp; 5625 for (i = 0; i < outstanding; i++) { 5626 scrub_ecache(how_many); 5627 } 5628 } while (atomic_add_32_nv(countp, -outstanding)); 5629 5630 return (DDI_INTR_CLAIMED); 5631 } 5632 5633 /* 5634 * Timeout function to reenable CE 5635 */ 5636 static void 5637 cpu_delayed_check_ce_errors(void *arg) 5638 { 5639 if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg, 5640 TQ_NOSLEEP)) { 5641 (void) timeout(cpu_delayed_check_ce_errors, arg, 5642 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); 5643 } 5644 } 5645 5646 /* 5647 * CE Deferred Re-enable after trap. 5648 * 5649 * When the CPU gets a disrupting trap for any of the errors 5650 * controlled by the CEEN bit, CEEN is disabled in the trap handler 5651 * immediately. To eliminate the possibility of multiple CEs causing 5652 * recursive stack overflow in the trap handler, we cannot 5653 * reenable CEEN while still running in the trap handler. Instead, 5654 * after a CE is logged on a CPU, we schedule a timeout function, 5655 * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs 5656 * seconds. This function will check whether any further CEs 5657 * have occurred on that CPU, and if none have, will reenable CEEN. 5658 * 5659 * If further CEs have occurred while CEEN is disabled, another 5660 * timeout will be scheduled. This is to ensure that the CPU can 5661 * make progress in the face of CE 'storms', and that it does not 5662 * spend all its time logging CE errors. 5663 */ 5664 static void 5665 cpu_check_ce_errors(void *arg) 5666 { 5667 int cpuid = (int)(uintptr_t)arg; 5668 cpu_t *cp; 5669 5670 /* 5671 * We acquire cpu_lock. 5672 */ 5673 ASSERT(curthread->t_pil == 0); 5674 5675 /* 5676 * verify that the cpu is still around, DR 5677 * could have got there first ... 5678 */ 5679 mutex_enter(&cpu_lock); 5680 cp = cpu_get(cpuid); 5681 if (cp == NULL) { 5682 mutex_exit(&cpu_lock); 5683 return; 5684 } 5685 /* 5686 * make sure we don't migrate across CPUs 5687 * while checking our CE status. 5688 */ 5689 kpreempt_disable(); 5690 5691 /* 5692 * If we are running on the CPU that got the 5693 * CE, we can do the checks directly. 5694 */ 5695 if (cp->cpu_id == CPU->cpu_id) { 5696 mutex_exit(&cpu_lock); 5697 cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0); 5698 kpreempt_enable(); 5699 return; 5700 } 5701 kpreempt_enable(); 5702 5703 /* 5704 * send an x-call to get the CPU that originally 5705 * got the CE to do the necessary checks. If we can't 5706 * send the x-call, reschedule the timeout, otherwise we 5707 * lose CEEN forever on that CPU. 
5708 */ 5709 if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) { 5710 xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce, 5711 TIMEOUT_CEEN_CHECK, 0); 5712 mutex_exit(&cpu_lock); 5713 } else { 5714 /* 5715 * When the CPU is not accepting xcalls, or 5716 * the processor is offlined, we don't want to 5717 * incur the extra overhead of trying to schedule the 5718 * CE timeout indefinitely. However, we don't want to lose 5719 * CE checking forever. 5720 * 5721 * Keep rescheduling the timeout, accepting the additional 5722 * overhead as the cost of correctness in the case where we get 5723 * a CE, disable CEEN, offline the CPU during 5724 * the timeout interval, and then online it at some 5725 * point in the future. This is unlikely given the short 5726 * cpu_ceen_delay_secs. 5727 */ 5728 mutex_exit(&cpu_lock); 5729 (void) timeout(cpu_delayed_check_ce_errors, 5730 (void *)(uintptr_t)cp->cpu_id, 5731 drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); 5732 } 5733 } 5734 5735 /* 5736 * This routine will check whether CEs have occurred while 5737 * CEEN is disabled. Any CEs detected will be logged and, if 5738 * possible, scrubbed. 5739 * 5740 * The memscrubber will also use this routine to clear any errors 5741 * caused by its scrubbing with CEEN disabled. 5742 * 5743 * flag == SCRUBBER_CEEN_CHECK 5744 * called from memscrubber, just check/scrub, no reset 5745 * paddr physical addr. for start of scrub pages 5746 * vaddr virtual addr. for scrub area 5747 * psz page size of area to be scrubbed 5748 * 5749 * flag == TIMEOUT_CEEN_CHECK 5750 * timeout function has triggered, reset timeout or CEEN 5751 * 5752 * Note: We must not migrate cpus during this function. This can be 5753 * achieved by one of: 5754 * - invoking as target of an x-call in which case we're at XCALL_PIL 5755 * The flag value must be the first xcall argument. 5756 * - disabling kernel preemption. This should be done for very short 5757 * periods so is not suitable for SCRUBBER_CEEN_CHECK where we might 5758 * scrub an extended area with cpu_check_block. The call for 5759 * TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept 5760 * brief for this case. 5761 * - binding to a cpu, e.g., with thread_affinity_set(). This is used 5762 * in the SCRUBBER_CEEN_CHECK case, but is not practical for 5763 * the TIMEOUT_CEEN_CHECK because both need cpu_lock. 5764 */ 5765 void 5766 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz) 5767 { 5768 ch_cpu_errors_t cpu_error_regs; 5769 uint64_t ec_err_enable; 5770 uint64_t page_offset; 5771 5772 /* Read AFSR */ 5773 get_cpu_error_state(&cpu_error_regs); 5774 5775 /* 5776 * If no CEEN errors have occurred during the timeout 5777 * interval, it is safe to re-enable CEEN and exit. 5778 */ 5779 if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) { 5780 if (flag == TIMEOUT_CEEN_CHECK && 5781 !((ec_err_enable = get_error_enable()) & EN_REG_CEEN)) 5782 set_error_enable(ec_err_enable | EN_REG_CEEN); 5783 return; 5784 } 5785 5786 /* 5787 * Ensure that CEEN was not reenabled (maybe by DR) before 5788 * we log/clear the error. 5789 */ 5790 if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN) 5791 set_error_enable(ec_err_enable & ~EN_REG_CEEN); 5792 5793 /* 5794 * log/clear the CE. If CE_CEEN_DEFER is passed, the 5795 * timeout will be rescheduled when the error is logged.
5796 */ 5797 if (!(cpu_error_regs.afsr & cpu_ce_not_deferred)) 5798 cpu_ce_detected(&cpu_error_regs, 5799 CE_CEEN_DEFER | CE_CEEN_TIMEOUT); 5800 else 5801 cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT); 5802 5803 /* 5804 * If the memory scrubber runs while CEEN is 5805 * disabled, (or if CEEN is disabled during the 5806 * scrub as a result of a CE being triggered by 5807 * it), the range being scrubbed will not be 5808 * completely cleaned. If there are multiple CEs 5809 * in the range at most two of these will be dealt 5810 * with, (one by the trap handler and one by the 5811 * timeout). It is also possible that none are dealt 5812 * with, (CEEN disabled and another CE occurs before 5813 * the timeout triggers). So to ensure that the 5814 * memory is actually scrubbed, we have to access each 5815 * memory location in the range and then check whether 5816 * that access causes a CE. 5817 */ 5818 if (flag == SCRUBBER_CEEN_CHECK && va) { 5819 if ((cpu_error_regs.afar >= pa) && 5820 (cpu_error_regs.afar < (pa + psz))) { 5821 /* 5822 * Force a load from physical memory for each 5823 * 64-byte block, then check AFSR to determine 5824 * whether this access caused an error. 5825 * 5826 * This is a slow way to do a scrub, but as it will 5827 * only be invoked when the memory scrubber actually 5828 * triggered a CE, it should not happen too 5829 * frequently. 5830 * 5831 * cut down what we need to check as the scrubber 5832 * has verified up to AFAR, so get its offset 5833 * into the page and start there. 5834 */ 5835 page_offset = (uint64_t)(cpu_error_regs.afar & 5836 (psz - 1)); 5837 va = (caddr_t)(va + (P2ALIGN(page_offset, 64))); 5838 psz -= (uint_t)(P2ALIGN(page_offset, 64)); 5839 cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)), 5840 psz); 5841 } 5842 } 5843 5844 /* 5845 * Reset error enable if this CE is not masked. 5846 */ 5847 if ((flag == TIMEOUT_CEEN_CHECK) && 5848 (cpu_error_regs.afsr & cpu_ce_not_deferred)) 5849 set_error_enable(ec_err_enable | EN_REG_CEEN); 5850 5851 } 5852 5853 /* 5854 * Attempt a cpu logout for an error that we did not trap for, such 5855 * as a CE noticed with CEEN off. It is assumed that we are still running 5856 * on the cpu that took the error and that we cannot migrate. Returns 5857 * 0 on success, otherwise nonzero. 5858 */ 5859 static int 5860 cpu_ce_delayed_ec_logout(uint64_t afar) 5861 { 5862 ch_cpu_logout_t *clop; 5863 5864 if (CPU_PRIVATE(CPU) == NULL) 5865 return (0); 5866 5867 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); 5868 if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) != 5869 LOGOUT_INVALID) 5870 return (0); 5871 5872 cpu_delayed_logout(afar, clop); 5873 return (1); 5874 } 5875 5876 /* 5877 * We got an error while CEEN was disabled. We 5878 * need to clean up after it and log whatever 5879 * information we have on the CE.
5880 */ 5881 void 5882 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag) 5883 { 5884 ch_async_flt_t ch_flt; 5885 struct async_flt *aflt; 5886 char pr_reason[MAX_REASON_STRING]; 5887 5888 bzero(&ch_flt, sizeof (ch_async_flt_t)); 5889 ch_flt.flt_trapped_ce = flag; 5890 aflt = (struct async_flt *)&ch_flt; 5891 aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK; 5892 ch_flt.afsr_ext = cpu_error_regs->afsr_ext; 5893 ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) | 5894 (cpu_error_regs->afsr & C_AFSR_ALL_ERRS); 5895 aflt->flt_addr = cpu_error_regs->afar; 5896 #if defined(SERRANO) 5897 ch_flt.afar2 = cpu_error_regs->afar2; 5898 #endif /* SERRANO */ 5899 aflt->flt_pc = NULL; 5900 aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0); 5901 aflt->flt_tl = 0; 5902 aflt->flt_panic = 0; 5903 cpu_log_and_clear_ce(&ch_flt); 5904 5905 /* 5906 * check if we caused any errors during cleanup 5907 */ 5908 if (clear_errors(&ch_flt)) { 5909 pr_reason[0] = '\0'; 5910 (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, 5911 NULL); 5912 } 5913 } 5914 5915 /* 5916 * Log/clear CEEN-controlled disrupting errors 5917 */ 5918 static void 5919 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt) 5920 { 5921 struct async_flt *aflt; 5922 uint64_t afsr, afsr_errs; 5923 ch_cpu_logout_t *clop; 5924 char pr_reason[MAX_REASON_STRING]; 5925 on_trap_data_t *otp = curthread->t_ontrap; 5926 5927 aflt = (struct async_flt *)ch_flt; 5928 afsr = aflt->flt_stat; 5929 afsr_errs = ch_flt->afsr_errs; 5930 aflt->flt_id = gethrtime_waitfree(); 5931 aflt->flt_bus_id = getprocessorid(); 5932 aflt->flt_inst = CPU->cpu_id; 5933 aflt->flt_prot = AFLT_PROT_NONE; 5934 aflt->flt_class = CPU_FAULT; 5935 aflt->flt_status = ECC_C_TRAP; 5936 5937 pr_reason[0] = '\0'; 5938 /* 5939 * Get the CPU log out info for Disrupting Trap. 5940 */ 5941 if (CPU_PRIVATE(CPU) == NULL) { 5942 clop = NULL; 5943 ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID; 5944 } else { 5945 clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); 5946 } 5947 5948 if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) { 5949 ch_cpu_errors_t cpu_error_regs; 5950 5951 get_cpu_error_state(&cpu_error_regs); 5952 (void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar); 5953 clop->clo_data.chd_afsr = cpu_error_regs.afsr; 5954 clop->clo_data.chd_afar = cpu_error_regs.afar; 5955 clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext; 5956 clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr; 5957 clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar; 5958 clop->clo_sdw_data.chd_afsr_ext = 5959 cpu_error_regs.shadow_afsr_ext; 5960 #if defined(SERRANO) 5961 clop->clo_data.chd_afar2 = cpu_error_regs.afar2; 5962 #endif /* SERRANO */ 5963 ch_flt->flt_data_incomplete = 1; 5964 5965 /* 5966 * The logging/clear code expects AFSR/AFAR to be cleared. 5967 * The trap handler does it for CEEN enabled errors 5968 * so we need to do it here. 5969 */ 5970 set_cpu_error_state(&cpu_error_regs); 5971 } 5972 5973 #if defined(JALAPENO) || defined(SERRANO) 5974 /* 5975 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno. 5976 * For Serrano, even though we do have the AFAR, we still do the 5977 * scrub on the RCE side since that's where the error type can 5978 * be properly classified as intermittent, persistent, etc. 5979 * 5980 * CE/RCE: If error is in memory and AFAR is valid, scrub the memory. 5981 * Must scrub memory before cpu_queue_events, as scrubbing memory sets 5982 * the flt_status bits.
5983 */ 5984 if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) && 5985 (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) || 5986 cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) { 5987 cpu_ce_scrub_mem_err(aflt, B_TRUE); 5988 } 5989 #else /* JALAPENO || SERRANO */ 5990 /* 5991 * CE/EMC: If error is in memory and AFAR is valid, scrub the memory. 5992 * Must scrub memory before cpu_queue_events, as scrubbing memory sets 5993 * the flt_status bits. 5994 */ 5995 if (afsr & (C_AFSR_CE|C_AFSR_EMC)) { 5996 if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) || 5997 cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) { 5998 cpu_ce_scrub_mem_err(aflt, B_TRUE); 5999 } 6000 } 6001 6002 #endif /* JALAPENO || SERRANO */ 6003 6004 /* 6005 * Update flt_prot if this error occurred under on_trap protection. 6006 */ 6007 if (otp != NULL && (otp->ot_prot & OT_DATA_EC)) 6008 aflt->flt_prot = AFLT_PROT_EC; 6009 6010 /* 6011 * Queue events on the async event queue, one event per error bit. 6012 */ 6013 if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 || 6014 (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) { 6015 ch_flt->flt_type = CPU_INV_AFSR; 6016 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, 6017 (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue, 6018 aflt->flt_panic); 6019 } 6020 6021 /* 6022 * Zero out + invalidate CPU logout. 6023 */ 6024 if (clop) { 6025 bzero(clop, sizeof (ch_cpu_logout_t)); 6026 clop->clo_data.chd_afar = LOGOUT_INVALID; 6027 } 6028 6029 /* 6030 * If either a CPC, WDC or EDC error has occurred while CEEN 6031 * was disabled, we need to flush either the entire 6032 * E$ or an E$ line. 6033 */ 6034 #if defined(JALAPENO) || defined(SERRANO) 6035 if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC)) 6036 #else /* JALAPENO || SERRANO */ 6037 if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC | 6038 C_AFSR_L3_CPC | C_AFSR_L3_WDC)) 6039 #endif /* JALAPENO || SERRANO */ 6040 cpu_error_ecache_flush(ch_flt); 6041 6042 } 6043 6044 /* 6045 * depending on the error type, we determine whether we 6046 * need to flush the entire ecache or just a line. 6047 */ 6048 static int 6049 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt) 6050 { 6051 struct async_flt *aflt; 6052 uint64_t afsr; 6053 uint64_t afsr_errs = ch_flt->afsr_errs; 6054 6055 aflt = (struct async_flt *)ch_flt; 6056 afsr = aflt->flt_stat; 6057 6058 /* 6059 * If we got multiple errors, no point in trying 6060 * the individual cases, just flush the whole cache 6061 */ 6062 if (afsr & C_AFSR_ME) { 6063 return (ECACHE_FLUSH_ALL); 6064 } 6065 6066 /* 6067 * If either a CPC, WDC or EDC error has occurred while CEEN 6068 * was disabled, we need to flush entire E$. We can't just 6069 * flush the cache line affected as the ME bit 6070 * is not set when multiple correctable errors of the same 6071 * type occur, so we might have multiple CPC or EDC errors, 6072 * with only the first recorded. 6073 */ 6074 #if defined(JALAPENO) || defined(SERRANO) 6075 if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) { 6076 #else /* JALAPENO || SERRANO */ 6077 if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC | 6078 C_AFSR_L3_EDC | C_AFSR_L3_WDC)) { 6079 #endif /* JALAPENO || SERRANO */ 6080 return (ECACHE_FLUSH_ALL); 6081 } 6082 6083 #if defined(JALAPENO) || defined(SERRANO) 6084 /* 6085 * If only UE or RUE is set, flush the Ecache line, otherwise 6086 * flush the entire Ecache. 
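 *
 * For example, an AFSR whose only error bit is C_AFSR_UE (or only
 * C_AFSR_RUE) results in ECACHE_FLUSH_LINE; a UE accompanied by any
 * other error bit results in ECACHE_FLUSH_ALL.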
6087 */ 6088 if (afsr & (C_AFSR_UE|C_AFSR_RUE)) { 6089 if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE || 6090 (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) { 6091 return (ECACHE_FLUSH_LINE); 6092 } else { 6093 return (ECACHE_FLUSH_ALL); 6094 } 6095 } 6096 #else /* JALAPENO || SERRANO */ 6097 /* 6098 * If UE only is set, flush the Ecache line, otherwise 6099 * flush the entire Ecache. 6100 */ 6101 if (afsr_errs & C_AFSR_UE) { 6102 if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == 6103 C_AFSR_UE) { 6104 return (ECACHE_FLUSH_LINE); 6105 } else { 6106 return (ECACHE_FLUSH_ALL); 6107 } 6108 } 6109 #endif /* JALAPENO || SERRANO */ 6110 6111 /* 6112 * EDU: If EDU only is set, flush the ecache line, otherwise 6113 * flush the entire Ecache. 6114 */ 6115 if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) { 6116 if (((afsr_errs & ~C_AFSR_EDU) == 0) || 6117 ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) { 6118 return (ECACHE_FLUSH_LINE); 6119 } else { 6120 return (ECACHE_FLUSH_ALL); 6121 } 6122 } 6123 6124 /* 6125 * BERR: If BERR only is set, flush the Ecache line, otherwise 6126 * flush the entire Ecache. 6127 */ 6128 if (afsr_errs & C_AFSR_BERR) { 6129 if ((afsr_errs & ~C_AFSR_BERR) == 0) { 6130 return (ECACHE_FLUSH_LINE); 6131 } else { 6132 return (ECACHE_FLUSH_ALL); 6133 } 6134 } 6135 6136 return (0); 6137 } 6138 6139 void 6140 cpu_error_ecache_flush(ch_async_flt_t *ch_flt) 6141 { 6142 int ecache_flush_flag = 6143 cpu_error_ecache_flush_required(ch_flt); 6144 6145 /* 6146 * Flush Ecache line or entire Ecache based on above checks. 6147 */ 6148 if (ecache_flush_flag == ECACHE_FLUSH_ALL) 6149 cpu_flush_ecache(); 6150 else if (ecache_flush_flag == ECACHE_FLUSH_LINE) { 6151 cpu_flush_ecache_line(ch_flt); 6152 } 6153 6154 } 6155 6156 /* 6157 * Extract the PA portion from the E$ tag. 6158 */ 6159 uint64_t 6160 cpu_ectag_to_pa(int setsize, uint64_t tag) 6161 { 6162 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6163 return (JG_ECTAG_TO_PA(setsize, tag)); 6164 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6165 return (PN_L3TAG_TO_PA(tag)); 6166 else 6167 return (CH_ECTAG_TO_PA(setsize, tag)); 6168 } 6169 6170 /* 6171 * Convert the E$ tag PA into an E$ subblock index. 6172 */ 6173 static int 6174 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr) 6175 { 6176 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6177 return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr)); 6178 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6179 /* Panther has only one subblock per line */ 6180 return (0); 6181 else 6182 return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr)); 6183 } 6184 6185 /* 6186 * All subblocks in an E$ line must be invalid for 6187 * the line to be invalid. 6188 */ 6189 int 6190 cpu_ectag_line_invalid(int cachesize, uint64_t tag) 6191 { 6192 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6193 return (JG_ECTAG_LINE_INVALID(cachesize, tag)); 6194 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6195 return (PN_L3_LINE_INVALID(tag)); 6196 else 6197 return (CH_ECTAG_LINE_INVALID(cachesize, tag)); 6198 } 6199 6200 /* 6201 * Extract state bits for a subblock given the tag. Note that for Panther 6202 * this works on both l2 and l3 tags. 
6203 */ 6204 static int 6205 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag) 6206 { 6207 if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) 6208 return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag)); 6209 else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) 6210 return (tag & CH_ECSTATE_MASK); 6211 else 6212 return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag)); 6213 } 6214 6215 /* 6216 * Cpu specific initialization. 6217 */ 6218 void 6219 cpu_mp_init(void) 6220 { 6221 #ifdef CHEETAHPLUS_ERRATUM_25 6222 if (cheetah_sendmondo_recover) { 6223 cheetah_nudge_init(); 6224 } 6225 #endif 6226 } 6227 6228 void 6229 cpu_ereport_post(struct async_flt *aflt) 6230 { 6231 char *cpu_type, buf[FM_MAX_CLASS]; 6232 nv_alloc_t *nva = NULL; 6233 nvlist_t *ereport, *detector, *resource; 6234 errorq_elem_t *eqep; 6235 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 6236 char unum[UNUM_NAMLEN]; 6237 int len = 0; 6238 uint8_t msg_type; 6239 plat_ecc_ch_async_flt_t plat_ecc_ch_flt; 6240 6241 if (aflt->flt_panic || panicstr) { 6242 eqep = errorq_reserve(ereport_errorq); 6243 if (eqep == NULL) 6244 return; 6245 ereport = errorq_elem_nvl(ereport_errorq, eqep); 6246 nva = errorq_elem_nva(ereport_errorq, eqep); 6247 } else { 6248 ereport = fm_nvlist_create(nva); 6249 } 6250 6251 /* 6252 * Create the scheme "cpu" FMRI. 6253 */ 6254 detector = fm_nvlist_create(nva); 6255 resource = fm_nvlist_create(nva); 6256 switch (cpunodes[aflt->flt_inst].implementation) { 6257 case CHEETAH_IMPL: 6258 cpu_type = FM_EREPORT_CPU_USIII; 6259 break; 6260 case CHEETAH_PLUS_IMPL: 6261 cpu_type = FM_EREPORT_CPU_USIIIplus; 6262 break; 6263 case JALAPENO_IMPL: 6264 cpu_type = FM_EREPORT_CPU_USIIIi; 6265 break; 6266 case SERRANO_IMPL: 6267 cpu_type = FM_EREPORT_CPU_USIIIiplus; 6268 break; 6269 case JAGUAR_IMPL: 6270 cpu_type = FM_EREPORT_CPU_USIV; 6271 break; 6272 case PANTHER_IMPL: 6273 cpu_type = FM_EREPORT_CPU_USIVplus; 6274 break; 6275 default: 6276 cpu_type = FM_EREPORT_CPU_UNSUPPORTED; 6277 break; 6278 } 6279 (void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL, 6280 aflt->flt_inst, (uint8_t)cpunodes[aflt->flt_inst].version, 6281 cpunodes[aflt->flt_inst].device_id); 6282 6283 /* 6284 * Encode all the common data into the ereport. 6285 */ 6286 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", 6287 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class); 6288 6289 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 6290 fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1), 6291 detector, NULL); 6292 6293 /* 6294 * Encode the error specific data that was saved in 6295 * the async_flt structure into the ereport. 6296 */ 6297 cpu_payload_add_aflt(aflt, ereport, resource, 6298 &plat_ecc_ch_flt.ecaf_afar_status, 6299 &plat_ecc_ch_flt.ecaf_synd_status); 6300 6301 if (aflt->flt_panic || panicstr) { 6302 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 6303 } else { 6304 (void) fm_ereport_post(ereport, EVCH_TRYHARD); 6305 fm_nvlist_destroy(ereport, FM_NVA_FREE); 6306 fm_nvlist_destroy(detector, FM_NVA_FREE); 6307 fm_nvlist_destroy(resource, FM_NVA_FREE); 6308 } 6309 /* 6310 * Send the enhanced error information (plat_ecc_error2_data_t) 6311 * to the SC only if it can process it. 6312 */ 6313 6314 if (&plat_ecc_capability_sc_get && 6315 plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) { 6316 msg_type = cpu_flt_bit_to_plat_error(aflt); 6317 if (msg_type != PLAT_ECC_ERROR2_NONE) { 6318 /* 6319 * If afar status is not invalid, do a unum lookup.
6320 */ 6321 if (plat_ecc_ch_flt.ecaf_afar_status != 6322 AFLT_STAT_INVALID) { 6323 (void) cpu_get_mem_unum_aflt( 6324 plat_ecc_ch_flt.ecaf_synd_status, aflt, 6325 unum, UNUM_NAMLEN, &len); 6326 } else { 6327 unum[0] = '\0'; 6328 } 6329 plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar; 6330 plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr; 6331 plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext; 6332 plat_ecc_ch_flt.ecaf_sdw_afsr_ext = 6333 ch_flt->flt_sdw_afsr_ext; 6334 6335 if (&plat_log_fruid_error2) 6336 plat_log_fruid_error2(msg_type, unum, aflt, 6337 &plat_ecc_ch_flt); 6338 } 6339 } 6340 } 6341 6342 void 6343 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 6344 { 6345 int status; 6346 ddi_fm_error_t de; 6347 6348 bzero(&de, sizeof (ddi_fm_error_t)); 6349 6350 de.fme_version = DDI_FME_VERSION; 6351 de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, 6352 FM_ENA_FMT1); 6353 de.fme_flag = expected; 6354 de.fme_bus_specific = (void *)aflt->flt_addr; 6355 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 6356 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 6357 aflt->flt_panic = 1; 6358 } 6359 6360 void 6361 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 6362 errorq_t *eqp, uint_t flag) 6363 { 6364 struct async_flt *aflt = (struct async_flt *)payload; 6365 6366 aflt->flt_erpt_class = error_class; 6367 errorq_dispatch(eqp, payload, payload_sz, flag); 6368 } 6369 6370 /* 6371 * This routine may be called by the IO module, but does not do 6372 * anything in this cpu module. The SERD algorithm is handled by 6373 * the cpumem-diagnosis engine instead. 6374 */ 6375 /*ARGSUSED*/ 6376 void 6377 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 6378 {} 6379 6380 void 6381 adjust_hw_copy_limits(int ecache_size) 6382 { 6383 /* 6384 * Set hw copy limits. 6385 * 6386 * /etc/system will be parsed later and can override one or more 6387 * of these settings. 6388 * 6389 * At this time, ecache size seems only mildly relevant. 6390 * We seem to run into issues with the d-cache and stalls 6391 * we see on misses. 6392 * 6393 * Cycle measurement indicates that 2 byte aligned copies fare 6394 * little better than doing things with VIS at around 512 bytes. 6395 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte 6396 * aligned is faster whenever the source and destination data 6397 * are in cache and the total size is less than 2 Kbytes. The 2K 6398 * limit seems to be driven by the 2K write cache. 6399 * When more than 2K of copies are done in non-VIS mode, stores 6400 * back up in the write cache. In VIS mode, the write cache is 6401 * bypassed, allowing faster cache-line writes aligned on cache 6402 * boundaries. 6403 * 6404 * In addition, in non-VIS mode, there is no prefetching, so 6405 * for larger copies, the advantage of prefetching to avoid even 6406 * occasional cache misses is enough to justify using the VIS code. 6407 * 6408 * During testing, it was discovered that netbench ran 3% slower 6409 * when hw_copy_limit_8 was 2K or larger. Apparently for server 6410 * applications, data is only used once (copied to the output 6411 * buffer, then copied by the network device off the system). Using 6412 * the VIS copy saves more L2 cache state. Network copies are 6413 * around 1.3K to 1.5K in size for historical reasons. 6414 * 6415 * Therefore, a limit of 1K bytes will be used for the 8 byte 6416 * aligned copy even for large caches and 8 MB ecache.
The 6417 * infrastructure to allow different limits for different sized 6418 * caches is kept to allow further tuning in later releases. 6419 */ 6420 6421 if (min_ecache_size == 0 && use_hw_bcopy) { 6422 /* 6423 * First time through - should be before /etc/system 6424 * is read. 6425 * Could skip the checks for zero but this lets us 6426 * preserve any debugger rewrites. 6427 */ 6428 if (hw_copy_limit_1 == 0) { 6429 hw_copy_limit_1 = VIS_COPY_THRESHOLD; 6430 priv_hcl_1 = hw_copy_limit_1; 6431 } 6432 if (hw_copy_limit_2 == 0) { 6433 hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD; 6434 priv_hcl_2 = hw_copy_limit_2; 6435 } 6436 if (hw_copy_limit_4 == 0) { 6437 hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD; 6438 priv_hcl_4 = hw_copy_limit_4; 6439 } 6440 if (hw_copy_limit_8 == 0) { 6441 hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; 6442 priv_hcl_8 = hw_copy_limit_8; 6443 } 6444 min_ecache_size = ecache_size; 6445 } else { 6446 /* 6447 * MP initialization. Called *after* /etc/system has 6448 * been parsed. One CPU has already been initialized. 6449 * Need to cater for /etc/system having scragged one 6450 * of our values. 6451 */ 6452 if (ecache_size == min_ecache_size) { 6453 /* 6454 * Same size ecache. We do nothing unless we 6455 * have a pessimistic ecache setting. In that 6456 * case we become more optimistic (if the cache is 6457 * large enough). 6458 */ 6459 if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) { 6460 /* 6461 * Need to adjust hw_copy_limit* from our 6462 * pessimistic uniprocessor value to a more 6463 * optimistic UP value *iff* it hasn't been 6464 * reset. 6465 */ 6466 if ((ecache_size > 1048576) && 6467 (priv_hcl_8 == hw_copy_limit_8)) { 6468 if (ecache_size <= 2097152) 6469 hw_copy_limit_8 = 4 * 6470 VIS_COPY_THRESHOLD; 6471 else if (ecache_size <= 4194304) 6472 hw_copy_limit_8 = 4 * 6473 VIS_COPY_THRESHOLD; 6474 else 6475 hw_copy_limit_8 = 4 * 6476 VIS_COPY_THRESHOLD; 6477 priv_hcl_8 = hw_copy_limit_8; 6478 } 6479 } 6480 } else if (ecache_size < min_ecache_size) { 6481 /* 6482 * A different ecache size. Can this even happen? 6483 */ 6484 if (priv_hcl_8 == hw_copy_limit_8) { 6485 /* 6486 * The previous value that we set 6487 * is unchanged (i.e., it hasn't been 6488 * scragged by /etc/system). Rewrite it. 6489 */ 6490 if (ecache_size <= 1048576) 6491 hw_copy_limit_8 = 8 * 6492 VIS_COPY_THRESHOLD; 6493 else if (ecache_size <= 2097152) 6494 hw_copy_limit_8 = 8 * 6495 VIS_COPY_THRESHOLD; 6496 else if (ecache_size <= 4194304) 6497 hw_copy_limit_8 = 8 * 6498 VIS_COPY_THRESHOLD; 6499 else 6500 hw_copy_limit_8 = 10 * 6501 VIS_COPY_THRESHOLD; 6502 priv_hcl_8 = hw_copy_limit_8; 6503 min_ecache_size = ecache_size; 6504 } 6505 } 6506 } 6507 } 6508 6509 /* 6510 * Called from illegal instruction trap handler to see if we can attribute 6511 * the trap to a fpras check. 6512 */ 6513 int 6514 fpras_chktrap(struct regs *rp) 6515 { 6516 int op; 6517 struct fpras_chkfngrp *cgp; 6518 uintptr_t tpc = (uintptr_t)rp->r_pc; 6519 6520 if (fpras_chkfngrps == NULL) 6521 return (0); 6522 6523 cgp = &fpras_chkfngrps[CPU->cpu_id]; 6524 for (op = 0; op < FPRAS_NCOPYOPS; ++op) { 6525 if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 && 6526 tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult) 6527 break; 6528 } 6529 if (op == FPRAS_NCOPYOPS) 6530 return (0); 6531 6532 /* 6533 * This is an fpRAS failure caught through an illegal 6534 * instruction - trampoline. 
6535 */ 6536 rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline; 6537 rp->r_npc = rp->r_pc + 4; 6538 return (1); 6539 } 6540 6541 /* 6542 * fpras_failure is called when an fpras check detects a bad calculation 6543 * result or an illegal instruction trap is attributed to an fpras 6544 * check. In all cases we are still bound to CPU. 6545 */ 6546 int 6547 fpras_failure(int op, int how) 6548 { 6549 int use_hw_bcopy_orig, use_hw_bzero_orig; 6550 uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig; 6551 ch_async_flt_t ch_flt; 6552 struct async_flt *aflt = (struct async_flt *)&ch_flt; 6553 struct fpras_chkfn *sfp, *cfp; 6554 uint32_t *sip, *cip; 6555 int i; 6556 6557 /* 6558 * We're running on a sick CPU. Avoid further FPU use at least for 6559 * the time in which we dispatch an ereport and (if applicable) panic. 6560 */ 6561 use_hw_bcopy_orig = use_hw_bcopy; 6562 use_hw_bzero_orig = use_hw_bzero; 6563 hcl1_orig = hw_copy_limit_1; 6564 hcl2_orig = hw_copy_limit_2; 6565 hcl4_orig = hw_copy_limit_4; 6566 hcl8_orig = hw_copy_limit_8; 6567 use_hw_bcopy = use_hw_bzero = 0; 6568 hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 = 6569 hw_copy_limit_8 = 0; 6570 6571 bzero(&ch_flt, sizeof (ch_async_flt_t)); 6572 aflt->flt_id = gethrtime_waitfree(); 6573 aflt->flt_class = CPU_FAULT; 6574 aflt->flt_inst = CPU->cpu_id; 6575 aflt->flt_status = (how << 8) | op; 6576 aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY; 6577 ch_flt.flt_type = CPU_FPUERR; 6578 6579 /* 6580 * We must panic if the copy operation had no lofault protection - 6581 * i.e., don't panic for copyin, copyout, kcopy and bcopy called 6582 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy. 6583 */ 6584 aflt->flt_panic = (curthread->t_lofault == NULL); 6585 6586 /* 6587 * XOR the source instruction block with the copied instruction 6588 * block - this will show us which bit(s) are corrupted. 6589 */ 6590 sfp = (struct fpras_chkfn *)fpras_chkfn_type1; 6591 cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op]; 6592 if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) { 6593 sip = &sfp->fpras_blk0[0]; 6594 cip = &cfp->fpras_blk0[0]; 6595 } else { 6596 sip = &sfp->fpras_blk1[0]; 6597 cip = &cfp->fpras_blk1[0]; 6598 } 6599 for (i = 0; i < 16; ++i, ++sip, ++cip) 6600 ch_flt.flt_fpdata[i] = *sip ^ *cip; 6601 6602 cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt, 6603 sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); 6604 6605 if (aflt->flt_panic) 6606 fm_panic("FPU failure on CPU %d", CPU->cpu_id); 6607 6608 /* 6609 * We get here for copyin/copyout and kcopy or bcopy where the 6610 * caller has used on_fault. We will flag the error so that 6611 * the process may be killed. The trap_async_hwerr mechanism will 6612 * take appropriate further action (such as a reboot, contract 6613 * notification, etc.). Since we may be continuing we will 6614 * restore the global hardware copy acceleration switches. 6615 * 6616 * When we return from this function to the copy function we want to 6617 * avoid potentially bad data being used, i.e., we want the affected 6618 * copy function to return an error. The caller should therefore 6619 * invoke its lofault handler (which always exists for these functions) 6620 * which will return the appropriate error.
6621 */ 6622 ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR; 6623 aston(curthread); 6624 6625 use_hw_bcopy = use_hw_bcopy_orig; 6626 use_hw_bzero = use_hw_bzero_orig; 6627 hw_copy_limit_1 = hcl1_orig; 6628 hw_copy_limit_2 = hcl2_orig; 6629 hw_copy_limit_4 = hcl4_orig; 6630 hw_copy_limit_8 = hcl8_orig; 6631 6632 return (1); 6633 } 6634 6635 #define VIS_BLOCKSIZE 64 6636 6637 int 6638 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 6639 { 6640 int ret, watched; 6641 6642 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 6643 ret = dtrace_blksuword32(addr, data, 0); 6644 if (watched) 6645 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 6646 6647 return (ret); 6648 } 6649 6650 /* 6651 * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the 6652 * faulted cpu into that state). Cross-trap to the faulted cpu to clear 6653 * CEEN from the EER to disable traps for further disrupting error types 6654 * on that cpu. We could cross-call instead, but that has a larger 6655 * instruction and data footprint than cross-trapping, and the cpu is known 6656 * to be faulted. 6657 */ 6658 6659 void 6660 cpu_faulted_enter(struct cpu *cp) 6661 { 6662 xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS); 6663 } 6664 6665 /* 6666 * Called when a cpu leaves the CPU_FAULTED state to return to one of 6667 * offline, spare, or online (by the cpu requesting this state change). 6668 * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of 6669 * disrupting error bits that have accumulated without trapping, then 6670 * we cross-trap to re-enable CEEN controlled traps. 6671 */ 6672 void 6673 cpu_faulted_exit(struct cpu *cp) 6674 { 6675 ch_cpu_errors_t cpu_error_regs; 6676 6677 cpu_error_regs.afsr = C_AFSR_CECC_ERRS; 6678 if (IS_PANTHER(cpunodes[cp->cpu_id].implementation)) 6679 cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS; 6680 xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state, 6681 (uint64_t)&cpu_error_regs, 0); 6682 6683 xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS); 6684 } 6685 6686 /* 6687 * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by 6688 * the errors in the original AFSR, 0 otherwise. 6689 * 6690 * For all procs if the initial error was a BERR or TO, then it is possible 6691 * that we may have caused a secondary BERR or TO in the process of logging the 6692 * initial error via cpu_run_bus_error_handlers(). If this is the case then 6693 * if the request was protected then a panic is still not necessary; if not 6694 * protected then aft_panic is already set - so either way there's no need 6695 * to set aft_panic for the secondary error. 6696 * 6697 * For Cheetah and Jalapeno, if the original error was a UE which occurred on 6698 * a store merge, then the error handling code will call cpu_deferred_error(). 6699 * When clear_errors() is called, it will determine that secondary errors have 6700 * occurred - in particular, the store merge also caused an EDU and WDU that 6701 * weren't discovered until this point. 6702 * 6703 * We do three checks to verify that we are in this case. If we pass all three 6704 * checks, we return 1 to indicate that we should not panic. If any unexpected 6705 * errors occur, we return 0. 6706 * 6707 * For Cheetah+ and derivative procs, the store merge causes a DUE, which is 6708 * handled in cpu_disrupting_errors(). Since this function is not even called 6709 * in the case we are interested in, we just return 0 for these processors.
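 *
 * As an illustration of the Cheetah/Jalapeno case: if the original
 * AFSR recorded only a UE, the new AFSR records nothing beyond EDU
 * and/or WDU, and both AFARs fall within the same 64-byte block
 * (their P2ALIGN(..., 64) values match), then all three checks pass
 * and we return 1.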
6710 */ 6711 /*ARGSUSED*/ 6712 static int 6713 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs, 6714 uint64_t t_afar) 6715 { 6716 #if defined(CHEETAH_PLUS) 6717 #else /* CHEETAH_PLUS */ 6718 struct async_flt *aflt = (struct async_flt *)ch_flt; 6719 #endif /* CHEETAH_PLUS */ 6720 6721 /* 6722 * Was the original error a BERR or TO and only a BERR or TO 6723 * (multiple errors are also OK) 6724 */ 6725 if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) { 6726 /* 6727 * Is the new error a BERR or TO and only a BERR or TO 6728 * (multiple errors are also OK) 6729 */ 6730 if ((ch_flt->afsr_errs & 6731 ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) 6732 return (1); 6733 } 6734 6735 #if defined(CHEETAH_PLUS) 6736 return (0); 6737 #else /* CHEETAH_PLUS */ 6738 /* 6739 * Now look for secondary effects of a UE on cheetah/jalapeno 6740 * 6741 * Check the original error was a UE, and only a UE. Note that 6742 * the ME bit will cause us to fail this check. 6743 */ 6744 if (t_afsr_errs != C_AFSR_UE) 6745 return (0); 6746 6747 /* 6748 * Check the secondary errors were exclusively an EDU and/or WDU. 6749 */ 6750 if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0) 6751 return (0); 6752 6753 /* 6754 * Check the AFAR of the original error and secondary errors 6755 * match to the 64-byte boundary 6756 */ 6757 if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64)) 6758 return (0); 6759 6760 /* 6761 * We've passed all the checks, so it's a secondary error! 6762 */ 6763 return (1); 6764 #endif /* CHEETAH_PLUS */ 6765 } 6766 6767 /* 6768 * Translate the flt_bit or flt_type into an error type. First, flt_bit 6769 * is checked for any valid errors. If found, the error type is 6770 * returned. If not found, the flt_type is checked for L1$ parity errors. 
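 *
 * For example, flt_bit == C_AFSR_CE maps to PLAT_ECC_ERROR2_CE,
 * while a fault with no recognized flt_bit but flt_type ==
 * CPU_ITLB_PARITY maps to PLAT_ECC_ERROR2_ITLB.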
6771 */ 6772 /*ARGSUSED*/ 6773 static uint8_t 6774 cpu_flt_bit_to_plat_error(struct async_flt *aflt) 6775 { 6776 #if defined(JALAPENO) 6777 /* 6778 * Currently, logging errors to the SC is not supported on Jalapeno 6779 */ 6780 return (PLAT_ECC_ERROR2_NONE); 6781 #else 6782 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 6783 6784 switch (ch_flt->flt_bit) { 6785 case C_AFSR_CE: 6786 return (PLAT_ECC_ERROR2_CE); 6787 case C_AFSR_UCC: 6788 case C_AFSR_EDC: 6789 case C_AFSR_WDC: 6790 case C_AFSR_CPC: 6791 return (PLAT_ECC_ERROR2_L2_CE); 6792 case C_AFSR_EMC: 6793 return (PLAT_ECC_ERROR2_EMC); 6794 case C_AFSR_IVC: 6795 return (PLAT_ECC_ERROR2_IVC); 6796 case C_AFSR_UE: 6797 return (PLAT_ECC_ERROR2_UE); 6798 case C_AFSR_UCU: 6799 case C_AFSR_EDU: 6800 case C_AFSR_WDU: 6801 case C_AFSR_CPU: 6802 return (PLAT_ECC_ERROR2_L2_UE); 6803 case C_AFSR_IVU: 6804 return (PLAT_ECC_ERROR2_IVU); 6805 case C_AFSR_TO: 6806 return (PLAT_ECC_ERROR2_TO); 6807 case C_AFSR_BERR: 6808 return (PLAT_ECC_ERROR2_BERR); 6809 #if defined(CHEETAH_PLUS) 6810 case C_AFSR_L3_EDC: 6811 case C_AFSR_L3_UCC: 6812 case C_AFSR_L3_CPC: 6813 case C_AFSR_L3_WDC: 6814 return (PLAT_ECC_ERROR2_L3_CE); 6815 case C_AFSR_IMC: 6816 return (PLAT_ECC_ERROR2_IMC); 6817 case C_AFSR_TSCE: 6818 return (PLAT_ECC_ERROR2_L2_TSCE); 6819 case C_AFSR_THCE: 6820 return (PLAT_ECC_ERROR2_L2_THCE); 6821 case C_AFSR_L3_MECC: 6822 return (PLAT_ECC_ERROR2_L3_MECC); 6823 case C_AFSR_L3_THCE: 6824 return (PLAT_ECC_ERROR2_L3_THCE); 6825 case C_AFSR_L3_CPU: 6826 case C_AFSR_L3_EDU: 6827 case C_AFSR_L3_UCU: 6828 case C_AFSR_L3_WDU: 6829 return (PLAT_ECC_ERROR2_L3_UE); 6830 case C_AFSR_DUE: 6831 return (PLAT_ECC_ERROR2_DUE); 6832 case C_AFSR_DTO: 6833 return (PLAT_ECC_ERROR2_DTO); 6834 case C_AFSR_DBERR: 6835 return (PLAT_ECC_ERROR2_DBERR); 6836 #endif /* CHEETAH_PLUS */ 6837 default: 6838 switch (ch_flt->flt_type) { 6839 #if defined(CPU_IMP_L1_CACHE_PARITY) 6840 case CPU_IC_PARITY: 6841 return (PLAT_ECC_ERROR2_IPE); 6842 case CPU_DC_PARITY: 6843 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 6844 if (ch_flt->parity_data.dpe.cpl_cache == 6845 CPU_PC_PARITY) { 6846 return (PLAT_ECC_ERROR2_PCACHE); 6847 } 6848 } 6849 return (PLAT_ECC_ERROR2_DPE); 6850 #endif /* CPU_IMP_L1_CACHE_PARITY */ 6851 case CPU_ITLB_PARITY: 6852 return (PLAT_ECC_ERROR2_ITLB); 6853 case CPU_DTLB_PARITY: 6854 return (PLAT_ECC_ERROR2_DTLB); 6855 default: 6856 return (PLAT_ECC_ERROR2_NONE); 6857 } 6858 } 6859 #endif /* JALAPENO */ 6860 } 6861