/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sysmacros.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/machthread.h>
#include <sys/cpu.h>
#include <sys/cmp.h>
#include <sys/elf_SPARC.h>
#include <vm/hat_sfmmu.h>
#include <vm/seg_kmem.h>
#include <sys/cpuvar.h>
#include <sys/cheetahregs.h>
#include <sys/us3_module.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/prom_debug.h>
#include <sys/prom_plat.h>
#include <sys/cpu_module.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/clock.h>
#include <sys/platform_module.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/panic.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/fm/protocol.h>
#include <sys/fm/cpu/UltraSPARC-III.h>
#include <sys/fm/util.h>

#ifdef CHEETAHPLUS_ERRATUM_25
#include <sys/cyclic.h>
#endif /* CHEETAHPLUS_ERRATUM_25 */

/*
 * See comment above cpu_scrub_cpu_setup() for description
 */
#define	SCRUBBER_NEITHER_CORE_ONLINE	0x0
#define	SCRUBBER_CORE_0_ONLINE		0x1
#define	SCRUBBER_CORE_1_ONLINE		0x2
#define	SCRUBBER_BOTH_CORES_ONLINE	(SCRUBBER_CORE_0_ONLINE | \
					SCRUBBER_CORE_1_ONLINE)

static int pn_matching_valid_l2_line(uint64_t faddr, ch_ec_data_t *clo_l2_data);
static void cpu_async_log_tlb_parity_err(void *flt);
static cpu_t *cpu_get_sibling_core(cpu_t *cpup);


/*
 * Setup trap handlers.
 */
void
cpu_init_trap(void)
{
	CH_SET_TRAP(tt_pil15, ch_pil15_interrupt_instr);

	CH_SET_TRAP(tt0_fecc, fecc_err_instr);
	CH_SET_TRAP(tt1_fecc, fecc_err_tl1_instr);
	CH_SET_TRAP(tt1_swtrap0, fecc_err_tl1_cont_instr);

	CH_SET_TRAP(tt0_dperr, dcache_parity_instr);
	CH_SET_TRAP(tt1_dperr, dcache_parity_tl1_instr);
	CH_SET_TRAP(tt1_swtrap1, dcache_parity_tl1_cont_instr);

	CH_SET_TRAP(tt0_iperr, icache_parity_instr);
	CH_SET_TRAP(tt1_iperr, icache_parity_tl1_instr);
	CH_SET_TRAP(tt1_swtrap2, icache_parity_tl1_cont_instr);
}

/*
 * Set the magic constants of the implementation.
 */
/*ARGSUSED*/
void
cpu_fiximp(pnode_t dnode)
{
	int i, a;
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;

	dcache_size = CH_DCACHE_SIZE;
	dcache_linesize = CH_DCACHE_LSIZE;

	icache_size = CHP_ICACHE_MAX_SIZE;
	icache_linesize = CHP_ICACHE_MIN_LSIZE;

	ecache_size = CH_ECACHE_MAX_SIZE;
	ecache_alignsize = CH_ECACHE_MAX_LSIZE;
	ecache_associativity = CHP_ECACHE_MIN_NWAY;

	/*
	 * ecache_setsize needs to be the maximum of all cpu ecache setsizes
	 */
	ecache_setsize = CHP_ECACHE_MAX_SETSIZE;
	ASSERT(ecache_setsize >= (ecache_size / ecache_associativity));

	vac_size = CH_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
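	/* vac_shift is log2(vac_size) */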
	i = 0; a = vac_size;
	while (a >>= 1)
		++i;
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;
}

/*
 * Use Panther values for Panther-only domains.
 * See Panther PRM, 1.5.4 Cache Hierarchy
 */
void
cpu_fix_allpanther(void)
{
	/* dcache same as Ch+ */
	icache_size = PN_ICACHE_SIZE;
	icache_linesize = PN_ICACHE_LSIZE;
	ecache_size = PN_L3_SIZE;
	ecache_alignsize = PN_L3_LINESIZE;
	ecache_associativity = PN_L3_NWAYS;
	ecache_setsize = PN_L3_SET_SIZE;
	ASSERT(ecache_setsize >= (ecache_size / ecache_associativity));
	/* vac same as Ch+ */
	/* fix hwcaps for USIV+-only domains */
	cpu_hwcap_flags |= AV_SPARC_POPC;
}

void
send_mondo_set(cpuset_t set)
{
	int lo, busy, nack, shipped = 0;
	uint16_t i, cpuids[IDSR_BN_SETS];
	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
	uint64_t starttick, endtick, tick, lasttick;
#if (NCPU > IDSR_BN_SETS)
	int index = 0;
	int ncpuids = 0;
#endif
#ifdef CHEETAHPLUS_ERRATUM_25
	int recovered = 0;
	int cpuid;
#endif

	ASSERT(!CPUSET_ISNULL(set));
	starttick = lasttick = gettick();

#if (NCPU <= IDSR_BN_SETS)
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			shipit(i, shipped);
			nackmask |= IDSR_NACK_BIT(shipped);
			cpuids[shipped++] = i;
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
#else
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			ncpuids++;

			/*
			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
			 * find we have shipped to more than (IDSR_BN_SETS)
			 * CPUs, set "index" to the highest numbered CPU in
			 * the set so we can ship to other CPUs a bit later on.
			 */
			if (shipped < IDSR_BN_SETS) {
				shipit(i, shipped);
				nackmask |= IDSR_NACK_BIT(shipped);
				cpuids[shipped++] = i;
				CPUSET_DEL(set, i);
				if (CPUSET_ISNULL(set))
					break;
			} else
				index = (int)i;
		}

	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
#endif

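	/*
	 * Poll the IDSR until every target has accepted the mondo.  BUSY
	 * targets are given more time; NACKed targets are resent below.
	 */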
	busymask = IDSR_NACK_TO_BUSY(nackmask);
	busy = nack = 0;
	endtick = starttick + xc_tick_limit;
	for (;;) {
		idsr = getidsr();
#if (NCPU <= IDSR_BN_SETS)
		if (idsr == 0)
			break;
#else
		if (idsr == 0 && shipped == ncpuids)
			break;
#endif
		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
#ifdef CHEETAHPLUS_ERRATUM_25
			cpuid = -1;
			for (i = 0; i < IDSR_BN_SETS; i++) {
				if (idsr & (IDSR_NACK_BIT(i) |
				    IDSR_BUSY_BIT(i))) {
					cpuid = cpuids[i];
					break;
				}
			}
			if (cheetah_sendmondo_recover && cpuid != -1 &&
			    recovered == 0) {
				if (mondo_recover(cpuid, i)) {
					/*
					 * We claimed the whole memory or
					 * full scan is disabled.
					 */
					recovered++;
				}
				tick = gettick();
				endtick = tick + xc_tick_limit;
				lasttick = tick;
				/*
				 * Recheck idsr
				 */
				continue;
			} else
#endif /* CHEETAHPLUS_ERRATUM_25 */
			{
				cmn_err(CE_CONT, "send mondo timeout "
				    "[%d NACK %d BUSY]\nIDSR 0x%"
				    "" PRIx64 " cpuids:", nack, busy, idsr);
				for (i = 0; i < IDSR_BN_SETS; i++) {
					if (idsr & (IDSR_NACK_BIT(i) |
					    IDSR_BUSY_BIT(i))) {
						cmn_err(CE_CONT, " 0x%x",
						    cpuids[i]);
					}
				}
				cmn_err(CE_CONT, "\n");
				cmn_err(CE_PANIC, "send_mondo_set: timeout");
			}
		}
		curnack = idsr & nackmask;
		curbusy = idsr & busymask;
#if (NCPU > IDSR_BN_SETS)
		if (shipped < ncpuids) {
			uint64_t cpus_left;
			uint16_t next = (uint16_t)index;

			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
			    busymask;

			if (cpus_left) {
				do {
					/*
					 * Sequence through and ship to the
					 * remainder of the CPUs in the system
					 * (e.g. other than the first
					 * (IDSR_BN_SETS)) in reverse order.
					 */
					lo = lowbit(cpus_left) - 1;
					i = IDSR_BUSY_IDX(lo);
					shipit(next, i);
					shipped++;
					cpuids[i] = next;

					/*
					 * If we've processed all the CPUs,
					 * exit the loop now and save
					 * instructions.
					 */
					if (shipped == ncpuids)
						break;

					for ((index = ((int)next - 1));
					    index >= 0; index--)
						if (CPU_IN_SET(set, index)) {
							next = (uint16_t)index;
							break;
						}

					cpus_left &= ~(1ull << lo);
				} while (cpus_left);
#ifdef CHEETAHPLUS_ERRATUM_25
				/*
				 * Clear recovered because we are sending to
				 * a new set of targets.
				 */
				recovered = 0;
#endif
				continue;
			}
		}
#endif
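		/*
		 * If any targets are still BUSY, give them more time
		 * before resending to the NACKed targets below.
		 */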
		if (curbusy) {
			busy++;
			continue;
		}

#ifdef SEND_MONDO_STATS
		{
			int n = gettick() - starttick;
			if (n < 8192)
				x_nack_stimes[n >> 7]++;
		}
#endif
		while (gettick() < (tick + sys_clock_mhz))
			;
		do {
			lo = lowbit(curnack) - 1;
			i = IDSR_NACK_IDX(lo);
			shipit(cpuids[i], i);
			curnack &= ~(1ull << lo);
		} while (curnack);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_set_stimes[n >> 7]++;
		else
			x_set_ltimes[(n >> 13) & 0xf]++;
	}
	x_set_cpus[shipped]++;
#endif
}

/*
 * Handles error logging for implementation specific error types
 */
/*ARGSUSED1*/
int
cpu_impl_async_log_err(void *flt, errorq_elem_t *eqep)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;

	switch (ch_flt->flt_type) {

	case CPU_IC_PARITY:
		cpu_async_log_ic_parity_err(flt);
		return (CH_ASYNC_LOG_DONE);

	case CPU_DC_PARITY:
		cpu_async_log_dc_parity_err(flt);
		return (CH_ASYNC_LOG_DONE);

	case CPU_DUE:
		cpu_log_err(aflt);
		cpu_page_retire(ch_flt);
		return (CH_ASYNC_LOG_DONE);

	case CPU_ITLB_PARITY:
	case CPU_DTLB_PARITY:
		cpu_async_log_tlb_parity_err(flt);
		return (CH_ASYNC_LOG_DONE);

	/* report the error and continue */
	case CPU_L3_ADDR_PE:
		cpu_log_err(aflt);
		return (CH_ASYNC_LOG_DONE);

	default:
		return (CH_ASYNC_LOG_UNKNOWN);
	}
}

/*
 * Figure out if the Ecache is direct-mapped (Cheetah or Cheetah+ with the
 * Ecache control ECCR_ASSOC bit off) or 2-way (Cheetah+ with ECCR_ASSOC on).
 * We need to do this on the fly because we may have mixed Cheetah+'s with
 * both direct and 2-way Ecaches.  Panther only supports a 4-way L3$.
 */
int
cpu_ecache_nway(void)
{
	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
		return (PN_L3_NWAYS);
	return ((get_ecache_ctrl() & ECCR_ASSOC) ? 2 : 1);
}

/*
 * Note the order in which these are entered into the table: Fatal Errors
 * (PERR, IERR, ISAP, EMU, IMU) first, then orphaned UCU/UCC, then the AFAR
 * overwrite policy classes, and finally IVU, IVC.
 * Afar overwrite policy is:
 *   Class 4:
 *      AFSR     -- UCC, UCU, TUE, TSCE, TUE_SH
 *      AFSR_EXT -- L3_UCC, L3_UCU, L3_TUE, L3_TUE_SH
 *   Class 3:
 *      AFSR     -- UE, DUE, EDU, WDU, CPU
 *      AFSR_EXT -- L3_EDU, L3_WDU, L3_CPU
 *   Class 2:
 *      AFSR     -- CE, EDC, EMC, WDC, CPC, THCE
 *      AFSR_EXT -- L3_EDC, L3_WDC, L3_CPC, L3_THCE
 *   Class 1:
 *      AFSR     -- TO, DTO, BERR, DBERR
 */
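/*
 * Each entry below lists, in order: the AFSR/AFSR_EXT error bit, the name
 * used when logging, the trap types on which the error is expected, the
 * fault type used for handling, a short description, the ereport payload
 * group, and the ereport class name.
 */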
ecc_type_to_info_t ecc_type_to_info[] = {

	/* Fatal Errors */
	C_AFSR_PERR, "PERR ", ECC_ALL_TRAPS,
		CPU_FATAL, "PERR Fatal",
		FM_EREPORT_PAYLOAD_SYSTEM2,
		FM_EREPORT_CPU_USIII_PERR,
	C_AFSR_IERR, "IERR ", ECC_ALL_TRAPS,
		CPU_FATAL, "IERR Fatal",
		FM_EREPORT_PAYLOAD_SYSTEM2,
		FM_EREPORT_CPU_USIII_IERR,
	C_AFSR_ISAP, "ISAP ", ECC_ALL_TRAPS,
		CPU_FATAL, "ISAP Fatal",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_ISAP,
	C_AFSR_L3_TUE_SH, "L3_TUE_SH ", ECC_C_TRAP,
		CPU_FATAL, "L3_TUE_SH Fatal",
		FM_EREPORT_PAYLOAD_L3_TAG_ECC,
		FM_EREPORT_CPU_USIII_L3_TUE_SH,
	C_AFSR_L3_TUE, "L3_TUE ", ECC_C_TRAP,
		CPU_FATAL, "L3_TUE Fatal",
		FM_EREPORT_PAYLOAD_L3_TAG_ECC,
		FM_EREPORT_CPU_USIII_L3_TUE,
	C_AFSR_TUE_SH, "TUE_SH ", ECC_C_TRAP,
		CPU_FATAL, "TUE_SH Fatal",
		FM_EREPORT_PAYLOAD_L2_TAG_ECC,
		FM_EREPORT_CPU_USIII_TUE_SH,
	C_AFSR_TUE, "TUE ", ECC_ALL_TRAPS,
		CPU_FATAL, "TUE Fatal",
		FM_EREPORT_PAYLOAD_L2_TAG_ECC,
		FM_EREPORT_CPU_USIII_TUE,
	C_AFSR_EMU, "EMU ", ECC_ASYNC_TRAPS,
		CPU_FATAL, "EMU Fatal",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_EMU,
	C_AFSR_IMU, "IMU ", ECC_C_TRAP,
		CPU_FATAL, "IMU Fatal",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_IMU,

	/* L3$ Address parity errors are reported via the MECC bit */
	C_AFSR_L3_MECC, "L3_MECC ", ECC_MECC_TRAPS,
		CPU_L3_ADDR_PE, "L3 Address Parity",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_MECC,

	/* Orphaned UCC/UCU Errors */
	C_AFSR_L3_UCU, "L3_OUCU ", ECC_ORPH_TRAPS,
		CPU_ORPH, "Orphaned L3_UCU",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_UCU,
	C_AFSR_L3_UCC, "L3_OUCC ", ECC_ORPH_TRAPS,
		CPU_ORPH, "Orphaned L3_UCC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_UCC,
	C_AFSR_UCU, "OUCU ", ECC_ORPH_TRAPS,
		CPU_ORPH, "Orphaned UCU",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_UCU,
	C_AFSR_UCC, "OUCC ", ECC_ORPH_TRAPS,
		CPU_ORPH, "Orphaned UCC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_UCC,

	/* UCU, UCC */
	C_AFSR_L3_UCU, "L3_UCU ", ECC_F_TRAP,
		CPU_UE_ECACHE, "L3_UCU",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_UCU,
	C_AFSR_L3_UCC, "L3_UCC ", ECC_F_TRAP,
		CPU_CE_ECACHE, "L3_UCC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_UCC,
	C_AFSR_UCU, "UCU ", ECC_F_TRAP,
		CPU_UE_ECACHE, "UCU",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_UCU,
	C_AFSR_UCC, "UCC ", ECC_F_TRAP,
		CPU_CE_ECACHE, "UCC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_UCC,
	C_AFSR_TSCE, "TSCE ", ECC_F_TRAP,
		CPU_CE_ECACHE, "TSCE",
		FM_EREPORT_PAYLOAD_L2_TAG_ECC,
		FM_EREPORT_CPU_USIII_TSCE,

	/* UE, EDU:ST, EDU:BLD, WDU, CPU */
	C_AFSR_UE, "UE ", ECC_ASYNC_TRAPS,
		CPU_UE, "Uncorrectable system bus (UE)",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_UE,
	C_AFSR_L3_EDU, "L3_EDU ", ECC_C_TRAP,
		CPU_UE_ECACHE_RETIRE, "L3_EDU:ST",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_EDUST,
	C_AFSR_L3_EDU, "L3_EDU ", ECC_D_TRAP,
		CPU_UE_ECACHE_RETIRE, "L3_EDU:BLD",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_EDUBL,
	C_AFSR_L3_WDU, "L3_WDU ", ECC_C_TRAP,
		CPU_UE_ECACHE_RETIRE, "L3_WDU",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_WDU,
	C_AFSR_L3_CPU, "L3_CPU ", ECC_C_TRAP,
		CPU_UE_ECACHE, "L3_CPU",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_CPU,
	C_AFSR_EDU, "EDU ", ECC_C_TRAP,
		CPU_UE_ECACHE_RETIRE, "EDU:ST",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_EDUST,
	C_AFSR_EDU, "EDU ", ECC_D_TRAP,
		CPU_UE_ECACHE_RETIRE, "EDU:BLD",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_EDUBL,
	C_AFSR_WDU, "WDU ", ECC_C_TRAP,
		CPU_UE_ECACHE_RETIRE, "WDU",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_WDU,
	C_AFSR_CPU, "CPU ", ECC_C_TRAP,
		CPU_UE_ECACHE, "CPU",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_CPU,
	C_AFSR_DUE, "DUE ", ECC_C_TRAP,
		CPU_DUE, "DUE",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_DUE,

	/* CE, EDC, EMC, WDC, CPC */
	C_AFSR_CE, "CE ", ECC_C_TRAP,
		CPU_CE, "Corrected system bus (CE)",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_CE,
	C_AFSR_L3_EDC, "L3_EDC ", ECC_C_TRAP,
		CPU_CE_ECACHE, "L3_EDC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_EDC,
	C_AFSR_EDC, "EDC ", ECC_C_TRAP,
		CPU_CE_ECACHE, "EDC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_EDC,
	C_AFSR_EMC, "EMC ", ECC_C_TRAP,
		CPU_EMC, "EMC",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_EMC,
	C_AFSR_L3_WDC, "L3_WDC ", ECC_C_TRAP,
		CPU_CE_ECACHE, "L3_WDC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_WDC,
	C_AFSR_L3_CPC, "L3_CPC ", ECC_C_TRAP,
		CPU_CE_ECACHE, "L3_CPC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_CPC,
	C_AFSR_L3_THCE, "L3_THCE ", ECC_C_TRAP,
		CPU_CE_ECACHE, "L3_THCE",
		FM_EREPORT_PAYLOAD_L3_TAG_ECC,
		FM_EREPORT_CPU_USIII_L3_THCE,
	C_AFSR_WDC, "WDC ", ECC_C_TRAP,
		CPU_CE_ECACHE, "WDC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_WDC,
	C_AFSR_CPC, "CPC ", ECC_C_TRAP,
		CPU_CE_ECACHE, "CPC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_CPC,
	C_AFSR_THCE, "THCE ", ECC_C_TRAP,
		CPU_CE_ECACHE, "THCE",
		FM_EREPORT_PAYLOAD_L2_TAG_ECC,
		FM_EREPORT_CPU_USIII_THCE,

	/* TO, BERR */
	C_AFSR_TO, "TO ", ECC_ASYNC_TRAPS,
		CPU_TO, "Timeout (TO)",
		FM_EREPORT_PAYLOAD_IO,
		FM_EREPORT_CPU_USIII_TO,
	C_AFSR_BERR, "BERR ", ECC_ASYNC_TRAPS,
		CPU_BERR, "Bus Error (BERR)",
		FM_EREPORT_PAYLOAD_IO,
		FM_EREPORT_CPU_USIII_BERR,
	C_AFSR_DTO, "DTO ", ECC_C_TRAP,
		CPU_TO, "Disrupting Timeout (DTO)",
		FM_EREPORT_PAYLOAD_IO,
		FM_EREPORT_CPU_USIII_DTO,
	C_AFSR_DBERR, "DBERR ", ECC_C_TRAP,
		CPU_BERR, "Disrupting Bus Error (DBERR)",
		FM_EREPORT_PAYLOAD_IO,
		FM_EREPORT_CPU_USIII_DBERR,

	/* IVU, IVC, IMC */
	C_AFSR_IVU, "IVU ", ECC_C_TRAP,
		CPU_IV, "IVU",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_IVU,
	C_AFSR_IVC, "IVC ", ECC_C_TRAP,
		CPU_IV, "IVC",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_IVC,
	C_AFSR_IMC, "IMC ", ECC_C_TRAP,
		CPU_IV, "IMC",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_IMC,

	0, NULL, 0,
		0, NULL,
		FM_EREPORT_PAYLOAD_UNKNOWN,
		FM_EREPORT_CPU_USIII_UNKNOWN,
};

/*
 * See Cheetah+ Delta PRM 10.9 and section P.6.1 of the Panther PRM
 *   Class 4:
 *      AFSR     -- UCC, UCU, TUE, TSCE, TUE_SH
 *      AFSR_EXT -- L3_UCC, L3_UCU, L3_TUE, L3_TUE_SH
 *   Class 3:
 *      AFSR     -- UE, DUE, EDU, EMU, WDU, CPU
 *      AFSR_EXT -- L3_EDU, L3_WDU, L3_CPU
 *   Class 2:
 *      AFSR     -- CE, EDC, EMC, WDC, CPC, THCE
 *      AFSR_EXT -- L3_EDC, L3_WDC, L3_CPC, L3_THCE
 *   Class 1:
 *      AFSR     -- TO, DTO, BERR, DBERR
 *      AFSR_EXT --
 */
uint64_t afar_overwrite[] = {
	/* class 4: */
	C_AFSR_UCC | C_AFSR_UCU | C_AFSR_TUE | C_AFSR_TSCE | C_AFSR_TUE_SH |
	C_AFSR_L3_UCC | C_AFSR_L3_UCU | C_AFSR_L3_TUE | C_AFSR_L3_TUE_SH,
	/* class 3: */
	C_AFSR_UE | C_AFSR_DUE | C_AFSR_EDU | C_AFSR_EMU | C_AFSR_WDU |
	C_AFSR_CPU | C_AFSR_L3_EDU | C_AFSR_L3_WDU | C_AFSR_L3_CPU,
	/* class 2: */
	C_AFSR_CE | C_AFSR_EDC | C_AFSR_EMC | C_AFSR_WDC | C_AFSR_CPC |
	C_AFSR_THCE | C_AFSR_L3_EDC | C_AFSR_L3_WDC | C_AFSR_L3_CPC |
	C_AFSR_L3_THCE,
	/* class 1: */
	C_AFSR_TO | C_AFSR_DTO | C_AFSR_BERR | C_AFSR_DBERR,

	0
};

/*
 * For Cheetah+, the E_SYND and M_SYND overwrite priorities are combined.
 * See Cheetah+ Delta PRM 10.9 and Cheetah+ PRM 11.6.2
 *   Class 2: UE, DUE, IVU, EDU, EMU, WDU, UCU, CPU
 *   Class 1: CE, IVC, EDC, EMC, WDC, UCC, CPC
 */
uint64_t esynd_overwrite[] = {
	/* class 2: */
	C_AFSR_UE | C_AFSR_DUE | C_AFSR_IVU | C_AFSR_EDU | C_AFSR_EMU |
	C_AFSR_WDU | C_AFSR_UCU | C_AFSR_CPU,
	/* class 1: */
	C_AFSR_CE | C_AFSR_IVC | C_AFSR_EDC | C_AFSR_EMC | C_AFSR_WDC |
	C_AFSR_UCC | C_AFSR_CPC,
	0
};

/*
 * In Panther, the E_SYND overwrite policy adds one more level.
 * See Panther PRM P.6.2
 *   Class 3:
 *      AFSR     -- UCU, UCC
 *      AFSR_EXT -- L3_UCU, L3_UCC
 *   Class 2:
 *      AFSR     -- UE, DUE, IVU, EDU, WDU, CPU
 *      AFSR_EXT -- L3_EDU, L3_WDU, L3_CPU
 *   Class 1:
 *      AFSR     -- CE, IVC, EDC, WDC, CPC
 *      AFSR_EXT -- L3_EDC, L3_WDC, L3_CPC
 */
uint64_t pn_esynd_overwrite[] = {
	/* class 3: */
	C_AFSR_UCU | C_AFSR_UCC |
	C_AFSR_L3_UCU | C_AFSR_L3_UCC,
	/* class 2: */
	C_AFSR_UE | C_AFSR_DUE | C_AFSR_IVU | C_AFSR_EDU | C_AFSR_WDU |
	C_AFSR_CPU |
	C_AFSR_L3_EDU | C_AFSR_L3_WDU | C_AFSR_L3_CPU,
	/* class 1: */
	C_AFSR_CE | C_AFSR_IVC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_CPC |
	C_AFSR_L3_EDC | C_AFSR_L3_WDC | C_AFSR_L3_CPC,

	0
};

int
afsr_to_pn_esynd_status(uint64_t afsr, uint64_t afsr_bit)
{
	return (afsr_to_overw_status(afsr, afsr_bit, pn_esynd_overwrite));
}

/*
 * Prioritized list of Error bits for MSYND overwrite.
 * See Panther PRM P.6.2 (For Cheetah+, see esynd_overwrite classes)
 *   Class 2: EMU, IMU
 *   Class 1: EMC, IMC
 *
 * Panther adds IMU and IMC.
 */
uint64_t msynd_overwrite[] = {
	/* class 2: */
	C_AFSR_EMU | C_AFSR_IMU,
	/* class 1: */
	C_AFSR_EMC | C_AFSR_IMC,

	0
};

/*
 * change cpu speed bits -- new speed will be normal-speed/divisor.
 *
 * The Jalapeno memory controllers are required to drain outstanding
 * memory transactions within 32 JBus clocks in order to be ready
 * to enter Estar mode.  In some corner cases however, that time
 * fell short.
 *
 * A safe software solution is to force the MCU to act as if in Estar mode,
 * then delay 1us (in ppm code) prior to asserting the J_CHNG_L signal.
 * To reverse the effect, upon exiting Estar, software restores the
 * MCU to its original state.
 */
/* ARGSUSED1 */
void
cpu_change_speed(uint64_t divisor, uint64_t arg2)
{
	bus_config_eclk_t *bceclk;
	uint64_t reg;

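	/*
	 * Find the entry in bus_config_eclk[] for the requested divisor
	 * and program the ECLK field of the Safari config register.
	 */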
	for (bceclk = bus_config_eclk; bceclk->divisor; bceclk++) {
		if (bceclk->divisor != divisor)
			continue;
		reg = get_safari_config();
		reg &= ~SAFARI_CONFIG_ECLK_MASK;
		reg |= bceclk->mask;
		set_safari_config(reg);
		CPU->cpu_m.divisor = (uchar_t)divisor;
		return;
	}
	/*
	 * We will reach here only if OBP and kernel don't agree on
	 * the speeds supported by the CPU.
	 */
	cmn_err(CE_WARN, "cpu_change_speed: bad divisor %" PRIu64, divisor);
}

/*
 * Cpu private initialization.  This includes allocating the cpu_private
 * data structure, initializing it, and initializing the scrubber for this
 * cpu.  This function calls cpu_init_ecache_scrub_dr to init the scrubber.
 * We use kmem_cache_create for the cheetah private data structure because
 * it needs to be allocated on a PAGESIZE (8192) byte boundary.
 */
void
cpu_init_private(struct cpu *cp)
{
	cheetah_private_t *chprp;
	int i;

	ASSERT(CPU_PRIVATE(cp) == NULL);

	/* LINTED: E_TRUE_LOGICAL_EXPR */
	ASSERT((offsetof(cheetah_private_t, chpr_tl1_err_data) +
	    sizeof (ch_err_tl1_data_t) * CH_ERR_TL1_TLMAX) <= PAGESIZE);

	/*
	 * Running with Cheetah CPUs in a Cheetah+, Jaguar, Panther or
	 * mixed Cheetah+/Jaguar/Panther machine is not a supported
	 * configuration. Attempting to do so may result in unpredictable
	 * failures (e.g. running Cheetah+ CPUs with Cheetah E$ disp flush)
	 * so don't allow it.
	 *
	 * This is just defensive code since this configuration mismatch
	 * should have been caught prior to OS execution.
	 */
	if (!(IS_CHEETAH_PLUS(cpunodes[cp->cpu_id].implementation) ||
	    IS_JAGUAR(cpunodes[cp->cpu_id].implementation) ||
	    IS_PANTHER(cpunodes[cp->cpu_id].implementation))) {
		cmn_err(CE_PANIC, "CPU%d: UltraSPARC-III not supported"
		    " on UltraSPARC-III+/IV/IV+ code\n", cp->cpu_id);
	}

	/*
	 * If the ch_private_cache has not been created, create it.
	 */
	if (ch_private_cache == NULL) {
		ch_private_cache = kmem_cache_create("ch_private_cache",
		    sizeof (cheetah_private_t), PAGESIZE, NULL, NULL,
		    NULL, NULL, static_arena, 0);
	}

	chprp = CPU_PRIVATE(cp) = kmem_cache_alloc(ch_private_cache, KM_SLEEP);

	bzero(chprp, sizeof (cheetah_private_t));
	chprp->chpr_fecctl0_logout.clo_data.chd_afar = LOGOUT_INVALID;
	chprp->chpr_cecc_logout.clo_data.chd_afar = LOGOUT_INVALID;
	chprp->chpr_async_logout.clo_data.chd_afar = LOGOUT_INVALID;
	chprp->chpr_tlb_logout.tlo_addr = LOGOUT_INVALID;
	for (i = 0; i < CH_ERR_TL1_TLMAX; i++)
		chprp->chpr_tl1_err_data[i].ch_err_tl1_logout.clo_data.chd_afar
		    = LOGOUT_INVALID;

	/* Panther has a larger Icache compared to cheetahplus or Jaguar */
	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation)) {
		chprp->chpr_icache_size = PN_ICACHE_SIZE;
		chprp->chpr_icache_linesize = PN_ICACHE_LSIZE;
	} else {
		chprp->chpr_icache_size = CH_ICACHE_SIZE;
		chprp->chpr_icache_linesize = CH_ICACHE_LSIZE;
	}

	cpu_init_ecache_scrub_dr(cp);

	/*
	 * Panther's L2$ and E$ are shared between cores, so the scrubber is
	 * only needed on one of the cores.  At this point, we assume all cores
	 * are online, and we only enable the scrubber on core 0.
	 */
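	/*
	 * cmp_cpu_to_chip() maps a cpu id to the id of core 0 on its chip,
	 * so a cpu whose id differs from that value is core 1 and has its
	 * E$ scrubber left disabled here.
	 */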
	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation)) {
		chprp->chpr_scrub_misc.chsm_core_state =
		    SCRUBBER_BOTH_CORES_ONLINE;
		if (cp->cpu_id != (processorid_t)cmp_cpu_to_chip(cp->cpu_id)) {
			chprp->chpr_scrub_misc.chsm_enable[
			    CACHE_SCRUBBER_INFO_E] = 0;
		}
	}

	chprp->chpr_ec_set_size = cpunodes[cp->cpu_id].ecache_size /
	    cpu_ecache_nway();

	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
	ch_err_tl1_paddrs[cp->cpu_id] = va_to_pa(chprp);
	ASSERT(ch_err_tl1_paddrs[cp->cpu_id] != -1);
}

/*
 * Clear the error state registers for this CPU.
 * For Cheetah+/Jaguar, just clear the AFSR but
 * for Panther we also have to clear the AFSR_EXT.
 */
void
set_cpu_error_state(ch_cpu_errors_t *cpu_error_regs)
{
	set_asyncflt(cpu_error_regs->afsr & ~C_AFSR_FATAL_ERRS);
	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
		set_afsr_ext(cpu_error_regs->afsr_ext & ~C_AFSR_EXT_FATAL_ERRS);
	}
}

void
pn_cpu_log_diag_l2_info(ch_async_flt_t *ch_flt) {
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ch_ec_data_t *l2_data = &ch_flt->flt_diag_data.chd_l2_data[0];
	uint64_t faddr = aflt->flt_addr;
	uint8_t log_way_mask = 0;
	int i;

	/*
	 * Only Panther CPUs have the additional L2$ data that needs
	 * to be logged here
	 */
	if (!IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
		return;

	/*
	 * We'll use a simple bit mask to keep track of which way(s)
	 * of the stored cache line we want to log. The idea is to
	 * log the entry if it is a valid line and it matches our
	 * fault AFAR. If no match is found, we will simply log all
	 * the ways.
	 */
	for (i = 0; i < PN_L2_NWAYS; i++)
		if (pn_matching_valid_l2_line(faddr, &l2_data[i]))
			log_way_mask |= (1 << i);

	/* If no matching valid lines were found, we log all ways */
	if (log_way_mask == 0)
		log_way_mask = (1 << PN_L2_NWAYS) - 1;

	/* Log the cache lines */
	for (i = 0; i < PN_L2_NWAYS; i++)
		if (log_way_mask & (1 << i))
			l2_data[i].ec_logflag = EC_LOGFLAG_MAGIC;
}

/*
 * For this routine to return true, the L2 tag in question must be valid
 * and the tag PA must match the fault address (faddr) assuming the correct
 * index is being used.
 */
static int
pn_matching_valid_l2_line(uint64_t faddr, ch_ec_data_t *clo_l2_data) {
	if ((!PN_L2_LINE_INVALID(clo_l2_data->ec_tag)) &&
	    ((faddr & P2ALIGN(C_AFAR_PA, PN_L2_SET_SIZE)) ==
	    PN_L2TAG_TO_PA(clo_l2_data->ec_tag)))
		return (1);
	return (0);
}

/*
 * This array is used to convert the 3 digit PgSz encoding (as used in
 * various MMU registers such as MMU_TAG_ACCESS_EXT) into the corresponding
 * page size.
 */
static uint64_t tlb_pgsz_to_size[] = {
	/* 000 = 8KB: */
	0x2000,
	/* 001 = 64KB: */
	0x10000,
	/* 010 = 512KB: */
	0x80000,
	/* 011 = 4MB: */
	0x400000,
	/* 100 = 32MB: */
	0x2000000,
	/* 101 = 256MB: */
	0x10000000,
	/* undefined for encodings 110 and 111: */
	0, 0
};

/*
 * The itlb_parity_trap and dtlb_parity_trap handlers transfer control here
 * after collecting logout information related to the TLB parity error and
 * flushing the offending TTE entries from the ITLB or DTLB.
 *
 * DTLB traps which occur at TL>0 are not recoverable because we will most
 * likely be corrupting some other trap handler's alternate globals.  As
 * such, we simply panic here when that happens.  ITLB parity errors are
 * not expected to happen at TL>0.
 */
void
cpu_tlb_parity_error(struct regs *rp, ulong_t trap_va, ulong_t tlb_info) {
	ch_async_flt_t ch_flt;
	struct async_flt *aflt;
	pn_tlb_logout_t *tlop = NULL;
	int immu_parity = (tlb_info & PN_TLO_INFO_IMMU) != 0;
	int tl1_trap = (tlb_info & PN_TLO_INFO_TL1) != 0;
	char *error_class;

	bzero(&ch_flt, sizeof (ch_async_flt_t));

	/*
	 * Get the CPU log out info. If we can't find our CPU private
	 * pointer, or if the logout information does not correspond to
	 * this error, then we will have to make do without detailed
	 * logout information.
	 */
	if (CPU_PRIVATE(CPU)) {
		tlop = CPU_PRIVATE_PTR(CPU, chpr_tlb_logout);
		if ((tlop->tlo_addr != trap_va) ||
		    (tlop->tlo_info != tlb_info))
			tlop = NULL;
	}

	if (tlop) {
		ch_flt.tlb_diag_data = *tlop;

		/* Zero out + invalidate TLB logout. */
		bzero(tlop, sizeof (pn_tlb_logout_t));
		tlop->tlo_addr = LOGOUT_INVALID;
	} else {
		/*
		 * Copy what logout information we have and mark
		 * it incomplete.
		 */
		ch_flt.flt_data_incomplete = 1;
		ch_flt.tlb_diag_data.tlo_info = tlb_info;
		ch_flt.tlb_diag_data.tlo_addr = trap_va;
	}

	/*
	 * Log the error.
	 */
	aflt = (struct async_flt *)&ch_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_addr = trap_va;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
	aflt->flt_tl = tl1_trap ? 1 : 0;
	aflt->flt_panic = tl1_trap ? 1 : 0;

	if (immu_parity) {
		aflt->flt_status = ECC_ITLB_TRAP;
		ch_flt.flt_type = CPU_ITLB_PARITY;
		error_class = FM_EREPORT_CPU_USIII_ITLBPE;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_ITLB_PE;
	} else {
		aflt->flt_status = ECC_DTLB_TRAP;
		ch_flt.flt_type = CPU_DTLB_PARITY;
		error_class = FM_EREPORT_CPU_USIII_DTLBPE;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_DTLB_PE;
	}

	/*
	 * The TLB entries have already been flushed by the TL1 trap
	 * handler so at this point the only thing left to do is log
	 * the error message.
	 */
	if (aflt->flt_panic) {
		cpu_errorq_dispatch(error_class, (void *)&ch_flt,
		    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
		/*
		 * Panic here if aflt->flt_panic has been set. Enqueued
		 * errors will be logged as part of the panic flow.
		 */
		fm_panic("%sError(s)", immu_parity ? "ITLBPE " : "DTLBPE ");
	} else {
		cpu_errorq_dispatch(error_class, (void *)&ch_flt,
		    sizeof (ch_async_flt_t), ce_queue, aflt->flt_panic);
	}
}

/*
 * This routine is called when a TLB parity error event is 'ue_drain'ed
 * or 'ce_drain'ed from the errorq.
 */
void
cpu_async_log_tlb_parity_err(void *flt) {
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;
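	/*
	 * aflt is only referenced by the ASSERTs below; the self-assignment
	 * keeps lint quiet in non-DEBUG builds where ASSERT compiles away.
	 */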
#ifdef lint
	aflt = aflt;
#endif

	/*
	 * We only capture TLB information if we encountered
	 * a TLB parity error, and Panther is the only CPU which
	 * can detect a TLB parity error.
	 */
	ASSERT(IS_PANTHER(cpunodes[aflt->flt_inst].implementation));
	ASSERT((ch_flt->flt_type == CPU_ITLB_PARITY) ||
	    (ch_flt->flt_type == CPU_DTLB_PARITY));

	if (ch_flt->flt_data_incomplete == 0) {
		if (ch_flt->flt_type == CPU_ITLB_PARITY)
			ch_flt->tlb_diag_data.tlo_logflag = IT_LOGFLAG_MAGIC;
		else /* parity error is in DTLB */
			ch_flt->tlb_diag_data.tlo_logflag = DT_LOGFLAG_MAGIC;
	}
}

/*
 * Add L1 Prefetch cache data to the ereport payload.
 */
void
cpu_payload_add_pcache(struct async_flt *aflt, nvlist_t *nvl)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	ch_pc_data_t *pcp;
	ch_pc_data_t pcdata[CH_PCACHE_NWAY];
	uint_t nelem;
	int i, ways_logged = 0;

	/*
	 * We only capture P$ information if we encountered
	 * a P$ parity error, and Panther is the only CPU which
	 * can detect a P$ parity error.
	 */
	ASSERT(IS_PANTHER(cpunodes[aflt->flt_inst].implementation));
	for (i = 0; i < CH_PCACHE_NWAY; i++) {
		pcp = &ch_flt->parity_data.dpe.cpl_pc[i];
		if (pcp->pc_logflag == PC_LOGFLAG_MAGIC) {
			bcopy(pcp, &pcdata[ways_logged],
			    sizeof (ch_pc_data_t));
			ways_logged++;
		}
	}

	/*
	 * Add the pcache data to the payload.
	 */
	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1P_WAYS,
	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
	if (ways_logged != 0) {
		nelem = sizeof (ch_pc_data_t) / sizeof (uint64_t) * ways_logged;
		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1P_DATA,
		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)pcdata, NULL);
	}
}

/*
 * Add TLB diagnostic data to the ereport payload.
 */
void
cpu_payload_add_tlb(struct async_flt *aflt, nvlist_t *nvl)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	uint8_t num_entries, tlb_data_words;

	/*
	 * We only capture TLB information if we encountered
	 * a TLB parity error, and Panther is the only CPU which
	 * can detect a TLB parity error.
	 */
	ASSERT(IS_PANTHER(cpunodes[aflt->flt_inst].implementation));
	ASSERT((ch_flt->flt_type == CPU_ITLB_PARITY) ||
	    (ch_flt->flt_type == CPU_DTLB_PARITY));

	if (ch_flt->flt_type == CPU_ITLB_PARITY) {
		num_entries = (uint8_t)(PN_ITLB_NWAYS * PN_NUM_512_ITLBS);
		tlb_data_words = sizeof (ch_tte_entry_t) / sizeof (uint64_t) *
		    num_entries;

		/*
		 * Add the TLB diagnostic data to the payload
		 * if it was collected.
		 */
		if (ch_flt->tlb_diag_data.tlo_logflag == IT_LOGFLAG_MAGIC) {
			fm_payload_set(nvl,
			    FM_EREPORT_PAYLOAD_NAME_ITLB_ENTRIES,
			    DATA_TYPE_UINT8, num_entries, NULL);
			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_ITLB_DATA,
			    DATA_TYPE_UINT64_ARRAY, tlb_data_words,
			    (uint64_t *)ch_flt->tlb_diag_data.tlo_itlb_tte,
			    NULL);
		}
	} else {
		num_entries = (uint8_t)(PN_DTLB_NWAYS * PN_NUM_512_DTLBS);
		tlb_data_words = sizeof (ch_tte_entry_t) / sizeof (uint64_t) *
		    num_entries;

		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_VA,
		    DATA_TYPE_UINT64, ch_flt->tlb_diag_data.tlo_addr, NULL);

		/*
		 * Add the TLB diagnostic data to the payload
		 * if it was collected.
		 */
		if (ch_flt->tlb_diag_data.tlo_logflag == DT_LOGFLAG_MAGIC) {
			fm_payload_set(nvl,
			    FM_EREPORT_PAYLOAD_NAME_DTLB_ENTRIES,
			    DATA_TYPE_UINT8, num_entries, NULL);
			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_DTLB_DATA,
			    DATA_TYPE_UINT64_ARRAY, tlb_data_words,
			    (uint64_t *)ch_flt->tlb_diag_data.tlo_dtlb_tte,
			    NULL);
		}
	}
}

/*
 * Panther Cache Scrubbing:
 *
 * In Jaguar, the E$ was split between cores, so the scrubber must run on both
 * cores.  For Panther, however, the L2$ and L3$ are shared across cores.
 * Therefore, the E$ scrubber only needs to run on one of the two cores.
 *
 * There are four possible states for the E$ scrubber:
 *
 * 0. If both cores are offline, add core 0 to cpu_offline_set so that
 *    the offline scrubber will run on it.
 * 1. If core 0 is online and core 1 off, we run the scrubber on core 0.
 * 2. If core 1 is online and core 0 off, we move the scrubber to run
 *    on core 1.
 * 3. If both cores are online, only run the scrubber on core 0.
 *
 * These states are enumerated by the SCRUBBER_[BOTH|CORE|NEITHER]_* defines
 * above.  One of those values is stored in
 * chpr_scrub_misc->chsm_core_state on each core.
 *
 * Also note that, for Panther, ecache_flush_line() will flush out the L2$
 * before the E$, so the L2$ will be scrubbed by the E$ scrubber.  No
 * additional code is necessary to scrub the L2$.
 *
 * For all cpu types, whenever a cpu or core is offlined, add it to
 * cpu_offline_set so the necessary scrubbers can still run.  This is still
 * necessary on Panther so the D$ scrubber can still run.
 */
/*ARGSUSED*/
int
cpu_scrub_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
{
	processorid_t core_0_id;
	cpu_t *core_cpus[2];
	ch_scrub_misc_t *core_scrub[2];
	int old_state, i;
	int new_state = SCRUBBER_NEITHER_CORE_ONLINE;

	switch (what) {
	case CPU_ON:
	case CPU_INIT:
		CPUSET_DEL(cpu_offline_set, cpuid);
		break;
	case CPU_OFF:
		CPUSET_ADD(cpu_offline_set, cpuid);
		break;
	default:
		return (0);
	}

	if (!IS_PANTHER(cpunodes[cpuid].implementation)) {
		return (0);
	}

	/*
	 * Update the chsm_enable[CACHE_SCRUBBER_INFO_E] value
	 * if necessary
	 */
	core_0_id = cmp_cpu_to_chip(cpuid);
	core_cpus[0] = cpu_get(core_0_id);
	core_cpus[1] = cpu_get_sibling_core(core_cpus[0]);

	for (i = 0; i < 2; i++) {
		if (core_cpus[i] == NULL) {
			/*
			 * This may happen during DR - one core is offlined
			 * and completely unconfigured before the second
			 * core is offlined.  Give up and return quietly,
			 * since the second core should quickly be removed
			 * anyway.
			 */
			return (0);
		}
		core_scrub[i] = CPU_PRIVATE_PTR(core_cpus[i], chpr_scrub_misc);
	}

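	/*
	 * Compute the new online state for this core pair: 'what' describes
	 * the cpu being reconfigured, cpu_is_active() its sibling.
	 */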
	if (cpuid == (processorid_t)cmp_cpu_to_chip(cpuid)) {
		/* cpuid is core 0 */
		if (cpu_is_active(core_cpus[1])) {
			new_state |= SCRUBBER_CORE_1_ONLINE;
		}
		if (what != CPU_OFF) {
			new_state |= SCRUBBER_CORE_0_ONLINE;
		}
	} else {
		/* cpuid is core 1 */
		if (cpu_is_active(core_cpus[0])) {
			new_state |= SCRUBBER_CORE_0_ONLINE;
		}
		if (what != CPU_OFF) {
			new_state |= SCRUBBER_CORE_1_ONLINE;
		}
	}

	old_state = core_scrub[0]->chsm_core_state;

	if (old_state == new_state) {
		return (0);
	}

	if (old_state == SCRUBBER_CORE_1_ONLINE) {
		/*
		 * We need to move the scrubber state from core 1
		 * back to core 0.  This data is not protected by
		 * locks, but the worst that can happen is some
		 * lines are scrubbed multiple times.  chsm_outstanding is
		 * set to 0 to make sure an interrupt is scheduled the
		 * first time through do_scrub().
		 */
		core_scrub[0]->chsm_flush_index[CACHE_SCRUBBER_INFO_E] =
		    core_scrub[1]->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
		core_scrub[0]->chsm_outstanding[CACHE_SCRUBBER_INFO_E] = 0;
	}

	switch (new_state) {
	case SCRUBBER_NEITHER_CORE_ONLINE:
	case SCRUBBER_BOTH_CORES_ONLINE:
	case SCRUBBER_CORE_0_ONLINE:
		core_scrub[1]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 0;
		core_scrub[0]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
		break;

	case SCRUBBER_CORE_1_ONLINE:
	default:
		/*
		 * We need to move the scrubber state from core 0
		 * to core 1.
		 */
		core_scrub[1]->chsm_flush_index[CACHE_SCRUBBER_INFO_E] =
		    core_scrub[0]->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
		core_scrub[1]->chsm_outstanding[CACHE_SCRUBBER_INFO_E] = 0;

		core_scrub[0]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 0;
		core_scrub[1]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
		break;
	}

	core_scrub[0]->chsm_core_state = new_state;
	core_scrub[1]->chsm_core_state = new_state;
	return (0);
}

/*
 * Returns a pointer to the cpu structure of the argument's sibling core.
 * If no sibling core can be found, return NULL.
 */
static cpu_t *
cpu_get_sibling_core(cpu_t *cpup)
{
	cpu_t *nextp;

	if ((cpup == NULL) || (!cmp_cpu_is_cmp(cpup->cpu_id)))
		return (NULL);

	nextp = cpup->cpu_next_chip;
	if ((nextp == NULL) || (nextp == cpup))
		return (NULL);

	return (nextp);
}