/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sysmacros.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/machthread.h>
#include <sys/cpu.h>
#include <sys/cmp.h>
#include <sys/elf_SPARC.h>
#include <vm/hat_sfmmu.h>
#include <vm/seg_kmem.h>
#include <sys/cpuvar.h>
#include <sys/cheetahregs.h>
#include <sys/us3_module.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/prom_debug.h>
#include <sys/prom_plat.h>
#include <sys/cpu_module.h>
#include <sys/intreg.h>
#include <sys/clock.h>
#include <sys/platform_module.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/panic.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/fm/protocol.h>
#include <sys/fm/cpu/UltraSPARC-III.h>
#include <sys/fm/util.h>
#include <sys/pghw.h>

#ifdef CHEETAHPLUS_ERRATUM_25
#include <sys/cyclic.h>
#endif /* CHEETAHPLUS_ERRATUM_25 */

/*
 * See comment above cpu_scrub_cpu_setup() for description
 */
#define	SCRUBBER_NEITHER_CORE_ONLINE	0x0
#define	SCRUBBER_CORE_0_ONLINE		0x1
#define	SCRUBBER_CORE_1_ONLINE		0x2
#define	SCRUBBER_BOTH_CORES_ONLINE	(SCRUBBER_CORE_0_ONLINE | \
					SCRUBBER_CORE_1_ONLINE)

static int pn_matching_valid_l2_line(uint64_t faddr, ch_ec_data_t *clo_l2_data);
static void cpu_async_log_tlb_parity_err(void *flt);
static cpu_t *cpu_get_sibling_core(cpu_t *cpup);


/*
 * Setup trap handlers.
 */
void
cpu_init_trap(void)
{
	CH_SET_TRAP(pil15_epilogue, ch_pil15_interrupt_instr);

	CH_SET_TRAP(tt0_fecc, fecc_err_instr);
	CH_SET_TRAP(tt1_fecc, fecc_err_tl1_instr);
	CH_SET_TRAP(tt1_swtrap0, fecc_err_tl1_cont_instr);

	CH_SET_TRAP(tt0_dperr, dcache_parity_instr);
	CH_SET_TRAP(tt1_dperr, dcache_parity_tl1_instr);
	CH_SET_TRAP(tt1_swtrap1, dcache_parity_tl1_cont_instr);

	CH_SET_TRAP(tt0_iperr, icache_parity_instr);
	CH_SET_TRAP(tt1_iperr, icache_parity_tl1_instr);
	CH_SET_TRAP(tt1_swtrap2, icache_parity_tl1_cont_instr);
}

/*
 * Set the magic constants of the implementation.
 */
/*ARGSUSED*/
void
cpu_fiximp(pnode_t dnode)
{
	int i, a;
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;

	dcache_size = CH_DCACHE_SIZE;
	dcache_linesize = CH_DCACHE_LSIZE;

	icache_size = CHP_ICACHE_MAX_SIZE;
	icache_linesize = CHP_ICACHE_MIN_LSIZE;

	ecache_size = CH_ECACHE_MAX_SIZE;
	ecache_alignsize = CH_ECACHE_MAX_LSIZE;
	ecache_associativity = CHP_ECACHE_MIN_NWAY;

	/*
	 * ecache_setsize needs to be the maximum of all cpu ecache setsizes
	 */
	ecache_setsize = CHP_ECACHE_MAX_SETSIZE;
	ASSERT(ecache_setsize >= (ecache_size / ecache_associativity));

	vac_size = CH_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
	i = 0;
	a = vac_size;
	while (a >>= 1)
		++i;
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;
}
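
/*
 * Editorial sketch (hedged, not part of the original source): the
 * vac_shift loop above simply computes log2(vac_size) for a power-of-two
 * VAC size. An equivalent standalone formulation, under the hypothetical
 * EXAMPLE_SKETCHES guard (never defined in the build), would be:
 */
#ifdef	EXAMPLE_SKETCHES
static int
example_log2(uint64_t size)
{
	int shift = 0;

	/* Shift right until the value is exhausted; counts log2(size). */
	while (size >>= 1)
		shift++;
	return (shift);
}
/* e.g., example_log2(0x2000) == 13, since 0x2000 is 8K == 2^13. */
#endif	/* EXAMPLE_SKETCHES */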

/*
 * Use Panther values for Panther-only domains.
 * See Panther PRM, 1.5.4 Cache Hierarchy
 */
void
cpu_fix_allpanther(void)
{
	/* dcache same as Ch+ */
	icache_size = PN_ICACHE_SIZE;
	icache_linesize = PN_ICACHE_LSIZE;
	ecache_size = PN_L3_SIZE;
	ecache_alignsize = PN_L3_LINESIZE;
	ecache_associativity = PN_L3_NWAYS;
	ecache_setsize = PN_L3_SET_SIZE;
	ASSERT(ecache_setsize >= (ecache_size / ecache_associativity));
	/* vac same as Ch+ */
	/* fix hwcaps for USIV+-only domains */
	cpu_hwcap_flags |= AV_SPARC_POPC;
}

void
send_mondo_set(cpuset_t set)
{
	int lo, busy, nack, shipped = 0;
	uint16_t i, cpuids[IDSR_BN_SETS];
	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
	uint64_t starttick, endtick, tick, lasttick;
#if (NCPU > IDSR_BN_SETS)
	int index = 0;
	int ncpuids = 0;
#endif
#ifdef CHEETAHPLUS_ERRATUM_25
	int recovered = 0;
	int cpuid;
#endif

	ASSERT(!CPUSET_ISNULL(set));
	starttick = lasttick = gettick();

#if (NCPU <= IDSR_BN_SETS)
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			shipit(i, shipped);
			nackmask |= IDSR_NACK_BIT(shipped);
			cpuids[shipped++] = i;
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
#else
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			ncpuids++;

			/*
			 * Ship only to the first (IDSR_BN_SETS) CPUs. If we
			 * find we have shipped to more than (IDSR_BN_SETS)
			 * CPUs, set "index" to the highest numbered CPU in
			 * the set so we can ship to other CPUs a bit later on.
			 */
			if (shipped < IDSR_BN_SETS) {
				shipit(i, shipped);
				nackmask |= IDSR_NACK_BIT(shipped);
				cpuids[shipped++] = i;
				CPUSET_DEL(set, i);
				if (CPUSET_ISNULL(set))
					break;
			} else
				index = (int)i;
		}

	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
#endif

	busymask = IDSR_NACK_TO_BUSY(nackmask);
	busy = nack = 0;
	endtick = starttick + xc_tick_limit;
	for (;;) {
		idsr = getidsr();
#if (NCPU <= IDSR_BN_SETS)
		if (idsr == 0)
			break;
#else
		if (idsr == 0 && shipped == ncpuids)
			break;
#endif
		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point. Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
#ifdef CHEETAHPLUS_ERRATUM_25
			cpuid = -1;
			for (i = 0; i < IDSR_BN_SETS; i++) {
				if (idsr & (IDSR_NACK_BIT(i) |
				    IDSR_BUSY_BIT(i))) {
					cpuid = cpuids[i];
					break;
				}
			}
			if (cheetah_sendmondo_recover && cpuid != -1 &&
			    recovered == 0) {
				if (mondo_recover(cpuid, i)) {
					/*
					 * We either claimed the whole memory
					 * or the full scan is disabled.
					 */
					recovered++;
				}
				tick = gettick();
				endtick = tick + xc_tick_limit;
				lasttick = tick;
				/*
				 * Recheck idsr
				 */
				continue;
			} else
#endif /* CHEETAHPLUS_ERRATUM_25 */
			{
				cmn_err(CE_CONT, "send mondo timeout "
				    "[%d NACK %d BUSY]\nIDSR 0x%"
				    "" PRIx64 " cpuids:", nack, busy, idsr);
				for (i = 0; i < IDSR_BN_SETS; i++) {
					if (idsr & (IDSR_NACK_BIT(i) |
					    IDSR_BUSY_BIT(i))) {
						cmn_err(CE_CONT, " 0x%x",
						    cpuids[i]);
					}
				}
				cmn_err(CE_CONT, "\n");
				cmn_err(CE_PANIC, "send_mondo_set: timeout");
			}
		}
		curnack = idsr & nackmask;
		curbusy = idsr & busymask;
#if (NCPU > IDSR_BN_SETS)
		if (shipped < ncpuids) {
			uint64_t cpus_left;
			uint16_t next = (uint16_t)index;

			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
			    busymask;

			if (cpus_left) {
				do {
					/*
					 * Sequence through and ship to the
					 * remainder of the CPUs in the system
					 * (i.e., other than the first
					 * IDSR_BN_SETS) in reverse order.
					 */
					lo = lowbit(cpus_left) - 1;
					i = IDSR_BUSY_IDX(lo);
					shipit(next, i);
					shipped++;
					cpuids[i] = next;

					/*
					 * If we've processed all the CPUs,
					 * exit the loop now and save
					 * instructions.
					 */
					if (shipped == ncpuids)
						break;

					for ((index = ((int)next - 1));
					    index >= 0; index--)
						if (CPU_IN_SET(set, index)) {
							next = (uint16_t)index;
							break;
						}

					cpus_left &= ~(1ull << lo);
				} while (cpus_left);
#ifdef CHEETAHPLUS_ERRATUM_25
				/*
				 * Clear recovered because we are sending to
				 * a new set of targets.
				 */
				recovered = 0;
#endif
				continue;
			}
		}
#endif
		if (curbusy) {
			busy++;
			continue;
		}

#ifdef SEND_MONDO_STATS
		{
			int n = gettick() - starttick;
			if (n < 8192)
				x_nack_stimes[n >> 7]++;
		}
#endif
		while (gettick() < (tick + sys_clock_mhz))
			;
		do {
			lo = lowbit(curnack) - 1;
			i = IDSR_NACK_IDX(lo);
			shipit(cpuids[i], i);
			curnack &= ~(1ull << lo);
		} while (curnack);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_set_stimes[n >> 7]++;
		else
			x_set_ltimes[(n >> 13) & 0xf]++;
	}
	x_set_cpus[shipped]++;
#endif
}
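
/*
 * Editorial sketch (hedged, not from the original source): both retry
 * loops above walk every set bit of a mask with lowbit(), which returns
 * the 1-based position of the lowest set bit (0 for an empty mask). A
 * self-contained analogue of that walk, with a hypothetical visit()
 * callback standing in for shipit(), under the hypothetical
 * EXAMPLE_SKETCHES guard:
 */
#ifdef	EXAMPLE_SKETCHES
static int
example_lowbit(uint64_t x)
{
	int bit = 1;

	if (x == 0)
		return (0);
	while ((x & 1) == 0) {
		x >>= 1;
		bit++;
	}
	return (bit);
}

static void
example_walk_set_bits(uint64_t mask, void (*visit)(int))
{
	int lo;

	while (mask) {
		lo = example_lowbit(mask) - 1;	/* 0-based bit index */
		visit(lo);			/* handle this bit */
		mask &= ~(1ull << lo);		/* clear it and continue */
	}
}
#endif	/* EXAMPLE_SKETCHES */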

/*
 * Handles error logging for implementation-specific error types
 */
/*ARGSUSED1*/
int
cpu_impl_async_log_err(void *flt, errorq_elem_t *eqep)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;

	switch (ch_flt->flt_type) {

	case CPU_IC_PARITY:
		cpu_async_log_ic_parity_err(flt);
		return (CH_ASYNC_LOG_DONE);

	case CPU_DC_PARITY:
		cpu_async_log_dc_parity_err(flt);
		return (CH_ASYNC_LOG_DONE);

	case CPU_DUE:
		cpu_log_err(aflt);
		cpu_page_retire(ch_flt);
		return (CH_ASYNC_LOG_DONE);

	case CPU_ITLB_PARITY:
	case CPU_DTLB_PARITY:
		cpu_async_log_tlb_parity_err(flt);
		return (CH_ASYNC_LOG_DONE);

	/* report the error and continue */
	case CPU_L3_ADDR_PE:
		cpu_log_err(aflt);
		return (CH_ASYNC_LOG_DONE);

	default:
		return (CH_ASYNC_LOG_UNKNOWN);
	}
}

/*
 * Figure out if the Ecache is direct-mapped (Cheetah or Cheetah+ with the
 * Ecache control ECCR_ASSOC bit off) or 2-way (Cheetah+ with ECCR_ASSOC on).
 * We need to do this on the fly because we may have mixed Cheetah+'s with
 * both direct and 2-way Ecaches. Panther only supports 4-way L3$.
 */
int
cpu_ecache_nway(void)
{
	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
		return (PN_L3_NWAYS);
	return ((get_ecache_ctrl() & ECCR_ASSOC) ? 2 : 1);
}

/*
 * Note that these are entered into the table: Fatal Errors (PERR, IERR, ISAP,
 * EMU, IMU) first, then orphaned UCU/UCC, then the AFAR overwrite policy
 * classes, and finally IVU, IVC.
 * The AFAR overwrite policy is:
 * Class 4:
 *	AFSR     -- UCC, UCU, TUE, TSCE, TUE_SH
 *	AFSR_EXT -- L3_UCC, L3_UCU, L3_TUE, L3_TUE_SH
 * Class 3:
 *	AFSR     -- UE, DUE, EDU, WDU, CPU
 *	AFSR_EXT -- L3_EDU, L3_WDU, L3_CPU
 * Class 2:
 *	AFSR     -- CE, EDC, EMC, WDC, CPC, THCE
 *	AFSR_EXT -- L3_EDC, L3_WDC, L3_CPC, L3_THCE
 * Class 1:
 *	AFSR     -- TO, DTO, BERR, DBERR
 */
ecc_type_to_info_t ecc_type_to_info[] = {

	/* Fatal Errors */
	C_AFSR_PERR, "PERR ", ECC_ALL_TRAPS,
	    CPU_FATAL, "PERR Fatal",
	    FM_EREPORT_PAYLOAD_SYSTEM2,
	    FM_EREPORT_CPU_USIII_PERR,
	C_AFSR_IERR, "IERR ", ECC_ALL_TRAPS,
	    CPU_FATAL, "IERR Fatal",
	    FM_EREPORT_PAYLOAD_SYSTEM2,
	    FM_EREPORT_CPU_USIII_IERR,
	C_AFSR_ISAP, "ISAP ", ECC_ALL_TRAPS,
	    CPU_FATAL, "ISAP Fatal",
	    FM_EREPORT_PAYLOAD_SYSTEM1,
	    FM_EREPORT_CPU_USIII_ISAP,
	C_AFSR_L3_TUE_SH, "L3_TUE_SH ", ECC_C_TRAP,
	    CPU_FATAL, "L3_TUE_SH Fatal",
	    FM_EREPORT_PAYLOAD_L3_TAG_ECC,
	    FM_EREPORT_CPU_USIII_L3_TUE_SH,
	C_AFSR_L3_TUE, "L3_TUE ", ECC_C_TRAP,
	    CPU_FATAL, "L3_TUE Fatal",
	    FM_EREPORT_PAYLOAD_L3_TAG_ECC,
	    FM_EREPORT_CPU_USIII_L3_TUE,
	C_AFSR_TUE_SH, "TUE_SH ", ECC_C_TRAP,
	    CPU_FATAL, "TUE_SH Fatal",
	    FM_EREPORT_PAYLOAD_L2_TAG_ECC,
	    FM_EREPORT_CPU_USIII_TUE_SH,
	C_AFSR_TUE, "TUE ", ECC_ALL_TRAPS,
	    CPU_FATAL, "TUE Fatal",
	    FM_EREPORT_PAYLOAD_L2_TAG_ECC,
	    FM_EREPORT_CPU_USIII_TUE,
	C_AFSR_EMU, "EMU ", ECC_ASYNC_TRAPS,
	    CPU_FATAL, "EMU Fatal",
	    FM_EREPORT_PAYLOAD_MEMORY,
	    FM_EREPORT_CPU_USIII_EMU,
	C_AFSR_IMU, "IMU ", ECC_C_TRAP,
	    CPU_FATAL, "IMU Fatal",
	    FM_EREPORT_PAYLOAD_SYSTEM1,
	    FM_EREPORT_CPU_USIII_IMU,

	/* L3$ Address parity errors are reported via the MECC bit */
	C_AFSR_L3_MECC, "L3_MECC ", ECC_MECC_TRAPS,
	    CPU_L3_ADDR_PE, "L3 Address Parity",
	    FM_EREPORT_PAYLOAD_L3_DATA,
	    FM_EREPORT_CPU_USIII_L3_MECC,

	/* Orphaned UCC/UCU Errors */
	C_AFSR_L3_UCU, "L3_OUCU ", ECC_ORPH_TRAPS,
	    CPU_ORPH, "Orphaned L3_UCU",
	    FM_EREPORT_PAYLOAD_L3_DATA,
	    FM_EREPORT_CPU_USIII_L3_UCU,
	C_AFSR_L3_UCC, "L3_OUCC ", ECC_ORPH_TRAPS,
	    CPU_ORPH, "Orphaned L3_UCC",
	    FM_EREPORT_PAYLOAD_L3_DATA,
	    FM_EREPORT_CPU_USIII_L3_UCC,
	C_AFSR_UCU, "OUCU ", ECC_ORPH_TRAPS,
	    CPU_ORPH, "Orphaned UCU",
	    FM_EREPORT_PAYLOAD_L2_DATA,
	    FM_EREPORT_CPU_USIII_UCU,
	C_AFSR_UCC, "OUCC ", ECC_ORPH_TRAPS,
	    CPU_ORPH, "Orphaned UCC",
	    FM_EREPORT_PAYLOAD_L2_DATA,
	    FM_EREPORT_CPU_USIII_UCC,

	/* UCU, UCC */
	C_AFSR_L3_UCU, "L3_UCU ", ECC_F_TRAP,
	    CPU_UE_ECACHE, "L3_UCU",
	    FM_EREPORT_PAYLOAD_L3_DATA,
	    FM_EREPORT_CPU_USIII_L3_UCU,
	C_AFSR_L3_UCC, "L3_UCC ", ECC_F_TRAP,
	    CPU_CE_ECACHE, "L3_UCC",
	    FM_EREPORT_PAYLOAD_L3_DATA,
	    FM_EREPORT_CPU_USIII_L3_UCC,
	C_AFSR_UCU, "UCU ", ECC_F_TRAP,
	    CPU_UE_ECACHE, "UCU",
	    FM_EREPORT_PAYLOAD_L2_DATA,
	    FM_EREPORT_CPU_USIII_UCU,
	C_AFSR_UCC, "UCC ", ECC_F_TRAP,
	    CPU_CE_ECACHE, "UCC",
	    FM_EREPORT_PAYLOAD_L2_DATA,
	    FM_EREPORT_CPU_USIII_UCC,
	C_AFSR_TSCE, "TSCE ", ECC_F_TRAP,
	    CPU_CE_ECACHE, "TSCE",
	    FM_EREPORT_PAYLOAD_L2_TAG_ECC,
	    FM_EREPORT_CPU_USIII_TSCE,

	/* UE, EDU:ST, EDU:BLD, WDU, CPU */
	C_AFSR_UE, "UE ", ECC_ASYNC_TRAPS,
	    CPU_UE, "Uncorrectable system bus (UE)",
	    FM_EREPORT_PAYLOAD_MEMORY,
	    FM_EREPORT_CPU_USIII_UE,
	C_AFSR_L3_EDU, "L3_EDU ", ECC_C_TRAP,
	    CPU_UE_ECACHE_RETIRE, "L3_EDU:ST",
	    FM_EREPORT_PAYLOAD_L3_DATA,
	    FM_EREPORT_CPU_USIII_L3_EDUST,
	C_AFSR_L3_EDU, "L3_EDU ", ECC_D_TRAP,
	    CPU_UE_ECACHE_RETIRE, "L3_EDU:BLD",
	    FM_EREPORT_PAYLOAD_L3_DATA,
	    FM_EREPORT_CPU_USIII_L3_EDUBL,
	C_AFSR_L3_WDU, "L3_WDU ", ECC_C_TRAP,
	    CPU_UE_ECACHE_RETIRE, "L3_WDU",
	    FM_EREPORT_PAYLOAD_L3_DATA,
	    FM_EREPORT_CPU_USIII_L3_WDU,
	C_AFSR_L3_CPU, "L3_CPU ", ECC_C_TRAP,
	    CPU_UE_ECACHE, "L3_CPU",
	    FM_EREPORT_PAYLOAD_L3_DATA,
	    FM_EREPORT_CPU_USIII_L3_CPU,
	C_AFSR_EDU, "EDU ", ECC_C_TRAP,
	    CPU_UE_ECACHE_RETIRE, "EDU:ST",
	    FM_EREPORT_PAYLOAD_L2_DATA,
	    FM_EREPORT_CPU_USIII_EDUST,
	C_AFSR_EDU, "EDU ", ECC_D_TRAP,
	    CPU_UE_ECACHE_RETIRE, "EDU:BLD",
	    FM_EREPORT_PAYLOAD_L2_DATA,
	    FM_EREPORT_CPU_USIII_EDUBL,
	C_AFSR_WDU, "WDU ", ECC_C_TRAP,
	    CPU_UE_ECACHE_RETIRE, "WDU",
	    FM_EREPORT_PAYLOAD_L2_DATA,
	    FM_EREPORT_CPU_USIII_WDU,
	C_AFSR_CPU, "CPU ", ECC_C_TRAP,
	    CPU_UE_ECACHE, "CPU",
	    FM_EREPORT_PAYLOAD_L2_DATA,
	    FM_EREPORT_CPU_USIII_CPU,
	C_AFSR_DUE, "DUE ", ECC_C_TRAP,
	    CPU_DUE, "DUE",
	    FM_EREPORT_PAYLOAD_MEMORY,
	    FM_EREPORT_CPU_USIII_DUE,

	/* CE, EDC, EMC, WDC, CPC */
	C_AFSR_CE, "CE ", ECC_C_TRAP,
	    CPU_CE, "Corrected system bus (CE)",
	    FM_EREPORT_PAYLOAD_MEMORY,
	    FM_EREPORT_CPU_USIII_CE,
	C_AFSR_L3_EDC, "L3_EDC ", ECC_C_TRAP,
	    CPU_CE_ECACHE, "L3_EDC",
	    FM_EREPORT_PAYLOAD_L3_DATA,
	    FM_EREPORT_CPU_USIII_L3_EDC,
	C_AFSR_EDC, "EDC ", ECC_C_TRAP,
	    CPU_CE_ECACHE, "EDC",
	    FM_EREPORT_PAYLOAD_L2_DATA,
	    FM_EREPORT_CPU_USIII_EDC,
	C_AFSR_EMC, "EMC ", ECC_C_TRAP,
	    CPU_EMC, "EMC",
	    FM_EREPORT_PAYLOAD_MEMORY,
	    FM_EREPORT_CPU_USIII_EMC,
	C_AFSR_L3_WDC, "L3_WDC ", ECC_C_TRAP,
	    CPU_CE_ECACHE, "L3_WDC",
	    FM_EREPORT_PAYLOAD_L3_DATA,
	    FM_EREPORT_CPU_USIII_L3_WDC,
	C_AFSR_L3_CPC, "L3_CPC ", ECC_C_TRAP,
	    CPU_CE_ECACHE, "L3_CPC",
	    FM_EREPORT_PAYLOAD_L3_DATA,
	    FM_EREPORT_CPU_USIII_L3_CPC,
	C_AFSR_L3_THCE, "L3_THCE ", ECC_C_TRAP,
	    CPU_CE_ECACHE, "L3_THCE",
	    FM_EREPORT_PAYLOAD_L3_TAG_ECC,
	    FM_EREPORT_CPU_USIII_L3_THCE,
	C_AFSR_WDC, "WDC ", ECC_C_TRAP,
	    CPU_CE_ECACHE, "WDC",
	    FM_EREPORT_PAYLOAD_L2_DATA,
	    FM_EREPORT_CPU_USIII_WDC,
	C_AFSR_CPC, "CPC ", ECC_C_TRAP,
	    CPU_CE_ECACHE, "CPC",
	    FM_EREPORT_PAYLOAD_L2_DATA,
	    FM_EREPORT_CPU_USIII_CPC,
	C_AFSR_THCE, "THCE ", ECC_C_TRAP,
	    CPU_CE_ECACHE, "THCE",
	    FM_EREPORT_PAYLOAD_L2_TAG_ECC,
	    FM_EREPORT_CPU_USIII_THCE,

	/* TO, BERR */
	C_AFSR_TO, "TO ", ECC_ASYNC_TRAPS,
	    CPU_TO, "Timeout (TO)",
	    FM_EREPORT_PAYLOAD_IO,
	    FM_EREPORT_CPU_USIII_TO,
	C_AFSR_BERR, "BERR ", ECC_ASYNC_TRAPS,
	    CPU_BERR, "Bus Error (BERR)",
	    FM_EREPORT_PAYLOAD_IO,
	    FM_EREPORT_CPU_USIII_BERR,
	C_AFSR_DTO, "DTO ", ECC_C_TRAP,
	    CPU_TO, "Disrupting Timeout (DTO)",
	    FM_EREPORT_PAYLOAD_IO,
	    FM_EREPORT_CPU_USIII_DTO,
	C_AFSR_DBERR, "DBERR ", ECC_C_TRAP,
	    CPU_BERR, "Disrupting Bus Error (DBERR)",
	    FM_EREPORT_PAYLOAD_IO,
	    FM_EREPORT_CPU_USIII_DBERR,

	/* IVU, IVC, IMC */
	C_AFSR_IVU, "IVU ", ECC_C_TRAP,
	    CPU_IV, "IVU",
	    FM_EREPORT_PAYLOAD_SYSTEM1,
	    FM_EREPORT_CPU_USIII_IVU,
	C_AFSR_IVC, "IVC ", ECC_C_TRAP,
	    CPU_IV, "IVC",
	    FM_EREPORT_PAYLOAD_SYSTEM1,
	    FM_EREPORT_CPU_USIII_IVC,
	C_AFSR_IMC, "IMC ", ECC_C_TRAP,
	    CPU_IV, "IMC",
	    FM_EREPORT_PAYLOAD_SYSTEM1,
	    FM_EREPORT_CPU_USIII_IMC,

	0, NULL, 0,
	    0, NULL,
	    FM_EREPORT_PAYLOAD_UNKNOWN,
	    FM_EREPORT_CPU_USIII_UNKNOWN,
};
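
/*
 * Editorial sketch (hedged): the table above is ordered by reporting
 * priority, so a consumer finds the highest-priority description of an
 * AFSR bit by scanning linearly and stopping at the first entry whose
 * bit mask matches. A minimal standalone analogue, using a hypothetical
 * trimmed-down entry type (the real ecc_type_to_info_t carries more
 * fields), under the hypothetical EXAMPLE_SKETCHES guard:
 */
#ifdef	EXAMPLE_SKETCHES
typedef struct example_ecc_info {
	uint64_t	ei_afsr_bit;	/* AFSR/AFSR_EXT error bit mask */
	const char	*ei_desc;	/* human-readable description */
} example_ecc_info_t;

static const char *
example_lookup_err(const example_ecc_info_t *table, uint64_t afsr_bit)
{
	const example_ecc_info_t *ep;

	/* First match wins; the table is sorted by priority. */
	for (ep = table; ep->ei_desc != NULL; ep++) {
		if (ep->ei_afsr_bit & afsr_bit)
			return (ep->ei_desc);
	}
	return ("unknown");
}
#endif	/* EXAMPLE_SKETCHES */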

/*
 * See Cheetah+ Delta PRM 10.9 and section P.6.1 of the Panther PRM
 * Class 4:
 *	AFSR     -- UCC, UCU, TUE, TSCE, TUE_SH
 *	AFSR_EXT -- L3_UCC, L3_UCU, L3_TUE, L3_TUE_SH
 * Class 3:
 *	AFSR     -- UE, DUE, EDU, EMU, WDU, CPU
 *	AFSR_EXT -- L3_EDU, L3_WDU, L3_CPU
 * Class 2:
 *	AFSR     -- CE, EDC, EMC, WDC, CPC, THCE
 *	AFSR_EXT -- L3_EDC, L3_WDC, L3_CPC, L3_THCE
 * Class 1:
 *	AFSR     -- TO, DTO, BERR, DBERR
 *	AFSR_EXT --
 */
uint64_t afar_overwrite[] = {
	/* class 4: */
	C_AFSR_UCC | C_AFSR_UCU | C_AFSR_TUE | C_AFSR_TSCE | C_AFSR_TUE_SH |
	    C_AFSR_L3_UCC | C_AFSR_L3_UCU | C_AFSR_L3_TUE | C_AFSR_L3_TUE_SH,
	/* class 3: */
	C_AFSR_UE | C_AFSR_DUE | C_AFSR_EDU | C_AFSR_EMU | C_AFSR_WDU |
	    C_AFSR_CPU | C_AFSR_L3_EDU | C_AFSR_L3_WDU | C_AFSR_L3_CPU,
	/* class 2: */
	C_AFSR_CE | C_AFSR_EDC | C_AFSR_EMC | C_AFSR_WDC | C_AFSR_CPC |
	    C_AFSR_THCE | C_AFSR_L3_EDC | C_AFSR_L3_WDC | C_AFSR_L3_CPC |
	    C_AFSR_L3_THCE,
	/* class 1: */
	C_AFSR_TO | C_AFSR_DTO | C_AFSR_BERR | C_AFSR_DBERR,

	0
};

/*
 * For Cheetah+, the E_SYND and M_SYND overwrite priorities are combined.
 * See Cheetah+ Delta PRM 10.9 and Cheetah+ PRM 11.6.2
 * Class 2: UE, DUE, IVU, EDU, EMU, WDU, UCU, CPU
 * Class 1: CE, IVC, EDC, EMC, WDC, UCC, CPC
 */
uint64_t esynd_overwrite[] = {
	/* class 2: */
	C_AFSR_UE | C_AFSR_DUE | C_AFSR_IVU | C_AFSR_EDU | C_AFSR_EMU |
	    C_AFSR_WDU | C_AFSR_UCU | C_AFSR_CPU,
	/* class 1: */
	C_AFSR_CE | C_AFSR_IVC | C_AFSR_EDC | C_AFSR_EMC | C_AFSR_WDC |
	    C_AFSR_UCC | C_AFSR_CPC,
	0
};

/*
 * In Panther, the E_SYND overwrite policy adds one more level.
 * See Panther PRM P.6.2
 * Class 3:
 *	AFSR     -- UCU, UCC
 *	AFSR_EXT -- L3_UCU, L3_UCC
 * Class 2:
 *	AFSR     -- UE, DUE, IVU, EDU, WDU, CPU
 *	AFSR_EXT -- L3_EDU, L3_WDU, L3_CPU
 * Class 1:
 *	AFSR     -- CE, IVC, EDC, WDC, CPC
 *	AFSR_EXT -- L3_EDC, L3_WDC, L3_CPC
 */
uint64_t pn_esynd_overwrite[] = {
	/* class 3: */
	C_AFSR_UCU | C_AFSR_UCC |
	    C_AFSR_L3_UCU | C_AFSR_L3_UCC,
	/* class 2: */
	C_AFSR_UE | C_AFSR_DUE | C_AFSR_IVU | C_AFSR_EDU | C_AFSR_WDU |
	    C_AFSR_CPU |
	    C_AFSR_L3_EDU | C_AFSR_L3_WDU | C_AFSR_L3_CPU,
	/* class 1: */
	C_AFSR_CE | C_AFSR_IVC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_CPC |
	    C_AFSR_L3_EDC | C_AFSR_L3_WDC | C_AFSR_L3_CPC,

	0
};

int
afsr_to_pn_esynd_status(uint64_t afsr, uint64_t afsr_bit)
{
	return (afsr_to_overw_status(afsr, afsr_bit, pn_esynd_overwrite));
}

/*
 * Prioritized list of Error bits for MSYND overwrite.
 * See Panther PRM P.6.2 (For Cheetah+, see esynd_overwrite classes)
 * Class 2: EMU, IMU
 * Class 1: EMC, IMC
 *
 * Panther adds IMU and IMC.
 */
uint64_t msynd_overwrite[] = {
	/* class 2: */
	C_AFSR_EMU | C_AFSR_IMU,
	/* class 1: */
	C_AFSR_EMC | C_AFSR_IMC,

	0
};
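
/*
 * Editorial sketch (hedged): the zero-terminated arrays above list error
 * bits from the highest overwrite class down to the lowest and are
 * consumed by afsr_to_overw_status(), which is defined in the common us3
 * code. One plausible standalone reading of such a policy table, offered
 * purely as an illustration and not as the canonical implementation: a
 * captured register value "belongs" to afsr_bit only if no
 * higher-priority class also has a bit set in the AFSR.
 */
#ifdef	EXAMPLE_SKETCHES	/* hypothetical guard; never defined */
static int
example_belongs_to(uint64_t afsr, uint64_t afsr_bit, const uint64_t *classes)
{
	const uint64_t *cp;

	for (cp = classes; *cp != 0; cp++) {
		if (*cp & afsr_bit)
			return (1);	/* reached afsr_bit's class first */
		if (*cp & afsr)
			return (0);	/* a higher class owns the capture */
	}
	return (0);	/* afsr_bit not covered by this policy */
}
#endif	/* EXAMPLE_SKETCHES */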

/*
 * Change cpu speed bits -- the new speed will be normal-speed/divisor.
 *
 * The Jalapeno memory controllers are required to drain outstanding
 * memory transactions within 32 JBus clocks in order to be ready
 * to enter Estar mode. In some corner cases, however, that time
 * fell short.
 *
 * A safe software solution is to force the MCU to act as if it were in
 * Estar mode, then delay 1us (in ppm code) prior to asserting the
 * J_CHNG_L signal. To reverse the effect, upon exiting Estar, software
 * restores the MCU to its original state.
 */
/* ARGSUSED1 */
void
cpu_change_speed(uint64_t divisor, uint64_t arg2)
{
	bus_config_eclk_t *bceclk;
	uint64_t reg;
	processor_info_t *pi = &(CPU->cpu_type_info);

	for (bceclk = bus_config_eclk; bceclk->divisor; bceclk++) {
		if (bceclk->divisor != divisor)
			continue;
		reg = get_safari_config();
		reg &= ~SAFARI_CONFIG_ECLK_MASK;
		reg |= bceclk->mask;
		set_safari_config(reg);
		CPU->cpu_m.divisor = (uchar_t)divisor;
		CPU->cpu_curr_clock =
		    (((uint64_t)pi->pi_clock * 1000000) / divisor);
		return;
	}
	/*
	 * We will reach here only if OBP and the kernel don't agree on
	 * the speeds supported by the CPU.
	 */
	cmn_err(CE_WARN, "cpu_change_speed: bad divisor %" PRIu64, divisor);
}
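
/*
 * Editorial note with a worked example (hedged): pi_clock is the CPU
 * frequency in MHz, so the cpu_curr_clock computation above yields Hz.
 * For a hypothetical 1200 MHz part stepped down with divisor 2:
 *
 *	(1200 * 1000000) / 2 == 600000000 Hz, i.e. 600 MHz.
 */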

/*
 * Cpu private initialization. This includes allocating the cpu_private
 * data structure, initializing it, and initializing the scrubber for this
 * cpu. This function calls cpu_init_ecache_scrub_dr to init the scrubber.
 * We use kmem_cache_create for the cheetah private data structure because
 * it needs to be allocated on a PAGESIZE (8192) byte boundary.
 */
void
cpu_init_private(struct cpu *cp)
{
	cheetah_private_t *chprp;
	int i;

	ASSERT(CPU_PRIVATE(cp) == NULL);

	/* LINTED: E_TRUE_LOGICAL_EXPR */
	ASSERT((offsetof(cheetah_private_t, chpr_tl1_err_data) +
	    sizeof (ch_err_tl1_data_t) * CH_ERR_TL1_TLMAX) <= PAGESIZE);

	/*
	 * Running with Cheetah CPUs in a Cheetah+, Jaguar, Panther or
	 * mixed Cheetah+/Jaguar/Panther machine is not a supported
	 * configuration. Attempting to do so may result in unpredictable
	 * failures (e.g. running Cheetah+ CPUs with Cheetah E$ disp flush)
	 * so don't allow it.
	 *
	 * This is just defensive code since this configuration mismatch
	 * should have been caught prior to OS execution.
	 */
	if (!(IS_CHEETAH_PLUS(cpunodes[cp->cpu_id].implementation) ||
	    IS_JAGUAR(cpunodes[cp->cpu_id].implementation) ||
	    IS_PANTHER(cpunodes[cp->cpu_id].implementation))) {
		cmn_err(CE_PANIC, "CPU%d: UltraSPARC-III not supported"
		    " on UltraSPARC-III+/IV/IV+ code\n", cp->cpu_id);
	}

	/*
	 * If the ch_private_cache has not been created, create it.
	 */
	if (ch_private_cache == NULL) {
		ch_private_cache = kmem_cache_create("ch_private_cache",
		    sizeof (cheetah_private_t), PAGESIZE, NULL, NULL,
		    NULL, NULL, static_arena, 0);
	}

	chprp = CPU_PRIVATE(cp) = kmem_cache_alloc(ch_private_cache, KM_SLEEP);

	bzero(chprp, sizeof (cheetah_private_t));
	chprp->chpr_fecctl0_logout.clo_data.chd_afar = LOGOUT_INVALID;
	chprp->chpr_cecc_logout.clo_data.chd_afar = LOGOUT_INVALID;
	chprp->chpr_async_logout.clo_data.chd_afar = LOGOUT_INVALID;
	chprp->chpr_tlb_logout.tlo_addr = LOGOUT_INVALID;
	for (i = 0; i < CH_ERR_TL1_TLMAX; i++)
		chprp->chpr_tl1_err_data[i].ch_err_tl1_logout.clo_data.chd_afar
		    = LOGOUT_INVALID;

	/* Panther has a larger Icache than Cheetah+ or Jaguar */
	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation)) {
		chprp->chpr_icache_size = PN_ICACHE_SIZE;
		chprp->chpr_icache_linesize = PN_ICACHE_LSIZE;
	} else {
		chprp->chpr_icache_size = CH_ICACHE_SIZE;
		chprp->chpr_icache_linesize = CH_ICACHE_LSIZE;
	}

	cpu_init_ecache_scrub_dr(cp);

	/*
	 * Panther's L2$ and E$ are shared between cores, so the scrubber is
	 * only needed on one of the cores. At this point, we assume all
	 * cores are online, and we only enable the scrubber on core 0.
	 */
	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation)) {
		chprp->chpr_scrub_misc.chsm_core_state =
		    SCRUBBER_BOTH_CORES_ONLINE;
		if (cp->cpu_id != (processorid_t)cmp_cpu_to_chip(cp->cpu_id)) {
			chprp->chpr_scrub_misc.chsm_enable[
			    CACHE_SCRUBBER_INFO_E] = 0;
		}
	}

	chprp->chpr_ec_set_size = cpunodes[cp->cpu_id].ecache_size /
	    cpu_ecache_nway();

	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
	ch_err_tl1_paddrs[cp->cpu_id] = va_to_pa(chprp);
	ASSERT(ch_err_tl1_paddrs[cp->cpu_id] != -1);
}

/*
 * Clear the error state registers for this CPU.
 * For Cheetah+/Jaguar, just clear the AFSR, but
 * for Panther we also have to clear the AFSR_EXT.
 */
void
set_cpu_error_state(ch_cpu_errors_t *cpu_error_regs)
{
	set_asyncflt(cpu_error_regs->afsr & ~C_AFSR_FATAL_ERRS);
	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
		set_afsr_ext(cpu_error_regs->afsr_ext & ~C_AFSR_EXT_FATAL_ERRS);
	}
}

void
pn_cpu_log_diag_l2_info(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ch_ec_data_t *l2_data = &ch_flt->flt_diag_data.chd_l2_data[0];
	uint64_t faddr = aflt->flt_addr;
	uint8_t log_way_mask = 0;
	int i;

	/*
	 * Only Panther CPUs have the additional L2$ data that needs
	 * to be logged here
	 */
	if (!IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
		return;

	/*
	 * We'll use a simple bit mask to keep track of which way(s)
	 * of the stored cache line we want to log. The idea is to
	 * log the entry if it is a valid line and it matches our
	 * fault AFAR. If no match is found, we will simply log all
	 * the ways.
	 */
	for (i = 0; i < PN_L2_NWAYS; i++)
		if (pn_matching_valid_l2_line(faddr, &l2_data[i]))
			log_way_mask |= (1 << i);

	/* If no matching valid lines were found, we log all ways */
	if (log_way_mask == 0)
		log_way_mask = (1 << PN_L2_NWAYS) - 1;

	/* Log the cache lines */
	for (i = 0; i < PN_L2_NWAYS; i++)
		if (log_way_mask & (1 << i))
			l2_data[i].ec_logflag = EC_LOGFLAG_MAGIC;
}

/*
 * For this routine to return true, the L2 tag in question must be valid
 * and the tag PA must match the fault address (faddr) assuming the correct
 * index is being used.
 */
static int
pn_matching_valid_l2_line(uint64_t faddr, ch_ec_data_t *clo_l2_data)
{
	if ((!PN_L2_LINE_INVALID(clo_l2_data->ec_tag)) &&
	    ((faddr & P2ALIGN(C_AFAR_PA, PN_L2_SET_SIZE)) ==
	    PN_L2TAG_TO_PA(clo_l2_data->ec_tag)))
		return (1);
	return (0);
}

/*
 * This array is used to convert the 3-bit PgSz encoding (as used in
 * various MMU registers such as MMU_TAG_ACCESS_EXT) into the corresponding
 * page size.
 */
static uint64_t tlb_pgsz_to_size[] = {
	/* 000 = 8KB: */
	0x2000,
	/* 001 = 64KB: */
	0x10000,
	/* 010 = 512KB: */
	0x80000,
	/* 011 = 4MB: */
	0x400000,
	/* 100 = 32MB: */
	0x2000000,
	/* 101 = 256MB: */
	0x10000000,
	/* undefined for encodings 110 and 111: */
	0, 0
};
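
/*
 * Editorial sketch (hedged): a consumer of the table above extracts the
 * 3-bit PgSz field from the relevant MMU register value and indexes the
 * table, treating a result of 0 as an undefined encoding. The field
 * offset parameter below is a made-up illustration, not a real register
 * layout:
 */
#ifdef	EXAMPLE_SKETCHES	/* hypothetical guard; never defined */
static uint64_t
example_pgsz_to_bytes(uint64_t mmu_reg, int pgsz_shift)
{
	uint64_t encoding = (mmu_reg >> pgsz_shift) & 0x7;	/* 3-bit field */

	return (tlb_pgsz_to_size[encoding]);	/* 0 => undefined encoding */
}
#endif	/* EXAMPLE_SKETCHES */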

/*
 * The itlb_parity_trap and dtlb_parity_trap handlers transfer control here
 * after collecting logout information related to the TLB parity error and
 * flushing the offending TTE entries from the ITLB or DTLB.
 *
 * DTLB traps which occur at TL>0 are not recoverable because we will most
 * likely be corrupting some other trap handler's alternate globals. As
 * such, we simply panic here when that happens. ITLB parity errors are
 * not expected to happen at TL>0.
 */
void
cpu_tlb_parity_error(struct regs *rp, ulong_t trap_va, ulong_t tlb_info)
{
	ch_async_flt_t ch_flt;
	struct async_flt *aflt;
	pn_tlb_logout_t *tlop = NULL;
	int immu_parity = (tlb_info & PN_TLO_INFO_IMMU) != 0;
	int tl1_trap = (tlb_info & PN_TLO_INFO_TL1) != 0;
	char *error_class;

	bzero(&ch_flt, sizeof (ch_async_flt_t));

	/*
	 * Get the CPU log out info. If we can't find our CPU private
	 * pointer, or if the logout information does not correspond to
	 * this error, then we will have to make do without detailed
	 * logout information.
	 */
	if (CPU_PRIVATE(CPU)) {
		tlop = CPU_PRIVATE_PTR(CPU, chpr_tlb_logout);
		if ((tlop->tlo_addr != trap_va) ||
		    (tlop->tlo_info != tlb_info))
			tlop = NULL;
	}

	if (tlop) {
		ch_flt.tlb_diag_data = *tlop;

		/* Zero out + invalidate TLB logout. */
		bzero(tlop, sizeof (pn_tlb_logout_t));
		tlop->tlo_addr = LOGOUT_INVALID;
	} else {
		/*
		 * Copy what logout information we have and mark
		 * it incomplete.
		 */
		ch_flt.flt_data_incomplete = 1;
		ch_flt.tlb_diag_data.tlo_info = tlb_info;
		ch_flt.tlb_diag_data.tlo_addr = trap_va;
	}

	/*
	 * Log the error.
	 */
	aflt = (struct async_flt *)&ch_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_addr = trap_va;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
	aflt->flt_tl = tl1_trap ? 1 : 0;
	aflt->flt_panic = tl1_trap ? 1 : 0;

	if (immu_parity) {
		aflt->flt_status = ECC_ITLB_TRAP;
		ch_flt.flt_type = CPU_ITLB_PARITY;
		error_class = FM_EREPORT_CPU_USIII_ITLBPE;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_ITLB_PE;
	} else {
		aflt->flt_status = ECC_DTLB_TRAP;
		ch_flt.flt_type = CPU_DTLB_PARITY;
		error_class = FM_EREPORT_CPU_USIII_DTLBPE;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_DTLB_PE;
	}

	/*
	 * The TLB entries have already been flushed by the TL1 trap
	 * handler so at this point the only thing left to do is log
	 * the error message.
	 */
	if (aflt->flt_panic) {
		cpu_errorq_dispatch(error_class, (void *)&ch_flt,
		    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
		/*
		 * Panic here if aflt->flt_panic has been set. Enqueued
		 * errors will be logged as part of the panic flow.
		 */
		fm_panic("%sError(s)", immu_parity ? "ITLBPE " : "DTLBPE ");
	} else {
		cpu_errorq_dispatch(error_class, (void *)&ch_flt,
		    sizeof (ch_async_flt_t), ce_queue, aflt->flt_panic);
	}
}

/*
 * This routine is called when a TLB parity error event is 'ue_drain'ed
 * or 'ce_drain'ed from the errorq.
 */
static void
cpu_async_log_tlb_parity_err(void *flt)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;
#ifdef lint
	aflt = aflt;
#endif

	/*
	 * We only capture TLB information if we encountered
	 * a TLB parity error, and Panther is the only CPU which
	 * can detect a TLB parity error.
	 */
	ASSERT(IS_PANTHER(cpunodes[aflt->flt_inst].implementation));
	ASSERT((ch_flt->flt_type == CPU_ITLB_PARITY) ||
	    (ch_flt->flt_type == CPU_DTLB_PARITY));

	if (ch_flt->flt_data_incomplete == 0) {
		if (ch_flt->flt_type == CPU_ITLB_PARITY)
			ch_flt->tlb_diag_data.tlo_logflag = IT_LOGFLAG_MAGIC;
		else /* parity error is in DTLB */
			ch_flt->tlb_diag_data.tlo_logflag = DT_LOGFLAG_MAGIC;
	}
}

/*
 * Add L1 Prefetch cache data to the ereport payload.
 */
void
cpu_payload_add_pcache(struct async_flt *aflt, nvlist_t *nvl)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	ch_pc_data_t *pcp;
	ch_pc_data_t pcdata[CH_PCACHE_NWAY];
	uint_t nelem;
	int i, ways_logged = 0;

	/*
	 * We only capture P$ information if we encountered
	 * a P$ parity error, and Panther is the only CPU which
	 * can detect a P$ parity error.
	 */
	ASSERT(IS_PANTHER(cpunodes[aflt->flt_inst].implementation));
	for (i = 0; i < CH_PCACHE_NWAY; i++) {
		pcp = &ch_flt->parity_data.dpe.cpl_pc[i];
		if (pcp->pc_logflag == PC_LOGFLAG_MAGIC) {
			bcopy(pcp, &pcdata[ways_logged],
			    sizeof (ch_pc_data_t));
			ways_logged++;
		}
	}

	/*
	 * Add the pcache data to the payload.
	 */
	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1P_WAYS,
	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
	if (ways_logged != 0) {
		nelem = sizeof (ch_pc_data_t) / sizeof (uint64_t) * ways_logged;
		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1P_DATA,
		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)pcdata, NULL);
	}
}
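
/*
 * Editorial note with a worked example (hedged): the payload array above
 * is sized in 64-bit words, so nelem is sizeof (ch_pc_data_t) divided by
 * sizeof (uint64_t), times the number of ways captured. If, purely for
 * illustration, ch_pc_data_t were 64 bytes and two ways were logged:
 *
 *	nelem = (64 / 8) * 2 == 16 uint64_t words.
 */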

/*
 * Add TLB diagnostic data to the ereport payload.
 */
void
cpu_payload_add_tlb(struct async_flt *aflt, nvlist_t *nvl)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	uint8_t num_entries, tlb_data_words;

	/*
	 * We only capture TLB information if we encountered
	 * a TLB parity error, and Panther is the only CPU which
	 * can detect a TLB parity error.
	 */
	ASSERT(IS_PANTHER(cpunodes[aflt->flt_inst].implementation));
	ASSERT((ch_flt->flt_type == CPU_ITLB_PARITY) ||
	    (ch_flt->flt_type == CPU_DTLB_PARITY));

	if (ch_flt->flt_type == CPU_ITLB_PARITY) {
		num_entries = (uint8_t)(PN_ITLB_NWAYS * PN_NUM_512_ITLBS);
		tlb_data_words = sizeof (ch_tte_entry_t) / sizeof (uint64_t) *
		    num_entries;

		/*
		 * Add the TLB diagnostic data to the payload
		 * if it was collected.
		 */
		if (ch_flt->tlb_diag_data.tlo_logflag == IT_LOGFLAG_MAGIC) {
			fm_payload_set(nvl,
			    FM_EREPORT_PAYLOAD_NAME_ITLB_ENTRIES,
			    DATA_TYPE_UINT8, num_entries, NULL);
			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_ITLB_DATA,
			    DATA_TYPE_UINT64_ARRAY, tlb_data_words,
			    (uint64_t *)ch_flt->tlb_diag_data.tlo_itlb_tte,
			    NULL);
		}
	} else {
		num_entries = (uint8_t)(PN_DTLB_NWAYS * PN_NUM_512_DTLBS);
		tlb_data_words = sizeof (ch_tte_entry_t) / sizeof (uint64_t) *
		    num_entries;

		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_VA,
		    DATA_TYPE_UINT64, ch_flt->tlb_diag_data.tlo_addr, NULL);

		/*
		 * Add the TLB diagnostic data to the payload
		 * if it was collected.
		 */
		if (ch_flt->tlb_diag_data.tlo_logflag == DT_LOGFLAG_MAGIC) {
			fm_payload_set(nvl,
			    FM_EREPORT_PAYLOAD_NAME_DTLB_ENTRIES,
			    DATA_TYPE_UINT8, num_entries, NULL);
			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_DTLB_DATA,
			    DATA_TYPE_UINT64_ARRAY, tlb_data_words,
			    (uint64_t *)ch_flt->tlb_diag_data.tlo_dtlb_tte,
			    NULL);
		}
	}
}

/*
 * Panther Cache Scrubbing:
 *
 * In Jaguar, the E$ was split between cores, so the scrubber must run on both
 * cores. For Panther, however, the L2$ and L3$ are shared across cores.
 * Therefore, the E$ scrubber only needs to run on one of the two cores.
 *
 * There are four possible states for the E$ scrubber (see the illustrative
 * sketch following this comment):
 *
 * 0. If both cores are offline, add core 0 to cpu_offline_set so that
 *    the offline scrubber will run on it.
 * 1. If core 0 is online and core 1 off, we run the scrubber on core 0.
 * 2. If core 1 is online and core 0 off, we move the scrubber to run
 *    on core 1.
 * 3. If both cores are online, only run the scrubber on core 0.
 *
 * These states are enumerated by the SCRUBBER_[BOTH|CORE|NEITHER]_* defines
 * above. One of those values is stored in
 * chpr_scrub_misc->chsm_core_state on each core.
 *
 * Also note that, for Panther, ecache_flush_line() will flush out the L2$
 * before the E$, so the L2$ will be scrubbed by the E$ scrubber. No
 * additional code is necessary to scrub the L2$.
 *
 * For all cpu types, whenever a cpu or core is offlined, add it to
 * cpu_offline_set so the necessary scrubbers can still run. This is
 * needed even on Panther so that the D$ scrubber can continue to run.
 */
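/*
 * Editorial sketch (hedged): the chsm_core_state value is simply the OR
 * of one online bit per core, so the four states above can be derived
 * as follows, under the hypothetical EXAMPLE_SKETCHES guard:
 */
#ifdef	EXAMPLE_SKETCHES
static int
example_scrub_state(int core_0_online, int core_1_online)
{
	int state = SCRUBBER_NEITHER_CORE_ONLINE;

	if (core_0_online)
		state |= SCRUBBER_CORE_0_ONLINE;
	if (core_1_online)
		state |= SCRUBBER_CORE_1_ONLINE;
	return (state);	/* e.g., (1, 1) -> SCRUBBER_BOTH_CORES_ONLINE */
}
#endif	/* EXAMPLE_SKETCHES */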
/*ARGSUSED*/
int
cpu_scrub_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
{
	processorid_t core_0_id;
	cpu_t *core_cpus[2];
	ch_scrub_misc_t *core_scrub[2];
	int old_state, i;
	int new_state = SCRUBBER_NEITHER_CORE_ONLINE;

	switch (what) {
	case CPU_ON:
	case CPU_INIT:
		CPUSET_DEL(cpu_offline_set, cpuid);
		break;
	case CPU_OFF:
		CPUSET_ADD(cpu_offline_set, cpuid);
		break;
	default:
		return (0);
	}

	if (!IS_PANTHER(cpunodes[cpuid].implementation)) {
		return (0);
	}

	/*
	 * Update the chsm_enable[CACHE_SCRUBBER_INFO_E] value
	 * if necessary
	 */
	core_0_id = cmp_cpu_to_chip(cpuid);
	core_cpus[0] = cpu_get(core_0_id);
	core_cpus[1] = cpu_get_sibling_core(core_cpus[0]);

	for (i = 0; i < 2; i++) {
		if (core_cpus[i] == NULL) {
			/*
			 * This may happen during DR - one core is offlined
			 * and completely unconfigured before the second
			 * core is offlined. Give up and return quietly,
			 * since the second core should quickly be removed
			 * anyway.
			 */
			return (0);
		}
		core_scrub[i] = CPU_PRIVATE_PTR(core_cpus[i], chpr_scrub_misc);
	}

	if (cpuid == (processorid_t)cmp_cpu_to_chip(cpuid)) {
		/* cpuid is core 0 */
		if (cpu_is_active(core_cpus[1])) {
			new_state |= SCRUBBER_CORE_1_ONLINE;
		}
		if (what != CPU_OFF) {
			new_state |= SCRUBBER_CORE_0_ONLINE;
		}
	} else {
		/* cpuid is core 1 */
		if (cpu_is_active(core_cpus[0])) {
			new_state |= SCRUBBER_CORE_0_ONLINE;
		}
		if (what != CPU_OFF) {
			new_state |= SCRUBBER_CORE_1_ONLINE;
		}
	}

	old_state = core_scrub[0]->chsm_core_state;

	if (old_state == new_state) {
		return (0);
	}

	if (old_state == SCRUBBER_CORE_1_ONLINE) {
		/*
		 * We need to move the scrubber state from core 1
		 * back to core 0. This data is not protected by
		 * locks, but the worst that can happen is that some
		 * lines are scrubbed multiple times. chsm_outstanding is
		 * set to 0 to make sure an interrupt is scheduled the
		 * first time through do_scrub().
		 */
		core_scrub[0]->chsm_flush_index[CACHE_SCRUBBER_INFO_E] =
		    core_scrub[1]->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
		core_scrub[0]->chsm_outstanding[CACHE_SCRUBBER_INFO_E] = 0;
	}

	switch (new_state) {
	case SCRUBBER_NEITHER_CORE_ONLINE:
	case SCRUBBER_BOTH_CORES_ONLINE:
	case SCRUBBER_CORE_0_ONLINE:
		core_scrub[1]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 0;
		core_scrub[0]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
		break;

	case SCRUBBER_CORE_1_ONLINE:
	default:
		/*
		 * We need to move the scrubber state from core 0
		 * to core 1.
		 */
		core_scrub[1]->chsm_flush_index[CACHE_SCRUBBER_INFO_E] =
		    core_scrub[0]->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
		core_scrub[1]->chsm_outstanding[CACHE_SCRUBBER_INFO_E] = 0;

		core_scrub[0]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 0;
		core_scrub[1]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
		break;
	}

	core_scrub[0]->chsm_core_state = new_state;
	core_scrub[1]->chsm_core_state = new_state;
	return (0);
}

/*
 * Returns a pointer to the cpu structure of the argument's sibling core.
 * If no sibling core can be found, return NULL.
 */
static cpu_t *
cpu_get_sibling_core(cpu_t *cpup)
{
	cpu_t *nextp;
	pg_t *pg;
	pg_cpu_itr_t i;

	if ((cpup == NULL) || (!cmp_cpu_is_cmp(cpup->cpu_id)))
		return (NULL);
	pg = (pg_t *)pghw_find_pg(cpup, PGHW_CHIP);
	if (pg == NULL)
		return (NULL);

	/*
	 * Iterate over the CPUs in the chip PG looking
	 * for a CPU that isn't cpup
	 */
	PG_CPU_ITR_INIT(pg, i);
	while ((nextp = pg_cpu_next(&i)) != NULL) {
		if (nextp != cpup)
			break;
	}

	if (nextp == NULL)
		return (NULL);

	return (nextp);
}
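
/*
 * Editorial usage sketch (hedged): a caller interested in a Panther
 * sibling pairs the chip's core 0 cpu_t with its sibling, e.g.:
 *
 *	cpu_t *core0 = cpu_get(cmp_cpu_to_chip(cpuid));
 *	cpu_t *sibling = cpu_get_sibling_core(core0);
 *
 * as cpu_scrub_cpu_setup() does above. The result is NULL for a non-CMP
 * processor or when the second core has already been unconfigured.
 */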