/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sysmacros.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/machthread.h>
#include <sys/cpu.h>
#include <sys/cmp.h>
#include <sys/elf_SPARC.h>
#include <vm/hat_sfmmu.h>
#include <vm/seg_kmem.h>
#include <sys/cpuvar.h>
#include <sys/cheetahregs.h>
#include <sys/us3_module.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/prom_debug.h>
#include <sys/prom_plat.h>
#include <sys/cpu_module.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/clock.h>
#include <sys/platform_module.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/panic.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/fm/protocol.h>
#include <sys/fm/cpu/UltraSPARC-III.h>
#include <sys/fm/util.h>
#include <sys/pghw.h>

#ifdef CHEETAHPLUS_ERRATUM_25
#include <sys/cyclic.h>
#endif /* CHEETAHPLUS_ERRATUM_25 */

/*
 * See comment above cpu_scrub_cpu_setup() for description
 *
 * The values form a two-bit mask: bit 0 stands for core 0 and bit 1 for
 * core 1, so SCRUBBER_BOTH_CORES_ONLINE is simply the OR of the two
 * single-core values.
 */
#define	SCRUBBER_NEITHER_CORE_ONLINE	0x0
#define	SCRUBBER_CORE_0_ONLINE		0x1
#define	SCRUBBER_CORE_1_ONLINE		0x2
#define	SCRUBBER_BOTH_CORES_ONLINE	(SCRUBBER_CORE_0_ONLINE | \
					SCRUBBER_CORE_1_ONLINE)

/* Forward declarations for file-local helpers. */
static int pn_matching_valid_l2_line(uint64_t faddr, ch_ec_data_t *clo_l2_data);
static void cpu_async_log_tlb_parity_err(void *flt);
static cpu_t *cpu_get_sibling_core(cpu_t *cpup);


/*
 * Setup trap handlers.
 *
 * Each CH_SET_TRAP() call pairs a trap-table slot (first argument) with
 * the instruction sequence to use for it (second argument).  The slots
 * are grouped as: the PIL 15 interrupt, then the fast ECC error,
 * D$ parity and I$ parity handlers.  Each of the three error groups sets
 * a tt0_* slot, a tt1_* slot and a tt1_swtrap<n> slot that receives the
 * matching "_tl1_cont" sequence.
 * NOTE(review): tt0_/tt1_ presumably mean the TL=0 and TL>0 trap tables;
 * confirm against the CH_SET_TRAP definition.
 */
void
cpu_init_trap(void)
{
	CH_SET_TRAP(tt_pil15, ch_pil15_interrupt_instr);

	/* Fast ECC error */
	CH_SET_TRAP(tt0_fecc, fecc_err_instr);
	CH_SET_TRAP(tt1_fecc, fecc_err_tl1_instr);
	CH_SET_TRAP(tt1_swtrap0, fecc_err_tl1_cont_instr);

	/* D$ parity error */
	CH_SET_TRAP(tt0_dperr, dcache_parity_instr);
	CH_SET_TRAP(tt1_dperr, dcache_parity_tl1_instr);
	CH_SET_TRAP(tt1_swtrap1, dcache_parity_tl1_cont_instr);

	/* I$ parity error */
	CH_SET_TRAP(tt0_iperr, icache_parity_instr);
	CH_SET_TRAP(tt1_iperr, icache_parity_tl1_instr);
	CH_SET_TRAP(tt1_swtrap2, icache_parity_tl1_cont_instr);
}

/*
 * Set the magic constants of the implementation.
 */
/*
 * cpu_fiximp() loads the global cache geometry variables (D$, I$, E$ and
 * VAC) with compile-time constants.  The dnode argument is not used.
 */
/*ARGSUSED*/
void
cpu_fiximp(pnode_t dnode)
{
	int i, a;
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;

	dcache_size = CH_DCACHE_SIZE;
	dcache_linesize = CH_DCACHE_LSIZE;

	icache_size = CHP_ICACHE_MAX_SIZE;
	icache_linesize = CHP_ICACHE_MIN_LSIZE;

	ecache_size = CH_ECACHE_MAX_SIZE;
	ecache_alignsize = CH_ECACHE_MAX_LSIZE;
	ecache_associativity = CHP_ECACHE_MIN_NWAY;

	/*
	 * ecache_setsize needs to be the maximum of all cpu ecache setsizes
	 */
	ecache_setsize = CHP_ECACHE_MAX_SETSIZE;
	ASSERT(ecache_setsize >= (ecache_size / ecache_associativity));

	vac_size = CH_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
	/*
	 * Count how many times vac_size can be shifted right before it
	 * reaches zero; this is floor(log2(vac_size)).
	 */
	i = 0; a = vac_size;
	while (a >>= 1)
		++i;
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;
}

/*
 * Use Panther values for Panther-only domains.
 * See Panther PRM, 1.5.4 Cache Hierarchy
 *
 * Overrides the I$ and E$ (L3) globals set by cpu_fiximp() and adds
 * AV_SPARC_POPC to cpu_hwcap_flags.
 */
void
cpu_fix_allpanther(void)
{
	/* dcache same as Ch+ */
	icache_size = PN_ICACHE_SIZE;
	icache_linesize = PN_ICACHE_LSIZE;
	ecache_size = PN_L3_SIZE;
	ecache_alignsize = PN_L3_LINESIZE;
	ecache_associativity = PN_L3_NWAYS;
	ecache_setsize = PN_L3_SET_SIZE;
	ASSERT(ecache_setsize >= (ecache_size / ecache_associativity));
	/* vac same as Ch+ */
	/* fix hwcaps for USIV+-only domains */
	cpu_hwcap_flags |= AV_SPARC_POPC;
}

/*
 * Ship a mondo to every CPU in 'set' and spin until the value returned by
 * getidsr() shows that all of them have been accepted.
 *
 * Up to IDSR_BN_SETS dispatches are kept outstanding at once; cpuids[]
 * records which CPU currently occupies each dispatch slot.  When NCPU is
 * larger than IDSR_BN_SETS, the remaining CPUs of the set are shipped as
 * slots become free, walking the set from the highest numbered CPU
 * downwards.  A slot whose NACK bit is set (and with no slot busy) is
 * re-shipped after a short spin.
 *
 * If the whole operation takes longer than xc_tick_limit ticks (the limit
 * is extended when a jump larger than xc_tick_jump_limit is seen between
 * two polls), the function returns quietly when panic_quiesce is set and
 * otherwise prints the pending cpuids and panics.  With
 * CHEETAHPLUS_ERRATUM_25 defined, one mondo_recover() attempt is made per
 * batch of targets before panicking.
 *
 * 'set' must not be empty (ASSERTed).
 */
void
send_mondo_set(cpuset_t set)
{
	int lo, busy, nack, shipped = 0;
	uint16_t i, cpuids[IDSR_BN_SETS];
	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
	uint64_t starttick, endtick, tick, lasttick;
#if (NCPU > IDSR_BN_SETS)
	int index = 0;
	int ncpuids = 0;
#endif
#ifdef CHEETAHPLUS_ERRATUM_25
	int recovered = 0;
	int cpuid;
#endif

	ASSERT(!CPUSET_ISNULL(set));
	starttick = lasttick = gettick();

#if (NCPU <= IDSR_BN_SETS)
	/* Every CPU in the set fits in a dispatch slot: ship them all now. */
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			shipit(i, shipped);
			nackmask |= IDSR_NACK_BIT(shipped);
			cpuids[shipped++] = i;
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
#else
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			ncpuids++;

			/*
			 * Ship only to the first (IDSR_BN_SETS) CPUs. If we
			 * find we have shipped to more than (IDSR_BN_SETS)
			 * CPUs, set "index" to the highest numbered CPU in
			 * the set so we can ship to other CPUs a bit later on.
			 */
			if (shipped < IDSR_BN_SETS) {
				shipit(i, shipped);
				nackmask |= IDSR_NACK_BIT(shipped);
				cpuids[shipped++] = i;
				CPUSET_DEL(set, i);
				if (CPUSET_ISNULL(set))
					break;
			} else
				index = (int)i;
		}

	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
#endif

	busymask = IDSR_NACK_TO_BUSY(nackmask);
	busy = nack = 0;
	endtick = starttick + xc_tick_limit;
	for (;;) {
		idsr = getidsr();
#if (NCPU <= IDSR_BN_SETS)
		if (idsr == 0)
			break;
#else
		/* Done only once nothing is pending AND nothing is unshipped. */
		if (idsr == 0 && shipped == ncpuids)
			break;
#endif
		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point. Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
#ifdef CHEETAHPLUS_ERRATUM_25
			/* Find the first slot that is still NACKed or busy. */
			cpuid = -1;
			for (i = 0; i < IDSR_BN_SETS; i++) {
				if (idsr & (IDSR_NACK_BIT(i) |
				    IDSR_BUSY_BIT(i))) {
					cpuid = cpuids[i];
					break;
				}
			}
			if (cheetah_sendmondo_recover && cpuid != -1 &&
			    recovered == 0) {
				if (mondo_recover(cpuid, i)) {
					/*
					 * We claimed the whole memory or
					 * full scan is disabled.
					 */
					recovered++;
				}
				/* Restart the timeout window after recovery. */
				tick = gettick();
				endtick = tick + xc_tick_limit;
				lasttick = tick;
				/*
				 * Recheck idsr
				 */
				continue;
			} else
#endif /* CHEETAHPLUS_ERRATUM_25 */
			{
				cmn_err(CE_CONT, "send mondo timeout "
				    "[%d NACK %d BUSY]\nIDSR 0x%"
				    "" PRIx64 " cpuids:", nack, busy, idsr);
				for (i = 0; i < IDSR_BN_SETS; i++) {
					if (idsr & (IDSR_NACK_BIT(i) |
					    IDSR_BUSY_BIT(i))) {
						cmn_err(CE_CONT, " 0x%x",
						    cpuids[i]);
					}
				}
				cmn_err(CE_CONT, "\n");
				cmn_err(CE_PANIC, "send_mondo_set: timeout");
			}
		}
		curnack = idsr & nackmask;
		curbusy = idsr & busymask;
#if (NCPU > IDSR_BN_SETS)
		if (shipped < ncpuids) {
			uint64_t cpus_left;
			uint16_t next = (uint16_t)index;

			/*
			 * A slot is free when neither its NACK nor its BUSY
			 * bit is set; cpus_left holds the free slots,
			 * expressed as BUSY bit positions.
			 */
			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
			    busymask;

			if (cpus_left) {
				do {
					/*
					 * Sequence through and ship to the
					 * remainder of the CPUs in the system
					 * (e.g. other than the first
					 * (IDSR_BN_SETS)) in reverse order.
					 */
					lo = lowbit(cpus_left) - 1;
					i = IDSR_BUSY_IDX(lo);
					shipit(next, i);
					shipped++;
					cpuids[i] = next;

					/*
					 * If we've processed all the CPUs,
					 * exit the loop now and save
					 * instructions.
					 */
					if (shipped == ncpuids)
						break;

					/* Find the next lower CPU in the set. */
					for ((index = ((int)next - 1));
					    index >= 0; index--)
						if (CPU_IN_SET(set, index)) {
							next = (uint16_t)index;
							break;
						}

					cpus_left &= ~(1ull << lo);
				} while (cpus_left);
#ifdef CHEETAHPLUS_ERRATUM_25
				/*
				 * Clear recovered because we are sending to
				 * a new set of targets.
				 */
				recovered = 0;
#endif
				continue;
			}
		}
#endif
		/* Something is still in flight: poll again before retrying. */
		if (curbusy) {
			busy++;
			continue;
		}

#ifdef SEND_MONDO_STATS
		{
			int n = gettick() - starttick;
			if (n < 8192)
				x_nack_stimes[n >> 7]++;
		}
#endif
		/* Spin for sys_clock_mhz ticks before re-shipping NACKed slots. */
		while (gettick() < (tick + sys_clock_mhz))
			;
		do {
			lo = lowbit(curnack) - 1;
			i = IDSR_NACK_IDX(lo);
			shipit(cpuids[i], i);
			curnack &= ~(1ull << lo);
		} while (curnack);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_set_stimes[n >> 7]++;
		else
			x_set_ltimes[(n >> 13) & 0xf]++;
	}
	x_set_cpus[shipped]++;
#endif
}

/*
 * Handles error logging for implementation specific error types
 *
 * 'flt' is viewed both as a ch_async_flt_t and as a struct async_flt.
 * Returns CH_ASYNC_LOG_DONE when ch_flt->flt_type is one of the types
 * handled below and CH_ASYNC_LOG_UNKNOWN otherwise.  'eqep' is not used.
 */
/*ARGSUSED1*/
int
cpu_impl_async_log_err(void *flt, errorq_elem_t *eqep)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;

	switch (ch_flt->flt_type) {

	case CPU_IC_PARITY:
		cpu_async_log_ic_parity_err(flt);
		return (CH_ASYNC_LOG_DONE);

	case CPU_DC_PARITY:
		cpu_async_log_dc_parity_err(flt);
		return (CH_ASYNC_LOG_DONE);

	/* log the error, then retire the affected page */
	case CPU_DUE:
		cpu_log_err(aflt);
		cpu_page_retire(ch_flt);
		return (CH_ASYNC_LOG_DONE);

	case CPU_ITLB_PARITY:
	case CPU_DTLB_PARITY:
		cpu_async_log_tlb_parity_err(flt);
		return (CH_ASYNC_LOG_DONE);

	/* report the error and continue */
	case CPU_L3_ADDR_PE:
		cpu_log_err(aflt);
		return (CH_ASYNC_LOG_DONE);

	default:
		return (CH_ASYNC_LOG_UNKNOWN);
	}
}

/*
 * Figure out if Ecache is direct-mapped (Cheetah or Cheetah+ with Ecache
 * control ECCR_ASSOC bit off) or 2-way (Cheetah+ with ECCR_ASSOC on).
 * We need to do this on the fly because we may have mixed Cheetah+'s with
 * both direct and 2-way Ecaches. Panther only supports 4-way L3$.
 */
/*
 * cpu_ecache_nway() returns the associativity for the CPU we are currently
 * running on (it indexes cpunodes[] with CPU->cpu_id): PN_L3_NWAYS on
 * Panther, otherwise 2 or 1 depending on the ECCR_ASSOC bit.
 */
int
cpu_ecache_nway(void)
{
	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
		return (PN_L3_NWAYS);
	return ((get_ecache_ctrl() & ECCR_ASSOC) ? 2 : 1);
}

/*
 * Note that these are entered into the table: Fatal Errors (PERR, IERR, ISAP,
 * EMU, IMU) first, orphaned UCU/UCC, AFAR Overwrite policy, finally IVU, IVC.
 * Afar overwrite policy is:
 *   Class 4:
 *      AFSR     -- UCC, UCU, TUE, TSCE, TUE_SH
 *      AFSR_EXT -- L3_UCC, L3_UCU, L3_TUE, L3_TUE_SH
 *   Class 3:
 *      AFSR     -- UE, DUE, EDU, WDU, CPU
 *      AFSR_EXT -- L3_EDU, L3_WDU, L3_CPU
 *   Class 2:
 *      AFSR     -- CE, EDC, EMC, WDC, CPC, THCE
 *      AFSR_EXT -- L3_EDC, L3_WDC, L3_CPC, L3_THCE
 *   Class 1:
 *      AFSR     -- TO, DTO, BERR, DBERR
 *
 * Each entry below supplies seven initializers, laid out over four lines:
 * an AFSR bit, a short name, a trap-type mask, a fault type, a description
 * string, an ereport payload mask and an ereport class.  The table ends
 * with an all-zero/NULL entry.
 * NOTE(review): the field meanings are read off the initializer values;
 * confirm against the definition of ecc_type_to_info_t.
 */
ecc_type_to_info_t ecc_type_to_info[] = {

	/* Fatal Errors */
	C_AFSR_PERR,		"PERR ",	ECC_ALL_TRAPS,
		CPU_FATAL,	"PERR Fatal",
		FM_EREPORT_PAYLOAD_SYSTEM2,
		FM_EREPORT_CPU_USIII_PERR,
	C_AFSR_IERR,		"IERR ",	ECC_ALL_TRAPS,
		CPU_FATAL,	"IERR Fatal",
		FM_EREPORT_PAYLOAD_SYSTEM2,
		FM_EREPORT_CPU_USIII_IERR,
	C_AFSR_ISAP,		"ISAP ",	ECC_ALL_TRAPS,
		CPU_FATAL,	"ISAP Fatal",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_ISAP,
	C_AFSR_L3_TUE_SH,	"L3_TUE_SH ",	ECC_C_TRAP,
		CPU_FATAL,	"L3_TUE_SH Fatal",
		FM_EREPORT_PAYLOAD_L3_TAG_ECC,
		FM_EREPORT_CPU_USIII_L3_TUE_SH,
	C_AFSR_L3_TUE,		"L3_TUE ",	ECC_C_TRAP,
		CPU_FATAL,	"L3_TUE Fatal",
		FM_EREPORT_PAYLOAD_L3_TAG_ECC,
		FM_EREPORT_CPU_USIII_L3_TUE,
	C_AFSR_TUE_SH,		"TUE_SH ",	ECC_C_TRAP,
		CPU_FATAL,	"TUE_SH Fatal",
		FM_EREPORT_PAYLOAD_L2_TAG_ECC,
		FM_EREPORT_CPU_USIII_TUE_SH,
	C_AFSR_TUE,		"TUE ",		ECC_ALL_TRAPS,
		CPU_FATAL,	"TUE Fatal",
		FM_EREPORT_PAYLOAD_L2_TAG_ECC,
		FM_EREPORT_CPU_USIII_TUE,
	C_AFSR_EMU,		"EMU ",		ECC_ASYNC_TRAPS,
		CPU_FATAL,	"EMU Fatal",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_EMU,
	C_AFSR_IMU,		"IMU ",		ECC_C_TRAP,
		CPU_FATAL,	"IMU Fatal",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_IMU,

	/* L3$ Address parity errors are reported via the MECC bit */
	C_AFSR_L3_MECC,		"L3_MECC ",	ECC_MECC_TRAPS,
		CPU_L3_ADDR_PE,	"L3 Address Parity",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_MECC,

	/* Orphaned UCC/UCU Errors */
	C_AFSR_L3_UCU,		"L3_OUCU ",	ECC_ORPH_TRAPS,
		CPU_ORPH,	"Orphaned L3_UCU",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_UCU,
	C_AFSR_L3_UCC,		"L3_OUCC ",	ECC_ORPH_TRAPS,
		CPU_ORPH,	"Orphaned L3_UCC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_UCC,
	C_AFSR_UCU,		"OUCU ",	ECC_ORPH_TRAPS,
		CPU_ORPH,	"Orphaned UCU",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_UCU,
	C_AFSR_UCC,		"OUCC ",	ECC_ORPH_TRAPS,
		CPU_ORPH,	"Orphaned UCC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_UCC,

	/* UCU, UCC */
	C_AFSR_L3_UCU,		"L3_UCU ",	ECC_F_TRAP,
		CPU_UE_ECACHE,	"L3_UCU",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_UCU,
	C_AFSR_L3_UCC,		"L3_UCC ",	ECC_F_TRAP,
		CPU_CE_ECACHE,	"L3_UCC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_UCC,
	C_AFSR_UCU,		"UCU ",		ECC_F_TRAP,
		CPU_UE_ECACHE,	"UCU",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_UCU,
	C_AFSR_UCC,		"UCC ",		ECC_F_TRAP,
		CPU_CE_ECACHE,	"UCC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_UCC,
	C_AFSR_TSCE,		"TSCE ",	ECC_F_TRAP,
		CPU_CE_ECACHE,	"TSCE",
		FM_EREPORT_PAYLOAD_L2_TAG_ECC,
		FM_EREPORT_CPU_USIII_TSCE,

	/* UE, EDU:ST, EDU:BLD, WDU, CPU */
	C_AFSR_UE,		"UE ",		ECC_ASYNC_TRAPS,
		CPU_UE,		"Uncorrectable system bus (UE)",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_UE,
	C_AFSR_L3_EDU,		"L3_EDU ",	ECC_C_TRAP,
		CPU_UE_ECACHE_RETIRE,	"L3_EDU:ST",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_EDUST,
	C_AFSR_L3_EDU,		"L3_EDU ",	ECC_D_TRAP,
		CPU_UE_ECACHE_RETIRE,	"L3_EDU:BLD",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_EDUBL,
	C_AFSR_L3_WDU,		"L3_WDU ",	ECC_C_TRAP,
		CPU_UE_ECACHE_RETIRE,	"L3_WDU",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_WDU,
	C_AFSR_L3_CPU,		"L3_CPU ",	ECC_C_TRAP,
		CPU_UE_ECACHE,	"L3_CPU",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_CPU,
	C_AFSR_EDU,		"EDU ",		ECC_C_TRAP,
		CPU_UE_ECACHE_RETIRE,	"EDU:ST",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_EDUST,
	C_AFSR_EDU,		"EDU ",		ECC_D_TRAP,
		CPU_UE_ECACHE_RETIRE,	"EDU:BLD",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_EDUBL,
	C_AFSR_WDU,		"WDU ",		ECC_C_TRAP,
		CPU_UE_ECACHE_RETIRE,	"WDU",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_WDU,
	C_AFSR_CPU,		"CPU ",		ECC_C_TRAP,
		CPU_UE_ECACHE,	"CPU",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_CPU,
	C_AFSR_DUE,		"DUE ",		ECC_C_TRAP,
		CPU_DUE,	"DUE",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_DUE,

	/* CE, EDC, EMC, WDC, CPC */
	C_AFSR_CE,		"CE ",		ECC_C_TRAP,
		CPU_CE,		"Corrected system bus (CE)",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_CE,
	C_AFSR_L3_EDC,		"L3_EDC ",	ECC_C_TRAP,
		CPU_CE_ECACHE,	"L3_EDC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_EDC,
	C_AFSR_EDC,		"EDC ",		ECC_C_TRAP,
		CPU_CE_ECACHE,	"EDC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_EDC,
	C_AFSR_EMC,		"EMC ",		ECC_C_TRAP,
		CPU_EMC,	"EMC",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_EMC,
	C_AFSR_L3_WDC,		"L3_WDC ",	ECC_C_TRAP,
		CPU_CE_ECACHE,	"L3_WDC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_WDC,
	C_AFSR_L3_CPC,		"L3_CPC ",	ECC_C_TRAP,
		CPU_CE_ECACHE,	"L3_CPC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_CPC,
	C_AFSR_L3_THCE,		"L3_THCE ",	ECC_C_TRAP,
		CPU_CE_ECACHE,	"L3_THCE",
		FM_EREPORT_PAYLOAD_L3_TAG_ECC,
		FM_EREPORT_CPU_USIII_L3_THCE,
	C_AFSR_WDC,		"WDC ",		ECC_C_TRAP,
		CPU_CE_ECACHE,	"WDC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_WDC,
	C_AFSR_CPC,		"CPC ",		ECC_C_TRAP,
		CPU_CE_ECACHE,	"CPC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_CPC,
	C_AFSR_THCE,		"THCE ",	ECC_C_TRAP,
		CPU_CE_ECACHE,	"THCE",
		FM_EREPORT_PAYLOAD_L2_TAG_ECC,
		FM_EREPORT_CPU_USIII_THCE,

	/* TO, BERR */
	C_AFSR_TO,		"TO ",		ECC_ASYNC_TRAPS,
		CPU_TO,		"Timeout (TO)",
		FM_EREPORT_PAYLOAD_IO,
		FM_EREPORT_CPU_USIII_TO,
	C_AFSR_BERR,		"BERR ",	ECC_ASYNC_TRAPS,
		CPU_BERR,	"Bus Error (BERR)",
		FM_EREPORT_PAYLOAD_IO,
		FM_EREPORT_CPU_USIII_BERR,
	C_AFSR_DTO,		"DTO ",		ECC_C_TRAP,
		CPU_TO,		"Disrupting Timeout (DTO)",
		FM_EREPORT_PAYLOAD_IO,
		FM_EREPORT_CPU_USIII_DTO,
	C_AFSR_DBERR,		"DBERR ",	ECC_C_TRAP,
		CPU_BERR,	"Disrupting Bus Error (DBERR)",
		FM_EREPORT_PAYLOAD_IO,
		FM_EREPORT_CPU_USIII_DBERR,

	/* IVU, IVC, IMC */
	C_AFSR_IVU,		"IVU ",		ECC_C_TRAP,
		CPU_IV,		"IVU",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_IVU,
	C_AFSR_IVC,		"IVC ",		ECC_C_TRAP,
		CPU_IV,		"IVC",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_IVC,
	C_AFSR_IMC,		"IMC ",		ECC_C_TRAP,
		CPU_IV,		"IMC",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_IMC,

	/* end-of-table marker */
	0,			NULL,		0,
		0,		NULL,
		FM_EREPORT_PAYLOAD_UNKNOWN,
		FM_EREPORT_CPU_USIII_UNKNOWN,
};

/*
 * See Cheetah+ Delta PRM 10.9 and section P.6.1 of the Panther PRM
 *   Class 4:
 *      AFSR     -- UCC, UCU, TUE, TSCE, TUE_SH
 *      AFSR_EXT -- L3_UCC, L3_UCU, L3_TUE, L3_TUE_SH
 *   Class 3:
 *      AFSR     -- UE, DUE, EDU, EMU, WDU, CPU
 *      AFSR_EXT -- L3_EDU, L3_WDU, L3_CPU
 *   Class 2:
 *      AFSR     -- CE, EDC, EMC, WDC, CPC, THCE
 *      AFSR_EXT -- L3_EDC, L3_WDC, L3_CPC, L3_THCE
 *   Class 1:
 *      AFSR     -- TO, DTO, BERR, DBERR
 *      AFSR_EXT --
 *
 * One bit mask per class, highest class first, terminated by a 0 entry.
 */
uint64_t afar_overwrite[] = {
	/* class 4: */
	C_AFSR_UCC | C_AFSR_UCU | C_AFSR_TUE | C_AFSR_TSCE | C_AFSR_TUE_SH |
	C_AFSR_L3_UCC | C_AFSR_L3_UCU | C_AFSR_L3_TUE | C_AFSR_L3_TUE_SH,
	/* class 3: */
	C_AFSR_UE | C_AFSR_DUE | C_AFSR_EDU | C_AFSR_EMU | C_AFSR_WDU |
	C_AFSR_CPU | C_AFSR_L3_EDU | C_AFSR_L3_WDU | C_AFSR_L3_CPU,
	/* class 2: */
	C_AFSR_CE | C_AFSR_EDC | C_AFSR_EMC | C_AFSR_WDC | C_AFSR_CPC |
	C_AFSR_THCE | C_AFSR_L3_EDC | C_AFSR_L3_WDC | C_AFSR_L3_CPC |
	C_AFSR_L3_THCE,
	/* class 1: */
	C_AFSR_TO | C_AFSR_DTO | C_AFSR_BERR | C_AFSR_DBERR,

	0
};

/*
 * For Cheetah+, the E_SYND and M_SYND overwrite priorities are combined.
 * See Cheetah+ Delta PRM 10.9 and Cheetah+ PRM 11.6.2
 *   Class 2: UE, DUE, IVU, EDU, EMU, WDU, UCU, CPU
 *   Class 1: CE, IVC, EDC, EMC, WDC, UCC, CPC
 *
 * One bit mask per class, highest class first, terminated by a 0 entry.
 */
uint64_t esynd_overwrite[] = {
	/* class 2: */
	C_AFSR_UE | C_AFSR_DUE | C_AFSR_IVU | C_AFSR_EDU | C_AFSR_EMU |
	C_AFSR_WDU | C_AFSR_UCU | C_AFSR_CPU,
	/* class 1: */
	C_AFSR_CE | C_AFSR_IVC | C_AFSR_EDC | C_AFSR_EMC | C_AFSR_WDC |
	C_AFSR_UCC | C_AFSR_CPC,
	0
};

/*
 * In panther, the E_SYND overwrite policy changed a little bit
 * by adding one more level.
 * See Panther PRM P.6.2
 *   Class 3:
 *      AFSR     -- UCU, UCC
 *      AFSR_EXT -- L3_UCU, L3_UCC
 *   Class 2:
 *      AFSR     -- UE, DUE, IVU, EDU, WDU, CPU
 *      AFSR_EXT -- L3_EDU, L3_WDU, L3_CPU
 *   Class 1:
 *      AFSR     -- CE, IVC, EDC, WDC, CPC
 *      AFSR_EXT -- L3_EDC, L3_WDC, L3_CPC
 *
 * One bit mask per class, highest class first, terminated by a 0 entry.
 */
uint64_t pn_esynd_overwrite[] = {
	/* class 3: */
	C_AFSR_UCU | C_AFSR_UCC |
	C_AFSR_L3_UCU | C_AFSR_L3_UCC,
	/* class 2: */
	C_AFSR_UE | C_AFSR_DUE | C_AFSR_IVU | C_AFSR_EDU | C_AFSR_WDU |
	C_AFSR_CPU |
	C_AFSR_L3_EDU | C_AFSR_L3_WDU | C_AFSR_L3_CPU,
	/* class 1: */
	C_AFSR_CE | C_AFSR_IVC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_CPC |
	C_AFSR_L3_EDC | C_AFSR_L3_WDC | C_AFSR_L3_CPC,

	0
};

/*
 * Evaluate 'afsr_bit' against 'afsr' using the Panther E_SYND overwrite
 * table.  This is a thin wrapper: the result is whatever
 * afsr_to_overw_status() returns for pn_esynd_overwrite.
 */
int
afsr_to_pn_esynd_status(uint64_t afsr, uint64_t afsr_bit)
{
	return (afsr_to_overw_status(afsr, afsr_bit, pn_esynd_overwrite));
}

/*
 * Prioritized list of Error bits for MSYND overwrite.
 * See Panther PRM P.6.2 (For Cheetah+, see esynd_overwrite classes)
 *   Class 2: EMU, IMU
 *   Class 1: EMC, IMC
 *
 * Panther adds IMU and IMC.
 *
 * One bit mask per class, highest class first, terminated by a 0 entry.
 */
uint64_t msynd_overwrite[] = {
	/* class 2: */
	C_AFSR_EMU | C_AFSR_IMU,
	/* class 1: */
	C_AFSR_EMC | C_AFSR_IMC,

	0
};

/*
 * change cpu speed bits -- new speed will be normal-speed/divisor.
 *
 * The Jalapeno memory controllers are required to drain outstanding
 * memory transactions within 32 JBus clocks in order to be ready
 * to enter Estar mode. In some corner cases however, that time
 * fell short.
 *
 * A safe software solution is to force MCU to act like in Estar mode,
 * then delay 1us (in ppm code) prior to assert J_CHNG_L signal.
 * To reverse the effect, upon exiting Estar, software restores the
 * MCU to its original state.
762 */ 763 /* ARGSUSED1 */ 764 void 765 cpu_change_speed(uint64_t divisor, uint64_t arg2) 766 { 767 bus_config_eclk_t *bceclk; 768 uint64_t reg; 769 processor_info_t *pi = &(CPU->cpu_type_info); 770 771 for (bceclk = bus_config_eclk; bceclk->divisor; bceclk++) { 772 if (bceclk->divisor != divisor) 773 continue; 774 reg = get_safari_config(); 775 reg &= ~SAFARI_CONFIG_ECLK_MASK; 776 reg |= bceclk->mask; 777 set_safari_config(reg); 778 CPU->cpu_m.divisor = (uchar_t)divisor; 779 pi->pi_curr_clock = 780 (((uint64_t)pi->pi_clock * 1000000) / divisor); 781 return; 782 } 783 /* 784 * We will reach here only if OBP and kernel don't agree on 785 * the speeds supported by the CPU. 786 */ 787 cmn_err(CE_WARN, "cpu_change_speed: bad divisor %" PRIu64, divisor); 788 } 789 790 /* 791 * Cpu private initialization. This includes allocating the cpu_private 792 * data structure, initializing it, and initializing the scrubber for this 793 * cpu. This function calls cpu_init_ecache_scrub_dr to init the scrubber. 794 * We use kmem_cache_create for the cheetah private data structure because 795 * it needs to be allocated on a PAGESIZE (8192) byte boundary. 796 */ 797 void 798 cpu_init_private(struct cpu *cp) 799 { 800 cheetah_private_t *chprp; 801 int i; 802 803 ASSERT(CPU_PRIVATE(cp) == NULL); 804 805 /* LINTED: E_TRUE_LOGICAL_EXPR */ 806 ASSERT((offsetof(cheetah_private_t, chpr_tl1_err_data) + 807 sizeof (ch_err_tl1_data_t) * CH_ERR_TL1_TLMAX) <= PAGESIZE); 808 809 /* 810 * Running with Cheetah CPUs in a Cheetah+, Jaguar, Panther or 811 * mixed Cheetah+/Jaguar/Panther machine is not a supported 812 * configuration. Attempting to do so may result in unpredictable 813 * failures (e.g. running Cheetah+ CPUs with Cheetah E$ disp flush) 814 * so don't allow it. 815 * 816 * This is just defensive code since this configuration mismatch 817 * should have been caught prior to OS execution. 
818 */ 819 if (!(IS_CHEETAH_PLUS(cpunodes[cp->cpu_id].implementation) || 820 IS_JAGUAR(cpunodes[cp->cpu_id].implementation) || 821 IS_PANTHER(cpunodes[cp->cpu_id].implementation))) { 822 cmn_err(CE_PANIC, "CPU%d: UltraSPARC-III not supported" 823 " on UltraSPARC-III+/IV/IV+ code\n", cp->cpu_id); 824 } 825 826 /* 827 * If the ch_private_cache has not been created, create it. 828 */ 829 if (ch_private_cache == NULL) { 830 ch_private_cache = kmem_cache_create("ch_private_cache", 831 sizeof (cheetah_private_t), PAGESIZE, NULL, NULL, 832 NULL, NULL, static_arena, 0); 833 } 834 835 chprp = CPU_PRIVATE(cp) = kmem_cache_alloc(ch_private_cache, KM_SLEEP); 836 837 bzero(chprp, sizeof (cheetah_private_t)); 838 chprp->chpr_fecctl0_logout.clo_data.chd_afar = LOGOUT_INVALID; 839 chprp->chpr_cecc_logout.clo_data.chd_afar = LOGOUT_INVALID; 840 chprp->chpr_async_logout.clo_data.chd_afar = LOGOUT_INVALID; 841 chprp->chpr_tlb_logout.tlo_addr = LOGOUT_INVALID; 842 for (i = 0; i < CH_ERR_TL1_TLMAX; i++) 843 chprp->chpr_tl1_err_data[i].ch_err_tl1_logout.clo_data.chd_afar 844 = LOGOUT_INVALID; 845 846 /* Panther has a larger Icache compared to cheetahplus or Jaguar */ 847 if (IS_PANTHER(cpunodes[cp->cpu_id].implementation)) { 848 chprp->chpr_icache_size = PN_ICACHE_SIZE; 849 chprp->chpr_icache_linesize = PN_ICACHE_LSIZE; 850 } else { 851 chprp->chpr_icache_size = CH_ICACHE_SIZE; 852 chprp->chpr_icache_linesize = CH_ICACHE_LSIZE; 853 } 854 855 cpu_init_ecache_scrub_dr(cp); 856 857 /* 858 * Panther's L2$ and E$ are shared between cores, so the scrubber is 859 * only needed on one of the cores. At this point, we assume all cores 860 * are online, and we only enable the scrubber on core 0. 
861 */ 862 if (IS_PANTHER(cpunodes[cp->cpu_id].implementation)) { 863 chprp->chpr_scrub_misc.chsm_core_state = 864 SCRUBBER_BOTH_CORES_ONLINE; 865 if (cp->cpu_id != (processorid_t)cmp_cpu_to_chip(cp->cpu_id)) { 866 chprp->chpr_scrub_misc.chsm_enable[ 867 CACHE_SCRUBBER_INFO_E] = 0; 868 } 869 } 870 871 chprp->chpr_ec_set_size = cpunodes[cp->cpu_id].ecache_size / 872 cpu_ecache_nway(); 873 874 adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size); 875 ch_err_tl1_paddrs[cp->cpu_id] = va_to_pa(chprp); 876 ASSERT(ch_err_tl1_paddrs[cp->cpu_id] != -1); 877 } 878 879 /* 880 * Clear the error state registers for this CPU. 881 * For Cheetah+/Jaguar, just clear the AFSR but 882 * for Panther we also have to clear the AFSR_EXT. 883 */ 884 void 885 set_cpu_error_state(ch_cpu_errors_t *cpu_error_regs) 886 { 887 set_asyncflt(cpu_error_regs->afsr & ~C_AFSR_FATAL_ERRS); 888 if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { 889 set_afsr_ext(cpu_error_regs->afsr_ext & ~C_AFSR_EXT_FATAL_ERRS); 890 } 891 } 892 893 void 894 pn_cpu_log_diag_l2_info(ch_async_flt_t *ch_flt) { 895 struct async_flt *aflt = (struct async_flt *)ch_flt; 896 ch_ec_data_t *l2_data = &ch_flt->flt_diag_data.chd_l2_data[0]; 897 uint64_t faddr = aflt->flt_addr; 898 uint8_t log_way_mask = 0; 899 int i; 900 901 /* 902 * Only Panther CPUs have the additional L2$ data that needs 903 * to be logged here 904 */ 905 if (!IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) 906 return; 907 908 /* 909 * We'll use a simple bit mask to keep track of which way(s) 910 * of the stored cache line we want to log. The idea is to 911 * log the entry if it is a valid line and it matches our 912 * fault AFAR. If no match is found, we will simply log all 913 * the ways. 
914 */ 915 for (i = 0; i < PN_L2_NWAYS; i++) 916 if (pn_matching_valid_l2_line(faddr, &l2_data[i])) 917 log_way_mask |= (1 << i); 918 919 /* If no matching valid lines were found, we log all ways */ 920 if (log_way_mask == 0) 921 log_way_mask = (1 << PN_L2_NWAYS) - 1; 922 923 /* Log the cache lines */ 924 for (i = 0; i < PN_L2_NWAYS; i++) 925 if (log_way_mask & (1 << i)) 926 l2_data[i].ec_logflag = EC_LOGFLAG_MAGIC; 927 } 928 929 /* 930 * For this routine to return true, the L2 tag in question must be valid 931 * and the tag PA must match the fault address (faddr) assuming the correct 932 * index is being used. 933 */ 934 static int 935 pn_matching_valid_l2_line(uint64_t faddr, ch_ec_data_t *clo_l2_data) { 936 if ((!PN_L2_LINE_INVALID(clo_l2_data->ec_tag)) && 937 ((faddr & P2ALIGN(C_AFAR_PA, PN_L2_SET_SIZE)) == 938 PN_L2TAG_TO_PA(clo_l2_data->ec_tag))) 939 return (1); 940 return (0); 941 } 942 943 /* 944 * This array is used to convert the 3 digit PgSz encoding (as used in 945 * various MMU registers such as MMU_TAG_ACCESS_EXT) into the corresponding 946 * page size. 947 */ 948 static uint64_t tlb_pgsz_to_size[] = { 949 /* 000 = 8KB: */ 950 0x2000, 951 /* 001 = 64KB: */ 952 0x10000, 953 /* 010 = 512KB: */ 954 0x80000, 955 /* 011 = 4MB: */ 956 0x400000, 957 /* 100 = 32MB: */ 958 0x2000000, 959 /* 101 = 256MB: */ 960 0x10000000, 961 /* undefined for encodings 110 and 111: */ 962 0, 0 963 }; 964 965 /* 966 * The itlb_parity_trap and dtlb_parity_trap handlers transfer control here 967 * after collecting logout information related to the TLB parity error and 968 * flushing the offending TTE entries from the ITLB or DTLB. 969 * 970 * DTLB traps which occur at TL>0 are not recoverable because we will most 971 * likely be corrupting some other trap handler's alternate globals. As 972 * such, we simply panic here when that happens. ITLB parity errors are 973 * not expected to happen at TL>0. 
974 */ 975 void 976 cpu_tlb_parity_error(struct regs *rp, ulong_t trap_va, ulong_t tlb_info) { 977 ch_async_flt_t ch_flt; 978 struct async_flt *aflt; 979 pn_tlb_logout_t *tlop = NULL; 980 int immu_parity = (tlb_info & PN_TLO_INFO_IMMU) != 0; 981 int tl1_trap = (tlb_info & PN_TLO_INFO_TL1) != 0; 982 char *error_class; 983 984 bzero(&ch_flt, sizeof (ch_async_flt_t)); 985 986 /* 987 * Get the CPU log out info. If we can't find our CPU private 988 * pointer, or if the logout information does not correspond to 989 * this error, then we will have to make due without detailed 990 * logout information. 991 */ 992 if (CPU_PRIVATE(CPU)) { 993 tlop = CPU_PRIVATE_PTR(CPU, chpr_tlb_logout); 994 if ((tlop->tlo_addr != trap_va) || 995 (tlop->tlo_info != tlb_info)) 996 tlop = NULL; 997 } 998 999 if (tlop) { 1000 ch_flt.tlb_diag_data = *tlop; 1001 1002 /* Zero out + invalidate TLB logout. */ 1003 bzero(tlop, sizeof (pn_tlb_logout_t)); 1004 tlop->tlo_addr = LOGOUT_INVALID; 1005 } else { 1006 /* 1007 * Copy what logout information we have and mark 1008 * it incomplete. 1009 */ 1010 ch_flt.flt_data_incomplete = 1; 1011 ch_flt.tlb_diag_data.tlo_info = tlb_info; 1012 ch_flt.tlb_diag_data.tlo_addr = trap_va; 1013 } 1014 1015 /* 1016 * Log the error. 1017 */ 1018 aflt = (struct async_flt *)&ch_flt; 1019 aflt->flt_id = gethrtime_waitfree(); 1020 aflt->flt_bus_id = getprocessorid(); 1021 aflt->flt_inst = CPU->cpu_id; 1022 aflt->flt_pc = (caddr_t)rp->r_pc; 1023 aflt->flt_addr = trap_va; 1024 aflt->flt_prot = AFLT_PROT_NONE; 1025 aflt->flt_class = CPU_FAULT; 1026 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1027 aflt->flt_tl = tl1_trap ? 1 : 0; 1028 aflt->flt_panic = tl1_trap ? 
1 : 0; 1029 1030 if (immu_parity) { 1031 aflt->flt_status = ECC_ITLB_TRAP; 1032 ch_flt.flt_type = CPU_ITLB_PARITY; 1033 error_class = FM_EREPORT_CPU_USIII_ITLBPE; 1034 aflt->flt_payload = FM_EREPORT_PAYLOAD_ITLB_PE; 1035 } else { 1036 aflt->flt_status = ECC_DTLB_TRAP; 1037 ch_flt.flt_type = CPU_DTLB_PARITY; 1038 error_class = FM_EREPORT_CPU_USIII_DTLBPE; 1039 aflt->flt_payload = FM_EREPORT_PAYLOAD_DTLB_PE; 1040 } 1041 1042 /* 1043 * The TLB entries have already been flushed by the TL1 trap 1044 * handler so at this point the only thing left to do is log 1045 * the error message. 1046 */ 1047 if (aflt->flt_panic) { 1048 cpu_errorq_dispatch(error_class, (void *)&ch_flt, 1049 sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); 1050 /* 1051 * Panic here if aflt->flt_panic has been set. Enqueued 1052 * errors will be logged as part of the panic flow. 1053 */ 1054 fm_panic("%sError(s)", immu_parity ? "ITLBPE " : "DTLBPE "); 1055 } else { 1056 cpu_errorq_dispatch(error_class, (void *)&ch_flt, 1057 sizeof (ch_async_flt_t), ce_queue, aflt->flt_panic); 1058 } 1059 } 1060 1061 /* 1062 * This routine is called when a TLB parity error event is 'ue_drain'ed 1063 * or 'ce_drain'ed from the errorq. 1064 */ 1065 void 1066 cpu_async_log_tlb_parity_err(void *flt) { 1067 ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt; 1068 struct async_flt *aflt = (struct async_flt *)flt; 1069 #ifdef lint 1070 aflt = aflt; 1071 #endif 1072 1073 /* 1074 * We only capture TLB information if we encountered 1075 * a TLB parity error and Panther is the only CPU which 1076 * can detect a TLB parity error. 
1077 */ 1078 ASSERT(IS_PANTHER(cpunodes[aflt->flt_inst].implementation)); 1079 ASSERT((ch_flt->flt_type == CPU_ITLB_PARITY) || 1080 (ch_flt->flt_type == CPU_DTLB_PARITY)); 1081 1082 if (ch_flt->flt_data_incomplete == 0) { 1083 if (ch_flt->flt_type == CPU_ITLB_PARITY) 1084 ch_flt->tlb_diag_data.tlo_logflag = IT_LOGFLAG_MAGIC; 1085 else /* parity error is in DTLB */ 1086 ch_flt->tlb_diag_data.tlo_logflag = DT_LOGFLAG_MAGIC; 1087 } 1088 } 1089 1090 /* 1091 * Add L1 Prefetch cache data to the ereport payload. 1092 */ 1093 void 1094 cpu_payload_add_pcache(struct async_flt *aflt, nvlist_t *nvl) 1095 { 1096 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 1097 ch_pc_data_t *pcp; 1098 ch_pc_data_t pcdata[CH_PCACHE_NWAY]; 1099 uint_t nelem; 1100 int i, ways_logged = 0; 1101 1102 /* 1103 * We only capture P$ information if we encountered 1104 * a P$ parity error and Panther is the only CPU which 1105 * can detect a P$ parity error. 1106 */ 1107 ASSERT(IS_PANTHER(cpunodes[aflt->flt_inst].implementation)); 1108 for (i = 0; i < CH_PCACHE_NWAY; i++) { 1109 pcp = &ch_flt->parity_data.dpe.cpl_pc[i]; 1110 if (pcp->pc_logflag == PC_LOGFLAG_MAGIC) { 1111 bcopy(pcp, &pcdata[ways_logged], 1112 sizeof (ch_pc_data_t)); 1113 ways_logged++; 1114 } 1115 } 1116 1117 /* 1118 * Add the pcache data to the payload. 1119 */ 1120 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1P_WAYS, 1121 DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); 1122 if (ways_logged != 0) { 1123 nelem = sizeof (ch_pc_data_t) / sizeof (uint64_t) * ways_logged; 1124 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1P_DATA, 1125 DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)pcdata, NULL); 1126 } 1127 } 1128 1129 /* 1130 * Add TLB diagnostic data to the ereport payload. 
1131 */ 1132 void 1133 cpu_payload_add_tlb(struct async_flt *aflt, nvlist_t *nvl) 1134 { 1135 ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; 1136 uint8_t num_entries, tlb_data_words; 1137 1138 /* 1139 * We only capture TLB information if we encountered 1140 * a TLB parity error and Panther is the only CPU which 1141 * can detect a TLB parity error. 1142 */ 1143 ASSERT(IS_PANTHER(cpunodes[aflt->flt_inst].implementation)); 1144 ASSERT((ch_flt->flt_type == CPU_ITLB_PARITY) || 1145 (ch_flt->flt_type == CPU_DTLB_PARITY)); 1146 1147 if (ch_flt->flt_type == CPU_ITLB_PARITY) { 1148 num_entries = (uint8_t)(PN_ITLB_NWAYS * PN_NUM_512_ITLBS); 1149 tlb_data_words = sizeof (ch_tte_entry_t) / sizeof (uint64_t) * 1150 num_entries; 1151 1152 /* 1153 * Add the TLB diagnostic data to the payload 1154 * if it was collected. 1155 */ 1156 if (ch_flt->tlb_diag_data.tlo_logflag == IT_LOGFLAG_MAGIC) { 1157 fm_payload_set(nvl, 1158 FM_EREPORT_PAYLOAD_NAME_ITLB_ENTRIES, 1159 DATA_TYPE_UINT8, num_entries, NULL); 1160 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_ITLB_DATA, 1161 DATA_TYPE_UINT64_ARRAY, tlb_data_words, 1162 (uint64_t *)ch_flt->tlb_diag_data.tlo_itlb_tte, 1163 NULL); 1164 } 1165 } else { 1166 num_entries = (uint8_t)(PN_DTLB_NWAYS * PN_NUM_512_DTLBS); 1167 tlb_data_words = sizeof (ch_tte_entry_t) / sizeof (uint64_t) * 1168 num_entries; 1169 1170 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_VA, 1171 DATA_TYPE_UINT64, ch_flt->tlb_diag_data.tlo_addr, NULL); 1172 1173 /* 1174 * Add the TLB diagnostic data to the payload 1175 * if it was collected. 
1176 */ 1177 if (ch_flt->tlb_diag_data.tlo_logflag == DT_LOGFLAG_MAGIC) { 1178 fm_payload_set(nvl, 1179 FM_EREPORT_PAYLOAD_NAME_DTLB_ENTRIES, 1180 DATA_TYPE_UINT8, num_entries, NULL); 1181 fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_DTLB_DATA, 1182 DATA_TYPE_UINT64_ARRAY, tlb_data_words, 1183 (uint64_t *)ch_flt->tlb_diag_data.tlo_dtlb_tte, 1184 NULL); 1185 } 1186 } 1187 } 1188 1189 /* 1190 * Panther Cache Scrubbing: 1191 * 1192 * In Jaguar, the E$ was split between cores, so the scrubber must run on both 1193 * cores. For Panther, however, the L2$ and L3$ are shared across cores. 1194 * Therefore, the E$ scrubber only needs to run on one of the two cores. 1195 * 1196 * There are four possible states for the E$ scrubber: 1197 * 1198 * 0. If both cores are offline, add core 0 to cpu_offline_set so that 1199 * the offline scrubber will run on it. 1200 * 1. If core 0 is online and core 1 off, we run the scrubber on core 0. 1201 * 2. If core 1 is online and core 0 off, we move the scrubber to run 1202 * on core 1. 1203 * 3. If both cores are online, only run the scrubber on core 0. 1204 * 1205 * These states are enumerated by the SCRUBBER_[BOTH|CORE|NEITHER]_* defines 1206 * above. One of those values is stored in 1207 * chpr_scrub_misc->chsm_core_state on each core. 1208 * 1209 * Also note that, for Panther, ecache_flush_line() will flush out the L2$ 1210 * before the E$, so the L2$ will be scrubbed by the E$ scrubber. No 1211 * additional code is necessary to scrub the L2$. 1212 * 1213 * For all cpu types, whenever a cpu or core is offlined, add it to 1214 * cpu_offline_set so the necessary scrubbers can still run. This is still 1215 * necessary on Panther so the D$ scrubber can still run. 
1216 */ 1217 /*ARGSUSED*/ 1218 int 1219 cpu_scrub_cpu_setup(cpu_setup_t what, int cpuid, void *arg) 1220 { 1221 processorid_t core_0_id; 1222 cpu_t *core_cpus[2]; 1223 ch_scrub_misc_t *core_scrub[2]; 1224 int old_state, i; 1225 int new_state = SCRUBBER_NEITHER_CORE_ONLINE; 1226 1227 switch (what) { 1228 case CPU_ON: 1229 case CPU_INIT: 1230 CPUSET_DEL(cpu_offline_set, cpuid); 1231 break; 1232 case CPU_OFF: 1233 CPUSET_ADD(cpu_offline_set, cpuid); 1234 break; 1235 default: 1236 return (0); 1237 } 1238 1239 if (!IS_PANTHER(cpunodes[cpuid].implementation)) { 1240 return (0); 1241 } 1242 1243 /* 1244 * Update the chsm_enable[CACHE_SCRUBBER_INFO_E] value 1245 * if necessary 1246 */ 1247 core_0_id = cmp_cpu_to_chip(cpuid); 1248 core_cpus[0] = cpu_get(core_0_id); 1249 core_cpus[1] = cpu_get_sibling_core(core_cpus[0]); 1250 1251 for (i = 0; i < 2; i++) { 1252 if (core_cpus[i] == NULL) { 1253 /* 1254 * This may happen during DR - one core is offlined 1255 * and completely unconfigured before the second 1256 * core is offlined. Give up and return quietly, 1257 * since the second core should quickly be removed 1258 * anyways. 1259 */ 1260 return (0); 1261 } 1262 core_scrub[i] = CPU_PRIVATE_PTR(core_cpus[i], chpr_scrub_misc); 1263 } 1264 1265 if (cpuid == (processorid_t)cmp_cpu_to_chip(cpuid)) { 1266 /* cpuid is core 0 */ 1267 if (cpu_is_active(core_cpus[1])) { 1268 new_state |= SCRUBBER_CORE_1_ONLINE; 1269 } 1270 if (what != CPU_OFF) { 1271 new_state |= SCRUBBER_CORE_0_ONLINE; 1272 } 1273 } else { 1274 /* cpuid is core 1 */ 1275 if (cpu_is_active(core_cpus[0])) { 1276 new_state |= SCRUBBER_CORE_0_ONLINE; 1277 } 1278 if (what != CPU_OFF) { 1279 new_state |= SCRUBBER_CORE_1_ONLINE; 1280 } 1281 } 1282 1283 old_state = core_scrub[0]->chsm_core_state; 1284 1285 if (old_state == new_state) { 1286 return (0); 1287 } 1288 1289 if (old_state == SCRUBBER_CORE_1_ONLINE) { 1290 /* 1291 * We need to move the scrubber state from core 1 1292 * back to core 0. 
This data is not protected by 1293 * locks, but the worst that can happen is some 1294 * lines are scrubbed multiple times. chsm_oustanding is 1295 * set to 0 to make sure an interrupt is scheduled the 1296 * first time through do_scrub(). 1297 */ 1298 core_scrub[0]->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = 1299 core_scrub[1]->chsm_flush_index[CACHE_SCRUBBER_INFO_E]; 1300 core_scrub[0]->chsm_outstanding[CACHE_SCRUBBER_INFO_E] = 0; 1301 } 1302 1303 switch (new_state) { 1304 case SCRUBBER_NEITHER_CORE_ONLINE: 1305 case SCRUBBER_BOTH_CORES_ONLINE: 1306 case SCRUBBER_CORE_0_ONLINE: 1307 core_scrub[1]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 0; 1308 core_scrub[0]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1; 1309 break; 1310 1311 case SCRUBBER_CORE_1_ONLINE: 1312 default: 1313 /* 1314 * We need to move the scrubber state from core 0 1315 * to core 1. 1316 */ 1317 core_scrub[1]->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = 1318 core_scrub[0]->chsm_flush_index[CACHE_SCRUBBER_INFO_E]; 1319 core_scrub[1]->chsm_outstanding[CACHE_SCRUBBER_INFO_E] = 0; 1320 1321 core_scrub[0]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 0; 1322 core_scrub[1]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1; 1323 break; 1324 } 1325 1326 core_scrub[0]->chsm_core_state = new_state; 1327 core_scrub[1]->chsm_core_state = new_state; 1328 return (0); 1329 } 1330 1331 /* 1332 * Returns a pointer to the cpu structure of the argument's sibling core. 1333 * If no sibling core can be found, return NULL. 
1334 */ 1335 static cpu_t * 1336 cpu_get_sibling_core(cpu_t *cpup) 1337 { 1338 cpu_t *nextp; 1339 pg_t *pg; 1340 pg_cpu_itr_t i; 1341 1342 if ((cpup == NULL) || (!cmp_cpu_is_cmp(cpup->cpu_id))) 1343 return (NULL); 1344 pg = (pg_t *)pghw_find_pg(cpup, PGHW_CHIP); 1345 if (pg == NULL) 1346 return (NULL); 1347 1348 /* 1349 * Iterate over the CPUs in the chip PG looking 1350 * for a CPU that isn't cpup 1351 */ 1352 PG_CPU_ITR_INIT(pg, i); 1353 while ((nextp = pg_cpu_next(&i)) != NULL) { 1354 if (nextp != cpup) 1355 break; 1356 } 1357 1358 if (nextp == NULL) 1359 return (NULL); 1360 1361 return (nextp); 1362 } 1363