/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sysmacros.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/machthread.h>
#include <sys/cpu.h>
#include <sys/cmp.h>
#include <sys/elf_SPARC.h>
#include <vm/hat_sfmmu.h>
#include <vm/seg_kmem.h>
#include <sys/cpuvar.h>
#include <sys/cheetahregs.h>
#include <sys/us3_module.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/prom_debug.h>
#include <sys/prom_plat.h>
#include <sys/cpu_module.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/clock.h>
#include <sys/platform_module.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/panic.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/fm/protocol.h>
#include <sys/fm/cpu/UltraSPARC-III.h>
#include <sys/fm/util.h>

#ifdef CHEETAHPLUS_ERRATUM_25
#include <sys/cyclic.h>
#endif	/* CHEETAHPLUS_ERRATUM_25 */

/*
 * See comment above cpu_scrub_cpu_setup() for description
 */
#define	SCRUBBER_NEITHER_CORE_ONLINE	0x0
#define	SCRUBBER_CORE_0_ONLINE		0x1
#define	SCRUBBER_CORE_1_ONLINE		0x2
#define	SCRUBBER_BOTH_CORES_ONLINE	(SCRUBBER_CORE_0_ONLINE | \
					SCRUBBER_CORE_1_ONLINE)

static int pn_matching_valid_l2_line(uint64_t faddr, ch_ec_data_t *clo_l2_data);
static void cpu_async_log_tlb_parity_err(void *flt);
static cpu_t *cpu_get_sibling_core(cpu_t *cpup);


/*
 * Setup trap handlers.
 */
void
cpu_init_trap(void)
{
	CH_SET_TRAP(tt_pil15, ch_pil15_interrupt_instr);

	CH_SET_TRAP(tt0_fecc, fecc_err_instr);
	CH_SET_TRAP(tt1_fecc, fecc_err_tl1_instr);
	CH_SET_TRAP(tt1_swtrap0, fecc_err_tl1_cont_instr);

	CH_SET_TRAP(tt0_dperr, dcache_parity_instr);
	CH_SET_TRAP(tt1_dperr, dcache_parity_tl1_instr);
	CH_SET_TRAP(tt1_swtrap1, dcache_parity_tl1_cont_instr);

	CH_SET_TRAP(tt0_iperr, icache_parity_instr);
	CH_SET_TRAP(tt1_iperr, icache_parity_tl1_instr);
	CH_SET_TRAP(tt1_swtrap2, icache_parity_tl1_cont_instr);
}

/*
 * Set the magic constants of the implementation.
 */
/*ARGSUSED*/
void
cpu_fiximp(dnode_t dnode)
{
	int i, a;
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;

	dcache_size = CH_DCACHE_SIZE;
	dcache_linesize = CH_DCACHE_LSIZE;

	icache_size = CHP_ICACHE_MAX_SIZE;
	icache_linesize = CHP_ICACHE_MIN_LSIZE;

	ecache_size = CH_ECACHE_MAX_SIZE;
	ecache_alignsize = CH_ECACHE_MAX_LSIZE;
	ecache_associativity = CHP_ECACHE_MIN_NWAY;

	/*
	 * ecache_setsize needs to be the maximum of all cpu ecache setsizes
	 */
	ecache_setsize = CHP_ECACHE_MAX_SETSIZE;
	ASSERT(ecache_setsize >= (ecache_size / ecache_associativity));

	vac_size = CH_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
	i = 0; a = vac_size;
	while (a >>= 1)
		++i;
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;
}

void
send_mondo_set(cpuset_t set)
{
	int lo, busy, nack, shipped = 0;
	uint16_t i, cpuids[IDSR_BN_SETS];
	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
	uint64_t starttick, endtick, tick, lasttick;
#if (NCPU > IDSR_BN_SETS)
	int index = 0;
	int ncpuids = 0;
#endif
#ifdef CHEETAHPLUS_ERRATUM_25
	int recovered = 0;
	int cpuid;
#endif

	ASSERT(!CPUSET_ISNULL(set));
	starttick = lasttick = gettick();

#if (NCPU <= IDSR_BN_SETS)
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			shipit(i, shipped);
			nackmask |= IDSR_NACK_BIT(shipped);
			cpuids[shipped++] = i;
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
#else
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			ncpuids++;

			/*
			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
			 * find we have shipped to more than (IDSR_BN_SETS)
			 * CPUs, set "index" to the highest numbered CPU in
			 * the set so we can ship to other CPUs a bit later on.
			 */
			if (shipped < IDSR_BN_SETS) {
				shipit(i, shipped);
				nackmask |= IDSR_NACK_BIT(shipped);
				cpuids[shipped++] = i;
				CPUSET_DEL(set, i);
				if (CPUSET_ISNULL(set))
					break;
			} else
				index = (int)i;
		}

	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
#endif

	busymask = IDSR_NACK_TO_BUSY(nackmask);
	busy = nack = 0;
	endtick = starttick + xc_tick_limit;
	for (;;) {
		idsr = getidsr();
#if (NCPU <= IDSR_BN_SETS)
		if (idsr == 0)
			break;
#else
		if (idsr == 0 && shipped == ncpuids)
			break;
#endif
		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
#ifdef CHEETAHPLUS_ERRATUM_25
			cpuid = -1;
			for (i = 0; i < IDSR_BN_SETS; i++) {
				if (idsr & (IDSR_NACK_BIT(i) |
				    IDSR_BUSY_BIT(i))) {
					cpuid = cpuids[i];
					break;
				}
			}
			if (cheetah_sendmondo_recover && cpuid != -1 &&
			    recovered == 0) {
				if (mondo_recover(cpuid, i)) {
					/*
					 * We claimed the whole memory or
					 * full scan is disabled.
					 */
					recovered++;
				}
				tick = gettick();
				endtick = tick + xc_tick_limit;
				lasttick = tick;
				/*
				 * Recheck idsr
				 */
				continue;
			} else
#endif	/* CHEETAHPLUS_ERRATUM_25 */
			{
				cmn_err(CE_CONT, "send mondo timeout "
				    "[%d NACK %d BUSY]\nIDSR 0x%"
				    "" PRIx64 " cpuids:", nack, busy, idsr);
				for (i = 0; i < IDSR_BN_SETS; i++) {
					if (idsr & (IDSR_NACK_BIT(i) |
					    IDSR_BUSY_BIT(i))) {
						cmn_err(CE_CONT, " 0x%x",
						    cpuids[i]);
					}
				}
				cmn_err(CE_CONT, "\n");
				cmn_err(CE_PANIC, "send_mondo_set: timeout");
			}
		}
		curnack = idsr & nackmask;
		curbusy = idsr & busymask;
#if (NCPU > IDSR_BN_SETS)
		if (shipped < ncpuids) {
			uint64_t cpus_left;
			uint16_t next = (uint16_t)index;

			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
			    busymask;

			if (cpus_left) {
				do {
					/*
					 * Sequence through and ship to the
					 * remainder of the CPUs in the system
					 * (e.g. other than the first
					 * (IDSR_BN_SETS)) in reverse order.
					 */
					lo = lowbit(cpus_left) - 1;
					i = IDSR_BUSY_IDX(lo);
					shipit(next, i);
					shipped++;
					cpuids[i] = next;

					/*
					 * If we've processed all the CPUs,
					 * exit the loop now and save
					 * instructions.
					 */
					if (shipped == ncpuids)
						break;

					for ((index = ((int)next - 1));
					    index >= 0; index--)
						if (CPU_IN_SET(set, index)) {
							next = (uint16_t)index;
							break;
						}

					cpus_left &= ~(1ull << lo);
				} while (cpus_left);
#ifdef CHEETAHPLUS_ERRATUM_25
				/*
				 * Clear recovered because we are sending to
				 * a new set of targets.
				 */
				recovered = 0;
#endif
				continue;
			}
		}
#endif
		if (curbusy) {
			busy++;
			continue;
		}

#ifdef SEND_MONDO_STATS
		{
			int n = gettick() - starttick;
			if (n < 8192)
				x_nack_stimes[n >> 7]++;
		}
#endif
		while (gettick() < (tick + sys_clock_mhz))
			;
		do {
			lo = lowbit(curnack) - 1;
			i = IDSR_NACK_IDX(lo);
			shipit(cpuids[i], i);
			curnack &= ~(1ull << lo);
		} while (curnack);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_set_stimes[n >> 7]++;
		else
			x_set_ltimes[(n >> 13) & 0xf]++;
	}
	x_set_cpus[shipped]++;
#endif
}

/*
 * Handles error logging for implementation specific error types
 */
/*ARGSUSED1*/
int
cpu_impl_async_log_err(void *flt, errorq_elem_t *eqep)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;

	switch (ch_flt->flt_type) {

	case CPU_IC_PARITY:
		cpu_async_log_ic_parity_err(flt);
		return (CH_ASYNC_LOG_DONE);

	case CPU_DC_PARITY:
		cpu_async_log_dc_parity_err(flt);
		return (CH_ASYNC_LOG_DONE);

	case CPU_DUE:
		cpu_log_err(aflt);
		cpu_page_retire(ch_flt);
		return (CH_ASYNC_LOG_DONE);

	case CPU_ITLB_PARITY:
	case CPU_DTLB_PARITY:
		cpu_async_log_tlb_parity_err(flt);
		return (CH_ASYNC_LOG_DONE);

	default:
		return (CH_ASYNC_LOG_UNKNOWN);
	}
}

/*
 * Figure out if Ecache is direct-mapped (Cheetah or Cheetah+ with Ecache
 * control ECCR_ASSOC bit off) or 2-way (Cheetah+ with ECCR_ASSOC on).
 * We need to do this on the fly because we may have mixed Cheetah+'s with
 * both direct and 2-way Ecaches.  Panther only supports 4-way L3$.
 */
int
cpu_ecache_nway(void)
{
	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
		return (PN_L3_NWAYS);
	return ((get_ecache_ctrl() & ECCR_ASSOC) ? 2 : 1);
}

/*
 * Note that these are entered into the table: Fatal Errors (PERR, IERR, ISAP,
 * EMU, IMU) first, orphaned UCU/UCC, AFAR Overwrite policy, finally IVU, IVC.
 * Afar overwrite policy is:
 *   Class 4:
 *      AFSR     -- UCC, UCU, TUE, TSCE, TUE_SH
 *      AFSR_EXT -- L3_UCC, L3_UCU, L3_TUE, L3_TUE_SH
 *   Class 3:
 *      AFSR     -- UE, DUE, EDU, WDU, CPU
 *      AFSR_EXT -- L3_EDU, L3_WDU, L3_CPU
 *   Class 2:
 *      AFSR     -- CE, EDC, EMC, WDC, CPC, THCE
 *      AFSR_EXT -- L3_EDC, L3_WDC, L3_CPC, L3_THCE
 *   Class 1:
 *      AFSR     -- TO, DTO, BERR, DBERR
 */
ecc_type_to_info_t ecc_type_to_info[] = {

	/* Fatal Errors */
	C_AFSR_PERR, "PERR ", ECC_ALL_TRAPS,
		CPU_FATAL, "PERR Fatal",
		FM_EREPORT_PAYLOAD_SYSTEM2,
		FM_EREPORT_CPU_USIII_PERR,
	C_AFSR_IERR, "IERR ", ECC_ALL_TRAPS,
		CPU_FATAL, "IERR Fatal",
		FM_EREPORT_PAYLOAD_SYSTEM2,
		FM_EREPORT_CPU_USIII_IERR,
	C_AFSR_ISAP, "ISAP ", ECC_ALL_TRAPS,
		CPU_FATAL, "ISAP Fatal",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_ISAP,
	C_AFSR_L3_TUE_SH, "L3_TUE_SH ", ECC_C_TRAP,
		CPU_FATAL, "L3_TUE_SH Fatal",
		FM_EREPORT_PAYLOAD_L3_TAG_ECC,
		FM_EREPORT_CPU_USIII_L3_TUE_SH,
	C_AFSR_L3_TUE, "L3_TUE ", ECC_C_TRAP,
		CPU_FATAL, "L3_TUE Fatal",
		FM_EREPORT_PAYLOAD_L3_TAG_ECC,
		FM_EREPORT_CPU_USIII_L3_TUE,
	C_AFSR_TUE_SH, "TUE_SH ", ECC_C_TRAP,
		CPU_FATAL, "TUE_SH Fatal",
		FM_EREPORT_PAYLOAD_L2_TAG_ECC,
		FM_EREPORT_CPU_USIII_TUE_SH,
	C_AFSR_TUE, "TUE ", ECC_ALL_TRAPS,
		CPU_FATAL, "TUE Fatal",
		FM_EREPORT_PAYLOAD_L2_TAG_ECC,
		FM_EREPORT_CPU_USIII_TUE,
	C_AFSR_EMU, "EMU ", ECC_ASYNC_TRAPS,
		CPU_FATAL, "EMU Fatal",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_EMU,
	C_AFSR_IMU, "IMU ", ECC_C_TRAP,
		CPU_FATAL, "IMU Fatal",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_IMU,

	/* L3$ Address parity errors are reported via the MECC bit */
	C_AFSR_L3_MECC, "L3_MECC ", ECC_MECC_TRAPS,
		CPU_L3_ADDR_PE, "L3 Address Parity",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_MECC,

	/* Orphaned UCC/UCU Errors */
	C_AFSR_L3_UCU, "L3_OUCU ", ECC_ORPH_TRAPS,
		CPU_ORPH, "Orphaned L3_UCU",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_UCU,
	C_AFSR_L3_UCC, "L3_OUCC ", ECC_ORPH_TRAPS,
		CPU_ORPH, "Orphaned L3_UCC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_UCC,
	C_AFSR_UCU, "OUCU ", ECC_ORPH_TRAPS,
		CPU_ORPH, "Orphaned UCU",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_UCU,
	C_AFSR_UCC, "OUCC ", ECC_ORPH_TRAPS,
		CPU_ORPH, "Orphaned UCC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_UCC,

	/* UCU, UCC */
	C_AFSR_L3_UCU, "L3_UCU ", ECC_F_TRAP,
		CPU_UE_ECACHE, "L3_UCU",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_UCU,
	C_AFSR_L3_UCC, "L3_UCC ", ECC_F_TRAP,
		CPU_CE_ECACHE, "L3_UCC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_UCC,
	C_AFSR_UCU, "UCU ", ECC_F_TRAP,
		CPU_UE_ECACHE, "UCU",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_UCU,
	C_AFSR_UCC, "UCC ", ECC_F_TRAP,
		CPU_CE_ECACHE, "UCC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_UCC,
	C_AFSR_TSCE, "TSCE ", ECC_F_TRAP,
		CPU_CE_ECACHE, "TSCE",
		FM_EREPORT_PAYLOAD_L2_TAG_ECC,
		FM_EREPORT_CPU_USIII_TSCE,

	/* UE, EDU:ST, EDU:BLD, WDU, CPU */
	C_AFSR_UE, "UE ", ECC_ASYNC_TRAPS,
		CPU_UE, "Uncorrectable system bus (UE)",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_UE,
	C_AFSR_L3_EDU, "L3_EDU ", ECC_C_TRAP,
		CPU_UE_ECACHE_RETIRE, "L3_EDU:ST",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_EDUST,
	C_AFSR_L3_EDU, "L3_EDU ", ECC_D_TRAP,
		CPU_UE_ECACHE_RETIRE, "L3_EDU:BLD",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_EDUBL,
	C_AFSR_L3_WDU, "L3_WDU ", ECC_C_TRAP,
		CPU_UE_ECACHE_RETIRE, "L3_WDU",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_WDU,
	C_AFSR_L3_CPU, "L3_CPU ", ECC_C_TRAP,
		CPU_UE_ECACHE, "L3_CPU",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_CPU,
	C_AFSR_EDU, "EDU ", ECC_C_TRAP,
		CPU_UE_ECACHE_RETIRE, "EDU:ST",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_EDUST,
	C_AFSR_EDU, "EDU ", ECC_D_TRAP,
		CPU_UE_ECACHE_RETIRE, "EDU:BLD",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_EDUBL,
	C_AFSR_WDU, "WDU ", ECC_C_TRAP,
		CPU_UE_ECACHE_RETIRE, "WDU",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_WDU,
	C_AFSR_CPU, "CPU ", ECC_C_TRAP,
		CPU_UE_ECACHE, "CPU",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_CPU,
	C_AFSR_DUE, "DUE ", ECC_C_TRAP,
		CPU_DUE, "DUE",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_DUE,

	/* CE, EDC, EMC, WDC, CPC */
	C_AFSR_CE, "CE ", ECC_C_TRAP,
		CPU_CE, "Corrected system bus (CE)",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_CE,
	C_AFSR_L3_EDC, "L3_EDC ", ECC_C_TRAP,
		CPU_CE_ECACHE, "L3_EDC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_EDC,
	C_AFSR_EDC, "EDC ", ECC_C_TRAP,
		CPU_CE_ECACHE, "EDC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_EDC,
	C_AFSR_EMC, "EMC ", ECC_C_TRAP,
		CPU_EMC, "EMC",
		FM_EREPORT_PAYLOAD_MEMORY,
		FM_EREPORT_CPU_USIII_EMC,
	C_AFSR_L3_WDC, "L3_WDC ", ECC_C_TRAP,
		CPU_CE_ECACHE, "L3_WDC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_WDC,
	C_AFSR_L3_CPC, "L3_CPC ", ECC_C_TRAP,
		CPU_CE_ECACHE, "L3_CPC",
		FM_EREPORT_PAYLOAD_L3_DATA,
		FM_EREPORT_CPU_USIII_L3_CPC,
	C_AFSR_L3_THCE, "L3_THCE ", ECC_C_TRAP,
		CPU_CE_ECACHE, "L3_THCE",
		FM_EREPORT_PAYLOAD_L3_TAG_ECC,
		FM_EREPORT_CPU_USIII_L3_THCE,
	C_AFSR_WDC, "WDC ", ECC_C_TRAP,
		CPU_CE_ECACHE, "WDC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_WDC,
	C_AFSR_CPC, "CPC ", ECC_C_TRAP,
		CPU_CE_ECACHE, "CPC",
		FM_EREPORT_PAYLOAD_L2_DATA,
		FM_EREPORT_CPU_USIII_CPC,
	C_AFSR_THCE, "THCE ", ECC_C_TRAP,
		CPU_CE_ECACHE, "THCE",
		FM_EREPORT_PAYLOAD_L2_TAG_ECC,
		FM_EREPORT_CPU_USIII_THCE,

	/* TO, BERR */
	C_AFSR_TO, "TO ", ECC_ASYNC_TRAPS,
		CPU_TO, "Timeout (TO)",
		FM_EREPORT_PAYLOAD_IO,
		FM_EREPORT_CPU_USIII_TO,
	C_AFSR_BERR, "BERR ", ECC_ASYNC_TRAPS,
		CPU_BERR, "Bus Error (BERR)",
		FM_EREPORT_PAYLOAD_IO,
		FM_EREPORT_CPU_USIII_BERR,
	C_AFSR_DTO, "DTO ", ECC_C_TRAP,
		CPU_TO, "Disrupting Timeout (DTO)",
		FM_EREPORT_PAYLOAD_IO,
		FM_EREPORT_CPU_USIII_DTO,
	C_AFSR_DBERR, "DBERR ", ECC_C_TRAP,
		CPU_BERR, "Disrupting Bus Error (DBERR)",
		FM_EREPORT_PAYLOAD_IO,
		FM_EREPORT_CPU_USIII_DBERR,

	/* IVU, IVC, IMC */
	C_AFSR_IVU, "IVU ", ECC_C_TRAP,
		CPU_IV, "IVU",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_IVU,
	C_AFSR_IVC, "IVC ", ECC_C_TRAP,
		CPU_IV, "IVC",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_IVC,
	C_AFSR_IMC, "IMC ", ECC_C_TRAP,
		CPU_IV, "IMC",
		FM_EREPORT_PAYLOAD_SYSTEM1,
		FM_EREPORT_CPU_USIII_IMC,

	0, NULL, 0,
		0, NULL,
		FM_EREPORT_PAYLOAD_UNKNOWN,
		FM_EREPORT_CPU_USIII_UNKNOWN,
};

/*
 * See Cheetah+ Delta PRM 10.9 and section P.6.1 of the Panther PRM
 *   Class 4:
 *      AFSR     -- UCC, UCU, TUE, TSCE, TUE_SH
 *      AFSR_EXT -- L3_UCC, L3_UCU, L3_TUE, L3_TUE_SH
 *   Class 3:
 *      AFSR     -- UE, DUE, EDU, EMU, WDU, CPU
 *      AFSR_EXT -- L3_EDU, L3_WDU, L3_CPU
 *   Class 2:
 *      AFSR     -- CE, EDC, EMC, WDC, CPC, THCE
 *      AFSR_EXT -- L3_EDC, L3_WDC, L3_CPC, L3_THCE
 *   Class 1:
 *      AFSR     -- TO, DTO, BERR, DBERR
 *      AFSR_EXT --
 */
uint64_t afar_overwrite[] = {
	/* class 4: */
	C_AFSR_UCC | C_AFSR_UCU | C_AFSR_TUE | C_AFSR_TSCE | C_AFSR_TUE_SH |
	C_AFSR_L3_UCC | C_AFSR_L3_UCU | C_AFSR_L3_TUE | C_AFSR_L3_TUE_SH,
	/* class 3: */
	C_AFSR_UE | C_AFSR_DUE | C_AFSR_EDU | C_AFSR_EMU | C_AFSR_WDU |
	C_AFSR_CPU | C_AFSR_L3_EDU | C_AFSR_L3_WDU | C_AFSR_L3_CPU,
	/* class 2: */
	C_AFSR_CE | C_AFSR_EDC | C_AFSR_EMC | C_AFSR_WDC | C_AFSR_CPC |
	C_AFSR_THCE | C_AFSR_L3_EDC | C_AFSR_L3_WDC | C_AFSR_L3_CPC |
	C_AFSR_L3_THCE,
	/* class 1: */
	C_AFSR_TO | C_AFSR_DTO | C_AFSR_BERR | C_AFSR_DBERR,

	0
};

/*
 * See Cheetah+ Delta PRM 10.9.
 *   Class 2: UE, DUE, IVU, EDU, WDU, UCU, CPU
 *   Class 1: CE, IVC, EDC, WDC, UCC, CPC
 */
uint64_t esynd_overwrite[] = {
	/* class 2: */
	C_AFSR_UE | C_AFSR_DUE | C_AFSR_IVU | C_AFSR_EDU | C_AFSR_WDU |
	C_AFSR_UCU | C_AFSR_CPU,
	/* class 1: */
	C_AFSR_CE | C_AFSR_IVC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_UCC |
	C_AFSR_CPC,
	0
};

/*
 * In Panther, the E_SYND overwrite policy changes slightly by adding
 * one more class.
 *   Class 3:
 *      AFSR     -- UCU, UCC
 *      AFSR_EXT -- L3_UCU, L3_UCC
 *   Class 2:
 *      AFSR     -- UE, DUE, IVU, EDU, WDU, CPU
 *      AFSR_EXT -- L3_EDU, L3_WDU, L3_CPU
 *   Class 1:
 *      AFSR     -- CE, IVC, EDC, WDC, CPC
 *      AFSR_EXT -- L3_EDC, L3_WDC, L3_CPC
 */
uint64_t pn_esynd_overwrite[] = {
	/* class 3: */
	C_AFSR_UCU | C_AFSR_UCC |
	C_AFSR_L3_UCU | C_AFSR_L3_UCC,
	/* class 2: */
	C_AFSR_UE | C_AFSR_DUE | C_AFSR_IVU | C_AFSR_EDU | C_AFSR_WDU |
	C_AFSR_CPU |
	C_AFSR_L3_EDU | C_AFSR_L3_WDU | C_AFSR_L3_CPU,
	/* class 1: */
	C_AFSR_CE | C_AFSR_IVC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_CPC |
	C_AFSR_L3_EDC | C_AFSR_L3_WDC | C_AFSR_L3_CPC,

	0
};

int
afsr_to_pn_esynd_status(uint64_t afsr, uint64_t afsr_bit)
{
	return (afsr_to_overw_status(afsr, afsr_bit, pn_esynd_overwrite));
}

/*
 * Prioritized list of Error bits for MSYND overwrite.
 * See Cheetah PRM P.6.3
 *   Class 2: EMU
 *   Class 1: EMC
 *
 * Panther adds IMU and IMC.
 */
uint64_t msynd_overwrite[] = {
	/* class 2: */
	C_AFSR_EMU | C_AFSR_IMU,
	/* class 1: */
	C_AFSR_EMC | C_AFSR_IMC,

	0
};

/*
 * Change cpu speed bits -- new speed will be normal-speed/divisor.
 *
 * The Jalapeno memory controllers are required to drain outstanding
 * memory transactions within 32 JBus clocks in order to be ready
 * to enter Estar mode.  In some corner cases, however, that time
 * fell short.
 *
 * A safe software solution is to force the MCU to act as if it were in
 * Estar mode, then delay 1us (in ppm code) prior to asserting the
 * J_CHNG_L signal.
 * To reverse the effect, upon exiting Estar, software restores the
 * MCU to its original state.
 */
/* ARGSUSED1 */
void
cpu_change_speed(uint64_t divisor, uint64_t arg2)
{
	bus_config_eclk_t *bceclk;
	uint64_t reg;

	for (bceclk = bus_config_eclk; bceclk->divisor; bceclk++) {
		if (bceclk->divisor != divisor)
			continue;
		reg = get_safari_config();
		reg &= ~SAFARI_CONFIG_ECLK_MASK;
		reg |= bceclk->mask;
		set_safari_config(reg);
		CPU->cpu_m.divisor = (uchar_t)divisor;
		return;
	}
	/*
	 * We will reach here only if OBP and kernel don't agree on
	 * the speeds supported by the CPU.
	 */
	cmn_err(CE_WARN, "cpu_change_speed: bad divisor %" PRIu64, divisor);
}

/*
 * Cpu private initialization.  This includes allocating the cpu_private
 * data structure, initializing it, and initializing the scrubber for this
 * cpu.  This function calls cpu_init_ecache_scrub_dr to init the scrubber.
 * We use kmem_cache_create for the cheetah private data structure because
 * it needs to be allocated on a PAGESIZE (8192) byte boundary.
 */
void
cpu_init_private(struct cpu *cp)
{
	cheetah_private_t *chprp;
	int i;

	ASSERT(CPU_PRIVATE(cp) == NULL);

	/* LINTED: E_TRUE_LOGICAL_EXPR */
	ASSERT((offsetof(cheetah_private_t, chpr_tl1_err_data) +
	    sizeof (ch_err_tl1_data_t) * CH_ERR_TL1_TLMAX) <= PAGESIZE);

	/*
	 * Running with Cheetah CPUs in a Cheetah+, Jaguar, Panther or
	 * mixed Cheetah+/Jaguar/Panther machine is not a supported
	 * configuration.  Attempting to do so may result in unpredictable
	 * failures (e.g. running Cheetah+ CPUs with Cheetah E$ disp flush)
	 * so don't allow it.
	 *
	 * This is just defensive code since this configuration mismatch
	 * should have been caught prior to OS execution.
	 */
	if (!(IS_CHEETAH_PLUS(cpunodes[cp->cpu_id].implementation) ||
	    IS_JAGUAR(cpunodes[cp->cpu_id].implementation) ||
	    IS_PANTHER(cpunodes[cp->cpu_id].implementation))) {
		cmn_err(CE_PANIC, "CPU%d: UltraSPARC-III not supported"
		    " on UltraSPARC-III+/IV/IV+ code\n", cp->cpu_id);
	}

	/*
	 * If the ch_private_cache has not been created, create it.
	 */
	if (ch_private_cache == NULL) {
		ch_private_cache = kmem_cache_create("ch_private_cache",
		    sizeof (cheetah_private_t), PAGESIZE, NULL, NULL,
		    NULL, NULL, static_arena, 0);
	}

	chprp = CPU_PRIVATE(cp) = kmem_cache_alloc(ch_private_cache, KM_SLEEP);

	bzero(chprp, sizeof (cheetah_private_t));
	chprp->chpr_fecctl0_logout.clo_data.chd_afar = LOGOUT_INVALID;
	chprp->chpr_cecc_logout.clo_data.chd_afar = LOGOUT_INVALID;
	chprp->chpr_async_logout.clo_data.chd_afar = LOGOUT_INVALID;
	chprp->chpr_tlb_logout.tlo_addr = LOGOUT_INVALID;
	for (i = 0; i < CH_ERR_TL1_TLMAX; i++)
		chprp->chpr_tl1_err_data[i].ch_err_tl1_logout.clo_data.chd_afar
		    = LOGOUT_INVALID;

	/* Panther has a larger Icache compared to cheetahplus or Jaguar */
	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation)) {
		chprp->chpr_icache_size = PN_ICACHE_SIZE;
		chprp->chpr_icache_linesize = PN_ICACHE_LSIZE;
	} else {
		chprp->chpr_icache_size = CH_ICACHE_SIZE;
		chprp->chpr_icache_linesize = CH_ICACHE_LSIZE;
	}

	cpu_init_ecache_scrub_dr(cp);

	/*
	 * Panther's L2$ and E$ are shared between cores, so the scrubber is
	 * only needed on one of the cores.
	 * At this point, we assume all cores are online, and we only
	 * enable the scrubber on core 0.
	 */
	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation)) {
		chprp->chpr_scrub_misc.chsm_core_state =
		    SCRUBBER_BOTH_CORES_ONLINE;
		if (cp->cpu_id != (processorid_t)cmp_cpu_to_chip(cp->cpu_id)) {
			chprp->chpr_scrub_misc.chsm_enable[
			    CACHE_SCRUBBER_INFO_E] = 0;
		}
	}

	chprp->chpr_ec_set_size = cpunodes[cp->cpu_id].ecache_size /
	    cpu_ecache_nway();

	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
	ch_err_tl1_paddrs[cp->cpu_id] = va_to_pa(chprp);
	ASSERT(ch_err_tl1_paddrs[cp->cpu_id] != -1);
}

/*
 * Clear the error state registers for this CPU.
 * For Cheetah+/Jaguar, just clear the AFSR but
 * for Panther we also have to clear the AFSR_EXT.
 */
void
set_cpu_error_state(ch_cpu_errors_t *cpu_error_regs)
{
	set_asyncflt(cpu_error_regs->afsr & ~C_AFSR_FATAL_ERRS);
	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
		set_afsr_ext(cpu_error_regs->afsr_ext & ~C_AFSR_EXT_FATAL_ERRS);
	}
}

void
pn_cpu_log_diag_l2_info(ch_async_flt_t *ch_flt) {
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ch_ec_data_t *l2_data = &ch_flt->flt_diag_data.chd_l2_data[0];
	uint64_t faddr = aflt->flt_addr;
	uint8_t log_way_mask = 0;
	int i;

	/*
	 * Only Panther CPUs have the additional L2$ data that needs
	 * to be logged here
	 */
	if (!IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
		return;

	/*
	 * We'll use a simple bit mask to keep track of which way(s)
	 * of the stored cache line we want to log.  The idea is to
	 * log the entry if it is a valid line and it matches our
	 * fault AFAR.  If no match is found, we will simply log all
	 * the ways.
	 */
	for (i = 0; i < PN_L2_NWAYS; i++)
		if (pn_matching_valid_l2_line(faddr, &l2_data[i]))
			log_way_mask |= (1 << i);

	/* If no matching valid lines were found, we log all ways */
	if (log_way_mask == 0)
		log_way_mask = (1 << PN_L2_NWAYS) - 1;

	/* Log the cache lines */
	for (i = 0; i < PN_L2_NWAYS; i++)
		if (log_way_mask & (1 << i))
			l2_data[i].ec_logflag = EC_LOGFLAG_MAGIC;
}

/*
 * For this routine to return true, the L2 tag in question must be valid
 * and the tag PA must match the fault address (faddr) assuming the correct
 * index is being used.
 */
static int
pn_matching_valid_l2_line(uint64_t faddr, ch_ec_data_t *clo_l2_data) {
	if ((!PN_L2_LINE_INVALID(clo_l2_data->ec_tag)) &&
	    ((faddr & P2ALIGN(C_AFAR_PA, PN_L2_SET_SIZE)) ==
	    PN_L2TAG_TO_PA(clo_l2_data->ec_tag)))
		return (1);
	return (0);
}

/*
 * This array is used to convert the 3 digit PgSz encoding (as used in
 * various MMU registers such as MMU_TAG_ACCESS_EXT) into the corresponding
 * page size.
 */
static uint64_t tlb_pgsz_to_size[] = {
	/* 000 = 8KB: */
	0x2000,
	/* 001 = 64KB: */
	0x10000,
	/* 010 = 512KB: */
	0x80000,
	/* 011 = 4MB: */
	0x400000,
	/* 100 = 32MB: */
	0x2000000,
	/* 101 = 256MB: */
	0x10000000,
	/* undefined for encodings 110 and 111: */
	0, 0
};

/*
 * The itlb_parity_trap and dtlb_parity_trap handlers transfer control here
 * after collecting logout information related to the TLB parity error and
 * flushing the offending TTE entries from the ITLB or DTLB.
 *
 * DTLB traps which occur at TL>0 are not recoverable because we will most
 * likely be corrupting some other trap handler's alternate globals.  As
 * such, we simply panic here when that happens.  ITLB parity errors are
 * not expected to happen at TL>0.
 */
void
cpu_tlb_parity_error(struct regs *rp, ulong_t trap_va, ulong_t tlb_info) {
	ch_async_flt_t ch_flt;
	struct async_flt *aflt;
	pn_tlb_logout_t *tlop = NULL;
	int immu_parity = (tlb_info & PN_TLO_INFO_IMMU) != 0;
	int tl1_trap = (tlb_info & PN_TLO_INFO_TL1) != 0;
	char *error_class;

	bzero(&ch_flt, sizeof (ch_async_flt_t));

	/*
	 * Get the CPU log out info.  If we can't find our CPU private
	 * pointer, or if the logout information does not correspond to
	 * this error, then we will have to make do without detailed
	 * logout information.
	 */
	if (CPU_PRIVATE(CPU)) {
		tlop = CPU_PRIVATE_PTR(CPU, chpr_tlb_logout);
		if ((tlop->tlo_addr != trap_va) ||
		    (tlop->tlo_info != tlb_info))
			tlop = NULL;
	}

	if (tlop) {
		ch_flt.tlb_diag_data = *tlop;

		/* Zero out + invalidate TLB logout. */
		bzero(tlop, sizeof (pn_tlb_logout_t));
		tlop->tlo_addr = LOGOUT_INVALID;
	} else {
		/*
		 * Copy what logout information we have and mark
		 * it incomplete.
		 */
		ch_flt.flt_data_incomplete = 1;
		ch_flt.tlb_diag_data.tlo_info = tlb_info;
		ch_flt.tlb_diag_data.tlo_addr = trap_va;
	}

	/*
	 * Log the error.
	 */
	aflt = (struct async_flt *)&ch_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_addr = trap_va;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
	aflt->flt_tl = tl1_trap ? 1 : 0;
	aflt->flt_panic = tl1_trap ? 1 : 0;

	if (immu_parity) {
		aflt->flt_status = ECC_ITLB_TRAP;
		ch_flt.flt_type = CPU_ITLB_PARITY;
		error_class = FM_EREPORT_CPU_USIII_ITLBPE;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_ITLB_PE;
	} else {
		aflt->flt_status = ECC_DTLB_TRAP;
		ch_flt.flt_type = CPU_DTLB_PARITY;
		error_class = FM_EREPORT_CPU_USIII_DTLBPE;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_DTLB_PE;
	}

	/*
	 * The TLB entries have already been flushed by the TL1 trap
	 * handler so at this point the only thing left to do is log
	 * the error message.
	 */
	if (aflt->flt_panic) {
		cpu_errorq_dispatch(error_class, (void *)&ch_flt,
		    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
		/*
		 * Panic here if aflt->flt_panic has been set.  Enqueued
		 * errors will be logged as part of the panic flow.
		 */
		fm_panic("%sError(s)", immu_parity ? "ITLBPE " : "DTLBPE ");
	} else {
		cpu_errorq_dispatch(error_class, (void *)&ch_flt,
		    sizeof (ch_async_flt_t), ce_queue, aflt->flt_panic);
	}
}

/*
 * This routine is called when a TLB parity error event is 'ue_drain'ed
 * or 'ce_drain'ed from the errorq.
 */
void
cpu_async_log_tlb_parity_err(void *flt) {
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;
#ifdef lint
	aflt = aflt;
#endif

	/*
	 * We only capture TLB information if we encountered
	 * a TLB parity error and Panther is the only CPU which
	 * can detect a TLB parity error.
	 */
	ASSERT(IS_PANTHER(cpunodes[aflt->flt_inst].implementation));
	ASSERT((ch_flt->flt_type == CPU_ITLB_PARITY) ||
	    (ch_flt->flt_type == CPU_DTLB_PARITY));

	if (ch_flt->flt_data_incomplete == 0) {
		if (ch_flt->flt_type == CPU_ITLB_PARITY)
			ch_flt->tlb_diag_data.tlo_logflag = IT_LOGFLAG_MAGIC;
		else /* parity error is in DTLB */
			ch_flt->tlb_diag_data.tlo_logflag = DT_LOGFLAG_MAGIC;
	}
}

/*
 * Add L1 Prefetch cache data to the ereport payload.
 */
void
cpu_payload_add_pcache(struct async_flt *aflt, nvlist_t *nvl)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	ch_pc_data_t *pcp;
	ch_pc_data_t pcdata[CH_PCACHE_NWAY];
	uint_t nelem;
	int i, ways_logged = 0;

	/*
	 * We only capture P$ information if we encountered
	 * a P$ parity error and Panther is the only CPU which
	 * can detect a P$ parity error.
	 */
	ASSERT(IS_PANTHER(cpunodes[aflt->flt_inst].implementation));
	for (i = 0; i < CH_PCACHE_NWAY; i++) {
		pcp = &ch_flt->parity_data.dpe.cpl_pc[i];
		if (pcp->pc_logflag == PC_LOGFLAG_MAGIC) {
			bcopy(pcp, &pcdata[ways_logged],
			    sizeof (ch_pc_data_t));
			ways_logged++;
		}
	}

	/*
	 * Add the pcache data to the payload.
	 */
	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1P_WAYS,
	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
	if (ways_logged != 0) {
		nelem = sizeof (ch_pc_data_t) / sizeof (uint64_t) * ways_logged;
		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1P_DATA,
		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)pcdata, NULL);
	}
}

/*
 * Add TLB diagnostic data to the ereport payload.
 */
void
cpu_payload_add_tlb(struct async_flt *aflt, nvlist_t *nvl)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	uint8_t num_entries, tlb_data_words;

	/*
	 * We only capture TLB information if we encountered
	 * a TLB parity error and Panther is the only CPU which
	 * can detect a TLB parity error.
	 */
	ASSERT(IS_PANTHER(cpunodes[aflt->flt_inst].implementation));
	ASSERT((ch_flt->flt_type == CPU_ITLB_PARITY) ||
	    (ch_flt->flt_type == CPU_DTLB_PARITY));

	if (ch_flt->flt_type == CPU_ITLB_PARITY) {
		num_entries = (uint8_t)(PN_ITLB_NWAYS * PN_NUM_512_ITLBS);
		tlb_data_words = sizeof (ch_tte_entry_t) / sizeof (uint64_t) *
		    num_entries;

		/*
		 * Add the TLB diagnostic data to the payload
		 * if it was collected.
		 */
		if (ch_flt->tlb_diag_data.tlo_logflag == IT_LOGFLAG_MAGIC) {
			fm_payload_set(nvl,
			    FM_EREPORT_PAYLOAD_NAME_ITLB_ENTRIES,
			    DATA_TYPE_UINT8, num_entries, NULL);
			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_ITLB_DATA,
			    DATA_TYPE_UINT64_ARRAY, tlb_data_words,
			    (uint64_t *)ch_flt->tlb_diag_data.tlo_itlb_tte,
			    NULL);
		}
	} else {
		num_entries = (uint8_t)(PN_DTLB_NWAYS * PN_NUM_512_DTLBS);
		tlb_data_words = sizeof (ch_tte_entry_t) / sizeof (uint64_t) *
		    num_entries;

		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_VA,
		    DATA_TYPE_UINT64, ch_flt->tlb_diag_data.tlo_addr, NULL);

		/*
		 * Add the TLB diagnostic data to the payload
		 * if it was collected.
		 */
		if (ch_flt->tlb_diag_data.tlo_logflag == DT_LOGFLAG_MAGIC) {
			fm_payload_set(nvl,
			    FM_EREPORT_PAYLOAD_NAME_DTLB_ENTRIES,
			    DATA_TYPE_UINT8, num_entries, NULL);
			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_DTLB_DATA,
			    DATA_TYPE_UINT64_ARRAY, tlb_data_words,
			    (uint64_t *)ch_flt->tlb_diag_data.tlo_dtlb_tte,
			    NULL);
		}
	}
}

/*
 * Panther Cache Scrubbing:
 *
 * In Jaguar, the E$ was split between cores, so the scrubber must run on both
 * cores.  For Panther, however, the L2$ and L3$ are shared across cores.
 * Therefore, the E$ scrubber only needs to run on one of the two cores.
 *
 * There are four possible states for the E$ scrubber:
 *
 * 0. If both cores are offline, add core 0 to cpu_offline_set so that
 *    the offline scrubber will run on it.
 * 1. If core 0 is online and core 1 off, we run the scrubber on core 0.
 * 2. If core 1 is online and core 0 off, we move the scrubber to run
 *    on core 1.
 * 3. If both cores are online, only run the scrubber on core 0.
 *
 * These states are enumerated by the SCRUBBER_[BOTH|CORE|NEITHER]_* defines
 * above.  One of those values is stored in
 * chpr_scrub_misc->chsm_core_state on each core.
 *
 * Also note that, for Panther, ecache_flush_line() will flush out the L2$
 * before the E$, so the L2$ will be scrubbed by the E$ scrubber.  No
 * additional code is necessary to scrub the L2$.
 *
 * For all cpu types, whenever a cpu or core is offlined, add it to
 * cpu_offline_set so the necessary scrubbers can still run.  This is still
 * necessary on Panther so the D$ scrubber can still run.
 */
/*ARGSUSED*/
int
cpu_scrub_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
{
	processorid_t core_0_id;
	cpu_t *core_cpus[2];
	ch_scrub_misc_t *core_scrub[2];
	int old_state, i;
	int new_state = SCRUBBER_NEITHER_CORE_ONLINE;

	switch (what) {
	case CPU_ON:
	case CPU_INIT:
		CPUSET_DEL(cpu_offline_set, cpuid);
		break;
	case CPU_OFF:
		CPUSET_ADD(cpu_offline_set, cpuid);
		break;
	default:
		return (0);
	}

	if (!IS_PANTHER(cpunodes[cpuid].implementation)) {
		return (0);
	}

	/*
	 * Update the chsm_enable[CACHE_SCRUBBER_INFO_E] value
	 * if necessary
	 */
	core_0_id = cmp_cpu_to_chip(cpuid);
	core_cpus[0] = cpu_get(core_0_id);
	core_cpus[1] = cpu_get_sibling_core(core_cpus[0]);

	for (i = 0; i < 2; i++) {
		if (core_cpus[i] == NULL) {
			/*
			 * This should only happen if one of the two cores is
			 * blacklisted, which should only happen when we're
			 * doing hardware bringup or debugging.  Give up and
			 * return quietly.
			 */
			return (0);
		}
		core_scrub[i] = CPU_PRIVATE_PTR(core_cpus[i], chpr_scrub_misc);
	}

	if (cpuid == (processorid_t)cmp_cpu_to_chip(cpuid)) {
		/* cpuid is core 0 */
		if (cpu_is_active(core_cpus[1])) {
			new_state |= SCRUBBER_CORE_1_ONLINE;
		}
		if (what != CPU_OFF) {
			new_state |= SCRUBBER_CORE_0_ONLINE;
		}
	} else {
		/* cpuid is core 1 */
		if (cpu_is_active(core_cpus[0])) {
			new_state |= SCRUBBER_CORE_0_ONLINE;
		}
		if (what != CPU_OFF) {
			new_state |= SCRUBBER_CORE_1_ONLINE;
		}
	}

	old_state = core_scrub[0]->chsm_core_state;

	if (old_state == new_state) {
		return (0);
	}

	if (old_state == SCRUBBER_CORE_1_ONLINE) {
		/*
		 * We need to move the scrubber state from core 1
		 * back to core 0.  This data is not protected by
		 * locks, but the worst that can happen is some
		 * lines are scrubbed multiple times.  chsm_outstanding is
		 * set to 0 to make sure an interrupt is scheduled the
		 * first time through do_scrub().
		 */
		core_scrub[0]->chsm_flush_index[CACHE_SCRUBBER_INFO_E] =
		    core_scrub[1]->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
		core_scrub[0]->chsm_outstanding[CACHE_SCRUBBER_INFO_E] = 0;
	}

	switch (new_state) {
	case SCRUBBER_NEITHER_CORE_ONLINE:
	case SCRUBBER_BOTH_CORES_ONLINE:
	case SCRUBBER_CORE_0_ONLINE:
		core_scrub[1]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 0;
		core_scrub[0]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
		break;

	case SCRUBBER_CORE_1_ONLINE:
	default:
		/*
		 * We need to move the scrubber state from core 0
		 * to core 1.
		 */
		core_scrub[1]->chsm_flush_index[CACHE_SCRUBBER_INFO_E] =
		    core_scrub[0]->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
		core_scrub[1]->chsm_outstanding[CACHE_SCRUBBER_INFO_E] = 0;

		core_scrub[0]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 0;
		core_scrub[1]->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
		break;
	}

	core_scrub[0]->chsm_core_state = new_state;
	core_scrub[1]->chsm_core_state = new_state;
	return (0);
}

/*
 * Returns a pointer to the cpu structure of the argument's sibling core.
 * If no sibling core can be found, return NULL.
 */
static cpu_t *
cpu_get_sibling_core(cpu_t *cpup)
{
	cpu_t *nextp;

	if (!cmp_cpu_is_cmp(cpup->cpu_id))
		return (NULL);

	nextp = cpup->cpu_next_chip;
	if ((nextp == NULL) || (nextp == cpup))
		return (NULL);

	return (nextp);
}