1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Support for Olympus-C (SPARC64-VI) and Jupiter (SPARC64-VII). 
 */


#include <sys/types.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sysmacros.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/machthread.h>
#include <sys/cpu.h>
#include <sys/cmp.h>
#include <sys/elf_SPARC.h>
#include <vm/vm_dep.h>
#include <vm/hat_sfmmu.h>
#include <vm/seg_kpm.h>
#include <vm/seg_kmem.h>
#include <sys/cpuvar.h>
#include <sys/opl_olympus_regs.h>
#include <sys/opl_module.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/cpu_module.h>
/* NOTE(review): <sys/sysmacros.h> is already included above. */
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/clock.h>
#include <sys/platform_module.h>
#include <sys/ontrap.h>
#include <sys/panic.h>
#include <sys/memlist.h>
#include <sys/ndifm.h>
#include <sys/ddifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/cpu/SPARC64-VI.h>
#include <sys/dtrace.h>
#include <sys/watchpoint.h>
#include <sys/promif.h>

/*
 * Internal functions.
 */
static int cpu_sync_log_err(void *flt);
static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *);
static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t);
static int cpu_flt_in_memory(opl_async_flt_t *, uint64_t);
static int prom_SPARC64VII_support_enabled(void);
static void opl_ta3();
static int plat_prom_preserve_kctx_is_supported(void);

/*
 * Interval at which the error counters are reset (60, i.e. one minute).
 */
static int opl_async_check_interval = 60;		/* 1 min */

/*
 * NOTE(review): presumably tells common code that this implementation
 * programs a pair of page sizes per context -- confirm against the users
 * of this variable, which are not in this file section.
 */
uint_t cpu_impl_dual_pgsz = 1;

/*
 * PA[22:0] represents the displacement within the Jupiter
 * configuration space.
 */
uint_t	root_phys_addr_lo_mask = 0x7fffffu;

/*
 * set in /etc/system to control logging of user BERR/TO's
 */
int cpu_berr_to_verbose = 0;

/*
 * Set to 1 if booted with all Jupiter cpus (all-Jupiter features enabled).
 * Only cpu_fix_alljupiter() sets it in this file.
 */
int cpu_alljupiter = 0;

/*
 * The sfmmu_cext field to be used by processes in a shared context domain.
 * Starts out as the (default ISM page size, 8K) pair and is rewritten by
 * mmu_init_large_pages() when a different ISM page size is selected.
 */
static uchar_t shctx_cext = TAGACCEXT_MKSZPAIR(DEFAULT_ISM_PAGESZC, TTE8K);

static int min_ecache_size;
static uint_t priv_hcl_1;
static uint_t priv_hcl_2;
static uint_t priv_hcl_4;
static uint_t priv_hcl_8;

/*
 * Olympus error log
 *
 * opl_err_log is allocated in cpu_error_init(); opl_cpu0_log_setup is set
 * to 1 by cpu_setup() once the cpu0 error log has been registered.
 */
static opl_errlog_t	*opl_err_log;
static int		opl_cpu0_log_setup;

/*
 * OPL ta 3 save area.
 */
char	*opl_ta3_save;

/*
 * Table describing every error type this module reports.  Each entry is
 * seven initializers long and the table is terminated by an all-zero/NULL
 * entry.
 *
 * UE is classified into four classes (MEM, CHANNEL, CPU, PATH).
 * No other ecc_type_info entry may be inserted between the following
 * four UE classes.
 */
ecc_type_to_info_t ecc_type_to_info[] = {
	/* Synchronous errors (SFSR bits). */
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_UE,
	"Uncorrectable ECC",	FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_MEM,
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_UE,
	"Uncorrectable ECC",	FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_CHANNEL,
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_UE,
	"Uncorrectable ECC",	FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_CPU,
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_UE,
	"Uncorrectable ECC",	FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_PATH,
	SFSR_BERR, "BERR ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_OTHERS,
	"Bus Error",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_BERR,
	SFSR_TO, "TO ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_OTHERS,
	"Bus Timeout",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_BTO,
	SFSR_TLB_MUL, "TLB_MUL ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_OTHERS,
	"TLB MultiHit",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_MTLB,
	SFSR_TLB_PRT, "TLB_PRT ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_OTHERS,
	"TLB Parity",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_TLBP,

	/* Urgent errors (UGESR bits). */
	UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP,    OPL_CPU_URGENT,
	"IAUG CRE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_CRE,
	UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT",
	OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IAUG TSBCTXT",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSBCTX,
	UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP,    OPL_CPU_URGENT,
	"IUG TSBP",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSBP,
	UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IUG PSTATE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_PSTATE,
	UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IUG TSTATE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSTATE,
	UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IUG FREG",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IUG_F,
	UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IUG RREG",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IUG_R,
	UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"AUG SDC",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_SDC,
	UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IUG WDT",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_WDT,
	UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IUG DTLB",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_DTLB,
	UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IUG ITLB",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_ITLB,
	UGESR_IUG_COREERR, "IUG_COREERR",
	OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IUG COREERR",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_CORE,
	UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"MULTI DAE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_DAE,
	UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"MULTI IAE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IAE,
	UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"MULTI UGE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_UGE,

	/* Terminator. */
	0,		NULL,		0,		0,
	NULL,  0,	   0,
};

/*
 * Hook for a memory-information lookup routine.  It is not assigned in
 * this part of the file.
 * NOTE(review): presumably installed by platform/memory-controller code --
 * confirm where it is set and whether callers test it for NULL.
 */
int (*p2get_mem_info)(int synd_code, uint64_t paddr,
		uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
		int *segsp, int *banksp, int *mcidp);


/*
 * Setup trap handlers for 0xA, 0x32, 0x40 trap types
 * and "ta 3" and "ta 4".
 *
 * The iae/dae entries receive opl_serr_instr, the asdat entries receive
 * opl_ugerr_instr, the flushw entry receives opl_ta3_instr, and
 * opl_cleanw_patch is patched with opl_ta4_instr.
 */
void
cpu_init_trap(void)
{
	OPL_SET_TRAP(tt0_iae, opl_serr_instr);
	OPL_SET_TRAP(tt1_iae, opl_serr_instr);
	OPL_SET_TRAP(tt0_dae, opl_serr_instr);
	OPL_SET_TRAP(tt1_dae, opl_serr_instr);
	OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr);
	OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr);
	OPL_SET_TRAP(tt0_flushw, opl_ta3_instr);
	OPL_PATCH_28(opl_cleanw_patch, opl_ta4_instr);
}

/*
 * Read the integer PROM property 'name' of 'node'.
 *
 * Returns the property value when the property's length is exactly
 * sizeof (int); in every other case (including a missing property)
 * returns 'deflt'.
 */
static int
getintprop(pnode_t node, char *name, int deflt)
{
	int	value;

	switch (prom_getproplen(node, name)) {
	case sizeof (int):
		(void) prom_getprop(node, name, (caddr_t)&value);
		break;

	default:
		value = deflt;
		break;
	}

	return (value);
}

/*
 * Set the magic constants of the implementation.
251 */ 252 /*ARGSUSED*/ 253 void 254 cpu_fiximp(pnode_t dnode) 255 { 256 int i, a; 257 extern int vac_size, vac_shift; 258 extern uint_t vac_mask; 259 260 static struct { 261 char *name; 262 int *var; 263 int defval; 264 } prop[] = { 265 "l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE, 266 "l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE, 267 "l1-icache-size", &icache_size, OPL_ICACHE_SIZE, 268 "l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE, 269 "l2-cache-size", &ecache_size, OPL_ECACHE_SIZE, 270 "l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE, 271 "l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY 272 }; 273 274 for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) 275 *prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval); 276 277 ecache_setsize = ecache_size / ecache_associativity; 278 279 vac_size = OPL_VAC_SIZE; 280 vac_mask = MMU_PAGEMASK & (vac_size - 1); 281 i = 0; a = vac_size; 282 while (a >>= 1) 283 ++i; 284 vac_shift = i; 285 shm_alignment = vac_size; 286 vac = 1; 287 } 288 289 /* 290 * Enable features for Jupiter-only domains. 291 */ 292 void 293 cpu_fix_alljupiter(void) 294 { 295 if (!prom_SPARC64VII_support_enabled()) { 296 /* 297 * Do not enable all-Jupiter features and do not turn on 298 * the cpu_alljupiter flag. 299 */ 300 return; 301 } 302 303 cpu_alljupiter = 1; 304 305 /* 306 * Enable ima hwcap for Jupiter-only domains. DR will prevent 307 * addition of Olympus-C to all-Jupiter domains to preserve ima 308 * hwcap semantics. 309 */ 310 cpu_hwcap_flags |= AV_SPARC_IMA; 311 312 /* 313 * Enable shared context support. 314 */ 315 shctx_on = 1; 316 } 317 318 #ifdef OLYMPUS_C_REV_B_ERRATA_XCALL 319 /* 320 * Quick and dirty way to redefine locally in 321 * OPL the value of IDSR_BN_SETS to 31 instead 322 * of the standard 32 value. This is to workaround 323 * REV_B of Olympus_c processor's problem in handling 324 * more than 31 xcall broadcast. 
 */
#undef	IDSR_BN_SETS
#define	IDSR_BN_SETS 31
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */

/*
 * Send a mondo to every CPU in 'set' and wait until all of them have
 * been accepted.
 *
 * Each dispatch occupies one of IDSR_BN_SETS slots; cpuids[slot] remembers
 * which CPU currently owns a slot so that a NACKed dispatch can be re-sent
 * to the same CPU.  When NCPU exceeds IDSR_BN_SETS only the first
 * IDSR_BN_SETS CPUs (or bn_sets CPUs with the REV_A workaround) are
 * shipped up front; the remaining ones are shipped from the highest
 * numbered CPU downwards as slots become free.
 *
 * The loop polls the IDSR.  If xc_tick_limit ticks pass without completion
 * the function panics, unless panic_quiesce is set, in which case it
 * returns silently.
 *
 * 'set' must not be empty (asserted).
 */
void
send_mondo_set(cpuset_t set)
{
	int lo, busy, nack, shipped = 0;
	uint16_t i, cpuids[IDSR_BN_SETS];
	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
	uint64_t starttick, endtick, tick, lasttick;
#if (NCPU > IDSR_BN_SETS)
	int index = 0;		/* highest numbered CPU not yet shipped */
	int ncpuids = 0;	/* number of CPUs found in 'set' */
#endif
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	int bn_sets = IDSR_BN_SETS;
	uint64_t ver;

	ASSERT(NCPU > bn_sets);
#endif

	ASSERT(!CPUSET_ISNULL(set));
	starttick = lasttick = gettick();

#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	/* On Olympus-C rev A only one dispatch slot is used at a time. */
	ver = ultra_getver();
	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
	    ((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
		bn_sets = 1;
#endif

#if (NCPU <= IDSR_BN_SETS)
	/* Every CPU fits in a slot: ship to all of them right away. */
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			shipit(i, shipped);
			nackmask |= IDSR_NACK_BIT(shipped);
			cpuids[shipped++] = i;
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
#else
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			ncpuids++;

			/*
			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
			 * find we have shipped to more than (IDSR_BN_SETS)
			 * CPUs, set "index" to the highest numbered CPU in
			 * the set so we can ship to other CPUs a bit later on.
			 */
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			if (shipped < bn_sets) {
#else
			if (shipped < IDSR_BN_SETS) {
#endif
				shipit(i, shipped);
				nackmask |= IDSR_NACK_BIT(shipped);
				cpuids[shipped++] = i;
				CPUSET_DEL(set, i);
				if (CPUSET_ISNULL(set))
					break;
			} else
				index = (int)i;
		}

	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
#endif

	busymask = IDSR_NACK_TO_BUSY(nackmask);
	busy = nack = 0;
	endtick = starttick + xc_tick_limit;
	for (;;) {
		idsr = getidsr();
#if (NCPU <= IDSR_BN_SETS)
		if (idsr == 0)
			break;
#else
		/* Done only when nothing is pending and all were shipped. */
		if (idsr == 0 && shipped == ncpuids)
			break;
#endif
		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_CONT, "send mondo timeout [%d NACK %d "
			    "BUSY]\nIDSR 0x%" PRIx64 " cpuids:",
			    nack, busy, idsr);
			/* List the CPUs whose slot is still NACK or BUSY. */
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			for (i = 0; i < bn_sets; i++) {
#else
			for (i = 0; i < IDSR_BN_SETS; i++) {
#endif
				if (idsr & (IDSR_NACK_BIT(i) |
				    IDSR_BUSY_BIT(i))) {
					cmn_err(CE_CONT, " 0x%x", cpuids[i]);
				}
			}
			cmn_err(CE_CONT, "\n");
			cmn_err(CE_PANIC, "send_mondo_set: timeout");
		}
		curnack = idsr & nackmask;
		curbusy = idsr & busymask;

#ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
		/*
		 * Only proceed to send more xcalls if all the
		 * cpus in the previous IDSR_BN_SETS were completed.
		 */
		if (curbusy) {
			busy++;
			continue;
		}
#endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */

#if (NCPU > IDSR_BN_SETS)
		if (shipped < ncpuids) {
			uint64_t cpus_left;
			uint16_t next = (uint16_t)index;

			/* Slots that are neither NACKed nor busy are free. */
			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
			    busymask;

			if (cpus_left) {
				do {
					/*
					 * Sequence through and ship to the
					 * remainder of the CPUs in the system
					 * (e.g. other than the first
					 * (IDSR_BN_SETS)) in reverse order.
					 */
					lo = lowbit(cpus_left) - 1;
					i = IDSR_BUSY_IDX(lo);
					shipit(next, i);
					shipped++;
					cpuids[i] = next;

					/*
					 * If we've processed all the CPUs,
					 * exit the loop now and save
					 * instructions.
					 */
					if (shipped == ncpuids)
						break;

					/* Find the next lower CPU in 'set'. */
					for ((index = ((int)next - 1));
					    index >= 0; index--)
						if (CPU_IN_SET(set, index)) {
							next = (uint16_t)index;
							break;
						}

					cpus_left &= ~(1ull << lo);
				} while (cpus_left);
				continue;
			}
		}
#endif
#ifndef	OLYMPUS_C_REV_B_ERRATA_XCALL
		if (curbusy) {
			busy++;
			continue;
		}
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
#ifdef SEND_MONDO_STATS
		{
			int n = gettick() - starttick;
			if (n < 8192)
				x_nack_stimes[n >> 7]++;
		}
#endif
		/* Wait sys_clock_mhz ticks before re-sending NACKed mondos. */
		while (gettick() < (tick + sys_clock_mhz))
			;
		/* Re-ship each NACKed slot to the CPU recorded for it. */
		do {
			lo = lowbit(curnack) - 1;
			i = IDSR_NACK_IDX(lo);
			shipit(cpuids[i], i);
			curnack &= ~(1ull << lo);
		} while (curnack);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_set_stimes[n >> 7]++;
		else
			x_set_ltimes[(n >> 13) & 0xf]++;
	}
	x_set_cpus[shipped]++;
#endif
}

/*
 * Cpu private initialization.
 */
/*
 * Panics unless the cpu's implementation is Olympus-C or Jupiter, then
 * adjusts the hardware copy limits for that cpu's ecache size.
 */
void
cpu_init_private(struct cpu *cp)
{
	if (!((IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation)) ||
	    (IS_JUPITER(cpunodes[cp->cpu_id].implementation)))) {
		cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI(I) is "
		    "supported", cp->cpu_id,
		    cpunodes[cp->cpu_id].implementation);
	}

	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
}

/*
 * One-time setup of the cpu-module specific kernel parameters: error log
 * registration for cpu0, multiple page size support, trap handlers, cache
 * flags, ISA list and hwcap flags, the kpm window and assorted feature
 * switches.
 */
void
cpu_setup(void)
{
	extern int at_flags;
	extern int cpc_has_overflow_intr;
	uint64_t cpu0_log;
	extern	 uint64_t opl_cpu0_err_log;

	/*
	 * Initialize Error log Scratch register for error handling.
	 */

	cpu0_log = va_to_pa(&opl_cpu0_err_log);
	opl_error_setup(cpu0_log);
	opl_cpu0_log_setup = 1;

	/*
	 * Enable MMU translating multiple page sizes for
	 * sITLB and sDTLB.
	 */
	opl_mpg_enable();

	/*
	 * Setup chip-specific trap handlers.
	 */
	cpu_init_trap();

	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;

	/*
	 * Due to the number of entries in the fully-associative tlb
	 * this may have to be tuned lower than in spitfire.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores do not invalidate all pages of the d$, pagecopy
	 * et al. need virtual translations with virtual coloring taken
	 * into consideration.  prefetch/ldd will pollute the d$ on the
	 * load side.
	 */
	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;

	if (use_page_coloring) {
		do_pg_coloring = 1;
	}

	isa_list =
	    "sparcv9+vis2 sparcv9+vis sparcv9 "
	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2 |
	    AV_SPARC_POPC | AV_SPARC_FMAF;

	/*
	 * On SPARC64-VI, there's no hole in the virtual address space
	 */
	hole_start = hole_end = 0;

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */
	kpm_size_shift = 47;
	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
	kpm_smallpages = 1;

	/*
	 * The traptrace code uses either %tick or %stick for
	 * timestamping.  We have %stick so we can use it.
	 */
	traptrace_use_stick = 1;

	/*
	 * SPARC64-VI has a performance counter overflow interrupt
	 */
	cpc_has_overflow_intr = 1;

	/*
	 * Declare that this architecture/cpu combination does not support
	 * fpRAS.
	 */
	fpras_implemented = 0;
}

/*
 * Called by setcpudelay
 *
 * Panics if system_clock_freq has not been set (is zero); the panic
 * message names setcpudelay, the caller, rather than this function.
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * For SPARC64-VI we want to use the system clock rate as
	 * the basis for low level timing, due to support of mixed
	 * speed CPUs and power management.
	 */
	if (system_clock_freq == 0)
		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");

	sys_tick_freq = system_clock_freq;
}

#ifdef SEND_MONDO_STATS
/*
 * Histograms filled in by send_one_mondo() (x_one_*) and
 * send_mondo_set() (x_set_*, x_nack_*).  The *_stimes arrays are indexed
 * by (elapsed ticks >> 7) for elapsed < 8192; the *_ltimes arrays by
 * ((elapsed ticks >> 13) & 0xf) otherwise.
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif

/*
 * Send a mondo to a single CPU and wait until it has been accepted,
 * re-sending it after a 1us wait whenever the IDSR is non-zero but not
 * busy.  Panics after xc_tick_limit ticks unless panic_quiesce is set,
 * in which case it returns silently.
 *
 * Note:  A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be
 * included in the KDI-specific version.  See us3_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	int busy, nack;
	uint64_t idsr, starttick, endtick, tick, lasttick;
	uint64_t busymask;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	starttick = lasttick = gettick();
	shipit(cpuid, 0);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	busymask = IDSR_BUSY;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;

		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC, "send mondo timeout (target 0x%x) "
			    "[%d NACK %d BUSY]", cpuid, nack, busy);
		}

		/* Still being delivered: keep polling. */
		if (idsr & busymask) {
			busy++;
			continue;
		}
		/* Not busy but not clear either: wait briefly and re-send. */
		drv_usecwait(1);
		shipit(cpuid, 0);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_one_stimes[n >> 7]++;
		else
			x_one_ltimes[(n >> 13) & 0xf]++;
	}
#endif
}

/*
 * init_mmu_page_sizes is set to one after the bootup time initialization
 * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a
 * valid value.
 *
 * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific
 * versions of disable_ism_large_pages and disable_large_pages, and feed back
 * into those two hat variables at hat initialization time.
 *
 * Each mask has bit (1 << TTExx) set for every page size that is disabled.
 */
int init_mmu_page_sizes = 0;

static uint_t mmu_disable_large_pages = 0;
static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) |
	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) |
	(1 << TTE512K));

/*
 * mmu_init_mmu_page_sizes():
 * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
 * Called during very early bootup from check_cpus_set().
 * Can be called to verify that mmu_page_sizes are set up correctly.
 *
 * Set Olympus defaults. We do not use the function parameter.
753 */ 754 /*ARGSUSED*/ 755 void 756 mmu_init_scd(sf_scd_t *scdp) 757 { 758 scdp->scd_sfmmup->sfmmu_cext = shctx_cext; 759 } 760 761 /*ARGSUSED*/ 762 int 763 mmu_init_mmu_page_sizes(int32_t not_used) 764 { 765 if (!init_mmu_page_sizes) { 766 mmu_page_sizes = MMU_PAGE_SIZES; 767 mmu_hashcnt = MAX_HASHCNT; 768 mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE; 769 mmu_exported_pagesize_mask = (1 << TTE8K) | 770 (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | 771 (1 << TTE32M) | (1 << TTE256M); 772 init_mmu_page_sizes = 1; 773 return (0); 774 } 775 return (1); 776 } 777 778 /* SPARC64-VI worst case DTLB parameters */ 779 #ifndef LOCKED_DTLB_ENTRIES 780 #define LOCKED_DTLB_ENTRIES 5 /* 2 user TSBs, 2 nucleus, + OBP */ 781 #endif 782 #define TOTAL_DTLB_ENTRIES 32 783 #define AVAIL_32M_ENTRIES 0 784 #define AVAIL_256M_ENTRIES 0 785 #define AVAIL_DTLB_ENTRIES (TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES) 786 static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = { 787 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 788 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 789 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES}; 790 791 /* 792 * The function returns the mmu-specific values for the 793 * hat's disable_large_pages, disable_ism_large_pages, and 794 * disable_auto_data_large_pages and 795 * disable_text_data_large_pages variables. 
796 */ 797 uint_t 798 mmu_large_pages_disabled(uint_t flag) 799 { 800 uint_t pages_disable = 0; 801 extern int use_text_pgsz64K; 802 extern int use_text_pgsz512K; 803 804 if (flag == HAT_LOAD) { 805 pages_disable = mmu_disable_large_pages; 806 } else if (flag == HAT_LOAD_SHARE) { 807 pages_disable = mmu_disable_ism_large_pages; 808 } else if (flag == HAT_AUTO_DATA) { 809 pages_disable = mmu_disable_auto_data_large_pages; 810 } else if (flag == HAT_AUTO_TEXT) { 811 pages_disable = mmu_disable_auto_text_large_pages; 812 if (use_text_pgsz512K) { 813 pages_disable &= ~(1 << TTE512K); 814 } 815 if (use_text_pgsz64K) { 816 pages_disable &= ~(1 << TTE64K); 817 } 818 } 819 return (pages_disable); 820 } 821 822 /* 823 * mmu_init_large_pages is called with the desired ism_pagesize parameter. 824 * It may be called from set_platform_defaults, if some value other than 4M 825 * is desired. mmu_ism_pagesize is the tunable. If it has a bad value, 826 * then only warn, since it would be bad form to panic due to a user typo. 827 * 828 * The function re-initializes the mmu_disable_ism_large_pages variable. 
829 */ 830 void 831 mmu_init_large_pages(size_t ism_pagesize) 832 { 833 834 switch (ism_pagesize) { 835 case MMU_PAGESIZE4M: 836 mmu_disable_ism_large_pages = ((1 << TTE64K) | 837 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 838 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 839 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 840 shctx_cext = TAGACCEXT_MKSZPAIR(TTE4M, TTE8K); 841 break; 842 case MMU_PAGESIZE32M: 843 mmu_disable_ism_large_pages = ((1 << TTE64K) | 844 (1 << TTE512K) | (1 << TTE256M)); 845 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 846 (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M)); 847 adjust_data_maxlpsize(ism_pagesize); 848 shctx_cext = TAGACCEXT_MKSZPAIR(TTE32M, TTE8K); 849 break; 850 case MMU_PAGESIZE256M: 851 mmu_disable_ism_large_pages = ((1 << TTE64K) | 852 (1 << TTE512K) | (1 << TTE32M)); 853 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 854 (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M)); 855 adjust_data_maxlpsize(ism_pagesize); 856 shctx_cext = TAGACCEXT_MKSZPAIR(TTE256M, TTE8K); 857 break; 858 default: 859 cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx", 860 ism_pagesize); 861 break; 862 } 863 } 864 865 /* 866 * Function to reprogram the TLBs when page sizes used 867 * by a process change significantly. 868 */ 869 static void 870 mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz) 871 { 872 uint8_t pgsz0, pgsz1; 873 874 /* 875 * Don't program 2nd dtlb for kernel and ism hat 876 */ 877 ASSERT(hat->sfmmu_ismhat == NULL); 878 ASSERT(hat != ksfmmup); 879 880 /* 881 * hat->sfmmu_pgsz[] is an array whose elements 882 * contain a sorted order of page sizes. Element 883 * 0 is the most commonly used page size, followed 884 * by element 1, and so on. 885 * 886 * ttecnt[] is an array of per-page-size page counts 887 * mapped into the process. 888 * 889 * If the HAT's choice for page sizes is unsuitable, 890 * we can override it here. 
The new values written 891 * to the array will be handed back to us later to 892 * do the actual programming of the TLB hardware. 893 * 894 */ 895 pgsz0 = (uint8_t)MIN(tmp_pgsz[0], tmp_pgsz[1]); 896 pgsz1 = (uint8_t)MAX(tmp_pgsz[0], tmp_pgsz[1]); 897 898 /* 899 * This implements PAGESIZE programming of the sTLB 900 * if large TTE counts don't exceed the thresholds. 901 */ 902 if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0]) 903 pgsz0 = page_szc(MMU_PAGESIZE); 904 if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1]) 905 pgsz1 = page_szc(MMU_PAGESIZE); 906 tmp_pgsz[0] = pgsz0; 907 tmp_pgsz[1] = pgsz1; 908 /* otherwise, accept what the HAT chose for us */ 909 } 910 911 /* 912 * The HAT calls this function when an MMU context is allocated so that we 913 * can reprogram the large TLBs appropriately for the new process using 914 * the context. 915 * 916 * The caller must hold the HAT lock. 917 */ 918 void 919 mmu_set_ctx_page_sizes(struct hat *hat) 920 { 921 uint8_t pgsz0, pgsz1; 922 uint8_t new_cext; 923 924 ASSERT(sfmmu_hat_lock_held(hat)); 925 /* 926 * Don't program 2nd dtlb for kernel and ism hat 927 */ 928 if (hat->sfmmu_ismhat || hat == ksfmmup) 929 return; 930 931 /* 932 * If supported, reprogram the TLBs to a larger pagesize. 933 */ 934 if (hat->sfmmu_scdp != NULL) { 935 new_cext = hat->sfmmu_scdp->scd_sfmmup->sfmmu_cext; 936 ASSERT(new_cext == shctx_cext); 937 } else { 938 pgsz0 = hat->sfmmu_pgsz[0]; 939 pgsz1 = hat->sfmmu_pgsz[1]; 940 ASSERT(pgsz0 < mmu_page_sizes); 941 ASSERT(pgsz1 < mmu_page_sizes); 942 new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0); 943 } 944 if (hat->sfmmu_cext != new_cext) { 945 #ifdef DEBUG 946 int i; 947 /* 948 * assert cnum should be invalid, this is because pagesize 949 * can only be changed after a proc's ctxs are invalidated. 
950 */ 951 for (i = 0; i < max_mmu_ctxdoms; i++) { 952 ASSERT(hat->sfmmu_ctxs[i].cnum == INVALID_CONTEXT); 953 } 954 #endif /* DEBUG */ 955 hat->sfmmu_cext = new_cext; 956 } 957 /* 958 * sfmmu_setctx_sec() will take care of the 959 * rest of the dirty work for us. 960 */ 961 } 962 963 /* 964 * This function assumes that there are either four or six supported page 965 * sizes and at most two programmable TLBs, so we need to decide which 966 * page sizes are most important and then adjust the TLB page sizes 967 * accordingly (if supported). 968 * 969 * If these assumptions change, this function will need to be 970 * updated to support whatever the new limits are. 971 */ 972 void 973 mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt) 974 { 975 uint64_t sortcnt[MMU_PAGE_SIZES]; 976 uint8_t tmp_pgsz[MMU_PAGE_SIZES]; 977 uint8_t i, j, max; 978 uint16_t oldval, newval; 979 980 /* 981 * We only consider reprogramming the TLBs if one or more of 982 * the two most used page sizes changes and we're using 983 * large pages in this process. 984 */ 985 if (SFMMU_LGPGS_INUSE(sfmmup)) { 986 /* Sort page sizes. */ 987 for (i = 0; i < mmu_page_sizes; i++) { 988 sortcnt[i] = ttecnt[i]; 989 } 990 for (j = 0; j < mmu_page_sizes; j++) { 991 for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) { 992 if (sortcnt[i] > sortcnt[max]) 993 max = i; 994 } 995 tmp_pgsz[j] = max; 996 sortcnt[max] = 0; 997 } 998 999 oldval = sfmmup->sfmmu_pgsz[0] << 8 | sfmmup->sfmmu_pgsz[1]; 1000 1001 mmu_setup_page_sizes(sfmmup, ttecnt, tmp_pgsz); 1002 1003 /* Check 2 largest values after the sort. */ 1004 newval = tmp_pgsz[0] << 8 | tmp_pgsz[1]; 1005 if (newval != oldval) { 1006 sfmmu_reprog_pgsz_arr(sfmmup, tmp_pgsz); 1007 } 1008 } 1009 } 1010 1011 /* 1012 * Return processor specific async error structure 1013 * size used. 
 */
int
cpu_aflt_size(void)
{
	return (sizeof (opl_async_flt_t));
}

/*
 * The cpu_sync_log_err() function is called via the [uc]e_drain() function to
 * post-process CPU events that are dequeued.  As such, it can be invoked
 * from softint context, from AST processing in the trap() flow, or from the
 * panic flow.  We decode the CPU-specific data, and take appropriate actions.
 * Historically this entry point was used to log the actual cmn_err(9F) text;
 * now with FMA it is used to prepare 'flt' to be converted into an ereport.
 * With FMA this function now also returns a flag which indicates to the
 * caller whether the ereport should be posted (1) or suppressed (0).
 *
 * 'flt' is used both as an opl_async_flt_t and as a struct async_flt.
 * The only path that returns 0 is the retired-page case below.
 */
/*ARGSUSED*/
static int
cpu_sync_log_err(void *flt)
{
	opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;

	/*
	 * No extra processing of urgent error events.
	 * Always generate ereports for these events.
	 */
	if (aflt->flt_status == OPL_ECC_URGENT_TRAP)
		return (1);

	/*
	 * Additional processing for synchronous errors.
	 */
	switch (opl_flt->flt_type) {
	case OPL_CPU_INV_SFSR:
		return (1);

	case OPL_CPU_SYNC_UE:
		/*
		 * The validity: SFSR_MK_UE bit has been checked
		 * in opl_cpu_sync_error()
		 * No more check is required.
		 *
		 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W,
		 * and they have been retrieved in cpu_queue_events()
		 */

		if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) {
			ASSERT(aflt->flt_in_memory);
			/*
			 * We want to skip logging only if ALL the following
			 * conditions are true:
			 *
			 *	1. We are not panicking already.
			 *	2. The error is a memory error.
			 *	3. There is only one error.
			 *	4. The error is on a retired page.
			 *	5. The error occurred under on_trap
			 *	protection AFLT_PROT_EC
			 *
			 * NOTE(review): condition 3 has no visible check in
			 * the test below -- confirm whether it is guaranteed
			 * elsewhere.
			 */
			if (!panicstr && aflt->flt_prot == AFLT_PROT_EC &&
			    page_retire_check(aflt->flt_addr, NULL) == 0) {
				/*
				 * Do not log an error from
				 * the retired page
				 */
				softcall(ecc_page_zero, (void *)aflt->flt_addr);
				return (0);
			}
			/* Otherwise retire the page, unless panicking. */
			if (!panicstr)
				cpu_page_retire(opl_flt);
		}
		return (1);

	case OPL_CPU_SYNC_OTHERS:
		/*
		 * For the following error cases, the processor HW does
		 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt
		 * to assign appropriate values here to reflect what we
		 * think is the most likely cause of the problem w.r.t to
		 * the particular error event.  For Buserr and timeout
		 * error event, we will assign OPL_ERRID_CHANNEL as the
		 * most likely reason.  For TLB parity or multiple hit
		 * error events, we will assign the reason as
		 * OPL_ERRID_CPU (cpu related problem) and set the
		 * flt_eid_sid to point to the cpuid.
		 */

		if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) {
			/*
			 * flt_eid_sid will not be used for this case.
			 */
			opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL;
		}
		if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) {
			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
			opl_flt->flt_eid_sid = aflt->flt_inst;
		}

		/*
		 * In case of no effective error bit
		 */
		if ((opl_flt->flt_bit & SFSR_ERRS) == 0) {
			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
			opl_flt->flt_eid_sid = aflt->flt_inst;
		}
		break;

	default:
		return (1);
	}
	return (1);
}

/*
 * Retire the bad page that may contain the flushed error.
1131 */ 1132 void 1133 cpu_page_retire(opl_async_flt_t *opl_flt) 1134 { 1135 struct async_flt *aflt = (struct async_flt *)opl_flt; 1136 (void) page_retire(aflt->flt_addr, PR_UE); 1137 } 1138 1139 /* 1140 * Invoked by error_init() early in startup and therefore before 1141 * startup_errorq() is called to drain any error Q - 1142 * 1143 * startup() 1144 * startup_end() 1145 * error_init() 1146 * cpu_error_init() 1147 * errorq_init() 1148 * errorq_drain() 1149 * start_other_cpus() 1150 * 1151 * The purpose of this routine is to create error-related taskqs. Taskqs 1152 * are used for this purpose because cpu_lock can't be grabbed from interrupt 1153 * context. 1154 * 1155 */ 1156 /*ARGSUSED*/ 1157 void 1158 cpu_error_init(int items) 1159 { 1160 opl_err_log = (opl_errlog_t *) 1161 kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP); 1162 if ((uint64_t)opl_err_log & MMU_PAGEOFFSET) 1163 cmn_err(CE_PANIC, "The base address of the error log " 1164 "is not page aligned"); 1165 } 1166 1167 /* 1168 * We route all errors through a single switch statement. 1169 */ 1170 void 1171 cpu_ue_log_err(struct async_flt *aflt) 1172 { 1173 switch (aflt->flt_class) { 1174 case CPU_FAULT: 1175 if (cpu_sync_log_err(aflt)) 1176 cpu_ereport_post(aflt); 1177 break; 1178 1179 case BUS_FAULT: 1180 bus_async_log_err(aflt); 1181 break; 1182 1183 default: 1184 cmn_err(CE_WARN, "discarding async error %p with invalid " 1185 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 1186 return; 1187 } 1188 } 1189 1190 /* 1191 * Routine for panic hook callback from panic_idle(). 1192 * 1193 * Nothing to do here. 1194 */ 1195 void 1196 cpu_async_panic_callb(void) 1197 { 1198 } 1199 1200 /* 1201 * Routine to return a string identifying the physical name 1202 * associated with a memory/cache error. 
1203 */ 1204 /*ARGSUSED*/ 1205 int 1206 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, 1207 uint64_t flt_addr, int flt_bus_id, int flt_in_memory, 1208 ushort_t flt_status, char *buf, int buflen, int *lenp) 1209 { 1210 int synd_code; 1211 int ret; 1212 1213 /* 1214 * An AFSR of -1 defaults to a memory syndrome. 1215 */ 1216 synd_code = (int)flt_synd; 1217 1218 if (&plat_get_mem_unum) { 1219 if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id, 1220 flt_in_memory, flt_status, buf, buflen, lenp)) != 0) { 1221 buf[0] = '\0'; 1222 *lenp = 0; 1223 } 1224 return (ret); 1225 } 1226 buf[0] = '\0'; 1227 *lenp = 0; 1228 return (ENOTSUP); 1229 } 1230 1231 /* 1232 * Wrapper for cpu_get_mem_unum() routine that takes an 1233 * async_flt struct rather than explicit arguments. 1234 */ 1235 int 1236 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1237 char *buf, int buflen, int *lenp) 1238 { 1239 /* 1240 * We always pass -1 so that cpu_get_mem_unum will interpret this as a 1241 * memory error. 1242 */ 1243 return (cpu_get_mem_unum(synd_status, aflt->flt_synd, 1244 (uint64_t)-1, 1245 aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, 1246 aflt->flt_status, buf, buflen, lenp)); 1247 } 1248 1249 /* 1250 * This routine is a more generic interface to cpu_get_mem_unum() 1251 * that may be used by other modules (e.g. mm). 1252 */ 1253 /*ARGSUSED*/ 1254 int 1255 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1256 char *buf, int buflen, int *lenp) 1257 { 1258 int synd_status, flt_in_memory, ret; 1259 ushort_t flt_status = 0; 1260 char unum[UNUM_NAMLEN]; 1261 1262 /* 1263 * Check for an invalid address. 
1264 */ 1265 if (afar == (uint64_t)-1) 1266 return (ENXIO); 1267 1268 if (synd == (uint64_t)-1) 1269 synd_status = AFLT_STAT_INVALID; 1270 else 1271 synd_status = AFLT_STAT_VALID; 1272 1273 flt_in_memory = (*afsr & SFSR_MEMORY) && 1274 pf_is_memory(afar >> MMU_PAGESHIFT); 1275 1276 ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1277 CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp); 1278 if (ret != 0) 1279 return (ret); 1280 1281 if (*lenp >= buflen) 1282 return (ENAMETOOLONG); 1283 1284 (void) strncpy(buf, unum, buflen); 1285 1286 return (0); 1287 } 1288 1289 /* 1290 * Routine to return memory information associated 1291 * with a physical address and syndrome. 1292 */ 1293 /*ARGSUSED*/ 1294 int 1295 cpu_get_mem_info(uint64_t synd, uint64_t afar, 1296 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1297 int *segsp, int *banksp, int *mcidp) 1298 { 1299 int synd_code = (int)synd; 1300 1301 if (afar == (uint64_t)-1) 1302 return (ENXIO); 1303 1304 if (p2get_mem_info != NULL) 1305 return ((p2get_mem_info)(synd_code, afar, mem_sizep, seg_sizep, 1306 bank_sizep, segsp, banksp, mcidp)); 1307 else 1308 return (ENOTSUP); 1309 } 1310 1311 /* 1312 * Routine to return a string identifying the physical 1313 * name associated with a cpuid. 1314 */ 1315 int 1316 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 1317 { 1318 int ret; 1319 char unum[UNUM_NAMLEN]; 1320 1321 if (&plat_get_cpu_unum) { 1322 if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, 1323 lenp)) != 0) 1324 return (ret); 1325 } else { 1326 return (ENOTSUP); 1327 } 1328 1329 if (*lenp >= buflen) 1330 return (ENAMETOOLONG); 1331 1332 (void) strncpy(buf, unum, *lenp); 1333 1334 return (0); 1335 } 1336 1337 /* 1338 * This routine exports the name buffer size. 
1339 */ 1340 size_t 1341 cpu_get_name_bufsize() 1342 { 1343 return (UNUM_NAMLEN); 1344 } 1345 1346 /* 1347 * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH 1348 */ 1349 void 1350 cpu_flush_ecache(void) 1351 { 1352 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size, 1353 cpunodes[CPU->cpu_id].ecache_linesize); 1354 } 1355 1356 static uint8_t 1357 flt_to_trap_type(struct async_flt *aflt) 1358 { 1359 if (aflt->flt_status & OPL_ECC_ISYNC_TRAP) 1360 return (TRAP_TYPE_ECC_I); 1361 if (aflt->flt_status & OPL_ECC_DSYNC_TRAP) 1362 return (TRAP_TYPE_ECC_D); 1363 if (aflt->flt_status & OPL_ECC_URGENT_TRAP) 1364 return (TRAP_TYPE_URGENT); 1365 return (TRAP_TYPE_UNKNOWN); 1366 } 1367 1368 /* 1369 * Encode the data saved in the opl_async_flt_t struct into 1370 * the FM ereport payload. 1371 */ 1372 /* ARGSUSED */ 1373 static void 1374 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload, 1375 nvlist_t *resource) 1376 { 1377 opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt; 1378 char unum[UNUM_NAMLEN]; 1379 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1380 int len; 1381 1382 1383 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) { 1384 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR, 1385 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1386 } 1387 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) { 1388 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR, 1389 DATA_TYPE_UINT64, aflt->flt_addr, NULL); 1390 } 1391 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) { 1392 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR, 1393 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1394 } 1395 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) { 1396 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC, 1397 DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL); 1398 } 1399 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) { 1400 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL, 1401 DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL); 1402 } 1403 if 
(aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) { 1404 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT, 1405 DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL); 1406 } 1407 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) { 1408 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV, 1409 DATA_TYPE_BOOLEAN_VALUE, 1410 (aflt->flt_priv ? B_TRUE : B_FALSE), NULL); 1411 } 1412 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) { 1413 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS, 1414 DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL); 1415 } 1416 1417 switch (opl_flt->flt_eid_mod) { 1418 case OPL_ERRID_CPU: 1419 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1420 (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id); 1421 (void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION, 1422 NULL, opl_flt->flt_eid_sid, 1423 (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version, sbuf); 1424 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1425 DATA_TYPE_NVLIST, resource, NULL); 1426 break; 1427 1428 case OPL_ERRID_CHANNEL: 1429 /* 1430 * No resource is created but the cpumem DE will find 1431 * the defective path by retreiving EID from SFSR which is 1432 * included in the payload. 1433 */ 1434 break; 1435 1436 case OPL_ERRID_MEM: 1437 (void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len); 1438 (void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL, 1439 unum, NULL, (uint64_t)-1); 1440 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1441 DATA_TYPE_NVLIST, resource, NULL); 1442 break; 1443 1444 case OPL_ERRID_PATH: 1445 /* 1446 * No resource is created but the cpumem DE will find 1447 * the defective path by retreiving EID from SFSR which is 1448 * included in the payload. 1449 */ 1450 break; 1451 } 1452 } 1453 1454 /* 1455 * Returns whether fault address is valid for this error bit and 1456 * whether the address is "in memory" (i.e. pf_is_memory returns 1). 
1457 */ 1458 /*ARGSUSED*/ 1459 static int 1460 cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit) 1461 { 1462 struct async_flt *aflt = (struct async_flt *)opl_flt; 1463 1464 if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) { 1465 return ((t_afsr_bit & SFSR_MEMORY) && 1466 pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)); 1467 } 1468 return (0); 1469 } 1470 1471 /* 1472 * In OPL SCF does the stick synchronization. 1473 */ 1474 void 1475 sticksync_slave(void) 1476 { 1477 } 1478 1479 /* 1480 * In OPL SCF does the stick synchronization. 1481 */ 1482 void 1483 sticksync_master(void) 1484 { 1485 } 1486 1487 /* 1488 * Cpu private unitialization. OPL cpus do not use the private area. 1489 */ 1490 void 1491 cpu_uninit_private(struct cpu *cp) 1492 { 1493 cmp_delete_cpu(cp->cpu_id); 1494 } 1495 1496 /* 1497 * Always flush an entire cache. 1498 */ 1499 void 1500 cpu_error_ecache_flush(void) 1501 { 1502 cpu_flush_ecache(); 1503 } 1504 1505 void 1506 cpu_ereport_post(struct async_flt *aflt) 1507 { 1508 char *cpu_type, buf[FM_MAX_CLASS]; 1509 nv_alloc_t *nva = NULL; 1510 nvlist_t *ereport, *detector, *resource; 1511 errorq_elem_t *eqep; 1512 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1513 1514 if (aflt->flt_panic || panicstr) { 1515 eqep = errorq_reserve(ereport_errorq); 1516 if (eqep == NULL) 1517 return; 1518 ereport = errorq_elem_nvl(ereport_errorq, eqep); 1519 nva = errorq_elem_nva(ereport_errorq, eqep); 1520 } else { 1521 ereport = fm_nvlist_create(nva); 1522 } 1523 1524 /* 1525 * Create the scheme "cpu" FMRI. 
1526 */ 1527 detector = fm_nvlist_create(nva); 1528 resource = fm_nvlist_create(nva); 1529 switch (cpunodes[aflt->flt_inst].implementation) { 1530 case OLYMPUS_C_IMPL: 1531 cpu_type = FM_EREPORT_CPU_SPARC64_VI; 1532 break; 1533 case JUPITER_IMPL: 1534 cpu_type = FM_EREPORT_CPU_SPARC64_VII; 1535 break; 1536 default: 1537 cpu_type = FM_EREPORT_CPU_UNSUPPORTED; 1538 break; 1539 } 1540 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1541 (u_longlong_t)cpunodes[aflt->flt_inst].device_id); 1542 (void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL, 1543 aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version, 1544 sbuf); 1545 1546 /* 1547 * Encode all the common data into the ereport. 1548 */ 1549 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", 1550 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class); 1551 1552 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 1553 fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL); 1554 1555 /* 1556 * Encode the error specific data that was saved in 1557 * the async_flt structure into the ereport. 
1558 */ 1559 cpu_payload_add_aflt(aflt, ereport, resource); 1560 1561 if (aflt->flt_panic || panicstr) { 1562 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 1563 } else { 1564 (void) fm_ereport_post(ereport, EVCH_TRYHARD); 1565 fm_nvlist_destroy(ereport, FM_NVA_FREE); 1566 fm_nvlist_destroy(detector, FM_NVA_FREE); 1567 fm_nvlist_destroy(resource, FM_NVA_FREE); 1568 } 1569 } 1570 1571 void 1572 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 1573 { 1574 int status; 1575 ddi_fm_error_t de; 1576 1577 bzero(&de, sizeof (ddi_fm_error_t)); 1578 1579 de.fme_version = DDI_FME_VERSION; 1580 de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1); 1581 de.fme_flag = expected; 1582 de.fme_bus_specific = (void *)aflt->flt_addr; 1583 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 1584 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 1585 aflt->flt_panic = 1; 1586 } 1587 1588 void 1589 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 1590 errorq_t *eqp, uint_t flag) 1591 { 1592 struct async_flt *aflt = (struct async_flt *)payload; 1593 1594 aflt->flt_erpt_class = error_class; 1595 errorq_dispatch(eqp, payload, payload_sz, flag); 1596 } 1597 1598 void 1599 adjust_hw_copy_limits(int ecache_size) 1600 { 1601 /* 1602 * Set hw copy limits. 1603 * 1604 * /etc/system will be parsed later and can override one or more 1605 * of these settings. 1606 * 1607 * At this time, ecache size seems only mildly relevant. 1608 * We seem to run into issues with the d-cache and stalls 1609 * we see on misses. 1610 * 1611 * Cycle measurement indicates that 2 byte aligned copies fare 1612 * little better than doing things with VIS at around 512 bytes. 1613 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte 1614 * aligned is faster whenever the source and destination data 1615 * in cache and the total size is less than 2 Kbytes. The 2K 1616 * limit seems to be driven by the 2K write cache. 
1617 * When more than 2K of copies are done in non-VIS mode, stores 1618 * backup in the write cache. In VIS mode, the write cache is 1619 * bypassed, allowing faster cache-line writes aligned on cache 1620 * boundaries. 1621 * 1622 * In addition, in non-VIS mode, there is no prefetching, so 1623 * for larger copies, the advantage of prefetching to avoid even 1624 * occasional cache misses is enough to justify using the VIS code. 1625 * 1626 * During testing, it was discovered that netbench ran 3% slower 1627 * when hw_copy_limit_8 was 2K or larger. Apparently for server 1628 * applications, data is only used once (copied to the output 1629 * buffer, then copied by the network device off the system). Using 1630 * the VIS copy saves more L2 cache state. Network copies are 1631 * around 1.3K to 1.5K in size for historical reasons. 1632 * 1633 * Therefore, a limit of 1K bytes will be used for the 8 byte 1634 * aligned copy even for large caches and 8 MB ecache. The 1635 * infrastructure to allow different limits for different sized 1636 * caches is kept to allow further tuning in later releases. 1637 */ 1638 1639 if (min_ecache_size == 0 && use_hw_bcopy) { 1640 /* 1641 * First time through - should be before /etc/system 1642 * is read. 1643 * Could skip the checks for zero but this lets us 1644 * preserve any debugger rewrites. 1645 */ 1646 if (hw_copy_limit_1 == 0) { 1647 hw_copy_limit_1 = VIS_COPY_THRESHOLD; 1648 priv_hcl_1 = hw_copy_limit_1; 1649 } 1650 if (hw_copy_limit_2 == 0) { 1651 hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD; 1652 priv_hcl_2 = hw_copy_limit_2; 1653 } 1654 if (hw_copy_limit_4 == 0) { 1655 hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD; 1656 priv_hcl_4 = hw_copy_limit_4; 1657 } 1658 if (hw_copy_limit_8 == 0) { 1659 hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; 1660 priv_hcl_8 = hw_copy_limit_8; 1661 } 1662 min_ecache_size = ecache_size; 1663 } else { 1664 /* 1665 * MP initialization. Called *after* /etc/system has 1666 * been parsed. 
One CPU has already been initialized. 1667 * Need to cater for /etc/system having scragged one 1668 * of our values. 1669 */ 1670 if (ecache_size == min_ecache_size) { 1671 /* 1672 * Same size ecache. We do nothing unless we 1673 * have a pessimistic ecache setting. In that 1674 * case we become more optimistic (if the cache is 1675 * large enough). 1676 */ 1677 if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) { 1678 /* 1679 * Need to adjust hw_copy_limit* from our 1680 * pessimistic uniprocessor value to a more 1681 * optimistic UP value *iff* it hasn't been 1682 * reset. 1683 */ 1684 if ((ecache_size > 1048576) && 1685 (priv_hcl_8 == hw_copy_limit_8)) { 1686 if (ecache_size <= 2097152) 1687 hw_copy_limit_8 = 4 * 1688 VIS_COPY_THRESHOLD; 1689 else if (ecache_size <= 4194304) 1690 hw_copy_limit_8 = 4 * 1691 VIS_COPY_THRESHOLD; 1692 else 1693 hw_copy_limit_8 = 4 * 1694 VIS_COPY_THRESHOLD; 1695 priv_hcl_8 = hw_copy_limit_8; 1696 } 1697 } 1698 } else if (ecache_size < min_ecache_size) { 1699 /* 1700 * A different ecache size. Can this even happen? 1701 */ 1702 if (priv_hcl_8 == hw_copy_limit_8) { 1703 /* 1704 * The previous value that we set 1705 * is unchanged (i.e., it hasn't been 1706 * scragged by /etc/system). Rewrite it. 
1707 */ 1708 if (ecache_size <= 1048576) 1709 hw_copy_limit_8 = 8 * 1710 VIS_COPY_THRESHOLD; 1711 else if (ecache_size <= 2097152) 1712 hw_copy_limit_8 = 8 * 1713 VIS_COPY_THRESHOLD; 1714 else if (ecache_size <= 4194304) 1715 hw_copy_limit_8 = 8 * 1716 VIS_COPY_THRESHOLD; 1717 else 1718 hw_copy_limit_8 = 10 * 1719 VIS_COPY_THRESHOLD; 1720 priv_hcl_8 = hw_copy_limit_8; 1721 min_ecache_size = ecache_size; 1722 } 1723 } 1724 } 1725 } 1726 1727 #define VIS_BLOCKSIZE 64 1728 1729 int 1730 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 1731 { 1732 int ret, watched; 1733 1734 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1735 ret = dtrace_blksuword32(addr, data, 0); 1736 if (watched) 1737 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1738 1739 return (ret); 1740 } 1741 1742 void 1743 opl_cpu_reg_init() 1744 { 1745 uint64_t this_cpu_log; 1746 1747 if (cpu[getprocessorid()] == &cpu0 && opl_cpu0_log_setup == 1) { 1748 /* 1749 * Support for "ta 3" 1750 */ 1751 opl_ta3(); 1752 1753 /* 1754 * If we are being called at boot time on cpu0 the error 1755 * log is already set up in cpu_setup. Clear the 1756 * opl_cpu0_log_setup flag so that a subsequent DR of cpu0 will 1757 * do the proper initialization. 1758 */ 1759 opl_cpu0_log_setup = 0; 1760 return; 1761 } 1762 1763 /* 1764 * Initialize Error log Scratch register for error handling. 1765 */ 1766 1767 this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) + 1768 ERRLOG_BUFSZ * (getprocessorid()))); 1769 opl_error_setup(this_cpu_log); 1770 1771 /* 1772 * Enable MMU translating multiple page sizes for 1773 * sITLB and sDTLB. 1774 */ 1775 opl_mpg_enable(); 1776 } 1777 1778 /* 1779 * Queue one event in ue_queue based on ecc_type_to_info entry. 
1780 */ 1781 static void 1782 cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason, 1783 ecc_type_to_info_t *eccp) 1784 { 1785 struct async_flt *aflt = (struct async_flt *)opl_flt; 1786 1787 if (reason && 1788 strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { 1789 (void) strcat(reason, eccp->ec_reason); 1790 } 1791 1792 opl_flt->flt_bit = eccp->ec_afsr_bit; 1793 opl_flt->flt_type = eccp->ec_flt_type; 1794 aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit); 1795 aflt->flt_payload = eccp->ec_err_payload; 1796 1797 ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP)); 1798 cpu_errorq_dispatch(eccp->ec_err_class, (void *)opl_flt, 1799 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 1800 } 1801 1802 /* 1803 * Queue events on async event queue one event per error bit. 1804 * Return number of events queued. 1805 */ 1806 int 1807 cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs) 1808 { 1809 struct async_flt *aflt = (struct async_flt *)opl_flt; 1810 ecc_type_to_info_t *eccp; 1811 int nevents = 0; 1812 1813 /* 1814 * Queue expected errors, error bit and fault type must must match 1815 * in the ecc_type_to_info table. 1816 */ 1817 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 1818 eccp++) { 1819 if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && 1820 (eccp->ec_flags & aflt->flt_status) != 0) { 1821 /* 1822 * UE error event can be further 1823 * classified/breakdown into finer granularity 1824 * based on the flt_eid_mod value set by HW. We do 1825 * special handling here so that we can report UE 1826 * error in finer granularity as ue_mem, 1827 * ue_channel, ue_cpu or ue_path. 
1828 */ 1829 if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) { 1830 opl_flt->flt_eid_mod = (aflt->flt_stat & 1831 SFSR_EID_MOD) >> SFSR_EID_MOD_SHIFT; 1832 opl_flt->flt_eid_sid = (aflt->flt_stat & 1833 SFSR_EID_SID) >> SFSR_EID_SID_SHIFT; 1834 /* 1835 * Need to advance eccp pointer by flt_eid_mod 1836 * so that we get an appropriate ecc pointer 1837 * 1838 * EID # of advances 1839 * ---------------------------------- 1840 * OPL_ERRID_MEM 0 1841 * OPL_ERRID_CHANNEL 1 1842 * OPL_ERRID_CPU 2 1843 * OPL_ERRID_PATH 3 1844 */ 1845 eccp += opl_flt->flt_eid_mod; 1846 } 1847 cpu_queue_one_event(opl_flt, reason, eccp); 1848 t_afsr_errs &= ~eccp->ec_afsr_bit; 1849 nevents++; 1850 } 1851 } 1852 1853 return (nevents); 1854 } 1855 1856 /* 1857 * Sync. error wrapper functions. 1858 * We use these functions in order to transfer here from the 1859 * nucleus trap handler information about trap type (data or 1860 * instruction) and trap level (0 or above 0). This way we 1861 * get rid of using SFSR's reserved bits. 
1862 */ 1863 1864 #define OPL_SYNC_TL0 0 1865 #define OPL_SYNC_TL1 1 1866 #define OPL_ISYNC_ERR 0 1867 #define OPL_DSYNC_ERR 1 1868 1869 void 1870 opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1871 { 1872 uint64_t t_sfar = p_sfar; 1873 uint64_t t_sfsr = p_sfsr; 1874 1875 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1876 OPL_SYNC_TL0, OPL_ISYNC_ERR); 1877 } 1878 1879 void 1880 opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1881 { 1882 uint64_t t_sfar = p_sfar; 1883 uint64_t t_sfsr = p_sfsr; 1884 1885 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1886 OPL_SYNC_TL1, OPL_ISYNC_ERR); 1887 } 1888 1889 void 1890 opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1891 { 1892 uint64_t t_sfar = p_sfar; 1893 uint64_t t_sfsr = p_sfsr; 1894 1895 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1896 OPL_SYNC_TL0, OPL_DSYNC_ERR); 1897 } 1898 1899 void 1900 opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1901 { 1902 uint64_t t_sfar = p_sfar; 1903 uint64_t t_sfsr = p_sfsr; 1904 1905 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1906 OPL_SYNC_TL1, OPL_DSYNC_ERR); 1907 } 1908 1909 /* 1910 * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL 1911 * and TLB_PRT. 1912 * This function is designed based on cpu_deferred_error(). 1913 */ 1914 1915 static void 1916 opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr, 1917 uint_t tl, uint_t derr) 1918 { 1919 opl_async_flt_t opl_flt; 1920 struct async_flt *aflt; 1921 int trampolined = 0; 1922 char pr_reason[MAX_REASON_STRING]; 1923 uint64_t log_sfsr; 1924 int expected = DDI_FM_ERR_UNEXPECTED; 1925 ddi_acc_hdl_t *hp; 1926 1927 /* 1928 * We need to look at p_flag to determine if the thread detected an 1929 * error while dumping core. We can't grab p_lock here, but it's ok 1930 * because we just need a consistent snapshot and we know that everyone 1931 * else will store a consistent set of bits while holding p_lock. 
We 1932 * don't have to worry about a race because SDOCORE is set once prior 1933 * to doing i/o from the process's address space and is never cleared. 1934 */ 1935 uint_t pflag = ttoproc(curthread)->p_flag; 1936 1937 pr_reason[0] = '\0'; 1938 1939 /* 1940 * handle the specific error 1941 */ 1942 bzero(&opl_flt, sizeof (opl_async_flt_t)); 1943 aflt = (struct async_flt *)&opl_flt; 1944 aflt->flt_id = gethrtime_waitfree(); 1945 aflt->flt_bus_id = getprocessorid(); 1946 aflt->flt_inst = CPU->cpu_id; 1947 aflt->flt_stat = t_sfsr; 1948 aflt->flt_addr = t_sfar; 1949 aflt->flt_pc = (caddr_t)rp->r_pc; 1950 aflt->flt_prot = (uchar_t)AFLT_PROT_NONE; 1951 aflt->flt_class = (uchar_t)CPU_FAULT; 1952 aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & 1953 TSTATE_PRIV) ? 1 : 0)); 1954 aflt->flt_tl = (uchar_t)tl; 1955 aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 || 1956 (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0); 1957 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1958 aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP; 1959 1960 /* 1961 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain. 1962 * So, clear all error bits to avoid mis-handling and force the system 1963 * panicked. 1964 * We skip all the procedures below down to the panic message call. 1965 */ 1966 if (!(t_sfsr & SFSR_FV)) { 1967 opl_flt.flt_type = OPL_CPU_INV_SFSR; 1968 aflt->flt_panic = 1; 1969 aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC; 1970 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt, 1971 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 1972 fm_panic("%sErrors(s)", "invalid SFSR"); 1973 } 1974 1975 /* 1976 * If either UE and MK bit is off, this is not valid UE error. 1977 * If it is not valid UE error, clear UE & MK_UE bits to prevent 1978 * mis-handling below. 1979 * aflt->flt_stat keeps the original bits as a reference. 
1980 */ 1981 if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) != 1982 (SFSR_MK_UE|SFSR_UE)) { 1983 t_sfsr &= ~(SFSR_MK_UE|SFSR_UE); 1984 } 1985 1986 /* 1987 * If the trap occurred in privileged mode at TL=0, we need to check to 1988 * see if we were executing in the kernel under on_trap() or t_lofault 1989 * protection. If so, modify the saved registers so that we return 1990 * from the trap to the appropriate trampoline routine. 1991 */ 1992 if (!aflt->flt_panic && aflt->flt_priv && tl == 0) { 1993 if (curthread->t_ontrap != NULL) { 1994 on_trap_data_t *otp = curthread->t_ontrap; 1995 1996 if (otp->ot_prot & OT_DATA_EC) { 1997 aflt->flt_prot = (uchar_t)AFLT_PROT_EC; 1998 otp->ot_trap |= (ushort_t)OT_DATA_EC; 1999 rp->r_pc = otp->ot_trampoline; 2000 rp->r_npc = rp->r_pc + 4; 2001 trampolined = 1; 2002 } 2003 2004 if ((t_sfsr & (SFSR_TO | SFSR_BERR)) && 2005 (otp->ot_prot & OT_DATA_ACCESS)) { 2006 aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS; 2007 otp->ot_trap |= (ushort_t)OT_DATA_ACCESS; 2008 rp->r_pc = otp->ot_trampoline; 2009 rp->r_npc = rp->r_pc + 4; 2010 trampolined = 1; 2011 /* 2012 * for peeks and caut_gets errors are expected 2013 */ 2014 hp = (ddi_acc_hdl_t *)otp->ot_handle; 2015 if (!hp) 2016 expected = DDI_FM_ERR_PEEK; 2017 else if (hp->ah_acc.devacc_attr_access == 2018 DDI_CAUTIOUS_ACC) 2019 expected = DDI_FM_ERR_EXPECTED; 2020 } 2021 2022 } else if (curthread->t_lofault) { 2023 aflt->flt_prot = AFLT_PROT_COPY; 2024 rp->r_g1 = EFAULT; 2025 rp->r_pc = curthread->t_lofault; 2026 rp->r_npc = rp->r_pc + 4; 2027 trampolined = 1; 2028 } 2029 } 2030 2031 /* 2032 * If we're in user mode or we're doing a protected copy, we either 2033 * want the ASTON code below to send a signal to the user process 2034 * or we want to panic if aft_panic is set. 2035 * 2036 * If we're in privileged mode and we're not doing a copy, then we 2037 * need to check if we've trampolined. If we haven't trampolined, 2038 * we should panic. 
2039 */ 2040 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 2041 if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO))) 2042 aflt->flt_panic |= aft_panic; 2043 } else if (!trampolined) { 2044 aflt->flt_panic = 1; 2045 } 2046 2047 /* 2048 * If we've trampolined due to a privileged TO or BERR, or if an 2049 * unprivileged TO or BERR occurred, we don't want to enqueue an 2050 * event for that TO or BERR. Queue all other events (if any) besides 2051 * the TO/BERR. 2052 */ 2053 log_sfsr = t_sfsr; 2054 if (trampolined) { 2055 log_sfsr &= ~(SFSR_TO | SFSR_BERR); 2056 } else if (!aflt->flt_priv) { 2057 /* 2058 * User mode, suppress messages if 2059 * cpu_berr_to_verbose is not set. 2060 */ 2061 if (!cpu_berr_to_verbose) 2062 log_sfsr &= ~(SFSR_TO | SFSR_BERR); 2063 } 2064 2065 if (((log_sfsr & SFSR_ERRS) && (cpu_queue_events(&opl_flt, pr_reason, 2066 t_sfsr) == 0)) || ((t_sfsr & SFSR_ERRS) == 0)) { 2067 opl_flt.flt_type = OPL_CPU_INV_SFSR; 2068 aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC; 2069 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt, 2070 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 2071 } 2072 2073 if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) { 2074 cpu_run_bus_error_handlers(aflt, expected); 2075 } 2076 2077 /* 2078 * Panic here if aflt->flt_panic has been set. Enqueued errors will 2079 * be logged as part of the panic flow. 2080 */ 2081 if (aflt->flt_panic) { 2082 if (pr_reason[0] == 0) 2083 strcpy(pr_reason, "invalid SFSR "); 2084 2085 fm_panic("%sErrors(s)", pr_reason); 2086 } 2087 2088 /* 2089 * If we queued an error and we are going to return from the trap and 2090 * the error was in user mode or inside of a copy routine, set AST flag 2091 * so the queue will be drained before returning to user mode. The 2092 * AST processing will also act on our failure policy. 
2093 */ 2094 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 2095 int pcb_flag = 0; 2096 2097 if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO))) 2098 pcb_flag |= ASYNC_HWERR; 2099 2100 if (t_sfsr & SFSR_BERR) 2101 pcb_flag |= ASYNC_BERR; 2102 2103 if (t_sfsr & SFSR_TO) 2104 pcb_flag |= ASYNC_BTO; 2105 2106 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 2107 aston(curthread); 2108 } 2109 } 2110 2111 /*ARGSUSED*/ 2112 void 2113 opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl) 2114 { 2115 opl_async_flt_t opl_flt; 2116 struct async_flt *aflt; 2117 char pr_reason[MAX_REASON_STRING]; 2118 2119 /* normalize tl */ 2120 tl = (tl >= 2 ? 1 : 0); 2121 pr_reason[0] = '\0'; 2122 2123 bzero(&opl_flt, sizeof (opl_async_flt_t)); 2124 aflt = (struct async_flt *)&opl_flt; 2125 aflt->flt_id = gethrtime_waitfree(); 2126 aflt->flt_bus_id = getprocessorid(); 2127 aflt->flt_inst = CPU->cpu_id; 2128 aflt->flt_stat = p_ugesr; 2129 aflt->flt_pc = (caddr_t)rp->r_pc; 2130 aflt->flt_class = (uchar_t)CPU_FAULT; 2131 aflt->flt_tl = tl; 2132 aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ? 2133 1 : 0)); 2134 aflt->flt_status = OPL_ECC_URGENT_TRAP; 2135 aflt->flt_panic = 1; 2136 /* 2137 * HW does not set mod/sid in case of urgent error. 2138 * So we have to set it here. 2139 */ 2140 opl_flt.flt_eid_mod = OPL_ERRID_CPU; 2141 opl_flt.flt_eid_sid = aflt->flt_inst; 2142 2143 if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) { 2144 opl_flt.flt_type = OPL_CPU_INV_UGESR; 2145 aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT; 2146 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG, (void *)&opl_flt, 2147 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 2148 } 2149 2150 fm_panic("Urgent Error"); 2151 } 2152 2153 /* 2154 * Initialization error counters resetting. 
2155 */ 2156 /* ARGSUSED */ 2157 static void 2158 opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when) 2159 { 2160 hdlr->cyh_func = (cyc_func_t)ras_cntr_reset; 2161 hdlr->cyh_level = CY_LOW_LEVEL; 2162 hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id; 2163 2164 when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU); 2165 when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval; 2166 } 2167 2168 void 2169 cpu_mp_init(void) 2170 { 2171 cyc_omni_handler_t hdlr; 2172 2173 hdlr.cyo_online = opl_ras_online; 2174 hdlr.cyo_offline = NULL; 2175 hdlr.cyo_arg = NULL; 2176 mutex_enter(&cpu_lock); 2177 (void) cyclic_add_omni(&hdlr); 2178 mutex_exit(&cpu_lock); 2179 } 2180 2181 int heaplp_use_stlb = 0; 2182 2183 void 2184 mmu_init_kernel_pgsz(struct hat *hat) 2185 { 2186 uint_t tte = page_szc(segkmem_lpsize); 2187 uchar_t new_cext_primary, new_cext_nucleus; 2188 2189 if (heaplp_use_stlb == 0) { 2190 /* do not reprogram stlb */ 2191 tte = TTE8K; 2192 } else if (!plat_prom_preserve_kctx_is_supported()) { 2193 /* OBP does not support non-zero primary context */ 2194 tte = TTE8K; 2195 heaplp_use_stlb = 0; 2196 } 2197 2198 new_cext_nucleus = TAGACCEXT_MKSZPAIR(tte, TTE8K); 2199 new_cext_primary = TAGACCEXT_MKSZPAIR(TTE8K, tte); 2200 2201 hat->sfmmu_cext = new_cext_primary; 2202 kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) | 2203 ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT); 2204 } 2205 2206 size_t 2207 mmu_get_kernel_lpsize(size_t lpsize) 2208 { 2209 uint_t tte; 2210 2211 if (lpsize == 0) { 2212 /* no setting for segkmem_lpsize in /etc/system: use default */ 2213 return (MMU_PAGESIZE4M); 2214 } 2215 2216 for (tte = TTE8K; tte <= TTE4M; tte++) { 2217 if (lpsize == TTEBYTES(tte)) 2218 return (lpsize); 2219 } 2220 2221 return (TTEBYTES(TTE8K)); 2222 } 2223 2224 /* 2225 * Support for ta 3. 2226 * We allocate here a buffer for each cpu 2227 * for saving the current register window. 
2228 */ 2229 typedef struct win_regs { 2230 uint64_t l[8]; 2231 uint64_t i[8]; 2232 } win_regs_t; 2233 static void 2234 opl_ta3(void) 2235 { 2236 /* 2237 * opl_ta3 should only be called once at boot time. 2238 */ 2239 if (opl_ta3_save == NULL) 2240 opl_ta3_save = (char *)kmem_alloc(NCPU * sizeof (win_regs_t), 2241 KM_SLEEP); 2242 } 2243 2244 /* 2245 * The following are functions that are unused in 2246 * OPL cpu module. They are defined here to resolve 2247 * dependencies in the "unix" module. 2248 * Unused functions that should never be called in 2249 * OPL are coded with ASSERT(0). 2250 */ 2251 2252 void 2253 cpu_disable_errors(void) 2254 {} 2255 2256 void 2257 cpu_enable_errors(void) 2258 { ASSERT(0); } 2259 2260 /*ARGSUSED*/ 2261 void 2262 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t) 2263 { ASSERT(0); } 2264 2265 /*ARGSUSED*/ 2266 void 2267 cpu_faulted_enter(struct cpu *cp) 2268 {} 2269 2270 /*ARGSUSED*/ 2271 void 2272 cpu_faulted_exit(struct cpu *cp) 2273 {} 2274 2275 /*ARGSUSED*/ 2276 void 2277 cpu_check_allcpus(struct async_flt *aflt) 2278 {} 2279 2280 /*ARGSUSED*/ 2281 void 2282 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t) 2283 { ASSERT(0); } 2284 2285 /*ARGSUSED*/ 2286 void 2287 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz) 2288 { ASSERT(0); } 2289 2290 /*ARGSUSED*/ 2291 void 2292 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 2293 { ASSERT(0); } 2294 2295 /*ARGSUSED*/ 2296 void 2297 cpu_busy_ecache_scrub(struct cpu *cp) 2298 {} 2299 2300 /*ARGSUSED*/ 2301 void 2302 cpu_idle_ecache_scrub(struct cpu *cp) 2303 {} 2304 2305 /* ARGSUSED */ 2306 void 2307 cpu_change_speed(uint64_t divisor, uint64_t arg2) 2308 { ASSERT(0); } 2309 2310 void 2311 cpu_init_cache_scrub(void) 2312 {} 2313 2314 /* ARGSUSED */ 2315 int 2316 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 2317 { 2318 if (&plat_get_mem_sid) { 2319 return (plat_get_mem_sid(unum, buf, buflen, lenp)); 2320 } else { 2321 return (ENOTSUP); 
2322 } 2323 } 2324 2325 /* ARGSUSED */ 2326 int 2327 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) 2328 { 2329 if (&plat_get_mem_addr) { 2330 return (plat_get_mem_addr(unum, sid, offset, addrp)); 2331 } else { 2332 return (ENOTSUP); 2333 } 2334 } 2335 2336 /* ARGSUSED */ 2337 int 2338 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) 2339 { 2340 if (&plat_get_mem_offset) { 2341 return (plat_get_mem_offset(flt_addr, offp)); 2342 } else { 2343 return (ENOTSUP); 2344 } 2345 } 2346 2347 /*ARGSUSED*/ 2348 void 2349 itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) 2350 { ASSERT(0); } 2351 2352 /*ARGSUSED*/ 2353 void 2354 dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) 2355 { ASSERT(0); } 2356 2357 /*ARGSUSED*/ 2358 void 2359 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err) 2360 { ASSERT(0); } 2361 2362 /*ARGSUSED*/ 2363 int 2364 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp, 2365 errorq_elem_t *eqep, size_t afltoffset) 2366 { 2367 ASSERT(0); 2368 return (0); 2369 } 2370 2371 /*ARGSUSED*/ 2372 char * 2373 flt_to_error_type(struct async_flt *aflt) 2374 { 2375 ASSERT(0); 2376 return (NULL); 2377 } 2378 2379 #define PROM_SPARC64VII_MODE_PROPNAME "SPARC64-VII-mode" 2380 2381 /* 2382 * Check for existence of OPL OBP property that indicates 2383 * SPARC64-VII support. By default, only enable Jupiter 2384 * features if the property is present. It will be 2385 * present in all-Jupiter domains by OBP if the domain has 2386 * been selected by the user on the system controller to 2387 * run in Jupiter mode. Basically, this OBP property must 2388 * be present to turn on the cpu_alljupiter flag. 2389 */ 2390 static int 2391 prom_SPARC64VII_support_enabled(void) 2392 { 2393 int val; 2394 2395 return ((prom_getprop(prom_rootnode(), PROM_SPARC64VII_MODE_PROPNAME, 2396 (caddr_t)&val) == 0) ? 
1 : 0); 2397 } 2398 2399 #define PROM_KCTX_PRESERVED_PROPNAME "context0-page-size-preserved" 2400 2401 /* 2402 * Check for existence of OPL OBP property that indicates support for 2403 * preserving Solaris kernel page sizes when entering OBP. We need to 2404 * check the prom tree since the ddi tree is not yet built when the 2405 * platform startup sequence is called. 2406 */ 2407 static int 2408 plat_prom_preserve_kctx_is_supported(void) 2409 { 2410 pnode_t pnode; 2411 int val; 2412 2413 /* 2414 * Check for existence of context0-page-size-preserved property 2415 * in virtual-memory prom node. 2416 */ 2417 pnode = (pnode_t)prom_getphandle(prom_mmu_ihandle()); 2418 return ((prom_getprop(pnode, PROM_KCTX_PRESERVED_PROPNAME, 2419 (caddr_t)&val) == 0) ? 1 : 0); 2420 } 2421