1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Support for Olympus-C (SPARC64-VI) and Jupiter (SPARC64-VII). 
28 */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/ddi.h> 35 #include <sys/sysmacros.h> 36 #include <sys/archsystm.h> 37 #include <sys/vmsystm.h> 38 #include <sys/machparam.h> 39 #include <sys/machsystm.h> 40 #include <sys/machthread.h> 41 #include <sys/cpu.h> 42 #include <sys/cmp.h> 43 #include <sys/elf_SPARC.h> 44 #include <vm/vm_dep.h> 45 #include <vm/hat_sfmmu.h> 46 #include <vm/seg_kpm.h> 47 #include <vm/seg_kmem.h> 48 #include <sys/cpuvar.h> 49 #include <sys/opl_olympus_regs.h> 50 #include <sys/opl_module.h> 51 #include <sys/async.h> 52 #include <sys/cmn_err.h> 53 #include <sys/debug.h> 54 #include <sys/dditypes.h> 55 #include <sys/cpu_module.h> 56 #include <sys/sysmacros.h> 57 #include <sys/intreg.h> 58 #include <sys/clock.h> 59 #include <sys/platform_module.h> 60 #include <sys/ontrap.h> 61 #include <sys/panic.h> 62 #include <sys/memlist.h> 63 #include <sys/ndifm.h> 64 #include <sys/ddifm.h> 65 #include <sys/fm/protocol.h> 66 #include <sys/fm/util.h> 67 #include <sys/fm/cpu/SPARC64-VI.h> 68 #include <sys/dtrace.h> 69 #include <sys/watchpoint.h> 70 #include <sys/promif.h> 71 72 /* 73 * Internal functions. 74 */ 75 static int cpu_sync_log_err(void *flt); 76 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *); 77 static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t); 78 static int cpu_flt_in_memory(opl_async_flt_t *, uint64_t); 79 static int prom_SPARC64VII_support_enabled(void); 80 static void opl_ta3(); 81 static int plat_prom_preserve_kctx_is_supported(void); 82 83 /* 84 * Error counters resetting interval. 85 */ 86 static int opl_async_check_interval = 60; /* 1 min */ 87 88 uint_t cpu_impl_dual_pgsz = 1; 89 90 /* 91 * PA[22:0] represent Displacement in Jupiter 92 * configuration space. 
93 */ 94 uint_t root_phys_addr_lo_mask = 0x7fffffu; 95 96 /* 97 * set in /etc/system to control logging of user BERR/TO's 98 */ 99 int cpu_berr_to_verbose = 0; 100 101 /* 102 * Set to 1 if booted with all Jupiter cpus (all-Jupiter features enabled). 103 */ 104 int cpu_alljupiter = 0; 105 106 /* 107 * The sfmmu_cext field to be used by processes in a shared context domain. 108 */ 109 static uchar_t shctx_cext = TAGACCEXT_MKSZPAIR(DEFAULT_ISM_PAGESZC, TTE8K); 110 111 static int min_ecache_size; 112 static uint_t priv_hcl_1; 113 static uint_t priv_hcl_2; 114 static uint_t priv_hcl_4; 115 static uint_t priv_hcl_8; 116 117 /* 118 * Olympus error log 119 */ 120 static opl_errlog_t *opl_err_log; 121 static int opl_cpu0_log_setup; 122 123 /* 124 * OPL ta 3 save area. 125 */ 126 char *opl_ta3_save; 127 128 /* 129 * UE is classified into four classes (MEM, CHANNEL, CPU, PATH). 130 * No any other ecc_type_info insertion is allowed in between the following 131 * four UE classess. 132 */ 133 ecc_type_to_info_t ecc_type_to_info[] = { 134 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 135 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 136 FM_EREPORT_CPU_UE_MEM, 137 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 138 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 139 FM_EREPORT_CPU_UE_CHANNEL, 140 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 141 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 142 FM_EREPORT_CPU_UE_CPU, 143 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 144 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 145 FM_EREPORT_CPU_UE_PATH, 146 SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 147 "Bus Error", FM_EREPORT_PAYLOAD_SYNC, 148 FM_EREPORT_CPU_BERR, 149 SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 150 "Bus Timeout", FM_EREPORT_PAYLOAD_SYNC, 151 FM_EREPORT_CPU_BTO, 152 SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 153 "TLB MultiHit", FM_EREPORT_PAYLOAD_SYNC, 154 FM_EREPORT_CPU_MTLB, 155 
SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 156 "TLB Parity", FM_EREPORT_PAYLOAD_SYNC, 157 FM_EREPORT_CPU_TLBP, 158 159 UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 160 "IAUG CRE", FM_EREPORT_PAYLOAD_URGENT, 161 FM_EREPORT_CPU_CRE, 162 UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT", 163 OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 164 "IAUG TSBCTXT", FM_EREPORT_PAYLOAD_URGENT, 165 FM_EREPORT_CPU_TSBCTX, 166 UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 167 "IUG TSBP", FM_EREPORT_PAYLOAD_URGENT, 168 FM_EREPORT_CPU_TSBP, 169 UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 170 "IUG PSTATE", FM_EREPORT_PAYLOAD_URGENT, 171 FM_EREPORT_CPU_PSTATE, 172 UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 173 "IUG TSTATE", FM_EREPORT_PAYLOAD_URGENT, 174 FM_EREPORT_CPU_TSTATE, 175 UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 176 "IUG FREG", FM_EREPORT_PAYLOAD_URGENT, 177 FM_EREPORT_CPU_IUG_F, 178 UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 179 "IUG RREG", FM_EREPORT_PAYLOAD_URGENT, 180 FM_EREPORT_CPU_IUG_R, 181 UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 182 "AUG SDC", FM_EREPORT_PAYLOAD_URGENT, 183 FM_EREPORT_CPU_SDC, 184 UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 185 "IUG WDT", FM_EREPORT_PAYLOAD_URGENT, 186 FM_EREPORT_CPU_WDT, 187 UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 188 "IUG DTLB", FM_EREPORT_PAYLOAD_URGENT, 189 FM_EREPORT_CPU_DTLB, 190 UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 191 "IUG ITLB", FM_EREPORT_PAYLOAD_URGENT, 192 FM_EREPORT_CPU_ITLB, 193 UGESR_IUG_COREERR, "IUG_COREERR", 194 OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 195 "IUG COREERR", FM_EREPORT_PAYLOAD_URGENT, 196 FM_EREPORT_CPU_CORE, 197 UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 198 "MULTI DAE", FM_EREPORT_PAYLOAD_URGENT, 199 FM_EREPORT_CPU_DAE, 200 UGESR_MULTI_IAE, "MULTI_IAE", 
OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 201 "MULTI IAE", FM_EREPORT_PAYLOAD_URGENT, 202 FM_EREPORT_CPU_IAE, 203 UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 204 "MULTI UGE", FM_EREPORT_PAYLOAD_URGENT, 205 FM_EREPORT_CPU_UGE, 206 0, NULL, 0, 0, 207 NULL, 0, 0, 208 }; 209 210 int (*p2get_mem_info)(int synd_code, uint64_t paddr, 211 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 212 int *segsp, int *banksp, int *mcidp); 213 214 215 /* 216 * Setup trap handlers for 0xA, 0x32, 0x40 trap types 217 * and "ta 3" and "ta 4". 218 */ 219 void 220 cpu_init_trap(void) 221 { 222 OPL_SET_TRAP(tt0_iae, opl_serr_instr); 223 OPL_SET_TRAP(tt1_iae, opl_serr_instr); 224 OPL_SET_TRAP(tt0_dae, opl_serr_instr); 225 OPL_SET_TRAP(tt1_dae, opl_serr_instr); 226 OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr); 227 OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr); 228 OPL_SET_TRAP(tt0_flushw, opl_ta3_instr); 229 OPL_PATCH_28(opl_cleanw_patch, opl_ta4_instr); 230 } 231 232 static int 233 getintprop(pnode_t node, char *name, int deflt) 234 { 235 int value; 236 237 switch (prom_getproplen(node, name)) { 238 case sizeof (int): 239 (void) prom_getprop(node, name, (caddr_t)&value); 240 break; 241 242 default: 243 value = deflt; 244 break; 245 } 246 247 return (value); 248 } 249 250 /* 251 * Set the magic constants of the implementation. 
252 */ 253 /*ARGSUSED*/ 254 void 255 cpu_fiximp(pnode_t dnode) 256 { 257 int i, a; 258 extern int vac_size, vac_shift; 259 extern uint_t vac_mask; 260 261 static struct { 262 char *name; 263 int *var; 264 int defval; 265 } prop[] = { 266 "l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE, 267 "l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE, 268 "l1-icache-size", &icache_size, OPL_ICACHE_SIZE, 269 "l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE, 270 "l2-cache-size", &ecache_size, OPL_ECACHE_SIZE, 271 "l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE, 272 "l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY 273 }; 274 275 for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) 276 *prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval); 277 278 ecache_setsize = ecache_size / ecache_associativity; 279 280 vac_size = OPL_VAC_SIZE; 281 vac_mask = MMU_PAGEMASK & (vac_size - 1); 282 i = 0; a = vac_size; 283 while (a >>= 1) 284 ++i; 285 vac_shift = i; 286 shm_alignment = vac_size; 287 vac = 1; 288 } 289 290 /* 291 * Enable features for Jupiter-only domains. 292 */ 293 void 294 cpu_fix_alljupiter(void) 295 { 296 if (!prom_SPARC64VII_support_enabled()) { 297 /* 298 * Do not enable all-Jupiter features and do not turn on 299 * the cpu_alljupiter flag. 300 */ 301 return; 302 } 303 304 cpu_alljupiter = 1; 305 306 /* 307 * Enable ima hwcap for Jupiter-only domains. DR will prevent 308 * addition of Olympus-C to all-Jupiter domains to preserve ima 309 * hwcap semantics. 310 */ 311 cpu_hwcap_flags |= AV_SPARC_IMA; 312 313 /* 314 * Disable shared context support. 315 */ 316 shctx_on = 0; 317 } 318 319 #ifdef OLYMPUS_C_REV_B_ERRATA_XCALL 320 /* 321 * Quick and dirty way to redefine locally in 322 * OPL the value of IDSR_BN_SETS to 31 instead 323 * of the standard 32 value. This is to workaround 324 * REV_B of Olympus_c processor's problem in handling 325 * more than 31 xcall broadcast. 
 */
#undef IDSR_BN_SETS
#define IDSR_BN_SETS 31
#endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */

/*
 * Send a mondo to every CPU in 'set' and wait until all of them have been
 * accepted.
 *
 * Dispatches go out through shipit(cpu, slot); cpuids[slot] remembers which
 * CPU currently occupies each slot.  At most IDSR_BN_SETS slots are used
 * (bn_sets when OLYMPUS_C_REV_A_ERRATA_XCALL is defined, which drops to 1 on
 * an Olympus-C rev A part).  When NCPU exceeds IDSR_BN_SETS, the targets
 * that did not fit in the first pass are shipped later, from the highest
 * CPU id downwards, as slots become free.
 *
 * The routine then polls getidsr():
 *  - slots still BUSY are simply polled again;
 *  - slots that were NACKed are re-shipped after a short spin;
 *  - the loop ends when the IDSR reads zero (and, for the large-NCPU
 *    variant, every target has been shipped).
 *
 * If xc_tick_limit ticks elapse (extended whenever a gap larger than
 * xc_tick_jump_limit is seen between polls), the routine returns silently
 * when panic_quiesce is set and otherwise panics.
 *
 * 'set' must not be empty.
 */
void
send_mondo_set(cpuset_t set)
{
	int lo, busy, nack, shipped = 0;
	uint16_t i, cpuids[IDSR_BN_SETS];
	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
	uint64_t starttick, endtick, tick, lasttick;
#if (NCPU > IDSR_BN_SETS)
	int index = 0;		/* highest CPU id not shipped in first pass */
	int ncpuids = 0;	/* number of targets found in 'set' */
#endif
#ifdef OLYMPUS_C_REV_A_ERRATA_XCALL
	int bn_sets = IDSR_BN_SETS;
	uint64_t ver;

	ASSERT(NCPU > bn_sets);
#endif

	ASSERT(!CPUSET_ISNULL(set));
	starttick = lasttick = gettick();

#ifdef OLYMPUS_C_REV_A_ERRATA_XCALL
	/* Olympus-C rev A: use a single dispatch slot. */
	ver = ultra_getver();
	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
	    ((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
		bn_sets = 1;
#endif

#if (NCPU <= IDSR_BN_SETS)
	/* Every target fits in a slot: ship them all in one pass. */
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			shipit(i, shipped);
			nackmask |= IDSR_NACK_BIT(shipped);
			cpuids[shipped++] = i;
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
#else
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			ncpuids++;

			/*
			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
			 * find we have shipped to more than (IDSR_BN_SETS)
			 * CPUs, set "index" to the highest numbered CPU in
			 * the set so we can ship to other CPUs a bit later on.
			 */
#ifdef OLYMPUS_C_REV_A_ERRATA_XCALL
			if (shipped < bn_sets) {
#else
			if (shipped < IDSR_BN_SETS) {
#endif
				shipit(i, shipped);
				nackmask |= IDSR_NACK_BIT(shipped);
				cpuids[shipped++] = i;
				CPUSET_DEL(set, i);
				if (CPUSET_ISNULL(set))
					break;
			} else
				index = (int)i;
		}

	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
#endif

	/* BUSY bits that correspond to the slots used above. */
	busymask = IDSR_NACK_TO_BUSY(nackmask);
	busy = nack = 0;
	endtick = starttick + xc_tick_limit;
	for (;;) {
		idsr = getidsr();
#if (NCPU <= IDSR_BN_SETS)
		if (idsr == 0)
			break;
#else
		if (idsr == 0 && shipped == ncpuids)
			break;
#endif
		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			/* Name every CPU whose slot is still NACK or BUSY. */
			cmn_err(CE_CONT, "send mondo timeout [%d NACK %d "
			    "BUSY]\nIDSR 0x%" PRIx64 " cpuids:",
			    nack, busy, idsr);
#ifdef OLYMPUS_C_REV_A_ERRATA_XCALL
			for (i = 0; i < bn_sets; i++) {
#else
			for (i = 0; i < IDSR_BN_SETS; i++) {
#endif
				if (idsr & (IDSR_NACK_BIT(i) |
				    IDSR_BUSY_BIT(i))) {
					cmn_err(CE_CONT, " 0x%x", cpuids[i]);
				}
			}
			cmn_err(CE_CONT, "\n");
			cmn_err(CE_PANIC, "send_mondo_set: timeout");
		}
		curnack = idsr & nackmask;
		curbusy = idsr & busymask;

#ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
		/*
		 * Only proceed to send more xcalls if all the
		 * cpus in the previous IDSR_BN_SETS were completed.
		 */
		if (curbusy) {
			busy++;
			continue;
		}
#endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */

#if (NCPU > IDSR_BN_SETS)
		if (shipped < ncpuids) {
			uint64_t cpus_left;
			uint16_t next = (uint16_t)index;

			/* Slots that are neither NACKed nor BUSY are free. */
			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
			    busymask;

			if (cpus_left) {
				do {
					/*
					 * Sequence through and ship to the
					 * remainder of the CPUs in the system
					 * (e.g. other than the first
					 * (IDSR_BN_SETS)) in reverse order.
					 */
					lo = lowbit(cpus_left) - 1;
					i = IDSR_BUSY_IDX(lo);
					shipit(next, i);
					shipped++;
					cpuids[i] = next;

					/*
					 * If we've processed all the CPUs,
					 * exit the loop now and save
					 * instructions.
					 */
					if (shipped == ncpuids)
						break;

					/* Find the next lower CPU in 'set'. */
					for ((index = ((int)next - 1));
					    index >= 0; index--)
						if (CPU_IN_SET(set, index)) {
							next = (uint16_t)index;
							break;
						}

					cpus_left &= ~(1ull << lo);
				} while (cpus_left);
				continue;
			}
		}
#endif
#ifndef OLYMPUS_C_REV_B_ERRATA_XCALL
		if (curbusy) {
			busy++;
			continue;
		}
#endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */
#ifdef SEND_MONDO_STATS
		{
			int n = gettick() - starttick;
			if (n < 8192)
				x_nack_stimes[n >> 7]++;
		}
#endif
		/* Spin for sys_clock_mhz ticks before re-sending. */
		while (gettick() < (tick + sys_clock_mhz))
			;
		/* Re-ship every slot that reported NACK. */
		do {
			lo = lowbit(curnack) - 1;
			i = IDSR_NACK_IDX(lo);
			shipit(cpuids[i], i);
			curnack &= ~(1ull << lo);
		} while (curnack);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_set_stimes[n >> 7]++;
		else
			x_set_ltimes[(n >> 13) & 0xf]++;
	}
	x_set_cpus[shipped]++;
#endif
}

/*
 * Cpu private initialization.
534 */ 535 void 536 cpu_init_private(struct cpu *cp) 537 { 538 if (!((IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation)) || 539 (IS_JUPITER(cpunodes[cp->cpu_id].implementation)))) { 540 cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI(I) is " 541 "supported", cp->cpu_id, 542 cpunodes[cp->cpu_id].implementation); 543 } 544 545 adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size); 546 } 547 548 void 549 cpu_setup(void) 550 { 551 extern int at_flags; 552 extern int cpc_has_overflow_intr; 553 uint64_t cpu0_log; 554 extern uint64_t opl_cpu0_err_log; 555 556 /* 557 * Initialize Error log Scratch register for error handling. 558 */ 559 560 cpu0_log = va_to_pa(&opl_cpu0_err_log); 561 opl_error_setup(cpu0_log); 562 opl_cpu0_log_setup = 1; 563 564 /* 565 * Enable MMU translating multiple page sizes for 566 * sITLB and sDTLB. 567 */ 568 opl_mpg_enable(); 569 570 /* 571 * Setup chip-specific trap handlers. 572 */ 573 cpu_init_trap(); 574 575 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); 576 577 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3; 578 579 /* 580 * Due to the number of entries in the fully-associative tlb 581 * this may have to be tuned lower than in spitfire. 582 */ 583 pp_slots = MIN(8, MAXPP_SLOTS); 584 585 /* 586 * Block stores do not invalidate all pages of the d$, pagecopy 587 * et. al. need virtual translations with virtual coloring taken 588 * into consideration. prefetch/ldd will pollute the d$ on the 589 * load side. 
590 */ 591 pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE; 592 593 if (use_page_coloring) { 594 do_pg_coloring = 1; 595 } 596 597 isa_list = 598 "sparcv9+vis2 sparcv9+vis sparcv9 " 599 "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus " 600 "sparcv8 sparcv8-fsmuld sparcv7 sparc"; 601 602 cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2 | 603 AV_SPARC_POPC | AV_SPARC_FMAF; 604 605 /* 606 * On SPARC64-VI, there's no hole in the virtual address space 607 */ 608 hole_start = hole_end = 0; 609 610 /* 611 * The kpm mapping window. 612 * kpm_size: 613 * The size of a single kpm range. 614 * The overall size will be: kpm_size * vac_colors. 615 * kpm_vbase: 616 * The virtual start address of the kpm range within the kernel 617 * virtual address space. kpm_vbase has to be kpm_size aligned. 618 */ 619 kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */ 620 kpm_size_shift = 47; 621 kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */ 622 kpm_smallpages = 1; 623 624 /* 625 * The traptrace code uses either %tick or %stick for 626 * timestamping. We have %stick so we can use it. 627 */ 628 traptrace_use_stick = 1; 629 630 /* 631 * SPARC64-VI has a performance counter overflow interrupt 632 */ 633 cpc_has_overflow_intr = 1; 634 635 /* 636 * Declare that this architecture/cpu combination does not support 637 * fpRAS. 638 */ 639 fpras_implemented = 0; 640 } 641 642 /* 643 * Called by setcpudelay 644 */ 645 void 646 cpu_init_tick_freq(void) 647 { 648 /* 649 * For SPARC64-VI we want to use the system clock rate as 650 * the basis for low level timing, due to support of mixed 651 * speed CPUs and power managment. 
652 */ 653 if (system_clock_freq == 0) 654 cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq"); 655 656 sys_tick_freq = system_clock_freq; 657 } 658 659 #ifdef SEND_MONDO_STATS 660 uint32_t x_one_stimes[64]; 661 uint32_t x_one_ltimes[16]; 662 uint32_t x_set_stimes[64]; 663 uint32_t x_set_ltimes[16]; 664 uint32_t x_set_cpus[NCPU]; 665 uint32_t x_nack_stimes[64]; 666 #endif 667 668 /* 669 * Note: A version of this function is used by the debugger via the KDI, 670 * and must be kept in sync with this version. Any changes made to this 671 * function to support new chips or to accomodate errata must also be included 672 * in the KDI-specific version. See us3_kdi.c. 673 */ 674 void 675 send_one_mondo(int cpuid) 676 { 677 int busy, nack; 678 uint64_t idsr, starttick, endtick, tick, lasttick; 679 uint64_t busymask; 680 681 CPU_STATS_ADDQ(CPU, sys, xcalls, 1); 682 starttick = lasttick = gettick(); 683 shipit(cpuid, 0); 684 endtick = starttick + xc_tick_limit; 685 busy = nack = 0; 686 busymask = IDSR_BUSY; 687 for (;;) { 688 idsr = getidsr(); 689 if (idsr == 0) 690 break; 691 692 tick = gettick(); 693 /* 694 * If there is a big jump between the current tick 695 * count and lasttick, we have probably hit a break 696 * point. Adjust endtick accordingly to avoid panic. 
697 */ 698 if (tick > (lasttick + xc_tick_jump_limit)) 699 endtick += (tick - lasttick); 700 lasttick = tick; 701 if (tick > endtick) { 702 if (panic_quiesce) 703 return; 704 cmn_err(CE_PANIC, "send mondo timeout (target 0x%x) " 705 "[%d NACK %d BUSY]", cpuid, nack, busy); 706 } 707 708 if (idsr & busymask) { 709 busy++; 710 continue; 711 } 712 drv_usecwait(1); 713 shipit(cpuid, 0); 714 nack++; 715 busy = 0; 716 } 717 #ifdef SEND_MONDO_STATS 718 { 719 int n = gettick() - starttick; 720 if (n < 8192) 721 x_one_stimes[n >> 7]++; 722 else 723 x_one_ltimes[(n >> 13) & 0xf]++; 724 } 725 #endif 726 } 727 728 /* 729 * init_mmu_page_sizes is set to one after the bootup time initialization 730 * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a 731 * valid value. 732 * 733 * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific 734 * versions of disable_ism_large_pages and disable_large_pages, and feed back 735 * into those two hat variables at hat initialization time. 736 * 737 */ 738 int init_mmu_page_sizes = 0; 739 740 static uint_t mmu_disable_large_pages = 0; 741 static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) | 742 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 743 static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 744 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 745 static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) | 746 (1 << TTE512K)); 747 748 /* 749 * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support. 750 * Called during very early bootup from check_cpus_set(). 751 * Can be called to verify that mmu_page_sizes are set up correctly. 752 * 753 * Set Olympus defaults. We do not use the function parameter. 
754 */ 755 /*ARGSUSED*/ 756 int 757 mmu_init_mmu_page_sizes(int32_t not_used) 758 { 759 if (!init_mmu_page_sizes) { 760 mmu_page_sizes = MMU_PAGE_SIZES; 761 mmu_hashcnt = MAX_HASHCNT; 762 mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE; 763 mmu_exported_pagesize_mask = (1 << TTE8K) | 764 (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | 765 (1 << TTE32M) | (1 << TTE256M); 766 init_mmu_page_sizes = 1; 767 return (0); 768 } 769 return (1); 770 } 771 772 /* SPARC64-VI worst case DTLB parameters */ 773 #ifndef LOCKED_DTLB_ENTRIES 774 #define LOCKED_DTLB_ENTRIES 5 /* 2 user TSBs, 2 nucleus, + OBP */ 775 #endif 776 #define TOTAL_DTLB_ENTRIES 32 777 #define AVAIL_32M_ENTRIES 0 778 #define AVAIL_256M_ENTRIES 0 779 #define AVAIL_DTLB_ENTRIES (TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES) 780 static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = { 781 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 782 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 783 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES}; 784 785 /* 786 * The function returns the mmu-specific values for the 787 * hat's disable_large_pages, disable_ism_large_pages, and 788 * disable_auto_data_large_pages and 789 * disable_text_data_large_pages variables. 790 */ 791 uint_t 792 mmu_large_pages_disabled(uint_t flag) 793 { 794 uint_t pages_disable = 0; 795 extern int use_text_pgsz64K; 796 extern int use_text_pgsz512K; 797 798 if (flag == HAT_LOAD) { 799 pages_disable = mmu_disable_large_pages; 800 } else if (flag == HAT_LOAD_SHARE) { 801 pages_disable = mmu_disable_ism_large_pages; 802 } else if (flag == HAT_AUTO_DATA) { 803 pages_disable = mmu_disable_auto_data_large_pages; 804 } else if (flag == HAT_AUTO_TEXT) { 805 pages_disable = mmu_disable_auto_text_large_pages; 806 if (use_text_pgsz512K) { 807 pages_disable &= ~(1 << TTE512K); 808 } 809 if (use_text_pgsz64K) { 810 pages_disable &= ~(1 << TTE64K); 811 } 812 } 813 return (pages_disable); 814 } 815 816 /* 817 * mmu_init_large_pages is called with the desired ism_pagesize parameter. 
818 * It may be called from set_platform_defaults, if some value other than 4M 819 * is desired. mmu_ism_pagesize is the tunable. If it has a bad value, 820 * then only warn, since it would be bad form to panic due to a user typo. 821 * 822 * The function re-initializes the mmu_disable_ism_large_pages variable. 823 */ 824 void 825 mmu_init_large_pages(size_t ism_pagesize) 826 { 827 828 switch (ism_pagesize) { 829 case MMU_PAGESIZE4M: 830 mmu_disable_ism_large_pages = ((1 << TTE64K) | 831 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 832 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 833 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 834 shctx_cext = TAGACCEXT_MKSZPAIR(TTE4M, TTE8K); 835 break; 836 case MMU_PAGESIZE32M: 837 mmu_disable_ism_large_pages = ((1 << TTE64K) | 838 (1 << TTE512K) | (1 << TTE256M)); 839 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 840 (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M)); 841 adjust_data_maxlpsize(ism_pagesize); 842 shctx_cext = TAGACCEXT_MKSZPAIR(TTE32M, TTE8K); 843 break; 844 case MMU_PAGESIZE256M: 845 mmu_disable_ism_large_pages = ((1 << TTE64K) | 846 (1 << TTE512K) | (1 << TTE32M)); 847 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 848 (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M)); 849 adjust_data_maxlpsize(ism_pagesize); 850 shctx_cext = TAGACCEXT_MKSZPAIR(TTE256M, TTE8K); 851 break; 852 default: 853 cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx", 854 ism_pagesize); 855 break; 856 } 857 } 858 859 /* 860 * Function to reprogram the TLBs when page sizes used 861 * by a process change significantly. 862 */ 863 static void 864 mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz) 865 { 866 uint8_t pgsz0, pgsz1; 867 868 /* 869 * Don't program 2nd dtlb for kernel and ism hat 870 */ 871 ASSERT(hat->sfmmu_ismhat == NULL); 872 ASSERT(hat != ksfmmup); 873 874 /* 875 * hat->sfmmu_pgsz[] is an array whose elements 876 * contain a sorted order of page sizes. 
Element 877 * 0 is the most commonly used page size, followed 878 * by element 1, and so on. 879 * 880 * ttecnt[] is an array of per-page-size page counts 881 * mapped into the process. 882 * 883 * If the HAT's choice for page sizes is unsuitable, 884 * we can override it here. The new values written 885 * to the array will be handed back to us later to 886 * do the actual programming of the TLB hardware. 887 * 888 */ 889 pgsz0 = (uint8_t)MIN(tmp_pgsz[0], tmp_pgsz[1]); 890 pgsz1 = (uint8_t)MAX(tmp_pgsz[0], tmp_pgsz[1]); 891 892 /* 893 * This implements PAGESIZE programming of the sTLB 894 * if large TTE counts don't exceed the thresholds. 895 */ 896 if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0]) 897 pgsz0 = page_szc(MMU_PAGESIZE); 898 if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1]) 899 pgsz1 = page_szc(MMU_PAGESIZE); 900 tmp_pgsz[0] = pgsz0; 901 tmp_pgsz[1] = pgsz1; 902 /* otherwise, accept what the HAT chose for us */ 903 } 904 905 /* 906 * The HAT calls this function when an MMU context is allocated so that we 907 * can reprogram the large TLBs appropriately for the new process using 908 * the context. 909 * 910 * The caller must hold the HAT lock. 911 */ 912 void 913 mmu_set_ctx_page_sizes(struct hat *hat) 914 { 915 uint8_t pgsz0, pgsz1; 916 uint8_t new_cext; 917 918 ASSERT(sfmmu_hat_lock_held(hat)); 919 /* 920 * Don't program 2nd dtlb for kernel and ism hat 921 */ 922 if (hat->sfmmu_ismhat || hat == ksfmmup) 923 return; 924 925 /* 926 * If supported, reprogram the TLBs to a larger pagesize. 927 */ 928 if (hat->sfmmu_scdp != NULL) { 929 new_cext = shctx_cext; 930 } else { 931 pgsz0 = hat->sfmmu_pgsz[0]; 932 pgsz1 = hat->sfmmu_pgsz[1]; 933 ASSERT(pgsz0 < mmu_page_sizes); 934 ASSERT(pgsz1 < mmu_page_sizes); 935 new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0); 936 } 937 if (hat->sfmmu_cext != new_cext) { 938 #ifdef DEBUG 939 int i; 940 /* 941 * assert cnum should be invalid, this is because pagesize 942 * can only be changed after a proc's ctxs are invalidated. 
943 */ 944 for (i = 0; i < max_mmu_ctxdoms; i++) { 945 ASSERT(hat->sfmmu_ctxs[i].cnum == INVALID_CONTEXT); 946 } 947 #endif /* DEBUG */ 948 hat->sfmmu_cext = new_cext; 949 } 950 /* 951 * sfmmu_setctx_sec() will take care of the 952 * rest of the dirty work for us. 953 */ 954 } 955 956 /* 957 * This function assumes that there are either four or six supported page 958 * sizes and at most two programmable TLBs, so we need to decide which 959 * page sizes are most important and then adjust the TLB page sizes 960 * accordingly (if supported). 961 * 962 * If these assumptions change, this function will need to be 963 * updated to support whatever the new limits are. 964 */ 965 void 966 mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt) 967 { 968 uint64_t sortcnt[MMU_PAGE_SIZES]; 969 uint8_t tmp_pgsz[MMU_PAGE_SIZES]; 970 uint8_t i, j, max; 971 uint16_t oldval, newval; 972 973 /* 974 * We only consider reprogramming the TLBs if one or more of 975 * the two most used page sizes changes and we're using 976 * large pages in this process. 977 */ 978 if (SFMMU_LGPGS_INUSE(sfmmup)) { 979 /* Sort page sizes. */ 980 for (i = 0; i < mmu_page_sizes; i++) { 981 sortcnt[i] = ttecnt[i]; 982 } 983 for (j = 0; j < mmu_page_sizes; j++) { 984 for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) { 985 if (sortcnt[i] > sortcnt[max]) 986 max = i; 987 } 988 tmp_pgsz[j] = max; 989 sortcnt[max] = 0; 990 } 991 992 oldval = sfmmup->sfmmu_pgsz[0] << 8 | sfmmup->sfmmu_pgsz[1]; 993 994 mmu_setup_page_sizes(sfmmup, ttecnt, tmp_pgsz); 995 996 /* Check 2 largest values after the sort. */ 997 newval = tmp_pgsz[0] << 8 | tmp_pgsz[1]; 998 if (newval != oldval) { 999 sfmmu_reprog_pgsz_arr(sfmmup, tmp_pgsz); 1000 } 1001 } 1002 } 1003 1004 /* 1005 * Return processor specific async error structure 1006 * size used. 
1007 */ 1008 int 1009 cpu_aflt_size(void) 1010 { 1011 return (sizeof (opl_async_flt_t)); 1012 } 1013 1014 /* 1015 * The cpu_sync_log_err() function is called via the [uc]e_drain() function to 1016 * post-process CPU events that are dequeued. As such, it can be invoked 1017 * from softint context, from AST processing in the trap() flow, or from the 1018 * panic flow. We decode the CPU-specific data, and take appropriate actions. 1019 * Historically this entry point was used to log the actual cmn_err(9F) text; 1020 * now with FMA it is used to prepare 'flt' to be converted into an ereport. 1021 * With FMA this function now also returns a flag which indicates to the 1022 * caller whether the ereport should be posted (1) or suppressed (0). 1023 */ 1024 /*ARGSUSED*/ 1025 static int 1026 cpu_sync_log_err(void *flt) 1027 { 1028 opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt; 1029 struct async_flt *aflt = (struct async_flt *)flt; 1030 1031 /* 1032 * No extra processing of urgent error events. 1033 * Always generate ereports for these events. 1034 */ 1035 if (aflt->flt_status == OPL_ECC_URGENT_TRAP) 1036 return (1); 1037 1038 /* 1039 * Additional processing for synchronous errors. 1040 */ 1041 switch (opl_flt->flt_type) { 1042 case OPL_CPU_INV_SFSR: 1043 return (1); 1044 1045 case OPL_CPU_SYNC_UE: 1046 /* 1047 * The validity: SFSR_MK_UE bit has been checked 1048 * in opl_cpu_sync_error() 1049 * No more check is required. 1050 * 1051 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W, 1052 * and they have been retrieved in cpu_queue_events() 1053 */ 1054 1055 if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) { 1056 ASSERT(aflt->flt_in_memory); 1057 /* 1058 * We want to skip logging only if ALL the following 1059 * conditions are true: 1060 * 1061 * 1. We are not panicing already. 1062 * 2. The error is a memory error. 1063 * 3. There is only one error. 1064 * 4. The error is on a retired page. 1065 * 5. 
The error occurred under on_trap 1066 * protection AFLT_PROT_EC 1067 */ 1068 if (!panicstr && aflt->flt_prot == AFLT_PROT_EC && 1069 page_retire_check(aflt->flt_addr, NULL) == 0) { 1070 /* 1071 * Do not log an error from 1072 * the retired page 1073 */ 1074 softcall(ecc_page_zero, (void *)aflt->flt_addr); 1075 return (0); 1076 } 1077 if (!panicstr) 1078 cpu_page_retire(opl_flt); 1079 } 1080 return (1); 1081 1082 case OPL_CPU_SYNC_OTHERS: 1083 /* 1084 * For the following error cases, the processor HW does 1085 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt 1086 * to assign appropriate values here to reflect what we 1087 * think is the most likely cause of the problem w.r.t to 1088 * the particular error event. For Buserr and timeout 1089 * error event, we will assign OPL_ERRID_CHANNEL as the 1090 * most likely reason. For TLB parity or multiple hit 1091 * error events, we will assign the reason as 1092 * OPL_ERRID_CPU (cpu related problem) and set the 1093 * flt_eid_sid to point to the cpuid. 1094 */ 1095 1096 if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) { 1097 /* 1098 * flt_eid_sid will not be used for this case. 1099 */ 1100 opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL; 1101 } 1102 if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) { 1103 opl_flt->flt_eid_mod = OPL_ERRID_CPU; 1104 opl_flt->flt_eid_sid = aflt->flt_inst; 1105 } 1106 1107 /* 1108 * In case of no effective error bit 1109 */ 1110 if ((opl_flt->flt_bit & SFSR_ERRS) == 0) { 1111 opl_flt->flt_eid_mod = OPL_ERRID_CPU; 1112 opl_flt->flt_eid_sid = aflt->flt_inst; 1113 } 1114 break; 1115 1116 default: 1117 return (1); 1118 } 1119 return (1); 1120 } 1121 1122 /* 1123 * Retire the bad page that may contain the flushed error. 
1124 */ 1125 void 1126 cpu_page_retire(opl_async_flt_t *opl_flt) 1127 { 1128 struct async_flt *aflt = (struct async_flt *)opl_flt; 1129 (void) page_retire(aflt->flt_addr, PR_UE); 1130 } 1131 1132 /* 1133 * Invoked by error_init() early in startup and therefore before 1134 * startup_errorq() is called to drain any error Q - 1135 * 1136 * startup() 1137 * startup_end() 1138 * error_init() 1139 * cpu_error_init() 1140 * errorq_init() 1141 * errorq_drain() 1142 * start_other_cpus() 1143 * 1144 * The purpose of this routine is to create error-related taskqs. Taskqs 1145 * are used for this purpose because cpu_lock can't be grabbed from interrupt 1146 * context. 1147 * 1148 */ 1149 /*ARGSUSED*/ 1150 void 1151 cpu_error_init(int items) 1152 { 1153 opl_err_log = (opl_errlog_t *) 1154 kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP); 1155 if ((uint64_t)opl_err_log & MMU_PAGEOFFSET) 1156 cmn_err(CE_PANIC, "The base address of the error log " 1157 "is not page aligned"); 1158 } 1159 1160 /* 1161 * We route all errors through a single switch statement. 1162 */ 1163 void 1164 cpu_ue_log_err(struct async_flt *aflt) 1165 { 1166 switch (aflt->flt_class) { 1167 case CPU_FAULT: 1168 if (cpu_sync_log_err(aflt)) 1169 cpu_ereport_post(aflt); 1170 break; 1171 1172 case BUS_FAULT: 1173 bus_async_log_err(aflt); 1174 break; 1175 1176 default: 1177 cmn_err(CE_WARN, "discarding async error %p with invalid " 1178 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 1179 return; 1180 } 1181 } 1182 1183 /* 1184 * Routine for panic hook callback from panic_idle(). 1185 * 1186 * Nothing to do here. 1187 */ 1188 void 1189 cpu_async_panic_callb(void) 1190 { 1191 } 1192 1193 /* 1194 * Routine to return a string identifying the physical name 1195 * associated with a memory/cache error. 
1196 */ 1197 /*ARGSUSED*/ 1198 int 1199 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, 1200 uint64_t flt_addr, int flt_bus_id, int flt_in_memory, 1201 ushort_t flt_status, char *buf, int buflen, int *lenp) 1202 { 1203 int synd_code; 1204 int ret; 1205 1206 /* 1207 * An AFSR of -1 defaults to a memory syndrome. 1208 */ 1209 synd_code = (int)flt_synd; 1210 1211 if (&plat_get_mem_unum) { 1212 if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id, 1213 flt_in_memory, flt_status, buf, buflen, lenp)) != 0) { 1214 buf[0] = '\0'; 1215 *lenp = 0; 1216 } 1217 return (ret); 1218 } 1219 buf[0] = '\0'; 1220 *lenp = 0; 1221 return (ENOTSUP); 1222 } 1223 1224 /* 1225 * Wrapper for cpu_get_mem_unum() routine that takes an 1226 * async_flt struct rather than explicit arguments. 1227 */ 1228 int 1229 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1230 char *buf, int buflen, int *lenp) 1231 { 1232 /* 1233 * We always pass -1 so that cpu_get_mem_unum will interpret this as a 1234 * memory error. 1235 */ 1236 return (cpu_get_mem_unum(synd_status, aflt->flt_synd, 1237 (uint64_t)-1, 1238 aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, 1239 aflt->flt_status, buf, buflen, lenp)); 1240 } 1241 1242 /* 1243 * This routine is a more generic interface to cpu_get_mem_unum() 1244 * that may be used by other modules (e.g. mm). 1245 */ 1246 /*ARGSUSED*/ 1247 int 1248 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1249 char *buf, int buflen, int *lenp) 1250 { 1251 int synd_status, flt_in_memory, ret; 1252 ushort_t flt_status = 0; 1253 char unum[UNUM_NAMLEN]; 1254 1255 /* 1256 * Check for an invalid address. 
1257 */ 1258 if (afar == (uint64_t)-1) 1259 return (ENXIO); 1260 1261 if (synd == (uint64_t)-1) 1262 synd_status = AFLT_STAT_INVALID; 1263 else 1264 synd_status = AFLT_STAT_VALID; 1265 1266 flt_in_memory = (*afsr & SFSR_MEMORY) && 1267 pf_is_memory(afar >> MMU_PAGESHIFT); 1268 1269 ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1270 CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp); 1271 if (ret != 0) 1272 return (ret); 1273 1274 if (*lenp >= buflen) 1275 return (ENAMETOOLONG); 1276 1277 (void) strncpy(buf, unum, buflen); 1278 1279 return (0); 1280 } 1281 1282 /* 1283 * Routine to return memory information associated 1284 * with a physical address and syndrome. 1285 */ 1286 /*ARGSUSED*/ 1287 int 1288 cpu_get_mem_info(uint64_t synd, uint64_t afar, 1289 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1290 int *segsp, int *banksp, int *mcidp) 1291 { 1292 int synd_code = (int)synd; 1293 1294 if (afar == (uint64_t)-1) 1295 return (ENXIO); 1296 1297 if (p2get_mem_info != NULL) 1298 return ((p2get_mem_info)(synd_code, afar, mem_sizep, seg_sizep, 1299 bank_sizep, segsp, banksp, mcidp)); 1300 else 1301 return (ENOTSUP); 1302 } 1303 1304 /* 1305 * Routine to return a string identifying the physical 1306 * name associated with a cpuid. 1307 */ 1308 int 1309 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 1310 { 1311 int ret; 1312 char unum[UNUM_NAMLEN]; 1313 1314 if (&plat_get_cpu_unum) { 1315 if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, 1316 lenp)) != 0) 1317 return (ret); 1318 } else { 1319 return (ENOTSUP); 1320 } 1321 1322 if (*lenp >= buflen) 1323 return (ENAMETOOLONG); 1324 1325 (void) strncpy(buf, unum, *lenp); 1326 1327 return (0); 1328 } 1329 1330 /* 1331 * This routine exports the name buffer size. 
1332 */ 1333 size_t 1334 cpu_get_name_bufsize() 1335 { 1336 return (UNUM_NAMLEN); 1337 } 1338 1339 /* 1340 * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH 1341 */ 1342 void 1343 cpu_flush_ecache(void) 1344 { 1345 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size, 1346 cpunodes[CPU->cpu_id].ecache_linesize); 1347 } 1348 1349 static uint8_t 1350 flt_to_trap_type(struct async_flt *aflt) 1351 { 1352 if (aflt->flt_status & OPL_ECC_ISYNC_TRAP) 1353 return (TRAP_TYPE_ECC_I); 1354 if (aflt->flt_status & OPL_ECC_DSYNC_TRAP) 1355 return (TRAP_TYPE_ECC_D); 1356 if (aflt->flt_status & OPL_ECC_URGENT_TRAP) 1357 return (TRAP_TYPE_URGENT); 1358 return (TRAP_TYPE_UNKNOWN); 1359 } 1360 1361 /* 1362 * Encode the data saved in the opl_async_flt_t struct into 1363 * the FM ereport payload. 1364 */ 1365 /* ARGSUSED */ 1366 static void 1367 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload, 1368 nvlist_t *resource) 1369 { 1370 opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt; 1371 char unum[UNUM_NAMLEN]; 1372 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1373 int len; 1374 1375 1376 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) { 1377 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR, 1378 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1379 } 1380 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) { 1381 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR, 1382 DATA_TYPE_UINT64, aflt->flt_addr, NULL); 1383 } 1384 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) { 1385 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR, 1386 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1387 } 1388 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) { 1389 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC, 1390 DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL); 1391 } 1392 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) { 1393 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL, 1394 DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL); 1395 } 1396 if 
(aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) { 1397 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT, 1398 DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL); 1399 } 1400 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) { 1401 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV, 1402 DATA_TYPE_BOOLEAN_VALUE, 1403 (aflt->flt_priv ? B_TRUE : B_FALSE), NULL); 1404 } 1405 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) { 1406 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS, 1407 DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL); 1408 } 1409 1410 switch (opl_flt->flt_eid_mod) { 1411 case OPL_ERRID_CPU: 1412 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1413 (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id); 1414 (void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION, 1415 NULL, opl_flt->flt_eid_sid, 1416 (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version, sbuf); 1417 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1418 DATA_TYPE_NVLIST, resource, NULL); 1419 break; 1420 1421 case OPL_ERRID_CHANNEL: 1422 /* 1423 * No resource is created but the cpumem DE will find 1424 * the defective path by retreiving EID from SFSR which is 1425 * included in the payload. 1426 */ 1427 break; 1428 1429 case OPL_ERRID_MEM: 1430 (void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len); 1431 (void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL, 1432 unum, NULL, (uint64_t)-1); 1433 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1434 DATA_TYPE_NVLIST, resource, NULL); 1435 break; 1436 1437 case OPL_ERRID_PATH: 1438 /* 1439 * No resource is created but the cpumem DE will find 1440 * the defective path by retreiving EID from SFSR which is 1441 * included in the payload. 1442 */ 1443 break; 1444 } 1445 } 1446 1447 /* 1448 * Returns whether fault address is valid for this error bit and 1449 * whether the address is "in memory" (i.e. pf_is_memory returns 1). 
1450 */ 1451 /*ARGSUSED*/ 1452 static int 1453 cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit) 1454 { 1455 struct async_flt *aflt = (struct async_flt *)opl_flt; 1456 1457 if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) { 1458 return ((t_afsr_bit & SFSR_MEMORY) && 1459 pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)); 1460 } 1461 return (0); 1462 } 1463 1464 /* 1465 * In OPL SCF does the stick synchronization. 1466 */ 1467 void 1468 sticksync_slave(void) 1469 { 1470 } 1471 1472 /* 1473 * In OPL SCF does the stick synchronization. 1474 */ 1475 void 1476 sticksync_master(void) 1477 { 1478 } 1479 1480 /* 1481 * Cpu private unitialization. OPL cpus do not use the private area. 1482 */ 1483 void 1484 cpu_uninit_private(struct cpu *cp) 1485 { 1486 cmp_delete_cpu(cp->cpu_id); 1487 } 1488 1489 /* 1490 * Always flush an entire cache. 1491 */ 1492 void 1493 cpu_error_ecache_flush(void) 1494 { 1495 cpu_flush_ecache(); 1496 } 1497 1498 void 1499 cpu_ereport_post(struct async_flt *aflt) 1500 { 1501 char *cpu_type, buf[FM_MAX_CLASS]; 1502 nv_alloc_t *nva = NULL; 1503 nvlist_t *ereport, *detector, *resource; 1504 errorq_elem_t *eqep; 1505 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1506 1507 if (aflt->flt_panic || panicstr) { 1508 eqep = errorq_reserve(ereport_errorq); 1509 if (eqep == NULL) 1510 return; 1511 ereport = errorq_elem_nvl(ereport_errorq, eqep); 1512 nva = errorq_elem_nva(ereport_errorq, eqep); 1513 } else { 1514 ereport = fm_nvlist_create(nva); 1515 } 1516 1517 /* 1518 * Create the scheme "cpu" FMRI. 
1519 */ 1520 detector = fm_nvlist_create(nva); 1521 resource = fm_nvlist_create(nva); 1522 switch (cpunodes[aflt->flt_inst].implementation) { 1523 case OLYMPUS_C_IMPL: 1524 cpu_type = FM_EREPORT_CPU_SPARC64_VI; 1525 break; 1526 case JUPITER_IMPL: 1527 cpu_type = FM_EREPORT_CPU_SPARC64_VII; 1528 break; 1529 default: 1530 cpu_type = FM_EREPORT_CPU_UNSUPPORTED; 1531 break; 1532 } 1533 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1534 (u_longlong_t)cpunodes[aflt->flt_inst].device_id); 1535 (void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL, 1536 aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version, 1537 sbuf); 1538 1539 /* 1540 * Encode all the common data into the ereport. 1541 */ 1542 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", 1543 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class); 1544 1545 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 1546 fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL); 1547 1548 /* 1549 * Encode the error specific data that was saved in 1550 * the async_flt structure into the ereport. 
1551 */ 1552 cpu_payload_add_aflt(aflt, ereport, resource); 1553 1554 if (aflt->flt_panic || panicstr) { 1555 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 1556 } else { 1557 (void) fm_ereport_post(ereport, EVCH_TRYHARD); 1558 fm_nvlist_destroy(ereport, FM_NVA_FREE); 1559 fm_nvlist_destroy(detector, FM_NVA_FREE); 1560 fm_nvlist_destroy(resource, FM_NVA_FREE); 1561 } 1562 } 1563 1564 void 1565 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 1566 { 1567 int status; 1568 ddi_fm_error_t de; 1569 1570 bzero(&de, sizeof (ddi_fm_error_t)); 1571 1572 de.fme_version = DDI_FME_VERSION; 1573 de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1); 1574 de.fme_flag = expected; 1575 de.fme_bus_specific = (void *)aflt->flt_addr; 1576 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 1577 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 1578 aflt->flt_panic = 1; 1579 } 1580 1581 void 1582 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 1583 errorq_t *eqp, uint_t flag) 1584 { 1585 struct async_flt *aflt = (struct async_flt *)payload; 1586 1587 aflt->flt_erpt_class = error_class; 1588 errorq_dispatch(eqp, payload, payload_sz, flag); 1589 } 1590 1591 void 1592 adjust_hw_copy_limits(int ecache_size) 1593 { 1594 /* 1595 * Set hw copy limits. 1596 * 1597 * /etc/system will be parsed later and can override one or more 1598 * of these settings. 1599 * 1600 * At this time, ecache size seems only mildly relevant. 1601 * We seem to run into issues with the d-cache and stalls 1602 * we see on misses. 1603 * 1604 * Cycle measurement indicates that 2 byte aligned copies fare 1605 * little better than doing things with VIS at around 512 bytes. 1606 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte 1607 * aligned is faster whenever the source and destination data 1608 * in cache and the total size is less than 2 Kbytes. The 2K 1609 * limit seems to be driven by the 2K write cache. 
1610 * When more than 2K of copies are done in non-VIS mode, stores 1611 * backup in the write cache. In VIS mode, the write cache is 1612 * bypassed, allowing faster cache-line writes aligned on cache 1613 * boundaries. 1614 * 1615 * In addition, in non-VIS mode, there is no prefetching, so 1616 * for larger copies, the advantage of prefetching to avoid even 1617 * occasional cache misses is enough to justify using the VIS code. 1618 * 1619 * During testing, it was discovered that netbench ran 3% slower 1620 * when hw_copy_limit_8 was 2K or larger. Apparently for server 1621 * applications, data is only used once (copied to the output 1622 * buffer, then copied by the network device off the system). Using 1623 * the VIS copy saves more L2 cache state. Network copies are 1624 * around 1.3K to 1.5K in size for historical reasons. 1625 * 1626 * Therefore, a limit of 1K bytes will be used for the 8 byte 1627 * aligned copy even for large caches and 8 MB ecache. The 1628 * infrastructure to allow different limits for different sized 1629 * caches is kept to allow further tuning in later releases. 1630 */ 1631 1632 if (min_ecache_size == 0 && use_hw_bcopy) { 1633 /* 1634 * First time through - should be before /etc/system 1635 * is read. 1636 * Could skip the checks for zero but this lets us 1637 * preserve any debugger rewrites. 1638 */ 1639 if (hw_copy_limit_1 == 0) { 1640 hw_copy_limit_1 = VIS_COPY_THRESHOLD; 1641 priv_hcl_1 = hw_copy_limit_1; 1642 } 1643 if (hw_copy_limit_2 == 0) { 1644 hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD; 1645 priv_hcl_2 = hw_copy_limit_2; 1646 } 1647 if (hw_copy_limit_4 == 0) { 1648 hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD; 1649 priv_hcl_4 = hw_copy_limit_4; 1650 } 1651 if (hw_copy_limit_8 == 0) { 1652 hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; 1653 priv_hcl_8 = hw_copy_limit_8; 1654 } 1655 min_ecache_size = ecache_size; 1656 } else { 1657 /* 1658 * MP initialization. Called *after* /etc/system has 1659 * been parsed. 
One CPU has already been initialized. 1660 * Need to cater for /etc/system having scragged one 1661 * of our values. 1662 */ 1663 if (ecache_size == min_ecache_size) { 1664 /* 1665 * Same size ecache. We do nothing unless we 1666 * have a pessimistic ecache setting. In that 1667 * case we become more optimistic (if the cache is 1668 * large enough). 1669 */ 1670 if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) { 1671 /* 1672 * Need to adjust hw_copy_limit* from our 1673 * pessimistic uniprocessor value to a more 1674 * optimistic UP value *iff* it hasn't been 1675 * reset. 1676 */ 1677 if ((ecache_size > 1048576) && 1678 (priv_hcl_8 == hw_copy_limit_8)) { 1679 if (ecache_size <= 2097152) 1680 hw_copy_limit_8 = 4 * 1681 VIS_COPY_THRESHOLD; 1682 else if (ecache_size <= 4194304) 1683 hw_copy_limit_8 = 4 * 1684 VIS_COPY_THRESHOLD; 1685 else 1686 hw_copy_limit_8 = 4 * 1687 VIS_COPY_THRESHOLD; 1688 priv_hcl_8 = hw_copy_limit_8; 1689 } 1690 } 1691 } else if (ecache_size < min_ecache_size) { 1692 /* 1693 * A different ecache size. Can this even happen? 1694 */ 1695 if (priv_hcl_8 == hw_copy_limit_8) { 1696 /* 1697 * The previous value that we set 1698 * is unchanged (i.e., it hasn't been 1699 * scragged by /etc/system). Rewrite it. 
1700 */ 1701 if (ecache_size <= 1048576) 1702 hw_copy_limit_8 = 8 * 1703 VIS_COPY_THRESHOLD; 1704 else if (ecache_size <= 2097152) 1705 hw_copy_limit_8 = 8 * 1706 VIS_COPY_THRESHOLD; 1707 else if (ecache_size <= 4194304) 1708 hw_copy_limit_8 = 8 * 1709 VIS_COPY_THRESHOLD; 1710 else 1711 hw_copy_limit_8 = 10 * 1712 VIS_COPY_THRESHOLD; 1713 priv_hcl_8 = hw_copy_limit_8; 1714 min_ecache_size = ecache_size; 1715 } 1716 } 1717 } 1718 } 1719 1720 #define VIS_BLOCKSIZE 64 1721 1722 int 1723 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 1724 { 1725 int ret, watched; 1726 1727 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1728 ret = dtrace_blksuword32(addr, data, 0); 1729 if (watched) 1730 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1731 1732 return (ret); 1733 } 1734 1735 void 1736 opl_cpu_reg_init() 1737 { 1738 uint64_t this_cpu_log; 1739 1740 if (cpu[getprocessorid()] == &cpu0 && opl_cpu0_log_setup == 1) { 1741 /* 1742 * Support for "ta 3" 1743 */ 1744 opl_ta3(); 1745 1746 /* 1747 * If we are being called at boot time on cpu0 the error 1748 * log is already set up in cpu_setup. Clear the 1749 * opl_cpu0_log_setup flag so that a subsequent DR of cpu0 will 1750 * do the proper initialization. 1751 */ 1752 opl_cpu0_log_setup = 0; 1753 return; 1754 } 1755 1756 /* 1757 * Initialize Error log Scratch register for error handling. 1758 */ 1759 1760 this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) + 1761 ERRLOG_BUFSZ * (getprocessorid()))); 1762 opl_error_setup(this_cpu_log); 1763 1764 /* 1765 * Enable MMU translating multiple page sizes for 1766 * sITLB and sDTLB. 1767 */ 1768 opl_mpg_enable(); 1769 } 1770 1771 /* 1772 * Queue one event in ue_queue based on ecc_type_to_info entry. 
1773 */ 1774 static void 1775 cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason, 1776 ecc_type_to_info_t *eccp) 1777 { 1778 struct async_flt *aflt = (struct async_flt *)opl_flt; 1779 1780 if (reason && 1781 strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { 1782 (void) strcat(reason, eccp->ec_reason); 1783 } 1784 1785 opl_flt->flt_bit = eccp->ec_afsr_bit; 1786 opl_flt->flt_type = eccp->ec_flt_type; 1787 aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit); 1788 aflt->flt_payload = eccp->ec_err_payload; 1789 1790 ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP)); 1791 cpu_errorq_dispatch(eccp->ec_err_class, (void *)opl_flt, 1792 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 1793 } 1794 1795 /* 1796 * Queue events on async event queue one event per error bit. 1797 * Return number of events queued. 1798 */ 1799 int 1800 cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs) 1801 { 1802 struct async_flt *aflt = (struct async_flt *)opl_flt; 1803 ecc_type_to_info_t *eccp; 1804 int nevents = 0; 1805 1806 /* 1807 * Queue expected errors, error bit and fault type must must match 1808 * in the ecc_type_to_info table. 1809 */ 1810 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 1811 eccp++) { 1812 if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && 1813 (eccp->ec_flags & aflt->flt_status) != 0) { 1814 /* 1815 * UE error event can be further 1816 * classified/breakdown into finer granularity 1817 * based on the flt_eid_mod value set by HW. We do 1818 * special handling here so that we can report UE 1819 * error in finer granularity as ue_mem, 1820 * ue_channel, ue_cpu or ue_path. 
1821 */ 1822 if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) { 1823 opl_flt->flt_eid_mod = (aflt->flt_stat & 1824 SFSR_EID_MOD) >> SFSR_EID_MOD_SHIFT; 1825 opl_flt->flt_eid_sid = (aflt->flt_stat & 1826 SFSR_EID_SID) >> SFSR_EID_SID_SHIFT; 1827 /* 1828 * Need to advance eccp pointer by flt_eid_mod 1829 * so that we get an appropriate ecc pointer 1830 * 1831 * EID # of advances 1832 * ---------------------------------- 1833 * OPL_ERRID_MEM 0 1834 * OPL_ERRID_CHANNEL 1 1835 * OPL_ERRID_CPU 2 1836 * OPL_ERRID_PATH 3 1837 */ 1838 eccp += opl_flt->flt_eid_mod; 1839 } 1840 cpu_queue_one_event(opl_flt, reason, eccp); 1841 t_afsr_errs &= ~eccp->ec_afsr_bit; 1842 nevents++; 1843 } 1844 } 1845 1846 return (nevents); 1847 } 1848 1849 /* 1850 * Sync. error wrapper functions. 1851 * We use these functions in order to transfer here from the 1852 * nucleus trap handler information about trap type (data or 1853 * instruction) and trap level (0 or above 0). This way we 1854 * get rid of using SFSR's reserved bits. 
1855 */ 1856 1857 #define OPL_SYNC_TL0 0 1858 #define OPL_SYNC_TL1 1 1859 #define OPL_ISYNC_ERR 0 1860 #define OPL_DSYNC_ERR 1 1861 1862 void 1863 opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1864 { 1865 uint64_t t_sfar = p_sfar; 1866 uint64_t t_sfsr = p_sfsr; 1867 1868 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1869 OPL_SYNC_TL0, OPL_ISYNC_ERR); 1870 } 1871 1872 void 1873 opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1874 { 1875 uint64_t t_sfar = p_sfar; 1876 uint64_t t_sfsr = p_sfsr; 1877 1878 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1879 OPL_SYNC_TL1, OPL_ISYNC_ERR); 1880 } 1881 1882 void 1883 opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1884 { 1885 uint64_t t_sfar = p_sfar; 1886 uint64_t t_sfsr = p_sfsr; 1887 1888 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1889 OPL_SYNC_TL0, OPL_DSYNC_ERR); 1890 } 1891 1892 void 1893 opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1894 { 1895 uint64_t t_sfar = p_sfar; 1896 uint64_t t_sfsr = p_sfsr; 1897 1898 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1899 OPL_SYNC_TL1, OPL_DSYNC_ERR); 1900 } 1901 1902 /* 1903 * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL 1904 * and TLB_PRT. 1905 * This function is designed based on cpu_deferred_error(). 1906 */ 1907 1908 static void 1909 opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr, 1910 uint_t tl, uint_t derr) 1911 { 1912 opl_async_flt_t opl_flt; 1913 struct async_flt *aflt; 1914 int trampolined = 0; 1915 char pr_reason[MAX_REASON_STRING]; 1916 uint64_t log_sfsr; 1917 int expected = DDI_FM_ERR_UNEXPECTED; 1918 ddi_acc_hdl_t *hp; 1919 1920 /* 1921 * We need to look at p_flag to determine if the thread detected an 1922 * error while dumping core. We can't grab p_lock here, but it's ok 1923 * because we just need a consistent snapshot and we know that everyone 1924 * else will store a consistent set of bits while holding p_lock. 
We 1925 * don't have to worry about a race because SDOCORE is set once prior 1926 * to doing i/o from the process's address space and is never cleared. 1927 */ 1928 uint_t pflag = ttoproc(curthread)->p_flag; 1929 1930 pr_reason[0] = '\0'; 1931 1932 /* 1933 * handle the specific error 1934 */ 1935 bzero(&opl_flt, sizeof (opl_async_flt_t)); 1936 aflt = (struct async_flt *)&opl_flt; 1937 aflt->flt_id = gethrtime_waitfree(); 1938 aflt->flt_bus_id = getprocessorid(); 1939 aflt->flt_inst = CPU->cpu_id; 1940 aflt->flt_stat = t_sfsr; 1941 aflt->flt_addr = t_sfar; 1942 aflt->flt_pc = (caddr_t)rp->r_pc; 1943 aflt->flt_prot = (uchar_t)AFLT_PROT_NONE; 1944 aflt->flt_class = (uchar_t)CPU_FAULT; 1945 aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & 1946 TSTATE_PRIV) ? 1 : 0)); 1947 aflt->flt_tl = (uchar_t)tl; 1948 aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 || 1949 (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0); 1950 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1951 aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP; 1952 1953 /* 1954 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain. 1955 * So, clear all error bits to avoid mis-handling and force the system 1956 * panicked. 1957 * We skip all the procedures below down to the panic message call. 1958 */ 1959 if (!(t_sfsr & SFSR_FV)) { 1960 opl_flt.flt_type = OPL_CPU_INV_SFSR; 1961 aflt->flt_panic = 1; 1962 aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC; 1963 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt, 1964 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 1965 fm_panic("%sErrors(s)", "invalid SFSR"); 1966 } 1967 1968 /* 1969 * If either UE and MK bit is off, this is not valid UE error. 1970 * If it is not valid UE error, clear UE & MK_UE bits to prevent 1971 * mis-handling below. 1972 * aflt->flt_stat keeps the original bits as a reference. 
1973 */ 1974 if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) != 1975 (SFSR_MK_UE|SFSR_UE)) { 1976 t_sfsr &= ~(SFSR_MK_UE|SFSR_UE); 1977 } 1978 1979 /* 1980 * If the trap occurred in privileged mode at TL=0, we need to check to 1981 * see if we were executing in the kernel under on_trap() or t_lofault 1982 * protection. If so, modify the saved registers so that we return 1983 * from the trap to the appropriate trampoline routine. 1984 */ 1985 if (!aflt->flt_panic && aflt->flt_priv && tl == 0) { 1986 if (curthread->t_ontrap != NULL) { 1987 on_trap_data_t *otp = curthread->t_ontrap; 1988 1989 if (otp->ot_prot & OT_DATA_EC) { 1990 aflt->flt_prot = (uchar_t)AFLT_PROT_EC; 1991 otp->ot_trap |= (ushort_t)OT_DATA_EC; 1992 rp->r_pc = otp->ot_trampoline; 1993 rp->r_npc = rp->r_pc + 4; 1994 trampolined = 1; 1995 } 1996 1997 if ((t_sfsr & (SFSR_TO | SFSR_BERR)) && 1998 (otp->ot_prot & OT_DATA_ACCESS)) { 1999 aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS; 2000 otp->ot_trap |= (ushort_t)OT_DATA_ACCESS; 2001 rp->r_pc = otp->ot_trampoline; 2002 rp->r_npc = rp->r_pc + 4; 2003 trampolined = 1; 2004 /* 2005 * for peeks and caut_gets errors are expected 2006 */ 2007 hp = (ddi_acc_hdl_t *)otp->ot_handle; 2008 if (!hp) 2009 expected = DDI_FM_ERR_PEEK; 2010 else if (hp->ah_acc.devacc_attr_access == 2011 DDI_CAUTIOUS_ACC) 2012 expected = DDI_FM_ERR_EXPECTED; 2013 } 2014 2015 } else if (curthread->t_lofault) { 2016 aflt->flt_prot = AFLT_PROT_COPY; 2017 rp->r_g1 = EFAULT; 2018 rp->r_pc = curthread->t_lofault; 2019 rp->r_npc = rp->r_pc + 4; 2020 trampolined = 1; 2021 } 2022 } 2023 2024 /* 2025 * If we're in user mode or we're doing a protected copy, we either 2026 * want the ASTON code below to send a signal to the user process 2027 * or we want to panic if aft_panic is set. 2028 * 2029 * If we're in privileged mode and we're not doing a copy, then we 2030 * need to check if we've trampolined. If we haven't trampolined, 2031 * we should panic. 
2032 */ 2033 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 2034 if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO))) 2035 aflt->flt_panic |= aft_panic; 2036 } else if (!trampolined) { 2037 aflt->flt_panic = 1; 2038 } 2039 2040 /* 2041 * If we've trampolined due to a privileged TO or BERR, or if an 2042 * unprivileged TO or BERR occurred, we don't want to enqueue an 2043 * event for that TO or BERR. Queue all other events (if any) besides 2044 * the TO/BERR. 2045 */ 2046 log_sfsr = t_sfsr; 2047 if (trampolined) { 2048 log_sfsr &= ~(SFSR_TO | SFSR_BERR); 2049 } else if (!aflt->flt_priv) { 2050 /* 2051 * User mode, suppress messages if 2052 * cpu_berr_to_verbose is not set. 2053 */ 2054 if (!cpu_berr_to_verbose) 2055 log_sfsr &= ~(SFSR_TO | SFSR_BERR); 2056 } 2057 2058 if (((log_sfsr & SFSR_ERRS) && (cpu_queue_events(&opl_flt, pr_reason, 2059 t_sfsr) == 0)) || ((t_sfsr & SFSR_ERRS) == 0)) { 2060 opl_flt.flt_type = OPL_CPU_INV_SFSR; 2061 aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC; 2062 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt, 2063 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 2064 } 2065 2066 if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) { 2067 cpu_run_bus_error_handlers(aflt, expected); 2068 } 2069 2070 /* 2071 * Panic here if aflt->flt_panic has been set. Enqueued errors will 2072 * be logged as part of the panic flow. 2073 */ 2074 if (aflt->flt_panic) { 2075 if (pr_reason[0] == 0) 2076 strcpy(pr_reason, "invalid SFSR "); 2077 2078 fm_panic("%sErrors(s)", pr_reason); 2079 } 2080 2081 /* 2082 * If we queued an error and we are going to return from the trap and 2083 * the error was in user mode or inside of a copy routine, set AST flag 2084 * so the queue will be drained before returning to user mode. The 2085 * AST processing will also act on our failure policy. 
2086 */ 2087 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 2088 int pcb_flag = 0; 2089 2090 if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO))) 2091 pcb_flag |= ASYNC_HWERR; 2092 2093 if (t_sfsr & SFSR_BERR) 2094 pcb_flag |= ASYNC_BERR; 2095 2096 if (t_sfsr & SFSR_TO) 2097 pcb_flag |= ASYNC_BTO; 2098 2099 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 2100 aston(curthread); 2101 } 2102 } 2103 2104 /*ARGSUSED*/ 2105 void 2106 opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl) 2107 { 2108 opl_async_flt_t opl_flt; 2109 struct async_flt *aflt; 2110 char pr_reason[MAX_REASON_STRING]; 2111 2112 /* normalize tl */ 2113 tl = (tl >= 2 ? 1 : 0); 2114 pr_reason[0] = '\0'; 2115 2116 bzero(&opl_flt, sizeof (opl_async_flt_t)); 2117 aflt = (struct async_flt *)&opl_flt; 2118 aflt->flt_id = gethrtime_waitfree(); 2119 aflt->flt_bus_id = getprocessorid(); 2120 aflt->flt_inst = CPU->cpu_id; 2121 aflt->flt_stat = p_ugesr; 2122 aflt->flt_pc = (caddr_t)rp->r_pc; 2123 aflt->flt_class = (uchar_t)CPU_FAULT; 2124 aflt->flt_tl = tl; 2125 aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ? 2126 1 : 0)); 2127 aflt->flt_status = OPL_ECC_URGENT_TRAP; 2128 aflt->flt_panic = 1; 2129 /* 2130 * HW does not set mod/sid in case of urgent error. 2131 * So we have to set it here. 2132 */ 2133 opl_flt.flt_eid_mod = OPL_ERRID_CPU; 2134 opl_flt.flt_eid_sid = aflt->flt_inst; 2135 2136 if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) { 2137 opl_flt.flt_type = OPL_CPU_INV_UGESR; 2138 aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT; 2139 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG, (void *)&opl_flt, 2140 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 2141 } 2142 2143 fm_panic("Urgent Error"); 2144 } 2145 2146 /* 2147 * Initialization error counters resetting. 
2148 */ 2149 /* ARGSUSED */ 2150 static void 2151 opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when) 2152 { 2153 hdlr->cyh_func = (cyc_func_t)ras_cntr_reset; 2154 hdlr->cyh_level = CY_LOW_LEVEL; 2155 hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id; 2156 2157 when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU); 2158 when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval; 2159 } 2160 2161 void 2162 cpu_mp_init(void) 2163 { 2164 cyc_omni_handler_t hdlr; 2165 2166 hdlr.cyo_online = opl_ras_online; 2167 hdlr.cyo_offline = NULL; 2168 hdlr.cyo_arg = NULL; 2169 mutex_enter(&cpu_lock); 2170 (void) cyclic_add_omni(&hdlr); 2171 mutex_exit(&cpu_lock); 2172 } 2173 2174 int heaplp_use_stlb = 0; 2175 2176 void 2177 mmu_init_kernel_pgsz(struct hat *hat) 2178 { 2179 uint_t tte = page_szc(segkmem_lpsize); 2180 uchar_t new_cext_primary, new_cext_nucleus; 2181 2182 if (heaplp_use_stlb == 0) { 2183 /* do not reprogram stlb */ 2184 tte = TTE8K; 2185 } else if (!plat_prom_preserve_kctx_is_supported()) { 2186 /* OBP does not support non-zero primary context */ 2187 tte = TTE8K; 2188 heaplp_use_stlb = 0; 2189 } 2190 2191 new_cext_nucleus = TAGACCEXT_MKSZPAIR(tte, TTE8K); 2192 new_cext_primary = TAGACCEXT_MKSZPAIR(TTE8K, tte); 2193 2194 hat->sfmmu_cext = new_cext_primary; 2195 kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) | 2196 ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT); 2197 } 2198 2199 size_t 2200 mmu_get_kernel_lpsize(size_t lpsize) 2201 { 2202 uint_t tte; 2203 2204 if (lpsize == 0) { 2205 /* no setting for segkmem_lpsize in /etc/system: use default */ 2206 return (MMU_PAGESIZE4M); 2207 } 2208 2209 for (tte = TTE8K; tte <= TTE4M; tte++) { 2210 if (lpsize == TTEBYTES(tte)) 2211 return (lpsize); 2212 } 2213 2214 return (TTEBYTES(TTE8K)); 2215 } 2216 2217 /* 2218 * Support for ta 3. 2219 * We allocate here a buffer for each cpu 2220 * for saving the current register window. 
2221 */ 2222 typedef struct win_regs { 2223 uint64_t l[8]; 2224 uint64_t i[8]; 2225 } win_regs_t; 2226 static void 2227 opl_ta3(void) 2228 { 2229 /* 2230 * opl_ta3 should only be called once at boot time. 2231 */ 2232 if (opl_ta3_save == NULL) 2233 opl_ta3_save = (char *)kmem_alloc(NCPU * sizeof (win_regs_t), 2234 KM_SLEEP); 2235 } 2236 2237 /* 2238 * The following are functions that are unused in 2239 * OPL cpu module. They are defined here to resolve 2240 * dependencies in the "unix" module. 2241 * Unused functions that should never be called in 2242 * OPL are coded with ASSERT(0). 2243 */ 2244 2245 void 2246 cpu_disable_errors(void) 2247 {} 2248 2249 void 2250 cpu_enable_errors(void) 2251 { ASSERT(0); } 2252 2253 /*ARGSUSED*/ 2254 void 2255 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t) 2256 { ASSERT(0); } 2257 2258 /*ARGSUSED*/ 2259 void 2260 cpu_faulted_enter(struct cpu *cp) 2261 {} 2262 2263 /*ARGSUSED*/ 2264 void 2265 cpu_faulted_exit(struct cpu *cp) 2266 {} 2267 2268 /*ARGSUSED*/ 2269 void 2270 cpu_check_allcpus(struct async_flt *aflt) 2271 {} 2272 2273 /*ARGSUSED*/ 2274 void 2275 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t) 2276 { ASSERT(0); } 2277 2278 /*ARGSUSED*/ 2279 void 2280 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz) 2281 { ASSERT(0); } 2282 2283 /*ARGSUSED*/ 2284 void 2285 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 2286 { ASSERT(0); } 2287 2288 /*ARGSUSED*/ 2289 void 2290 cpu_busy_ecache_scrub(struct cpu *cp) 2291 {} 2292 2293 /*ARGSUSED*/ 2294 void 2295 cpu_idle_ecache_scrub(struct cpu *cp) 2296 {} 2297 2298 /* ARGSUSED */ 2299 void 2300 cpu_change_speed(uint64_t divisor, uint64_t arg2) 2301 { ASSERT(0); } 2302 2303 void 2304 cpu_init_cache_scrub(void) 2305 {} 2306 2307 /* ARGSUSED */ 2308 int 2309 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 2310 { 2311 if (&plat_get_mem_sid) { 2312 return (plat_get_mem_sid(unum, buf, buflen, lenp)); 2313 } else { 2314 return (ENOTSUP); 
2315 } 2316 } 2317 2318 /* ARGSUSED */ 2319 int 2320 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) 2321 { 2322 if (&plat_get_mem_addr) { 2323 return (plat_get_mem_addr(unum, sid, offset, addrp)); 2324 } else { 2325 return (ENOTSUP); 2326 } 2327 } 2328 2329 /* ARGSUSED */ 2330 int 2331 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) 2332 { 2333 if (&plat_get_mem_offset) { 2334 return (plat_get_mem_offset(flt_addr, offp)); 2335 } else { 2336 return (ENOTSUP); 2337 } 2338 } 2339 2340 /*ARGSUSED*/ 2341 void 2342 itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) 2343 { ASSERT(0); } 2344 2345 /*ARGSUSED*/ 2346 void 2347 dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) 2348 { ASSERT(0); } 2349 2350 /*ARGSUSED*/ 2351 void 2352 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err) 2353 { ASSERT(0); } 2354 2355 /*ARGSUSED*/ 2356 int 2357 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp, 2358 errorq_elem_t *eqep, size_t afltoffset) 2359 { 2360 ASSERT(0); 2361 return (0); 2362 } 2363 2364 /*ARGSUSED*/ 2365 char * 2366 flt_to_error_type(struct async_flt *aflt) 2367 { 2368 ASSERT(0); 2369 return (NULL); 2370 } 2371 2372 #define PROM_SPARC64VII_MODE_PROPNAME "SPARC64-VII-mode" 2373 2374 /* 2375 * Check for existence of OPL OBP property that indicates 2376 * SPARC64-VII support. By default, only enable Jupiter 2377 * features if the property is present. It will be 2378 * present in all-Jupiter domains by OBP if the domain has 2379 * been selected by the user on the system controller to 2380 * run in Jupiter mode. Basically, this OBP property must 2381 * be present to turn on the cpu_alljupiter flag. 2382 */ 2383 static int 2384 prom_SPARC64VII_support_enabled(void) 2385 { 2386 int val; 2387 2388 return ((prom_getprop(prom_rootnode(), PROM_SPARC64VII_MODE_PROPNAME, 2389 (caddr_t)&val) == 0) ? 
1 : 0); 2390 } 2391 2392 #define PROM_KCTX_PRESERVED_PROPNAME "context0-page-size-preserved" 2393 2394 /* 2395 * Check for existence of OPL OBP property that indicates support for 2396 * preserving Solaris kernel page sizes when entering OBP. We need to 2397 * check the prom tree since the ddi tree is not yet built when the 2398 * platform startup sequence is called. 2399 */ 2400 static int 2401 plat_prom_preserve_kctx_is_supported(void) 2402 { 2403 pnode_t pnode; 2404 int val; 2405 2406 /* 2407 * Check for existence of context0-page-size-preserved property 2408 * in virtual-memory prom node. 2409 */ 2410 pnode = (pnode_t)prom_getphandle(prom_mmu_ihandle()); 2411 return ((prom_getprop(pnode, PROM_KCTX_PRESERVED_PROPNAME, 2412 (caddr_t)&val) == 0) ? 1 : 0); 2413 } 2414