1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Support for Olympus-C (SPARC64-VI) and Jupiter (SPARC64-VII). 28 */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/ddi.h> 35 #include <sys/sysmacros.h> 36 #include <sys/archsystm.h> 37 #include <sys/vmsystm.h> 38 #include <sys/machparam.h> 39 #include <sys/machsystm.h> 40 #include <sys/machthread.h> 41 #include <sys/cpu.h> 42 #include <sys/cmp.h> 43 #include <sys/elf_SPARC.h> 44 #include <vm/vm_dep.h> 45 #include <vm/hat_sfmmu.h> 46 #include <vm/seg_kpm.h> 47 #include <vm/seg_kmem.h> 48 #include <sys/cpuvar.h> 49 #include <sys/opl_olympus_regs.h> 50 #include <sys/opl_module.h> 51 #include <sys/async.h> 52 #include <sys/cmn_err.h> 53 #include <sys/debug.h> 54 #include <sys/dditypes.h> 55 #include <sys/cpu_module.h> 56 #include <sys/sysmacros.h> 57 #include <sys/intreg.h> 58 #include <sys/clock.h> 59 #include <sys/platform_module.h> 60 #include <sys/ontrap.h> 61 #include <sys/panic.h> 62 #include <sys/memlist.h> 63 #include <sys/ndifm.h> 64 #include <sys/ddifm.h> 65 #include <sys/fm/protocol.h> 66 #include <sys/fm/util.h> 67 #include <sys/fm/cpu/SPARC64-VI.h> 68 #include <sys/dtrace.h> 69 #include <sys/watchpoint.h> 70 #include <sys/promif.h> 71 72 /* 73 * Internal functions. 74 */ 75 static int cpu_sync_log_err(void *flt); 76 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *); 77 static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t); 78 static int cpu_flt_in_memory(opl_async_flt_t *, uint64_t); 79 static int prom_SPARC64VII_support_enabled(void); 80 static void opl_ta3(); 81 static int plat_prom_preserve_kctx_is_supported(void); 82 83 /* 84 * Error counters resetting interval. 85 */ 86 static int opl_async_check_interval = 60; /* 1 min */ 87 88 uint_t cpu_impl_dual_pgsz = 1; 89 90 /* 91 * PA[22:0] represent Displacement in Jupiter 92 * configuration space. 93 */ 94 uint_t root_phys_addr_lo_mask = 0x7fffffu; 95 96 /* 97 * set in /etc/system to control logging of user BERR/TO's 98 */ 99 int cpu_berr_to_verbose = 0; 100 101 /* 102 * Set to 1 if booted with all Jupiter cpus (all-Jupiter features enabled). 103 */ 104 int cpu_alljupiter = 0; 105 106 static int min_ecache_size; 107 static uint_t priv_hcl_1; 108 static uint_t priv_hcl_2; 109 static uint_t priv_hcl_4; 110 static uint_t priv_hcl_8; 111 112 /* 113 * Olympus error log 114 */ 115 static opl_errlog_t *opl_err_log; 116 117 /* 118 * OPL ta 3 save area. 119 */ 120 char *opl_ta3_save; 121 122 /* 123 * UE is classified into four classes (MEM, CHANNEL, CPU, PATH). 
124 * No any other ecc_type_info insertion is allowed in between the following 125 * four UE classess. 126 */ 127 ecc_type_to_info_t ecc_type_to_info[] = { 128 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 129 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 130 FM_EREPORT_CPU_UE_MEM, 131 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 132 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 133 FM_EREPORT_CPU_UE_CHANNEL, 134 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 135 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 136 FM_EREPORT_CPU_UE_CPU, 137 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 138 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 139 FM_EREPORT_CPU_UE_PATH, 140 SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 141 "Bus Error", FM_EREPORT_PAYLOAD_SYNC, 142 FM_EREPORT_CPU_BERR, 143 SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 144 "Bus Timeout", FM_EREPORT_PAYLOAD_SYNC, 145 FM_EREPORT_CPU_BTO, 146 SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 147 "TLB MultiHit", FM_EREPORT_PAYLOAD_SYNC, 148 FM_EREPORT_CPU_MTLB, 149 SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 150 "TLB Parity", FM_EREPORT_PAYLOAD_SYNC, 151 FM_EREPORT_CPU_TLBP, 152 153 UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 154 "IAUG CRE", FM_EREPORT_PAYLOAD_URGENT, 155 FM_EREPORT_CPU_CRE, 156 UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT", 157 OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 158 "IAUG TSBCTXT", FM_EREPORT_PAYLOAD_URGENT, 159 FM_EREPORT_CPU_TSBCTX, 160 UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 161 "IUG TSBP", FM_EREPORT_PAYLOAD_URGENT, 162 FM_EREPORT_CPU_TSBP, 163 UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 164 "IUG PSTATE", FM_EREPORT_PAYLOAD_URGENT, 165 FM_EREPORT_CPU_PSTATE, 166 UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 167 "IUG TSTATE", FM_EREPORT_PAYLOAD_URGENT, 168 FM_EREPORT_CPU_TSTATE, 169 UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 170 "IUG FREG", FM_EREPORT_PAYLOAD_URGENT, 171 FM_EREPORT_CPU_IUG_F, 172 UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 173 "IUG RREG", FM_EREPORT_PAYLOAD_URGENT, 174 FM_EREPORT_CPU_IUG_R, 175 UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 176 "AUG SDC", FM_EREPORT_PAYLOAD_URGENT, 177 FM_EREPORT_CPU_SDC, 178 UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 179 "IUG WDT", FM_EREPORT_PAYLOAD_URGENT, 180 FM_EREPORT_CPU_WDT, 181 UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 182 "IUG DTLB", FM_EREPORT_PAYLOAD_URGENT, 183 FM_EREPORT_CPU_DTLB, 184 UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 185 "IUG ITLB", FM_EREPORT_PAYLOAD_URGENT, 186 FM_EREPORT_CPU_ITLB, 187 UGESR_IUG_COREERR, "IUG_COREERR", 188 OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 189 "IUG COREERR", FM_EREPORT_PAYLOAD_URGENT, 190 FM_EREPORT_CPU_CORE, 191 UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 192 "MULTI DAE", FM_EREPORT_PAYLOAD_URGENT, 193 FM_EREPORT_CPU_DAE, 194 UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 195 "MULTI IAE", FM_EREPORT_PAYLOAD_URGENT, 196 FM_EREPORT_CPU_IAE, 197 UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 198 "MULTI UGE", FM_EREPORT_PAYLOAD_URGENT, 199 FM_EREPORT_CPU_UGE, 200 0, NULL, 0, 0, 201 NULL, 0, 0, 202 }; 203 204 int (*p2get_mem_info)(int synd_code, uint64_t paddr, 205 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 206 int *segsp, int *banksp, int *mcidp); 
207 208 209 /* 210 * Setup trap handlers for 0xA, 0x32, 0x40 trap types 211 * and "ta 3" and "ta 4". 212 */ 213 void 214 cpu_init_trap(void) 215 { 216 OPL_SET_TRAP(tt0_iae, opl_serr_instr); 217 OPL_SET_TRAP(tt1_iae, opl_serr_instr); 218 OPL_SET_TRAP(tt0_dae, opl_serr_instr); 219 OPL_SET_TRAP(tt1_dae, opl_serr_instr); 220 OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr); 221 OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr); 222 OPL_SET_TRAP(tt0_flushw, opl_ta3_instr); 223 OPL_PATCH_28(opl_cleanw_patch, opl_ta4_instr); 224 } 225 226 static int 227 getintprop(pnode_t node, char *name, int deflt) 228 { 229 int value; 230 231 switch (prom_getproplen(node, name)) { 232 case sizeof (int): 233 (void) prom_getprop(node, name, (caddr_t)&value); 234 break; 235 236 default: 237 value = deflt; 238 break; 239 } 240 241 return (value); 242 } 243 244 /* 245 * Set the magic constants of the implementation. 246 */ 247 /*ARGSUSED*/ 248 void 249 cpu_fiximp(pnode_t dnode) 250 { 251 int i, a; 252 extern int vac_size, vac_shift; 253 extern uint_t vac_mask; 254 255 static struct { 256 char *name; 257 int *var; 258 int defval; 259 } prop[] = { 260 "l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE, 261 "l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE, 262 "l1-icache-size", &icache_size, OPL_ICACHE_SIZE, 263 "l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE, 264 "l2-cache-size", &ecache_size, OPL_ECACHE_SIZE, 265 "l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE, 266 "l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY 267 }; 268 269 for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) 270 *prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval); 271 272 ecache_setsize = ecache_size / ecache_associativity; 273 274 vac_size = OPL_VAC_SIZE; 275 vac_mask = MMU_PAGEMASK & (vac_size - 1); 276 i = 0; a = vac_size; 277 while (a >>= 1) 278 ++i; 279 vac_shift = i; 280 shm_alignment = vac_size; 281 vac = 1; 282 } 283 284 /* 285 * Enable features for Jupiter-only domains. 286 */ 287 void 288 cpu_fix_alljupiter(void) 289 { 290 if (!prom_SPARC64VII_support_enabled()) { 291 /* 292 * Do not enable all-Jupiter features and do not turn on 293 * the cpu_alljupiter flag. 294 */ 295 return; 296 } 297 298 cpu_alljupiter = 1; 299 300 /* 301 * Enable ima hwcap for Jupiter-only domains. DR will prevent 302 * addition of Olympus-C to all-Jupiter domains to preserve ima 303 * hwcap semantics. 304 */ 305 cpu_hwcap_flags |= AV_SPARC_IMA; 306 } 307 308 #ifdef OLYMPUS_C_REV_B_ERRATA_XCALL 309 /* 310 * Quick and dirty way to redefine locally in 311 * OPL the value of IDSR_BN_SETS to 31 instead 312 * of the standard 32 value. This is to workaround 313 * REV_B of Olympus_c processor's problem in handling 314 * more than 31 xcall broadcast. 
315 */ 316 #undef IDSR_BN_SETS 317 #define IDSR_BN_SETS 31 318 #endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */ 319 320 void 321 send_mondo_set(cpuset_t set) 322 { 323 int lo, busy, nack, shipped = 0; 324 uint16_t i, cpuids[IDSR_BN_SETS]; 325 uint64_t idsr, nackmask = 0, busymask, curnack, curbusy; 326 uint64_t starttick, endtick, tick, lasttick; 327 #if (NCPU > IDSR_BN_SETS) 328 int index = 0; 329 int ncpuids = 0; 330 #endif 331 #ifdef OLYMPUS_C_REV_A_ERRATA_XCALL 332 int bn_sets = IDSR_BN_SETS; 333 uint64_t ver; 334 335 ASSERT(NCPU > bn_sets); 336 #endif 337 338 ASSERT(!CPUSET_ISNULL(set)); 339 starttick = lasttick = gettick(); 340 341 #ifdef OLYMPUS_C_REV_A_ERRATA_XCALL 342 ver = ultra_getver(); 343 if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) && 344 ((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A)) 345 bn_sets = 1; 346 #endif 347 348 #if (NCPU <= IDSR_BN_SETS) 349 for (i = 0; i < NCPU; i++) 350 if (CPU_IN_SET(set, i)) { 351 shipit(i, shipped); 352 nackmask |= IDSR_NACK_BIT(shipped); 353 cpuids[shipped++] = i; 354 CPUSET_DEL(set, i); 355 if (CPUSET_ISNULL(set)) 356 break; 357 } 358 CPU_STATS_ADDQ(CPU, sys, xcalls, shipped); 359 #else 360 for (i = 0; i < NCPU; i++) 361 if (CPU_IN_SET(set, i)) { 362 ncpuids++; 363 364 /* 365 * Ship only to the first (IDSR_BN_SETS) CPUs. If we 366 * find we have shipped to more than (IDSR_BN_SETS) 367 * CPUs, set "index" to the highest numbered CPU in 368 * the set so we can ship to other CPUs a bit later on. 369 */ 370 #ifdef OLYMPUS_C_REV_A_ERRATA_XCALL 371 if (shipped < bn_sets) { 372 #else 373 if (shipped < IDSR_BN_SETS) { 374 #endif 375 shipit(i, shipped); 376 nackmask |= IDSR_NACK_BIT(shipped); 377 cpuids[shipped++] = i; 378 CPUSET_DEL(set, i); 379 if (CPUSET_ISNULL(set)) 380 break; 381 } else 382 index = (int)i; 383 } 384 385 CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids); 386 #endif 387 388 busymask = IDSR_NACK_TO_BUSY(nackmask); 389 busy = nack = 0; 390 endtick = starttick + xc_tick_limit; 391 for (;;) { 392 idsr = getidsr(); 393 #if (NCPU <= IDSR_BN_SETS) 394 if (idsr == 0) 395 break; 396 #else 397 if (idsr == 0 && shipped == ncpuids) 398 break; 399 #endif 400 tick = gettick(); 401 /* 402 * If there is a big jump between the current tick 403 * count and lasttick, we have probably hit a break 404 * point. Adjust endtick accordingly to avoid panic. 405 */ 406 if (tick > (lasttick + xc_tick_jump_limit)) 407 endtick += (tick - lasttick); 408 lasttick = tick; 409 if (tick > endtick) { 410 if (panic_quiesce) 411 return; 412 cmn_err(CE_CONT, "send mondo timeout [%d NACK %d " 413 "BUSY]\nIDSR 0x%" PRIx64 " cpuids:", 414 nack, busy, idsr); 415 #ifdef OLYMPUS_C_REV_A_ERRATA_XCALL 416 for (i = 0; i < bn_sets; i++) { 417 #else 418 for (i = 0; i < IDSR_BN_SETS; i++) { 419 #endif 420 if (idsr & (IDSR_NACK_BIT(i) | 421 IDSR_BUSY_BIT(i))) { 422 cmn_err(CE_CONT, " 0x%x", cpuids[i]); 423 } 424 } 425 cmn_err(CE_CONT, "\n"); 426 cmn_err(CE_PANIC, "send_mondo_set: timeout"); 427 } 428 curnack = idsr & nackmask; 429 curbusy = idsr & busymask; 430 431 #ifdef OLYMPUS_C_REV_B_ERRATA_XCALL 432 /* 433 * Only proceed to send more xcalls if all the 434 * cpus in the previous IDSR_BN_SETS were completed. 
435 */ 436 if (curbusy) { 437 busy++; 438 continue; 439 } 440 #endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */ 441 442 #if (NCPU > IDSR_BN_SETS) 443 if (shipped < ncpuids) { 444 uint64_t cpus_left; 445 uint16_t next = (uint16_t)index; 446 447 cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) & 448 busymask; 449 450 if (cpus_left) { 451 do { 452 /* 453 * Sequence through and ship to the 454 * remainder of the CPUs in the system 455 * (e.g. other than the first 456 * (IDSR_BN_SETS)) in reverse order. 457 */ 458 lo = lowbit(cpus_left) - 1; 459 i = IDSR_BUSY_IDX(lo); 460 shipit(next, i); 461 shipped++; 462 cpuids[i] = next; 463 464 /* 465 * If we've processed all the CPUs, 466 * exit the loop now and save 467 * instructions. 468 */ 469 if (shipped == ncpuids) 470 break; 471 472 for ((index = ((int)next - 1)); 473 index >= 0; index--) 474 if (CPU_IN_SET(set, index)) { 475 next = (uint16_t)index; 476 break; 477 } 478 479 cpus_left &= ~(1ull << lo); 480 } while (cpus_left); 481 continue; 482 } 483 } 484 #endif 485 #ifndef OLYMPUS_C_REV_B_ERRATA_XCALL 486 if (curbusy) { 487 busy++; 488 continue; 489 } 490 #endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */ 491 #ifdef SEND_MONDO_STATS 492 { 493 int n = gettick() - starttick; 494 if (n < 8192) 495 x_nack_stimes[n >> 7]++; 496 } 497 #endif 498 while (gettick() < (tick + sys_clock_mhz)) 499 ; 500 do { 501 lo = lowbit(curnack) - 1; 502 i = IDSR_NACK_IDX(lo); 503 shipit(cpuids[i], i); 504 curnack &= ~(1ull << lo); 505 } while (curnack); 506 nack++; 507 busy = 0; 508 } 509 #ifdef SEND_MONDO_STATS 510 { 511 int n = gettick() - starttick; 512 if (n < 8192) 513 x_set_stimes[n >> 7]++; 514 else 515 x_set_ltimes[(n >> 13) & 0xf]++; 516 } 517 x_set_cpus[shipped]++; 518 #endif 519 } 520 521 /* 522 * Cpu private initialization. 523 */ 524 void 525 cpu_init_private(struct cpu *cp) 526 { 527 if (!((IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation)) || 528 (IS_JUPITER(cpunodes[cp->cpu_id].implementation)))) { 529 cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI(I) is " 530 "supported", cp->cpu_id, 531 cpunodes[cp->cpu_id].implementation); 532 } 533 534 adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size); 535 } 536 537 void 538 cpu_setup(void) 539 { 540 extern int at_flags; 541 extern int cpc_has_overflow_intr; 542 uint64_t cpu0_log; 543 extern uint64_t opl_cpu0_err_log; 544 545 /* 546 * Initialize Error log Scratch register for error handling. 547 */ 548 549 cpu0_log = va_to_pa(&opl_cpu0_err_log); 550 opl_error_setup(cpu0_log); 551 552 /* 553 * Enable MMU translating multiple page sizes for 554 * sITLB and sDTLB. 555 */ 556 opl_mpg_enable(); 557 558 /* 559 * Setup chip-specific trap handlers. 560 */ 561 cpu_init_trap(); 562 563 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); 564 565 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3; 566 567 /* 568 * Due to the number of entries in the fully-associative tlb 569 * this may have to be tuned lower than in spitfire. 570 */ 571 pp_slots = MIN(8, MAXPP_SLOTS); 572 573 /* 574 * Block stores do not invalidate all pages of the d$, pagecopy 575 * et. al. need virtual translations with virtual coloring taken 576 * into consideration. prefetch/ldd will pollute the d$ on the 577 * load side. 
578 */ 579 pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE; 580 581 if (use_page_coloring) { 582 do_pg_coloring = 1; 583 } 584 585 isa_list = 586 "sparcv9+vis2 sparcv9+vis sparcv9 " 587 "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus " 588 "sparcv8 sparcv8-fsmuld sparcv7 sparc"; 589 590 cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2 | 591 AV_SPARC_POPC | AV_SPARC_FMAF; 592 593 /* 594 * On SPARC64-VI, there's no hole in the virtual address space 595 */ 596 hole_start = hole_end = 0; 597 598 /* 599 * The kpm mapping window. 600 * kpm_size: 601 * The size of a single kpm range. 602 * The overall size will be: kpm_size * vac_colors. 603 * kpm_vbase: 604 * The virtual start address of the kpm range within the kernel 605 * virtual address space. kpm_vbase has to be kpm_size aligned. 606 */ 607 kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */ 608 kpm_size_shift = 47; 609 kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */ 610 kpm_smallpages = 1; 611 612 /* 613 * The traptrace code uses either %tick or %stick for 614 * timestamping. We have %stick so we can use it. 615 */ 616 traptrace_use_stick = 1; 617 618 /* 619 * SPARC64-VI has a performance counter overflow interrupt 620 */ 621 cpc_has_overflow_intr = 1; 622 623 /* 624 * Declare that this architecture/cpu combination does not support 625 * fpRAS. 626 */ 627 fpras_implemented = 0; 628 } 629 630 /* 631 * Called by setcpudelay 632 */ 633 void 634 cpu_init_tick_freq(void) 635 { 636 /* 637 * For SPARC64-VI we want to use the system clock rate as 638 * the basis for low level timing, due to support of mixed 639 * speed CPUs and power managment. 640 */ 641 if (system_clock_freq == 0) 642 cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq"); 643 644 sys_tick_freq = system_clock_freq; 645 } 646 647 #ifdef SEND_MONDO_STATS 648 uint32_t x_one_stimes[64]; 649 uint32_t x_one_ltimes[16]; 650 uint32_t x_set_stimes[64]; 651 uint32_t x_set_ltimes[16]; 652 uint32_t x_set_cpus[NCPU]; 653 uint32_t x_nack_stimes[64]; 654 #endif 655 656 /* 657 * Note: A version of this function is used by the debugger via the KDI, 658 * and must be kept in sync with this version. Any changes made to this 659 * function to support new chips or to accomodate errata must also be included 660 * in the KDI-specific version. See us3_kdi.c. 661 */ 662 void 663 send_one_mondo(int cpuid) 664 { 665 int busy, nack; 666 uint64_t idsr, starttick, endtick, tick, lasttick; 667 uint64_t busymask; 668 669 CPU_STATS_ADDQ(CPU, sys, xcalls, 1); 670 starttick = lasttick = gettick(); 671 shipit(cpuid, 0); 672 endtick = starttick + xc_tick_limit; 673 busy = nack = 0; 674 busymask = IDSR_BUSY; 675 for (;;) { 676 idsr = getidsr(); 677 if (idsr == 0) 678 break; 679 680 tick = gettick(); 681 /* 682 * If there is a big jump between the current tick 683 * count and lasttick, we have probably hit a break 684 * point. Adjust endtick accordingly to avoid panic. 
685 */ 686 if (tick > (lasttick + xc_tick_jump_limit)) 687 endtick += (tick - lasttick); 688 lasttick = tick; 689 if (tick > endtick) { 690 if (panic_quiesce) 691 return; 692 cmn_err(CE_PANIC, "send mondo timeout (target 0x%x) " 693 "[%d NACK %d BUSY]", cpuid, nack, busy); 694 } 695 696 if (idsr & busymask) { 697 busy++; 698 continue; 699 } 700 drv_usecwait(1); 701 shipit(cpuid, 0); 702 nack++; 703 busy = 0; 704 } 705 #ifdef SEND_MONDO_STATS 706 { 707 int n = gettick() - starttick; 708 if (n < 8192) 709 x_one_stimes[n >> 7]++; 710 else 711 x_one_ltimes[(n >> 13) & 0xf]++; 712 } 713 #endif 714 } 715 716 /* 717 * init_mmu_page_sizes is set to one after the bootup time initialization 718 * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a 719 * valid value. 720 * 721 * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific 722 * versions of disable_ism_large_pages and disable_large_pages, and feed back 723 * into those two hat variables at hat initialization time. 724 * 725 */ 726 int init_mmu_page_sizes = 0; 727 728 static uint_t mmu_disable_large_pages = 0; 729 static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) | 730 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 731 static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 732 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 733 static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) | 734 (1 << TTE512K)); 735 736 /* 737 * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support. 738 * Called during very early bootup from check_cpus_set(). 739 * Can be called to verify that mmu_page_sizes are set up correctly. 740 * 741 * Set Olympus defaults. We do not use the function parameter. 742 */ 743 /*ARGSUSED*/ 744 int 745 mmu_init_mmu_page_sizes(int32_t not_used) 746 { 747 if (!init_mmu_page_sizes) { 748 mmu_page_sizes = MMU_PAGE_SIZES; 749 mmu_hashcnt = MAX_HASHCNT; 750 mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE; 751 mmu_exported_pagesize_mask = (1 << TTE8K) | 752 (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | 753 (1 << TTE32M) | (1 << TTE256M); 754 init_mmu_page_sizes = 1; 755 return (0); 756 } 757 return (1); 758 } 759 760 /* SPARC64-VI worst case DTLB parameters */ 761 #ifndef LOCKED_DTLB_ENTRIES 762 #define LOCKED_DTLB_ENTRIES 5 /* 2 user TSBs, 2 nucleus, + OBP */ 763 #endif 764 #define TOTAL_DTLB_ENTRIES 32 765 #define AVAIL_32M_ENTRIES 0 766 #define AVAIL_256M_ENTRIES 0 767 #define AVAIL_DTLB_ENTRIES (TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES) 768 static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = { 769 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 770 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 771 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES}; 772 773 /* 774 * The function returns the mmu-specific values for the 775 * hat's disable_large_pages, disable_ism_large_pages, and 776 * disable_auto_data_large_pages and 777 * disable_text_data_large_pages variables. 
778 */ 779 uint_t 780 mmu_large_pages_disabled(uint_t flag) 781 { 782 uint_t pages_disable = 0; 783 extern int use_text_pgsz64K; 784 extern int use_text_pgsz512K; 785 786 if (flag == HAT_LOAD) { 787 pages_disable = mmu_disable_large_pages; 788 } else if (flag == HAT_LOAD_SHARE) { 789 pages_disable = mmu_disable_ism_large_pages; 790 } else if (flag == HAT_AUTO_DATA) { 791 pages_disable = mmu_disable_auto_data_large_pages; 792 } else if (flag == HAT_AUTO_TEXT) { 793 pages_disable = mmu_disable_auto_text_large_pages; 794 if (use_text_pgsz512K) { 795 pages_disable &= ~(1 << TTE512K); 796 } 797 if (use_text_pgsz64K) { 798 pages_disable &= ~(1 << TTE64K); 799 } 800 } 801 return (pages_disable); 802 } 803 804 /* 805 * mmu_init_large_pages is called with the desired ism_pagesize parameter. 806 * It may be called from set_platform_defaults, if some value other than 32M 807 * is desired. mmu_ism_pagesize is the tunable. If it has a bad value, 808 * then only warn, since it would be bad form to panic due to a user typo. 809 * 810 * The function re-initializes the mmu_disable_ism_large_pages variable. 811 */ 812 void 813 mmu_init_large_pages(size_t ism_pagesize) 814 { 815 switch (ism_pagesize) { 816 case MMU_PAGESIZE4M: 817 mmu_disable_ism_large_pages = ((1 << TTE64K) | 818 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 819 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 820 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 821 break; 822 case MMU_PAGESIZE32M: 823 mmu_disable_ism_large_pages = ((1 << TTE64K) | 824 (1 << TTE512K) | (1 << TTE256M)); 825 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 826 (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M)); 827 adjust_data_maxlpsize(ism_pagesize); 828 break; 829 case MMU_PAGESIZE256M: 830 mmu_disable_ism_large_pages = ((1 << TTE64K) | 831 (1 << TTE512K) | (1 << TTE32M)); 832 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 833 (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M)); 834 adjust_data_maxlpsize(ism_pagesize); 835 break; 836 default: 837 cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx", 838 ism_pagesize); 839 break; 840 } 841 } 842 843 /* 844 * Function to reprogram the TLBs when page sizes used 845 * by a process change significantly. 846 */ 847 void 848 mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz) 849 { 850 uint8_t pgsz0, pgsz1; 851 852 /* 853 * Don't program 2nd dtlb for kernel and ism hat 854 */ 855 ASSERT(hat->sfmmu_ismhat == NULL); 856 ASSERT(hat != ksfmmup); 857 858 /* 859 * hat->sfmmu_pgsz[] is an array whose elements 860 * contain a sorted order of page sizes. Element 861 * 0 is the most commonly used page size, followed 862 * by element 1, and so on. 863 * 864 * ttecnt[] is an array of per-page-size page counts 865 * mapped into the process. 866 * 867 * If the HAT's choice for page sizes is unsuitable, 868 * we can override it here. The new values written 869 * to the array will be handed back to us later to 870 * do the actual programming of the TLB hardware. 871 * 872 */ 873 pgsz0 = (uint8_t)MIN(tmp_pgsz[0], tmp_pgsz[1]); 874 pgsz1 = (uint8_t)MAX(tmp_pgsz[0], tmp_pgsz[1]); 875 876 /* 877 * This implements PAGESIZE programming of the sTLB 878 * if large TTE counts don't exceed the thresholds. 
879 */ 880 if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0]) 881 pgsz0 = page_szc(MMU_PAGESIZE); 882 if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1]) 883 pgsz1 = page_szc(MMU_PAGESIZE); 884 tmp_pgsz[0] = pgsz0; 885 tmp_pgsz[1] = pgsz1; 886 /* otherwise, accept what the HAT chose for us */ 887 } 888 889 /* 890 * The HAT calls this function when an MMU context is allocated so that we 891 * can reprogram the large TLBs appropriately for the new process using 892 * the context. 893 * 894 * The caller must hold the HAT lock. 895 */ 896 void 897 mmu_set_ctx_page_sizes(struct hat *hat) 898 { 899 uint8_t pgsz0, pgsz1; 900 uint8_t new_cext; 901 902 ASSERT(sfmmu_hat_lock_held(hat)); 903 /* 904 * Don't program 2nd dtlb for kernel and ism hat 905 */ 906 if (hat->sfmmu_ismhat || hat == ksfmmup) 907 return; 908 909 /* 910 * If supported, reprogram the TLBs to a larger pagesize. 911 */ 912 pgsz0 = hat->sfmmu_pgsz[0]; 913 pgsz1 = hat->sfmmu_pgsz[1]; 914 ASSERT(pgsz0 < mmu_page_sizes); 915 ASSERT(pgsz1 < mmu_page_sizes); 916 new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0); 917 if (hat->sfmmu_cext != new_cext) { 918 #ifdef DEBUG 919 int i; 920 /* 921 * assert cnum should be invalid, this is because pagesize 922 * can only be changed after a proc's ctxs are invalidated. 923 */ 924 for (i = 0; i < max_mmu_ctxdoms; i++) { 925 ASSERT(hat->sfmmu_ctxs[i].cnum == INVALID_CONTEXT); 926 } 927 #endif /* DEBUG */ 928 hat->sfmmu_cext = new_cext; 929 } 930 /* 931 * sfmmu_setctx_sec() will take care of the 932 * rest of the dirty work for us. 933 */ 934 } 935 936 /* 937 * This function assumes that there are either four or six supported page 938 * sizes and at most two programmable TLBs, so we need to decide which 939 * page sizes are most important and then adjust the TLB page sizes 940 * accordingly (if supported). 941 * 942 * If these assumptions change, this function will need to be 943 * updated to support whatever the new limits are. 944 */ 945 void 946 mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt) 947 { 948 uint64_t sortcnt[MMU_PAGE_SIZES]; 949 uint8_t tmp_pgsz[MMU_PAGE_SIZES]; 950 uint8_t i, j, max; 951 uint16_t oldval, newval; 952 953 /* 954 * We only consider reprogramming the TLBs if one or more of 955 * the two most used page sizes changes and we're using 956 * large pages in this process. 957 */ 958 if (SFMMU_LGPGS_INUSE(sfmmup)) { 959 /* Sort page sizes. */ 960 for (i = 0; i < mmu_page_sizes; i++) { 961 sortcnt[i] = ttecnt[i]; 962 } 963 for (j = 0; j < mmu_page_sizes; j++) { 964 for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) { 965 if (sortcnt[i] > sortcnt[max]) 966 max = i; 967 } 968 tmp_pgsz[j] = max; 969 sortcnt[max] = 0; 970 } 971 972 oldval = sfmmup->sfmmu_pgsz[0] << 8 | sfmmup->sfmmu_pgsz[1]; 973 974 mmu_setup_page_sizes(sfmmup, ttecnt, tmp_pgsz); 975 976 /* Check 2 largest values after the sort. */ 977 newval = tmp_pgsz[0] << 8 | tmp_pgsz[1]; 978 if (newval != oldval) { 979 sfmmu_reprog_pgsz_arr(sfmmup, tmp_pgsz); 980 } 981 } 982 } 983 984 /* 985 * Return processor specific async error structure 986 * size used. 987 */ 988 int 989 cpu_aflt_size(void) 990 { 991 return (sizeof (opl_async_flt_t)); 992 } 993 994 /* 995 * The cpu_sync_log_err() function is called via the [uc]e_drain() function to 996 * post-process CPU events that are dequeued. As such, it can be invoked 997 * from softint context, from AST processing in the trap() flow, or from the 998 * panic flow. We decode the CPU-specific data, and take appropriate actions. 
999 * Historically this entry point was used to log the actual cmn_err(9F) text; 1000 * now with FMA it is used to prepare 'flt' to be converted into an ereport. 1001 * With FMA this function now also returns a flag which indicates to the 1002 * caller whether the ereport should be posted (1) or suppressed (0). 1003 */ 1004 /*ARGSUSED*/ 1005 static int 1006 cpu_sync_log_err(void *flt) 1007 { 1008 opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt; 1009 struct async_flt *aflt = (struct async_flt *)flt; 1010 1011 /* 1012 * No extra processing of urgent error events. 1013 * Always generate ereports for these events. 1014 */ 1015 if (aflt->flt_status == OPL_ECC_URGENT_TRAP) 1016 return (1); 1017 1018 /* 1019 * Additional processing for synchronous errors. 1020 */ 1021 switch (opl_flt->flt_type) { 1022 case OPL_CPU_INV_SFSR: 1023 return (1); 1024 1025 case OPL_CPU_SYNC_UE: 1026 /* 1027 * The validity: SFSR_MK_UE bit has been checked 1028 * in opl_cpu_sync_error() 1029 * No more check is required. 1030 * 1031 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W, 1032 * and they have been retrieved in cpu_queue_events() 1033 */ 1034 1035 if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) { 1036 ASSERT(aflt->flt_in_memory); 1037 /* 1038 * We want to skip logging only if ALL the following 1039 * conditions are true: 1040 * 1041 * 1. We are not panicing already. 1042 * 2. The error is a memory error. 1043 * 3. There is only one error. 1044 * 4. The error is on a retired page. 1045 * 5. The error occurred under on_trap 1046 * protection AFLT_PROT_EC 1047 */ 1048 if (!panicstr && aflt->flt_prot == AFLT_PROT_EC && 1049 page_retire_check(aflt->flt_addr, NULL) == 0) { 1050 /* 1051 * Do not log an error from 1052 * the retired page 1053 */ 1054 softcall(ecc_page_zero, (void *)aflt->flt_addr); 1055 return (0); 1056 } 1057 if (!panicstr) 1058 cpu_page_retire(opl_flt); 1059 } 1060 return (1); 1061 1062 case OPL_CPU_SYNC_OTHERS: 1063 /* 1064 * For the following error cases, the processor HW does 1065 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt 1066 * to assign appropriate values here to reflect what we 1067 * think is the most likely cause of the problem w.r.t to 1068 * the particular error event. For Buserr and timeout 1069 * error event, we will assign OPL_ERRID_CHANNEL as the 1070 * most likely reason. For TLB parity or multiple hit 1071 * error events, we will assign the reason as 1072 * OPL_ERRID_CPU (cpu related problem) and set the 1073 * flt_eid_sid to point to the cpuid. 1074 */ 1075 1076 if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) { 1077 /* 1078 * flt_eid_sid will not be used for this case. 1079 */ 1080 opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL; 1081 } 1082 if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) { 1083 opl_flt->flt_eid_mod = OPL_ERRID_CPU; 1084 opl_flt->flt_eid_sid = aflt->flt_inst; 1085 } 1086 1087 /* 1088 * In case of no effective error bit 1089 */ 1090 if ((opl_flt->flt_bit & SFSR_ERRS) == 0) { 1091 opl_flt->flt_eid_mod = OPL_ERRID_CPU; 1092 opl_flt->flt_eid_sid = aflt->flt_inst; 1093 } 1094 break; 1095 1096 default: 1097 return (1); 1098 } 1099 return (1); 1100 } 1101 1102 /* 1103 * Retire the bad page that may contain the flushed error. 
1104 */ 1105 void 1106 cpu_page_retire(opl_async_flt_t *opl_flt) 1107 { 1108 struct async_flt *aflt = (struct async_flt *)opl_flt; 1109 (void) page_retire(aflt->flt_addr, PR_UE); 1110 } 1111 1112 /* 1113 * Invoked by error_init() early in startup and therefore before 1114 * startup_errorq() is called to drain any error Q - 1115 * 1116 * startup() 1117 * startup_end() 1118 * error_init() 1119 * cpu_error_init() 1120 * errorq_init() 1121 * errorq_drain() 1122 * start_other_cpus() 1123 * 1124 * The purpose of this routine is to create error-related taskqs. Taskqs 1125 * are used for this purpose because cpu_lock can't be grabbed from interrupt 1126 * context. 1127 * 1128 */ 1129 /*ARGSUSED*/ 1130 void 1131 cpu_error_init(int items) 1132 { 1133 opl_err_log = (opl_errlog_t *) 1134 kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP); 1135 if ((uint64_t)opl_err_log & MMU_PAGEOFFSET) 1136 cmn_err(CE_PANIC, "The base address of the error log " 1137 "is not page aligned"); 1138 } 1139 1140 /* 1141 * We route all errors through a single switch statement. 1142 */ 1143 void 1144 cpu_ue_log_err(struct async_flt *aflt) 1145 { 1146 switch (aflt->flt_class) { 1147 case CPU_FAULT: 1148 if (cpu_sync_log_err(aflt)) 1149 cpu_ereport_post(aflt); 1150 break; 1151 1152 case BUS_FAULT: 1153 bus_async_log_err(aflt); 1154 break; 1155 1156 default: 1157 cmn_err(CE_WARN, "discarding async error %p with invalid " 1158 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 1159 return; 1160 } 1161 } 1162 1163 /* 1164 * Routine for panic hook callback from panic_idle(). 1165 * 1166 * Nothing to do here. 1167 */ 1168 void 1169 cpu_async_panic_callb(void) 1170 { 1171 } 1172 1173 /* 1174 * Routine to return a string identifying the physical name 1175 * associated with a memory/cache error. 1176 */ 1177 /*ARGSUSED*/ 1178 int 1179 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, 1180 uint64_t flt_addr, int flt_bus_id, int flt_in_memory, 1181 ushort_t flt_status, char *buf, int buflen, int *lenp) 1182 { 1183 int synd_code; 1184 int ret; 1185 1186 /* 1187 * An AFSR of -1 defaults to a memory syndrome. 1188 */ 1189 synd_code = (int)flt_synd; 1190 1191 if (&plat_get_mem_unum) { 1192 if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id, 1193 flt_in_memory, flt_status, buf, buflen, lenp)) != 0) { 1194 buf[0] = '\0'; 1195 *lenp = 0; 1196 } 1197 return (ret); 1198 } 1199 buf[0] = '\0'; 1200 *lenp = 0; 1201 return (ENOTSUP); 1202 } 1203 1204 /* 1205 * Wrapper for cpu_get_mem_unum() routine that takes an 1206 * async_flt struct rather than explicit arguments. 1207 */ 1208 int 1209 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1210 char *buf, int buflen, int *lenp) 1211 { 1212 /* 1213 * We always pass -1 so that cpu_get_mem_unum will interpret this as a 1214 * memory error. 1215 */ 1216 return (cpu_get_mem_unum(synd_status, aflt->flt_synd, 1217 (uint64_t)-1, 1218 aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, 1219 aflt->flt_status, buf, buflen, lenp)); 1220 } 1221 1222 /* 1223 * This routine is a more generic interface to cpu_get_mem_unum() 1224 * that may be used by other modules (e.g. mm). 1225 */ 1226 /*ARGSUSED*/ 1227 int 1228 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1229 char *buf, int buflen, int *lenp) 1230 { 1231 int synd_status, flt_in_memory, ret; 1232 ushort_t flt_status = 0; 1233 char unum[UNUM_NAMLEN]; 1234 1235 /* 1236 * Check for an invalid address. 
1237 */ 1238 if (afar == (uint64_t)-1) 1239 return (ENXIO); 1240 1241 if (synd == (uint64_t)-1) 1242 synd_status = AFLT_STAT_INVALID; 1243 else 1244 synd_status = AFLT_STAT_VALID; 1245 1246 flt_in_memory = (*afsr & SFSR_MEMORY) && 1247 pf_is_memory(afar >> MMU_PAGESHIFT); 1248 1249 ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1250 CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp); 1251 if (ret != 0) 1252 return (ret); 1253 1254 if (*lenp >= buflen) 1255 return (ENAMETOOLONG); 1256 1257 (void) strncpy(buf, unum, buflen); 1258 1259 return (0); 1260 } 1261 1262 /* 1263 * Routine to return memory information associated 1264 * with a physical address and syndrome. 1265 */ 1266 /*ARGSUSED*/ 1267 int 1268 cpu_get_mem_info(uint64_t synd, uint64_t afar, 1269 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1270 int *segsp, int *banksp, int *mcidp) 1271 { 1272 int synd_code = (int)synd; 1273 1274 if (afar == (uint64_t)-1) 1275 return (ENXIO); 1276 1277 if (p2get_mem_info != NULL) 1278 return ((p2get_mem_info)(synd_code, afar, mem_sizep, seg_sizep, 1279 bank_sizep, segsp, banksp, mcidp)); 1280 else 1281 return (ENOTSUP); 1282 } 1283 1284 /* 1285 * Routine to return a string identifying the physical 1286 * name associated with a cpuid. 1287 */ 1288 int 1289 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 1290 { 1291 int ret; 1292 char unum[UNUM_NAMLEN]; 1293 1294 if (&plat_get_cpu_unum) { 1295 if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, 1296 lenp)) != 0) 1297 return (ret); 1298 } else { 1299 return (ENOTSUP); 1300 } 1301 1302 if (*lenp >= buflen) 1303 return (ENAMETOOLONG); 1304 1305 (void) strncpy(buf, unum, *lenp); 1306 1307 return (0); 1308 } 1309 1310 /* 1311 * This routine exports the name buffer size. 1312 */ 1313 size_t 1314 cpu_get_name_bufsize() 1315 { 1316 return (UNUM_NAMLEN); 1317 } 1318 1319 /* 1320 * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH 1321 */ 1322 void 1323 cpu_flush_ecache(void) 1324 { 1325 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size, 1326 cpunodes[CPU->cpu_id].ecache_linesize); 1327 } 1328 1329 static uint8_t 1330 flt_to_trap_type(struct async_flt *aflt) 1331 { 1332 if (aflt->flt_status & OPL_ECC_ISYNC_TRAP) 1333 return (TRAP_TYPE_ECC_I); 1334 if (aflt->flt_status & OPL_ECC_DSYNC_TRAP) 1335 return (TRAP_TYPE_ECC_D); 1336 if (aflt->flt_status & OPL_ECC_URGENT_TRAP) 1337 return (TRAP_TYPE_URGENT); 1338 return (TRAP_TYPE_UNKNOWN); 1339 } 1340 1341 /* 1342 * Encode the data saved in the opl_async_flt_t struct into 1343 * the FM ereport payload. 
1344 */ 1345 /* ARGSUSED */ 1346 static void 1347 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload, 1348 nvlist_t *resource) 1349 { 1350 opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt; 1351 char unum[UNUM_NAMLEN]; 1352 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1353 int len; 1354 1355 1356 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) { 1357 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR, 1358 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1359 } 1360 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) { 1361 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR, 1362 DATA_TYPE_UINT64, aflt->flt_addr, NULL); 1363 } 1364 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) { 1365 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR, 1366 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1367 } 1368 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) { 1369 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC, 1370 DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL); 1371 } 1372 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) { 1373 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL, 1374 DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL); 1375 } 1376 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) { 1377 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT, 1378 DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL); 1379 } 1380 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) { 1381 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV, 1382 DATA_TYPE_BOOLEAN_VALUE, 1383 (aflt->flt_priv ? B_TRUE : B_FALSE), NULL); 1384 } 1385 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) { 1386 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS, 1387 DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL); 1388 } 1389 1390 switch (opl_flt->flt_eid_mod) { 1391 case OPL_ERRID_CPU: 1392 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1393 (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id); 1394 (void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION, 1395 NULL, opl_flt->flt_eid_sid, 1396 (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version, sbuf); 1397 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1398 DATA_TYPE_NVLIST, resource, NULL); 1399 break; 1400 1401 case OPL_ERRID_CHANNEL: 1402 /* 1403 * No resource is created but the cpumem DE will find 1404 * the defective path by retreiving EID from SFSR which is 1405 * included in the payload. 1406 */ 1407 break; 1408 1409 case OPL_ERRID_MEM: 1410 (void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len); 1411 (void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL, 1412 unum, NULL, (uint64_t)-1); 1413 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1414 DATA_TYPE_NVLIST, resource, NULL); 1415 break; 1416 1417 case OPL_ERRID_PATH: 1418 /* 1419 * No resource is created but the cpumem DE will find 1420 * the defective path by retreiving EID from SFSR which is 1421 * included in the payload. 1422 */ 1423 break; 1424 } 1425 } 1426 1427 /* 1428 * Returns whether fault address is valid for this error bit and 1429 * whether the address is "in memory" (i.e. pf_is_memory returns 1). 1430 */ 1431 /*ARGSUSED*/ 1432 static int 1433 cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit) 1434 { 1435 struct async_flt *aflt = (struct async_flt *)opl_flt; 1436 1437 if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) { 1438 return ((t_afsr_bit & SFSR_MEMORY) && 1439 pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)); 1440 } 1441 return (0); 1442 } 1443 1444 /* 1445 * In OPL SCF does the stick synchronization. 
1446 */ 1447 void 1448 sticksync_slave(void) 1449 { 1450 } 1451 1452 /* 1453 * In OPL SCF does the stick synchronization. 1454 */ 1455 void 1456 sticksync_master(void) 1457 { 1458 } 1459 1460 /* 1461 * Cpu private unitialization. OPL cpus do not use the private area. 1462 */ 1463 void 1464 cpu_uninit_private(struct cpu *cp) 1465 { 1466 cmp_delete_cpu(cp->cpu_id); 1467 } 1468 1469 /* 1470 * Always flush an entire cache. 1471 */ 1472 void 1473 cpu_error_ecache_flush(void) 1474 { 1475 cpu_flush_ecache(); 1476 } 1477 1478 void 1479 cpu_ereport_post(struct async_flt *aflt) 1480 { 1481 char *cpu_type, buf[FM_MAX_CLASS]; 1482 nv_alloc_t *nva = NULL; 1483 nvlist_t *ereport, *detector, *resource; 1484 errorq_elem_t *eqep; 1485 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1486 1487 if (aflt->flt_panic || panicstr) { 1488 eqep = errorq_reserve(ereport_errorq); 1489 if (eqep == NULL) 1490 return; 1491 ereport = errorq_elem_nvl(ereport_errorq, eqep); 1492 nva = errorq_elem_nva(ereport_errorq, eqep); 1493 } else { 1494 ereport = fm_nvlist_create(nva); 1495 } 1496 1497 /* 1498 * Create the scheme "cpu" FMRI. 1499 */ 1500 detector = fm_nvlist_create(nva); 1501 resource = fm_nvlist_create(nva); 1502 switch (cpunodes[aflt->flt_inst].implementation) { 1503 case OLYMPUS_C_IMPL: 1504 cpu_type = FM_EREPORT_CPU_SPARC64_VI; 1505 break; 1506 case JUPITER_IMPL: 1507 cpu_type = FM_EREPORT_CPU_SPARC64_VII; 1508 break; 1509 default: 1510 cpu_type = FM_EREPORT_CPU_UNSUPPORTED; 1511 break; 1512 } 1513 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1514 (u_longlong_t)cpunodes[aflt->flt_inst].device_id); 1515 (void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL, 1516 aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version, 1517 sbuf); 1518 1519 /* 1520 * Encode all the common data into the ereport. 1521 */ 1522 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", 1523 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class); 1524 1525 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 1526 fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL); 1527 1528 /* 1529 * Encode the error specific data that was saved in 1530 * the async_flt structure into the ereport. 1531 */ 1532 cpu_payload_add_aflt(aflt, ereport, resource); 1533 1534 if (aflt->flt_panic || panicstr) { 1535 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 1536 } else { 1537 (void) fm_ereport_post(ereport, EVCH_TRYHARD); 1538 fm_nvlist_destroy(ereport, FM_NVA_FREE); 1539 fm_nvlist_destroy(detector, FM_NVA_FREE); 1540 fm_nvlist_destroy(resource, FM_NVA_FREE); 1541 } 1542 } 1543 1544 void 1545 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 1546 { 1547 int status; 1548 ddi_fm_error_t de; 1549 1550 bzero(&de, sizeof (ddi_fm_error_t)); 1551 1552 de.fme_version = DDI_FME_VERSION; 1553 de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1); 1554 de.fme_flag = expected; 1555 de.fme_bus_specific = (void *)aflt->flt_addr; 1556 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 1557 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 1558 aflt->flt_panic = 1; 1559 } 1560 1561 void 1562 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 1563 errorq_t *eqp, uint_t flag) 1564 { 1565 struct async_flt *aflt = (struct async_flt *)payload; 1566 1567 aflt->flt_erpt_class = error_class; 1568 errorq_dispatch(eqp, payload, payload_sz, flag); 1569 } 1570 1571 void 1572 adjust_hw_copy_limits(int ecache_size) 1573 { 1574 /* 1575 * Set hw copy limits. 
1576 * 1577 * /etc/system will be parsed later and can override one or more 1578 * of these settings. 1579 * 1580 * At this time, ecache size seems only mildly relevant. 1581 * We seem to run into issues with the d-cache and stalls 1582 * we see on misses. 1583 * 1584 * Cycle measurement indicates that 2 byte aligned copies fare 1585 * little better than doing things with VIS at around 512 bytes. 1586 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte 1587 * aligned is faster whenever the source and destination data 1588 * in cache and the total size is less than 2 Kbytes. The 2K 1589 * limit seems to be driven by the 2K write cache. 1590 * When more than 2K of copies are done in non-VIS mode, stores 1591 * backup in the write cache. In VIS mode, the write cache is 1592 * bypassed, allowing faster cache-line writes aligned on cache 1593 * boundaries. 1594 * 1595 * In addition, in non-VIS mode, there is no prefetching, so 1596 * for larger copies, the advantage of prefetching to avoid even 1597 * occasional cache misses is enough to justify using the VIS code. 1598 * 1599 * During testing, it was discovered that netbench ran 3% slower 1600 * when hw_copy_limit_8 was 2K or larger. Apparently for server 1601 * applications, data is only used once (copied to the output 1602 * buffer, then copied by the network device off the system). Using 1603 * the VIS copy saves more L2 cache state. Network copies are 1604 * around 1.3K to 1.5K in size for historical reasons. 1605 * 1606 * Therefore, a limit of 1K bytes will be used for the 8 byte 1607 * aligned copy even for large caches and 8 MB ecache. The 1608 * infrastructure to allow different limits for different sized 1609 * caches is kept to allow further tuning in later releases. 1610 */ 1611 1612 if (min_ecache_size == 0 && use_hw_bcopy) { 1613 /* 1614 * First time through - should be before /etc/system 1615 * is read. 1616 * Could skip the checks for zero but this lets us 1617 * preserve any debugger rewrites. 1618 */ 1619 if (hw_copy_limit_1 == 0) { 1620 hw_copy_limit_1 = VIS_COPY_THRESHOLD; 1621 priv_hcl_1 = hw_copy_limit_1; 1622 } 1623 if (hw_copy_limit_2 == 0) { 1624 hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD; 1625 priv_hcl_2 = hw_copy_limit_2; 1626 } 1627 if (hw_copy_limit_4 == 0) { 1628 hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD; 1629 priv_hcl_4 = hw_copy_limit_4; 1630 } 1631 if (hw_copy_limit_8 == 0) { 1632 hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; 1633 priv_hcl_8 = hw_copy_limit_8; 1634 } 1635 min_ecache_size = ecache_size; 1636 } else { 1637 /* 1638 * MP initialization. Called *after* /etc/system has 1639 * been parsed. One CPU has already been initialized. 1640 * Need to cater for /etc/system having scragged one 1641 * of our values. 1642 */ 1643 if (ecache_size == min_ecache_size) { 1644 /* 1645 * Same size ecache. We do nothing unless we 1646 * have a pessimistic ecache setting. In that 1647 * case we become more optimistic (if the cache is 1648 * large enough). 1649 */ 1650 if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) { 1651 /* 1652 * Need to adjust hw_copy_limit* from our 1653 * pessimistic uniprocessor value to a more 1654 * optimistic UP value *iff* it hasn't been 1655 * reset. 
1656 */ 1657 if ((ecache_size > 1048576) && 1658 (priv_hcl_8 == hw_copy_limit_8)) { 1659 if (ecache_size <= 2097152) 1660 hw_copy_limit_8 = 4 * 1661 VIS_COPY_THRESHOLD; 1662 else if (ecache_size <= 4194304) 1663 hw_copy_limit_8 = 4 * 1664 VIS_COPY_THRESHOLD; 1665 else 1666 hw_copy_limit_8 = 4 * 1667 VIS_COPY_THRESHOLD; 1668 priv_hcl_8 = hw_copy_limit_8; 1669 } 1670 } 1671 } else if (ecache_size < min_ecache_size) { 1672 /* 1673 * A different ecache size. Can this even happen? 1674 */ 1675 if (priv_hcl_8 == hw_copy_limit_8) { 1676 /* 1677 * The previous value that we set 1678 * is unchanged (i.e., it hasn't been 1679 * scragged by /etc/system). Rewrite it. 1680 */ 1681 if (ecache_size <= 1048576) 1682 hw_copy_limit_8 = 8 * 1683 VIS_COPY_THRESHOLD; 1684 else if (ecache_size <= 2097152) 1685 hw_copy_limit_8 = 8 * 1686 VIS_COPY_THRESHOLD; 1687 else if (ecache_size <= 4194304) 1688 hw_copy_limit_8 = 8 * 1689 VIS_COPY_THRESHOLD; 1690 else 1691 hw_copy_limit_8 = 10 * 1692 VIS_COPY_THRESHOLD; 1693 priv_hcl_8 = hw_copy_limit_8; 1694 min_ecache_size = ecache_size; 1695 } 1696 } 1697 } 1698 } 1699 1700 #define VIS_BLOCKSIZE 64 1701 1702 int 1703 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 1704 { 1705 int ret, watched; 1706 1707 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1708 ret = dtrace_blksuword32(addr, data, 0); 1709 if (watched) 1710 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1711 1712 return (ret); 1713 } 1714 1715 void 1716 opl_cpu_reg_init() 1717 { 1718 uint64_t this_cpu_log; 1719 1720 /* 1721 * We do not need to re-initialize cpu0 registers. 1722 */ 1723 if (cpu[getprocessorid()] == &cpu0) { 1724 /* 1725 * Support for "ta 3" 1726 */ 1727 opl_ta3(); 1728 return; 1729 } 1730 1731 /* 1732 * Initialize Error log Scratch register for error handling. 1733 */ 1734 1735 this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) + 1736 ERRLOG_BUFSZ * (getprocessorid()))); 1737 opl_error_setup(this_cpu_log); 1738 1739 /* 1740 * Enable MMU translating multiple page sizes for 1741 * sITLB and sDTLB. 1742 */ 1743 opl_mpg_enable(); 1744 } 1745 1746 /* 1747 * Queue one event in ue_queue based on ecc_type_to_info entry. 1748 */ 1749 static void 1750 cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason, 1751 ecc_type_to_info_t *eccp) 1752 { 1753 struct async_flt *aflt = (struct async_flt *)opl_flt; 1754 1755 if (reason && 1756 strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { 1757 (void) strcat(reason, eccp->ec_reason); 1758 } 1759 1760 opl_flt->flt_bit = eccp->ec_afsr_bit; 1761 opl_flt->flt_type = eccp->ec_flt_type; 1762 aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit); 1763 aflt->flt_payload = eccp->ec_err_payload; 1764 1765 ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP)); 1766 cpu_errorq_dispatch(eccp->ec_err_class, (void *)opl_flt, 1767 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 1768 } 1769 1770 /* 1771 * Queue events on async event queue one event per error bit. 1772 * Return number of events queued. 1773 */ 1774 int 1775 cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs) 1776 { 1777 struct async_flt *aflt = (struct async_flt *)opl_flt; 1778 ecc_type_to_info_t *eccp; 1779 int nevents = 0; 1780 1781 /* 1782 * Queue expected errors, error bit and fault type must must match 1783 * in the ecc_type_to_info table. 
1784 */ 1785 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 1786 eccp++) { 1787 if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && 1788 (eccp->ec_flags & aflt->flt_status) != 0) { 1789 /* 1790 * UE error event can be further 1791 * classified/breakdown into finer granularity 1792 * based on the flt_eid_mod value set by HW. We do 1793 * special handling here so that we can report UE 1794 * error in finer granularity as ue_mem, 1795 * ue_channel, ue_cpu or ue_path. 1796 */ 1797 if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) { 1798 opl_flt->flt_eid_mod = (aflt->flt_stat & 1799 SFSR_EID_MOD) >> SFSR_EID_MOD_SHIFT; 1800 opl_flt->flt_eid_sid = (aflt->flt_stat & 1801 SFSR_EID_SID) >> SFSR_EID_SID_SHIFT; 1802 /* 1803 * Need to advance eccp pointer by flt_eid_mod 1804 * so that we get an appropriate ecc pointer 1805 * 1806 * EID # of advances 1807 * ---------------------------------- 1808 * OPL_ERRID_MEM 0 1809 * OPL_ERRID_CHANNEL 1 1810 * OPL_ERRID_CPU 2 1811 * OPL_ERRID_PATH 3 1812 */ 1813 eccp += opl_flt->flt_eid_mod; 1814 } 1815 cpu_queue_one_event(opl_flt, reason, eccp); 1816 t_afsr_errs &= ~eccp->ec_afsr_bit; 1817 nevents++; 1818 } 1819 } 1820 1821 return (nevents); 1822 } 1823 1824 /* 1825 * Sync. error wrapper functions. 1826 * We use these functions in order to transfer here from the 1827 * nucleus trap handler information about trap type (data or 1828 * instruction) and trap level (0 or above 0). This way we 1829 * get rid of using SFSR's reserved bits. 1830 */ 1831 1832 #define OPL_SYNC_TL0 0 1833 #define OPL_SYNC_TL1 1 1834 #define OPL_ISYNC_ERR 0 1835 #define OPL_DSYNC_ERR 1 1836 1837 void 1838 opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1839 { 1840 uint64_t t_sfar = p_sfar; 1841 uint64_t t_sfsr = p_sfsr; 1842 1843 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1844 OPL_SYNC_TL0, OPL_ISYNC_ERR); 1845 } 1846 1847 void 1848 opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1849 { 1850 uint64_t t_sfar = p_sfar; 1851 uint64_t t_sfsr = p_sfsr; 1852 1853 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1854 OPL_SYNC_TL1, OPL_ISYNC_ERR); 1855 } 1856 1857 void 1858 opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1859 { 1860 uint64_t t_sfar = p_sfar; 1861 uint64_t t_sfsr = p_sfsr; 1862 1863 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1864 OPL_SYNC_TL0, OPL_DSYNC_ERR); 1865 } 1866 1867 void 1868 opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1869 { 1870 uint64_t t_sfar = p_sfar; 1871 uint64_t t_sfsr = p_sfsr; 1872 1873 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1874 OPL_SYNC_TL1, OPL_DSYNC_ERR); 1875 } 1876 1877 /* 1878 * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL 1879 * and TLB_PRT. 1880 * This function is designed based on cpu_deferred_error(). 1881 */ 1882 1883 static void 1884 opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr, 1885 uint_t tl, uint_t derr) 1886 { 1887 opl_async_flt_t opl_flt; 1888 struct async_flt *aflt; 1889 int trampolined = 0; 1890 char pr_reason[MAX_REASON_STRING]; 1891 uint64_t log_sfsr; 1892 int expected = DDI_FM_ERR_UNEXPECTED; 1893 ddi_acc_hdl_t *hp; 1894 1895 /* 1896 * We need to look at p_flag to determine if the thread detected an 1897 * error while dumping core. We can't grab p_lock here, but it's ok 1898 * because we just need a consistent snapshot and we know that everyone 1899 * else will store a consistent set of bits while holding p_lock. 
We 1900 * don't have to worry about a race because SDOCORE is set once prior 1901 * to doing i/o from the process's address space and is never cleared. 1902 */ 1903 uint_t pflag = ttoproc(curthread)->p_flag; 1904 1905 pr_reason[0] = '\0'; 1906 1907 /* 1908 * handle the specific error 1909 */ 1910 bzero(&opl_flt, sizeof (opl_async_flt_t)); 1911 aflt = (struct async_flt *)&opl_flt; 1912 aflt->flt_id = gethrtime_waitfree(); 1913 aflt->flt_bus_id = getprocessorid(); 1914 aflt->flt_inst = CPU->cpu_id; 1915 aflt->flt_stat = t_sfsr; 1916 aflt->flt_addr = t_sfar; 1917 aflt->flt_pc = (caddr_t)rp->r_pc; 1918 aflt->flt_prot = (uchar_t)AFLT_PROT_NONE; 1919 aflt->flt_class = (uchar_t)CPU_FAULT; 1920 aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & 1921 TSTATE_PRIV) ? 1 : 0)); 1922 aflt->flt_tl = (uchar_t)tl; 1923 aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 || 1924 (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0); 1925 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1926 aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP; 1927 1928 /* 1929 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain. 1930 * So, clear all error bits to avoid mis-handling and force the system 1931 * panicked. 1932 * We skip all the procedures below down to the panic message call. 1933 */ 1934 if (!(t_sfsr & SFSR_FV)) { 1935 opl_flt.flt_type = OPL_CPU_INV_SFSR; 1936 aflt->flt_panic = 1; 1937 aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC; 1938 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt, 1939 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 1940 fm_panic("%sErrors(s)", "invalid SFSR"); 1941 } 1942 1943 /* 1944 * If either UE and MK bit is off, this is not valid UE error. 1945 * If it is not valid UE error, clear UE & MK_UE bits to prevent 1946 * mis-handling below. 1947 * aflt->flt_stat keeps the original bits as a reference. 1948 */ 1949 if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) != 1950 (SFSR_MK_UE|SFSR_UE)) { 1951 t_sfsr &= ~(SFSR_MK_UE|SFSR_UE); 1952 } 1953 1954 /* 1955 * If the trap occurred in privileged mode at TL=0, we need to check to 1956 * see if we were executing in the kernel under on_trap() or t_lofault 1957 * protection. If so, modify the saved registers so that we return 1958 * from the trap to the appropriate trampoline routine. 
	if (!aflt->flt_panic && aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = (uchar_t)AFLT_PROT_EC;
				otp->ot_trap |= (ushort_t)OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
			}

			if ((t_sfsr & (SFSR_TO | SFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS;
				otp->ot_trap |= (ushort_t)OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
				/*
				 * For peeks and caut_gets, errors are
				 * expected.
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			trampolined = 1;
		}
	}

	/*
	 * If we're in user mode or we're doing a protected copy, we either
	 * want the ASTON code below to send a signal to the user process
	 * or we want to panic if aft_panic is set.
	 *
	 * If we're in privileged mode and we're not doing a copy, then we
	 * need to check if we've trampolined.  If we haven't trampolined,
	 * we should panic.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
			aflt->flt_panic |= aft_panic;
	} else if (!trampolined) {
		aflt->flt_panic = 1;
	}

	/*
	 * If we've trampolined due to a privileged TO or BERR, or if an
	 * unprivileged TO or BERR occurred, we don't want to enqueue an
	 * event for that TO or BERR.  Queue all other events (if any) besides
	 * the TO/BERR.
	 */
	log_sfsr = t_sfsr;
	if (trampolined) {
		log_sfsr &= ~(SFSR_TO | SFSR_BERR);
	} else if (!aflt->flt_priv) {
		/*
		 * User mode, suppress messages if
		 * cpu_berr_to_verbose is not set.
		 */
		if (!cpu_berr_to_verbose)
			log_sfsr &= ~(SFSR_TO | SFSR_BERR);
	}

	if (((log_sfsr & SFSR_ERRS) && (cpu_queue_events(&opl_flt, pr_reason,
	    t_sfsr) == 0)) || ((t_sfsr & SFSR_ERRS) == 0)) {
		opl_flt.flt_type = OPL_CPU_INV_SFSR;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
	}

	if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic) {
		if (pr_reason[0] == 0)
			strcpy(pr_reason, "invalid SFSR ");

		fm_panic("%sError(s)", pr_reason);
	}
	/*
	 * If we queued an error and we are going to return from the trap and
	 * the error was in user mode or inside of a copy routine, set the AST
	 * flag so the queue will be drained before returning to user mode.
	 * The AST processing will also act on our failure policy.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		int pcb_flag = 0;

		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
			pcb_flag |= ASYNC_HWERR;

		if (t_sfsr & SFSR_BERR)
			pcb_flag |= ASYNC_BERR;

		if (t_sfsr & SFSR_TO)
			pcb_flag |= ASYNC_BTO;

		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
		aston(curthread);
	}
}

/*ARGSUSED*/
void
opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl)
{
	opl_async_flt_t opl_flt;
	struct async_flt *aflt;
	char pr_reason[MAX_REASON_STRING];

	/* normalize tl */
	tl = (tl >= 2 ? 1 : 0);
	pr_reason[0] = '\0';

	bzero(&opl_flt, sizeof (opl_async_flt_t));
	aflt = (struct async_flt *)&opl_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_stat = p_ugesr;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_class = (uchar_t)CPU_FAULT;
	aflt->flt_tl = tl;
	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ?
	    1 : 0));
	aflt->flt_status = OPL_ECC_URGENT_TRAP;
	aflt->flt_panic = 1;
	/*
	 * HW does not set mod/sid in the case of an urgent error,
	 * so we have to set them here.
	 */
	opl_flt.flt_eid_mod = OPL_ERRID_CPU;
	opl_flt.flt_eid_sid = aflt->flt_inst;

	if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) {
		opl_flt.flt_type = OPL_CPU_INV_UGESR;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG, (void *)&opl_flt,
		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
	}

	fm_panic("Urgent Error");
}
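/*
 * A note on the error-counter reset cyclic configured below by
 * opl_ras_online()/cpu_mp_init(): cyt_when for each CPU's cyclic is set
 * to cpu_id * (10 * NANOSEC / NCPU), so the per-CPU firing phases are
 * staggered across a ten-second window instead of every CPU resetting
 * its counters in lock step; each cyclic then repeats every
 * opl_async_check_interval (60) seconds.  As a worked example with a
 * hypothetical NCPU of 1024, adjacent cpu ids are offset by about
 * 10,000,000,000 / 1024 ns, i.e. roughly 9.8 ms.
 */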
/*
 * Initialization of periodic error counter resetting.
 */
/* ARGSUSED */
static void
opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when)
{
	hdlr->cyh_func = (cyc_func_t)ras_cntr_reset;
	hdlr->cyh_level = CY_LOW_LEVEL;
	hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id;

	when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10) / NCPU);
	when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval;
}

void
cpu_mp_init(void)
{
	cyc_omni_handler_t hdlr;

	hdlr.cyo_online = opl_ras_online;
	hdlr.cyo_offline = NULL;
	hdlr.cyo_arg = NULL;
	mutex_enter(&cpu_lock);
	(void) cyclic_add_omni(&hdlr);
	mutex_exit(&cpu_lock);
}

int heaplp_use_stlb = -1;

void
mmu_init_kernel_pgsz(struct hat *hat)
{
	uint_t tte = page_szc(segkmem_lpsize);
	uchar_t new_cext_primary, new_cext_nucleus;

	if (heaplp_use_stlb == 0) {
		/* do not reprogram stlb */
		tte = TTE8K;
	} else if (!plat_prom_preserve_kctx_is_supported()) {
		/* OBP does not support non-zero primary context */
		tte = TTE8K;
		heaplp_use_stlb = 0;
	}

	new_cext_nucleus = TAGACCEXT_MKSZPAIR(tte, TTE8K);
	new_cext_primary = TAGACCEXT_MKSZPAIR(TTE8K, tte);

	hat->sfmmu_cext = new_cext_primary;
	kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) |
	    ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT);
}

size_t
mmu_get_kernel_lpsize(size_t lpsize)
{
	uint_t tte;

	if (lpsize == 0) {
		/* no setting for segkmem_lpsize in /etc/system: use default */
		return (MMU_PAGESIZE4M);
	}

	for (tte = TTE8K; tte <= TTE4M; tte++) {
		if (lpsize == TTEBYTES(tte))
			return (lpsize);
	}

	return (TTEBYTES(TTE8K));
}
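/*
 * Illustrative summary of mmu_get_kernel_lpsize() above (not additional
 * driver logic): a segkmem_lpsize value from /etc/system is accepted
 * only if it matches one of the page sizes between TTE8K and TTE4M
 * (8K, 64K, 512K or 4M); anything else falls back to 8K, and an unset
 * (zero) value selects the 4M default.  For example, a hypothetical
 * "set segkmem_lpsize = 0x400000" entry yields 4M kernel heap pages,
 * while "set segkmem_lpsize = 0x200000" (2M, not a supported size)
 * silently degrades to 8K.
 */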
/*
 * Support for ta 3.
 * We allocate here one buffer per CPU
 * for saving the current register window.
 */
typedef struct win_regs {
	uint64_t l[8];
	uint64_t i[8];
} win_regs_t;
static void
opl_ta3(void)
{
	opl_ta3_save = (char *)kmem_alloc(NCPU * sizeof (win_regs_t), KM_SLEEP);
}

/*
 * The following are functions that are unused in
 * the OPL cpu module.  They are defined here to resolve
 * dependencies in the "unix" module.
 * Unused functions that should never be called in
 * OPL are coded with ASSERT(0).
 */

void
cpu_disable_errors(void)
{}

void
cpu_enable_errors(void)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{}

/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{}

/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{}

/*ARGSUSED*/
void
cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_busy_ecache_scrub(struct cpu *cp)
{}

/*ARGSUSED*/
void
cpu_idle_ecache_scrub(struct cpu *cp)
{}

/* ARGSUSED */
void
cpu_change_speed(uint64_t divisor, uint64_t arg2)
{ ASSERT(0); }

void
cpu_init_cache_scrub(void)
{}

/* ARGSUSED */
int
cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	if (&plat_get_mem_sid) {
		return (plat_get_mem_sid(unum, buf, buflen, lenp));
	} else {
		return (ENOTSUP);
	}
}

/* ARGSUSED */
int
cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	if (&plat_get_mem_addr) {
		return (plat_get_mem_addr(unum, sid, offset, addrp));
	} else {
		return (ENOTSUP);
	}
}

/* ARGSUSED */
int
cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
{
	if (&plat_get_mem_offset) {
		return (plat_get_mem_offset(flt_addr, offp));
	} else {
		return (ENOTSUP);
	}
}

/*ARGSUSED*/
void
itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{ ASSERT(0); }

/*ARGSUSED*/
void
dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{ ASSERT(0); }

/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{ ASSERT(0); }

/*ARGSUSED*/
int
ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
    errorq_elem_t *eqep, size_t afltoffset)
{
	ASSERT(0);
	return (0);
}

/*ARGSUSED*/
char *
flt_to_error_type(struct async_flt *aflt)
{
	ASSERT(0);
	return (NULL);
}

#define	PROM_SPARC64VII_MODE_PROPNAME	"SPARC64-VII-mode"

/*
 * Check for the existence of the OPL OBP property that indicates
 * SPARC64-VII support.  By default, Jupiter features are enabled only
 * if the property is present.  OBP creates the property in all-Jupiter
 * domains when the user has selected Jupiter mode for the domain on the
 * system controller.  In short, this OBP property must be present to
 * turn on the cpu_alljupiter flag.
 */
static int
prom_SPARC64VII_support_enabled(void)
{
	int val;

	return ((prom_getprop(prom_rootnode(), PROM_SPARC64VII_MODE_PROPNAME,
	    (caddr_t)&val) == 0) ? 1 : 0);
}
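/*
 * Illustrative usage sketch (the actual call site is elsewhere in this
 * module and is not reproduced here): the CPU setup code is expected to
 * consult this predicate roughly as follows, so that all-Jupiter
 * features are enabled only when OBP exports the SPARC64-VII-mode
 * property:
 *
 *	if (prom_SPARC64VII_support_enabled())
 *		cpu_alljupiter = 1;
 */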
#define	PROM_KCTX_PRESERVED_PROPNAME	"context0-page-size-preserved"

/*
 * Check for existence of OPL OBP property that indicates support for
 * preserving Solaris kernel page sizes when entering OBP.  We need to
 * check the prom tree since the ddi tree is not yet built when the
 * platform startup sequence is called.
 */
static int
plat_prom_preserve_kctx_is_supported(void)
{
	pnode_t pnode;
	int val;

	/*
	 * Check for existence of context0-page-size-preserved property
	 * in virtual-memory prom node.
	 */
	pnode = (pnode_t)prom_getphandle(prom_mmu_ihandle());
	return ((prom_getprop(pnode, PROM_KCTX_PRESERVED_PROPNAME,
	    (caddr_t)&val) == 0) ? 1 : 0);
}
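/*
 * A note on the two property checks above: prom_getprop() returns the
 * length of the property value, or -1 if the property does not exist,
 * so the comparison against 0 succeeds only for a zero-length "flag"
 * property.  Both SPARC64-VII-mode and context0-page-size-preserved are
 * thus treated purely as existence flags; val only serves as the buffer
 * argument that prom_getprop() requires.
 */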