/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Support for Olympus-C (SPARC64-VI) and Jupiter (SPARC64-VII).
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sysmacros.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/machthread.h>
#include <sys/cpu.h>
#include <sys/cmp.h>
#include <sys/elf_SPARC.h>
#include <vm/vm_dep.h>
#include <vm/hat_sfmmu.h>
#include <vm/seg_kpm.h>
#include <vm/seg_kmem.h>
#include <sys/cpuvar.h>
#include <sys/opl_olympus_regs.h>
#include <sys/opl_module.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/cpu_module.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/clock.h>
#include <sys/platform_module.h>
#include <sys/ontrap.h>
#include <sys/panic.h>
#include <sys/memlist.h>
#include <sys/ndifm.h>
#include <sys/ddifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/cpu/SPARC64-VI.h>
#include <sys/dtrace.h>
#include <sys/watchpoint.h>
#include <sys/promif.h>

/*
 * Internal functions.
 */
static int cpu_sync_log_err(void *flt);
static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *);
static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t);
static int cpu_flt_in_memory(opl_async_flt_t *, uint64_t);
static int prom_SPARC64VII_support_enabled(void);
static void opl_ta3();

/*
 * Error counters resetting interval.
 */
static int opl_async_check_interval = 60;		/* 1 min */

uint_t cpu_impl_dual_pgsz = 1;

/*
 * PA[22:0] represent Displacement in Jupiter
 * configuration space.
 */
uint_t	root_phys_addr_lo_mask = 0x7fffffu;

/*
 * set in /etc/system to control logging of user BERR/TO's
 */
int cpu_berr_to_verbose = 0;

/*
 * Set to 1 if booted with all Jupiter cpus (all-Jupiter features enabled).
 */
int cpu_alljupiter = 0;

static int min_ecache_size;
static uint_t priv_hcl_1;
static uint_t priv_hcl_2;
static uint_t priv_hcl_4;
static uint_t priv_hcl_8;

/*
 * Olympus error log
 */
static opl_errlog_t	*opl_err_log;

/*
 * OPL ta 3 save area.
 */
char	*opl_ta3_save;

/*
 * UE is classified into four classes (MEM, CHANNEL, CPU, PATH).
 * No other ecc_type_to_info insertion is allowed in between the following
 * four UE classes.
 */
ecc_type_to_info_t ecc_type_to_info[] = {
	SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
		"Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_UE_MEM,
	SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
		"Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_UE_CHANNEL,
	SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
		"Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_UE_CPU,
	SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
		"Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_UE_PATH,
	SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
		"Bus Error", FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_BERR,
	SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
		"Bus Timeout", FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_BTO,
	SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
		"TLB MultiHit", FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_MTLB,
	SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
		"TLB Parity", FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_TLBP,

	UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"IAUG CRE", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_CRE,
	UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT",
		OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"IAUG TSBCTXT", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_TSBCTX,
	UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"IUG TSBP", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_TSBP,
	UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"IUG PSTATE", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_PSTATE,
	UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"IUG TSTATE", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_TSTATE,
	UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"IUG FREG", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_IUG_F,
	UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"IUG RREG", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_IUG_R,
	UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"AUG SDC", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_SDC,
	UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"IUG WDT", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_WDT,
	UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"IUG DTLB", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_DTLB,
	UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"IUG ITLB", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_ITLB,
	UGESR_IUG_COREERR, "IUG_COREERR",
		OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"IUG COREERR", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_CORE,
	UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"MULTI DAE", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_DAE,
	UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"MULTI IAE", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_IAE,
	UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"MULTI UGE", FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_UGE,
	0, NULL, 0, 0,
		NULL, 0,
		0,
};

int (*p2get_mem_info)(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp);
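
/*
 * The four UE entries above must remain contiguous and in
 * MEM/CHANNEL/CPU/PATH order: cpu_queue_events() selects the proper
 * entry by advancing its ecc_type_to_info pointer by flt_eid_mod.
 *
 * p2get_mem_info is presumably filled in by the platform memory
 * controller driver; cpu_get_mem_info() only calls through it when
 * it is non-NULL.
 */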

/*
 * Setup trap handlers for 0xA, 0x32, 0x40 trap types
 * and "ta 3" and "ta 4".
 */
void
cpu_init_trap(void)
{
	OPL_SET_TRAP(tt0_iae, opl_serr_instr);
	OPL_SET_TRAP(tt1_iae, opl_serr_instr);
	OPL_SET_TRAP(tt0_dae, opl_serr_instr);
	OPL_SET_TRAP(tt1_dae, opl_serr_instr);
	OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr);
	OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr);
	OPL_SET_TRAP(tt0_flushw, opl_ta3_instr);
	OPL_PATCH_28(opl_cleanw_patch, opl_ta4_instr);
}

static int
getintprop(pnode_t node, char *name, int deflt)
{
	int value;

	switch (prom_getproplen(node, name)) {
	case sizeof (int):
		(void) prom_getprop(node, name, (caddr_t)&value);
		break;

	default:
		value = deflt;
		break;
	}

	return (value);
}

/*
 * Set the magic constants of the implementation.
 */
/*ARGSUSED*/
void
cpu_fiximp(pnode_t dnode)
{
	int i, a;
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;

	static struct {
		char	*name;
		int	*var;
		int	defval;
	} prop[] = {
		"l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE,
		"l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE,
		"l1-icache-size", &icache_size, OPL_ICACHE_SIZE,
		"l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE,
		"l2-cache-size", &ecache_size, OPL_ECACHE_SIZE,
		"l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE,
		"l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY
	};

	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++)
		*prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval);

	ecache_setsize = ecache_size / ecache_associativity;

	vac_size = OPL_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
	i = 0; a = vac_size;
	while (a >>= 1)
		++i;
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;
}
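
/*
 * The loop above simply computes vac_shift = log2(vac_size); e.g. a
 * vac_size of 0x10000 would yield a vac_shift of 16 (illustrative
 * value only; the real size comes from OPL_VAC_SIZE).
 */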

/*
 * Enable features for Jupiter-only domains.
 */
void
cpu_fix_alljupiter(void)
{
	if (!prom_SPARC64VII_support_enabled()) {
		/*
		 * Do not enable all-Jupiter features and do not turn on
		 * the cpu_alljupiter flag.
		 */
		return;
	}

	cpu_alljupiter = 1;

	/*
	 * Enable ima hwcap for Jupiter-only domains. DR will prevent
	 * addition of Olympus-C to all-Jupiter domains to preserve ima
	 * hwcap semantics.
	 */
	cpu_hwcap_flags |= AV_SPARC_IMA;
}

#ifdef	OLYMPUS_C_REV_B_ERRATA_XCALL
/*
 * Quick and dirty way to redefine locally in
 * OPL the value of IDSR_BN_SETS to 31 instead
 * of the standard 32 value. This is to work around
 * the REV_B Olympus-C processor's problem in handling
 * more than 31 xcall broadcasts.
 */
#undef	IDSR_BN_SETS
#define	IDSR_BN_SETS	31
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */

void
send_mondo_set(cpuset_t set)
{
	int lo, busy, nack, shipped = 0;
	uint16_t i, cpuids[IDSR_BN_SETS];
	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
	uint64_t starttick, endtick, tick, lasttick;
#if (NCPU > IDSR_BN_SETS)
	int index = 0;
	int ncpuids = 0;
#endif
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	int bn_sets = IDSR_BN_SETS;
	uint64_t ver;

	ASSERT(NCPU > bn_sets);
#endif

	ASSERT(!CPUSET_ISNULL(set));
	starttick = lasttick = gettick();

#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	ver = ultra_getver();
	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
	    ((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
		bn_sets = 1;
#endif

#if (NCPU <= IDSR_BN_SETS)
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			shipit(i, shipped);
			nackmask |= IDSR_NACK_BIT(shipped);
			cpuids[shipped++] = i;
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
#else
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			ncpuids++;

			/*
			 * Ship only to the first (IDSR_BN_SETS) CPUs. If we
			 * find we have shipped to more than (IDSR_BN_SETS)
			 * CPUs, set "index" to the highest numbered CPU in
			 * the set so we can ship to other CPUs a bit later on.
			 */
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			if (shipped < bn_sets) {
#else
			if (shipped < IDSR_BN_SETS) {
#endif
				shipit(i, shipped);
				nackmask |= IDSR_NACK_BIT(shipped);
				cpuids[shipped++] = i;
				CPUSET_DEL(set, i);
				if (CPUSET_ISNULL(set))
					break;
			} else
				index = (int)i;
		}

	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
#endif

	busymask = IDSR_NACK_TO_BUSY(nackmask);
	busy = nack = 0;
	endtick = starttick + xc_tick_limit;
	for (;;) {
		idsr = getidsr();
#if (NCPU <= IDSR_BN_SETS)
		if (idsr == 0)
			break;
#else
		if (idsr == 0 && shipped == ncpuids)
			break;
#endif
		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point. Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_CONT, "send mondo timeout [%d NACK %d "
			    "BUSY]\nIDSR 0x%" PRIx64 " cpuids:",
			    nack, busy, idsr);
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			for (i = 0; i < bn_sets; i++) {
#else
			for (i = 0; i < IDSR_BN_SETS; i++) {
#endif
				if (idsr & (IDSR_NACK_BIT(i) |
				    IDSR_BUSY_BIT(i))) {
					cmn_err(CE_CONT, " 0x%x", cpuids[i]);
				}
			}
			cmn_err(CE_CONT, "\n");
			cmn_err(CE_PANIC, "send_mondo_set: timeout");
		}
		curnack = idsr & nackmask;
		curbusy = idsr & busymask;

#ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
		/*
		 * Only proceed to send more xcalls if all the
		 * cpus in the previous IDSR_BN_SETS were completed.
		 */
		if (curbusy) {
			busy++;
			continue;
		}
#endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */

#if (NCPU > IDSR_BN_SETS)
		if (shipped < ncpuids) {
			uint64_t cpus_left;
			uint16_t next = (uint16_t)index;

			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
			    busymask;

			if (cpus_left) {
				do {
					/*
					 * Sequence through and ship to the
					 * remainder of the CPUs in the system
					 * (e.g. other than the first
					 * (IDSR_BN_SETS)) in reverse order.
					 */
					lo = lowbit(cpus_left) - 1;
					i = IDSR_BUSY_IDX(lo);
					shipit(next, i);
					shipped++;
					cpuids[i] = next;

					/*
					 * If we've processed all the CPUs,
					 * exit the loop now and save
					 * instructions.
					 */
					if (shipped == ncpuids)
						break;

					for ((index = ((int)next - 1));
					    index >= 0; index--)
						if (CPU_IN_SET(set, index)) {
							next = (uint16_t)index;
							break;
						}

					cpus_left &= ~(1ull << lo);
				} while (cpus_left);
				continue;
			}
		}
#endif
#ifndef	OLYMPUS_C_REV_B_ERRATA_XCALL
		if (curbusy) {
			busy++;
			continue;
		}
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
#ifdef	SEND_MONDO_STATS
		{
			int n = gettick() - starttick;
			if (n < 8192)
				x_nack_stimes[n >> 7]++;
		}
#endif
		while (gettick() < (tick + sys_clock_mhz))
			;
		do {
			lo = lowbit(curnack) - 1;
			i = IDSR_NACK_IDX(lo);
			shipit(cpuids[i], i);
			curnack &= ~(1ull << lo);
		} while (curnack);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_set_stimes[n >> 7]++;
		else
			x_set_ltimes[(n >> 13) & 0xf]++;
	}
	x_set_cpus[shipped]++;
#endif
}

/*
 * Cpu private initialization.
 */
void
cpu_init_private(struct cpu *cp)
{
	if (!((IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation)) ||
	    (IS_JUPITER(cpunodes[cp->cpu_id].implementation)))) {
		cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI(I) is "
		    "supported", cp->cpu_id,
		    cpunodes[cp->cpu_id].implementation);
	}

	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
}

void
cpu_setup(void)
{
	extern int at_flags;
	extern int cpc_has_overflow_intr;
	uint64_t cpu0_log;
	extern uint64_t opl_cpu0_err_log;

	/*
	 * Initialize Error log Scratch register for error handling.
	 */
	cpu0_log = va_to_pa(&opl_cpu0_err_log);
	opl_error_setup(cpu0_log);

	/*
	 * Enable MMU translating multiple page sizes for
	 * sITLB and sDTLB.
	 */
	opl_mpg_enable();

	/*
	 * Setup chip-specific trap handlers.
	 */
	cpu_init_trap();

	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;

	/*
	 * Due to the number of entries in the fully-associative tlb
	 * this may have to be tuned lower than in spitfire.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores do not invalidate all pages of the d$, pagecopy
	 * et al. need virtual translations with virtual coloring taken
	 * into consideration. prefetch/ldd will pollute the d$ on the
	 * load side.
	 */
	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;

	if (use_page_coloring) {
		do_pg_coloring = 1;
	}

	isa_list =
	    "sparcv9+vis2 sparcv9+vis sparcv9 "
	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2 |
	    AV_SPARC_POPC | AV_SPARC_FMAF;

	/*
	 * On SPARC64-VI, there's no hole in the virtual address space
	 */
	hole_start = hole_end = 0;

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */
	kpm_size_shift = 47;
	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
	kpm_smallpages = 1;

	/*
	 * The traptrace code uses either %tick or %stick for
	 * timestamping. We have %stick so we can use it.
	 */
	traptrace_use_stick = 1;

	/*
	 * SPARC64-VI has a performance counter overflow interrupt
	 */
	cpc_has_overflow_intr = 1;

	/*
	 * Declare that this architecture/cpu combination does not support
	 * fpRAS.
	 */
	fpras_implemented = 0;
}

/*
 * Called by setcpudelay
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * For SPARC64-VI we want to use the system clock rate as
	 * the basis for low level timing, due to support of mixed
	 * speed CPUs and power management.
	 */
	if (system_clock_freq == 0)
		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");

	sys_tick_freq = system_clock_freq;
}

#ifdef SEND_MONDO_STATS
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif

/*
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be
 * included in the KDI-specific version.  See us3_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	int busy, nack;
	uint64_t idsr, starttick, endtick, tick, lasttick;
	uint64_t busymask;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	starttick = lasttick = gettick();
	shipit(cpuid, 0);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	busymask = IDSR_BUSY;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;

		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point. Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC, "send mondo timeout (target 0x%x) "
			    "[%d NACK %d BUSY]", cpuid, nack, busy);
		}

		if (idsr & busymask) {
			busy++;
			continue;
		}
		drv_usecwait(1);
		shipit(cpuid, 0);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_one_stimes[n >> 7]++;
		else
			x_one_ltimes[(n >> 13) & 0xf]++;
	}
#endif
}

/*
 * init_mmu_page_sizes is set to one after the bootup time initialization
 * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a
 * valid value.
 *
 * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific
 * versions of disable_ism_large_pages and disable_large_pages, and feed back
 * into those two hat variables at hat initialization time.
 */
int init_mmu_page_sizes = 0;

static uint_t mmu_disable_large_pages = 0;
static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) |
	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) |
	(1 << TTE512K));

/*
 * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
 * Called during very early bootup from check_cpus_set().
 * Can be called to verify that mmu_page_sizes are set up correctly.
 *
 * Set Olympus defaults. We do not use the function parameter.
 */
/*ARGSUSED*/
int
mmu_init_mmu_page_sizes(int32_t not_used)
{
	if (!init_mmu_page_sizes) {
		mmu_page_sizes = MMU_PAGE_SIZES;
		mmu_hashcnt = MAX_HASHCNT;
		mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE;
		mmu_exported_pagesize_mask = (1 << TTE8K) |
		    (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) |
		    (1 << TTE32M) | (1 << TTE256M);
		init_mmu_page_sizes = 1;
		return (0);
	}
	return (1);
}

/* SPARC64-VI worst case DTLB parameters */
#ifndef	LOCKED_DTLB_ENTRIES
#define	LOCKED_DTLB_ENTRIES	5	/* 2 user TSBs, 2 nucleus, + OBP */
#endif
#define	TOTAL_DTLB_ENTRIES	32
#define	AVAIL_32M_ENTRIES	0
#define	AVAIL_256M_ENTRIES	0
#define	AVAIL_DTLB_ENTRIES	(TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES)
static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = {
	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES};
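
/*
 * With the values above, AVAIL_DTLB_ENTRIES works out to 32 - 5 = 27,
 * and every page size shares that same worst-case threshold: a large
 * page size is only kept programmed in the sTLB once a process maps at
 * least 27 translations of that size (see mmu_setup_page_sizes() below).
 */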

/*
 * The function returns the mmu-specific values for the
 * hat's disable_large_pages, disable_ism_large_pages,
 * disable_auto_data_large_pages, and
 * disable_auto_text_large_pages variables.
 */
uint_t
mmu_large_pages_disabled(uint_t flag)
{
	uint_t pages_disable = 0;
	extern int use_text_pgsz64K;
	extern int use_text_pgsz512K;

	if (flag == HAT_LOAD) {
		pages_disable = mmu_disable_large_pages;
	} else if (flag == HAT_LOAD_SHARE) {
		pages_disable = mmu_disable_ism_large_pages;
	} else if (flag == HAT_AUTO_DATA) {
		pages_disable = mmu_disable_auto_data_large_pages;
	} else if (flag == HAT_AUTO_TEXT) {
		pages_disable = mmu_disable_auto_text_large_pages;
		if (use_text_pgsz512K) {
			pages_disable &= ~(1 << TTE512K);
		}
		if (use_text_pgsz64K) {
			pages_disable &= ~(1 << TTE64K);
		}
	}
	return (pages_disable);
}

/*
 * mmu_init_large_pages is called with the desired ism_pagesize parameter.
 * It may be called from set_platform_defaults, if some value other than 32M
 * is desired.  mmu_ism_pagesize is the tunable.  If it has a bad value,
 * then only warn, since it would be bad form to panic due to a user typo.
 *
 * The function re-initializes the mmu_disable_ism_large_pages variable.
 */
void
mmu_init_large_pages(size_t ism_pagesize)
{
	switch (ism_pagesize) {
	case MMU_PAGESIZE4M:
		mmu_disable_ism_large_pages = ((1 << TTE64K) |
		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
		break;
	case MMU_PAGESIZE32M:
		mmu_disable_ism_large_pages = ((1 << TTE64K) |
		    (1 << TTE512K) | (1 << TTE256M));
		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M));
		adjust_data_maxlpsize(ism_pagesize);
		break;
	case MMU_PAGESIZE256M:
		mmu_disable_ism_large_pages = ((1 << TTE64K) |
		    (1 << TTE512K) | (1 << TTE32M));
		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M));
		adjust_data_maxlpsize(ism_pagesize);
		break;
	default:
		cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx",
		    ism_pagesize);
		break;
	}
}

/*
 * Function to reprogram the TLBs when page sizes used
 * by a process change significantly.
 */
void
mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz)
{
	uint8_t pgsz0, pgsz1;

	/*
	 * Don't program 2nd dtlb for kernel and ism hat
	 */
	ASSERT(hat->sfmmu_ismhat == NULL);
	ASSERT(hat != ksfmmup);

	/*
	 * hat->sfmmu_pgsz[] is an array whose elements
	 * contain a sorted order of page sizes.  Element
	 * 0 is the most commonly used page size, followed
	 * by element 1, and so on.
	 *
	 * ttecnt[] is an array of per-page-size page counts
	 * mapped into the process.
	 *
	 * If the HAT's choice for page sizes is unsuitable,
	 * we can override it here.  The new values written
	 * to the array will be handed back to us later to
	 * do the actual programming of the TLB hardware.
	 */
	pgsz0 = (uint8_t)MIN(tmp_pgsz[0], tmp_pgsz[1]);
	pgsz1 = (uint8_t)MAX(tmp_pgsz[0], tmp_pgsz[1]);

	/*
	 * This implements PAGESIZE programming of the sTLB
	 * if large TTE counts don't exceed the thresholds.
	 */
	if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0])
		pgsz0 = page_szc(MMU_PAGESIZE);
	if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1])
		pgsz1 = page_szc(MMU_PAGESIZE);
	tmp_pgsz[0] = pgsz0;
	tmp_pgsz[1] = pgsz1;
	/* otherwise, accept what the HAT chose for us */
}

/*
 * The HAT calls this function when an MMU context is allocated so that we
 * can reprogram the large TLBs appropriately for the new process using
 * the context.
 *
 * The caller must hold the HAT lock.
 */
void
mmu_set_ctx_page_sizes(struct hat *hat)
{
	uint8_t pgsz0, pgsz1;
	uint8_t new_cext;

	ASSERT(sfmmu_hat_lock_held(hat));
	/*
	 * Don't program 2nd dtlb for kernel and ism hat
	 */
	if (hat->sfmmu_ismhat || hat == ksfmmup)
		return;

	/*
	 * If supported, reprogram the TLBs to a larger pagesize.
	 */
	pgsz0 = hat->sfmmu_pgsz[0];
	pgsz1 = hat->sfmmu_pgsz[1];
	ASSERT(pgsz0 < mmu_page_sizes);
	ASSERT(pgsz1 < mmu_page_sizes);
	new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0);
	if (hat->sfmmu_cext != new_cext) {
#ifdef DEBUG
		int i;
		/*
		 * Assert that cnum is invalid; the page size can only be
		 * changed after a proc's contexts are invalidated.
		 */
		for (i = 0; i < max_mmu_ctxdoms; i++) {
			ASSERT(hat->sfmmu_ctxs[i].cnum == INVALID_CONTEXT);
		}
#endif /* DEBUG */
		hat->sfmmu_cext = new_cext;
	}
	/*
	 * sfmmu_setctx_sec() will take care of the
	 * rest of the dirty work for us.
	 */
}

/*
 * This function assumes that there are either four or six supported page
 * sizes and at most two programmable TLBs, so we need to decide which
 * page sizes are most important and then adjust the TLB page sizes
 * accordingly (if supported).
 *
 * If these assumptions change, this function will need to be
 * updated to support whatever the new limits are.
 */
void
mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt)
{
	uint64_t sortcnt[MMU_PAGE_SIZES];
	uint8_t tmp_pgsz[MMU_PAGE_SIZES];
	uint8_t i, j, max;
	uint16_t oldval, newval;

	/*
	 * We only consider reprogramming the TLBs if one or more of
	 * the two most used page sizes changes and we're using
	 * large pages in this process.
	 */
	if (SFMMU_LGPGS_INUSE(sfmmup)) {
		/* Sort page sizes. */
		for (i = 0; i < mmu_page_sizes; i++) {
			sortcnt[i] = ttecnt[i];
		}
		for (j = 0; j < mmu_page_sizes; j++) {
			for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) {
				if (sortcnt[i] > sortcnt[max])
					max = i;
			}
			tmp_pgsz[j] = max;
			sortcnt[max] = 0;
		}

		oldval = sfmmup->sfmmu_pgsz[0] << 8 | sfmmup->sfmmu_pgsz[1];

		mmu_setup_page_sizes(sfmmup, ttecnt, tmp_pgsz);

		/* Check 2 largest values after the sort. */
		newval = tmp_pgsz[0] << 8 | tmp_pgsz[1];
		if (newval != oldval) {
			sfmmu_reprog_pgsz_arr(sfmmup, tmp_pgsz);
		}
	}
}
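
/*
 * Illustrative example of the selection sort above (counts are
 * hypothetical): with ttecnt[] = { 8K:500, 64K:0, 512K:0, 4M:40,
 * 32M:0, 256M:0 }, tmp_pgsz[] comes out { TTE8K, TTE4M, ... }; since
 * 40 is at or above the 27-entry threshold, mmu_setup_page_sizes()
 * keeps TTE4M rather than demoting it to the base page size.
 */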

/*
 * Return processor specific async error structure
 * size used.
 */
int
cpu_aflt_size(void)
{
	return (sizeof (opl_async_flt_t));
}

/*
 * The cpu_sync_log_err() function is called via the [uc]e_drain() function to
 * post-process CPU events that are dequeued.  As such, it can be invoked
 * from softint context, from AST processing in the trap() flow, or from the
 * panic flow.  We decode the CPU-specific data, and take appropriate actions.
 * Historically this entry point was used to log the actual cmn_err(9F) text;
 * now with FMA it is used to prepare 'flt' to be converted into an ereport.
 * With FMA this function now also returns a flag which indicates to the
 * caller whether the ereport should be posted (1) or suppressed (0).
 */
/*ARGSUSED*/
static int
cpu_sync_log_err(void *flt)
{
	opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;

	/*
	 * No extra processing of urgent error events.
	 * Always generate ereports for these events.
	 */
	if (aflt->flt_status == OPL_ECC_URGENT_TRAP)
		return (1);

	/*
	 * Additional processing for synchronous errors.
	 */
	switch (opl_flt->flt_type) {
	case OPL_CPU_INV_SFSR:
		return (1);

	case OPL_CPU_SYNC_UE:
		/*
		 * The validity: SFSR_MK_UE bit has been checked
		 * in opl_cpu_sync_error()
		 * No more check is required.
		 *
		 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W,
		 * and they have been retrieved in cpu_queue_events()
		 */
		if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) {
			ASSERT(aflt->flt_in_memory);
			/*
			 * We want to skip logging only if ALL the following
			 * conditions are true:
			 *
			 *	1. We are not panicking already.
			 *	2. The error is a memory error.
			 *	3. There is only one error.
			 *	4. The error is on a retired page.
			 *	5. The error occurred under on_trap
			 *	   protection AFLT_PROT_EC
			 */
			if (!panicstr && aflt->flt_prot == AFLT_PROT_EC &&
			    page_retire_check(aflt->flt_addr, NULL) == 0) {
				/*
				 * Do not log an error from
				 * the retired page
				 */
				softcall(ecc_page_zero, (void *)aflt->flt_addr);
				return (0);
			}
			if (!panicstr)
				cpu_page_retire(opl_flt);
		}
		return (1);

	case OPL_CPU_SYNC_OTHERS:
		/*
		 * For the following error cases, the processor HW does
		 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt
		 * to assign appropriate values here to reflect what we
		 * think is the most likely cause of the problem w.r.t.
		 * the particular error event.  For Buserr and timeout
		 * error events, we will assign OPL_ERRID_CHANNEL as the
		 * most likely reason.  For TLB parity or multiple hit
		 * error events, we will assign the reason as
		 * OPL_ERRID_CPU (cpu related problem) and set the
		 * flt_eid_sid to point to the cpuid.
		 */
		if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) {
			/*
			 * flt_eid_sid will not be used for this case.
			 */
			opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL;
		}
		if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) {
			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
			opl_flt->flt_eid_sid = aflt->flt_inst;
		}

		/*
		 * In case of no effective error bit
		 */
		if ((opl_flt->flt_bit & SFSR_ERRS) == 0) {
			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
			opl_flt->flt_eid_sid = aflt->flt_inst;
		}
		break;

	default:
		return (1);
	}
	return (1);
}

/*
 * Retire the bad page that may contain the flushed error.
 */
void
cpu_page_retire(opl_async_flt_t *opl_flt)
{
	struct async_flt *aflt = (struct async_flt *)opl_flt;
	(void) page_retire(aflt->flt_addr, PR_UE);
}

/*
 * Invoked by error_init() early in startup and therefore before
 * startup_errorq() is called to drain any error Q -
 *
 * startup()
 *   startup_end()
 *     error_init()
 *       cpu_error_init()
 * errorq_init()
 *   errorq_drain()
 * start_other_cpus()
 *
 * The purpose of this routine is to create error-related taskqs.  Taskqs
 * are used for this purpose because cpu_lock can't be grabbed from interrupt
 * context.
 */
/*ARGSUSED*/
void
cpu_error_init(int items)
{
	opl_err_log = (opl_errlog_t *)
	    kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP);
	if ((uint64_t)opl_err_log & MMU_PAGEOFFSET)
		cmn_err(CE_PANIC, "The base address of the error log "
		    "is not page aligned");
}

/*
 * We route all errors through a single switch statement.
 */
void
cpu_ue_log_err(struct async_flt *aflt)
{
	switch (aflt->flt_class) {
	case CPU_FAULT:
		if (cpu_sync_log_err(aflt))
			cpu_ereport_post(aflt);
		break;

	case BUS_FAULT:
		bus_async_log_err(aflt);
		break;

	default:
		cmn_err(CE_WARN, "discarding async error %p with invalid "
		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
		return;
	}
}

/*
 * Routine for panic hook callback from panic_idle().
 *
 * Nothing to do here.
 */
void
cpu_async_panic_callb(void)
{
}

/*
 * Routine to return a string identifying the physical name
 * associated with a memory/cache error.
 */
/*ARGSUSED*/
int
cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
    uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
    ushort_t flt_status, char *buf, int buflen, int *lenp)
{
	int synd_code;
	int ret;

	/*
	 * An AFSR of -1 defaults to a memory syndrome.
	 */
	synd_code = (int)flt_synd;

	if (&plat_get_mem_unum) {
		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
			buf[0] = '\0';
			*lenp = 0;
		}
		return (ret);
	}
	buf[0] = '\0';
	*lenp = 0;
	return (ENOTSUP);
}

/*
 * Wrapper for cpu_get_mem_unum() routine that takes an
 * async_flt struct rather than explicit arguments.
 */
int
cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
    char *buf, int buflen, int *lenp)
{
	/*
	 * We always pass -1 so that cpu_get_mem_unum will interpret this as a
	 * memory error.
	 */
	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
	    (uint64_t)-1,
	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
	    aflt->flt_status, buf, buflen, lenp));
}

/*
 * This routine is a more generic interface to cpu_get_mem_unum()
 * that may be used by other modules (e.g. mm).
 */
/*ARGSUSED*/
int
cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
    char *buf, int buflen, int *lenp)
{
	int synd_status, flt_in_memory, ret;
	ushort_t flt_status = 0;
	char unum[UNUM_NAMLEN];

	/*
	 * Check for an invalid address.
	 */
	if (afar == (uint64_t)-1)
		return (ENXIO);

	if (synd == (uint64_t)-1)
		synd_status = AFLT_STAT_INVALID;
	else
		synd_status = AFLT_STAT_VALID;

	flt_in_memory = (*afsr & SFSR_MEMORY) &&
	    pf_is_memory(afar >> MMU_PAGESHIFT);

	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
	if (ret != 0)
		return (ret);

	if (*lenp >= buflen)
		return (ENAMETOOLONG);

	(void) strncpy(buf, unum, buflen);

	return (0);
}

/*
 * Routine to return memory information associated
 * with a physical address and syndrome.
 */
/*ARGSUSED*/
int
cpu_get_mem_info(uint64_t synd, uint64_t afar,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp)
{
	int synd_code = (int)synd;

	if (afar == (uint64_t)-1)
		return (ENXIO);

	if (p2get_mem_info != NULL)
		return ((p2get_mem_info)(synd_code, afar, mem_sizep, seg_sizep,
		    bank_sizep, segsp, banksp, mcidp));
	else
		return (ENOTSUP);
}

/*
 * Routine to return a string identifying the physical
 * name associated with a cpuid.
 */
int
cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	int ret;
	char unum[UNUM_NAMLEN];

	if (&plat_get_cpu_unum) {
		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN,
		    lenp)) != 0)
			return (ret);
	} else {
		return (ENOTSUP);
	}

	if (*lenp >= buflen)
		return (ENAMETOOLONG);

	(void) strncpy(buf, unum, *lenp);

	return (0);
}

/*
 * This routine exports the name buffer size.
 */
size_t
cpu_get_name_bufsize()
{
	return (UNUM_NAMLEN);
}

/*
 * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH
 */
void
cpu_flush_ecache(void)
{
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
	    cpunodes[CPU->cpu_id].ecache_linesize);
}

static uint8_t
flt_to_trap_type(struct async_flt *aflt)
{
	if (aflt->flt_status & OPL_ECC_ISYNC_TRAP)
		return (TRAP_TYPE_ECC_I);
	if (aflt->flt_status & OPL_ECC_DSYNC_TRAP)
		return (TRAP_TYPE_ECC_D);
	if (aflt->flt_status & OPL_ECC_URGENT_TRAP)
		return (TRAP_TYPE_URGENT);
	return (TRAP_TYPE_UNKNOWN);
}

/*
 * Encode the data saved in the opl_async_flt_t struct into
 * the FM ereport payload.
 */
/* ARGSUSED */
static void
cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
    nvlist_t *resource)
{
	opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt;
	char unum[UNUM_NAMLEN];
	char sbuf[21];		/* sizeof (UINT64_MAX) + '\0' */
	int len;

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR,
		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR,
		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR,
		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
		    DATA_TYPE_BOOLEAN_VALUE,
		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS,
		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL);
	}

	switch (opl_flt->flt_eid_mod) {
	case OPL_ERRID_CPU:
		(void) snprintf(sbuf, sizeof (sbuf), "%llX",
		    (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id);
		(void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION,
		    NULL, opl_flt->flt_eid_sid,
		    (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version, sbuf);
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
		    DATA_TYPE_NVLIST, resource, NULL);
		break;

	case OPL_ERRID_CHANNEL:
		/*
		 * No resource is created but the cpumem DE will find
		 * the defective path by retrieving EID from SFSR which is
		 * included in the payload.
		 */
		break;

	case OPL_ERRID_MEM:
		(void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len);
		(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL,
		    unum, NULL, (uint64_t)-1);
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
		    DATA_TYPE_NVLIST, resource, NULL);
		break;

	case OPL_ERRID_PATH:
		/*
		 * No resource is created but the cpumem DE will find
		 * the defective path by retrieving EID from SFSR which is
		 * included in the payload.
		 */
		break;
	}
}

/*
 * Returns whether fault address is valid for this error bit and
 * whether the address is "in memory" (i.e. pf_is_memory returns 1).
 */
/*ARGSUSED*/
static int
cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit)
{
	struct async_flt *aflt = (struct async_flt *)opl_flt;

	if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) {
		return ((t_afsr_bit & SFSR_MEMORY) &&
		    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
	}
	return (0);
}
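
/*
 * A note on the payload encoding above: aflt->flt_stat carries the SFSR
 * for synchronous errors and the UGESR for urgent errors, so the same
 * field is emitted under either the SFSR or the UGESR payload name,
 * depending on which FM_EREPORT_PAYLOAD_FLAG_* bits were selected by
 * the ecc_type_to_info entry that queued the event.
 */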

/*
 * In OPL SCF does the stick synchronization.
 */
void
sticksync_slave(void)
{
}

/*
 * In OPL SCF does the stick synchronization.
 */
void
sticksync_master(void)
{
}

/*
 * Cpu private uninitialization.  OPL cpus do not use the private area.
 */
void
cpu_uninit_private(struct cpu *cp)
{
	cmp_delete_cpu(cp->cpu_id);
}

/*
 * Always flush an entire cache.
 */
void
cpu_error_ecache_flush(void)
{
	cpu_flush_ecache();
}

void
cpu_ereport_post(struct async_flt *aflt)
{
	char *cpu_type, buf[FM_MAX_CLASS];
	nv_alloc_t *nva = NULL;
	nvlist_t *ereport, *detector, *resource;
	errorq_elem_t *eqep;
	char sbuf[21];		/* sizeof (UINT64_MAX) + '\0' */

	if (aflt->flt_panic || panicstr) {
		eqep = errorq_reserve(ereport_errorq);
		if (eqep == NULL)
			return;
		ereport = errorq_elem_nvl(ereport_errorq, eqep);
		nva = errorq_elem_nva(ereport_errorq, eqep);
	} else {
		ereport = fm_nvlist_create(nva);
	}

	/*
	 * Create the scheme "cpu" FMRI.
	 */
	detector = fm_nvlist_create(nva);
	resource = fm_nvlist_create(nva);
	switch (cpunodes[aflt->flt_inst].implementation) {
	case OLYMPUS_C_IMPL:
		cpu_type = FM_EREPORT_CPU_SPARC64_VI;
		break;
	case JUPITER_IMPL:
		cpu_type = FM_EREPORT_CPU_SPARC64_VII;
		break;
	default:
		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
		break;
	}
	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
	    (u_longlong_t)cpunodes[aflt->flt_inst].device_id);
	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
	    aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version,
	    sbuf);

	/*
	 * Encode all the common data into the ereport.
	 */
	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);

	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
	    fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL);

	/*
	 * Encode the error specific data that was saved in
	 * the async_flt structure into the ereport.
	 */
	cpu_payload_add_aflt(aflt, ereport, resource);

	if (aflt->flt_panic || panicstr) {
		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
	} else {
		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
		fm_nvlist_destroy(ereport, FM_NVA_FREE);
		fm_nvlist_destroy(detector, FM_NVA_FREE);
		fm_nvlist_destroy(resource, FM_NVA_FREE);
	}
}

void
cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
{
	int status;
	ddi_fm_error_t de;

	bzero(&de, sizeof (ddi_fm_error_t));

	de.fme_version = DDI_FME_VERSION;
	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
	de.fme_flag = expected;
	de.fme_bus_specific = (void *)aflt->flt_addr;
	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
		aflt->flt_panic = 1;
}

void
cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
    errorq_t *eqp, uint_t flag)
{
	struct async_flt *aflt = (struct async_flt *)payload;

	aflt->flt_erpt_class = error_class;
	errorq_dispatch(eqp, payload, payload_sz, flag);
}

void
adjust_hw_copy_limits(int ecache_size)
{
	/*
	 * Set hw copy limits.
	 *
	 * /etc/system will be parsed later and can override one or more
	 * of these settings.
	 *
	 * At this time, ecache size seems only mildly relevant.
	 * We seem to run into issues with the d-cache and stalls
	 * we see on misses.
	 *
	 * Cycle measurement indicates that 2 byte aligned copies fare
	 * little better than doing things with VIS at around 512 bytes.
	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
	 * aligned is faster whenever the source and destination data are
	 * in cache and the total size is less than 2 Kbytes.  The 2K
	 * limit seems to be driven by the 2K write cache.
	 * When more than 2K of copies are done in non-VIS mode, stores
	 * back up in the write cache.  In VIS mode, the write cache is
	 * bypassed, allowing faster cache-line writes aligned on cache
	 * boundaries.
	 *
	 * In addition, in non-VIS mode, there is no prefetching, so
	 * for larger copies, the advantage of prefetching to avoid even
	 * occasional cache misses is enough to justify using the VIS code.
	 *
	 * During testing, it was discovered that netbench ran 3% slower
	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
	 * applications, data is only used once (copied to the output
	 * buffer, then copied by the network device off the system).  Using
	 * the VIS copy saves more L2 cache state.  Network copies are
	 * around 1.3K to 1.5K in size for historical reasons.
	 *
	 * Therefore, a limit of 1K bytes will be used for the 8 byte
	 * aligned copy even for large caches and 8 MB ecache.  The
	 * infrastructure to allow different limits for different sized
	 * caches is kept to allow further tuning in later releases.
	 */

	if (min_ecache_size == 0 && use_hw_bcopy) {
		/*
		 * First time through - should be before /etc/system
		 * is read.
		 * Could skip the checks for zero but this lets us
		 * preserve any debugger rewrites.
		 */
		if (hw_copy_limit_1 == 0) {
			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
			priv_hcl_1 = hw_copy_limit_1;
		}
		if (hw_copy_limit_2 == 0) {
			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
			priv_hcl_2 = hw_copy_limit_2;
		}
		if (hw_copy_limit_4 == 0) {
			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
			priv_hcl_4 = hw_copy_limit_4;
		}
		if (hw_copy_limit_8 == 0) {
			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
			priv_hcl_8 = hw_copy_limit_8;
		}
		min_ecache_size = ecache_size;
	} else {
		/*
		 * MP initialization. Called *after* /etc/system has
		 * been parsed. One CPU has already been initialized.
		 * Need to cater for /etc/system having scragged one
		 * of our values.
		 */
		if (ecache_size == min_ecache_size) {
			/*
			 * Same size ecache. We do nothing unless we
			 * have a pessimistic ecache setting. In that
			 * case we become more optimistic (if the cache is
			 * large enough).
			 */
			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
				/*
				 * Need to adjust hw_copy_limit* from our
				 * pessimistic uniprocessor value to a more
				 * optimistic UP value *iff* it hasn't been
				 * reset.
				 */
				if ((ecache_size > 1048576) &&
				    (priv_hcl_8 == hw_copy_limit_8)) {
					if (ecache_size <= 2097152)
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					else if (ecache_size <= 4194304)
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					else
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					priv_hcl_8 = hw_copy_limit_8;
				}
			}
		} else if (ecache_size < min_ecache_size) {
			/*
			 * A different ecache size. Can this even happen?
			 */
			if (priv_hcl_8 == hw_copy_limit_8) {
				/*
				 * The previous value that we set
				 * is unchanged (i.e., it hasn't been
				 * scragged by /etc/system). Rewrite it.
				 */
				if (ecache_size <= 1048576)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else if (ecache_size <= 2097152)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else if (ecache_size <= 4194304)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else
					hw_copy_limit_8 = 10 *
					    VIS_COPY_THRESHOLD;
				priv_hcl_8 = hw_copy_limit_8;
				min_ecache_size = ecache_size;
			}
		}
	}
}

#define	VIS_BLOCKSIZE		64

int
dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
{
	int ret, watched;

	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
	ret = dtrace_blksuword32(addr, data, 0);
	if (watched)
		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);

	return (ret);
}

void
opl_cpu_reg_init()
{
	uint64_t this_cpu_log;

	/*
	 * We do not need to re-initialize cpu0 registers.
	 */
	if (cpu[getprocessorid()] == &cpu0) {
		/*
		 * Support for "ta 3"
		 */
		opl_ta3();
		return;
	}

	/*
	 * Initialize Error log Scratch register for error handling.
	 */
	this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) +
	    ERRLOG_BUFSZ * (getprocessorid())));
	opl_error_setup(this_cpu_log);

	/*
	 * Enable MMU translating multiple page sizes for
	 * sITLB and sDTLB.
	 */
	opl_mpg_enable();
}

/*
 * Queue one event in ue_queue based on ecc_type_to_info entry.
 */
static void
cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason,
    ecc_type_to_info_t *eccp)
{
	struct async_flt *aflt = (struct async_flt *)opl_flt;

	if (reason &&
	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
		(void) strcat(reason, eccp->ec_reason);
	}

	opl_flt->flt_bit = eccp->ec_afsr_bit;
	opl_flt->flt_type = eccp->ec_flt_type;
	aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit);
	aflt->flt_payload = eccp->ec_err_payload;

	ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP));
	cpu_errorq_dispatch(eccp->ec_err_class, (void *)opl_flt,
	    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
}

/*
 * Queue events on async event queue one event per error bit.
 * Return number of events queued.
 */
int
cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs)
{
	struct async_flt *aflt = (struct async_flt *)opl_flt;
	ecc_type_to_info_t *eccp;
	int nevents = 0;

	/*
	 * Queue expected errors; the error bit and fault type must match
	 * in the ecc_type_to_info table.
	 */
	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
	    eccp++) {
		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
		    (eccp->ec_flags & aflt->flt_status) != 0) {
			/*
			 * A UE error event can be further
			 * classified/broken down into finer granularity
			 * based on the flt_eid_mod value set by HW.  We do
			 * special handling here so that we can report UE
			 * error in finer granularity as ue_mem,
			 * ue_channel, ue_cpu or ue_path.
			 */
			if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) {
				opl_flt->flt_eid_mod = (aflt->flt_stat &
				    SFSR_EID_MOD) >> SFSR_EID_MOD_SHIFT;
				opl_flt->flt_eid_sid = (aflt->flt_stat &
				    SFSR_EID_SID) >> SFSR_EID_SID_SHIFT;
				/*
				 * Need to advance eccp pointer by flt_eid_mod
				 * so that we get an appropriate ecc pointer
				 *
				 * EID			# of advances
				 * ----------------------------------
				 * OPL_ERRID_MEM	0
				 * OPL_ERRID_CHANNEL	1
				 * OPL_ERRID_CPU	2
				 * OPL_ERRID_PATH	3
				 */
				eccp += opl_flt->flt_eid_mod;
			}
			cpu_queue_one_event(opl_flt, reason, eccp);
			t_afsr_errs &= ~eccp->ec_afsr_bit;
			nevents++;
		}
	}

	return (nevents);
}

/*
 * Sync. error wrapper functions.
 * We use these functions in order to transfer here from the
 * nucleus trap handler information about trap type (data or
 * instruction) and trap level (0 or above 0). This way we
 * get rid of using SFSR's reserved bits.
 */

#define	OPL_SYNC_TL0	0
#define	OPL_SYNC_TL1	1
#define	OPL_ISYNC_ERR	0
#define	OPL_DSYNC_ERR	1

void
opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
{
	uint64_t t_sfar = p_sfar;
	uint64_t t_sfsr = p_sfsr;

	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
	    OPL_SYNC_TL0, OPL_ISYNC_ERR);
}

void
opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
{
	uint64_t t_sfar = p_sfar;
	uint64_t t_sfsr = p_sfsr;

	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
	    OPL_SYNC_TL1, OPL_ISYNC_ERR);
}

void
opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
{
	uint64_t t_sfar = p_sfar;
	uint64_t t_sfsr = p_sfsr;

	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
	    OPL_SYNC_TL0, OPL_DSYNC_ERR);
}

void
opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
{
	uint64_t t_sfar = p_sfar;
	uint64_t t_sfsr = p_sfsr;

	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
	    OPL_SYNC_TL1, OPL_DSYNC_ERR);
}

/*
 * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL
 * and TLB_PRT.
 * This function is designed based on cpu_deferred_error().
 */

static void
opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr,
    uint_t tl, uint_t derr)
{
	opl_async_flt_t opl_flt;
	struct async_flt *aflt;
	int trampolined = 0;
	char pr_reason[MAX_REASON_STRING];
	uint64_t log_sfsr;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but it's ok
	 * because we just need a consistent snapshot and we know that everyone
	 * else will store a consistent set of bits while holding p_lock.
	 * We don't have to worry about a race because SDOCORE is set once
	 * prior to doing i/o from the process's address space and is never
	 * cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	pr_reason[0] = '\0';

	/*
	 * handle the specific error
	 */
	bzero(&opl_flt, sizeof (opl_async_flt_t));
	aflt = (struct async_flt *)&opl_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_stat = t_sfsr;
	aflt->flt_addr = t_sfar;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = (uchar_t)AFLT_PROT_NONE;
	aflt->flt_class = (uchar_t)CPU_FAULT;
	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate &
	    TSTATE_PRIV) ? 1 : 0));
	aflt->flt_tl = (uchar_t)tl;
	aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 ||
	    (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0);
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
	aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP;

	/*
	 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain.
	 * So, clear all error bits to avoid mis-handling and force the system
	 * to panic.
	 * We skip all the procedures below down to the panic message call.
	 */
	if (!(t_sfsr & SFSR_FV)) {
		opl_flt.flt_type = OPL_CPU_INV_SFSR;
		aflt->flt_panic = 1;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
		fm_panic("%sErrors(s)", "invalid SFSR");
	}

	/*
	 * If either the UE or MK bit is off, this is not a valid UE error.
	 * If it is not a valid UE error, clear the UE & MK_UE bits to prevent
	 * mis-handling below.
	 * aflt->flt_stat keeps the original bits as a reference.
	 */
	if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) !=
	    (SFSR_MK_UE|SFSR_UE)) {
		t_sfsr &= ~(SFSR_MK_UE|SFSR_UE);
	}

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check to
	 * see if we were executing in the kernel under on_trap() or t_lofault
	 * protection.  If so, modify the saved registers so that we return
	 * from the trap to the appropriate trampoline routine.

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check to
	 * see if we were executing in the kernel under on_trap() or t_lofault
	 * protection. If so, modify the saved registers so that we return
	 * from the trap to the appropriate trampoline routine.
	 */
	if (!aflt->flt_panic && aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = (uchar_t)AFLT_PROT_EC;
				otp->ot_trap |= (ushort_t)OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
			}

			if ((t_sfsr & (SFSR_TO | SFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS;
				otp->ot_trap |= (ushort_t)OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
				/*
				 * For peeks and cautious gets, errors are
				 * expected.
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			trampolined = 1;
		}
	}

	/*
	 * If we're in user mode or we're doing a protected copy, we either
	 * want the ASTON code below to send a signal to the user process
	 * or we want to panic if aft_panic is set.
	 *
	 * If we're in privileged mode and we're not doing a copy, then we
	 * need to check if we've trampolined. If we haven't trampolined,
	 * we should panic.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
			aflt->flt_panic |= aft_panic;
	} else if (!trampolined) {
		aflt->flt_panic = 1;
	}

	/*
	 * If we've trampolined due to a privileged TO or BERR, or if an
	 * unprivileged TO or BERR occurred, we don't want to enqueue an
	 * event for that TO or BERR. Queue all other events (if any) besides
	 * the TO/BERR.
	 */
	log_sfsr = t_sfsr;
	if (trampolined) {
		log_sfsr &= ~(SFSR_TO | SFSR_BERR);
	} else if (!aflt->flt_priv) {
		/*
		 * User mode, suppress messages if
		 * cpu_berr_to_verbose is not set.
		 */
		if (!cpu_berr_to_verbose)
			log_sfsr &= ~(SFSR_TO | SFSR_BERR);
	}

	if (((log_sfsr & SFSR_ERRS) && (cpu_queue_events(&opl_flt, pr_reason,
	    t_sfsr) == 0)) || ((t_sfsr & SFSR_ERRS) == 0)) {
		opl_flt.flt_type = OPL_CPU_INV_SFSR;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
	}

	if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * Panic here if aflt->flt_panic has been set. Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic) {
		if (pr_reason[0] == 0)
			strcpy(pr_reason, "invalid SFSR ");

		fm_panic("%sError(s)", pr_reason);
	}
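
	/*
	 * For reference, a minimal sketch (assumed, not part of this module)
	 * of the on_trap() protection that the OT_DATA_ACCESS trampolining
	 * above cooperates with:
	 *
	 *	on_trap_data_t otd;
	 *
	 *	if (!on_trap(&otd, OT_DATA_ACCESS)) {
	 *		... perform the protected device access ...
	 *	}
	 *	no_trap();
	 *
	 * A BERR/TO taken inside the protected region resumes at the
	 * trampoline instead of panicking the system.
	 */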

	/*
	 * If we queued an error and we are going to return from the trap and
	 * the error was in user mode or inside of a copy routine, set AST flag
	 * so the queue will be drained before returning to user mode. The
	 * AST processing will also act on our failure policy.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		int pcb_flag = 0;

		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
			pcb_flag |= ASYNC_HWERR;

		if (t_sfsr & SFSR_BERR)
			pcb_flag |= ASYNC_BERR;

		if (t_sfsr & SFSR_TO)
			pcb_flag |= ASYNC_BTO;

		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
		aston(curthread);
	}
}

/*ARGSUSED*/
void
opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl)
{
	opl_async_flt_t opl_flt;
	struct async_flt *aflt;
	char pr_reason[MAX_REASON_STRING];

	/* normalize tl */
	tl = (tl >= 2 ? 1 : 0);
	pr_reason[0] = '\0';

	bzero(&opl_flt, sizeof (opl_async_flt_t));
	aflt = (struct async_flt *)&opl_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_stat = p_ugesr;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_class = (uchar_t)CPU_FAULT;
	aflt->flt_tl = tl;
	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ?
	    1 : 0));
	aflt->flt_status = OPL_ECC_URGENT_TRAP;
	aflt->flt_panic = 1;
	/*
	 * HW does not set mod/sid in case of urgent error.
	 * So we have to set it here.
	 */
	opl_flt.flt_eid_mod = OPL_ERRID_CPU;
	opl_flt.flt_eid_sid = aflt->flt_inst;

	if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) {
		opl_flt.flt_type = OPL_CPU_INV_UGESR;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG, (void *)&opl_flt,
		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
	}

	fm_panic("Urgent Error");
}

/*
 * Initialize the cyclic that periodically resets the error counters.
 */
/* ARGSUSED */
static void
opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when)
{
	hdlr->cyh_func = (cyc_func_t)ras_cntr_reset;
	hdlr->cyh_level = CY_LOW_LEVEL;
	hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id;

	when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10) / NCPU);
	when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval;
}

void
cpu_mp_init(void)
{
	cyc_omni_handler_t hdlr;

	hdlr.cyo_online = opl_ras_online;
	hdlr.cyo_offline = NULL;
	hdlr.cyo_arg = NULL;
	mutex_enter(&cpu_lock);
	(void) cyclic_add_omni(&hdlr);
	mutex_exit(&cpu_lock);
}

int heaplp_use_stlb = 0;

void
mmu_init_kernel_pgsz(struct hat *hat)
{
	uint_t tte = page_szc(segkmem_lpsize);
	uchar_t new_cext_primary, new_cext_nucleus;

	if (heaplp_use_stlb == 0) {
		/* do not reprogram stlb */
		tte = TTE8K;
	}

	new_cext_nucleus = TAGACCEXT_MKSZPAIR(tte, TTE8K);
	new_cext_primary = TAGACCEXT_MKSZPAIR(TTE8K, tte);

	hat->sfmmu_cext = new_cext_primary;
	kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) |
	    ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT);
}

size_t
mmu_get_kernel_lpsize(size_t lpsize)
{
	uint_t tte;

	if (lpsize == 0) {
		/* no setting for segkmem_lpsize in /etc/system: use default */
		return (MMU_PAGESIZE4M);
	}

	for (tte = TTE8K; tte <= TTE4M; tte++) {
		if (lpsize == TTEBYTES(tte))
			return (lpsize);
	}

	return (TTEBYTES(TTE8K));
}
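
/*
 * Worked example (illustrative): a segkmem_lpsize of 0x10000 (64K) matches
 * TTEBYTES(TTE64K) and is returned unchanged, while a value such as 0x40000
 * (256K) matches none of TTE8K..TTE4M and falls back to TTEBYTES(TTE8K),
 * i.e. the 8K base page size.
 */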

/*
 * Support for ta 3.
 * Allocate a per-CPU buffer for saving the current register window.
 */
typedef struct win_regs {
	uint64_t l[8];
	uint64_t i[8];
} win_regs_t;

static void
opl_ta3(void)
{
	opl_ta3_save = (char *)kmem_alloc(NCPU * sizeof (win_regs_t), KM_SLEEP);
}

/*
 * The following functions are unused in the OPL cpu module.
 * They are defined here to resolve dependencies in the "unix" module.
 * Unused functions that should never be called in OPL are coded
 * with ASSERT(0).
 */

void
cpu_disable_errors(void)
{}

void
cpu_enable_errors(void)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{}

/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{}

/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{}

/*ARGSUSED*/
void
cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_busy_ecache_scrub(struct cpu *cp)
{}

/*ARGSUSED*/
void
cpu_idle_ecache_scrub(struct cpu *cp)
{}

/* ARGSUSED */
void
cpu_change_speed(uint64_t divisor, uint64_t arg2)
{ ASSERT(0); }

void
cpu_init_cache_scrub(void)
{}

/* ARGSUSED */
int
cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	if (&plat_get_mem_sid) {
		return (plat_get_mem_sid(unum, buf, buflen, lenp));
	} else {
		return (ENOTSUP);
	}
}

/* ARGSUSED */
int
cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	if (&plat_get_mem_addr) {
		return (plat_get_mem_addr(unum, sid, offset, addrp));
	} else {
		return (ENOTSUP);
	}
}

/* ARGSUSED */
int
cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
{
	if (&plat_get_mem_offset) {
		return (plat_get_mem_offset(flt_addr, offp));
	} else {
		return (ENOTSUP);
	}
}

/*ARGSUSED*/
void
itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{ ASSERT(0); }

/*ARGSUSED*/
void
dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{ ASSERT(0); }

/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{ ASSERT(0); }

/*ARGSUSED*/
int
ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
    errorq_elem_t *eqep, size_t afltoffset)
{
	ASSERT(0);
	return (0);
}

/*ARGSUSED*/
char *
flt_to_error_type(struct async_flt *aflt)
{
	ASSERT(0);
	return (NULL);
}
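
/*
 * Note (assumed, based on the usual platform_module conventions): the
 * "if (&plat_get_mem_sid)" style tests in cpu_get_mem_sid(),
 * cpu_get_mem_addr() and cpu_get_mem_offset() above work because those
 * platform entry points are weak symbols; taking their address yields
 * zero when the platform module supplies no implementation, in which
 * case ENOTSUP is returned.
 */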

#define	PROM_SPARC64VII_MODE_PROPNAME	"SPARC64-VII-mode"

/*
 * Check for the existence of the OPL OBP property that indicates
 * SPARC64-VII support. By default, Jupiter features are enabled only if
 * the property is present. OBP creates the property in all-Jupiter
 * domains when the domain has been selected by the user on the system
 * controller to run in Jupiter mode. In short, this OBP property must
 * be present to turn on the cpu_alljupiter flag.
 */
static int
prom_SPARC64VII_support_enabled(void)
{
	int val;

	return ((prom_getprop(prom_rootnode(), PROM_SPARC64VII_MODE_PROPNAME,
	    (caddr_t)&val) == 0) ? 1 : 0);
}
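
/*
 * Illustrative usage (assumed; the actual caller lives elsewhere in this
 * module): the check above is what gates the all-Jupiter feature flag,
 * conceptually
 *
 *	if (prom_SPARC64VII_support_enabled())
 *		cpu_alljupiter = 1;
 */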