/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Support for Olympus-C (SPARC64-VI) and Jupiter (SPARC64-VII).
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sysmacros.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/machthread.h>
#include <sys/cpu.h>
#include <sys/cmp.h>
#include <sys/elf_SPARC.h>
#include <vm/vm_dep.h>
#include <vm/hat_sfmmu.h>
#include <vm/seg_kpm.h>
#include <vm/seg_kmem.h>
#include <sys/cpuvar.h>
#include <sys/opl_olympus_regs.h>
#include <sys/opl_module.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/cpu_module.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/clock.h>
#include <sys/platform_module.h>
#include <sys/ontrap.h>
#include <sys/panic.h>
#include <sys/memlist.h>
#include <sys/ndifm.h>
#include <sys/ddifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/cpu/SPARC64-VI.h>
#include <sys/dtrace.h>
#include <sys/watchpoint.h>
#include <sys/promif.h>

/*
 * Internal functions.
 */
static int cpu_sync_log_err(void *flt);
static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *);
static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t);
static int cpu_flt_in_memory(opl_async_flt_t *, uint64_t);
static int prom_SPARC64VII_support_enabled(void);

/*
 * Error counter resetting interval.
 */
static int opl_async_check_interval = 60;	/* 1 min */

uint_t cpu_impl_dual_pgsz = 1;

/*
 * PA[22:0] represents the displacement in the Jupiter
 * configuration space.
 */
uint_t root_phys_addr_lo_mask = 0x7fffffu;

/*
 * Set in /etc/system to control logging of user BERR/TO's.
 */
int cpu_berr_to_verbose = 0;

/*
 * Set to 1 if booted with all Jupiter cpus (all-Jupiter features enabled).
 */
int cpu_alljupiter = 0;

static int min_ecache_size;
static uint_t priv_hcl_1;
static uint_t priv_hcl_2;
static uint_t priv_hcl_4;
static uint_t priv_hcl_8;

/*
 * Olympus error log
 */
static opl_errlog_t *opl_err_log;

/*
 * UE is classified into four classes (MEM, CHANNEL, CPU, PATH).
 * No other ecc_type_to_info insertion is allowed between the following
 * four UE classes.
 */
ecc_type_to_info_t ecc_type_to_info[] = {
	SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
	"Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_MEM,
	SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
	"Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_CHANNEL,
	SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
	"Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_CPU,
	SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
	"Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_PATH,
	SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
	"Bus Error", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_BERR,
	SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
	"Bus Timeout", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_BTO,
	SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
	"TLB MultiHit", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_MTLB,
	SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
	"TLB Parity", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_TLBP,

	UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IAUG CRE", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_CRE,
	UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT",
	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IAUG TSBCTXT", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSBCTX,
	UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG TSBP", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSBP,
	UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG PSTATE", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_PSTATE,
	UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG TSTATE", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSTATE,
	UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG FREG", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IUG_F,
	UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG RREG", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IUG_R,
	UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"AUG SDC", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_SDC,
	UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG WDT", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_WDT,
	UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG DTLB", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_DTLB,
	UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG ITLB", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_ITLB,
	UGESR_IUG_COREERR, "IUG_COREERR",
	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG COREERR", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_CORE,
	UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"MULTI DAE", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_DAE,
	UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"MULTI IAE", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IAE,
	UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"MULTI UGE", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_UGE,
	0, NULL, 0, 0,
	NULL, 0, 0,
};
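
/*
 * For illustration: cpu_queue_events() (below) resolves the UE class by
 * advancing a pointer into this table by flt_eid_mod:
 *
 *	eccp = &ecc_type_to_info[0];	(the UE_MEM row)
 *	eccp += opl_flt->flt_eid_mod;	(0 MEM, 1 CHANNEL, 2 CPU, 3 PATH)
 *
 * This is why the four UE rows above must remain adjacent and in EID order,
 * per the comment preceding the table.
 */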

int (*p2get_mem_info)(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp);


/*
 * Set up trap handlers for the 0xA, 0x32, and 0x40 trap types.
 */
void
cpu_init_trap(void)
{
	OPL_SET_TRAP(tt0_iae, opl_serr_instr);
	OPL_SET_TRAP(tt1_iae, opl_serr_instr);
	OPL_SET_TRAP(tt0_dae, opl_serr_instr);
	OPL_SET_TRAP(tt1_dae, opl_serr_instr);
	OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr);
	OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr);
}

static int
getintprop(pnode_t node, char *name, int deflt)
{
	int value;

	switch (prom_getproplen(node, name)) {
	case sizeof (int):
		(void) prom_getprop(node, name, (caddr_t)&value);
		break;

	default:
		value = deflt;
		break;
	}

	return (value);
}

/*
 * Set the magic constants of the implementation.
 */
/*ARGSUSED*/
void
cpu_fiximp(pnode_t dnode)
{
	int i, a;
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;

	static struct {
		char *name;
		int *var;
		int defval;
	} prop[] = {
		"l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE,
		"l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE,
		"l1-icache-size", &icache_size, OPL_ICACHE_SIZE,
		"l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE,
		"l2-cache-size", &ecache_size, OPL_ECACHE_SIZE,
		"l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE,
		"l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY
	};

	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++)
		*prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval);

	ecache_setsize = ecache_size / ecache_associativity;

	vac_size = OPL_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
	i = 0; a = vac_size;
	while (a >>= 1)
		++i;
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;
}
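
/*
 * For illustration: the shift loop above computes
 * vac_shift = log2(vac_size). If, hypothetically, OPL_VAC_SIZE were
 * 0x20000 (128K), the loop would shift "a" right seventeen times before
 * it reaches zero, yielding vac_shift = 17 and
 * vac_mask = MMU_PAGEMASK & 0x1ffff. The real value of OPL_VAC_SIZE
 * comes from the OPL headers included above.
 */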

/*
 * Enable features for Jupiter-only domains.
 */
void
cpu_fix_alljupiter(void)
{
	if (!prom_SPARC64VII_support_enabled()) {
		/*
		 * Do not enable all-Jupiter features and do not turn on
		 * the cpu_alljupiter flag.
		 */
		return;
	}

	cpu_alljupiter = 1;

	/*
	 * Enable the ima hwcap for Jupiter-only domains. DR will prevent
	 * the addition of Olympus-C to all-Jupiter domains to preserve
	 * the ima hwcap semantics.
	 */
	cpu_hwcap_flags |= AV_SPARC_IMA;
}

#ifdef	OLYMPUS_C_REV_B_ERRATA_XCALL
/*
 * Quick and dirty way to redefine locally in
 * OPL the value of IDSR_BN_SETS to 31 instead
 * of the standard 32 value. This works around
 * the REV_B Olympus-C processor's problem in
 * handling more than 31 xcall broadcasts.
 */
#undef	IDSR_BN_SETS
#define	IDSR_BN_SETS	31
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */

void
send_mondo_set(cpuset_t set)
{
	int lo, busy, nack, shipped = 0;
	uint16_t i, cpuids[IDSR_BN_SETS];
	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
	uint64_t starttick, endtick, tick, lasttick;
#if (NCPU > IDSR_BN_SETS)
	int index = 0;
	int ncpuids = 0;
#endif
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	int bn_sets = IDSR_BN_SETS;
	uint64_t ver;

	ASSERT(NCPU > bn_sets);
#endif

	ASSERT(!CPUSET_ISNULL(set));
	starttick = lasttick = gettick();

#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	ver = ultra_getver();
	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
	    ((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
		bn_sets = 1;
#endif

#if (NCPU <= IDSR_BN_SETS)
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			shipit(i, shipped);
			nackmask |= IDSR_NACK_BIT(shipped);
			cpuids[shipped++] = i;
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
#else
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			ncpuids++;

			/*
			 * Ship only to the first (IDSR_BN_SETS) CPUs. If we
			 * find we have shipped to more than (IDSR_BN_SETS)
			 * CPUs, set "index" to the highest numbered CPU in
			 * the set so we can ship to other CPUs a bit later on.
			 */
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			if (shipped < bn_sets) {
#else
			if (shipped < IDSR_BN_SETS) {
#endif
				shipit(i, shipped);
				nackmask |= IDSR_NACK_BIT(shipped);
				cpuids[shipped++] = i;
				CPUSET_DEL(set, i);
				if (CPUSET_ISNULL(set))
					break;
			} else
				index = (int)i;
		}

	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
#endif

	busymask = IDSR_NACK_TO_BUSY(nackmask);
	busy = nack = 0;
	endtick = starttick + xc_tick_limit;
	for (;;) {
		idsr = getidsr();
#if (NCPU <= IDSR_BN_SETS)
		if (idsr == 0)
			break;
#else
		if (idsr == 0 && shipped == ncpuids)
			break;
#endif
		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point. Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_CONT, "send mondo timeout [%d NACK %d "
			    "BUSY]\nIDSR 0x%" PRIx64 " cpuids:",
			    nack, busy, idsr);
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			for (i = 0; i < bn_sets; i++) {
#else
			for (i = 0; i < IDSR_BN_SETS; i++) {
#endif
				if (idsr & (IDSR_NACK_BIT(i) |
				    IDSR_BUSY_BIT(i))) {
					cmn_err(CE_CONT, " 0x%x", cpuids[i]);
				}
			}
			cmn_err(CE_CONT, "\n");
			cmn_err(CE_PANIC, "send_mondo_set: timeout");
		}
		curnack = idsr & nackmask;
		curbusy = idsr & busymask;

#ifdef	OLYMPUS_C_REV_B_ERRATA_XCALL
		/*
		 * Only proceed to send more xcalls if all the
		 * cpus in the previous IDSR_BN_SETS were completed.
		 */
		if (curbusy) {
			busy++;
			continue;
		}
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */

#if (NCPU > IDSR_BN_SETS)
		if (shipped < ncpuids) {
			uint64_t cpus_left;
			uint16_t next = (uint16_t)index;

			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
			    busymask;

			if (cpus_left) {
				do {
					/*
					 * Sequence through and ship to the
					 * remainder of the CPUs in the system
					 * (e.g. other than the first
					 * (IDSR_BN_SETS)) in reverse order.
					 */
					lo = lowbit(cpus_left) - 1;
					i = IDSR_BUSY_IDX(lo);
					shipit(next, i);
					shipped++;
					cpuids[i] = next;

					/*
					 * If we've processed all the CPUs,
					 * exit the loop now and save
					 * instructions.
					 */
					if (shipped == ncpuids)
						break;

					for ((index = ((int)next - 1));
					    index >= 0; index--)
						if (CPU_IN_SET(set, index)) {
							next = (uint16_t)index;
							break;
						}

					cpus_left &= ~(1ull << lo);
				} while (cpus_left);
				continue;
			}
		}
#endif
#ifndef	OLYMPUS_C_REV_B_ERRATA_XCALL
		if (curbusy) {
			busy++;
			continue;
		}
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
#ifdef	SEND_MONDO_STATS
		{
			int n = gettick() - starttick;
			if (n < 8192)
				x_nack_stimes[n >> 7]++;
		}
#endif
		while (gettick() < (tick + sys_clock_mhz))
			;
		do {
			lo = lowbit(curnack) - 1;
			i = IDSR_NACK_IDX(lo);
			shipit(cpuids[i], i);
			curnack &= ~(1ull << lo);
		} while (curnack);
		nack++;
		busy = 0;
	}
#ifdef	SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_set_stimes[n >> 7]++;
		else
			x_set_ltimes[(n >> 13) & 0xf]++;
	}
	x_set_cpus[shipped]++;
#endif
}
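
/*
 * For illustration, the dispatch handshake implemented above, in outline
 * (a sketch, not compiled code):
 *
 *	ship to up to IDSR_BN_SETS targets, recording each in cpuids[]
 *	loop:
 *		idsr = getidsr();
 *		if no BUSY/NACK bits remain (and all targets shipped), done
 *		if past endtick, panic "send_mondo_set: timeout"
 *		if BUSY bits are still set, keep polling
 *		otherwise reship every NACKed target after a short delay
 *
 * BUSY and NACK occupy paired bits per dispatch slot in the IDSR, which
 * is why nackmask converts to busymask via IDSR_NACK_TO_BUSY() above.
 */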

/*
 * CPU private initialization.
 */
void
cpu_init_private(struct cpu *cp)
{
	if (!((IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation)) ||
	    (IS_JUPITER(cpunodes[cp->cpu_id].implementation)))) {
		cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI(I) is "
		    "supported", cp->cpu_id,
		    cpunodes[cp->cpu_id].implementation);
	}

	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
}

void
cpu_setup(void)
{
	extern int at_flags;
	extern int cpc_has_overflow_intr;
	uint64_t cpu0_log;
	extern uint64_t opl_cpu0_err_log;

	/*
	 * Initialize the Error log Scratch register for error handling.
	 */

	cpu0_log = va_to_pa(&opl_cpu0_err_log);
	opl_error_setup(cpu0_log);

	/*
	 * Enable MMU translating multiple page sizes for
	 * sITLB and sDTLB.
	 */
	opl_mpg_enable();

	/*
	 * Set up chip-specific trap handlers.
	 */
	cpu_init_trap();

	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;

	/*
	 * Due to the number of entries in the fully-associative TLB,
	 * this may have to be tuned lower than in Spitfire.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores do not invalidate all pages of the d$; pagecopy
	 * et al. need virtual translations with virtual coloring taken
	 * into consideration. prefetch/ldd will pollute the d$ on the
	 * load side.
	 */
	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;

	if (use_page_coloring) {
		do_pg_coloring = 1;
	}

	isa_list =
	    "sparcv9+vis2 sparcv9+vis sparcv9 "
	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2 |
	    AV_SPARC_POPC | AV_SPARC_FMAF;

	/*
	 * On SPARC64-VI, there's no hole in the virtual address space.
	 */
	hole_start = hole_end = 0;

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */
	kpm_size_shift = 47;
	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
	kpm_smallpages = 1;

	/*
	 * The traptrace code uses either %tick or %stick for
	 * timestamping. We have %stick so we can use it.
	 */
	traptrace_use_stick = 1;

	/*
	 * SPARC64-VI has a performance counter overflow interrupt.
	 */
	cpc_has_overflow_intr = 1;

	/*
	 * Declare that this architecture/cpu combination does not support
	 * fpRAS.
	 */
	fpras_implemented = 0;
}
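
/*
 * For illustration: the kpm constants set above are self-consistent.
 * 128TB is 2^47 bytes, matching kpm_size_shift = 47, and kpm_vbase
 * (0x8000000000000000, i.e. 2^63) is a multiple of 2^47, satisfying the
 * kpm_size alignment requirement stated in the comment above.
 */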

/*
 * Called by setcpudelay().
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * For SPARC64-VI we want to use the system clock rate as
	 * the basis for low level timing, due to support of mixed
	 * speed CPUs and power management.
	 */
	if (system_clock_freq == 0)
		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");

	sys_tick_freq = system_clock_freq;
}

#ifdef	SEND_MONDO_STATS
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif

/*
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be
 * included in the KDI-specific version.  See us3_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	int busy, nack;
	uint64_t idsr, starttick, endtick, tick, lasttick;
	uint64_t busymask;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	starttick = lasttick = gettick();
	shipit(cpuid, 0);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	busymask = IDSR_BUSY;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;

		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point. Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC, "send mondo timeout (target 0x%x) "
			    "[%d NACK %d BUSY]", cpuid, nack, busy);
		}

		if (idsr & busymask) {
			busy++;
			continue;
		}
		drv_usecwait(1);
		shipit(cpuid, 0);
		nack++;
		busy = 0;
	}
#ifdef	SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_one_stimes[n >> 7]++;
		else
			x_one_ltimes[(n >> 13) & 0xf]++;
	}
#endif
}

/*
 * init_mmu_page_sizes is set to one after the bootup time initialization
 * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a
 * valid value.
 *
 * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific
 * versions of disable_ism_large_pages and disable_large_pages, and feed back
 * into those two hat variables at hat initialization time.
 */
int init_mmu_page_sizes = 0;

static uint_t mmu_disable_large_pages = 0;
static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) |
	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) |
	(1 << TTE512K));

/*
 * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
 * Called during very early bootup from check_cpus_set().
 * Can be called to verify that mmu_page_sizes are set up correctly.
 *
 * Set Olympus defaults. We do not use the function parameter.
 */
/*ARGSUSED*/
int
mmu_init_mmu_page_sizes(int32_t not_used)
{
	if (!init_mmu_page_sizes) {
		mmu_page_sizes = MMU_PAGE_SIZES;
		mmu_hashcnt = MAX_HASHCNT;
		mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE;
		mmu_exported_pagesize_mask = (1 << TTE8K) |
		    (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) |
		    (1 << TTE32M) | (1 << TTE256M);
		init_mmu_page_sizes = 1;
		return (0);
	}
	return (1);
}

/* SPARC64-VI worst case DTLB parameters */
#ifndef	LOCKED_DTLB_ENTRIES
#define	LOCKED_DTLB_ENTRIES	5	/* 2 user TSBs, 2 nucleus, + OBP */
#endif
#define	TOTAL_DTLB_ENTRIES	32
#define	AVAIL_32M_ENTRIES	0
#define	AVAIL_256M_ENTRIES	0
#define	AVAIL_DTLB_ENTRIES	(TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES)
static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = {
	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES};
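
/*
 * For illustration: with the worst-case numbers above,
 * AVAIL_DTLB_ENTRIES = 32 - 5 = 27. mmu_setup_page_sizes() (below) only
 * keeps a large page size programmed into the sTLB once a process maps
 * at least that many TTEs of the size; below the threshold it falls back
 * to PAGESIZE.
 */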

/*
 * The function returns the mmu-specific values for the
 * hat's disable_large_pages, disable_ism_large_pages,
 * disable_auto_data_large_pages, and
 * disable_auto_text_large_pages variables.
 */
uint_t
mmu_large_pages_disabled(uint_t flag)
{
	uint_t pages_disable = 0;
	extern int use_text_pgsz64K;
	extern int use_text_pgsz512K;

	if (flag == HAT_LOAD) {
		pages_disable = mmu_disable_large_pages;
	} else if (flag == HAT_LOAD_SHARE) {
		pages_disable = mmu_disable_ism_large_pages;
	} else if (flag == HAT_AUTO_DATA) {
		pages_disable = mmu_disable_auto_data_large_pages;
	} else if (flag == HAT_AUTO_TEXT) {
		pages_disable = mmu_disable_auto_text_large_pages;
		if (use_text_pgsz512K) {
			pages_disable &= ~(1 << TTE512K);
		}
		if (use_text_pgsz64K) {
			pages_disable &= ~(1 << TTE64K);
		}
	}
	return (pages_disable);
}

/*
 * mmu_init_large_pages is called with the desired ism_pagesize parameter.
 * It may be called from set_platform_defaults, if some value other than 32M
 * is desired. mmu_ism_pagesize is the tunable. If it has a bad value,
 * then only warn, since it would be bad form to panic due to a user typo.
 *
 * The function re-initializes the mmu_disable_ism_large_pages variable.
 */
void
mmu_init_large_pages(size_t ism_pagesize)
{
	switch (ism_pagesize) {
	case MMU_PAGESIZE4M:
		mmu_disable_ism_large_pages = ((1 << TTE64K) |
		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
		break;
	case MMU_PAGESIZE32M:
		mmu_disable_ism_large_pages = ((1 << TTE64K) |
		    (1 << TTE512K) | (1 << TTE256M));
		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M));
		adjust_data_maxlpsize(ism_pagesize);
		break;
	case MMU_PAGESIZE256M:
		mmu_disable_ism_large_pages = ((1 << TTE64K) |
		    (1 << TTE512K) | (1 << TTE32M));
		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M));
		adjust_data_maxlpsize(ism_pagesize);
		break;
	default:
		cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx",
		    ism_pagesize);
		break;
	}
}
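
/*
 * For illustration (a hypothetical /etc/system usage of the tunable
 * described above): a configuration wanting 256M ISM pages could set
 *
 *	set mmu_ism_pagesize = 0x10000000
 *
 * 0x10000000 being 256M; an unrecognized value only produces the
 * warning above rather than a panic.
 */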

/*
 * Function to reprogram the TLBs when page sizes used
 * by a process change significantly.
 */
void
mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz)
{
	uint8_t pgsz0, pgsz1;

	/*
	 * Don't program 2nd dtlb for kernel and ism hat.
	 */
	ASSERT(hat->sfmmu_ismhat == NULL);
	ASSERT(hat != ksfmmup);

	/*
	 * hat->sfmmu_pgsz[] is an array whose elements
	 * contain a sorted order of page sizes. Element
	 * 0 is the most commonly used page size, followed
	 * by element 1, and so on.
	 *
	 * ttecnt[] is an array of per-page-size page counts
	 * mapped into the process.
	 *
	 * If the HAT's choice for page sizes is unsuitable,
	 * we can override it here. The new values written
	 * to the array will be handed back to us later to
	 * do the actual programming of the TLB hardware.
	 */
	pgsz0 = (uint8_t)MIN(tmp_pgsz[0], tmp_pgsz[1]);
	pgsz1 = (uint8_t)MAX(tmp_pgsz[0], tmp_pgsz[1]);

	/*
	 * This implements PAGESIZE programming of the sTLB
	 * if large TTE counts don't exceed the thresholds.
	 */
	if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0])
		pgsz0 = page_szc(MMU_PAGESIZE);
	if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1])
		pgsz1 = page_szc(MMU_PAGESIZE);
	tmp_pgsz[0] = pgsz0;
	tmp_pgsz[1] = pgsz1;
	/* otherwise, accept what the HAT chose for us */
}

/*
 * The HAT calls this function when an MMU context is allocated so that we
 * can reprogram the large TLBs appropriately for the new process using
 * the context.
 *
 * The caller must hold the HAT lock.
 */
void
mmu_set_ctx_page_sizes(struct hat *hat)
{
	uint8_t pgsz0, pgsz1;
	uint8_t new_cext;

	ASSERT(sfmmu_hat_lock_held(hat));
	/*
	 * Don't program 2nd dtlb for kernel and ism hat.
	 */
	if (hat->sfmmu_ismhat || hat == ksfmmup)
		return;

	/*
	 * If supported, reprogram the TLBs to a larger pagesize.
	 */
	pgsz0 = hat->sfmmu_pgsz[0];
	pgsz1 = hat->sfmmu_pgsz[1];
	ASSERT(pgsz0 < mmu_page_sizes);
	ASSERT(pgsz1 < mmu_page_sizes);
	new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0);
	if (hat->sfmmu_cext != new_cext) {
#ifdef DEBUG
		int i;
		/*
		 * Assert that cnum is invalid: the page size can only be
		 * changed after a process's contexts are invalidated.
		 */
		for (i = 0; i < max_mmu_ctxdoms; i++) {
			ASSERT(hat->sfmmu_ctxs[i].cnum == INVALID_CONTEXT);
		}
#endif /* DEBUG */
		hat->sfmmu_cext = new_cext;
	}
	/*
	 * sfmmu_setctx_sec() will take care of the
	 * rest of the dirty work for us.
	 */
}

/*
 * This function assumes that there are either four or six supported page
 * sizes and at most two programmable TLBs, so we need to decide which
 * page sizes are most important and then adjust the TLB page sizes
 * accordingly (if supported).
 *
 * If these assumptions change, this function will need to be
 * updated to support whatever the new limits are.
 */
void
mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt)
{
	uint64_t sortcnt[MMU_PAGE_SIZES];
	uint8_t tmp_pgsz[MMU_PAGE_SIZES];
	uint8_t i, j, max;
	uint16_t oldval, newval;

	/*
	 * We only consider reprogramming the TLBs if one or more of
	 * the two most used page sizes changes and we're using
	 * large pages in this process.
	 */
	if (SFMMU_LGPGS_INUSE(sfmmup)) {
		/* Sort page sizes. */
		for (i = 0; i < mmu_page_sizes; i++) {
			sortcnt[i] = ttecnt[i];
		}
		for (j = 0; j < mmu_page_sizes; j++) {
			for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) {
				if (sortcnt[i] > sortcnt[max])
					max = i;
			}
			tmp_pgsz[j] = max;
			sortcnt[max] = 0;
		}

		oldval = sfmmup->sfmmu_pgsz[0] << 8 | sfmmup->sfmmu_pgsz[1];

		mmu_setup_page_sizes(sfmmup, ttecnt, tmp_pgsz);

		/* Check 2 largest values after the sort. */
		newval = tmp_pgsz[0] << 8 | tmp_pgsz[1];
		if (newval != oldval) {
			sfmmu_reprog_pgsz_arr(sfmmup, tmp_pgsz);
		}
	}
}
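
/*
 * For illustration: the 16-bit packing above simply concatenates the two
 * page size codes so a single compare detects a change in either slot.
 * E.g. if the previous pair was (TTE4M, TTE8K) and the new sort yields
 * (TTE4M, TTE64K), then oldval = (TTE4M << 8) | TTE8K differs from
 * newval = (TTE4M << 8) | TTE64K, and the TLBs are reprogrammed.
 */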

/*
 * Return processor specific async error structure
 * size used.
 */
int
cpu_aflt_size(void)
{
	return (sizeof (opl_async_flt_t));
}

/*
 * The cpu_sync_log_err() function is called via the [uc]e_drain() function to
 * post-process CPU events that are dequeued.  As such, it can be invoked
 * from softint context, from AST processing in the trap() flow, or from the
 * panic flow.  We decode the CPU-specific data, and take appropriate actions.
 * Historically this entry point was used to log the actual cmn_err(9F) text;
 * now with FMA it is used to prepare 'flt' to be converted into an ereport.
 * With FMA this function now also returns a flag which indicates to the
 * caller whether the ereport should be posted (1) or suppressed (0).
 */
/*ARGSUSED*/
static int
cpu_sync_log_err(void *flt)
{
	opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;

	/*
	 * No extra processing of urgent error events.
	 * Always generate ereports for these events.
	 */
	if (aflt->flt_status == OPL_ECC_URGENT_TRAP)
		return (1);

	/*
	 * Additional processing for synchronous errors.
	 */
	switch (opl_flt->flt_type) {
	case OPL_CPU_INV_SFSR:
		return (1);

	case OPL_CPU_SYNC_UE:
		/*
		 * The SFSR_MK_UE validity bit has already been checked in
		 * opl_cpu_sync_error(); no further check is required here.
		 *
		 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W,
		 * and they have been retrieved in cpu_queue_events().
		 */

		if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) {
			ASSERT(aflt->flt_in_memory);
			/*
			 * We want to skip logging only if ALL the following
			 * conditions are true:
			 *
			 *	1. We are not panicking already.
			 *	2. The error is a memory error.
			 *	3. There is only one error.
			 *	4. The error is on a retired page.
			 *	5. The error occurred under on_trap
			 *	   protection AFLT_PROT_EC.
			 */
			if (!panicstr && aflt->flt_prot == AFLT_PROT_EC &&
			    page_retire_check(aflt->flt_addr, NULL) == 0) {
				/*
				 * Do not log an error from
				 * the retired page.
				 */
				softcall(ecc_page_zero, (void *)aflt->flt_addr);
				return (0);
			}
			if (!panicstr)
				cpu_page_retire(opl_flt);
		}
		return (1);

	case OPL_CPU_SYNC_OTHERS:
		/*
		 * For the following error cases, the processor HW does
		 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will
		 * attempt to assign appropriate values here to reflect
		 * what we think is the most likely cause of the problem
		 * w.r.t. the particular error event. For bus error and
		 * timeout events, we will assign OPL_ERRID_CHANNEL as
		 * the most likely reason. For TLB parity or multiple hit
		 * error events, we will assign the reason as
		 * OPL_ERRID_CPU (cpu related problem) and set the
		 * flt_eid_sid to point to the cpuid.
		 */

		if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) {
			/*
			 * flt_eid_sid will not be used for this case.
			 */
			opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL;
		}
		if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) {
			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
			opl_flt->flt_eid_sid = aflt->flt_inst;
		}

		/*
		 * In case no effective error bit is found.
		 */
		if ((opl_flt->flt_bit & SFSR_ERRS) == 0) {
			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
			opl_flt->flt_eid_sid = aflt->flt_inst;
		}
		break;

	default:
		return (1);
	}
	return (1);
}

/*
 * Retire the bad page that may contain the flushed error.
 */
void
cpu_page_retire(opl_async_flt_t *opl_flt)
{
	struct async_flt *aflt = (struct async_flt *)opl_flt;
	(void) page_retire(aflt->flt_addr, PR_UE);
}

/*
 * Invoked by error_init() early in startup and therefore before
 * startup_errorq() is called to drain any error Q -
 *
 * startup()
 *	startup_end()
 *		error_init()
 *			cpu_error_init()
 * errorq_init()
 *	errorq_drain()
 * start_other_cpus()
 *
 * The purpose of this routine is to create error-related taskqs.  Taskqs
 * are used for this purpose because cpu_lock can't be grabbed from interrupt
 * context.
 */
/*ARGSUSED*/
void
cpu_error_init(int items)
{
	opl_err_log = (opl_errlog_t *)
	    kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP);
	if ((uint64_t)opl_err_log & MMU_PAGEOFFSET)
		cmn_err(CE_PANIC, "The base address of the error log "
		    "is not page aligned");
}

/*
 * We route all errors through a single switch statement.
 */
void
cpu_ue_log_err(struct async_flt *aflt)
{
	switch (aflt->flt_class) {
	case CPU_FAULT:
		if (cpu_sync_log_err(aflt))
			cpu_ereport_post(aflt);
		break;

	case BUS_FAULT:
		bus_async_log_err(aflt);
		break;

	default:
		cmn_err(CE_WARN, "discarding async error %p with invalid "
		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
		return;
	}
}

/*
 * Routine for panic hook callback from panic_idle().
 *
 * Nothing to do here.
 */
void
cpu_async_panic_callb(void)
{
}

/*
 * Routine to return a string identifying the physical name
 * associated with a memory/cache error.
 */
/*ARGSUSED*/
int
cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
    uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
    ushort_t flt_status, char *buf, int buflen, int *lenp)
{
	int synd_code;
	int ret;

	/*
	 * An AFSR of -1 defaults to a memory syndrome.
	 */
	synd_code = (int)flt_synd;

	if (&plat_get_mem_unum) {
		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
			buf[0] = '\0';
			*lenp = 0;
		}
		return (ret);
	}
	buf[0] = '\0';
	*lenp = 0;
	return (ENOTSUP);
}

/*
 * Wrapper for cpu_get_mem_unum() routine that takes an
 * async_flt struct rather than explicit arguments.
 */
int
cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
    char *buf, int buflen, int *lenp)
{
	/*
	 * We always pass -1 so that cpu_get_mem_unum will interpret this as a
	 * memory error.
	 */
	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
	    (uint64_t)-1,
	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
	    aflt->flt_status, buf, buflen, lenp));
}

/*
 * This routine is a more generic interface to cpu_get_mem_unum()
 * that may be used by other modules (e.g. mm).
 */
/*ARGSUSED*/
int
cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
    char *buf, int buflen, int *lenp)
{
	int synd_status, flt_in_memory, ret;
	ushort_t flt_status = 0;
	char unum[UNUM_NAMLEN];

	/*
	 * Check for an invalid address.
	 */
	if (afar == (uint64_t)-1)
		return (ENXIO);

	if (synd == (uint64_t)-1)
		synd_status = AFLT_STAT_INVALID;
	else
		synd_status = AFLT_STAT_VALID;

	flt_in_memory = (*afsr & SFSR_MEMORY) &&
	    pf_is_memory(afar >> MMU_PAGESHIFT);

	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
	if (ret != 0)
		return (ret);

	if (*lenp >= buflen)
		return (ENAMETOOLONG);

	(void) strncpy(buf, unum, buflen);

	return (0);
}

/*
 * Routine to return memory information associated
 * with a physical address and syndrome.
 */
/*ARGSUSED*/
int
cpu_get_mem_info(uint64_t synd, uint64_t afar,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp)
{
	int synd_code = (int)synd;

	if (afar == (uint64_t)-1)
		return (ENXIO);

	if (p2get_mem_info != NULL)
		return ((p2get_mem_info)(synd_code, afar, mem_sizep, seg_sizep,
		    bank_sizep, segsp, banksp, mcidp));
	else
		return (ENOTSUP);
}

/*
 * Routine to return a string identifying the physical
 * name associated with a cpuid.
 */
int
cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	int ret;
	char unum[UNUM_NAMLEN];

	if (&plat_get_cpu_unum) {
		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN,
		    lenp)) != 0)
			return (ret);
	} else {
		return (ENOTSUP);
	}

	if (*lenp >= buflen)
		return (ENAMETOOLONG);

	(void) strncpy(buf, unum, *lenp);

	return (0);
}

/*
 * This routine exports the name buffer size.
 */
size_t
cpu_get_name_bufsize()
{
	return (UNUM_NAMLEN);
}

/*
 * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH.
 */
void
cpu_flush_ecache(void)
{
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
	    cpunodes[CPU->cpu_id].ecache_linesize);
}

static uint8_t
flt_to_trap_type(struct async_flt *aflt)
{
	if (aflt->flt_status & OPL_ECC_ISYNC_TRAP)
		return (TRAP_TYPE_ECC_I);
	if (aflt->flt_status & OPL_ECC_DSYNC_TRAP)
		return (TRAP_TYPE_ECC_D);
	if (aflt->flt_status & OPL_ECC_URGENT_TRAP)
		return (TRAP_TYPE_URGENT);
	return (TRAP_TYPE_UNKNOWN);
}

/*
 * Encode the data saved in the opl_async_flt_t struct into
 * the FM ereport payload.
 */
/*ARGSUSED*/
static void
cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
    nvlist_t *resource)
{
	opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt;
	char unum[UNUM_NAMLEN];
	char sbuf[21];	/* sizeof (UINT64_MAX) + '\0' */
	int len;

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR,
		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR,
		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR,
		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
		    DATA_TYPE_BOOLEAN_VALUE,
		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
	}
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS,
		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL);
	}

	switch (opl_flt->flt_eid_mod) {
	case OPL_ERRID_CPU:
		(void) snprintf(sbuf, sizeof (sbuf), "%llX",
		    (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id);
		(void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION,
		    NULL, opl_flt->flt_eid_sid,
		    (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version, sbuf);
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
		    DATA_TYPE_NVLIST, resource, NULL);
		break;

	case OPL_ERRID_CHANNEL:
		/*
		 * No resource is created, but the cpumem DE will find
		 * the defective path by retrieving the EID from the SFSR,
		 * which is included in the payload.
		 */
		break;

	case OPL_ERRID_MEM:
		(void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len);
		(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL,
		    unum, NULL, (uint64_t)-1);
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
		    DATA_TYPE_NVLIST, resource, NULL);
		break;

	case OPL_ERRID_PATH:
		/*
		 * No resource is created, but the cpumem DE will find
		 * the defective path by retrieving the EID from the SFSR,
		 * which is included in the payload.
		 */
		break;
	}
}

/*
 * Returns whether the fault address is valid for this error bit and
 * whether the address is "in memory" (i.e. pf_is_memory() returns 1).
 */
/*ARGSUSED*/
static int
cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit)
{
	struct async_flt *aflt = (struct async_flt *)opl_flt;

	if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) {
		return ((t_afsr_bit & SFSR_MEMORY) &&
		    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
	}
	return (0);
}

/*
 * In OPL, the SCF does the stick synchronization.
 */
void
sticksync_slave(void)
{
}

/*
 * In OPL, the SCF does the stick synchronization.
 */
void
sticksync_master(void)
{
}

/*
 * CPU private uninitialization. OPL cpus do not use the private area.
 */
void
cpu_uninit_private(struct cpu *cp)
{
	cmp_delete_cpu(cp->cpu_id);
}

/*
 * Always flush an entire cache.
 */
void
cpu_error_ecache_flush(void)
{
	cpu_flush_ecache();
}

void
cpu_ereport_post(struct async_flt *aflt)
{
	char *cpu_type, buf[FM_MAX_CLASS];
	nv_alloc_t *nva = NULL;
	nvlist_t *ereport, *detector, *resource;
	errorq_elem_t *eqep;
	char sbuf[21];	/* sizeof (UINT64_MAX) + '\0' */

	if (aflt->flt_panic || panicstr) {
		eqep = errorq_reserve(ereport_errorq);
		if (eqep == NULL)
			return;
		ereport = errorq_elem_nvl(ereport_errorq, eqep);
		nva = errorq_elem_nva(ereport_errorq, eqep);
	} else {
		ereport = fm_nvlist_create(nva);
	}

	/*
	 * Create the scheme "cpu" FMRI.
	 */
	detector = fm_nvlist_create(nva);
	resource = fm_nvlist_create(nva);
	switch (cpunodes[aflt->flt_inst].implementation) {
	case OLYMPUS_C_IMPL:
		cpu_type = FM_EREPORT_CPU_SPARC64_VI;
		break;
	case JUPITER_IMPL:
		cpu_type = FM_EREPORT_CPU_SPARC64_VII;
		break;
	default:
		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
		break;
	}
	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
	    (u_longlong_t)cpunodes[aflt->flt_inst].device_id);
	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
	    aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version,
	    sbuf);

	/*
	 * Encode all the common data into the ereport.
	 */
	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);

	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
	    fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL);

	/*
	 * Encode the error specific data that was saved in
	 * the async_flt structure into the ereport.
	 */
	cpu_payload_add_aflt(aflt, ereport, resource);

	if (aflt->flt_panic || panicstr) {
		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
	} else {
		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
		fm_nvlist_destroy(ereport, FM_NVA_FREE);
		fm_nvlist_destroy(detector, FM_NVA_FREE);
		fm_nvlist_destroy(resource, FM_NVA_FREE);
	}
}
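
/*
 * For illustration: the class string built above is
 * "<FM_ERROR_CPU>.<cpu_type>.<flt_erpt_class>", so on an Olympus-C CPU a
 * UE memory error would (assuming the conventional expansions of these
 * constants) surface with an ereport class of the form
 * "ereport.cpu.SPARC64-VI.ue_mem" once the ereport prefix is prepended
 * by fm_ereport_set().
 */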

void
cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
{
	int status;
	ddi_fm_error_t de;

	bzero(&de, sizeof (ddi_fm_error_t));

	de.fme_version = DDI_FME_VERSION;
	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
	de.fme_flag = expected;
	de.fme_bus_specific = (void *)aflt->flt_addr;
	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
		aflt->flt_panic = 1;
}

void
cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
    errorq_t *eqp, uint_t flag)
{
	struct async_flt *aflt = (struct async_flt *)payload;

	aflt->flt_erpt_class = error_class;
	errorq_dispatch(eqp, payload, payload_sz, flag);
}

void
adjust_hw_copy_limits(int ecache_size)
{
	/*
	 * Set hw copy limits.
	 *
	 * /etc/system will be parsed later and can override one or more
	 * of these settings.
	 *
	 * At this time, ecache size seems only mildly relevant.
	 * We seem to run into issues with the d-cache and stalls
	 * we see on misses.
	 *
	 * Cycle measurement indicates that 2 byte aligned copies fare
	 * little better than doing things with VIS at around 512 bytes.
	 * 4 byte aligned shows promise until around 1024 bytes. 8 byte
	 * aligned is faster whenever the source and destination data
	 * are in cache and the total size is less than 2 Kbytes.  The
	 * 2K limit seems to be driven by the 2K write cache.
	 * When more than 2K of copies are done in non-VIS mode, stores
	 * back up in the write cache.  In VIS mode, the write cache is
	 * bypassed, allowing faster cache-line writes aligned on cache
	 * boundaries.
	 *
	 * In addition, in non-VIS mode, there is no prefetching, so
	 * for larger copies, the advantage of prefetching to avoid even
	 * occasional cache misses is enough to justify using the VIS code.
	 *
	 * During testing, it was discovered that netbench ran 3% slower
	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
	 * applications, data is only used once (copied to the output
	 * buffer, then copied by the network device off the system).  Using
	 * the VIS copy saves more L2 cache state.  Network copies are
	 * around 1.3K to 1.5K in size for historical reasons.
	 *
	 * Therefore, a limit of 1K bytes will be used for the 8 byte
	 * aligned copy even for large caches and 8 MB ecache.  The
	 * infrastructure to allow different limits for different sized
	 * caches is kept to allow further tuning in later releases.
	 */

	if (min_ecache_size == 0 && use_hw_bcopy) {
		/*
		 * First time through - should be before /etc/system
		 * is read.
		 * Could skip the checks for zero but this lets us
		 * preserve any debugger rewrites.
		 */
		if (hw_copy_limit_1 == 0) {
			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
			priv_hcl_1 = hw_copy_limit_1;
		}
		if (hw_copy_limit_2 == 0) {
			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
			priv_hcl_2 = hw_copy_limit_2;
		}
		if (hw_copy_limit_4 == 0) {
			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
			priv_hcl_4 = hw_copy_limit_4;
		}
		if (hw_copy_limit_8 == 0) {
			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
			priv_hcl_8 = hw_copy_limit_8;
		}
		min_ecache_size = ecache_size;
	} else {
		/*
		 * MP initialization. Called *after* /etc/system has
		 * been parsed. One CPU has already been initialized.
		 * Need to cater for /etc/system having scragged one
		 * of our values.
		 */
		if (ecache_size == min_ecache_size) {
			/*
			 * Same size ecache. We do nothing unless we
			 * have a pessimistic ecache setting. In that
			 * case we become more optimistic (if the cache is
			 * large enough).
			 */
			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
				/*
				 * Need to adjust hw_copy_limit* from our
				 * pessimistic uniprocessor value to a more
				 * optimistic UP value *iff* it hasn't been
				 * reset.
				 */
				if ((ecache_size > 1048576) &&
				    (priv_hcl_8 == hw_copy_limit_8)) {
					if (ecache_size <= 2097152)
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					else if (ecache_size <= 4194304)
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					else
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					priv_hcl_8 = hw_copy_limit_8;
				}
			}
		} else if (ecache_size < min_ecache_size) {
			/*
			 * A different ecache size. Can this even happen?
			 */
			if (priv_hcl_8 == hw_copy_limit_8) {
				/*
				 * The previous value that we set
				 * is unchanged (i.e., it hasn't been
				 * scragged by /etc/system). Rewrite it.
				 */
				if (ecache_size <= 1048576)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else if (ecache_size <= 2097152)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else if (ecache_size <= 4194304)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else
					hw_copy_limit_8 = 10 *
					    VIS_COPY_THRESHOLD;
				priv_hcl_8 = hw_copy_limit_8;
				min_ecache_size = ecache_size;
			}
		}
	}
}

#define	VIS_BLOCKSIZE		64

int
dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
{
	int ret, watched;

	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
	ret = dtrace_blksuword32(addr, data, 0);
	if (watched)
		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);

	return (ret);
}

void
opl_cpu_reg_init()
{
	uint64_t this_cpu_log;

	/*
	 * We do not need to re-initialize cpu0 registers.
	 */
	if (cpu[getprocessorid()] == &cpu0)
		return;

	/*
	 * Initialize the Error log Scratch register for error handling.
	 */

	this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) +
	    ERRLOG_BUFSZ * (getprocessorid())));
	opl_error_setup(this_cpu_log);

	/*
	 * Enable MMU translating multiple page sizes for
	 * sITLB and sDTLB.
	 */
	opl_mpg_enable();
}

/*
 * Queue one event in ue_queue based on ecc_type_to_info entry.
 */
static void
cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason,
    ecc_type_to_info_t *eccp)
{
	struct async_flt *aflt = (struct async_flt *)opl_flt;

	if (reason &&
	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
		(void) strcat(reason, eccp->ec_reason);
	}

	opl_flt->flt_bit = eccp->ec_afsr_bit;
	opl_flt->flt_type = eccp->ec_flt_type;
	aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit);
	aflt->flt_payload = eccp->ec_err_payload;

	ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP));
	cpu_errorq_dispatch(eccp->ec_err_class, (void *)opl_flt,
	    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
}

/*
 * Queue events on the async event queue, one event per error bit.
 * Return the number of events queued.
 */
int
cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs)
{
	struct async_flt *aflt = (struct async_flt *)opl_flt;
	ecc_type_to_info_t *eccp;
	int nevents = 0;

	/*
	 * Queue expected errors; the error bit and fault type must match
	 * in the ecc_type_to_info table.
	 */
	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
	    eccp++) {
		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
		    (eccp->ec_flags & aflt->flt_status) != 0) {
			/*
			 * UE error events can be further classified/broken
			 * down into finer granularity based on the
			 * flt_eid_mod value set by HW. We do special
			 * handling here so that we can report UE errors in
			 * finer granularity as ue_mem, ue_channel, ue_cpu
			 * or ue_path.
			 */
			if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) {
				opl_flt->flt_eid_mod = (aflt->flt_stat &
				    SFSR_EID_MOD) >> SFSR_EID_MOD_SHIFT;
				opl_flt->flt_eid_sid = (aflt->flt_stat &
				    SFSR_EID_SID) >> SFSR_EID_SID_SHIFT;
				/*
				 * Need to advance the eccp pointer by
				 * flt_eid_mod so that we get the
				 * appropriate ecc pointer.
				 *
				 * EID			# of advances
				 * ----------------------------------
				 * OPL_ERRID_MEM	0
				 * OPL_ERRID_CHANNEL	1
				 * OPL_ERRID_CPU	2
				 * OPL_ERRID_PATH	3
				 */
				eccp += opl_flt->flt_eid_mod;
			}
			cpu_queue_one_event(opl_flt, reason, eccp);
			t_afsr_errs &= ~eccp->ec_afsr_bit;
			nevents++;
		}
	}

	return (nevents);
}

/*
 * Sync. error wrapper functions.
 * We use these functions in order to transfer here from the
 * nucleus trap handler information about trap type (data or
 * instruction) and trap level (0 or above 0). This way we
 * get rid of using SFSR's reserved bits.
 */

#define	OPL_SYNC_TL0	0
#define	OPL_SYNC_TL1	1
#define	OPL_ISYNC_ERR	0
#define	OPL_DSYNC_ERR	1

void
opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
{
	uint64_t t_sfar = p_sfar;
	uint64_t t_sfsr = p_sfsr;

	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
	    OPL_SYNC_TL0, OPL_ISYNC_ERR);
}

void
opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
{
	uint64_t t_sfar = p_sfar;
	uint64_t t_sfsr = p_sfsr;

	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
	    OPL_SYNC_TL1, OPL_ISYNC_ERR);
}

void
opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
{
	uint64_t t_sfar = p_sfar;
	uint64_t t_sfsr = p_sfsr;

	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
	    OPL_SYNC_TL0, OPL_DSYNC_ERR);
}

void
opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
{
	uint64_t t_sfar = p_sfar;
	uint64_t t_sfsr = p_sfsr;

	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
	    OPL_SYNC_TL1, OPL_DSYNC_ERR);
}
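
/*
 * For illustration, the four wrappers above encode trap context as:
 *
 *	opl_cpu_isync_tl0_error		(OPL_SYNC_TL0, OPL_ISYNC_ERR)
 *	opl_cpu_isync_tl1_error		(OPL_SYNC_TL1, OPL_ISYNC_ERR)
 *	opl_cpu_dsync_tl0_error		(OPL_SYNC_TL0, OPL_DSYNC_ERR)
 *	opl_cpu_dsync_tl1_error		(OPL_SYNC_TL1, OPL_DSYNC_ERR)
 *
 * so opl_cpu_sync_error() receives trap level and trap type as explicit
 * (tl, derr) arguments instead of borrowing SFSR's reserved bits.
 */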

/*
 * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL
 * and TLB_PRT.
 * This function is designed based on cpu_deferred_error().
 */

static void
opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr,
    uint_t tl, uint_t derr)
{
	opl_async_flt_t opl_flt;
	struct async_flt *aflt;
	int trampolined = 0;
	char pr_reason[MAX_REASON_STRING];
	uint64_t log_sfsr;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but it's ok
	 * because we just need a consistent snapshot and we know that everyone
	 * else will store a consistent set of bits while holding p_lock.  We
	 * don't have to worry about a race because SDOCORE is set once prior
	 * to doing i/o from the process's address space and is never cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	pr_reason[0] = '\0';

	/*
	 * Handle the specific error.
	 */
	bzero(&opl_flt, sizeof (opl_async_flt_t));
	aflt = (struct async_flt *)&opl_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_stat = t_sfsr;
	aflt->flt_addr = t_sfar;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = (uchar_t)AFLT_PROT_NONE;
	aflt->flt_class = (uchar_t)CPU_FAULT;
	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate &
	    TSTATE_PRIV) ? 1 : 0));
	aflt->flt_tl = (uchar_t)tl;
	aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 ||
	    (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0);
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
	aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP;

	/*
	 * If SFSR.FV is not set, both the SFSR and SFAR/SFPAR values are
	 * uncertain, so clear all error bits to avoid mis-handling and
	 * force the system to panic.
	 * We skip all the procedures below down to the panic message call.
	 */
	if (!(t_sfsr & SFSR_FV)) {
		opl_flt.flt_type = OPL_CPU_INV_SFSR;
		aflt->flt_panic = 1;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
		fm_panic("%sError(s)", "invalid SFSR");
	}

	/*
	 * If either the UE or MK bit is off, this is not a valid UE error.
	 * If it is not a valid UE error, clear the UE & MK_UE bits to prevent
	 * mis-handling below.
	 * aflt->flt_stat keeps the original bits as a reference.
	 */
	if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) !=
	    (SFSR_MK_UE|SFSR_UE)) {
		t_sfsr &= ~(SFSR_MK_UE|SFSR_UE);
	}

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check to
	 * see if we were executing in the kernel under on_trap() or t_lofault
	 * protection.  If so, modify the saved registers so that we return
	 * from the trap to the appropriate trampoline routine.
	 */
/*
 * The FJ sync error handler transfers control here for UE, BERR, TO,
 * TLB_MUL and TLB_PRT.
 * This function is modeled on cpu_deferred_error().
 */

static void
opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr,
    uint_t tl, uint_t derr)
{
	opl_async_flt_t opl_flt;
	struct async_flt *aflt;
	int trampolined = 0;
	char pr_reason[MAX_REASON_STRING];
	uint64_t log_sfsr;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but that is
	 * OK because we just need a consistent snapshot and we know that
	 * everyone else will store a consistent set of bits while holding
	 * p_lock.  We don't have to worry about a race because SDOCORE is
	 * set once, prior to doing i/o from the process's address space,
	 * and is never cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	pr_reason[0] = '\0';

	/*
	 * handle the specific error
	 */
	bzero(&opl_flt, sizeof (opl_async_flt_t));
	aflt = (struct async_flt *)&opl_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_stat = t_sfsr;
	aflt->flt_addr = t_sfar;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = (uchar_t)AFLT_PROT_NONE;
	aflt->flt_class = (uchar_t)CPU_FAULT;
	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate &
	    TSTATE_PRIV) ? 1 : 0));
	aflt->flt_tl = (uchar_t)tl;
	aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 ||
	    (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0);
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
	aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP;

	/*
	 * If SFSR.FV is not set, both the SFSR and the SFAR/SFPAR values
	 * are uncertain, so clear all error bits to avoid mis-handling and
	 * force a system panic.  All the processing below is skipped; we
	 * go straight to the panic call.
	 */
	if (!(t_sfsr & SFSR_FV)) {
		opl_flt.flt_type = OPL_CPU_INV_SFSR;
		aflt->flt_panic = 1;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
		fm_panic("%sError(s)", "invalid SFSR");
	}

	/*
	 * If either the UE or the MK bit is off, this is not a valid UE
	 * error.  In that case, clear the UE and MK_UE bits to prevent
	 * mis-handling below; aflt->flt_stat keeps the original bits as a
	 * reference.
	 */
	if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) !=
	    (SFSR_MK_UE|SFSR_UE)) {
		t_sfsr &= ~(SFSR_MK_UE|SFSR_UE);
	}

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check
	 * to see if we were executing in the kernel under on_trap() or
	 * t_lofault protection.  If so, modify the saved registers so that
	 * we return from the trap to the appropriate trampoline routine.
	 */
	if (!aflt->flt_panic && aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = (uchar_t)AFLT_PROT_EC;
				otp->ot_trap |= (ushort_t)OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
			}

			if ((t_sfsr & (SFSR_TO | SFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS;
				otp->ot_trap |= (ushort_t)OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
				/*
				 * For peeks and cautious gets, errors are
				 * expected.
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			trampolined = 1;
		}
	}

	/*
	 * If we're in user mode or we're doing a protected copy, we either
	 * want the ASTON code below to send a signal to the user process or
	 * we want to panic if aft_panic is set.
	 *
	 * If we're in privileged mode and we're not doing a copy, then we
	 * need to check if we've trampolined.  If we haven't trampolined,
	 * we should panic.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
			aflt->flt_panic |= aft_panic;
	} else if (!trampolined) {
		aflt->flt_panic = 1;
	}

	/*
	 * If we've trampolined due to a privileged TO or BERR, or if an
	 * unprivileged TO or BERR occurred, we don't want to enqueue an
	 * event for that TO or BERR.  Queue all other events (if any)
	 * besides the TO/BERR.
	 */
	log_sfsr = t_sfsr;
	if (trampolined) {
		log_sfsr &= ~(SFSR_TO | SFSR_BERR);
	} else if (!aflt->flt_priv) {
		/*
		 * User mode, suppress messages if
		 * cpu_berr_to_verbose is not set.
		 */
		if (!cpu_berr_to_verbose)
			log_sfsr &= ~(SFSR_TO | SFSR_BERR);
	}

	if (((log_sfsr & SFSR_ERRS) && (cpu_queue_events(&opl_flt, pr_reason,
	    t_sfsr) == 0)) || ((t_sfsr & SFSR_ERRS) == 0)) {
		opl_flt.flt_type = OPL_CPU_INV_SFSR;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
	}

	if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic) {
		if (pr_reason[0] == 0)
			strcpy(pr_reason, "invalid SFSR ");

		fm_panic("%sError(s)", pr_reason);
	}

	/*
	 * If we queued an error and we are going to return from the trap
	 * and the error was in user mode or inside of a copy routine, set
	 * the AST flag so the queue will be drained before returning to
	 * user mode.  The AST processing will also act on our failure
	 * policy.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		int pcb_flag = 0;

		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
			pcb_flag |= ASYNC_HWERR;

		if (t_sfsr & SFSR_BERR)
			pcb_flag |= ASYNC_BERR;

		if (t_sfsr & SFSR_TO)
			pcb_flag |= ASYNC_BTO;

		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
		aston(curthread);
	}
}
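/*
 * For illustration, a hypothetical kernel consumer of the on_trap()
 * protection honored above might bracket a risky access like this
 * (sketch only, not part of this module):
 *
 *	on_trap_data_t otd;
 *	uint64_t val;
 *
 *	if (!on_trap(&otd, OT_DATA_EC))
 *		val = *(uint64_t *)addr;
 *	no_trap();
 *
 * If the load takes a synchronous error, opl_cpu_sync_error() redirects
 * r_pc/r_npc to otd.ot_trampoline and sets OT_DATA_EC in ot_trap, so
 * on_trap() appears to return a second time with a nonzero value
 * instead of the system panicking.
 */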
/*ARGSUSED*/
void
opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl)
{
	opl_async_flt_t opl_flt;
	struct async_flt *aflt;
	char pr_reason[MAX_REASON_STRING];

	/* normalize tl */
	tl = (tl >= 2 ? 1 : 0);
	pr_reason[0] = '\0';

	bzero(&opl_flt, sizeof (opl_async_flt_t));
	aflt = (struct async_flt *)&opl_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_stat = p_ugesr;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_class = (uchar_t)CPU_FAULT;
	aflt->flt_tl = tl;
	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ?
	    1 : 0));
	aflt->flt_status = OPL_ECC_URGENT_TRAP;
	aflt->flt_panic = 1;
	/*
	 * HW does not set mod/sid for urgent errors, so we have to set
	 * them here.
	 */
	opl_flt.flt_eid_mod = OPL_ERRID_CPU;
	opl_flt.flt_eid_sid = aflt->flt_inst;

	if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) {
		opl_flt.flt_type = OPL_CPU_INV_UGESR;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG, (void *)&opl_flt,
		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
	}

	fm_panic("Urgent Error");
}
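/*
 * Note on the normalization above: the incoming trap level is collapsed
 * to a boolean, with any value of 2 or more recorded as flt_tl = 1 (and
 * therefore privileged) and anything lower recorded as 0, mirroring the
 * TL0/TL1 distinction used by the sync error wrappers.  Urgent errors
 * are always treated as fatal: the handler unconditionally ends in
 * fm_panic(), whether or not the UGESR bits matched a known event.
 */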
/*
 * Initialize the cyclic that periodically resets the error counters.
 */
/* ARGSUSED */
static void
opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when)
{
	hdlr->cyh_func = (cyc_func_t)ras_cntr_reset;
	hdlr->cyh_level = CY_LOW_LEVEL;
	hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id;

	when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU);
	when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval;
}

void
cpu_mp_init(void)
{
	cyc_omni_handler_t hdlr;

	hdlr.cyo_online = opl_ras_online;
	hdlr.cyo_offline = NULL;
	hdlr.cyo_arg = NULL;
	mutex_enter(&cpu_lock);
	(void) cyclic_add_omni(&hdlr);
	mutex_exit(&cpu_lock);
}
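/*
 * Worked example of the cyclic timing above: each CPU's first counter
 * reset is staggered across a 10 second window in proportion to its
 * cpu_id (cpu_id * 10s / NCPU), so, e.g., with NCPU == 512, cpu 256
 * first fires 5 seconds in.  After that, each CPU's counters are reset
 * every opl_async_check_interval seconds (60 by default).
 */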
int heaplp_use_stlb = 0;

void
mmu_init_kernel_pgsz(struct hat *hat)
{
	uint_t tte = page_szc(segkmem_lpsize);
	uchar_t new_cext_primary, new_cext_nucleus;

	if (heaplp_use_stlb == 0) {
		/* do not reprogram stlb */
		tte = TTE8K;
	}

	new_cext_nucleus = TAGACCEXT_MKSZPAIR(tte, TTE8K);
	new_cext_primary = TAGACCEXT_MKSZPAIR(TTE8K, tte);

	hat->sfmmu_cext = new_cext_primary;
	kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) |
	    ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT);
}

size_t
mmu_get_kernel_lpsize(size_t lpsize)
{
	uint_t tte;

	if (lpsize == 0) {
		/* no setting for segkmem_lpsize in /etc/system: use default */
		return (MMU_PAGESIZE4M);
	}

	for (tte = TTE8K; tte <= TTE4M; tte++) {
		if (lpsize == TTEBYTES(tte))
			return (lpsize);
	}

	return (TTEBYTES(TTE8K));
}

/*
 * The following functions are unused in the OPL cpu module.  They are
 * defined here to resolve dependencies in the "unix" module.
 * Unused functions that should never be called in OPL are coded with
 * ASSERT(0).
 */

void
cpu_disable_errors(void)
{}

void
cpu_enable_errors(void)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{}

/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{}

/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{}

/*ARGSUSED*/
void
cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{ ASSERT(0); }

/*ARGSUSED*/
void
cpu_busy_ecache_scrub(struct cpu *cp)
{}

/*ARGSUSED*/
void
cpu_idle_ecache_scrub(struct cpu *cp)
{}

/* ARGSUSED */
void
cpu_change_speed(uint64_t divisor, uint64_t arg2)
{ ASSERT(0); }

void
cpu_init_cache_scrub(void)
{}

/* ARGSUSED */
int
cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	if (&plat_get_mem_sid) {
		return (plat_get_mem_sid(unum, buf, buflen, lenp));
	} else {
		return (ENOTSUP);
	}
}

/* ARGSUSED */
int
cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	if (&plat_get_mem_addr) {
		return (plat_get_mem_addr(unum, sid, offset, addrp));
	} else {
		return (ENOTSUP);
	}
}

/* ARGSUSED */
int
cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
{
	if (&plat_get_mem_offset) {
		return (plat_get_mem_offset(flt_addr, offp));
	} else {
		return (ENOTSUP);
	}
}

/*ARGSUSED*/
void
itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{ ASSERT(0); }

/*ARGSUSED*/
void
dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{ ASSERT(0); }

/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{ ASSERT(0); }

/*ARGSUSED*/
int
ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
    errorq_elem_t *eqep, size_t afltoffset)
{
	ASSERT(0);
	return (0);
}

/*ARGSUSED*/
char *
flt_to_error_type(struct async_flt *aflt)
{
	ASSERT(0);
	return (NULL);
}

#define	PROM_SPARC64VII_MODE_PROPNAME	"SPARC64-VII-mode"

/*
 * Check for the existence of the OPL OBP property that indicates
 * SPARC64-VII support.  By default, Jupiter features are enabled only
 * if the property is present.  OBP creates it in all-Jupiter domains
 * if the domain has been selected by the user on the system controller
 * to run in Jupiter mode.  In short, this OBP property must be present
 * to turn on the cpu_alljupiter flag.
 */
static int
prom_SPARC64VII_support_enabled(void)
{
	int val;

	return ((prom_getprop(prom_rootnode(), PROM_SPARC64VII_MODE_PROPNAME,
	    (caddr_t)&val) == 0) ? 1 : 0);
}
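/*
 * Note on the prom_getprop() check above: prom_getprop() returns the
 * length of the property value, or -1 if the property does not exist.
 * "SPARC64-VII-mode" is presumably a boolean OBP property with no
 * value, so a return of exactly 0 means the property is present, which
 * is all the check requires.
 */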