1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Support for Olympus-C (SPARC64-VI) and Jupiter (SPARC64-VII). 
28 */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/ddi.h> 35 #include <sys/sysmacros.h> 36 #include <sys/archsystm.h> 37 #include <sys/vmsystm.h> 38 #include <sys/machparam.h> 39 #include <sys/machsystm.h> 40 #include <sys/machthread.h> 41 #include <sys/cpu.h> 42 #include <sys/cmp.h> 43 #include <sys/elf_SPARC.h> 44 #include <vm/vm_dep.h> 45 #include <vm/hat_sfmmu.h> 46 #include <vm/seg_kpm.h> 47 #include <vm/seg_kmem.h> 48 #include <sys/cpuvar.h> 49 #include <sys/opl_olympus_regs.h> 50 #include <sys/opl_module.h> 51 #include <sys/async.h> 52 #include <sys/cmn_err.h> 53 #include <sys/debug.h> 54 #include <sys/dditypes.h> 55 #include <sys/cpu_module.h> 56 #include <sys/sysmacros.h> 57 #include <sys/intreg.h> 58 #include <sys/clock.h> 59 #include <sys/platform_module.h> 60 #include <sys/ontrap.h> 61 #include <sys/panic.h> 62 #include <sys/memlist.h> 63 #include <sys/ndifm.h> 64 #include <sys/ddifm.h> 65 #include <sys/fm/protocol.h> 66 #include <sys/fm/util.h> 67 #include <sys/fm/cpu/SPARC64-VI.h> 68 #include <sys/dtrace.h> 69 #include <sys/watchpoint.h> 70 #include <sys/promif.h> 71 72 /* 73 * Internal functions. 74 */ 75 static int cpu_sync_log_err(void *flt); 76 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *); 77 static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t); 78 static int cpu_flt_in_memory(opl_async_flt_t *, uint64_t); 79 80 /* 81 * Error counters resetting interval. 82 */ 83 static int opl_async_check_interval = 60; /* 1 min */ 84 85 uint_t cpu_impl_dual_pgsz = 1; 86 87 /* 88 * PA[22:0] represent Displacement in Jupiter 89 * configuration space. 90 */ 91 uint_t root_phys_addr_lo_mask = 0x7fffffu; 92 93 /* 94 * set in /etc/system to control logging of user BERR/TO's 95 */ 96 int cpu_berr_to_verbose = 0; 97 98 /* 99 * Set to 1 if booted with all Jupiter cpus (all-Jupiter features enabled). 
*/
int cpu_alljupiter = 0;

/*
 * File-private state.  None of these five variables is assigned in the
 * portion of the file reviewed here.
 */
static int min_ecache_size;
static uint_t priv_hcl_1;
static uint_t priv_hcl_2;
static uint_t priv_hcl_4;
static uint_t priv_hcl_8;

/*
 * Olympus error log.  Allocated in cpu_error_init().
 */
static opl_errlog_t *opl_err_log;

/*
 * Table of the error types this module knows about.
 *
 * Each entry is written as seven consecutive initializers: an error bit
 * (SFSR_* or UGESR_*), a short tag string, the trap class, the fault type,
 * a description string, an ereport payload value and an ereport class.
 * NOTE(review): the member names of ecc_type_to_info_t are declared
 * elsewhere; confirm the field order against that declaration.
 *
 * The table ends with an all-zero/NULL entry.
 *
 * UE is classified into four classes (MEM, CHANNEL, CPU, PATH).
 * No other entry may be inserted in between the following four UE
 * classes.
 */
ecc_type_to_info_t ecc_type_to_info[] = {
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_UE,
	"Uncorrectable ECC",	FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_MEM,
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_UE,
	"Uncorrectable ECC",	FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_CHANNEL,
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_UE,
	"Uncorrectable ECC",	FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_CPU,
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_UE,
	"Uncorrectable ECC",	FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_PATH,
	SFSR_BERR, "BERR ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_OTHERS,
	"Bus Error",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_BERR,
	SFSR_TO, "TO ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_OTHERS,
	"Bus Timeout",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_BTO,
	SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_OTHERS,
	"TLB MultiHit",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_MTLB,
	SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_OTHERS,
	"TLB Parity",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_TLBP,

	UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP,    OPL_CPU_URGENT,
	"IAUG CRE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_CRE,
	UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT",
	OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IAUG TSBCTXT",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSBCTX,
	UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP,    OPL_CPU_URGENT,
	"IUG TSBP",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSBP,
	UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG PSTATE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_PSTATE,
	UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG TSTATE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSTATE,
	UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IUG FREG",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IUG_F,
	UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IUG RREG",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IUG_R,
	UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"AUG SDC",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_SDC,
	UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IUG WDT",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_WDT,
	UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG DTLB",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_DTLB,
	UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG ITLB",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_ITLB,
	UGESR_IUG_COREERR, "IUG_COREERR",
	OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
	"IUG COREERR",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_CORE,
	UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"MULTI DAE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_DAE,
	UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"MULTI IAE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IAE,
	UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"MULTI UGE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_UGE,
	0,		NULL,		0,		0,
	NULL,  0,	   0,
};

/*
 * Optional hook consulted by cpu_get_mem_info(); when it is NULL that
 * routine returns ENOTSUP.  It is not assigned in the portion of the file
 * reviewed here.
 */
int (*p2get_mem_info)(int synd_code, uint64_t paddr,
		uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
		int *segsp, int *banksp, int *mcidp);


/*
 * Setup trap handlers for 0xA, 0x32, 0x40 trap types.
*/
void
cpu_init_trap(void)
{
	/* Instruction/data access error slots share one handler. */
	OPL_SET_TRAP(tt0_iae, opl_serr_instr);
	OPL_SET_TRAP(tt1_iae, opl_serr_instr);
	OPL_SET_TRAP(tt0_dae, opl_serr_instr);
	OPL_SET_TRAP(tt1_dae, opl_serr_instr);
	/* The asdat slots get the urgent-error handler. */
	OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr);
	OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr);
}

/*
 * Return the value of the PROM property 'name' on 'node' when the property
 * length is exactly sizeof (int); otherwise return 'deflt'.
 */
static int
getintprop(pnode_t node, char *name, int deflt)
{
	int	value;

	switch (prom_getproplen(node, name)) {
	case sizeof (int):
		(void) prom_getprop(node, name, (caddr_t)&value);
		break;

	default:
		value = deflt;
		break;
	}

	return (value);
}

/*
 * Set the magic constants of the implementation.
 *
 * Each cache geometry variable is loaded from the PROM node 'dnode', using
 * the OPL_* default when the property is absent or not int-sized.  The
 * virtual-address-cache variables are then derived from OPL_VAC_SIZE.
 */
/*ARGSUSED*/
void
cpu_fiximp(pnode_t dnode)
{
	int i, a;
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;

	/* PROM property name, variable to fill in, fallback value. */
	static struct {
		char	*name;
		int	*var;
		int	defval;
	} prop[] = {
		"l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE,
		"l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE,
		"l1-icache-size", &icache_size, OPL_ICACHE_SIZE,
		"l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE,
		"l2-cache-size", &ecache_size, OPL_ECACHE_SIZE,
		"l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE,
		"l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY
	};

	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++)
		*prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval);

	/*
	 * NOTE(review): a PROM-supplied associativity of 0 would divide by
	 * zero here; confirm the firmware never reports that.
	 */
	ecache_setsize = ecache_size / ecache_associativity;

	vac_size = OPL_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
	/* vac_shift is the position of the highest set bit of vac_size. */
	i = 0; a = vac_size;
	while (a >>= 1)
		++i;
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;
}

/*
 * Enable features for Jupiter-only domains.
 */
void
cpu_fix_alljupiter(void)
{
	cpu_alljupiter = 1;

	/*
	 * Enable ima hwcap for Jupiter-only domains.  DR will prevent
	 * addition of Olympus-C to all-Jupiter domains to preserve ima
	 * hwcap semantics.
	 */
	cpu_hwcap_flags |= AV_SPARC_IMA;
}

#ifdef	OLYMPUS_C_REV_B_ERRATA_XCALL
/*
 * Quick and dirty way to redefine locally in
 * OPL the value of IDSR_BN_SETS to 31 instead
 * of the standard 32 value. This is to work around
 * REV_B of Olympus_c processor's problem in handling
 * more than 31 xcall broadcasts.
 */
#undef	IDSR_BN_SETS
#define	IDSR_BN_SETS 31
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */

/*
 * Dispatch to every CPU in 'set' with shipit() and poll the value returned
 * by getidsr() until it reads zero (and, when NCPU > IDSR_BN_SETS, until
 * every CPU counted in the set has been shipped to).
 *
 * At most IDSR_BN_SETS dispatches are outstanding at a time; cpuids[]
 * records which CPU occupies each slot so that NACKed slots can be
 * re-shipped and so that freed slots can be reused for the remaining CPUs.
 *
 * If the poll loop runs past starttick + xc_tick_limit (extended whenever
 * consecutive tick reads differ by more than xc_tick_jump_limit) the
 * function panics with "send_mondo_set: timeout", unless panic_quiesce is
 * set, in which case it returns silently.
 */
void
send_mondo_set(cpuset_t set)
{
	int lo, busy, nack, shipped = 0;
	uint16_t i, cpuids[IDSR_BN_SETS];
	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
	uint64_t starttick, endtick, tick, lasttick;
#if (NCPU > IDSR_BN_SETS)
	int index = 0;
	int ncpuids = 0;
#endif
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	int bn_sets = IDSR_BN_SETS;
	uint64_t ver;

	ASSERT(NCPU > bn_sets);
#endif

	ASSERT(!CPUSET_ISNULL(set));
	starttick = lasttick = gettick();

#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	/* On Olympus-C rev A only one dispatch may be outstanding. */
	ver = ultra_getver();
	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
	    ((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
		bn_sets = 1;
#endif

#if (NCPU <= IDSR_BN_SETS)
	/* Every CPU fits in a slot: ship to all of them up front. */
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			shipit(i, shipped);
			nackmask |= IDSR_NACK_BIT(shipped);
			cpuids[shipped++] = i;
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
#else
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			ncpuids++;

			/*
			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
			 * find we have shipped to more than (IDSR_BN_SETS)
			 * CPUs, set "index" to the highest numbered CPU in
			 * the set so we can ship to other CPUs a bit later on.
			 */
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			if (shipped < bn_sets) {
#else
			if (shipped < IDSR_BN_SETS) {
#endif
				shipit(i, shipped);
				nackmask |= IDSR_NACK_BIT(shipped);
				cpuids[shipped++] = i;
				CPUSET_DEL(set, i);
				if (CPUSET_ISNULL(set))
					break;
			} else
				index = (int)i;
		}

	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
#endif

	busymask = IDSR_NACK_TO_BUSY(nackmask);
	busy = nack = 0;
	endtick = starttick + xc_tick_limit;
	for (;;) {
		idsr = getidsr();
#if (NCPU <= IDSR_BN_SETS)
		if (idsr == 0)
			break;
#else
		if (idsr == 0 && shipped == ncpuids)
			break;
#endif
		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			/* List every slot still showing NACK or BUSY. */
			cmn_err(CE_CONT, "send mondo timeout [%d NACK %d "
			    "BUSY]\nIDSR 0x%" PRIx64 " cpuids:",
			    nack, busy, idsr);
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			for (i = 0; i < bn_sets; i++) {
#else
			for (i = 0; i < IDSR_BN_SETS; i++) {
#endif
				if (idsr & (IDSR_NACK_BIT(i) |
				    IDSR_BUSY_BIT(i))) {
					cmn_err(CE_CONT, " 0x%x", cpuids[i]);
				}
			}
			cmn_err(CE_CONT, "\n");
			cmn_err(CE_PANIC, "send_mondo_set: timeout");
		}
		curnack = idsr & nackmask;
		curbusy = idsr & busymask;

#ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
		/*
		 * Only proceed to send more xcalls if all the
		 * cpus in the previous IDSR_BN_SETS were completed.
		 */
		if (curbusy) {
			busy++;
			continue;
		}
#endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */

#if (NCPU > IDSR_BN_SETS)
		if (shipped < ncpuids) {
			uint64_t cpus_left;
			uint16_t next = (uint16_t)index;

			/* Slots that are neither busy nor NACKed are free. */
			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
			    busymask;

			if (cpus_left) {
				do {
					/*
					 * Sequence through and ship to the
					 * remainder of the CPUs in the system
					 * (e.g. other than the first
					 * (IDSR_BN_SETS)) in reverse order.
					 */
					lo = lowbit(cpus_left) - 1;
					i = IDSR_BUSY_IDX(lo);
					shipit(next, i);
					shipped++;
					cpuids[i] = next;

					/*
					 * If we've processed all the CPUs,
					 * exit the loop now and save
					 * instructions.
					 */
					if (shipped == ncpuids)
						break;

					/* Find the next lower CPU in the set. */
					for ((index = ((int)next - 1));
					    index >= 0; index--)
						if (CPU_IN_SET(set, index)) {
							next = (uint16_t)index;
							break;
						}

					cpus_left &= ~(1ull << lo);
				} while (cpus_left);
				continue;
			}
		}
#endif
#ifndef	OLYMPUS_C_REV_B_ERRATA_XCALL
		if (curbusy) {
			busy++;
			continue;
		}
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
#ifdef SEND_MONDO_STATS
		{
			int n = gettick() - starttick;
			if (n < 8192)
				x_nack_stimes[n >> 7]++;
		}
#endif
		/* Spin for sys_clock_mhz ticks before re-shipping. */
		while (gettick() < (tick + sys_clock_mhz))
			;
		/* Re-ship to every slot that reported NACK. */
		do {
			lo = lowbit(curnack) - 1;
			i = IDSR_NACK_IDX(lo);
			shipit(cpuids[i], i);
			curnack &= ~(1ull << lo);
		} while (curnack);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_set_stimes[n >> 7]++;
		else
			x_set_ltimes[(n >> 13) & 0xf]++;
	}
	x_set_cpus[shipped]++;
#endif
}

/*
 * Cpu private initialization.
504 */ 505 void 506 cpu_init_private(struct cpu *cp) 507 { 508 if (!((IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation)) || 509 (IS_JUPITER(cpunodes[cp->cpu_id].implementation)))) { 510 cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI(I) is " 511 "supported", cp->cpu_id, 512 cpunodes[cp->cpu_id].implementation); 513 } 514 515 adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size); 516 } 517 518 void 519 cpu_setup(void) 520 { 521 extern int at_flags; 522 extern int cpc_has_overflow_intr; 523 uint64_t cpu0_log; 524 extern uint64_t opl_cpu0_err_log; 525 526 /* 527 * Initialize Error log Scratch register for error handling. 528 */ 529 530 cpu0_log = va_to_pa(&opl_cpu0_err_log); 531 opl_error_setup(cpu0_log); 532 533 /* 534 * Enable MMU translating multiple page sizes for 535 * sITLB and sDTLB. 536 */ 537 opl_mpg_enable(); 538 539 /* 540 * Setup chip-specific trap handlers. 541 */ 542 cpu_init_trap(); 543 544 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); 545 546 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3; 547 548 /* 549 * Due to the number of entries in the fully-associative tlb 550 * this may have to be tuned lower than in spitfire. 551 */ 552 pp_slots = MIN(8, MAXPP_SLOTS); 553 554 /* 555 * Block stores do not invalidate all pages of the d$, pagecopy 556 * et. al. need virtual translations with virtual coloring taken 557 * into consideration. prefetch/ldd will pollute the d$ on the 558 * load side. 559 */ 560 pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE; 561 562 if (use_page_coloring) { 563 do_pg_coloring = 1; 564 } 565 566 isa_list = 567 "sparcv9+vis2 sparcv9+vis sparcv9 " 568 "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus " 569 "sparcv8 sparcv8-fsmuld sparcv7 sparc"; 570 571 cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2 | 572 AV_SPARC_POPC | AV_SPARC_FMAF; 573 574 /* 575 * On SPARC64-VI, there's no hole in the virtual address space 576 */ 577 hole_start = hole_end = 0; 578 579 /* 580 * The kpm mapping window. 
581 * kpm_size: 582 * The size of a single kpm range. 583 * The overall size will be: kpm_size * vac_colors. 584 * kpm_vbase: 585 * The virtual start address of the kpm range within the kernel 586 * virtual address space. kpm_vbase has to be kpm_size aligned. 587 */ 588 kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */ 589 kpm_size_shift = 47; 590 kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */ 591 kpm_smallpages = 1; 592 593 /* 594 * The traptrace code uses either %tick or %stick for 595 * timestamping. We have %stick so we can use it. 596 */ 597 traptrace_use_stick = 1; 598 599 /* 600 * SPARC64-VI has a performance counter overflow interrupt 601 */ 602 cpc_has_overflow_intr = 1; 603 604 /* 605 * Declare that this architecture/cpu combination does not support 606 * fpRAS. 607 */ 608 fpras_implemented = 0; 609 } 610 611 /* 612 * Called by setcpudelay 613 */ 614 void 615 cpu_init_tick_freq(void) 616 { 617 /* 618 * For SPARC64-VI we want to use the system clock rate as 619 * the basis for low level timing, due to support of mixed 620 * speed CPUs and power managment. 621 */ 622 if (system_clock_freq == 0) 623 cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq"); 624 625 sys_tick_freq = system_clock_freq; 626 } 627 628 #ifdef SEND_MONDO_STATS 629 uint32_t x_one_stimes[64]; 630 uint32_t x_one_ltimes[16]; 631 uint32_t x_set_stimes[64]; 632 uint32_t x_set_ltimes[16]; 633 uint32_t x_set_cpus[NCPU]; 634 uint32_t x_nack_stimes[64]; 635 #endif 636 637 /* 638 * Note: A version of this function is used by the debugger via the KDI, 639 * and must be kept in sync with this version. Any changes made to this 640 * function to support new chips or to accomodate errata must also be included 641 * in the KDI-specific version. See us3_kdi.c. 
642 */ 643 void 644 send_one_mondo(int cpuid) 645 { 646 int busy, nack; 647 uint64_t idsr, starttick, endtick, tick, lasttick; 648 uint64_t busymask; 649 650 CPU_STATS_ADDQ(CPU, sys, xcalls, 1); 651 starttick = lasttick = gettick(); 652 shipit(cpuid, 0); 653 endtick = starttick + xc_tick_limit; 654 busy = nack = 0; 655 busymask = IDSR_BUSY; 656 for (;;) { 657 idsr = getidsr(); 658 if (idsr == 0) 659 break; 660 661 tick = gettick(); 662 /* 663 * If there is a big jump between the current tick 664 * count and lasttick, we have probably hit a break 665 * point. Adjust endtick accordingly to avoid panic. 666 */ 667 if (tick > (lasttick + xc_tick_jump_limit)) 668 endtick += (tick - lasttick); 669 lasttick = tick; 670 if (tick > endtick) { 671 if (panic_quiesce) 672 return; 673 cmn_err(CE_PANIC, "send mondo timeout (target 0x%x) " 674 "[%d NACK %d BUSY]", cpuid, nack, busy); 675 } 676 677 if (idsr & busymask) { 678 busy++; 679 continue; 680 } 681 drv_usecwait(1); 682 shipit(cpuid, 0); 683 nack++; 684 busy = 0; 685 } 686 #ifdef SEND_MONDO_STATS 687 { 688 int n = gettick() - starttick; 689 if (n < 8192) 690 x_one_stimes[n >> 7]++; 691 else 692 x_one_ltimes[(n >> 13) & 0xf]++; 693 } 694 #endif 695 } 696 697 /* 698 * init_mmu_page_sizes is set to one after the bootup time initialization 699 * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a 700 * valid value. 701 * 702 * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific 703 * versions of disable_ism_large_pages and disable_large_pages, and feed back 704 * into those two hat variables at hat initialization time. 
705 * 706 */ 707 int init_mmu_page_sizes = 0; 708 709 static uint_t mmu_disable_large_pages = 0; 710 static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) | 711 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 712 static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 713 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 714 static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) | 715 (1 << TTE512K)); 716 717 /* 718 * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support. 719 * Called during very early bootup from check_cpus_set(). 720 * Can be called to verify that mmu_page_sizes are set up correctly. 721 * 722 * Set Olympus defaults. We do not use the function parameter. 723 */ 724 /*ARGSUSED*/ 725 int 726 mmu_init_mmu_page_sizes(int32_t not_used) 727 { 728 if (!init_mmu_page_sizes) { 729 mmu_page_sizes = MMU_PAGE_SIZES; 730 mmu_hashcnt = MAX_HASHCNT; 731 mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE; 732 mmu_exported_pagesize_mask = (1 << TTE8K) | 733 (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | 734 (1 << TTE32M) | (1 << TTE256M); 735 init_mmu_page_sizes = 1; 736 return (0); 737 } 738 return (1); 739 } 740 741 /* SPARC64-VI worst case DTLB parameters */ 742 #ifndef LOCKED_DTLB_ENTRIES 743 #define LOCKED_DTLB_ENTRIES 5 /* 2 user TSBs, 2 nucleus, + OBP */ 744 #endif 745 #define TOTAL_DTLB_ENTRIES 32 746 #define AVAIL_32M_ENTRIES 0 747 #define AVAIL_256M_ENTRIES 0 748 #define AVAIL_DTLB_ENTRIES (TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES) 749 static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = { 750 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 751 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 752 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES}; 753 754 /* 755 * The function returns the mmu-specific values for the 756 * hat's disable_large_pages, disable_ism_large_pages, and 757 * disable_auto_data_large_pages and 758 * disable_text_data_large_pages variables. 
759 */ 760 uint_t 761 mmu_large_pages_disabled(uint_t flag) 762 { 763 uint_t pages_disable = 0; 764 extern int use_text_pgsz64K; 765 extern int use_text_pgsz512K; 766 767 if (flag == HAT_LOAD) { 768 pages_disable = mmu_disable_large_pages; 769 } else if (flag == HAT_LOAD_SHARE) { 770 pages_disable = mmu_disable_ism_large_pages; 771 } else if (flag == HAT_AUTO_DATA) { 772 pages_disable = mmu_disable_auto_data_large_pages; 773 } else if (flag == HAT_AUTO_TEXT) { 774 pages_disable = mmu_disable_auto_text_large_pages; 775 if (use_text_pgsz512K) { 776 pages_disable &= ~(1 << TTE512K); 777 } 778 if (use_text_pgsz64K) { 779 pages_disable &= ~(1 << TTE64K); 780 } 781 } 782 return (pages_disable); 783 } 784 785 /* 786 * mmu_init_large_pages is called with the desired ism_pagesize parameter. 787 * It may be called from set_platform_defaults, if some value other than 32M 788 * is desired. mmu_ism_pagesize is the tunable. If it has a bad value, 789 * then only warn, since it would be bad form to panic due to a user typo. 790 * 791 * The function re-initializes the mmu_disable_ism_large_pages variable. 
792 */ 793 void 794 mmu_init_large_pages(size_t ism_pagesize) 795 { 796 switch (ism_pagesize) { 797 case MMU_PAGESIZE4M: 798 mmu_disable_ism_large_pages = ((1 << TTE64K) | 799 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 800 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 801 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 802 break; 803 case MMU_PAGESIZE32M: 804 mmu_disable_ism_large_pages = ((1 << TTE64K) | 805 (1 << TTE512K) | (1 << TTE256M)); 806 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 807 (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M)); 808 adjust_data_maxlpsize(ism_pagesize); 809 break; 810 case MMU_PAGESIZE256M: 811 mmu_disable_ism_large_pages = ((1 << TTE64K) | 812 (1 << TTE512K) | (1 << TTE32M)); 813 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 814 (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M)); 815 adjust_data_maxlpsize(ism_pagesize); 816 break; 817 default: 818 cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx", 819 ism_pagesize); 820 break; 821 } 822 } 823 824 /* 825 * Function to reprogram the TLBs when page sizes used 826 * by a process change significantly. 827 */ 828 void 829 mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz) 830 { 831 uint8_t pgsz0, pgsz1; 832 833 /* 834 * Don't program 2nd dtlb for kernel and ism hat 835 */ 836 ASSERT(hat->sfmmu_ismhat == NULL); 837 ASSERT(hat != ksfmmup); 838 839 /* 840 * hat->sfmmu_pgsz[] is an array whose elements 841 * contain a sorted order of page sizes. Element 842 * 0 is the most commonly used page size, followed 843 * by element 1, and so on. 844 * 845 * ttecnt[] is an array of per-page-size page counts 846 * mapped into the process. 847 * 848 * If the HAT's choice for page sizes is unsuitable, 849 * we can override it here. The new values written 850 * to the array will be handed back to us later to 851 * do the actual programming of the TLB hardware. 
852 * 853 */ 854 pgsz0 = (uint8_t)MIN(tmp_pgsz[0], tmp_pgsz[1]); 855 pgsz1 = (uint8_t)MAX(tmp_pgsz[0], tmp_pgsz[1]); 856 857 /* 858 * This implements PAGESIZE programming of the sTLB 859 * if large TTE counts don't exceed the thresholds. 860 */ 861 if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0]) 862 pgsz0 = page_szc(MMU_PAGESIZE); 863 if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1]) 864 pgsz1 = page_szc(MMU_PAGESIZE); 865 tmp_pgsz[0] = pgsz0; 866 tmp_pgsz[1] = pgsz1; 867 /* otherwise, accept what the HAT chose for us */ 868 } 869 870 /* 871 * The HAT calls this function when an MMU context is allocated so that we 872 * can reprogram the large TLBs appropriately for the new process using 873 * the context. 874 * 875 * The caller must hold the HAT lock. 876 */ 877 void 878 mmu_set_ctx_page_sizes(struct hat *hat) 879 { 880 uint8_t pgsz0, pgsz1; 881 uint8_t new_cext; 882 883 ASSERT(sfmmu_hat_lock_held(hat)); 884 /* 885 * Don't program 2nd dtlb for kernel and ism hat 886 */ 887 if (hat->sfmmu_ismhat || hat == ksfmmup) 888 return; 889 890 /* 891 * If supported, reprogram the TLBs to a larger pagesize. 892 */ 893 pgsz0 = hat->sfmmu_pgsz[0]; 894 pgsz1 = hat->sfmmu_pgsz[1]; 895 ASSERT(pgsz0 < mmu_page_sizes); 896 ASSERT(pgsz1 < mmu_page_sizes); 897 new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0); 898 if (hat->sfmmu_cext != new_cext) { 899 #ifdef DEBUG 900 int i; 901 /* 902 * assert cnum should be invalid, this is because pagesize 903 * can only be changed after a proc's ctxs are invalidated. 904 */ 905 for (i = 0; i < max_mmu_ctxdoms; i++) { 906 ASSERT(hat->sfmmu_ctxs[i].cnum == INVALID_CONTEXT); 907 } 908 #endif /* DEBUG */ 909 hat->sfmmu_cext = new_cext; 910 } 911 /* 912 * sfmmu_setctx_sec() will take care of the 913 * rest of the dirty work for us. 
914 */ 915 } 916 917 /* 918 * This function assumes that there are either four or six supported page 919 * sizes and at most two programmable TLBs, so we need to decide which 920 * page sizes are most important and then adjust the TLB page sizes 921 * accordingly (if supported). 922 * 923 * If these assumptions change, this function will need to be 924 * updated to support whatever the new limits are. 925 */ 926 void 927 mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt) 928 { 929 uint64_t sortcnt[MMU_PAGE_SIZES]; 930 uint8_t tmp_pgsz[MMU_PAGE_SIZES]; 931 uint8_t i, j, max; 932 uint16_t oldval, newval; 933 934 /* 935 * We only consider reprogramming the TLBs if one or more of 936 * the two most used page sizes changes and we're using 937 * large pages in this process. 938 */ 939 if (SFMMU_LGPGS_INUSE(sfmmup)) { 940 /* Sort page sizes. */ 941 for (i = 0; i < mmu_page_sizes; i++) { 942 sortcnt[i] = ttecnt[i]; 943 } 944 for (j = 0; j < mmu_page_sizes; j++) { 945 for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) { 946 if (sortcnt[i] > sortcnt[max]) 947 max = i; 948 } 949 tmp_pgsz[j] = max; 950 sortcnt[max] = 0; 951 } 952 953 oldval = sfmmup->sfmmu_pgsz[0] << 8 | sfmmup->sfmmu_pgsz[1]; 954 955 mmu_setup_page_sizes(sfmmup, ttecnt, tmp_pgsz); 956 957 /* Check 2 largest values after the sort. */ 958 newval = tmp_pgsz[0] << 8 | tmp_pgsz[1]; 959 if (newval != oldval) { 960 sfmmu_reprog_pgsz_arr(sfmmup, tmp_pgsz); 961 } 962 } 963 } 964 965 /* 966 * Return processor specific async error structure 967 * size used. 968 */ 969 int 970 cpu_aflt_size(void) 971 { 972 return (sizeof (opl_async_flt_t)); 973 } 974 975 /* 976 * The cpu_sync_log_err() function is called via the [uc]e_drain() function to 977 * post-process CPU events that are dequeued. As such, it can be invoked 978 * from softint context, from AST processing in the trap() flow, or from the 979 * panic flow. We decode the CPU-specific data, and take appropriate actions. 
980 * Historically this entry point was used to log the actual cmn_err(9F) text; 981 * now with FMA it is used to prepare 'flt' to be converted into an ereport. 982 * With FMA this function now also returns a flag which indicates to the 983 * caller whether the ereport should be posted (1) or suppressed (0). 984 */ 985 /*ARGSUSED*/ 986 static int 987 cpu_sync_log_err(void *flt) 988 { 989 opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt; 990 struct async_flt *aflt = (struct async_flt *)flt; 991 992 /* 993 * No extra processing of urgent error events. 994 * Always generate ereports for these events. 995 */ 996 if (aflt->flt_status == OPL_ECC_URGENT_TRAP) 997 return (1); 998 999 /* 1000 * Additional processing for synchronous errors. 1001 */ 1002 switch (opl_flt->flt_type) { 1003 case OPL_CPU_INV_SFSR: 1004 return (1); 1005 1006 case OPL_CPU_SYNC_UE: 1007 /* 1008 * The validity: SFSR_MK_UE bit has been checked 1009 * in opl_cpu_sync_error() 1010 * No more check is required. 1011 * 1012 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W, 1013 * and they have been retrieved in cpu_queue_events() 1014 */ 1015 1016 if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) { 1017 ASSERT(aflt->flt_in_memory); 1018 /* 1019 * We want to skip logging only if ALL the following 1020 * conditions are true: 1021 * 1022 * 1. We are not panicing already. 1023 * 2. The error is a memory error. 1024 * 3. There is only one error. 1025 * 4. The error is on a retired page. 1026 * 5. 
The error occurred under on_trap 1027 * protection AFLT_PROT_EC 1028 */ 1029 if (!panicstr && aflt->flt_prot == AFLT_PROT_EC && 1030 page_retire_check(aflt->flt_addr, NULL) == 0) { 1031 /* 1032 * Do not log an error from 1033 * the retired page 1034 */ 1035 softcall(ecc_page_zero, (void *)aflt->flt_addr); 1036 return (0); 1037 } 1038 if (!panicstr) 1039 cpu_page_retire(opl_flt); 1040 } 1041 return (1); 1042 1043 case OPL_CPU_SYNC_OTHERS: 1044 /* 1045 * For the following error cases, the processor HW does 1046 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt 1047 * to assign appropriate values here to reflect what we 1048 * think is the most likely cause of the problem w.r.t to 1049 * the particular error event. For Buserr and timeout 1050 * error event, we will assign OPL_ERRID_CHANNEL as the 1051 * most likely reason. For TLB parity or multiple hit 1052 * error events, we will assign the reason as 1053 * OPL_ERRID_CPU (cpu related problem) and set the 1054 * flt_eid_sid to point to the cpuid. 1055 */ 1056 1057 if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) { 1058 /* 1059 * flt_eid_sid will not be used for this case. 1060 */ 1061 opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL; 1062 } 1063 if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) { 1064 opl_flt->flt_eid_mod = OPL_ERRID_CPU; 1065 opl_flt->flt_eid_sid = aflt->flt_inst; 1066 } 1067 1068 /* 1069 * In case of no effective error bit 1070 */ 1071 if ((opl_flt->flt_bit & SFSR_ERRS) == 0) { 1072 opl_flt->flt_eid_mod = OPL_ERRID_CPU; 1073 opl_flt->flt_eid_sid = aflt->flt_inst; 1074 } 1075 break; 1076 1077 default: 1078 return (1); 1079 } 1080 return (1); 1081 } 1082 1083 /* 1084 * Retire the bad page that may contain the flushed error. 
1085 */ 1086 void 1087 cpu_page_retire(opl_async_flt_t *opl_flt) 1088 { 1089 struct async_flt *aflt = (struct async_flt *)opl_flt; 1090 (void) page_retire(aflt->flt_addr, PR_UE); 1091 } 1092 1093 /* 1094 * Invoked by error_init() early in startup and therefore before 1095 * startup_errorq() is called to drain any error Q - 1096 * 1097 * startup() 1098 * startup_end() 1099 * error_init() 1100 * cpu_error_init() 1101 * errorq_init() 1102 * errorq_drain() 1103 * start_other_cpus() 1104 * 1105 * The purpose of this routine is to create error-related taskqs. Taskqs 1106 * are used for this purpose because cpu_lock can't be grabbed from interrupt 1107 * context. 1108 * 1109 */ 1110 /*ARGSUSED*/ 1111 void 1112 cpu_error_init(int items) 1113 { 1114 opl_err_log = (opl_errlog_t *) 1115 kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP); 1116 if ((uint64_t)opl_err_log & MMU_PAGEOFFSET) 1117 cmn_err(CE_PANIC, "The base address of the error log " 1118 "is not page aligned"); 1119 } 1120 1121 /* 1122 * We route all errors through a single switch statement. 1123 */ 1124 void 1125 cpu_ue_log_err(struct async_flt *aflt) 1126 { 1127 switch (aflt->flt_class) { 1128 case CPU_FAULT: 1129 if (cpu_sync_log_err(aflt)) 1130 cpu_ereport_post(aflt); 1131 break; 1132 1133 case BUS_FAULT: 1134 bus_async_log_err(aflt); 1135 break; 1136 1137 default: 1138 cmn_err(CE_WARN, "discarding async error %p with invalid " 1139 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 1140 return; 1141 } 1142 } 1143 1144 /* 1145 * Routine for panic hook callback from panic_idle(). 1146 * 1147 * Nothing to do here. 1148 */ 1149 void 1150 cpu_async_panic_callb(void) 1151 { 1152 } 1153 1154 /* 1155 * Routine to return a string identifying the physical name 1156 * associated with a memory/cache error. 
1157 */ 1158 /*ARGSUSED*/ 1159 int 1160 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, 1161 uint64_t flt_addr, int flt_bus_id, int flt_in_memory, 1162 ushort_t flt_status, char *buf, int buflen, int *lenp) 1163 { 1164 int synd_code; 1165 int ret; 1166 1167 /* 1168 * An AFSR of -1 defaults to a memory syndrome. 1169 */ 1170 synd_code = (int)flt_synd; 1171 1172 if (&plat_get_mem_unum) { 1173 if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id, 1174 flt_in_memory, flt_status, buf, buflen, lenp)) != 0) { 1175 buf[0] = '\0'; 1176 *lenp = 0; 1177 } 1178 return (ret); 1179 } 1180 buf[0] = '\0'; 1181 *lenp = 0; 1182 return (ENOTSUP); 1183 } 1184 1185 /* 1186 * Wrapper for cpu_get_mem_unum() routine that takes an 1187 * async_flt struct rather than explicit arguments. 1188 */ 1189 int 1190 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1191 char *buf, int buflen, int *lenp) 1192 { 1193 /* 1194 * We always pass -1 so that cpu_get_mem_unum will interpret this as a 1195 * memory error. 1196 */ 1197 return (cpu_get_mem_unum(synd_status, aflt->flt_synd, 1198 (uint64_t)-1, 1199 aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, 1200 aflt->flt_status, buf, buflen, lenp)); 1201 } 1202 1203 /* 1204 * This routine is a more generic interface to cpu_get_mem_unum() 1205 * that may be used by other modules (e.g. mm). 1206 */ 1207 /*ARGSUSED*/ 1208 int 1209 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1210 char *buf, int buflen, int *lenp) 1211 { 1212 int synd_status, flt_in_memory, ret; 1213 ushort_t flt_status = 0; 1214 char unum[UNUM_NAMLEN]; 1215 1216 /* 1217 * Check for an invalid address. 
1218 */ 1219 if (afar == (uint64_t)-1) 1220 return (ENXIO); 1221 1222 if (synd == (uint64_t)-1) 1223 synd_status = AFLT_STAT_INVALID; 1224 else 1225 synd_status = AFLT_STAT_VALID; 1226 1227 flt_in_memory = (*afsr & SFSR_MEMORY) && 1228 pf_is_memory(afar >> MMU_PAGESHIFT); 1229 1230 ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1231 CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp); 1232 if (ret != 0) 1233 return (ret); 1234 1235 if (*lenp >= buflen) 1236 return (ENAMETOOLONG); 1237 1238 (void) strncpy(buf, unum, buflen); 1239 1240 return (0); 1241 } 1242 1243 /* 1244 * Routine to return memory information associated 1245 * with a physical address and syndrome. 1246 */ 1247 /*ARGSUSED*/ 1248 int 1249 cpu_get_mem_info(uint64_t synd, uint64_t afar, 1250 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1251 int *segsp, int *banksp, int *mcidp) 1252 { 1253 int synd_code = (int)synd; 1254 1255 if (afar == (uint64_t)-1) 1256 return (ENXIO); 1257 1258 if (p2get_mem_info != NULL) 1259 return ((p2get_mem_info)(synd_code, afar, mem_sizep, seg_sizep, 1260 bank_sizep, segsp, banksp, mcidp)); 1261 else 1262 return (ENOTSUP); 1263 } 1264 1265 /* 1266 * Routine to return a string identifying the physical 1267 * name associated with a cpuid. 1268 */ 1269 int 1270 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 1271 { 1272 int ret; 1273 char unum[UNUM_NAMLEN]; 1274 1275 if (&plat_get_cpu_unum) { 1276 if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, 1277 lenp)) != 0) 1278 return (ret); 1279 } else { 1280 return (ENOTSUP); 1281 } 1282 1283 if (*lenp >= buflen) 1284 return (ENAMETOOLONG); 1285 1286 (void) strncpy(buf, unum, *lenp); 1287 1288 return (0); 1289 } 1290 1291 /* 1292 * This routine exports the name buffer size. 
1293 */ 1294 size_t 1295 cpu_get_name_bufsize() 1296 { 1297 return (UNUM_NAMLEN); 1298 } 1299 1300 /* 1301 * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH 1302 */ 1303 void 1304 cpu_flush_ecache(void) 1305 { 1306 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size, 1307 cpunodes[CPU->cpu_id].ecache_linesize); 1308 } 1309 1310 static uint8_t 1311 flt_to_trap_type(struct async_flt *aflt) 1312 { 1313 if (aflt->flt_status & OPL_ECC_ISYNC_TRAP) 1314 return (TRAP_TYPE_ECC_I); 1315 if (aflt->flt_status & OPL_ECC_DSYNC_TRAP) 1316 return (TRAP_TYPE_ECC_D); 1317 if (aflt->flt_status & OPL_ECC_URGENT_TRAP) 1318 return (TRAP_TYPE_URGENT); 1319 return (TRAP_TYPE_UNKNOWN); 1320 } 1321 1322 /* 1323 * Encode the data saved in the opl_async_flt_t struct into 1324 * the FM ereport payload. 1325 */ 1326 /* ARGSUSED */ 1327 static void 1328 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload, 1329 nvlist_t *resource) 1330 { 1331 opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt; 1332 char unum[UNUM_NAMLEN]; 1333 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1334 int len; 1335 1336 1337 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) { 1338 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR, 1339 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1340 } 1341 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) { 1342 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR, 1343 DATA_TYPE_UINT64, aflt->flt_addr, NULL); 1344 } 1345 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) { 1346 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR, 1347 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1348 } 1349 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) { 1350 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC, 1351 DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL); 1352 } 1353 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) { 1354 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL, 1355 DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL); 1356 } 1357 if 
(aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) { 1358 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT, 1359 DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL); 1360 } 1361 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) { 1362 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV, 1363 DATA_TYPE_BOOLEAN_VALUE, 1364 (aflt->flt_priv ? B_TRUE : B_FALSE), NULL); 1365 } 1366 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) { 1367 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS, 1368 DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL); 1369 } 1370 1371 switch (opl_flt->flt_eid_mod) { 1372 case OPL_ERRID_CPU: 1373 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1374 (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id); 1375 (void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION, 1376 NULL, opl_flt->flt_eid_sid, 1377 (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version, sbuf); 1378 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1379 DATA_TYPE_NVLIST, resource, NULL); 1380 break; 1381 1382 case OPL_ERRID_CHANNEL: 1383 /* 1384 * No resource is created but the cpumem DE will find 1385 * the defective path by retreiving EID from SFSR which is 1386 * included in the payload. 1387 */ 1388 break; 1389 1390 case OPL_ERRID_MEM: 1391 (void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len); 1392 (void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL, 1393 unum, NULL, (uint64_t)-1); 1394 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1395 DATA_TYPE_NVLIST, resource, NULL); 1396 break; 1397 1398 case OPL_ERRID_PATH: 1399 /* 1400 * No resource is created but the cpumem DE will find 1401 * the defective path by retreiving EID from SFSR which is 1402 * included in the payload. 1403 */ 1404 break; 1405 } 1406 } 1407 1408 /* 1409 * Returns whether fault address is valid for this error bit and 1410 * whether the address is "in memory" (i.e. pf_is_memory returns 1). 
1411 */ 1412 /*ARGSUSED*/ 1413 static int 1414 cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit) 1415 { 1416 struct async_flt *aflt = (struct async_flt *)opl_flt; 1417 1418 if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) { 1419 return ((t_afsr_bit & SFSR_MEMORY) && 1420 pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)); 1421 } 1422 return (0); 1423 } 1424 1425 /* 1426 * In OPL SCF does the stick synchronization. 1427 */ 1428 void 1429 sticksync_slave(void) 1430 { 1431 } 1432 1433 /* 1434 * In OPL SCF does the stick synchronization. 1435 */ 1436 void 1437 sticksync_master(void) 1438 { 1439 } 1440 1441 /* 1442 * Cpu private unitialization. OPL cpus do not use the private area. 1443 */ 1444 void 1445 cpu_uninit_private(struct cpu *cp) 1446 { 1447 cmp_delete_cpu(cp->cpu_id); 1448 } 1449 1450 /* 1451 * Always flush an entire cache. 1452 */ 1453 void 1454 cpu_error_ecache_flush(void) 1455 { 1456 cpu_flush_ecache(); 1457 } 1458 1459 void 1460 cpu_ereport_post(struct async_flt *aflt) 1461 { 1462 char *cpu_type, buf[FM_MAX_CLASS]; 1463 nv_alloc_t *nva = NULL; 1464 nvlist_t *ereport, *detector, *resource; 1465 errorq_elem_t *eqep; 1466 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1467 1468 if (aflt->flt_panic || panicstr) { 1469 eqep = errorq_reserve(ereport_errorq); 1470 if (eqep == NULL) 1471 return; 1472 ereport = errorq_elem_nvl(ereport_errorq, eqep); 1473 nva = errorq_elem_nva(ereport_errorq, eqep); 1474 } else { 1475 ereport = fm_nvlist_create(nva); 1476 } 1477 1478 /* 1479 * Create the scheme "cpu" FMRI. 
1480 */ 1481 detector = fm_nvlist_create(nva); 1482 resource = fm_nvlist_create(nva); 1483 switch (cpunodes[aflt->flt_inst].implementation) { 1484 case OLYMPUS_C_IMPL: 1485 cpu_type = FM_EREPORT_CPU_SPARC64_VI; 1486 break; 1487 case JUPITER_IMPL: 1488 cpu_type = FM_EREPORT_CPU_SPARC64_VII; 1489 break; 1490 default: 1491 cpu_type = FM_EREPORT_CPU_UNSUPPORTED; 1492 break; 1493 } 1494 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1495 (u_longlong_t)cpunodes[aflt->flt_inst].device_id); 1496 (void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL, 1497 aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version, 1498 sbuf); 1499 1500 /* 1501 * Encode all the common data into the ereport. 1502 */ 1503 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", 1504 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class); 1505 1506 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 1507 fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL); 1508 1509 /* 1510 * Encode the error specific data that was saved in 1511 * the async_flt structure into the ereport. 
1512 */ 1513 cpu_payload_add_aflt(aflt, ereport, resource); 1514 1515 if (aflt->flt_panic || panicstr) { 1516 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 1517 } else { 1518 (void) fm_ereport_post(ereport, EVCH_TRYHARD); 1519 fm_nvlist_destroy(ereport, FM_NVA_FREE); 1520 fm_nvlist_destroy(detector, FM_NVA_FREE); 1521 fm_nvlist_destroy(resource, FM_NVA_FREE); 1522 } 1523 } 1524 1525 void 1526 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 1527 { 1528 int status; 1529 ddi_fm_error_t de; 1530 1531 bzero(&de, sizeof (ddi_fm_error_t)); 1532 1533 de.fme_version = DDI_FME_VERSION; 1534 de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1); 1535 de.fme_flag = expected; 1536 de.fme_bus_specific = (void *)aflt->flt_addr; 1537 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 1538 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 1539 aflt->flt_panic = 1; 1540 } 1541 1542 void 1543 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 1544 errorq_t *eqp, uint_t flag) 1545 { 1546 struct async_flt *aflt = (struct async_flt *)payload; 1547 1548 aflt->flt_erpt_class = error_class; 1549 errorq_dispatch(eqp, payload, payload_sz, flag); 1550 } 1551 1552 void 1553 adjust_hw_copy_limits(int ecache_size) 1554 { 1555 /* 1556 * Set hw copy limits. 1557 * 1558 * /etc/system will be parsed later and can override one or more 1559 * of these settings. 1560 * 1561 * At this time, ecache size seems only mildly relevant. 1562 * We seem to run into issues with the d-cache and stalls 1563 * we see on misses. 1564 * 1565 * Cycle measurement indicates that 2 byte aligned copies fare 1566 * little better than doing things with VIS at around 512 bytes. 1567 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte 1568 * aligned is faster whenever the source and destination data 1569 * in cache and the total size is less than 2 Kbytes. The 2K 1570 * limit seems to be driven by the 2K write cache. 
1571 * When more than 2K of copies are done in non-VIS mode, stores 1572 * backup in the write cache. In VIS mode, the write cache is 1573 * bypassed, allowing faster cache-line writes aligned on cache 1574 * boundaries. 1575 * 1576 * In addition, in non-VIS mode, there is no prefetching, so 1577 * for larger copies, the advantage of prefetching to avoid even 1578 * occasional cache misses is enough to justify using the VIS code. 1579 * 1580 * During testing, it was discovered that netbench ran 3% slower 1581 * when hw_copy_limit_8 was 2K or larger. Apparently for server 1582 * applications, data is only used once (copied to the output 1583 * buffer, then copied by the network device off the system). Using 1584 * the VIS copy saves more L2 cache state. Network copies are 1585 * around 1.3K to 1.5K in size for historical reasons. 1586 * 1587 * Therefore, a limit of 1K bytes will be used for the 8 byte 1588 * aligned copy even for large caches and 8 MB ecache. The 1589 * infrastructure to allow different limits for different sized 1590 * caches is kept to allow further tuning in later releases. 1591 */ 1592 1593 if (min_ecache_size == 0 && use_hw_bcopy) { 1594 /* 1595 * First time through - should be before /etc/system 1596 * is read. 1597 * Could skip the checks for zero but this lets us 1598 * preserve any debugger rewrites. 1599 */ 1600 if (hw_copy_limit_1 == 0) { 1601 hw_copy_limit_1 = VIS_COPY_THRESHOLD; 1602 priv_hcl_1 = hw_copy_limit_1; 1603 } 1604 if (hw_copy_limit_2 == 0) { 1605 hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD; 1606 priv_hcl_2 = hw_copy_limit_2; 1607 } 1608 if (hw_copy_limit_4 == 0) { 1609 hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD; 1610 priv_hcl_4 = hw_copy_limit_4; 1611 } 1612 if (hw_copy_limit_8 == 0) { 1613 hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; 1614 priv_hcl_8 = hw_copy_limit_8; 1615 } 1616 min_ecache_size = ecache_size; 1617 } else { 1618 /* 1619 * MP initialization. Called *after* /etc/system has 1620 * been parsed. 
One CPU has already been initialized. 1621 * Need to cater for /etc/system having scragged one 1622 * of our values. 1623 */ 1624 if (ecache_size == min_ecache_size) { 1625 /* 1626 * Same size ecache. We do nothing unless we 1627 * have a pessimistic ecache setting. In that 1628 * case we become more optimistic (if the cache is 1629 * large enough). 1630 */ 1631 if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) { 1632 /* 1633 * Need to adjust hw_copy_limit* from our 1634 * pessimistic uniprocessor value to a more 1635 * optimistic UP value *iff* it hasn't been 1636 * reset. 1637 */ 1638 if ((ecache_size > 1048576) && 1639 (priv_hcl_8 == hw_copy_limit_8)) { 1640 if (ecache_size <= 2097152) 1641 hw_copy_limit_8 = 4 * 1642 VIS_COPY_THRESHOLD; 1643 else if (ecache_size <= 4194304) 1644 hw_copy_limit_8 = 4 * 1645 VIS_COPY_THRESHOLD; 1646 else 1647 hw_copy_limit_8 = 4 * 1648 VIS_COPY_THRESHOLD; 1649 priv_hcl_8 = hw_copy_limit_8; 1650 } 1651 } 1652 } else if (ecache_size < min_ecache_size) { 1653 /* 1654 * A different ecache size. Can this even happen? 1655 */ 1656 if (priv_hcl_8 == hw_copy_limit_8) { 1657 /* 1658 * The previous value that we set 1659 * is unchanged (i.e., it hasn't been 1660 * scragged by /etc/system). Rewrite it. 
1661 */ 1662 if (ecache_size <= 1048576) 1663 hw_copy_limit_8 = 8 * 1664 VIS_COPY_THRESHOLD; 1665 else if (ecache_size <= 2097152) 1666 hw_copy_limit_8 = 8 * 1667 VIS_COPY_THRESHOLD; 1668 else if (ecache_size <= 4194304) 1669 hw_copy_limit_8 = 8 * 1670 VIS_COPY_THRESHOLD; 1671 else 1672 hw_copy_limit_8 = 10 * 1673 VIS_COPY_THRESHOLD; 1674 priv_hcl_8 = hw_copy_limit_8; 1675 min_ecache_size = ecache_size; 1676 } 1677 } 1678 } 1679 } 1680 1681 #define VIS_BLOCKSIZE 64 1682 1683 int 1684 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 1685 { 1686 int ret, watched; 1687 1688 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1689 ret = dtrace_blksuword32(addr, data, 0); 1690 if (watched) 1691 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1692 1693 return (ret); 1694 } 1695 1696 void 1697 opl_cpu_reg_init() 1698 { 1699 uint64_t this_cpu_log; 1700 1701 /* 1702 * We do not need to re-initialize cpu0 registers. 1703 */ 1704 if (cpu[getprocessorid()] == &cpu0) 1705 return; 1706 1707 /* 1708 * Initialize Error log Scratch register for error handling. 1709 */ 1710 1711 this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) + 1712 ERRLOG_BUFSZ * (getprocessorid()))); 1713 opl_error_setup(this_cpu_log); 1714 1715 /* 1716 * Enable MMU translating multiple page sizes for 1717 * sITLB and sDTLB. 1718 */ 1719 opl_mpg_enable(); 1720 } 1721 1722 /* 1723 * Queue one event in ue_queue based on ecc_type_to_info entry. 
1724 */ 1725 static void 1726 cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason, 1727 ecc_type_to_info_t *eccp) 1728 { 1729 struct async_flt *aflt = (struct async_flt *)opl_flt; 1730 1731 if (reason && 1732 strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { 1733 (void) strcat(reason, eccp->ec_reason); 1734 } 1735 1736 opl_flt->flt_bit = eccp->ec_afsr_bit; 1737 opl_flt->flt_type = eccp->ec_flt_type; 1738 aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit); 1739 aflt->flt_payload = eccp->ec_err_payload; 1740 1741 ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP)); 1742 cpu_errorq_dispatch(eccp->ec_err_class, (void *)opl_flt, 1743 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 1744 } 1745 1746 /* 1747 * Queue events on async event queue one event per error bit. 1748 * Return number of events queued. 1749 */ 1750 int 1751 cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs) 1752 { 1753 struct async_flt *aflt = (struct async_flt *)opl_flt; 1754 ecc_type_to_info_t *eccp; 1755 int nevents = 0; 1756 1757 /* 1758 * Queue expected errors, error bit and fault type must must match 1759 * in the ecc_type_to_info table. 1760 */ 1761 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 1762 eccp++) { 1763 if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && 1764 (eccp->ec_flags & aflt->flt_status) != 0) { 1765 /* 1766 * UE error event can be further 1767 * classified/breakdown into finer granularity 1768 * based on the flt_eid_mod value set by HW. We do 1769 * special handling here so that we can report UE 1770 * error in finer granularity as ue_mem, 1771 * ue_channel, ue_cpu or ue_path. 
1772 */ 1773 if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) { 1774 opl_flt->flt_eid_mod = (aflt->flt_stat & 1775 SFSR_EID_MOD) >> SFSR_EID_MOD_SHIFT; 1776 opl_flt->flt_eid_sid = (aflt->flt_stat & 1777 SFSR_EID_SID) >> SFSR_EID_SID_SHIFT; 1778 /* 1779 * Need to advance eccp pointer by flt_eid_mod 1780 * so that we get an appropriate ecc pointer 1781 * 1782 * EID # of advances 1783 * ---------------------------------- 1784 * OPL_ERRID_MEM 0 1785 * OPL_ERRID_CHANNEL 1 1786 * OPL_ERRID_CPU 2 1787 * OPL_ERRID_PATH 3 1788 */ 1789 eccp += opl_flt->flt_eid_mod; 1790 } 1791 cpu_queue_one_event(opl_flt, reason, eccp); 1792 t_afsr_errs &= ~eccp->ec_afsr_bit; 1793 nevents++; 1794 } 1795 } 1796 1797 return (nevents); 1798 } 1799 1800 /* 1801 * Sync. error wrapper functions. 1802 * We use these functions in order to transfer here from the 1803 * nucleus trap handler information about trap type (data or 1804 * instruction) and trap level (0 or above 0). This way we 1805 * get rid of using SFSR's reserved bits. 
1806 */ 1807 1808 #define OPL_SYNC_TL0 0 1809 #define OPL_SYNC_TL1 1 1810 #define OPL_ISYNC_ERR 0 1811 #define OPL_DSYNC_ERR 1 1812 1813 void 1814 opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1815 { 1816 uint64_t t_sfar = p_sfar; 1817 uint64_t t_sfsr = p_sfsr; 1818 1819 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1820 OPL_SYNC_TL0, OPL_ISYNC_ERR); 1821 } 1822 1823 void 1824 opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1825 { 1826 uint64_t t_sfar = p_sfar; 1827 uint64_t t_sfsr = p_sfsr; 1828 1829 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1830 OPL_SYNC_TL1, OPL_ISYNC_ERR); 1831 } 1832 1833 void 1834 opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1835 { 1836 uint64_t t_sfar = p_sfar; 1837 uint64_t t_sfsr = p_sfsr; 1838 1839 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1840 OPL_SYNC_TL0, OPL_DSYNC_ERR); 1841 } 1842 1843 void 1844 opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1845 { 1846 uint64_t t_sfar = p_sfar; 1847 uint64_t t_sfsr = p_sfsr; 1848 1849 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1850 OPL_SYNC_TL1, OPL_DSYNC_ERR); 1851 } 1852 1853 /* 1854 * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL 1855 * and TLB_PRT. 1856 * This function is designed based on cpu_deferred_error(). 1857 */ 1858 1859 static void 1860 opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr, 1861 uint_t tl, uint_t derr) 1862 { 1863 opl_async_flt_t opl_flt; 1864 struct async_flt *aflt; 1865 int trampolined = 0; 1866 char pr_reason[MAX_REASON_STRING]; 1867 uint64_t log_sfsr; 1868 int expected = DDI_FM_ERR_UNEXPECTED; 1869 ddi_acc_hdl_t *hp; 1870 1871 /* 1872 * We need to look at p_flag to determine if the thread detected an 1873 * error while dumping core. We can't grab p_lock here, but it's ok 1874 * because we just need a consistent snapshot and we know that everyone 1875 * else will store a consistent set of bits while holding p_lock. 
We 1876 * don't have to worry about a race because SDOCORE is set once prior 1877 * to doing i/o from the process's address space and is never cleared. 1878 */ 1879 uint_t pflag = ttoproc(curthread)->p_flag; 1880 1881 pr_reason[0] = '\0'; 1882 1883 /* 1884 * handle the specific error 1885 */ 1886 bzero(&opl_flt, sizeof (opl_async_flt_t)); 1887 aflt = (struct async_flt *)&opl_flt; 1888 aflt->flt_id = gethrtime_waitfree(); 1889 aflt->flt_bus_id = getprocessorid(); 1890 aflt->flt_inst = CPU->cpu_id; 1891 aflt->flt_stat = t_sfsr; 1892 aflt->flt_addr = t_sfar; 1893 aflt->flt_pc = (caddr_t)rp->r_pc; 1894 aflt->flt_prot = (uchar_t)AFLT_PROT_NONE; 1895 aflt->flt_class = (uchar_t)CPU_FAULT; 1896 aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & 1897 TSTATE_PRIV) ? 1 : 0)); 1898 aflt->flt_tl = (uchar_t)tl; 1899 aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 || 1900 (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0); 1901 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1902 aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP; 1903 1904 /* 1905 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain. 1906 * So, clear all error bits to avoid mis-handling and force the system 1907 * panicked. 1908 * We skip all the procedures below down to the panic message call. 1909 */ 1910 if (!(t_sfsr & SFSR_FV)) { 1911 opl_flt.flt_type = OPL_CPU_INV_SFSR; 1912 aflt->flt_panic = 1; 1913 aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC; 1914 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt, 1915 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 1916 fm_panic("%sErrors(s)", "invalid SFSR"); 1917 } 1918 1919 /* 1920 * If either UE and MK bit is off, this is not valid UE error. 1921 * If it is not valid UE error, clear UE & MK_UE bits to prevent 1922 * mis-handling below. 1923 * aflt->flt_stat keeps the original bits as a reference. 
1924 */ 1925 if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) != 1926 (SFSR_MK_UE|SFSR_UE)) { 1927 t_sfsr &= ~(SFSR_MK_UE|SFSR_UE); 1928 } 1929 1930 /* 1931 * If the trap occurred in privileged mode at TL=0, we need to check to 1932 * see if we were executing in the kernel under on_trap() or t_lofault 1933 * protection. If so, modify the saved registers so that we return 1934 * from the trap to the appropriate trampoline routine. 1935 */ 1936 if (!aflt->flt_panic && aflt->flt_priv && tl == 0) { 1937 if (curthread->t_ontrap != NULL) { 1938 on_trap_data_t *otp = curthread->t_ontrap; 1939 1940 if (otp->ot_prot & OT_DATA_EC) { 1941 aflt->flt_prot = (uchar_t)AFLT_PROT_EC; 1942 otp->ot_trap |= (ushort_t)OT_DATA_EC; 1943 rp->r_pc = otp->ot_trampoline; 1944 rp->r_npc = rp->r_pc + 4; 1945 trampolined = 1; 1946 } 1947 1948 if ((t_sfsr & (SFSR_TO | SFSR_BERR)) && 1949 (otp->ot_prot & OT_DATA_ACCESS)) { 1950 aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS; 1951 otp->ot_trap |= (ushort_t)OT_DATA_ACCESS; 1952 rp->r_pc = otp->ot_trampoline; 1953 rp->r_npc = rp->r_pc + 4; 1954 trampolined = 1; 1955 /* 1956 * for peeks and caut_gets errors are expected 1957 */ 1958 hp = (ddi_acc_hdl_t *)otp->ot_handle; 1959 if (!hp) 1960 expected = DDI_FM_ERR_PEEK; 1961 else if (hp->ah_acc.devacc_attr_access == 1962 DDI_CAUTIOUS_ACC) 1963 expected = DDI_FM_ERR_EXPECTED; 1964 } 1965 1966 } else if (curthread->t_lofault) { 1967 aflt->flt_prot = AFLT_PROT_COPY; 1968 rp->r_g1 = EFAULT; 1969 rp->r_pc = curthread->t_lofault; 1970 rp->r_npc = rp->r_pc + 4; 1971 trampolined = 1; 1972 } 1973 } 1974 1975 /* 1976 * If we're in user mode or we're doing a protected copy, we either 1977 * want the ASTON code below to send a signal to the user process 1978 * or we want to panic if aft_panic is set. 1979 * 1980 * If we're in privileged mode and we're not doing a copy, then we 1981 * need to check if we've trampolined. If we haven't trampolined, 1982 * we should panic. 
1983 */ 1984 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 1985 if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO))) 1986 aflt->flt_panic |= aft_panic; 1987 } else if (!trampolined) { 1988 aflt->flt_panic = 1; 1989 } 1990 1991 /* 1992 * If we've trampolined due to a privileged TO or BERR, or if an 1993 * unprivileged TO or BERR occurred, we don't want to enqueue an 1994 * event for that TO or BERR. Queue all other events (if any) besides 1995 * the TO/BERR. 1996 */ 1997 log_sfsr = t_sfsr; 1998 if (trampolined) { 1999 log_sfsr &= ~(SFSR_TO | SFSR_BERR); 2000 } else if (!aflt->flt_priv) { 2001 /* 2002 * User mode, suppress messages if 2003 * cpu_berr_to_verbose is not set. 2004 */ 2005 if (!cpu_berr_to_verbose) 2006 log_sfsr &= ~(SFSR_TO | SFSR_BERR); 2007 } 2008 2009 if (((log_sfsr & SFSR_ERRS) && (cpu_queue_events(&opl_flt, pr_reason, 2010 t_sfsr) == 0)) || ((t_sfsr & SFSR_ERRS) == 0)) { 2011 opl_flt.flt_type = OPL_CPU_INV_SFSR; 2012 aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC; 2013 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt, 2014 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 2015 } 2016 2017 if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) { 2018 cpu_run_bus_error_handlers(aflt, expected); 2019 } 2020 2021 /* 2022 * Panic here if aflt->flt_panic has been set. Enqueued errors will 2023 * be logged as part of the panic flow. 2024 */ 2025 if (aflt->flt_panic) { 2026 if (pr_reason[0] == 0) 2027 strcpy(pr_reason, "invalid SFSR "); 2028 2029 fm_panic("%sErrors(s)", pr_reason); 2030 } 2031 2032 /* 2033 * If we queued an error and we are going to return from the trap and 2034 * the error was in user mode or inside of a copy routine, set AST flag 2035 * so the queue will be drained before returning to user mode. The 2036 * AST processing will also act on our failure policy. 
2037 */ 2038 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 2039 int pcb_flag = 0; 2040 2041 if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO))) 2042 pcb_flag |= ASYNC_HWERR; 2043 2044 if (t_sfsr & SFSR_BERR) 2045 pcb_flag |= ASYNC_BERR; 2046 2047 if (t_sfsr & SFSR_TO) 2048 pcb_flag |= ASYNC_BTO; 2049 2050 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 2051 aston(curthread); 2052 } 2053 } 2054 2055 /*ARGSUSED*/ 2056 void 2057 opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl) 2058 { 2059 opl_async_flt_t opl_flt; 2060 struct async_flt *aflt; 2061 char pr_reason[MAX_REASON_STRING]; 2062 2063 /* normalize tl */ 2064 tl = (tl >= 2 ? 1 : 0); 2065 pr_reason[0] = '\0'; 2066 2067 bzero(&opl_flt, sizeof (opl_async_flt_t)); 2068 aflt = (struct async_flt *)&opl_flt; 2069 aflt->flt_id = gethrtime_waitfree(); 2070 aflt->flt_bus_id = getprocessorid(); 2071 aflt->flt_inst = CPU->cpu_id; 2072 aflt->flt_stat = p_ugesr; 2073 aflt->flt_pc = (caddr_t)rp->r_pc; 2074 aflt->flt_class = (uchar_t)CPU_FAULT; 2075 aflt->flt_tl = tl; 2076 aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ? 2077 1 : 0)); 2078 aflt->flt_status = OPL_ECC_URGENT_TRAP; 2079 aflt->flt_panic = 1; 2080 /* 2081 * HW does not set mod/sid in case of urgent error. 2082 * So we have to set it here. 2083 */ 2084 opl_flt.flt_eid_mod = OPL_ERRID_CPU; 2085 opl_flt.flt_eid_sid = aflt->flt_inst; 2086 2087 if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) { 2088 opl_flt.flt_type = OPL_CPU_INV_UGESR; 2089 aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT; 2090 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG, (void *)&opl_flt, 2091 sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); 2092 } 2093 2094 fm_panic("Urgent Error"); 2095 } 2096 2097 /* 2098 * Initialization error counters resetting. 
2099 */ 2100 /* ARGSUSED */ 2101 static void 2102 opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when) 2103 { 2104 hdlr->cyh_func = (cyc_func_t)ras_cntr_reset; 2105 hdlr->cyh_level = CY_LOW_LEVEL; 2106 hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id; 2107 2108 when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU); 2109 when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval; 2110 } 2111 2112 void 2113 cpu_mp_init(void) 2114 { 2115 cyc_omni_handler_t hdlr; 2116 2117 hdlr.cyo_online = opl_ras_online; 2118 hdlr.cyo_offline = NULL; 2119 hdlr.cyo_arg = NULL; 2120 mutex_enter(&cpu_lock); 2121 (void) cyclic_add_omni(&hdlr); 2122 mutex_exit(&cpu_lock); 2123 } 2124 2125 int heaplp_use_stlb = 0; 2126 2127 void 2128 mmu_init_kernel_pgsz(struct hat *hat) 2129 { 2130 uint_t tte = page_szc(segkmem_lpsize); 2131 uchar_t new_cext_primary, new_cext_nucleus; 2132 2133 if (heaplp_use_stlb == 0) { 2134 /* do not reprogram stlb */ 2135 tte = TTE8K; 2136 } 2137 2138 new_cext_nucleus = TAGACCEXT_MKSZPAIR(tte, TTE8K); 2139 new_cext_primary = TAGACCEXT_MKSZPAIR(TTE8K, tte); 2140 2141 hat->sfmmu_cext = new_cext_primary; 2142 kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) | 2143 ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT); 2144 } 2145 2146 size_t 2147 mmu_get_kernel_lpsize(size_t lpsize) 2148 { 2149 uint_t tte; 2150 2151 if (lpsize == 0) { 2152 /* no setting for segkmem_lpsize in /etc/system: use default */ 2153 return (MMU_PAGESIZE4M); 2154 } 2155 2156 for (tte = TTE8K; tte <= TTE4M; tte++) { 2157 if (lpsize == TTEBYTES(tte)) 2158 return (lpsize); 2159 } 2160 2161 return (TTEBYTES(TTE8K)); 2162 } 2163 2164 /* 2165 * The following are functions that are unused in 2166 * OPL cpu module. They are defined here to resolve 2167 * dependencies in the "unix" module. 2168 * Unused functions that should never be called in 2169 * OPL are coded with ASSERT(0). 
2170 */ 2171 2172 void 2173 cpu_disable_errors(void) 2174 {} 2175 2176 void 2177 cpu_enable_errors(void) 2178 { ASSERT(0); } 2179 2180 /*ARGSUSED*/ 2181 void 2182 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t) 2183 { ASSERT(0); } 2184 2185 /*ARGSUSED*/ 2186 void 2187 cpu_faulted_enter(struct cpu *cp) 2188 {} 2189 2190 /*ARGSUSED*/ 2191 void 2192 cpu_faulted_exit(struct cpu *cp) 2193 {} 2194 2195 /*ARGSUSED*/ 2196 void 2197 cpu_check_allcpus(struct async_flt *aflt) 2198 {} 2199 2200 /*ARGSUSED*/ 2201 void 2202 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t) 2203 { ASSERT(0); } 2204 2205 /*ARGSUSED*/ 2206 void 2207 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz) 2208 { ASSERT(0); } 2209 2210 /*ARGSUSED*/ 2211 void 2212 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 2213 { ASSERT(0); } 2214 2215 /*ARGSUSED*/ 2216 void 2217 cpu_busy_ecache_scrub(struct cpu *cp) 2218 {} 2219 2220 /*ARGSUSED*/ 2221 void 2222 cpu_idle_ecache_scrub(struct cpu *cp) 2223 {} 2224 2225 /* ARGSUSED */ 2226 void 2227 cpu_change_speed(uint64_t divisor, uint64_t arg2) 2228 { ASSERT(0); } 2229 2230 void 2231 cpu_init_cache_scrub(void) 2232 {} 2233 2234 /* ARGSUSED */ 2235 int 2236 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 2237 { 2238 if (&plat_get_mem_sid) { 2239 return (plat_get_mem_sid(unum, buf, buflen, lenp)); 2240 } else { 2241 return (ENOTSUP); 2242 } 2243 } 2244 2245 /* ARGSUSED */ 2246 int 2247 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) 2248 { 2249 if (&plat_get_mem_addr) { 2250 return (plat_get_mem_addr(unum, sid, offset, addrp)); 2251 } else { 2252 return (ENOTSUP); 2253 } 2254 } 2255 2256 /* ARGSUSED */ 2257 int 2258 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) 2259 { 2260 if (&plat_get_mem_offset) { 2261 return (plat_get_mem_offset(flt_addr, offp)); 2262 } else { 2263 return (ENOTSUP); 2264 } 2265 } 2266 2267 /*ARGSUSED*/ 2268 void 2269 itlb_rd_entry(uint_t entry, tte_t *tte, 
uint64_t *va_tag) 2270 { ASSERT(0); } 2271 2272 /*ARGSUSED*/ 2273 void 2274 dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) 2275 { ASSERT(0); } 2276 2277 /*ARGSUSED*/ 2278 void 2279 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err) 2280 { ASSERT(0); } 2281 2282 /*ARGSUSED*/ 2283 int 2284 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp, 2285 errorq_elem_t *eqep, size_t afltoffset) 2286 { 2287 ASSERT(0); 2288 return (0); 2289 } 2290 2291 /*ARGSUSED*/ 2292 char * 2293 flt_to_error_type(struct async_flt *aflt) 2294 { 2295 ASSERT(0); 2296 return (NULL); 2297 } 2298