1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/systm.h> 30 #include <sys/ddi.h> 31 #include <sys/sysmacros.h> 32 #include <sys/archsystm.h> 33 #include <sys/vmsystm.h> 34 #include <sys/machparam.h> 35 #include <sys/machsystm.h> 36 #include <sys/machthread.h> 37 #include <sys/cpu.h> 38 #include <sys/cmp.h> 39 #include <sys/elf_SPARC.h> 40 #include <vm/vm_dep.h> 41 #include <vm/hat_sfmmu.h> 42 #include <vm/seg_kpm.h> 43 #include <vm/seg_kmem.h> 44 #include <sys/cpuvar.h> 45 #include <sys/opl_olympus_regs.h> 46 #include <sys/opl_module.h> 47 #include <sys/async.h> 48 #include <sys/cmn_err.h> 49 #include <sys/debug.h> 50 #include <sys/dditypes.h> 51 #include <sys/cpu_module.h> 52 #include <sys/sysmacros.h> 53 #include <sys/intreg.h> 54 #include <sys/clock.h> 55 #include <sys/platform_module.h> 56 #include <sys/ontrap.h> 57 #include <sys/panic.h> 58 #include <sys/memlist.h> 59 #include <sys/ndifm.h> 60 #include <sys/ddifm.h> 61 #include <sys/fm/protocol.h> 62 #include <sys/fm/util.h> 
63 #include <sys/fm/cpu/SPARC64-VI.h> 64 #include <sys/dtrace.h> 65 #include <sys/watchpoint.h> 66 #include <sys/promif.h> 67 68 /* 69 * Internal functions. 70 */ 71 static int cpu_sync_log_err(void *flt); 72 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *); 73 static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t); 74 static int cpu_flt_in_memory(opl_async_flt_t *, uint64_t); 75 76 /* 77 * Error counters resetting interval. 78 */ 79 static int opl_async_check_interval = 60; /* 1 min */ 80 81 uint_t cpu_impl_dual_pgsz = 1; 82 83 /* 84 * PA[22:0] represent Displacement in Jupiter 85 * configuration space. 86 */ 87 uint_t root_phys_addr_lo_mask = 0x7fffffu; 88 89 /* 90 * set in /etc/system to control logging of user BERR/TO's 91 */ 92 int cpu_berr_to_verbose = 0; 93 94 static int min_ecache_size; 95 static uint_t priv_hcl_1; 96 static uint_t priv_hcl_2; 97 static uint_t priv_hcl_4; 98 static uint_t priv_hcl_8; 99 100 /* 101 * Olympus error log 102 */ 103 static opl_errlog_t *opl_err_log; 104 105 /* 106 * UE is classified into four classes (MEM, CHANNEL, CPU, PATH). 107 * No any other ecc_type_info insertion is allowed in between the following 108 * four UE classess. 
109 */ 110 ecc_type_to_info_t ecc_type_to_info[] = { 111 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 112 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 113 FM_EREPORT_CPU_UE_MEM, 114 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 115 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 116 FM_EREPORT_CPU_UE_CHANNEL, 117 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 118 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 119 FM_EREPORT_CPU_UE_CPU, 120 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 121 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 122 FM_EREPORT_CPU_UE_PATH, 123 SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 124 "Bus Error", FM_EREPORT_PAYLOAD_SYNC, 125 FM_EREPORT_CPU_BERR, 126 SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 127 "Bus Timeout", FM_EREPORT_PAYLOAD_SYNC, 128 FM_EREPORT_CPU_BTO, 129 SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 130 "TLB MultiHit", FM_EREPORT_PAYLOAD_SYNC, 131 FM_EREPORT_CPU_MTLB, 132 SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 133 "TLB Parity", FM_EREPORT_PAYLOAD_SYNC, 134 FM_EREPORT_CPU_TLBP, 135 136 UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 137 "IAUG CRE", FM_EREPORT_PAYLOAD_URGENT, 138 FM_EREPORT_CPU_CRE, 139 UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT", 140 OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 141 "IAUG TSBCTXT", FM_EREPORT_PAYLOAD_URGENT, 142 FM_EREPORT_CPU_TSBCTX, 143 UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 144 "IUG TSBP", FM_EREPORT_PAYLOAD_URGENT, 145 FM_EREPORT_CPU_TSBP, 146 UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 147 "IUG PSTATE", FM_EREPORT_PAYLOAD_URGENT, 148 FM_EREPORT_CPU_PSTATE, 149 UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 150 "IUG TSTATE", FM_EREPORT_PAYLOAD_URGENT, 151 FM_EREPORT_CPU_TSTATE, 152 UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 153 "IUG FREG", FM_EREPORT_PAYLOAD_URGENT, 154 FM_EREPORT_CPU_IUG_F, 
155 UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 156 "IUG RREG", FM_EREPORT_PAYLOAD_URGENT, 157 FM_EREPORT_CPU_IUG_R, 158 UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 159 "AUG SDC", FM_EREPORT_PAYLOAD_URGENT, 160 FM_EREPORT_CPU_SDC, 161 UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 162 "IUG WDT", FM_EREPORT_PAYLOAD_URGENT, 163 FM_EREPORT_CPU_WDT, 164 UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 165 "IUG DTLB", FM_EREPORT_PAYLOAD_URGENT, 166 FM_EREPORT_CPU_DTLB, 167 UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 168 "IUG ITLB", FM_EREPORT_PAYLOAD_URGENT, 169 FM_EREPORT_CPU_ITLB, 170 UGESR_IUG_COREERR, "IUG_COREERR", 171 OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 172 "IUG COREERR", FM_EREPORT_PAYLOAD_URGENT, 173 FM_EREPORT_CPU_CORE, 174 UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 175 "MULTI DAE", FM_EREPORT_PAYLOAD_URGENT, 176 FM_EREPORT_CPU_DAE, 177 UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 178 "MULTI IAE", FM_EREPORT_PAYLOAD_URGENT, 179 FM_EREPORT_CPU_IAE, 180 UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 181 "MULTI UGE", FM_EREPORT_PAYLOAD_URGENT, 182 FM_EREPORT_CPU_UGE, 183 0, NULL, 0, 0, 184 NULL, 0, 0, 185 }; 186 187 int (*p2get_mem_info)(int synd_code, uint64_t paddr, 188 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 189 int *segsp, int *banksp, int *mcidp); 190 191 192 /* 193 * Setup trap handlers for 0xA, 0x32, 0x40 trap types. 
194 */ 195 void 196 cpu_init_trap(void) 197 { 198 OPL_SET_TRAP(tt0_iae, opl_serr_instr); 199 OPL_SET_TRAP(tt1_iae, opl_serr_instr); 200 OPL_SET_TRAP(tt0_dae, opl_serr_instr); 201 OPL_SET_TRAP(tt1_dae, opl_serr_instr); 202 OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr); 203 OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr); 204 } 205 206 static int 207 getintprop(pnode_t node, char *name, int deflt) 208 { 209 int value; 210 211 switch (prom_getproplen(node, name)) { 212 case sizeof (int): 213 (void) prom_getprop(node, name, (caddr_t)&value); 214 break; 215 216 default: 217 value = deflt; 218 break; 219 } 220 221 return (value); 222 } 223 224 /* 225 * Set the magic constants of the implementation. 226 */ 227 /*ARGSUSED*/ 228 void 229 cpu_fiximp(pnode_t dnode) 230 { 231 int i, a; 232 extern int vac_size, vac_shift; 233 extern uint_t vac_mask; 234 235 static struct { 236 char *name; 237 int *var; 238 int defval; 239 } prop[] = { 240 "l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE, 241 "l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE, 242 "l1-icache-size", &icache_size, OPL_ICACHE_SIZE, 243 "l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE, 244 "l2-cache-size", &ecache_size, OPL_ECACHE_SIZE, 245 "l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE, 246 "l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY 247 }; 248 249 for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) 250 *prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval); 251 252 ecache_setsize = ecache_size / ecache_associativity; 253 254 vac_size = OPL_VAC_SIZE; 255 vac_mask = MMU_PAGEMASK & (vac_size - 1); 256 i = 0; a = vac_size; 257 while (a >>= 1) 258 ++i; 259 vac_shift = i; 260 shm_alignment = vac_size; 261 vac = 1; 262 } 263 264 #ifdef OLYMPUS_C_REV_B_ERRATA_XCALL 265 /* 266 * Quick and dirty way to redefine locally in 267 * OPL the value of IDSR_BN_SETS to 31 instead 268 * of the standard 32 value. 
This is to work around
 * REV_B of Olympus_c processor's problem in handling
 * more than 31 xcall broadcasts.
 */
#undef	IDSR_BN_SETS
#define	IDSR_BN_SETS 31
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */

/*
 * Dispatch a mondo to every CPU in 'set' with shipit() and poll the value
 * returned by getidsr() until it reads zero (and, when NCPU is larger than
 * IDSR_BN_SETS, until every CPU counted in the set has been shipped to).
 *
 * At most IDSR_BN_SETS dispatches (bn_sets when the REV_A erratum code is
 * compiled in) are outstanding at a time; cpuids[] records which CPU
 * occupies each dispatch slot.  Slots that report NACK are re-shipped after
 * a short spin, and slots that are neither NACK nor BUSY are reused for
 * CPUs that have not been shipped to yet.
 *
 * If the wait runs past xc_tick_limit ticks the function prints the stuck
 * slots and panics, unless panic_quiesce is set, in which case it returns
 * silently.  'set' must not be empty (asserted).
 */
void
send_mondo_set(cpuset_t set)
{
	int lo, busy, nack, shipped = 0;
	uint16_t i, cpuids[IDSR_BN_SETS];
	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
	uint64_t starttick, endtick, tick, lasttick;
#if (NCPU > IDSR_BN_SETS)
	int index = 0;
	int ncpuids = 0;
#endif
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	int bn_sets = IDSR_BN_SETS;
	uint64_t ver;

	ASSERT(NCPU > bn_sets);
#endif

	ASSERT(!CPUSET_ISNULL(set));
	starttick = lasttick = gettick();

#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	/* On a rev A Olympus-C, use only a single dispatch slot. */
	ver = ultra_getver();
	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
	    ((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
		bn_sets = 1;
#endif

#if (NCPU <= IDSR_BN_SETS)
	/* Every CPU fits in a slot: ship to all members of the set now. */
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			shipit(i, shipped);
			nackmask |= IDSR_NACK_BIT(shipped);
			cpuids[shipped++] = i;
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
#else
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			ncpuids++;

			/*
			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
			 * find we have shipped to more than (IDSR_BN_SETS)
			 * CPUs, set "index" to the highest numbered CPU in
			 * the set so we can ship to other CPUs a bit later on.
			 */
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			if (shipped < bn_sets) {
#else
			if (shipped < IDSR_BN_SETS) {
#endif
				shipit(i, shipped);
				nackmask |= IDSR_NACK_BIT(shipped);
				cpuids[shipped++] = i;
				CPUSET_DEL(set, i);
				if (CPUSET_ISNULL(set))
					break;
			} else
				index = (int)i;
		}

	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
#endif

	/*
	 * nackmask/busymask cover only the slots used by the initial
	 * shipment above; they are not widened later.
	 */
	busymask = IDSR_NACK_TO_BUSY(nackmask);
	busy = nack = 0;
	endtick = starttick + xc_tick_limit;
	for (;;) {
		idsr = getidsr();
#if (NCPU <= IDSR_BN_SETS)
		if (idsr == 0)
			break;
#else
		if (idsr == 0 && shipped == ncpuids)
			break;
#endif
		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			/* Timed out: stay quiet if a panic is in progress. */
			if (panic_quiesce)
				return;
			cmn_err(CE_CONT, "send mondo timeout "
			    "[%d NACK %d BUSY]\nIDSR 0x%"
			    "" PRIx64 " cpuids:", nack, busy, idsr);
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			for (i = 0; i < bn_sets; i++) {
#else
			for (i = 0; i < IDSR_BN_SETS; i++) {
#endif
				/* List each CPU whose slot is still pending. */
				if (idsr & (IDSR_NACK_BIT(i) |
				    IDSR_BUSY_BIT(i))) {
					cmn_err(CE_CONT, " 0x%x",
					    cpuids[i]);
				}
			}
			cmn_err(CE_CONT, "\n");
			cmn_err(CE_PANIC, "send_mondo_set: timeout");
		}
		curnack = idsr & nackmask;
		curbusy = idsr & busymask;

#ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
		/*
		 * Only proceed to send more xcalls if all the
		 * cpus in the previous IDSR_BN_SETS were completed.
		 */
		if (curbusy) {
			busy++;
			continue;
		}
#endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */

#if (NCPU > IDSR_BN_SETS)
		if (shipped < ncpuids) {
			uint64_t cpus_left;
			uint16_t next = (uint16_t)index;

			/* Slots that are neither NACKed nor busy are free. */
			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
			    busymask;

			if (cpus_left) {
				do {
					/*
					 * Sequence through and ship to the
					 * remainder of the CPUs in the system
					 * (e.g. other than the first
					 * (IDSR_BN_SETS)) in reverse order.
					 */
					lo = lowbit(cpus_left) - 1;
					i = IDSR_BUSY_IDX(lo);
					shipit(next, i);
					shipped++;
					cpuids[i] = next;

					/*
					 * If we've processed all the CPUs,
					 * exit the loop now and save
					 * instructions.
					 */
					if (shipped == ncpuids)
						break;

					/*
					 * Find the next lower-numbered CPU
					 * that is still in the set.
					 */
					for ((index = ((int)next - 1));
					    index >= 0; index--)
						if (CPU_IN_SET(set, index)) {
							next = (uint16_t)index;
							break;
						}

					cpus_left &= ~(1ull << lo);
				} while (cpus_left);
				continue;
			}
		}
#endif
#ifndef	OLYMPUS_C_REV_B_ERRATA_XCALL
		if (curbusy) {
			busy++;
			continue;
		}
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
#ifdef SEND_MONDO_STATS
		{
			int n = gettick() - starttick;
			if (n < 8192)
				x_nack_stimes[n >> 7]++;
		}
#endif
		/* Spin for sys_clock_mhz ticks before re-shipping NACKs. */
		while (gettick() < (tick + sys_clock_mhz))
			;
		/* Re-ship to every slot that reported NACK. */
		do {
			lo = lowbit(curnack) - 1;
			i = IDSR_NACK_IDX(lo);
			shipit(cpuids[i], i);
			curnack &= ~(1ull << lo);
		} while (curnack);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_set_stimes[n >> 7]++;
		else
			x_set_ltimes[(n >> 13) & 0xf]++;
	}
	x_set_cpus[shipped]++;
#endif
}

/*
 * Cpu private initialization.
*/
void
cpu_init_private(struct cpu *cp)
{
	/* Refuse to run on any implementation other than Olympus-C. */
	if (!(IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation))) {
		cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI is supported",
		    cp->cpu_id, cpunodes[cp->cpu_id].implementation);
	}

	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
}

/*
 * CPU-module setup: registers the CPU0 error log, enables multiple page
 * size translation, installs the chip-specific trap handlers and fills in
 * the global tunables (cache flags, ISA list, hardware capabilities, kpm
 * window, and so on) with the SPARC64-VI values.
 */
void
cpu_setup(void)
{
	extern int at_flags;
	extern int disable_delay_tlb_flush, delay_tlb_flush;
	extern int cpc_has_overflow_intr;
	uint64_t cpu0_log;
	extern	 uint64_t opl_cpu0_err_log;

	/*
	 * Initialize Error log Scratch register for error handling.
	 */

	cpu0_log = va_to_pa(&opl_cpu0_err_log);
	opl_error_setup(cpu0_log);

	/*
	 * Enable MMU translating multiple page sizes for
	 * sITLB and sDTLB.
	 */
	opl_mpg_enable();

	/*
	 * Setup chip-specific trap handlers.
	 */
	cpu_init_trap();

	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;

	/*
	 * Due to the number of entries in the fully-associative tlb
	 * this may have to be tuned lower than in spitfire.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores do not invalidate all pages of the d$, pagecopy
	 * et al. need virtual translations with virtual coloring taken
	 * into consideration.  prefetch/ldd will pollute the d$ on the
	 * load side.
	 */
	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;

	/* Virtual coloring is only turned on together with page coloring. */
	if (use_page_coloring) {
		do_pg_coloring = 1;
		if (use_virtual_coloring)
			do_virtual_coloring = 1;
	}

	isa_list =
	    "sparcv9+vis2 sparcv9+vis sparcv9 "
	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2 |
	    AV_SPARC_POPC | AV_SPARC_FMAF;

	/*
	 * On SPARC64-VI, there's no hole in the virtual address space
	 */
	hole_start = hole_end = 0;

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */
	kpm_size_shift = 47;
	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
	kpm_smallpages = 1;

	/*
	 * The traptrace code uses either %tick or %stick for
	 * timestamping.  We have %stick so we can use it.
	 */
	traptrace_use_stick = 1;

	/*
	 * SPARC64-VI has a performance counter overflow interrupt
	 */
	cpc_has_overflow_intr = 1;

	/*
	 * Use SPARC64-VI flush-all support
	 */
	if (!disable_delay_tlb_flush)
		delay_tlb_flush = 1;

	/*
	 * Declare that this architecture/cpu combination does not support
	 * fpRAS.
	 */
	fpras_implemented = 0;
}

/*
 * Called by setcpudelay
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * For SPARC64-VI we want to use the system clock rate as
	 * the basis for low level timing, due to support of mixed
	 * speed CPUs and power management.
	 */
	if (system_clock_freq == 0)
		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");

	sys_tick_freq = system_clock_freq;
}

#ifdef SEND_MONDO_STATS
/*
 * Histogram counters updated by send_one_mondo() and send_mondo_set().
 * The *_stimes arrays are indexed by (elapsed ticks >> 7) for waits below
 * 8192 ticks; the *_ltimes arrays are indexed by ((elapsed ticks >> 13) &
 * 0xf) for longer waits.
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif

/*
 * Send a mondo to a single CPU: ship it, then poll getidsr() until it reads
 * zero.  While the busy bit is set we just keep polling; otherwise the
 * dispatch is treated as NACKed and re-shipped after a 1us wait.  Panics if
 * the wait exceeds xc_tick_limit ticks, unless panic_quiesce is set, in
 * which case the function returns silently.
 *
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be
 * included in the KDI-specific version.  See us3_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	int busy, nack;
	uint64_t idsr, starttick, endtick, tick, lasttick;
	uint64_t busymask;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	starttick = lasttick = gettick();
	shipit(cpuid, 0);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	busymask = IDSR_BUSY;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;

		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			/* Timed out: stay quiet if a panic is in progress. */
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC, "send mondo timeout "
			    "(target 0x%x) [%d NACK %d BUSY]",
			    cpuid, nack, busy);
		}

		if (idsr & busymask) {
			busy++;
			continue;
		}
		/* Not busy but not done either: wait briefly and re-ship. */
		drv_usecwait(1);
		shipit(cpuid, 0);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_one_stimes[n >> 7]++;
		else
			x_one_ltimes[(n >> 13) & 0xf]++;
	}
#endif
}

/*
 * init_mmu_page_sizes is set to one after the bootup time initialization
 * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a
 * valid value.
 *
 * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific
 * versions of disable_ism_large_pages and disable_large_pages, and feed back
 * into those two hat variables at hat initialization time.
 *
 */
int init_mmu_page_sizes = 0;

static uint_t mmu_disable_large_pages = 0;
static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) |
	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) |
	(1 << TTE512K));

/*
 * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
 * Called during very early bootup from check_cpus_set().
 * Can be called to verify that mmu_page_sizes are set up correctly.
 *
 * Set Olympus defaults. We do not use the function parameter.
707 */ 708 /*ARGSUSED*/ 709 int 710 mmu_init_mmu_page_sizes(int32_t not_used) 711 { 712 if (!init_mmu_page_sizes) { 713 mmu_page_sizes = MMU_PAGE_SIZES; 714 mmu_hashcnt = MAX_HASHCNT; 715 mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE; 716 mmu_exported_pagesize_mask = (1 << TTE8K) | 717 (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | 718 (1 << TTE32M) | (1 << TTE256M); 719 init_mmu_page_sizes = 1; 720 return (0); 721 } 722 return (1); 723 } 724 725 /* SPARC64-VI worst case DTLB parameters */ 726 #ifndef LOCKED_DTLB_ENTRIES 727 #define LOCKED_DTLB_ENTRIES 5 /* 2 user TSBs, 2 nucleus, + OBP */ 728 #endif 729 #define TOTAL_DTLB_ENTRIES 32 730 #define AVAIL_32M_ENTRIES 0 731 #define AVAIL_256M_ENTRIES 0 732 #define AVAIL_DTLB_ENTRIES (TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES) 733 static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = { 734 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 735 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 736 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES}; 737 738 /* 739 * The function returns the mmu-specific values for the 740 * hat's disable_large_pages, disable_ism_large_pages, and 741 * disable_auto_data_large_pages and 742 * disable_text_data_large_pages variables. 743 */ 744 uint_t 745 mmu_large_pages_disabled(uint_t flag) 746 { 747 uint_t pages_disable = 0; 748 extern int use_text_pgsz64K; 749 extern int use_text_pgsz512K; 750 751 if (flag == HAT_LOAD) { 752 pages_disable = mmu_disable_large_pages; 753 } else if (flag == HAT_LOAD_SHARE) { 754 pages_disable = mmu_disable_ism_large_pages; 755 } else if (flag == HAT_AUTO_DATA) { 756 pages_disable = mmu_disable_auto_data_large_pages; 757 } else if (flag == HAT_AUTO_TEXT) { 758 pages_disable = mmu_disable_auto_text_large_pages; 759 if (use_text_pgsz512K) { 760 pages_disable &= ~(1 << TTE512K); 761 } 762 if (use_text_pgsz64K) { 763 pages_disable &= ~(1 << TTE64K); 764 } 765 } 766 return (pages_disable); 767 } 768 769 /* 770 * mmu_init_large_pages is called with the desired ism_pagesize parameter. 
771 * It may be called from set_platform_defaults, if some value other than 32M 772 * is desired. mmu_ism_pagesize is the tunable. If it has a bad value, 773 * then only warn, since it would be bad form to panic due to a user typo. 774 * 775 * The function re-initializes the mmu_disable_ism_large_pages variable. 776 */ 777 void 778 mmu_init_large_pages(size_t ism_pagesize) 779 { 780 switch (ism_pagesize) { 781 case MMU_PAGESIZE4M: 782 mmu_disable_ism_large_pages = ((1 << TTE64K) | 783 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 784 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 785 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 786 break; 787 case MMU_PAGESIZE32M: 788 mmu_disable_ism_large_pages = ((1 << TTE64K) | 789 (1 << TTE512K) | (1 << TTE256M)); 790 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 791 (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M)); 792 adjust_data_maxlpsize(ism_pagesize); 793 break; 794 case MMU_PAGESIZE256M: 795 mmu_disable_ism_large_pages = ((1 << TTE64K) | 796 (1 << TTE512K) | (1 << TTE32M)); 797 mmu_disable_auto_data_large_pages = ((1 << TTE64K) | 798 (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M)); 799 adjust_data_maxlpsize(ism_pagesize); 800 break; 801 default: 802 cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx", 803 ism_pagesize); 804 break; 805 } 806 } 807 808 /* 809 * Function to reprogram the TLBs when page sizes used 810 * by a process change significantly. 811 */ 812 void 813 mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz) 814 { 815 uint8_t pgsz0, pgsz1; 816 817 /* 818 * Don't program 2nd dtlb for kernel and ism hat 819 */ 820 ASSERT(hat->sfmmu_ismhat == NULL); 821 ASSERT(hat != ksfmmup); 822 823 /* 824 * hat->sfmmu_pgsz[] is an array whose elements 825 * contain a sorted order of page sizes. Element 826 * 0 is the most commonly used page size, followed 827 * by element 1, and so on. 
828 * 829 * ttecnt[] is an array of per-page-size page counts 830 * mapped into the process. 831 * 832 * If the HAT's choice for page sizes is unsuitable, 833 * we can override it here. The new values written 834 * to the array will be handed back to us later to 835 * do the actual programming of the TLB hardware. 836 * 837 */ 838 pgsz0 = (uint8_t)MIN(tmp_pgsz[0], tmp_pgsz[1]); 839 pgsz1 = (uint8_t)MAX(tmp_pgsz[0], tmp_pgsz[1]); 840 841 /* 842 * This implements PAGESIZE programming of the sTLB 843 * if large TTE counts don't exceed the thresholds. 844 */ 845 if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0]) 846 pgsz0 = page_szc(MMU_PAGESIZE); 847 if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1]) 848 pgsz1 = page_szc(MMU_PAGESIZE); 849 tmp_pgsz[0] = pgsz0; 850 tmp_pgsz[1] = pgsz1; 851 /* otherwise, accept what the HAT chose for us */ 852 } 853 854 /* 855 * The HAT calls this function when an MMU context is allocated so that we 856 * can reprogram the large TLBs appropriately for the new process using 857 * the context. 858 * 859 * The caller must hold the HAT lock. 860 */ 861 void 862 mmu_set_ctx_page_sizes(struct hat *hat) 863 { 864 uint8_t pgsz0, pgsz1; 865 uint8_t new_cext; 866 867 ASSERT(sfmmu_hat_lock_held(hat)); 868 /* 869 * Don't program 2nd dtlb for kernel and ism hat 870 */ 871 if (hat->sfmmu_ismhat || hat == ksfmmup) 872 return; 873 874 /* 875 * If supported, reprogram the TLBs to a larger pagesize. 876 */ 877 pgsz0 = hat->sfmmu_pgsz[0]; 878 pgsz1 = hat->sfmmu_pgsz[1]; 879 ASSERT(pgsz0 < mmu_page_sizes); 880 ASSERT(pgsz1 < mmu_page_sizes); 881 new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0); 882 if (hat->sfmmu_cext != new_cext) { 883 #ifdef DEBUG 884 int i; 885 /* 886 * assert cnum should be invalid, this is because pagesize 887 * can only be changed after a proc's ctxs are invalidated. 
888 */ 889 for (i = 0; i < max_mmu_ctxdoms; i++) { 890 ASSERT(hat->sfmmu_ctxs[i].cnum == INVALID_CONTEXT); 891 } 892 #endif /* DEBUG */ 893 hat->sfmmu_cext = new_cext; 894 } 895 /* 896 * sfmmu_setctx_sec() will take care of the 897 * rest of the dirty work for us. 898 */ 899 } 900 901 /* 902 * This function assumes that there are either four or six supported page 903 * sizes and at most two programmable TLBs, so we need to decide which 904 * page sizes are most important and then adjust the TLB page sizes 905 * accordingly (if supported). 906 * 907 * If these assumptions change, this function will need to be 908 * updated to support whatever the new limits are. 909 */ 910 void 911 mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt) 912 { 913 uint64_t sortcnt[MMU_PAGE_SIZES]; 914 uint8_t tmp_pgsz[MMU_PAGE_SIZES]; 915 uint8_t i, j, max; 916 uint16_t oldval, newval; 917 918 /* 919 * We only consider reprogramming the TLBs if one or more of 920 * the two most used page sizes changes and we're using 921 * large pages in this process. 922 */ 923 if (sfmmup->sfmmu_flags & HAT_LGPG_FLAGS) { 924 /* Sort page sizes. */ 925 for (i = 0; i < mmu_page_sizes; i++) { 926 sortcnt[i] = ttecnt[i]; 927 } 928 for (j = 0; j < mmu_page_sizes; j++) { 929 for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) { 930 if (sortcnt[i] > sortcnt[max]) 931 max = i; 932 } 933 tmp_pgsz[j] = max; 934 sortcnt[max] = 0; 935 } 936 937 oldval = sfmmup->sfmmu_pgsz[0] << 8 | sfmmup->sfmmu_pgsz[1]; 938 939 mmu_setup_page_sizes(sfmmup, ttecnt, tmp_pgsz); 940 941 /* Check 2 largest values after the sort. */ 942 newval = tmp_pgsz[0] << 8 | tmp_pgsz[1]; 943 if (newval != oldval) { 944 sfmmu_reprog_pgsz_arr(sfmmup, tmp_pgsz); 945 } 946 } 947 } 948 949 /* 950 * Return processor specific async error structure 951 * size used. 
952 */ 953 int 954 cpu_aflt_size(void) 955 { 956 return (sizeof (opl_async_flt_t)); 957 } 958 959 /* 960 * The cpu_sync_log_err() function is called via the [uc]e_drain() function to 961 * post-process CPU events that are dequeued. As such, it can be invoked 962 * from softint context, from AST processing in the trap() flow, or from the 963 * panic flow. We decode the CPU-specific data, and take appropriate actions. 964 * Historically this entry point was used to log the actual cmn_err(9F) text; 965 * now with FMA it is used to prepare 'flt' to be converted into an ereport. 966 * With FMA this function now also returns a flag which indicates to the 967 * caller whether the ereport should be posted (1) or suppressed (0). 968 */ 969 /*ARGSUSED*/ 970 static int 971 cpu_sync_log_err(void *flt) 972 { 973 opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt; 974 struct async_flt *aflt = (struct async_flt *)flt; 975 976 /* 977 * No extra processing of urgent error events. 978 * Always generate ereports for these events. 979 */ 980 if (aflt->flt_status == OPL_ECC_URGENT_TRAP) 981 return (1); 982 983 /* 984 * Additional processing for synchronous errors. 985 */ 986 switch (opl_flt->flt_type) { 987 case OPL_CPU_INV_SFSR: 988 return (1); 989 990 case OPL_CPU_SYNC_UE: 991 /* 992 * The validity: SFSR_MK_UE bit has been checked 993 * in opl_cpu_sync_error() 994 * No more check is required. 995 * 996 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W, 997 * and they have been retrieved in cpu_queue_events() 998 */ 999 1000 if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) { 1001 ASSERT(aflt->flt_in_memory); 1002 /* 1003 * We want to skip logging only if ALL the following 1004 * conditions are true: 1005 * 1006 * 1. We are not panicing already. 1007 * 2. The error is a memory error. 1008 * 3. There is only one error. 1009 * 4. The error is on a retired page. 1010 * 5. 
The error occurred under on_trap 1011 * protection AFLT_PROT_EC 1012 */ 1013 if (!panicstr && aflt->flt_prot == AFLT_PROT_EC && 1014 page_retire_check(aflt->flt_addr, NULL) == 0) { 1015 /* 1016 * Do not log an error from 1017 * the retired page 1018 */ 1019 softcall(ecc_page_zero, (void *)aflt->flt_addr); 1020 return (0); 1021 } 1022 if (!panicstr) 1023 cpu_page_retire(opl_flt); 1024 } 1025 return (1); 1026 1027 case OPL_CPU_SYNC_OTHERS: 1028 /* 1029 * For the following error cases, the processor HW does 1030 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt 1031 * to assign appropriate values here to reflect what we 1032 * think is the most likely cause of the problem w.r.t to 1033 * the particular error event. For Buserr and timeout 1034 * error event, we will assign OPL_ERRID_CHANNEL as the 1035 * most likely reason. For TLB parity or multiple hit 1036 * error events, we will assign the reason as 1037 * OPL_ERRID_CPU (cpu related problem) and set the 1038 * flt_eid_sid to point to the cpuid. 1039 */ 1040 1041 if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) { 1042 /* 1043 * flt_eid_sid will not be used for this case. 1044 */ 1045 opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL; 1046 } 1047 if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) { 1048 opl_flt->flt_eid_mod = OPL_ERRID_CPU; 1049 opl_flt->flt_eid_sid = aflt->flt_inst; 1050 } 1051 1052 /* 1053 * In case of no effective error bit 1054 */ 1055 if ((opl_flt->flt_bit & SFSR_ERRS) == 0) { 1056 opl_flt->flt_eid_mod = OPL_ERRID_CPU; 1057 opl_flt->flt_eid_sid = aflt->flt_inst; 1058 } 1059 break; 1060 1061 default: 1062 return (1); 1063 } 1064 return (1); 1065 } 1066 1067 /* 1068 * Retire the bad page that may contain the flushed error. 
1069 */ 1070 void 1071 cpu_page_retire(opl_async_flt_t *opl_flt) 1072 { 1073 struct async_flt *aflt = (struct async_flt *)opl_flt; 1074 (void) page_retire(aflt->flt_addr, PR_UE); 1075 } 1076 1077 /* 1078 * Invoked by error_init() early in startup and therefore before 1079 * startup_errorq() is called to drain any error Q - 1080 * 1081 * startup() 1082 * startup_end() 1083 * error_init() 1084 * cpu_error_init() 1085 * errorq_init() 1086 * errorq_drain() 1087 * start_other_cpus() 1088 * 1089 * The purpose of this routine is to create error-related taskqs. Taskqs 1090 * are used for this purpose because cpu_lock can't be grabbed from interrupt 1091 * context. 1092 * 1093 */ 1094 /*ARGSUSED*/ 1095 void 1096 cpu_error_init(int items) 1097 { 1098 opl_err_log = (opl_errlog_t *) 1099 kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP); 1100 if ((uint64_t)opl_err_log & MMU_PAGEOFFSET) 1101 cmn_err(CE_PANIC, "The base address of the error log " 1102 "is not page aligned"); 1103 } 1104 1105 /* 1106 * We route all errors through a single switch statement. 1107 */ 1108 void 1109 cpu_ue_log_err(struct async_flt *aflt) 1110 { 1111 switch (aflt->flt_class) { 1112 case CPU_FAULT: 1113 if (cpu_sync_log_err(aflt)) 1114 cpu_ereport_post(aflt); 1115 break; 1116 1117 case BUS_FAULT: 1118 bus_async_log_err(aflt); 1119 break; 1120 1121 default: 1122 cmn_err(CE_WARN, "discarding async error %p with invalid " 1123 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 1124 return; 1125 } 1126 } 1127 1128 /* 1129 * Routine for panic hook callback from panic_idle(). 1130 * 1131 * Nothing to do here. 1132 */ 1133 void 1134 cpu_async_panic_callb(void) 1135 { 1136 } 1137 1138 /* 1139 * Routine to return a string identifying the physical name 1140 * associated with a memory/cache error. 
1141 */ 1142 /*ARGSUSED*/ 1143 int 1144 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, 1145 uint64_t flt_addr, int flt_bus_id, int flt_in_memory, 1146 ushort_t flt_status, char *buf, int buflen, int *lenp) 1147 { 1148 int synd_code; 1149 int ret; 1150 1151 /* 1152 * An AFSR of -1 defaults to a memory syndrome. 1153 */ 1154 synd_code = (int)flt_synd; 1155 1156 if (&plat_get_mem_unum) { 1157 if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id, 1158 flt_in_memory, flt_status, buf, buflen, lenp)) != 0) { 1159 buf[0] = '\0'; 1160 *lenp = 0; 1161 } 1162 return (ret); 1163 } 1164 buf[0] = '\0'; 1165 *lenp = 0; 1166 return (ENOTSUP); 1167 } 1168 1169 /* 1170 * Wrapper for cpu_get_mem_unum() routine that takes an 1171 * async_flt struct rather than explicit arguments. 1172 */ 1173 int 1174 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1175 char *buf, int buflen, int *lenp) 1176 { 1177 /* 1178 * We always pass -1 so that cpu_get_mem_unum will interpret this as a 1179 * memory error. 1180 */ 1181 return (cpu_get_mem_unum(synd_status, aflt->flt_synd, 1182 (uint64_t)-1, 1183 aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, 1184 aflt->flt_status, buf, buflen, lenp)); 1185 } 1186 1187 /* 1188 * This routine is a more generic interface to cpu_get_mem_unum() 1189 * that may be used by other modules (e.g. mm). 1190 */ 1191 /*ARGSUSED*/ 1192 int 1193 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1194 char *buf, int buflen, int *lenp) 1195 { 1196 int synd_status, flt_in_memory, ret; 1197 ushort_t flt_status = 0; 1198 char unum[UNUM_NAMLEN]; 1199 1200 /* 1201 * Check for an invalid address. 
1202 */ 1203 if (afar == (uint64_t)-1) 1204 return (ENXIO); 1205 1206 if (synd == (uint64_t)-1) 1207 synd_status = AFLT_STAT_INVALID; 1208 else 1209 synd_status = AFLT_STAT_VALID; 1210 1211 flt_in_memory = (*afsr & SFSR_MEMORY) && 1212 pf_is_memory(afar >> MMU_PAGESHIFT); 1213 1214 ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1215 CPU->cpu_id, flt_in_memory, flt_status, unum, 1216 UNUM_NAMLEN, lenp); 1217 if (ret != 0) 1218 return (ret); 1219 1220 if (*lenp >= buflen) 1221 return (ENAMETOOLONG); 1222 1223 (void) strncpy(buf, unum, buflen); 1224 1225 return (0); 1226 } 1227 1228 /* 1229 * Routine to return memory information associated 1230 * with a physical address and syndrome. 1231 */ 1232 /*ARGSUSED*/ 1233 int 1234 cpu_get_mem_info(uint64_t synd, uint64_t afar, 1235 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1236 int *segsp, int *banksp, int *mcidp) 1237 { 1238 int synd_code = (int)synd; 1239 1240 if (afar == (uint64_t)-1) 1241 return (ENXIO); 1242 1243 if (p2get_mem_info != NULL) 1244 return ((p2get_mem_info)(synd_code, afar, 1245 mem_sizep, seg_sizep, bank_sizep, 1246 segsp, banksp, mcidp)); 1247 else 1248 return (ENOTSUP); 1249 } 1250 1251 /* 1252 * Routine to return a string identifying the physical 1253 * name associated with a cpuid. 1254 */ 1255 int 1256 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 1257 { 1258 int ret; 1259 char unum[UNUM_NAMLEN]; 1260 1261 if (&plat_get_cpu_unum) { 1262 if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp)) 1263 != 0) 1264 return (ret); 1265 } else { 1266 return (ENOTSUP); 1267 } 1268 1269 if (*lenp >= buflen) 1270 return (ENAMETOOLONG); 1271 1272 (void) strncpy(buf, unum, *lenp); 1273 1274 return (0); 1275 } 1276 1277 /* 1278 * This routine exports the name buffer size. 
1279 */ 1280 size_t 1281 cpu_get_name_bufsize() 1282 { 1283 return (UNUM_NAMLEN); 1284 } 1285 1286 /* 1287 * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH 1288 */ 1289 void 1290 cpu_flush_ecache(void) 1291 { 1292 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size, 1293 cpunodes[CPU->cpu_id].ecache_linesize); 1294 } 1295 1296 static uint8_t 1297 flt_to_trap_type(struct async_flt *aflt) 1298 { 1299 if (aflt->flt_status & OPL_ECC_ISYNC_TRAP) 1300 return (TRAP_TYPE_ECC_I); 1301 if (aflt->flt_status & OPL_ECC_DSYNC_TRAP) 1302 return (TRAP_TYPE_ECC_D); 1303 if (aflt->flt_status & OPL_ECC_URGENT_TRAP) 1304 return (TRAP_TYPE_URGENT); 1305 return (-1); 1306 } 1307 1308 /* 1309 * Encode the data saved in the opl_async_flt_t struct into 1310 * the FM ereport payload. 1311 */ 1312 /* ARGSUSED */ 1313 static void 1314 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload, 1315 nvlist_t *resource) 1316 { 1317 opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt; 1318 char unum[UNUM_NAMLEN]; 1319 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1320 int len; 1321 1322 1323 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) { 1324 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR, 1325 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1326 } 1327 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) { 1328 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR, 1329 DATA_TYPE_UINT64, aflt->flt_addr, NULL); 1330 } 1331 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) { 1332 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR, 1333 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1334 } 1335 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) { 1336 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC, 1337 DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL); 1338 } 1339 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) { 1340 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL, 1341 DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL); 1342 } 1343 if (aflt->flt_payload & 
FM_EREPORT_PAYLOAD_FLAG_TT) { 1344 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT, 1345 DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL); 1346 } 1347 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) { 1348 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV, 1349 DATA_TYPE_BOOLEAN_VALUE, 1350 (aflt->flt_priv ? B_TRUE : B_FALSE), NULL); 1351 } 1352 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) { 1353 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS, 1354 DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL); 1355 } 1356 1357 switch (opl_flt->flt_eid_mod) { 1358 case OPL_ERRID_CPU: 1359 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1360 (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id); 1361 (void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION, 1362 NULL, opl_flt->flt_eid_sid, 1363 (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version, 1364 sbuf); 1365 fm_payload_set(payload, 1366 FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1367 DATA_TYPE_NVLIST, resource, NULL); 1368 break; 1369 1370 case OPL_ERRID_CHANNEL: 1371 /* 1372 * No resource is created but the cpumem DE will find 1373 * the defective path by retreiving EID from SFSR which is 1374 * included in the payload. 1375 */ 1376 break; 1377 1378 case OPL_ERRID_MEM: 1379 (void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len); 1380 (void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, 1381 NULL, unum, NULL, (uint64_t)-1); 1382 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1383 DATA_TYPE_NVLIST, resource, NULL); 1384 break; 1385 1386 case OPL_ERRID_PATH: 1387 /* 1388 * No resource is created but the cpumem DE will find 1389 * the defective path by retreiving EID from SFSR which is 1390 * included in the payload. 1391 */ 1392 break; 1393 } 1394 } 1395 1396 /* 1397 * Returns whether fault address is valid for this error bit and 1398 * whether the address is "in memory" (i.e. pf_is_memory returns 1). 
1399 */ 1400 /*ARGSUSED*/ 1401 static int 1402 cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit) 1403 { 1404 struct async_flt *aflt = (struct async_flt *)opl_flt; 1405 1406 if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) { 1407 return ((t_afsr_bit & SFSR_MEMORY) && 1408 pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)); 1409 } 1410 return (0); 1411 } 1412 1413 /* 1414 * In OPL SCF does the stick synchronization. 1415 */ 1416 void 1417 sticksync_slave(void) 1418 { 1419 } 1420 1421 /* 1422 * In OPL SCF does the stick synchronization. 1423 */ 1424 void 1425 sticksync_master(void) 1426 { 1427 } 1428 1429 /* 1430 * Cpu private unitialization. OPL cpus do not use the private area. 1431 */ 1432 void 1433 cpu_uninit_private(struct cpu *cp) 1434 { 1435 cmp_delete_cpu(cp->cpu_id); 1436 } 1437 1438 /* 1439 * Always flush an entire cache. 1440 */ 1441 void 1442 cpu_error_ecache_flush(void) 1443 { 1444 cpu_flush_ecache(); 1445 } 1446 1447 void 1448 cpu_ereport_post(struct async_flt *aflt) 1449 { 1450 char *cpu_type, buf[FM_MAX_CLASS]; 1451 nv_alloc_t *nva = NULL; 1452 nvlist_t *ereport, *detector, *resource; 1453 errorq_elem_t *eqep; 1454 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1455 1456 if (aflt->flt_panic || panicstr) { 1457 eqep = errorq_reserve(ereport_errorq); 1458 if (eqep == NULL) 1459 return; 1460 ereport = errorq_elem_nvl(ereport_errorq, eqep); 1461 nva = errorq_elem_nva(ereport_errorq, eqep); 1462 } else { 1463 ereport = fm_nvlist_create(nva); 1464 } 1465 1466 /* 1467 * Create the scheme "cpu" FMRI. 
1468 */ 1469 detector = fm_nvlist_create(nva); 1470 resource = fm_nvlist_create(nva); 1471 switch (cpunodes[aflt->flt_inst].implementation) { 1472 case OLYMPUS_C_IMPL: 1473 cpu_type = FM_EREPORT_CPU_SPARC64_VI; 1474 break; 1475 default: 1476 cpu_type = FM_EREPORT_CPU_UNSUPPORTED; 1477 break; 1478 } 1479 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1480 (u_longlong_t)cpunodes[aflt->flt_inst].device_id); 1481 (void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL, 1482 aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version, 1483 sbuf); 1484 1485 /* 1486 * Encode all the common data into the ereport. 1487 */ 1488 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", 1489 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class); 1490 1491 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 1492 fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL); 1493 1494 /* 1495 * Encode the error specific data that was saved in 1496 * the async_flt structure into the ereport. 1497 */ 1498 cpu_payload_add_aflt(aflt, ereport, resource); 1499 1500 if (aflt->flt_panic || panicstr) { 1501 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 1502 } else { 1503 (void) fm_ereport_post(ereport, EVCH_TRYHARD); 1504 fm_nvlist_destroy(ereport, FM_NVA_FREE); 1505 fm_nvlist_destroy(detector, FM_NVA_FREE); 1506 fm_nvlist_destroy(resource, FM_NVA_FREE); 1507 } 1508 } 1509 1510 void 1511 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 1512 { 1513 int status; 1514 ddi_fm_error_t de; 1515 1516 bzero(&de, sizeof (ddi_fm_error_t)); 1517 1518 de.fme_version = DDI_FME_VERSION; 1519 de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1); 1520 de.fme_flag = expected; 1521 de.fme_bus_specific = (void *)aflt->flt_addr; 1522 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 1523 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 1524 aflt->flt_panic = 1; 1525 } 1526 1527 void 1528 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 1529 errorq_t 
*eqp, uint_t flag) 1530 { 1531 struct async_flt *aflt = (struct async_flt *)payload; 1532 1533 aflt->flt_erpt_class = error_class; 1534 errorq_dispatch(eqp, payload, payload_sz, flag); 1535 } 1536 1537 void 1538 adjust_hw_copy_limits(int ecache_size) 1539 { 1540 /* 1541 * Set hw copy limits. 1542 * 1543 * /etc/system will be parsed later and can override one or more 1544 * of these settings. 1545 * 1546 * At this time, ecache size seems only mildly relevant. 1547 * We seem to run into issues with the d-cache and stalls 1548 * we see on misses. 1549 * 1550 * Cycle measurement indicates that 2 byte aligned copies fare 1551 * little better than doing things with VIS at around 512 bytes. 1552 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte 1553 * aligned is faster whenever the source and destination data 1554 * in cache and the total size is less than 2 Kbytes. The 2K 1555 * limit seems to be driven by the 2K write cache. 1556 * When more than 2K of copies are done in non-VIS mode, stores 1557 * backup in the write cache. In VIS mode, the write cache is 1558 * bypassed, allowing faster cache-line writes aligned on cache 1559 * boundaries. 1560 * 1561 * In addition, in non-VIS mode, there is no prefetching, so 1562 * for larger copies, the advantage of prefetching to avoid even 1563 * occasional cache misses is enough to justify using the VIS code. 1564 * 1565 * During testing, it was discovered that netbench ran 3% slower 1566 * when hw_copy_limit_8 was 2K or larger. Apparently for server 1567 * applications, data is only used once (copied to the output 1568 * buffer, then copied by the network device off the system). Using 1569 * the VIS copy saves more L2 cache state. Network copies are 1570 * around 1.3K to 1.5K in size for historical reasons. 1571 * 1572 * Therefore, a limit of 1K bytes will be used for the 8 byte 1573 * aligned copy even for large caches and 8 MB ecache. 
The 1574 * infrastructure to allow different limits for different sized 1575 * caches is kept to allow further tuning in later releases. 1576 */ 1577 1578 if (min_ecache_size == 0 && use_hw_bcopy) { 1579 /* 1580 * First time through - should be before /etc/system 1581 * is read. 1582 * Could skip the checks for zero but this lets us 1583 * preserve any debugger rewrites. 1584 */ 1585 if (hw_copy_limit_1 == 0) { 1586 hw_copy_limit_1 = VIS_COPY_THRESHOLD; 1587 priv_hcl_1 = hw_copy_limit_1; 1588 } 1589 if (hw_copy_limit_2 == 0) { 1590 hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD; 1591 priv_hcl_2 = hw_copy_limit_2; 1592 } 1593 if (hw_copy_limit_4 == 0) { 1594 hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD; 1595 priv_hcl_4 = hw_copy_limit_4; 1596 } 1597 if (hw_copy_limit_8 == 0) { 1598 hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; 1599 priv_hcl_8 = hw_copy_limit_8; 1600 } 1601 min_ecache_size = ecache_size; 1602 } else { 1603 /* 1604 * MP initialization. Called *after* /etc/system has 1605 * been parsed. One CPU has already been initialized. 1606 * Need to cater for /etc/system having scragged one 1607 * of our values. 1608 */ 1609 if (ecache_size == min_ecache_size) { 1610 /* 1611 * Same size ecache. We do nothing unless we 1612 * have a pessimistic ecache setting. In that 1613 * case we become more optimistic (if the cache is 1614 * large enough). 1615 */ 1616 if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) { 1617 /* 1618 * Need to adjust hw_copy_limit* from our 1619 * pessimistic uniprocessor value to a more 1620 * optimistic UP value *iff* it hasn't been 1621 * reset. 
1622 */ 1623 if ((ecache_size > 1048576) && 1624 (priv_hcl_8 == hw_copy_limit_8)) { 1625 if (ecache_size <= 2097152) 1626 hw_copy_limit_8 = 4 * 1627 VIS_COPY_THRESHOLD; 1628 else if (ecache_size <= 4194304) 1629 hw_copy_limit_8 = 4 * 1630 VIS_COPY_THRESHOLD; 1631 else 1632 hw_copy_limit_8 = 4 * 1633 VIS_COPY_THRESHOLD; 1634 priv_hcl_8 = hw_copy_limit_8; 1635 } 1636 } 1637 } else if (ecache_size < min_ecache_size) { 1638 /* 1639 * A different ecache size. Can this even happen? 1640 */ 1641 if (priv_hcl_8 == hw_copy_limit_8) { 1642 /* 1643 * The previous value that we set 1644 * is unchanged (i.e., it hasn't been 1645 * scragged by /etc/system). Rewrite it. 1646 */ 1647 if (ecache_size <= 1048576) 1648 hw_copy_limit_8 = 8 * 1649 VIS_COPY_THRESHOLD; 1650 else if (ecache_size <= 2097152) 1651 hw_copy_limit_8 = 8 * 1652 VIS_COPY_THRESHOLD; 1653 else if (ecache_size <= 4194304) 1654 hw_copy_limit_8 = 8 * 1655 VIS_COPY_THRESHOLD; 1656 else 1657 hw_copy_limit_8 = 10 * 1658 VIS_COPY_THRESHOLD; 1659 priv_hcl_8 = hw_copy_limit_8; 1660 min_ecache_size = ecache_size; 1661 } 1662 } 1663 } 1664 } 1665 1666 #define VIS_BLOCKSIZE 64 1667 1668 int 1669 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 1670 { 1671 int ret, watched; 1672 1673 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1674 ret = dtrace_blksuword32(addr, data, 0); 1675 if (watched) 1676 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1677 1678 return (ret); 1679 } 1680 1681 void 1682 opl_cpu_reg_init() 1683 { 1684 uint64_t this_cpu_log; 1685 1686 /* 1687 * We do not need to re-initialize cpu0 registers. 1688 */ 1689 if (cpu[getprocessorid()] == &cpu0) 1690 return; 1691 1692 /* 1693 * Initialize Error log Scratch register for error handling. 1694 */ 1695 1696 this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) + 1697 ERRLOG_BUFSZ * (getprocessorid()))); 1698 opl_error_setup(this_cpu_log); 1699 1700 /* 1701 * Enable MMU translating multiple page sizes for 1702 * sITLB and sDTLB. 
1703 */ 1704 opl_mpg_enable(); 1705 } 1706 1707 /* 1708 * Queue one event in ue_queue based on ecc_type_to_info entry. 1709 */ 1710 static void 1711 cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason, 1712 ecc_type_to_info_t *eccp) 1713 { 1714 struct async_flt *aflt = (struct async_flt *)opl_flt; 1715 1716 if (reason && 1717 strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { 1718 (void) strcat(reason, eccp->ec_reason); 1719 } 1720 1721 opl_flt->flt_bit = eccp->ec_afsr_bit; 1722 opl_flt->flt_type = eccp->ec_flt_type; 1723 aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit); 1724 aflt->flt_payload = eccp->ec_err_payload; 1725 1726 ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP)); 1727 cpu_errorq_dispatch(eccp->ec_err_class, 1728 (void *)opl_flt, sizeof (opl_async_flt_t), 1729 ue_queue, 1730 aflt->flt_panic); 1731 } 1732 1733 /* 1734 * Queue events on async event queue one event per error bit. 1735 * Return number of events queued. 1736 */ 1737 int 1738 cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs) 1739 { 1740 struct async_flt *aflt = (struct async_flt *)opl_flt; 1741 ecc_type_to_info_t *eccp; 1742 int nevents = 0; 1743 1744 /* 1745 * Queue expected errors, error bit and fault type must must match 1746 * in the ecc_type_to_info table. 1747 */ 1748 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 1749 eccp++) { 1750 if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && 1751 (eccp->ec_flags & aflt->flt_status) != 0) { 1752 /* 1753 * UE error event can be further 1754 * classified/breakdown into finer granularity 1755 * based on the flt_eid_mod value set by HW. We do 1756 * special handling here so that we can report UE 1757 * error in finer granularity as ue_mem, 1758 * ue_channel, ue_cpu or ue_path. 
1759 */ 1760 if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) { 1761 opl_flt->flt_eid_mod = 1762 (aflt->flt_stat & SFSR_EID_MOD) 1763 >> SFSR_EID_MOD_SHIFT; 1764 opl_flt->flt_eid_sid = 1765 (aflt->flt_stat & SFSR_EID_SID) 1766 >> SFSR_EID_SID_SHIFT; 1767 /* 1768 * Need to advance eccp pointer by flt_eid_mod 1769 * so that we get an appropriate ecc pointer 1770 * 1771 * EID # of advances 1772 * ---------------------------------- 1773 * OPL_ERRID_MEM 0 1774 * OPL_ERRID_CHANNEL 1 1775 * OPL_ERRID_CPU 2 1776 * OPL_ERRID_PATH 3 1777 */ 1778 eccp += opl_flt->flt_eid_mod; 1779 } 1780 cpu_queue_one_event(opl_flt, reason, eccp); 1781 t_afsr_errs &= ~eccp->ec_afsr_bit; 1782 nevents++; 1783 } 1784 } 1785 1786 return (nevents); 1787 } 1788 1789 /* 1790 * Sync. error wrapper functions. 1791 * We use these functions in order to transfer here from the 1792 * nucleus trap handler information about trap type (data or 1793 * instruction) and trap level (0 or above 0). This way we 1794 * get rid of using SFSR's reserved bits. 
1795 */ 1796 1797 #define OPL_SYNC_TL0 0 1798 #define OPL_SYNC_TL1 1 1799 #define OPL_ISYNC_ERR 0 1800 #define OPL_DSYNC_ERR 1 1801 1802 void 1803 opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1804 { 1805 uint64_t t_sfar = p_sfar; 1806 uint64_t t_sfsr = p_sfsr; 1807 1808 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1809 OPL_SYNC_TL0, OPL_ISYNC_ERR); 1810 } 1811 1812 void 1813 opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1814 { 1815 uint64_t t_sfar = p_sfar; 1816 uint64_t t_sfsr = p_sfsr; 1817 1818 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1819 OPL_SYNC_TL1, OPL_ISYNC_ERR); 1820 } 1821 1822 void 1823 opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1824 { 1825 uint64_t t_sfar = p_sfar; 1826 uint64_t t_sfsr = p_sfsr; 1827 1828 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1829 OPL_SYNC_TL0, OPL_DSYNC_ERR); 1830 } 1831 1832 void 1833 opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1834 { 1835 uint64_t t_sfar = p_sfar; 1836 uint64_t t_sfsr = p_sfsr; 1837 1838 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1839 OPL_SYNC_TL1, OPL_DSYNC_ERR); 1840 } 1841 1842 /* 1843 * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL 1844 * and TLB_PRT. 1845 * This function is designed based on cpu_deferred_error(). 1846 */ 1847 1848 static void 1849 opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr, 1850 uint_t tl, uint_t derr) 1851 { 1852 opl_async_flt_t opl_flt; 1853 struct async_flt *aflt; 1854 int trampolined = 0; 1855 char pr_reason[MAX_REASON_STRING]; 1856 uint64_t log_sfsr; 1857 int expected = DDI_FM_ERR_UNEXPECTED; 1858 ddi_acc_hdl_t *hp; 1859 1860 /* 1861 * We need to look at p_flag to determine if the thread detected an 1862 * error while dumping core. We can't grab p_lock here, but it's ok 1863 * because we just need a consistent snapshot and we know that everyone 1864 * else will store a consistent set of bits while holding p_lock. 
We 1865 * don't have to worry about a race because SDOCORE is set once prior 1866 * to doing i/o from the process's address space and is never cleared. 1867 */ 1868 uint_t pflag = ttoproc(curthread)->p_flag; 1869 1870 pr_reason[0] = '\0'; 1871 1872 /* 1873 * handle the specific error 1874 */ 1875 bzero(&opl_flt, sizeof (opl_async_flt_t)); 1876 aflt = (struct async_flt *)&opl_flt; 1877 aflt->flt_id = gethrtime_waitfree(); 1878 aflt->flt_bus_id = getprocessorid(); 1879 aflt->flt_inst = CPU->cpu_id; 1880 aflt->flt_stat = t_sfsr; 1881 aflt->flt_addr = t_sfar; 1882 aflt->flt_pc = (caddr_t)rp->r_pc; 1883 aflt->flt_prot = (uchar_t)AFLT_PROT_NONE; 1884 aflt->flt_class = (uchar_t)CPU_FAULT; 1885 aflt->flt_priv = (uchar_t) 1886 (tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ? 1 : 0)); 1887 aflt->flt_tl = (uchar_t)tl; 1888 aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 || 1889 (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0); 1890 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1891 aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP; 1892 1893 /* 1894 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain. 1895 * So, clear all error bits to avoid mis-handling and force the system 1896 * panicked. 1897 * We skip all the procedures below down to the panic message call. 1898 */ 1899 if (!(t_sfsr & SFSR_FV)) { 1900 opl_flt.flt_type = OPL_CPU_INV_SFSR; 1901 aflt->flt_panic = 1; 1902 aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC; 1903 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, 1904 (void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue, 1905 aflt->flt_panic); 1906 fm_panic("%sErrors(s)", "invalid SFSR"); 1907 } 1908 1909 /* 1910 * If either UE and MK bit is off, this is not valid UE error. 1911 * If it is not valid UE error, clear UE & MK_UE bits to prevent 1912 * mis-handling below. 1913 * aflt->flt_stat keeps the original bits as a reference. 
1914 */ 1915 if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) != 1916 (SFSR_MK_UE|SFSR_UE)) { 1917 t_sfsr &= ~(SFSR_MK_UE|SFSR_UE); 1918 } 1919 1920 /* 1921 * If the trap occurred in privileged mode at TL=0, we need to check to 1922 * see if we were executing in the kernel under on_trap() or t_lofault 1923 * protection. If so, modify the saved registers so that we return 1924 * from the trap to the appropriate trampoline routine. 1925 */ 1926 if (!aflt->flt_panic && aflt->flt_priv && tl == 0) { 1927 if (curthread->t_ontrap != NULL) { 1928 on_trap_data_t *otp = curthread->t_ontrap; 1929 1930 if (otp->ot_prot & OT_DATA_EC) { 1931 aflt->flt_prot = (uchar_t)AFLT_PROT_EC; 1932 otp->ot_trap |= (ushort_t)OT_DATA_EC; 1933 rp->r_pc = otp->ot_trampoline; 1934 rp->r_npc = rp->r_pc + 4; 1935 trampolined = 1; 1936 } 1937 1938 if ((t_sfsr & (SFSR_TO | SFSR_BERR)) && 1939 (otp->ot_prot & OT_DATA_ACCESS)) { 1940 aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS; 1941 otp->ot_trap |= (ushort_t)OT_DATA_ACCESS; 1942 rp->r_pc = otp->ot_trampoline; 1943 rp->r_npc = rp->r_pc + 4; 1944 trampolined = 1; 1945 /* 1946 * for peeks and caut_gets errors are expected 1947 */ 1948 hp = (ddi_acc_hdl_t *)otp->ot_handle; 1949 if (!hp) 1950 expected = DDI_FM_ERR_PEEK; 1951 else if (hp->ah_acc.devacc_attr_access == 1952 DDI_CAUTIOUS_ACC) 1953 expected = DDI_FM_ERR_EXPECTED; 1954 } 1955 1956 } else if (curthread->t_lofault) { 1957 aflt->flt_prot = AFLT_PROT_COPY; 1958 rp->r_g1 = EFAULT; 1959 rp->r_pc = curthread->t_lofault; 1960 rp->r_npc = rp->r_pc + 4; 1961 trampolined = 1; 1962 } 1963 } 1964 1965 /* 1966 * If we're in user mode or we're doing a protected copy, we either 1967 * want the ASTON code below to send a signal to the user process 1968 * or we want to panic if aft_panic is set. 1969 * 1970 * If we're in privileged mode and we're not doing a copy, then we 1971 * need to check if we've trampolined. If we haven't trampolined, 1972 * we should panic. 
1973 */ 1974 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 1975 if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO))) 1976 aflt->flt_panic |= aft_panic; 1977 } else if (!trampolined) { 1978 aflt->flt_panic = 1; 1979 } 1980 1981 /* 1982 * If we've trampolined due to a privileged TO or BERR, or if an 1983 * unprivileged TO or BERR occurred, we don't want to enqueue an 1984 * event for that TO or BERR. Queue all other events (if any) besides 1985 * the TO/BERR. 1986 */ 1987 log_sfsr = t_sfsr; 1988 if (trampolined) { 1989 log_sfsr &= ~(SFSR_TO | SFSR_BERR); 1990 } else if (!aflt->flt_priv) { 1991 /* 1992 * User mode, suppress messages if 1993 * cpu_berr_to_verbose is not set. 1994 */ 1995 if (!cpu_berr_to_verbose) 1996 log_sfsr &= ~(SFSR_TO | SFSR_BERR); 1997 } 1998 1999 if (((log_sfsr & SFSR_ERRS) && 2000 (cpu_queue_events(&opl_flt, pr_reason, t_sfsr) == 0)) || 2001 ((t_sfsr & SFSR_ERRS) == 0)) { 2002 opl_flt.flt_type = OPL_CPU_INV_SFSR; 2003 aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC; 2004 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, 2005 (void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue, 2006 aflt->flt_panic); 2007 } 2008 2009 if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) { 2010 cpu_run_bus_error_handlers(aflt, expected); 2011 } 2012 2013 /* 2014 * Panic here if aflt->flt_panic has been set. Enqueued errors will 2015 * be logged as part of the panic flow. 2016 */ 2017 if (aflt->flt_panic) { 2018 if (pr_reason[0] == 0) 2019 strcpy(pr_reason, "invalid SFSR "); 2020 2021 fm_panic("%sErrors(s)", pr_reason); 2022 } 2023 2024 /* 2025 * If we queued an error and we are going to return from the trap and 2026 * the error was in user mode or inside of a copy routine, set AST flag 2027 * so the queue will be drained before returning to user mode. The 2028 * AST processing will also act on our failure policy. 
2029 */ 2030 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 2031 int pcb_flag = 0; 2032 2033 if (t_sfsr & (SFSR_ERRS & 2034 ~(SFSR_BERR | SFSR_TO))) 2035 pcb_flag |= ASYNC_HWERR; 2036 2037 if (t_sfsr & SFSR_BERR) 2038 pcb_flag |= ASYNC_BERR; 2039 2040 if (t_sfsr & SFSR_TO) 2041 pcb_flag |= ASYNC_BTO; 2042 2043 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 2044 aston(curthread); 2045 } 2046 } 2047 2048 /*ARGSUSED*/ 2049 void 2050 opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl) 2051 { 2052 opl_async_flt_t opl_flt; 2053 struct async_flt *aflt; 2054 char pr_reason[MAX_REASON_STRING]; 2055 2056 /* normalize tl */ 2057 tl = (tl >= 2 ? 1 : 0); 2058 pr_reason[0] = '\0'; 2059 2060 bzero(&opl_flt, sizeof (opl_async_flt_t)); 2061 aflt = (struct async_flt *)&opl_flt; 2062 aflt->flt_id = gethrtime_waitfree(); 2063 aflt->flt_bus_id = getprocessorid(); 2064 aflt->flt_inst = CPU->cpu_id; 2065 aflt->flt_stat = p_ugesr; 2066 aflt->flt_pc = (caddr_t)rp->r_pc; 2067 aflt->flt_class = (uchar_t)CPU_FAULT; 2068 aflt->flt_tl = tl; 2069 aflt->flt_priv = (uchar_t) 2070 (tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ? 1 : 0)); 2071 aflt->flt_status = OPL_ECC_URGENT_TRAP; 2072 aflt->flt_panic = 1; 2073 /* 2074 * HW does not set mod/sid in case of urgent error. 2075 * So we have to set it here. 2076 */ 2077 opl_flt.flt_eid_mod = OPL_ERRID_CPU; 2078 opl_flt.flt_eid_sid = aflt->flt_inst; 2079 2080 if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) { 2081 opl_flt.flt_type = OPL_CPU_INV_UGESR; 2082 aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT; 2083 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG, 2084 (void *)&opl_flt, sizeof (opl_async_flt_t), 2085 ue_queue, aflt->flt_panic); 2086 } 2087 2088 fm_panic("Urgent Error"); 2089 } 2090 2091 /* 2092 * Initialization error counters resetting. 
2093 */ 2094 /* ARGSUSED */ 2095 static void 2096 opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when) 2097 { 2098 hdlr->cyh_func = (cyc_func_t)ras_cntr_reset; 2099 hdlr->cyh_level = CY_LOW_LEVEL; 2100 hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id; 2101 2102 when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU); 2103 when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval; 2104 } 2105 2106 void 2107 cpu_mp_init(void) 2108 { 2109 cyc_omni_handler_t hdlr; 2110 2111 hdlr.cyo_online = opl_ras_online; 2112 hdlr.cyo_offline = NULL; 2113 hdlr.cyo_arg = NULL; 2114 mutex_enter(&cpu_lock); 2115 (void) cyclic_add_omni(&hdlr); 2116 mutex_exit(&cpu_lock); 2117 } 2118 2119 int heaplp_use_stlb = -1; 2120 2121 void 2122 mmu_init_kernel_pgsz(struct hat *hat) 2123 { 2124 uint_t tte = page_szc(segkmem_lpsize); 2125 uchar_t new_cext_primary, new_cext_nucleus; 2126 2127 if (heaplp_use_stlb == 0) { 2128 /* do not reprogram stlb */ 2129 tte = TTE8K; 2130 } 2131 2132 new_cext_nucleus = TAGACCEXT_MKSZPAIR(tte, TTE8K); 2133 new_cext_primary = TAGACCEXT_MKSZPAIR(TTE8K, tte); 2134 2135 hat->sfmmu_cext = new_cext_primary; 2136 kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) | 2137 ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT); 2138 } 2139 2140 size_t 2141 mmu_get_kernel_lpsize(size_t lpsize) 2142 { 2143 uint_t tte; 2144 2145 if (lpsize == 0) { 2146 /* no setting for segkmem_lpsize in /etc/system: use default */ 2147 return (MMU_PAGESIZE4M); 2148 } 2149 2150 for (tte = TTE8K; tte <= TTE4M; tte++) { 2151 if (lpsize == TTEBYTES(tte)) 2152 return (lpsize); 2153 } 2154 2155 return (TTEBYTES(TTE8K)); 2156 } 2157 2158 /* 2159 * The following are functions that are unused in 2160 * OPL cpu module. They are defined here to resolve 2161 * dependencies in the "unix" module. 2162 * Unused functions that should never be called in 2163 * OPL are coded with ASSERT(0). 
2164 */ 2165 2166 void 2167 cpu_disable_errors(void) 2168 {} 2169 2170 void 2171 cpu_enable_errors(void) 2172 { ASSERT(0); } 2173 2174 /*ARGSUSED*/ 2175 void 2176 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t) 2177 { ASSERT(0); } 2178 2179 /*ARGSUSED*/ 2180 void 2181 cpu_faulted_enter(struct cpu *cp) 2182 {} 2183 2184 /*ARGSUSED*/ 2185 void 2186 cpu_faulted_exit(struct cpu *cp) 2187 {} 2188 2189 /*ARGSUSED*/ 2190 void 2191 cpu_check_allcpus(struct async_flt *aflt) 2192 {} 2193 2194 /*ARGSUSED*/ 2195 void 2196 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t) 2197 { ASSERT(0); } 2198 2199 /*ARGSUSED*/ 2200 void 2201 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz) 2202 { ASSERT(0); } 2203 2204 /*ARGSUSED*/ 2205 void 2206 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 2207 { ASSERT(0); } 2208 2209 /*ARGSUSED*/ 2210 void 2211 cpu_busy_ecache_scrub(struct cpu *cp) 2212 {} 2213 2214 /*ARGSUSED*/ 2215 void 2216 cpu_idle_ecache_scrub(struct cpu *cp) 2217 {} 2218 2219 /* ARGSUSED */ 2220 void 2221 cpu_change_speed(uint64_t divisor, uint64_t arg2) 2222 { ASSERT(0); } 2223 2224 void 2225 cpu_init_cache_scrub(void) 2226 {} 2227 2228 /* ARGSUSED */ 2229 int 2230 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 2231 { 2232 if (&plat_get_mem_sid) { 2233 return (plat_get_mem_sid(unum, buf, buflen, lenp)); 2234 } else { 2235 return (ENOTSUP); 2236 } 2237 } 2238 2239 /* ARGSUSED */ 2240 int 2241 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) 2242 { 2243 if (&plat_get_mem_addr) { 2244 return (plat_get_mem_addr(unum, sid, offset, addrp)); 2245 } else { 2246 return (ENOTSUP); 2247 } 2248 } 2249 2250 /* ARGSUSED */ 2251 int 2252 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) 2253 { 2254 if (&plat_get_mem_offset) { 2255 return (plat_get_mem_offset(flt_addr, offp)); 2256 } else { 2257 return (ENOTSUP); 2258 } 2259 } 2260 2261 /*ARGSUSED*/ 2262 void 2263 itlb_rd_entry(uint_t entry, tte_t *tte, 
uint64_t *va_tag) 2264 { ASSERT(0); } 2265 2266 /*ARGSUSED*/ 2267 void 2268 dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) 2269 { ASSERT(0); } 2270 2271 /*ARGSUSED*/ 2272 void 2273 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err) 2274 { ASSERT(0); } 2275 2276 /*ARGSUSED*/ 2277 int 2278 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp, 2279 errorq_elem_t *eqep, size_t afltoffset) 2280 { 2281 ASSERT(0); 2282 return (0); 2283 } 2284 2285 /*ARGSUSED*/ 2286 char * 2287 flt_to_error_type(struct async_flt *aflt) 2288 { 2289 ASSERT(0); 2290 return (NULL); 2291 } 2292