1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/systm.h> 30 #include <sys/ddi.h> 31 #include <sys/sysmacros.h> 32 #include <sys/archsystm.h> 33 #include <sys/vmsystm.h> 34 #include <sys/machparam.h> 35 #include <sys/machsystm.h> 36 #include <sys/machthread.h> 37 #include <sys/cpu.h> 38 #include <sys/cmp.h> 39 #include <sys/elf_SPARC.h> 40 #include <vm/vm_dep.h> 41 #include <vm/hat_sfmmu.h> 42 #include <vm/seg_kpm.h> 43 #include <sys/cpuvar.h> 44 #include <sys/opl_olympus_regs.h> 45 #include <sys/opl_module.h> 46 #include <sys/async.h> 47 #include <sys/cmn_err.h> 48 #include <sys/debug.h> 49 #include <sys/dditypes.h> 50 #include <sys/cpu_module.h> 51 #include <sys/sysmacros.h> 52 #include <sys/intreg.h> 53 #include <sys/clock.h> 54 #include <sys/platform_module.h> 55 #include <sys/ontrap.h> 56 #include <sys/panic.h> 57 #include <sys/memlist.h> 58 #include <sys/ndifm.h> 59 #include <sys/ddifm.h> 60 #include <sys/fm/protocol.h> 61 #include <sys/fm/util.h> 62 #include <sys/fm/cpu/SPARC64-VI.h> 63 #include <sys/dtrace.h> 64 #include <sys/watchpoint.h> 65 #include <sys/promif.h> 66 67 /* 68 * Internal functions. 69 */ 70 static int cpu_sync_log_err(void *flt); 71 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *); 72 static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t); 73 static int cpu_flt_in_memory(opl_async_flt_t *, uint64_t); 74 75 /* 76 * Error counters resetting interval. 77 */ 78 static int opl_async_check_interval = 60; /* 1 min */ 79 80 /* 81 * Maximum number of contexts for Olympus-C. 82 */ 83 #define MAX_NCTXS (1 << 13) 84 85 /* Will be set !NULL for SPARC64-VI and derivatives. */ 86 static uchar_t ctx_pgsz_arr[MAX_NCTXS]; 87 uchar_t *ctx_pgsz_array = ctx_pgsz_arr; 88 89 /* 90 * PA[22:0] represent Displacement in Jupiter 91 * configuration space. 92 */ 93 uint_t root_phys_addr_lo_mask = 0x7fffffu; 94 95 /* 96 * set in /etc/system to control logging of user BERR/TO's 97 */ 98 int cpu_berr_to_verbose = 0; 99 100 static int min_ecache_size; 101 static uint_t priv_hcl_1; 102 static uint_t priv_hcl_2; 103 static uint_t priv_hcl_4; 104 static uint_t priv_hcl_8; 105 106 /* 107 * Olympus error log 108 */ 109 static opl_errlog_t *opl_err_log; 110 111 /* 112 * UE is classified into four classes (MEM, CHANNEL, CPU, PATH). 113 * No any other ecc_type_info insertion is allowed in between the following 114 * four UE classess. 115 */ 116 ecc_type_to_info_t ecc_type_to_info[] = { 117 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 118 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 119 FM_EREPORT_CPU_UE_MEM, 120 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 121 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 122 FM_EREPORT_CPU_UE_CHANNEL, 123 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 124 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 125 FM_EREPORT_CPU_UE_CPU, 126 SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE, 127 "Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC, 128 FM_EREPORT_CPU_UE_PATH, 129 SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 130 "Bus Error", FM_EREPORT_PAYLOAD_SYNC, 131 FM_EREPORT_CPU_BERR, 132 SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 133 "Bus Timeout", FM_EREPORT_PAYLOAD_SYNC, 134 FM_EREPORT_CPU_BTO, 135 SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 136 "TLB MultiHit", FM_EREPORT_PAYLOAD_SYNC, 137 FM_EREPORT_CPU_MTLB, 138 SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS, 139 "TLB Parity", FM_EREPORT_PAYLOAD_SYNC, 140 FM_EREPORT_CPU_TLBP, 141 142 UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 143 "IAUG CRE", FM_EREPORT_PAYLOAD_URGENT, 144 FM_EREPORT_CPU_CRE, 145 UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT", 146 OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 147 "IAUG TSBCTXT", FM_EREPORT_PAYLOAD_URGENT, 148 FM_EREPORT_CPU_TSBCTX, 149 UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 150 "IUG TSBP", FM_EREPORT_PAYLOAD_URGENT, 151 FM_EREPORT_CPU_TSBP, 152 UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 153 "IUG PSTATE", FM_EREPORT_PAYLOAD_URGENT, 154 FM_EREPORT_CPU_PSTATE, 155 UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 156 "IUG TSTATE", FM_EREPORT_PAYLOAD_URGENT, 157 FM_EREPORT_CPU_TSTATE, 158 UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 159 "IUG FREG", FM_EREPORT_PAYLOAD_URGENT, 160 FM_EREPORT_CPU_IUG_F, 161 UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 162 "IUG RREG", FM_EREPORT_PAYLOAD_URGENT, 163 FM_EREPORT_CPU_IUG_R, 164 UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 165 "AUG SDC", FM_EREPORT_PAYLOAD_URGENT, 166 FM_EREPORT_CPU_SDC, 167 UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 168 "IUG WDT", FM_EREPORT_PAYLOAD_URGENT, 169 FM_EREPORT_CPU_WDT, 170 UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 171 "IUG DTLB", FM_EREPORT_PAYLOAD_URGENT, 172 FM_EREPORT_CPU_DTLB, 173 UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 174 "IUG ITLB", FM_EREPORT_PAYLOAD_URGENT, 175 FM_EREPORT_CPU_ITLB, 176 UGESR_IUG_COREERR, "IUG_COREERR", 177 OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 178 "IUG COREERR", FM_EREPORT_PAYLOAD_URGENT, 179 FM_EREPORT_CPU_CORE, 180 UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 181 "MULTI DAE", FM_EREPORT_PAYLOAD_URGENT, 182 FM_EREPORT_CPU_DAE, 183 UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 184 "MULTI IAE", FM_EREPORT_PAYLOAD_URGENT, 185 FM_EREPORT_CPU_IAE, 186 UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT, 187 "MULTI UGE", FM_EREPORT_PAYLOAD_URGENT, 188 FM_EREPORT_CPU_UGE, 189 0, NULL, 0, 0, 190 NULL, 0, 0, 191 }; 192 193 int (*p2get_mem_info)(int synd_code, uint64_t paddr, 194 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 195 int *segsp, int *banksp, int *mcidp); 196 197 198 /* 199 * Setup trap handlers for 0xA, 0x32, 0x40 trap types. 200 */ 201 void 202 cpu_init_trap(void) 203 { 204 OPL_SET_TRAP(tt0_iae, opl_serr_instr); 205 OPL_SET_TRAP(tt1_iae, opl_serr_instr); 206 OPL_SET_TRAP(tt0_dae, opl_serr_instr); 207 OPL_SET_TRAP(tt1_dae, opl_serr_instr); 208 OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr); 209 OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr); 210 } 211 212 static int 213 getintprop(pnode_t node, char *name, int deflt) 214 { 215 int value; 216 217 switch (prom_getproplen(node, name)) { 218 case sizeof (int): 219 (void) prom_getprop(node, name, (caddr_t)&value); 220 break; 221 222 default: 223 value = deflt; 224 break; 225 } 226 227 return (value); 228 } 229 230 /* 231 * Set the magic constants of the implementation. 232 */ 233 /*ARGSUSED*/ 234 void 235 cpu_fiximp(pnode_t dnode) 236 { 237 int i, a; 238 extern int vac_size, vac_shift; 239 extern uint_t vac_mask; 240 241 static struct { 242 char *name; 243 int *var; 244 int defval; 245 } prop[] = { 246 "l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE, 247 "l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE, 248 "l1-icache-size", &icache_size, OPL_ICACHE_SIZE, 249 "l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE, 250 "l2-cache-size", &ecache_size, OPL_ECACHE_SIZE, 251 "l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE, 252 "l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY 253 }; 254 255 for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) 256 *prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval); 257 258 ecache_setsize = ecache_size / ecache_associativity; 259 260 vac_size = OPL_VAC_SIZE; 261 vac_mask = MMU_PAGEMASK & (vac_size - 1); 262 i = 0; a = vac_size; 263 while (a >>= 1) 264 ++i; 265 vac_shift = i; 266 shm_alignment = vac_size; 267 vac = 1; 268 } 269 270 void 271 send_mondo_set(cpuset_t set) 272 { 273 int lo, busy, nack, shipped = 0; 274 uint16_t i, cpuids[IDSR_BN_SETS]; 275 uint64_t idsr, nackmask = 0, busymask, curnack, curbusy; 276 uint64_t starttick, endtick, tick, lasttick; 277 #if (NCPU > IDSR_BN_SETS) 278 int index = 0; 279 int ncpuids = 0; 280 #endif 281 #ifdef OLYMPUS_C_REV_A_ERRATA_XCALL 282 int bn_sets = IDSR_BN_SETS; 283 uint64_t ver; 284 285 ASSERT(NCPU > bn_sets); 286 #endif 287 288 ASSERT(!CPUSET_ISNULL(set)); 289 starttick = lasttick = gettick(); 290 291 #ifdef OLYMPUS_C_REV_A_ERRATA_XCALL 292 ver = ultra_getver(); 293 if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) && 294 ((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A)) 295 bn_sets = 1; 296 #endif 297 298 #if (NCPU <= IDSR_BN_SETS) 299 for (i = 0; i < NCPU; i++) 300 if (CPU_IN_SET(set, i)) { 301 shipit(i, shipped); 302 nackmask |= IDSR_NACK_BIT(shipped); 303 cpuids[shipped++] = i; 304 CPUSET_DEL(set, i); 305 if (CPUSET_ISNULL(set)) 306 break; 307 } 308 CPU_STATS_ADDQ(CPU, sys, xcalls, shipped); 309 #else 310 for (i = 0; i < NCPU; i++) 311 if (CPU_IN_SET(set, i)) { 312 ncpuids++; 313 314 /* 315 * Ship only to the first (IDSR_BN_SETS) CPUs. If we 316 * find we have shipped to more than (IDSR_BN_SETS) 317 * CPUs, set "index" to the highest numbered CPU in 318 * the set so we can ship to other CPUs a bit later on. 319 */ 320 #ifdef OLYMPUS_C_REV_A_ERRATA_XCALL 321 if (shipped < bn_sets) { 322 #else 323 if (shipped < IDSR_BN_SETS) { 324 #endif 325 shipit(i, shipped); 326 nackmask |= IDSR_NACK_BIT(shipped); 327 cpuids[shipped++] = i; 328 CPUSET_DEL(set, i); 329 if (CPUSET_ISNULL(set)) 330 break; 331 } else 332 index = (int)i; 333 } 334 335 CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids); 336 #endif 337 338 busymask = IDSR_NACK_TO_BUSY(nackmask); 339 busy = nack = 0; 340 endtick = starttick + xc_tick_limit; 341 for (;;) { 342 idsr = getidsr(); 343 #if (NCPU <= IDSR_BN_SETS) 344 if (idsr == 0) 345 break; 346 #else 347 if (idsr == 0 && shipped == ncpuids) 348 break; 349 #endif 350 tick = gettick(); 351 /* 352 * If there is a big jump between the current tick 353 * count and lasttick, we have probably hit a break 354 * point. Adjust endtick accordingly to avoid panic. 355 */ 356 if (tick > (lasttick + xc_tick_jump_limit)) 357 endtick += (tick - lasttick); 358 lasttick = tick; 359 if (tick > endtick) { 360 if (panic_quiesce) 361 return; 362 cmn_err(CE_CONT, "send mondo timeout " 363 "[%d NACK %d BUSY]\nIDSR 0x%" 364 "" PRIx64 " cpuids:", nack, busy, idsr); 365 #ifdef OLYMPUS_C_REV_A_ERRATA_XCALL 366 for (i = 0; i < bn_sets; i++) { 367 #else 368 for (i = 0; i < IDSR_BN_SETS; i++) { 369 #endif 370 if (idsr & (IDSR_NACK_BIT(i) | 371 IDSR_BUSY_BIT(i))) { 372 cmn_err(CE_CONT, " 0x%x", 373 cpuids[i]); 374 } 375 } 376 cmn_err(CE_CONT, "\n"); 377 cmn_err(CE_PANIC, "send_mondo_set: timeout"); 378 } 379 curnack = idsr & nackmask; 380 curbusy = idsr & busymask; 381 382 #ifdef OLYMPUS_C_REV_B_ERRATA_XCALL 383 /* 384 * Only proceed to send more xcalls if all the 385 * cpus in the previous IDSR_BN_SETS were completed. 386 */ 387 if (curbusy) { 388 busy++; 389 continue; 390 } 391 #endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */ 392 393 #if (NCPU > IDSR_BN_SETS) 394 if (shipped < ncpuids) { 395 uint64_t cpus_left; 396 uint16_t next = (uint16_t)index; 397 398 cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) & 399 busymask; 400 401 if (cpus_left) { 402 do { 403 /* 404 * Sequence through and ship to the 405 * remainder of the CPUs in the system 406 * (e.g. other than the first 407 * (IDSR_BN_SETS)) in reverse order. 408 */ 409 lo = lowbit(cpus_left) - 1; 410 i = IDSR_BUSY_IDX(lo); 411 shipit(next, i); 412 shipped++; 413 cpuids[i] = next; 414 415 /* 416 * If we've processed all the CPUs, 417 * exit the loop now and save 418 * instructions. 419 */ 420 if (shipped == ncpuids) 421 break; 422 423 for ((index = ((int)next - 1)); 424 index >= 0; index--) 425 if (CPU_IN_SET(set, index)) { 426 next = (uint16_t)index; 427 break; 428 } 429 430 cpus_left &= ~(1ull << lo); 431 } while (cpus_left); 432 continue; 433 } 434 } 435 #endif 436 #ifndef OLYMPUS_C_REV_B_ERRATA_XCALL 437 if (curbusy) { 438 busy++; 439 continue; 440 } 441 #endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */ 442 #ifdef SEND_MONDO_STATS 443 { 444 int n = gettick() - starttick; 445 if (n < 8192) 446 x_nack_stimes[n >> 7]++; 447 } 448 #endif 449 while (gettick() < (tick + sys_clock_mhz)) 450 ; 451 do { 452 lo = lowbit(curnack) - 1; 453 i = IDSR_NACK_IDX(lo); 454 shipit(cpuids[i], i); 455 curnack &= ~(1ull << lo); 456 } while (curnack); 457 nack++; 458 busy = 0; 459 } 460 #ifdef SEND_MONDO_STATS 461 { 462 int n = gettick() - starttick; 463 if (n < 8192) 464 x_set_stimes[n >> 7]++; 465 else 466 x_set_ltimes[(n >> 13) & 0xf]++; 467 } 468 x_set_cpus[shipped]++; 469 #endif 470 } 471 472 /* 473 * Cpu private initialization. 474 */ 475 void 476 cpu_init_private(struct cpu *cp) 477 { 478 if (!(IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation))) { 479 cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI is supported", 480 cp->cpu_id, cpunodes[cp->cpu_id].implementation); 481 } 482 483 adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size); 484 } 485 486 void 487 cpu_setup(void) 488 { 489 extern int at_flags; 490 extern int disable_delay_tlb_flush, delay_tlb_flush; 491 extern int cpc_has_overflow_intr; 492 extern int disable_text_largepages; 493 extern int use_text_pgsz4m; 494 uint64_t cpu0_log; 495 extern uint64_t opl_cpu0_err_log; 496 497 /* 498 * Initialize Error log Scratch register for error handling. 499 */ 500 501 cpu0_log = va_to_pa(&opl_cpu0_err_log); 502 opl_error_setup(cpu0_log); 503 504 /* 505 * Enable MMU translating multiple page sizes for 506 * sITLB and sDTLB. 507 */ 508 opl_mpg_enable(); 509 510 /* 511 * Setup chip-specific trap handlers. 512 */ 513 cpu_init_trap(); 514 515 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); 516 517 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3; 518 519 /* 520 * Use the maximum number of contexts available for SPARC64-VI 521 * unless it has been tuned for debugging. 522 * We are checking against 0 here since this value can be patched 523 * while booting. It can not be patched via /etc/system since it 524 * will be patched too late and thus cause the system to panic. 525 */ 526 if (nctxs == 0) 527 nctxs = MAX_NCTXS; 528 529 /* 530 * Due to the number of entries in the fully-associative tlb 531 * this may have to be tuned lower than in spitfire. 532 */ 533 pp_slots = MIN(8, MAXPP_SLOTS); 534 535 /* 536 * Block stores do not invalidate all pages of the d$, pagecopy 537 * et. al. need virtual translations with virtual coloring taken 538 * into consideration. prefetch/ldd will pollute the d$ on the 539 * load side. 540 */ 541 pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE; 542 543 if (use_page_coloring) { 544 do_pg_coloring = 1; 545 if (use_virtual_coloring) 546 do_virtual_coloring = 1; 547 } 548 549 isa_list = 550 "sparcv9+vis2 sparcv9+vis sparcv9 " 551 "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus " 552 "sparcv8 sparcv8-fsmuld sparcv7 sparc"; 553 554 cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2; 555 556 /* 557 * On SPARC64-VI, there's no hole in the virtual address space 558 */ 559 hole_start = hole_end = 0; 560 561 /* 562 * The kpm mapping window. 563 * kpm_size: 564 * The size of a single kpm range. 565 * The overall size will be: kpm_size * vac_colors. 566 * kpm_vbase: 567 * The virtual start address of the kpm range within the kernel 568 * virtual address space. kpm_vbase has to be kpm_size aligned. 569 */ 570 kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */ 571 kpm_size_shift = 47; 572 kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */ 573 kpm_smallpages = 1; 574 575 /* 576 * The traptrace code uses either %tick or %stick for 577 * timestamping. We have %stick so we can use it. 578 */ 579 traptrace_use_stick = 1; 580 581 /* 582 * SPARC64-VI has a performance counter overflow interrupt 583 */ 584 cpc_has_overflow_intr = 1; 585 586 /* 587 * Use SPARC64-VI flush-all support 588 */ 589 if (!disable_delay_tlb_flush) 590 delay_tlb_flush = 1; 591 592 /* 593 * Declare that this architecture/cpu combination does not support 594 * fpRAS. 595 */ 596 fpras_implemented = 0; 597 598 /* 599 * Enable 4M pages to be used for mapping user text by default. Don't 600 * use large pages for initialized data segments since we may not know 601 * at exec() time what should be the preferred large page size for DTLB 602 * programming. 603 */ 604 use_text_pgsz4m = 1; 605 disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) | 606 (1 << TTE32M) | (1 << TTE256M); 607 } 608 609 /* 610 * Called by setcpudelay 611 */ 612 void 613 cpu_init_tick_freq(void) 614 { 615 /* 616 * For SPARC64-VI we want to use the system clock rate as 617 * the basis for low level timing, due to support of mixed 618 * speed CPUs and power managment. 619 */ 620 if (system_clock_freq == 0) 621 cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq"); 622 623 sys_tick_freq = system_clock_freq; 624 } 625 626 #ifdef SEND_MONDO_STATS 627 uint32_t x_one_stimes[64]; 628 uint32_t x_one_ltimes[16]; 629 uint32_t x_set_stimes[64]; 630 uint32_t x_set_ltimes[16]; 631 uint32_t x_set_cpus[NCPU]; 632 uint32_t x_nack_stimes[64]; 633 #endif 634 635 /* 636 * Note: A version of this function is used by the debugger via the KDI, 637 * and must be kept in sync with this version. Any changes made to this 638 * function to support new chips or to accomodate errata must also be included 639 * in the KDI-specific version. See us3_kdi.c. 640 */ 641 void 642 send_one_mondo(int cpuid) 643 { 644 int busy, nack; 645 uint64_t idsr, starttick, endtick, tick, lasttick; 646 uint64_t busymask; 647 648 CPU_STATS_ADDQ(CPU, sys, xcalls, 1); 649 starttick = lasttick = gettick(); 650 shipit(cpuid, 0); 651 endtick = starttick + xc_tick_limit; 652 busy = nack = 0; 653 busymask = IDSR_BUSY; 654 for (;;) { 655 idsr = getidsr(); 656 if (idsr == 0) 657 break; 658 659 tick = gettick(); 660 /* 661 * If there is a big jump between the current tick 662 * count and lasttick, we have probably hit a break 663 * point. Adjust endtick accordingly to avoid panic. 664 */ 665 if (tick > (lasttick + xc_tick_jump_limit)) 666 endtick += (tick - lasttick); 667 lasttick = tick; 668 if (tick > endtick) { 669 if (panic_quiesce) 670 return; 671 cmn_err(CE_PANIC, "send mondo timeout " 672 "(target 0x%x) [%d NACK %d BUSY]", 673 cpuid, nack, busy); 674 } 675 676 if (idsr & busymask) { 677 busy++; 678 continue; 679 } 680 drv_usecwait(1); 681 shipit(cpuid, 0); 682 nack++; 683 busy = 0; 684 } 685 #ifdef SEND_MONDO_STATS 686 { 687 int n = gettick() - starttick; 688 if (n < 8192) 689 x_one_stimes[n >> 7]++; 690 else 691 x_one_ltimes[(n >> 13) & 0xf]++; 692 } 693 #endif 694 } 695 696 /* 697 * init_mmu_page_sizes is set to one after the bootup time initialization 698 * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a 699 * valid value. 700 * 701 * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific 702 * versions of disable_ism_large_pages and disable_large_pages, and feed back 703 * into those two hat variables at hat initialization time. 704 * 705 */ 706 int init_mmu_page_sizes = 0; 707 static int mmu_disable_ism_large_pages = ((1 << TTE64K) | 708 (1 << TTE512K) | (1 << TTE256M)); 709 static int mmu_disable_large_pages = 0; 710 711 /* 712 * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support. 713 * Called during very early bootup from check_cpus_set(). 714 * Can be called to verify that mmu_page_sizes are set up correctly. 715 * 716 * Set Olympus defaults. We do not use the function parameter. 717 */ 718 /*ARGSUSED*/ 719 int 720 mmu_init_mmu_page_sizes(int32_t not_used) 721 { 722 if (!init_mmu_page_sizes) { 723 mmu_page_sizes = MMU_PAGE_SIZES; 724 mmu_hashcnt = MAX_HASHCNT; 725 mmu_ism_pagesize = MMU_PAGESIZE32M; 726 mmu_exported_pagesize_mask = (1 << TTE8K) | 727 (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | 728 (1 << TTE32M) | (1 << TTE256M); 729 init_mmu_page_sizes = 1; 730 return (0); 731 } 732 return (1); 733 } 734 735 /* SPARC64-VI worst case DTLB parameters */ 736 #ifndef LOCKED_DTLB_ENTRIES 737 #define LOCKED_DTLB_ENTRIES 5 /* 2 user TSBs, 2 nucleus, + OBP */ 738 #endif 739 #define TOTAL_DTLB_ENTRIES 32 740 #define AVAIL_32M_ENTRIES 0 741 #define AVAIL_256M_ENTRIES 0 742 #define AVAIL_DTLB_ENTRIES (TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES) 743 static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = { 744 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 745 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 746 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES}; 747 748 size_t 749 mmu_map_pgsz(size_t pgsize) 750 { 751 struct proc *p = curproc; 752 struct as *as = p->p_as; 753 struct hat *hat = as->a_hat; 754 uint_t pgsz0, pgsz1; 755 size_t size0, size1; 756 757 ASSERT(mmu_page_sizes == max_mmu_page_sizes); 758 pgsz0 = hat->sfmmu_pgsz[0]; 759 pgsz1 = hat->sfmmu_pgsz[1]; 760 size0 = hw_page_array[pgsz0].hp_size; 761 size1 = hw_page_array[pgsz1].hp_size; 762 /* Allow use of a larger pagesize if neither TLB is reprogrammed. */ 763 if ((pgsz0 == TTE8K) && (pgsz1 == TTE8K)) { 764 return (pgsize); 765 /* Allow use of requested pagesize if TLB is reprogrammed to it. */ 766 } else if ((pgsize == size0) || (pgsize == size1)) { 767 return (pgsize); 768 /* Use larger reprogrammed TLB size if pgsize is atleast that big. */ 769 } else if (pgsz1 > pgsz0) { 770 if (pgsize >= size1) 771 return (size1); 772 /* Use smaller reprogrammed TLB size if pgsize is atleast that big. */ 773 } else { 774 if (pgsize >= size0) 775 return (size0); 776 } 777 return (pgsize); 778 } 779 780 /* 781 * The function returns the mmu-specific values for the 782 * hat's disable_large_pages and disable_ism_large_pages variables. 783 */ 784 int 785 mmu_large_pages_disabled(uint_t flag) 786 { 787 int pages_disable = 0; 788 789 if (flag == HAT_LOAD) { 790 pages_disable = mmu_disable_large_pages; 791 } else if (flag == HAT_LOAD_SHARE) { 792 pages_disable = mmu_disable_ism_large_pages; 793 } 794 return (pages_disable); 795 } 796 797 /* 798 * mmu_init_large_pages is called with the desired ism_pagesize parameter. 799 * It may be called from set_platform_defaults, if some value other than 32M 800 * is desired. mmu_ism_pagesize is the tunable. If it has a bad value, 801 * then only warn, since it would be bad form to panic due to a user typo. 802 * 803 * The function re-initializes the mmu_disable_ism_large_pages variable. 804 */ 805 void 806 mmu_init_large_pages(size_t ism_pagesize) 807 { 808 switch (ism_pagesize) { 809 case MMU_PAGESIZE4M: 810 mmu_disable_ism_large_pages = ((1 << TTE64K) | 811 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 812 break; 813 case MMU_PAGESIZE32M: 814 mmu_disable_ism_large_pages = ((1 << TTE64K) | 815 (1 << TTE512K) | (1 << TTE256M)); 816 break; 817 case MMU_PAGESIZE256M: 818 mmu_disable_ism_large_pages = ((1 << TTE64K) | 819 (1 << TTE512K) | (1 << TTE32M)); 820 break; 821 default: 822 cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx", 823 ism_pagesize); 824 break; 825 } 826 } 827 828 /*ARGSUSED*/ 829 uint_t 830 mmu_preferred_pgsz(struct hat *hat, caddr_t addr, size_t len) 831 { 832 sfmmu_t *sfmmup = (sfmmu_t *)hat; 833 uint_t pgsz0, pgsz1; 834 uint_t szc, maxszc = mmu_page_sizes - 1; 835 size_t pgsz; 836 extern int disable_large_pages; 837 838 pgsz0 = (uint_t)sfmmup->sfmmu_pgsz[0]; 839 pgsz1 = (uint_t)sfmmup->sfmmu_pgsz[1]; 840 841 /* 842 * If either of the TLBs are reprogrammed, choose 843 * the largest mapping size as the preferred size, 844 * if it fits the size and alignment constraints. 845 * Else return the largest mapping size that fits, 846 * if neither TLB is reprogrammed. 847 */ 848 if (pgsz0 > TTE8K || pgsz1 > TTE8K) { 849 if (pgsz1 > pgsz0) { /* First try pgsz1 */ 850 pgsz = hw_page_array[pgsz1].hp_size; 851 if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz)) 852 return (pgsz1); 853 } 854 if (pgsz0 > TTE8K) { /* Then try pgsz0, if !TTE8K */ 855 pgsz = hw_page_array[pgsz0].hp_size; 856 if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz)) 857 return (pgsz0); 858 } 859 } else { /* Otherwise pick best fit if neither TLB is reprogrammed. */ 860 for (szc = maxszc; szc > TTE8K; szc--) { 861 if (disable_large_pages & (1 << szc)) 862 continue; 863 864 pgsz = hw_page_array[szc].hp_size; 865 if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz)) 866 return (szc); 867 } 868 } 869 return (TTE8K); 870 } 871 872 /* 873 * Function to reprogram the TLBs when page sizes used 874 * by a process change significantly. 875 */ 876 void 877 mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt) 878 { 879 extern int page_szc(size_t); 880 uint8_t pgsz0, pgsz1; 881 882 /* 883 * Don't program 2nd dtlb for kernel and ism hat 884 */ 885 if (hat->sfmmu_ismhat || hat == ksfmmup) 886 return; 887 888 /* 889 * hat->sfmmu_pgsz[] is an array whose elements 890 * contain a sorted order of page sizes. Element 891 * 0 is the most commonly used page size, followed 892 * by element 1, and so on. 893 * 894 * ttecnt[] is an array of per-page-size page counts 895 * mapped into the process. 896 * 897 * If the HAT's choice for page sizes is unsuitable, 898 * we can override it here. The new values written 899 * to the array will be handed back to us later to 900 * do the actual programming of the TLB hardware. 901 * 902 */ 903 pgsz0 = (uint8_t)MIN(hat->sfmmu_pgsz[0], hat->sfmmu_pgsz[1]); 904 pgsz1 = (uint8_t)MAX(hat->sfmmu_pgsz[0], hat->sfmmu_pgsz[1]); 905 906 /* 907 * This implements PAGESIZE programming of the sTLB 908 * if large TTE counts don't exceed the thresholds. 909 */ 910 if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0]) 911 pgsz0 = page_szc(MMU_PAGESIZE); 912 if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1]) 913 pgsz1 = page_szc(MMU_PAGESIZE); 914 hat->sfmmu_pgsz[0] = pgsz0; 915 hat->sfmmu_pgsz[1] = pgsz1; 916 /* otherwise, accept what the HAT chose for us */ 917 } 918 919 /* 920 * The HAT calls this function when an MMU context is allocated so that we 921 * can reprogram the large TLBs appropriately for the new process using 922 * the context. 923 * 924 * The caller must hold the HAT lock. 925 */ 926 void 927 mmu_set_ctx_page_sizes(struct hat *hat) 928 { 929 uint8_t pgsz0, pgsz1; 930 uint8_t new_cext; 931 932 ASSERT(sfmmu_hat_lock_held(hat)); 933 /* 934 * Don't program 2nd dtlb for kernel and ism hat 935 */ 936 if (hat->sfmmu_ismhat || hat == ksfmmup) 937 return; 938 939 /* 940 * If supported, reprogram the TLBs to a larger pagesize. 941 */ 942 pgsz0 = hat->sfmmu_pgsz[0]; 943 pgsz1 = hat->sfmmu_pgsz[1]; 944 ASSERT(pgsz0 < mmu_page_sizes); 945 ASSERT(pgsz1 < mmu_page_sizes); 946 new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0); 947 if (hat->sfmmu_cext != new_cext) { 948 hat->sfmmu_cext = new_cext; 949 } 950 ctx_pgsz_array[hat->sfmmu_cnum] = hat->sfmmu_cext; 951 /* 952 * sfmmu_setctx_sec() will take care of the 953 * rest of the dirty work for us. 954 */ 955 } 956 957 /* 958 * Return processor specific async error structure 959 * size used. 960 */ 961 int 962 cpu_aflt_size(void) 963 { 964 return (sizeof (opl_async_flt_t)); 965 } 966 967 /* 968 * The cpu_sync_log_err() function is called via the [uc]e_drain() function to 969 * post-process CPU events that are dequeued. As such, it can be invoked 970 * from softint context, from AST processing in the trap() flow, or from the 971 * panic flow. We decode the CPU-specific data, and take appropriate actions. 972 * Historically this entry point was used to log the actual cmn_err(9F) text; 973 * now with FMA it is used to prepare 'flt' to be converted into an ereport. 974 * With FMA this function now also returns a flag which indicates to the 975 * caller whether the ereport should be posted (1) or suppressed (0). 976 */ 977 /*ARGSUSED*/ 978 static int 979 cpu_sync_log_err(void *flt) 980 { 981 opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt; 982 struct async_flt *aflt = (struct async_flt *)flt; 983 984 /* 985 * No extra processing of urgent error events. 986 * Always generate ereports for these events. 987 */ 988 if (aflt->flt_status == OPL_ECC_URGENT_TRAP) 989 return (1); 990 991 /* 992 * Additional processing for synchronous errors. 993 */ 994 switch (opl_flt->flt_type) { 995 case OPL_CPU_INV_SFSR: 996 return (1); 997 998 case OPL_CPU_SYNC_UE: 999 /* 1000 * The validity: SFSR_MK_UE bit has been checked 1001 * in opl_cpu_sync_error() 1002 * No more check is required. 1003 * 1004 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W, 1005 * and they have been retrieved in cpu_queue_events() 1006 */ 1007 1008 if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) { 1009 ASSERT(aflt->flt_in_memory); 1010 /* 1011 * We want to skip logging only if ALL the following 1012 * conditions are true: 1013 * 1014 * 1. We are not panicing already. 1015 * 2. The error is a memory error. 1016 * 3. There is only one error. 1017 * 4. The error is on a retired page. 1018 * 5. The error occurred under on_trap 1019 * protection AFLT_PROT_EC 1020 */ 1021 if (!panicstr && aflt->flt_prot == AFLT_PROT_EC && 1022 page_retire_check(aflt->flt_addr, NULL) == 0) { 1023 /* 1024 * Do not log an error from 1025 * the retired page 1026 */ 1027 softcall(ecc_page_zero, (void *)aflt->flt_addr); 1028 return (0); 1029 } 1030 if (!panicstr) 1031 cpu_page_retire(opl_flt); 1032 } 1033 return (1); 1034 1035 case OPL_CPU_SYNC_OTHERS: 1036 /* 1037 * For the following error cases, the processor HW does 1038 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt 1039 * to assign appropriate values here to reflect what we 1040 * think is the most likely cause of the problem w.r.t to 1041 * the particular error event. For Buserr and timeout 1042 * error event, we will assign OPL_ERRID_CHANNEL as the 1043 * most likely reason. For TLB parity or multiple hit 1044 * error events, we will assign the reason as 1045 * OPL_ERRID_CPU (cpu related problem) and set the 1046 * flt_eid_sid to point to the cpuid. 1047 */ 1048 1049 if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) { 1050 /* 1051 * flt_eid_sid will not be used for this case. 1052 */ 1053 opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL; 1054 } 1055 if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) { 1056 opl_flt->flt_eid_mod = OPL_ERRID_CPU; 1057 opl_flt->flt_eid_sid = aflt->flt_inst; 1058 } 1059 1060 /* 1061 * In case of no effective error bit 1062 */ 1063 if ((opl_flt->flt_bit & SFSR_ERRS) == 0) { 1064 opl_flt->flt_eid_mod = OPL_ERRID_CPU; 1065 opl_flt->flt_eid_sid = aflt->flt_inst; 1066 } 1067 break; 1068 1069 default: 1070 return (1); 1071 } 1072 return (1); 1073 } 1074 1075 /* 1076 * Retire the bad page that may contain the flushed error. 1077 */ 1078 void 1079 cpu_page_retire(opl_async_flt_t *opl_flt) 1080 { 1081 struct async_flt *aflt = (struct async_flt *)opl_flt; 1082 (void) page_retire(aflt->flt_addr, PR_UE); 1083 } 1084 1085 /* 1086 * Invoked by error_init() early in startup and therefore before 1087 * startup_errorq() is called to drain any error Q - 1088 * 1089 * startup() 1090 * startup_end() 1091 * error_init() 1092 * cpu_error_init() 1093 * errorq_init() 1094 * errorq_drain() 1095 * start_other_cpus() 1096 * 1097 * The purpose of this routine is to create error-related taskqs. Taskqs 1098 * are used for this purpose because cpu_lock can't be grabbed from interrupt 1099 * context. 1100 * 1101 */ 1102 /*ARGSUSED*/ 1103 void 1104 cpu_error_init(int items) 1105 { 1106 opl_err_log = (opl_errlog_t *) 1107 kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP); 1108 if ((uint64_t)opl_err_log & MMU_PAGEOFFSET) 1109 cmn_err(CE_PANIC, "The base address of the error log " 1110 "is not page aligned"); 1111 } 1112 1113 /* 1114 * We route all errors through a single switch statement. 1115 */ 1116 void 1117 cpu_ue_log_err(struct async_flt *aflt) 1118 { 1119 switch (aflt->flt_class) { 1120 case CPU_FAULT: 1121 if (cpu_sync_log_err(aflt)) 1122 cpu_ereport_post(aflt); 1123 break; 1124 1125 case BUS_FAULT: 1126 bus_async_log_err(aflt); 1127 break; 1128 1129 default: 1130 cmn_err(CE_WARN, "discarding async error %p with invalid " 1131 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 1132 return; 1133 } 1134 } 1135 1136 /* 1137 * Routine for panic hook callback from panic_idle(). 1138 * 1139 * Nothing to do here. 1140 */ 1141 void 1142 cpu_async_panic_callb(void) 1143 { 1144 } 1145 1146 /* 1147 * Routine to return a string identifying the physical name 1148 * associated with a memory/cache error. 1149 */ 1150 /*ARGSUSED*/ 1151 int 1152 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, 1153 uint64_t flt_addr, int flt_bus_id, int flt_in_memory, 1154 ushort_t flt_status, char *buf, int buflen, int *lenp) 1155 { 1156 int synd_code; 1157 int ret; 1158 1159 /* 1160 * An AFSR of -1 defaults to a memory syndrome. 1161 */ 1162 synd_code = (int)flt_synd; 1163 1164 if (&plat_get_mem_unum) { 1165 if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id, 1166 flt_in_memory, flt_status, buf, buflen, lenp)) != 0) { 1167 buf[0] = '\0'; 1168 *lenp = 0; 1169 } 1170 return (ret); 1171 } 1172 buf[0] = '\0'; 1173 *lenp = 0; 1174 return (ENOTSUP); 1175 } 1176 1177 /* 1178 * Wrapper for cpu_get_mem_unum() routine that takes an 1179 * async_flt struct rather than explicit arguments. 1180 */ 1181 int 1182 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1183 char *buf, int buflen, int *lenp) 1184 { 1185 /* 1186 * We always pass -1 so that cpu_get_mem_unum will interpret this as a 1187 * memory error. 1188 */ 1189 return (cpu_get_mem_unum(synd_status, aflt->flt_synd, 1190 (uint64_t)-1, 1191 aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, 1192 aflt->flt_status, buf, buflen, lenp)); 1193 } 1194 1195 /* 1196 * This routine is a more generic interface to cpu_get_mem_unum() 1197 * that may be used by other modules (e.g. mm). 1198 */ 1199 /*ARGSUSED*/ 1200 int 1201 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1202 char *buf, int buflen, int *lenp) 1203 { 1204 int synd_status, flt_in_memory, ret; 1205 ushort_t flt_status = 0; 1206 char unum[UNUM_NAMLEN]; 1207 1208 /* 1209 * Check for an invalid address. 1210 */ 1211 if (afar == (uint64_t)-1) 1212 return (ENXIO); 1213 1214 if (synd == (uint64_t)-1) 1215 synd_status = AFLT_STAT_INVALID; 1216 else 1217 synd_status = AFLT_STAT_VALID; 1218 1219 flt_in_memory = (*afsr & SFSR_MEMORY) && 1220 pf_is_memory(afar >> MMU_PAGESHIFT); 1221 1222 ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1223 CPU->cpu_id, flt_in_memory, flt_status, unum, 1224 UNUM_NAMLEN, lenp); 1225 if (ret != 0) 1226 return (ret); 1227 1228 if (*lenp >= buflen) 1229 return (ENAMETOOLONG); 1230 1231 (void) strncpy(buf, unum, buflen); 1232 1233 return (0); 1234 } 1235 1236 /* 1237 * Routine to return memory information associated 1238 * with a physical address and syndrome. 1239 */ 1240 /*ARGSUSED*/ 1241 int 1242 cpu_get_mem_info(uint64_t synd, uint64_t afar, 1243 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1244 int *segsp, int *banksp, int *mcidp) 1245 { 1246 int synd_code = (int)synd; 1247 1248 if (afar == (uint64_t)-1) 1249 return (ENXIO); 1250 1251 if (p2get_mem_info != NULL) 1252 return ((p2get_mem_info)(synd_code, afar, 1253 mem_sizep, seg_sizep, bank_sizep, 1254 segsp, banksp, mcidp)); 1255 else 1256 return (ENOTSUP); 1257 } 1258 1259 /* 1260 * Routine to return a string identifying the physical 1261 * name associated with a cpuid. 1262 */ 1263 int 1264 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 1265 { 1266 int ret; 1267 char unum[UNUM_NAMLEN]; 1268 1269 if (&plat_get_cpu_unum) { 1270 if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp)) 1271 != 0) 1272 return (ret); 1273 } else { 1274 return (ENOTSUP); 1275 } 1276 1277 if (*lenp >= buflen) 1278 return (ENAMETOOLONG); 1279 1280 (void) strncpy(buf, unum, *lenp); 1281 1282 return (0); 1283 } 1284 1285 /* 1286 * This routine exports the name buffer size. 1287 */ 1288 size_t 1289 cpu_get_name_bufsize() 1290 { 1291 return (UNUM_NAMLEN); 1292 } 1293 1294 /* 1295 * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH 1296 */ 1297 void 1298 cpu_flush_ecache(void) 1299 { 1300 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size, 1301 cpunodes[CPU->cpu_id].ecache_linesize); 1302 } 1303 1304 static uint8_t 1305 flt_to_trap_type(struct async_flt *aflt) 1306 { 1307 if (aflt->flt_status & OPL_ECC_ISYNC_TRAP) 1308 return (TRAP_TYPE_ECC_I); 1309 if (aflt->flt_status & OPL_ECC_DSYNC_TRAP) 1310 return (TRAP_TYPE_ECC_D); 1311 if (aflt->flt_status & OPL_ECC_URGENT_TRAP) 1312 return (TRAP_TYPE_URGENT); 1313 return (-1); 1314 } 1315 1316 /* 1317 * Encode the data saved in the opl_async_flt_t struct into 1318 * the FM ereport payload. 1319 */ 1320 /* ARGSUSED */ 1321 static void 1322 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload, 1323 nvlist_t *resource) 1324 { 1325 opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt; 1326 char unum[UNUM_NAMLEN]; 1327 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1328 int len; 1329 1330 1331 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) { 1332 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR, 1333 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1334 } 1335 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) { 1336 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR, 1337 DATA_TYPE_UINT64, aflt->flt_addr, NULL); 1338 } 1339 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) { 1340 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR, 1341 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1342 } 1343 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) { 1344 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC, 1345 DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL); 1346 } 1347 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) { 1348 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL, 1349 DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL); 1350 } 1351 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) { 1352 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT, 1353 DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL); 1354 } 1355 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) { 1356 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV, 1357 DATA_TYPE_BOOLEAN_VALUE, 1358 (aflt->flt_priv ? B_TRUE : B_FALSE), NULL); 1359 } 1360 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) { 1361 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS, 1362 DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL); 1363 } 1364 1365 switch (opl_flt->flt_eid_mod) { 1366 case OPL_ERRID_CPU: 1367 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1368 (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id); 1369 (void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION, 1370 NULL, opl_flt->flt_eid_sid, 1371 (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version, 1372 sbuf); 1373 fm_payload_set(payload, 1374 FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1375 DATA_TYPE_NVLIST, resource, NULL); 1376 break; 1377 1378 case OPL_ERRID_CHANNEL: 1379 /* 1380 * No resource is created but the cpumem DE will find 1381 * the defective path by retreiving EID from SFSR which is 1382 * included in the payload. 1383 */ 1384 break; 1385 1386 case OPL_ERRID_MEM: 1387 (void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len); 1388 (void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, 1389 NULL, unum, NULL, (uint64_t)-1); 1390 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1391 DATA_TYPE_NVLIST, resource, NULL); 1392 break; 1393 1394 case OPL_ERRID_PATH: 1395 /* 1396 * No resource is created but the cpumem DE will find 1397 * the defective path by retreiving EID from SFSR which is 1398 * included in the payload. 1399 */ 1400 break; 1401 } 1402 } 1403 1404 /* 1405 * Returns whether fault address is valid for this error bit and 1406 * whether the address is "in memory" (i.e. pf_is_memory returns 1). 1407 */ 1408 /*ARGSUSED*/ 1409 static int 1410 cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit) 1411 { 1412 struct async_flt *aflt = (struct async_flt *)opl_flt; 1413 1414 if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) { 1415 return ((t_afsr_bit & SFSR_MEMORY) && 1416 pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)); 1417 } 1418 return (0); 1419 } 1420 1421 /* 1422 * In OPL SCF does the stick synchronization. 1423 */ 1424 void 1425 sticksync_slave(void) 1426 { 1427 } 1428 1429 /* 1430 * In OPL SCF does the stick synchronization. 1431 */ 1432 void 1433 sticksync_master(void) 1434 { 1435 } 1436 1437 /* 1438 * Cpu private unitialization. OPL cpus do not use the private area. 1439 */ 1440 void 1441 cpu_uninit_private(struct cpu *cp) 1442 { 1443 cmp_delete_cpu(cp->cpu_id); 1444 } 1445 1446 /* 1447 * Always flush an entire cache. 1448 */ 1449 void 1450 cpu_error_ecache_flush(void) 1451 { 1452 cpu_flush_ecache(); 1453 } 1454 1455 void 1456 cpu_ereport_post(struct async_flt *aflt) 1457 { 1458 char *cpu_type, buf[FM_MAX_CLASS]; 1459 nv_alloc_t *nva = NULL; 1460 nvlist_t *ereport, *detector, *resource; 1461 errorq_elem_t *eqep; 1462 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1463 1464 if (aflt->flt_panic || panicstr) { 1465 eqep = errorq_reserve(ereport_errorq); 1466 if (eqep == NULL) 1467 return; 1468 ereport = errorq_elem_nvl(ereport_errorq, eqep); 1469 nva = errorq_elem_nva(ereport_errorq, eqep); 1470 } else { 1471 ereport = fm_nvlist_create(nva); 1472 } 1473 1474 /* 1475 * Create the scheme "cpu" FMRI. 1476 */ 1477 detector = fm_nvlist_create(nva); 1478 resource = fm_nvlist_create(nva); 1479 switch (cpunodes[aflt->flt_inst].implementation) { 1480 case OLYMPUS_C_IMPL: 1481 cpu_type = FM_EREPORT_CPU_SPARC64_VI; 1482 break; 1483 default: 1484 cpu_type = FM_EREPORT_CPU_UNSUPPORTED; 1485 break; 1486 } 1487 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1488 (u_longlong_t)cpunodes[aflt->flt_inst].device_id); 1489 (void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL, 1490 aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version, 1491 sbuf); 1492 1493 /* 1494 * Encode all the common data into the ereport. 1495 */ 1496 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", 1497 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class); 1498 1499 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 1500 fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL); 1501 1502 /* 1503 * Encode the error specific data that was saved in 1504 * the async_flt structure into the ereport. 1505 */ 1506 cpu_payload_add_aflt(aflt, ereport, resource); 1507 1508 if (aflt->flt_panic || panicstr) { 1509 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 1510 } else { 1511 (void) fm_ereport_post(ereport, EVCH_TRYHARD); 1512 fm_nvlist_destroy(ereport, FM_NVA_FREE); 1513 fm_nvlist_destroy(detector, FM_NVA_FREE); 1514 fm_nvlist_destroy(resource, FM_NVA_FREE); 1515 } 1516 } 1517 1518 void 1519 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 1520 { 1521 int status; 1522 ddi_fm_error_t de; 1523 1524 bzero(&de, sizeof (ddi_fm_error_t)); 1525 1526 de.fme_version = DDI_FME_VERSION; 1527 de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1); 1528 de.fme_flag = expected; 1529 de.fme_bus_specific = (void *)aflt->flt_addr; 1530 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 1531 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 1532 aflt->flt_panic = 1; 1533 } 1534 1535 void 1536 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 1537 errorq_t *eqp, uint_t flag) 1538 { 1539 struct async_flt *aflt = (struct async_flt *)payload; 1540 1541 aflt->flt_erpt_class = error_class; 1542 errorq_dispatch(eqp, payload, payload_sz, flag); 1543 } 1544 1545 void 1546 adjust_hw_copy_limits(int ecache_size) 1547 { 1548 /* 1549 * Set hw copy limits. 1550 * 1551 * /etc/system will be parsed later and can override one or more 1552 * of these settings. 1553 * 1554 * At this time, ecache size seems only mildly relevant. 1555 * We seem to run into issues with the d-cache and stalls 1556 * we see on misses. 1557 * 1558 * Cycle measurement indicates that 2 byte aligned copies fare 1559 * little better than doing things with VIS at around 512 bytes. 1560 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte 1561 * aligned is faster whenever the source and destination data 1562 * in cache and the total size is less than 2 Kbytes. The 2K 1563 * limit seems to be driven by the 2K write cache. 1564 * When more than 2K of copies are done in non-VIS mode, stores 1565 * backup in the write cache. In VIS mode, the write cache is 1566 * bypassed, allowing faster cache-line writes aligned on cache 1567 * boundaries. 1568 * 1569 * In addition, in non-VIS mode, there is no prefetching, so 1570 * for larger copies, the advantage of prefetching to avoid even 1571 * occasional cache misses is enough to justify using the VIS code. 1572 * 1573 * During testing, it was discovered that netbench ran 3% slower 1574 * when hw_copy_limit_8 was 2K or larger. Apparently for server 1575 * applications, data is only used once (copied to the output 1576 * buffer, then copied by the network device off the system). Using 1577 * the VIS copy saves more L2 cache state. Network copies are 1578 * around 1.3K to 1.5K in size for historical reasons. 1579 * 1580 * Therefore, a limit of 1K bytes will be used for the 8 byte 1581 * aligned copy even for large caches and 8 MB ecache. The 1582 * infrastructure to allow different limits for different sized 1583 * caches is kept to allow further tuning in later releases. 1584 */ 1585 1586 if (min_ecache_size == 0 && use_hw_bcopy) { 1587 /* 1588 * First time through - should be before /etc/system 1589 * is read. 1590 * Could skip the checks for zero but this lets us 1591 * preserve any debugger rewrites. 1592 */ 1593 if (hw_copy_limit_1 == 0) { 1594 hw_copy_limit_1 = VIS_COPY_THRESHOLD; 1595 priv_hcl_1 = hw_copy_limit_1; 1596 } 1597 if (hw_copy_limit_2 == 0) { 1598 hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD; 1599 priv_hcl_2 = hw_copy_limit_2; 1600 } 1601 if (hw_copy_limit_4 == 0) { 1602 hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD; 1603 priv_hcl_4 = hw_copy_limit_4; 1604 } 1605 if (hw_copy_limit_8 == 0) { 1606 hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; 1607 priv_hcl_8 = hw_copy_limit_8; 1608 } 1609 min_ecache_size = ecache_size; 1610 } else { 1611 /* 1612 * MP initialization. Called *after* /etc/system has 1613 * been parsed. One CPU has already been initialized. 1614 * Need to cater for /etc/system having scragged one 1615 * of our values. 1616 */ 1617 if (ecache_size == min_ecache_size) { 1618 /* 1619 * Same size ecache. We do nothing unless we 1620 * have a pessimistic ecache setting. In that 1621 * case we become more optimistic (if the cache is 1622 * large enough). 1623 */ 1624 if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) { 1625 /* 1626 * Need to adjust hw_copy_limit* from our 1627 * pessimistic uniprocessor value to a more 1628 * optimistic UP value *iff* it hasn't been 1629 * reset. 1630 */ 1631 if ((ecache_size > 1048576) && 1632 (priv_hcl_8 == hw_copy_limit_8)) { 1633 if (ecache_size <= 2097152) 1634 hw_copy_limit_8 = 4 * 1635 VIS_COPY_THRESHOLD; 1636 else if (ecache_size <= 4194304) 1637 hw_copy_limit_8 = 4 * 1638 VIS_COPY_THRESHOLD; 1639 else 1640 hw_copy_limit_8 = 4 * 1641 VIS_COPY_THRESHOLD; 1642 priv_hcl_8 = hw_copy_limit_8; 1643 } 1644 } 1645 } else if (ecache_size < min_ecache_size) { 1646 /* 1647 * A different ecache size. Can this even happen? 1648 */ 1649 if (priv_hcl_8 == hw_copy_limit_8) { 1650 /* 1651 * The previous value that we set 1652 * is unchanged (i.e., it hasn't been 1653 * scragged by /etc/system). Rewrite it. 1654 */ 1655 if (ecache_size <= 1048576) 1656 hw_copy_limit_8 = 8 * 1657 VIS_COPY_THRESHOLD; 1658 else if (ecache_size <= 2097152) 1659 hw_copy_limit_8 = 8 * 1660 VIS_COPY_THRESHOLD; 1661 else if (ecache_size <= 4194304) 1662 hw_copy_limit_8 = 8 * 1663 VIS_COPY_THRESHOLD; 1664 else 1665 hw_copy_limit_8 = 10 * 1666 VIS_COPY_THRESHOLD; 1667 priv_hcl_8 = hw_copy_limit_8; 1668 min_ecache_size = ecache_size; 1669 } 1670 } 1671 } 1672 } 1673 1674 #define VIS_BLOCKSIZE 64 1675 1676 int 1677 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 1678 { 1679 int ret, watched; 1680 1681 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1682 ret = dtrace_blksuword32(addr, data, 0); 1683 if (watched) 1684 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1685 1686 return (ret); 1687 } 1688 1689 void 1690 opl_cpu_reg_init() 1691 { 1692 uint64_t this_cpu_log; 1693 1694 /* 1695 * We do not need to re-initialize cpu0 registers. 1696 */ 1697 if (cpu[getprocessorid()] == &cpu0) 1698 return; 1699 1700 /* 1701 * Initialize Error log Scratch register for error handling. 1702 */ 1703 1704 this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) + 1705 ERRLOG_BUFSZ * (getprocessorid()))); 1706 opl_error_setup(this_cpu_log); 1707 1708 /* 1709 * Enable MMU translating multiple page sizes for 1710 * sITLB and sDTLB. 1711 */ 1712 opl_mpg_enable(); 1713 } 1714 1715 /* 1716 * Queue one event in ue_queue based on ecc_type_to_info entry. 1717 */ 1718 static void 1719 cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason, 1720 ecc_type_to_info_t *eccp) 1721 { 1722 struct async_flt *aflt = (struct async_flt *)opl_flt; 1723 1724 if (reason && 1725 strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { 1726 (void) strcat(reason, eccp->ec_reason); 1727 } 1728 1729 opl_flt->flt_bit = eccp->ec_afsr_bit; 1730 opl_flt->flt_type = eccp->ec_flt_type; 1731 aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit); 1732 aflt->flt_payload = eccp->ec_err_payload; 1733 1734 ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP)); 1735 cpu_errorq_dispatch(eccp->ec_err_class, 1736 (void *)opl_flt, sizeof (opl_async_flt_t), 1737 ue_queue, 1738 aflt->flt_panic); 1739 } 1740 1741 /* 1742 * Queue events on async event queue one event per error bit. 1743 * Return number of events queued. 1744 */ 1745 int 1746 cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs) 1747 { 1748 struct async_flt *aflt = (struct async_flt *)opl_flt; 1749 ecc_type_to_info_t *eccp; 1750 int nevents = 0; 1751 1752 /* 1753 * Queue expected errors, error bit and fault type must must match 1754 * in the ecc_type_to_info table. 1755 */ 1756 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 1757 eccp++) { 1758 if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && 1759 (eccp->ec_flags & aflt->flt_status) != 0) { 1760 /* 1761 * UE error event can be further 1762 * classified/breakdown into finer granularity 1763 * based on the flt_eid_mod value set by HW. We do 1764 * special handling here so that we can report UE 1765 * error in finer granularity as ue_mem, 1766 * ue_channel, ue_cpu or ue_path. 1767 */ 1768 if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) { 1769 opl_flt->flt_eid_mod = 1770 (aflt->flt_stat & SFSR_EID_MOD) 1771 >> SFSR_EID_MOD_SHIFT; 1772 opl_flt->flt_eid_sid = 1773 (aflt->flt_stat & SFSR_EID_SID) 1774 >> SFSR_EID_SID_SHIFT; 1775 /* 1776 * Need to advance eccp pointer by flt_eid_mod 1777 * so that we get an appropriate ecc pointer 1778 * 1779 * EID # of advances 1780 * ---------------------------------- 1781 * OPL_ERRID_MEM 0 1782 * OPL_ERRID_CHANNEL 1 1783 * OPL_ERRID_CPU 2 1784 * OPL_ERRID_PATH 3 1785 */ 1786 eccp += opl_flt->flt_eid_mod; 1787 } 1788 cpu_queue_one_event(opl_flt, reason, eccp); 1789 t_afsr_errs &= ~eccp->ec_afsr_bit; 1790 nevents++; 1791 } 1792 } 1793 1794 return (nevents); 1795 } 1796 1797 /* 1798 * Sync. error wrapper functions. 1799 * We use these functions in order to transfer here from the 1800 * nucleus trap handler information about trap type (data or 1801 * instruction) and trap level (0 or above 0). This way we 1802 * get rid of using SFSR's reserved bits. 1803 */ 1804 1805 #define OPL_SYNC_TL0 0 1806 #define OPL_SYNC_TL1 1 1807 #define OPL_ISYNC_ERR 0 1808 #define OPL_DSYNC_ERR 1 1809 1810 void 1811 opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1812 { 1813 uint64_t t_sfar = p_sfar; 1814 uint64_t t_sfsr = p_sfsr; 1815 1816 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1817 OPL_SYNC_TL0, OPL_ISYNC_ERR); 1818 } 1819 1820 void 1821 opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1822 { 1823 uint64_t t_sfar = p_sfar; 1824 uint64_t t_sfsr = p_sfsr; 1825 1826 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1827 OPL_SYNC_TL1, OPL_ISYNC_ERR); 1828 } 1829 1830 void 1831 opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1832 { 1833 uint64_t t_sfar = p_sfar; 1834 uint64_t t_sfsr = p_sfsr; 1835 1836 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1837 OPL_SYNC_TL0, OPL_DSYNC_ERR); 1838 } 1839 1840 void 1841 opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1842 { 1843 uint64_t t_sfar = p_sfar; 1844 uint64_t t_sfsr = p_sfsr; 1845 1846 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1847 OPL_SYNC_TL1, OPL_DSYNC_ERR); 1848 } 1849 1850 /* 1851 * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL 1852 * and TLB_PRT. 1853 * This function is designed based on cpu_deferred_error(). 1854 */ 1855 1856 static void 1857 opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr, 1858 uint_t tl, uint_t derr) 1859 { 1860 opl_async_flt_t opl_flt; 1861 struct async_flt *aflt; 1862 int trampolined = 0; 1863 char pr_reason[MAX_REASON_STRING]; 1864 uint64_t log_sfsr; 1865 int expected = DDI_FM_ERR_UNEXPECTED; 1866 ddi_acc_hdl_t *hp; 1867 1868 /* 1869 * We need to look at p_flag to determine if the thread detected an 1870 * error while dumping core. We can't grab p_lock here, but it's ok 1871 * because we just need a consistent snapshot and we know that everyone 1872 * else will store a consistent set of bits while holding p_lock. We 1873 * don't have to worry about a race because SDOCORE is set once prior 1874 * to doing i/o from the process's address space and is never cleared. 1875 */ 1876 uint_t pflag = ttoproc(curthread)->p_flag; 1877 1878 pr_reason[0] = '\0'; 1879 1880 /* 1881 * handle the specific error 1882 */ 1883 bzero(&opl_flt, sizeof (opl_async_flt_t)); 1884 aflt = (struct async_flt *)&opl_flt; 1885 aflt->flt_id = gethrtime_waitfree(); 1886 aflt->flt_bus_id = getprocessorid(); 1887 aflt->flt_inst = CPU->cpu_id; 1888 aflt->flt_stat = t_sfsr; 1889 aflt->flt_addr = t_sfar; 1890 aflt->flt_pc = (caddr_t)rp->r_pc; 1891 aflt->flt_prot = (uchar_t)AFLT_PROT_NONE; 1892 aflt->flt_class = (uchar_t)CPU_FAULT; 1893 aflt->flt_priv = (uchar_t) 1894 (tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ? 1 : 0)); 1895 aflt->flt_tl = (uchar_t)tl; 1896 aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 || 1897 (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0); 1898 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1899 aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP; 1900 1901 /* 1902 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain. 1903 * So, clear all error bits to avoid mis-handling and force the system 1904 * panicked. 1905 * We skip all the procedures below down to the panic message call. 1906 */ 1907 if (!(t_sfsr & SFSR_FV)) { 1908 opl_flt.flt_type = OPL_CPU_INV_SFSR; 1909 aflt->flt_panic = 1; 1910 aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC; 1911 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, 1912 (void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue, 1913 aflt->flt_panic); 1914 fm_panic("%sErrors(s)", "invalid SFSR"); 1915 } 1916 1917 /* 1918 * If either UE and MK bit is off, this is not valid UE error. 1919 * If it is not valid UE error, clear UE & MK_UE bits to prevent 1920 * mis-handling below. 1921 * aflt->flt_stat keeps the original bits as a reference. 1922 */ 1923 if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) != 1924 (SFSR_MK_UE|SFSR_UE)) { 1925 t_sfsr &= ~(SFSR_MK_UE|SFSR_UE); 1926 } 1927 1928 /* 1929 * If the trap occurred in privileged mode at TL=0, we need to check to 1930 * see if we were executing in the kernel under on_trap() or t_lofault 1931 * protection. If so, modify the saved registers so that we return 1932 * from the trap to the appropriate trampoline routine. 1933 */ 1934 if (!aflt->flt_panic && aflt->flt_priv && tl == 0) { 1935 if (curthread->t_ontrap != NULL) { 1936 on_trap_data_t *otp = curthread->t_ontrap; 1937 1938 if (otp->ot_prot & OT_DATA_EC) { 1939 aflt->flt_prot = (uchar_t)AFLT_PROT_EC; 1940 otp->ot_trap |= (ushort_t)OT_DATA_EC; 1941 rp->r_pc = otp->ot_trampoline; 1942 rp->r_npc = rp->r_pc + 4; 1943 trampolined = 1; 1944 } 1945 1946 if ((t_sfsr & (SFSR_TO | SFSR_BERR)) && 1947 (otp->ot_prot & OT_DATA_ACCESS)) { 1948 aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS; 1949 otp->ot_trap |= (ushort_t)OT_DATA_ACCESS; 1950 rp->r_pc = otp->ot_trampoline; 1951 rp->r_npc = rp->r_pc + 4; 1952 trampolined = 1; 1953 /* 1954 * for peeks and caut_gets errors are expected 1955 */ 1956 hp = (ddi_acc_hdl_t *)otp->ot_handle; 1957 if (!hp) 1958 expected = DDI_FM_ERR_PEEK; 1959 else if (hp->ah_acc.devacc_attr_access == 1960 DDI_CAUTIOUS_ACC) 1961 expected = DDI_FM_ERR_EXPECTED; 1962 } 1963 1964 } else if (curthread->t_lofault) { 1965 aflt->flt_prot = AFLT_PROT_COPY; 1966 rp->r_g1 = EFAULT; 1967 rp->r_pc = curthread->t_lofault; 1968 rp->r_npc = rp->r_pc + 4; 1969 trampolined = 1; 1970 } 1971 } 1972 1973 /* 1974 * If we're in user mode or we're doing a protected copy, we either 1975 * want the ASTON code below to send a signal to the user process 1976 * or we want to panic if aft_panic is set. 1977 * 1978 * If we're in privileged mode and we're not doing a copy, then we 1979 * need to check if we've trampolined. If we haven't trampolined, 1980 * we should panic. 1981 */ 1982 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 1983 if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO))) 1984 aflt->flt_panic |= aft_panic; 1985 } else if (!trampolined) { 1986 aflt->flt_panic = 1; 1987 } 1988 1989 /* 1990 * If we've trampolined due to a privileged TO or BERR, or if an 1991 * unprivileged TO or BERR occurred, we don't want to enqueue an 1992 * event for that TO or BERR. Queue all other events (if any) besides 1993 * the TO/BERR. 1994 */ 1995 log_sfsr = t_sfsr; 1996 if (trampolined) { 1997 log_sfsr &= ~(SFSR_TO | SFSR_BERR); 1998 } else if (!aflt->flt_priv) { 1999 /* 2000 * User mode, suppress messages if 2001 * cpu_berr_to_verbose is not set. 2002 */ 2003 if (!cpu_berr_to_verbose) 2004 log_sfsr &= ~(SFSR_TO | SFSR_BERR); 2005 } 2006 2007 if (((log_sfsr & SFSR_ERRS) && 2008 (cpu_queue_events(&opl_flt, pr_reason, t_sfsr) == 0)) || 2009 ((t_sfsr & SFSR_ERRS) == 0)) { 2010 opl_flt.flt_type = OPL_CPU_INV_SFSR; 2011 aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC; 2012 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, 2013 (void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue, 2014 aflt->flt_panic); 2015 } 2016 2017 if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) { 2018 cpu_run_bus_error_handlers(aflt, expected); 2019 } 2020 2021 /* 2022 * Panic here if aflt->flt_panic has been set. Enqueued errors will 2023 * be logged as part of the panic flow. 2024 */ 2025 if (aflt->flt_panic) { 2026 if (pr_reason[0] == 0) 2027 strcpy(pr_reason, "invalid SFSR "); 2028 2029 fm_panic("%sErrors(s)", pr_reason); 2030 } 2031 2032 /* 2033 * If we queued an error and we are going to return from the trap and 2034 * the error was in user mode or inside of a copy routine, set AST flag 2035 * so the queue will be drained before returning to user mode. The 2036 * AST processing will also act on our failure policy. 2037 */ 2038 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 2039 int pcb_flag = 0; 2040 2041 if (t_sfsr & (SFSR_ERRS & 2042 ~(SFSR_BERR | SFSR_TO))) 2043 pcb_flag |= ASYNC_HWERR; 2044 2045 if (t_sfsr & SFSR_BERR) 2046 pcb_flag |= ASYNC_BERR; 2047 2048 if (t_sfsr & SFSR_TO) 2049 pcb_flag |= ASYNC_BTO; 2050 2051 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 2052 aston(curthread); 2053 } 2054 } 2055 2056 /*ARGSUSED*/ 2057 void 2058 opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl) 2059 { 2060 opl_async_flt_t opl_flt; 2061 struct async_flt *aflt; 2062 char pr_reason[MAX_REASON_STRING]; 2063 2064 /* normalize tl */ 2065 tl = (tl >= 2 ? 1 : 0); 2066 pr_reason[0] = '\0'; 2067 2068 bzero(&opl_flt, sizeof (opl_async_flt_t)); 2069 aflt = (struct async_flt *)&opl_flt; 2070 aflt->flt_id = gethrtime_waitfree(); 2071 aflt->flt_bus_id = getprocessorid(); 2072 aflt->flt_inst = CPU->cpu_id; 2073 aflt->flt_stat = p_ugesr; 2074 aflt->flt_pc = (caddr_t)rp->r_pc; 2075 aflt->flt_class = (uchar_t)CPU_FAULT; 2076 aflt->flt_tl = tl; 2077 aflt->flt_priv = (uchar_t) 2078 (tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ? 1 : 0)); 2079 aflt->flt_status = OPL_ECC_URGENT_TRAP; 2080 aflt->flt_panic = 1; 2081 /* 2082 * HW does not set mod/sid in case of urgent error. 2083 * So we have to set it here. 2084 */ 2085 opl_flt.flt_eid_mod = OPL_ERRID_CPU; 2086 opl_flt.flt_eid_sid = aflt->flt_inst; 2087 2088 if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) { 2089 opl_flt.flt_type = OPL_CPU_INV_UGESR; 2090 aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT; 2091 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG, 2092 (void *)&opl_flt, sizeof (opl_async_flt_t), 2093 ue_queue, aflt->flt_panic); 2094 } 2095 2096 fm_panic("Urgent Error"); 2097 } 2098 2099 /* 2100 * Initialization error counters resetting. 2101 */ 2102 /* ARGSUSED */ 2103 static void 2104 opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when) 2105 { 2106 hdlr->cyh_func = (cyc_func_t)ras_cntr_reset; 2107 hdlr->cyh_level = CY_LOW_LEVEL; 2108 hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id; 2109 2110 when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU); 2111 when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval; 2112 } 2113 2114 void 2115 cpu_mp_init(void) 2116 { 2117 cyc_omni_handler_t hdlr; 2118 2119 hdlr.cyo_online = opl_ras_online; 2120 hdlr.cyo_offline = NULL; 2121 hdlr.cyo_arg = NULL; 2122 mutex_enter(&cpu_lock); 2123 (void) cyclic_add_omni(&hdlr); 2124 mutex_exit(&cpu_lock); 2125 } 2126 2127 /*ARGSUSED*/ 2128 void 2129 mmu_init_kernel_pgsz(struct hat *hat) 2130 { 2131 } 2132 2133 size_t 2134 mmu_get_kernel_lpsize(size_t lpsize) 2135 { 2136 uint_t tte; 2137 2138 if (lpsize == 0) { 2139 /* no setting for segkmem_lpsize in /etc/system: use default */ 2140 return (MMU_PAGESIZE4M); 2141 } 2142 2143 for (tte = TTE8K; tte <= TTE4M; tte++) { 2144 if (lpsize == TTEBYTES(tte)) 2145 return (lpsize); 2146 } 2147 2148 return (TTEBYTES(TTE8K)); 2149 } 2150 2151 /* 2152 * The following are functions that are unused in 2153 * OPL cpu module. They are defined here to resolve 2154 * dependencies in the "unix" module. 2155 * Unused functions that should never be called in 2156 * OPL are coded with ASSERT(0). 2157 */ 2158 2159 void 2160 cpu_disable_errors(void) 2161 {} 2162 2163 void 2164 cpu_enable_errors(void) 2165 { ASSERT(0); } 2166 2167 /*ARGSUSED*/ 2168 void 2169 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t) 2170 { ASSERT(0); } 2171 2172 /*ARGSUSED*/ 2173 void 2174 cpu_faulted_enter(struct cpu *cp) 2175 {} 2176 2177 /*ARGSUSED*/ 2178 void 2179 cpu_faulted_exit(struct cpu *cp) 2180 {} 2181 2182 /*ARGSUSED*/ 2183 void 2184 cpu_check_allcpus(struct async_flt *aflt) 2185 {} 2186 2187 /*ARGSUSED*/ 2188 void 2189 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t) 2190 { ASSERT(0); } 2191 2192 /*ARGSUSED*/ 2193 void 2194 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz) 2195 { ASSERT(0); } 2196 2197 /*ARGSUSED*/ 2198 void 2199 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 2200 { ASSERT(0); } 2201 2202 /*ARGSUSED*/ 2203 void 2204 cpu_busy_ecache_scrub(struct cpu *cp) 2205 {} 2206 2207 /*ARGSUSED*/ 2208 void 2209 cpu_idle_ecache_scrub(struct cpu *cp) 2210 {} 2211 2212 /* ARGSUSED */ 2213 void 2214 cpu_change_speed(uint64_t divisor, uint64_t arg2) 2215 { ASSERT(0); } 2216 2217 void 2218 cpu_init_cache_scrub(void) 2219 {} 2220 2221 /* ARGSUSED */ 2222 int 2223 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 2224 { 2225 return (ENOTSUP); 2226 } 2227 2228 /* ARGSUSED */ 2229 int 2230 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) 2231 { 2232 return (ENOTSUP); 2233 } 2234 2235 /* ARGSUSED */ 2236 int 2237 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) 2238 { 2239 return (ENOTSUP); 2240 } 2241 2242 /*ARGSUSED*/ 2243 void 2244 itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) 2245 { ASSERT(0); } 2246 2247 /*ARGSUSED*/ 2248 void 2249 dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) 2250 { ASSERT(0); } 2251