/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sysmacros.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/machthread.h>
#include <sys/cpu.h>
#include <sys/cmp.h>
#include <sys/elf_SPARC.h>
#include <vm/vm_dep.h>
#include <vm/hat_sfmmu.h>
#include <vm/seg_kpm.h>
#include <sys/cpuvar.h>
#include <sys/opl_olympus_regs.h>
#include <sys/opl_module.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/cpu_module.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/clock.h>
#include <sys/platform_module.h>
#include <sys/ontrap.h>
#include <sys/panic.h>
#include <sys/memlist.h>
#include <sys/ndifm.h>
#include <sys/ddifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/cpu/SPARC64-VI.h>
#include <sys/dtrace.h>
#include <sys/watchpoint.h>
#include <sys/promif.h>

/*
 * Internal functions.
 */
static int cpu_sync_log_err(void *flt);
static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *);
static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t);
static int cpu_flt_in_memory(opl_async_flt_t *, uint64_t);

/*
 * Error counters resetting interval.
 */
static int opl_async_check_interval = 60; /* 1 min */

/*
 * Maximum number of contexts for Olympus-C.
 */
#define MAX_NCTXS (1 << 13)

/*
 * Will be set !NULL for SPARC64-VI and derivatives.
 * One byte per context number; mmu_set_ctx_page_sizes() in this file
 * stores the hat's sfmmu_cext value at index sfmmu_cnum.
 */
static uchar_t ctx_pgsz_arr[MAX_NCTXS];
uchar_t *ctx_pgsz_array = ctx_pgsz_arr;

/*
 * PA[22:0] represent Displacement in Jupiter
 * configuration space.
 */
uint_t root_phys_addr_lo_mask = 0x7fffffu;

/*
 * set in /etc/system to control logging of user BERR/TO's
 */
int cpu_berr_to_verbose = 0;

static int min_ecache_size;
static uint_t priv_hcl_1;
static uint_t priv_hcl_2;
static uint_t priv_hcl_4;
static uint_t priv_hcl_8;

/*
 * Olympus error log.  Allocated by cpu_error_init().
 */
static opl_errlog_t *opl_err_log;

/*
 * Error-type lookup table.  Every entry is written as seven positional
 * initializers, and the table is terminated by an all-zero/NULL entry.
 *
 * UE is classified into four classes (MEM, CHANNEL, CPU, PATH).
 * No other ecc_type_to_info entry may be inserted in between the
 * following four UE classes.
 */
ecc_type_to_info_t ecc_type_to_info[] = {
	SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
	"Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_MEM,
	SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
	"Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_CHANNEL,
	SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
	"Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_CPU,
	SFSR_UE, "UE ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
	"Uncorrectable ECC", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_PATH,
	SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
	"Bus Error", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_BERR,
	SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
	"Bus Timeout", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_BTO,
	SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
	"TLB MultiHit", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_MTLB,
	SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
	"TLB Parity", FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_TLBP,

	UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IAUG CRE", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_CRE,
	UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT",
	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IAUG TSBCTXT", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSBCTX,
	UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG TSBP", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSBP,
	UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG PSTATE", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_PSTATE,
	UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG TSTATE", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSTATE,
	UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG FREG", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IUG_F,
	UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG RREG", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IUG_R,
	UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"AUG SDC", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_SDC,
	UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG WDT", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_WDT,
	UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG DTLB", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_DTLB,
	UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG ITLB", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_ITLB,
	UGESR_IUG_COREERR, "IUG_COREERR",
	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG COREERR", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_CORE,
	UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"MULTI DAE", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_DAE,
	UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"MULTI IAE", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IAE,
	UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"MULTI UGE", FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_UGE,
	0, NULL, 0, 0,
	NULL, 0, 0,
};

/*
 * Optional hook for looking up memory information.  cpu_get_mem_info()
 * in this file calls through it when it is non-NULL and returns ENOTSUP
 * otherwise.  Nothing in this part of the file assigns it.
 */
int (*p2get_mem_info)(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp);


/*
 * Setup trap handlers for 0xA, 0x32, 0x40 trap types.
200 */ 201 void 202 cpu_init_trap(void) 203 { 204 OPL_SET_TRAP(tt0_iae, opl_serr_instr); 205 OPL_SET_TRAP(tt1_iae, opl_serr_instr); 206 OPL_SET_TRAP(tt0_dae, opl_serr_instr); 207 OPL_SET_TRAP(tt1_dae, opl_serr_instr); 208 OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr); 209 OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr); 210 } 211 212 static int 213 getintprop(pnode_t node, char *name, int deflt) 214 { 215 int value; 216 217 switch (prom_getproplen(node, name)) { 218 case sizeof (int): 219 (void) prom_getprop(node, name, (caddr_t)&value); 220 break; 221 222 default: 223 value = deflt; 224 break; 225 } 226 227 return (value); 228 } 229 230 /* 231 * Set the magic constants of the implementation. 232 */ 233 /*ARGSUSED*/ 234 void 235 cpu_fiximp(pnode_t dnode) 236 { 237 int i, a; 238 extern int vac_size, vac_shift; 239 extern uint_t vac_mask; 240 241 static struct { 242 char *name; 243 int *var; 244 int defval; 245 } prop[] = { 246 "l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE, 247 "l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE, 248 "l1-icache-size", &icache_size, OPL_ICACHE_SIZE, 249 "l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE, 250 "l2-cache-size", &ecache_size, OPL_ECACHE_SIZE, 251 "l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE, 252 "l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY 253 }; 254 255 for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) 256 *prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval); 257 258 ecache_setsize = ecache_size / ecache_associativity; 259 260 vac_size = OPL_VAC_SIZE; 261 vac_mask = MMU_PAGEMASK & (vac_size - 1); 262 i = 0; a = vac_size; 263 while (a >>= 1) 264 ++i; 265 vac_shift = i; 266 shm_alignment = vac_size; 267 vac = 1; 268 } 269 270 #ifdef OLYMPUS_C_REV_B_ERRATA_XCALL 271 /* 272 * Quick and dirty way to redefine locally in 273 * OPL the value of IDSR_BN_SETS to 31 instead 274 * of the standard 32 value. 
* This is to work around
 * REV_B of Olympus_c processor's problem in handling
 * more than 31 xcall broadcast.
 */
#undef IDSR_BN_SETS
#define IDSR_BN_SETS 31
#endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */

/*
 * Ship a mondo to every CPU in 'set' and wait for delivery.
 *
 * Up to IDSR_BN_SETS dispatches are outstanding at once (bn_sets, which
 * may be forced to 1, when OLYMPUS_C_REV_A_ERRATA_XCALL is defined).
 * cpuids[slot] records which CPU each dispatch slot was last used for.
 * The function then polls getidsr():
 *   - slots that are neither BUSY nor NACKed are reused for CPUs that have
 *     not been shipped to yet (only when NCPU > IDSR_BN_SETS);
 *   - while any slot is BUSY it keeps polling;
 *   - NACKed slots are re-shipped after a short spin.
 * If the poll runs past xc_tick_limit ticks the stuck cpuids are printed
 * and the system panics, unless panic_quiesce is set, in which case the
 * function simply returns.
 */
void
send_mondo_set(cpuset_t set)
{
	int lo, busy, nack, shipped = 0;
	uint16_t i, cpuids[IDSR_BN_SETS];
	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
	uint64_t starttick, endtick, tick, lasttick;
#if (NCPU > IDSR_BN_SETS)
	int index = 0;		/* highest-numbered CPU not yet shipped to */
	int ncpuids = 0;	/* number of CPUs found in 'set' */
#endif
#ifdef OLYMPUS_C_REV_A_ERRATA_XCALL
	int bn_sets = IDSR_BN_SETS;
	uint64_t ver;

	ASSERT(NCPU > bn_sets);
#endif

	ASSERT(!CPUSET_ISNULL(set));
	starttick = lasttick = gettick();

#ifdef OLYMPUS_C_REV_A_ERRATA_XCALL
	/* On an Olympus-C rev A part, use a single dispatch slot. */
	ver = ultra_getver();
	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
	    ((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
		bn_sets = 1;
#endif

#if (NCPU <= IDSR_BN_SETS)
	/* Every CPU fits in a dispatch slot: ship to all of them up front. */
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			shipit(i, shipped);
			nackmask |= IDSR_NACK_BIT(shipped);
			cpuids[shipped++] = i;
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
#else
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			ncpuids++;

			/*
			 * Ship only to the first (IDSR_BN_SETS) CPUs. If we
			 * find we have shipped to more than (IDSR_BN_SETS)
			 * CPUs, set "index" to the highest numbered CPU in
			 * the set so we can ship to other CPUs a bit later on.
			 */
#ifdef OLYMPUS_C_REV_A_ERRATA_XCALL
			if (shipped < bn_sets) {
#else
			if (shipped < IDSR_BN_SETS) {
#endif
				shipit(i, shipped);
				nackmask |= IDSR_NACK_BIT(shipped);
				cpuids[shipped++] = i;
				CPUSET_DEL(set, i);
				if (CPUSET_ISNULL(set))
					break;
			} else
				index = (int)i;
		}

	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
#endif

	busymask = IDSR_NACK_TO_BUSY(nackmask);
	busy = nack = 0;
	endtick = starttick + xc_tick_limit;
	for (;;) {
		idsr = getidsr();
#if (NCPU <= IDSR_BN_SETS)
		if (idsr == 0)
			break;
#else
		/* Done only when nothing is pending and all were shipped. */
		if (idsr == 0 && shipped == ncpuids)
			break;
#endif
		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point. Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_CONT, "send mondo timeout "
			    "[%d NACK %d BUSY]\nIDSR 0x%"
			    "" PRIx64 " cpuids:", nack, busy, idsr);
#ifdef OLYMPUS_C_REV_A_ERRATA_XCALL
			for (i = 0; i < bn_sets; i++) {
#else
			for (i = 0; i < IDSR_BN_SETS; i++) {
#endif
				if (idsr & (IDSR_NACK_BIT(i) |
				    IDSR_BUSY_BIT(i))) {
					cmn_err(CE_CONT, " 0x%x",
					    cpuids[i]);
				}
			}
			cmn_err(CE_CONT, "\n");
			cmn_err(CE_PANIC, "send_mondo_set: timeout");
		}
		curnack = idsr & nackmask;
		curbusy = idsr & busymask;

#ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
		/*
		 * Only proceed to send more xcalls if all the
		 * cpus in the previous IDSR_BN_SETS were completed.
		 */
		if (curbusy) {
			busy++;
			continue;
		}
#endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */

#if (NCPU > IDSR_BN_SETS)
		if (shipped < ncpuids) {
			uint64_t cpus_left;
			uint16_t next = (uint16_t)index;

			/* BUSY bits of slots that are neither busy nor NACKed. */
			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
			    busymask;

			if (cpus_left) {
				do {
					/*
					 * Sequence through and ship to the
					 * remainder of the CPUs in the system
					 * (e.g. other than the first
					 * (IDSR_BN_SETS)) in reverse order.
					 */
					lo = lowbit(cpus_left) - 1;
					i = IDSR_BUSY_IDX(lo);
					shipit(next, i);
					shipped++;
					cpuids[i] = next;

					/*
					 * If we've processed all the CPUs,
					 * exit the loop now and save
					 * instructions.
					 */
					if (shipped == ncpuids)
						break;

					/* Find the next lower CPU still in set. */
					for ((index = ((int)next - 1));
					    index >= 0; index--)
						if (CPU_IN_SET(set, index)) {
							next = (uint16_t)index;
							break;
						}

					cpus_left &= ~(1ull << lo);
				} while (cpus_left);
				continue;
			}
		}
#endif
#ifndef OLYMPUS_C_REV_B_ERRATA_XCALL
		if (curbusy) {
			busy++;
			continue;
		}
#endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */
#ifdef SEND_MONDO_STATS
		{
			int n = gettick() - starttick;
			if (n < 8192)
				x_nack_stimes[n >> 7]++;
		}
#endif
		/* Spin for sys_clock_mhz ticks before re-shipping NACKs. */
		while (gettick() < (tick + sys_clock_mhz))
			;
		do {
			lo = lowbit(curnack) - 1;
			i = IDSR_NACK_IDX(lo);
			shipit(cpuids[i], i);
			curnack &= ~(1ull << lo);
		} while (curnack);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_set_stimes[n >> 7]++;
		else
			x_set_ltimes[(n >> 13) & 0xf]++;
	}
	x_set_cpus[shipped]++;
#endif
}

/*
 * Cpu private initialization.
*/
/*
 * Panics unless the CPU's recorded implementation is Olympus-C, then
 * passes that CPU's ecache size to adjust_hw_copy_limits().
 */
void
cpu_init_private(struct cpu *cp)
{
	if (!(IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation))) {
		cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI is supported",
		    cp->cpu_id, cpunodes[cp->cpu_id].implementation);
	}

	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
}

/*
 * One-time CPU-module setup: registers the cpu0 error log, enables
 * multiple page sizes in the MMU, installs the chip-specific trap
 * handlers, and then sets the global tunables that describe this
 * processor (cache flags, context count, page coloring, ISA list,
 * kpm window, large-page policy).
 */
void
cpu_setup(void)
{
	extern int at_flags;
	extern int disable_delay_tlb_flush, delay_tlb_flush;
	extern int cpc_has_overflow_intr;
	extern int disable_text_largepages;
	extern int use_text_pgsz4m;
	uint64_t cpu0_log;
	extern uint64_t opl_cpu0_err_log;

	/*
	 * Initialize Error log Scratch register for error handling.
	 */

	cpu0_log = va_to_pa(&opl_cpu0_err_log);
	opl_error_setup(cpu0_log);

	/*
	 * Enable MMU translating multiple page sizes for
	 * sITLB and sDTLB.
	 */
	opl_mpg_enable();

	/*
	 * Setup chip-specific trap handlers.
	 */
	cpu_init_trap();

	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;

	/*
	 * Use the maximum number of contexts available for SPARC64-VI
	 * unless it has been tuned for debugging.
	 * We are checking against 0 here since this value can be patched
	 * while booting. It can not be patched via /etc/system since it
	 * will be patched too late and thus cause the system to panic.
	 */
	if (nctxs == 0)
		nctxs = MAX_NCTXS;

	/*
	 * Due to the number of entries in the fully-associative tlb
	 * this may have to be tuned lower than in spitfire.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores do not invalidate all pages of the d$, pagecopy
	 * et al. need virtual translations with virtual coloring taken
	 * into consideration. prefetch/ldd will pollute the d$ on the
	 * load side.
	 */
	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;

	if (use_page_coloring) {
		do_pg_coloring = 1;
		if (use_virtual_coloring)
			do_virtual_coloring = 1;
	}

	isa_list =
	    "sparcv9+vis2 sparcv9+vis sparcv9 "
	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;

	/*
	 * On SPARC64-VI, there's no hole in the virtual address space
	 */
	hole_start = hole_end = 0;

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */
	kpm_size_shift = 47;
	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
	kpm_smallpages = 1;

	/*
	 * The traptrace code uses either %tick or %stick for
	 * timestamping. We have %stick so we can use it.
	 */
	traptrace_use_stick = 1;

	/*
	 * SPARC64-VI has a performance counter overflow interrupt
	 */
	cpc_has_overflow_intr = 1;

	/*
	 * Use SPARC64-VI flush-all support
	 */
	if (!disable_delay_tlb_flush)
		delay_tlb_flush = 1;

	/*
	 * Declare that this architecture/cpu combination does not support
	 * fpRAS.
	 */
	fpras_implemented = 0;

	/*
	 * Enable 4M pages to be used for mapping user text by default. Don't
	 * use large pages for initialized data segments since we may not know
	 * at exec() time what should be the preferred large page size for DTLB
	 * programming.
	 */
	use_text_pgsz4m = 1;
	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
	    (1 << TTE32M) | (1 << TTE256M);
}

/*
 * Called by setcpudelay
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * For SPARC64-VI we want to use the system clock rate as
	 * the basis for low level timing, due to support of mixed
	 * speed CPUs and power management.
	 */
	if (system_clock_freq == 0)
		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");

	sys_tick_freq = system_clock_freq;
}

#ifdef SEND_MONDO_STATS
/*
 * Cross-call timing histograms, compiled in only with SEND_MONDO_STATS.
 * Elapsed ticks below 8192 are counted in 128-tick buckets (n >> 7) of
 * the *_stimes arrays; longer waits go to *_ltimes[(n >> 13) & 0xf].
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif

/*
 * Ship a mondo to a single CPU using dispatch slot 0 and poll getidsr()
 * until it reads zero.  While the BUSY bit is set the loop just keeps
 * polling; otherwise the dispatch is treated as NACKed and is re-shipped
 * after drv_usecwait(1).  Exceeding xc_tick_limit ticks panics, unless
 * panic_quiesce is set, in which case the function returns.
 *
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version. Any changes made to this
 * function to support new chips or to accommodate errata must also be
 * included in the KDI-specific version. See us3_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	int busy, nack;
	uint64_t idsr, starttick, endtick, tick, lasttick;
	uint64_t busymask;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	starttick = lasttick = gettick();
	shipit(cpuid, 0);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	busymask = IDSR_BUSY;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;

		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point. Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC, "send mondo timeout "
			    "(target 0x%x) [%d NACK %d BUSY]",
			    cpuid, nack, busy);
		}

		if (idsr & busymask) {
			busy++;
			continue;
		}
		drv_usecwait(1);
		shipit(cpuid, 0);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_one_stimes[n >> 7]++;
		else
			x_one_ltimes[(n >> 13) & 0xf]++;
	}
#endif
}

/*
 * init_mmu_page_sizes is set to one after the bootup time initialization
 * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a
 * valid value.
 *
 * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific
 * versions of disable_ism_large_pages and disable_large_pages, and feed back
 * into those two hat variables at hat initialization time.
 *
 */
int init_mmu_page_sizes = 0;
static int mmu_disable_ism_large_pages = ((1 << TTE64K) |
    (1 << TTE512K) | (1 << TTE256M));
static int mmu_disable_large_pages = 0;

/*
 * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
 * Called during very early bootup from check_cpus_set().
 * Can be called to verify that mmu_page_sizes are set up correctly.
 *
 * Set Olympus defaults. We do not use the function parameter.
729 */ 730 /*ARGSUSED*/ 731 int 732 mmu_init_mmu_page_sizes(int32_t not_used) 733 { 734 if (!init_mmu_page_sizes) { 735 mmu_page_sizes = MMU_PAGE_SIZES; 736 mmu_hashcnt = MAX_HASHCNT; 737 mmu_ism_pagesize = MMU_PAGESIZE32M; 738 mmu_exported_pagesize_mask = (1 << TTE8K) | 739 (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | 740 (1 << TTE32M) | (1 << TTE256M); 741 init_mmu_page_sizes = 1; 742 return (0); 743 } 744 return (1); 745 } 746 747 /* SPARC64-VI worst case DTLB parameters */ 748 #ifndef LOCKED_DTLB_ENTRIES 749 #define LOCKED_DTLB_ENTRIES 5 /* 2 user TSBs, 2 nucleus, + OBP */ 750 #endif 751 #define TOTAL_DTLB_ENTRIES 32 752 #define AVAIL_32M_ENTRIES 0 753 #define AVAIL_256M_ENTRIES 0 754 #define AVAIL_DTLB_ENTRIES (TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES) 755 static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = { 756 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 757 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, 758 AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES}; 759 760 size_t 761 mmu_map_pgsz(size_t pgsize) 762 { 763 struct proc *p = curproc; 764 struct as *as = p->p_as; 765 struct hat *hat = as->a_hat; 766 uint_t pgsz0, pgsz1; 767 size_t size0, size1; 768 769 ASSERT(mmu_page_sizes == max_mmu_page_sizes); 770 pgsz0 = hat->sfmmu_pgsz[0]; 771 pgsz1 = hat->sfmmu_pgsz[1]; 772 size0 = hw_page_array[pgsz0].hp_size; 773 size1 = hw_page_array[pgsz1].hp_size; 774 /* Allow use of a larger pagesize if neither TLB is reprogrammed. */ 775 if ((pgsz0 == TTE8K) && (pgsz1 == TTE8K)) { 776 return (pgsize); 777 /* Allow use of requested pagesize if TLB is reprogrammed to it. */ 778 } else if ((pgsize == size0) || (pgsize == size1)) { 779 return (pgsize); 780 /* Use larger reprogrammed TLB size if pgsize is atleast that big. */ 781 } else if (pgsz1 > pgsz0) { 782 if (pgsize >= size1) 783 return (size1); 784 /* Use smaller reprogrammed TLB size if pgsize is atleast that big. 
*/ 785 } else { 786 if (pgsize >= size0) 787 return (size0); 788 } 789 return (pgsize); 790 } 791 792 /* 793 * The function returns the mmu-specific values for the 794 * hat's disable_large_pages and disable_ism_large_pages variables. 795 */ 796 int 797 mmu_large_pages_disabled(uint_t flag) 798 { 799 int pages_disable = 0; 800 801 if (flag == HAT_LOAD) { 802 pages_disable = mmu_disable_large_pages; 803 } else if (flag == HAT_LOAD_SHARE) { 804 pages_disable = mmu_disable_ism_large_pages; 805 } 806 return (pages_disable); 807 } 808 809 /* 810 * mmu_init_large_pages is called with the desired ism_pagesize parameter. 811 * It may be called from set_platform_defaults, if some value other than 32M 812 * is desired. mmu_ism_pagesize is the tunable. If it has a bad value, 813 * then only warn, since it would be bad form to panic due to a user typo. 814 * 815 * The function re-initializes the mmu_disable_ism_large_pages variable. 816 */ 817 void 818 mmu_init_large_pages(size_t ism_pagesize) 819 { 820 switch (ism_pagesize) { 821 case MMU_PAGESIZE4M: 822 mmu_disable_ism_large_pages = ((1 << TTE64K) | 823 (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); 824 break; 825 case MMU_PAGESIZE32M: 826 mmu_disable_ism_large_pages = ((1 << TTE64K) | 827 (1 << TTE512K) | (1 << TTE256M)); 828 break; 829 case MMU_PAGESIZE256M: 830 mmu_disable_ism_large_pages = ((1 << TTE64K) | 831 (1 << TTE512K) | (1 << TTE32M)); 832 break; 833 default: 834 cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx", 835 ism_pagesize); 836 break; 837 } 838 } 839 840 /*ARGSUSED*/ 841 uint_t 842 mmu_preferred_pgsz(struct hat *hat, caddr_t addr, size_t len) 843 { 844 sfmmu_t *sfmmup = (sfmmu_t *)hat; 845 uint_t pgsz0, pgsz1; 846 uint_t szc, maxszc = mmu_page_sizes - 1; 847 size_t pgsz; 848 extern int disable_large_pages; 849 850 pgsz0 = (uint_t)sfmmup->sfmmu_pgsz[0]; 851 pgsz1 = (uint_t)sfmmup->sfmmu_pgsz[1]; 852 853 /* 854 * If either of the TLBs are reprogrammed, choose 855 * the largest mapping size as 
the preferred size, 856 * if it fits the size and alignment constraints. 857 * Else return the largest mapping size that fits, 858 * if neither TLB is reprogrammed. 859 */ 860 if (pgsz0 > TTE8K || pgsz1 > TTE8K) { 861 if (pgsz1 > pgsz0) { /* First try pgsz1 */ 862 pgsz = hw_page_array[pgsz1].hp_size; 863 if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz)) 864 return (pgsz1); 865 } 866 if (pgsz0 > TTE8K) { /* Then try pgsz0, if !TTE8K */ 867 pgsz = hw_page_array[pgsz0].hp_size; 868 if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz)) 869 return (pgsz0); 870 } 871 } else { /* Otherwise pick best fit if neither TLB is reprogrammed. */ 872 for (szc = maxszc; szc > TTE8K; szc--) { 873 if (disable_large_pages & (1 << szc)) 874 continue; 875 876 pgsz = hw_page_array[szc].hp_size; 877 if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz)) 878 return (szc); 879 } 880 } 881 return (TTE8K); 882 } 883 884 /* 885 * Function to reprogram the TLBs when page sizes used 886 * by a process change significantly. 887 */ 888 void 889 mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt) 890 { 891 extern int page_szc(size_t); 892 uint8_t pgsz0, pgsz1; 893 894 /* 895 * Don't program 2nd dtlb for kernel and ism hat 896 */ 897 if (hat->sfmmu_ismhat || hat == ksfmmup) 898 return; 899 900 /* 901 * hat->sfmmu_pgsz[] is an array whose elements 902 * contain a sorted order of page sizes. Element 903 * 0 is the most commonly used page size, followed 904 * by element 1, and so on. 905 * 906 * ttecnt[] is an array of per-page-size page counts 907 * mapped into the process. 908 * 909 * If the HAT's choice for page sizes is unsuitable, 910 * we can override it here. The new values written 911 * to the array will be handed back to us later to 912 * do the actual programming of the TLB hardware. 
913 * 914 */ 915 pgsz0 = (uint8_t)MIN(hat->sfmmu_pgsz[0], hat->sfmmu_pgsz[1]); 916 pgsz1 = (uint8_t)MAX(hat->sfmmu_pgsz[0], hat->sfmmu_pgsz[1]); 917 918 /* 919 * This implements PAGESIZE programming of the sTLB 920 * if large TTE counts don't exceed the thresholds. 921 */ 922 if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0]) 923 pgsz0 = page_szc(MMU_PAGESIZE); 924 if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1]) 925 pgsz1 = page_szc(MMU_PAGESIZE); 926 hat->sfmmu_pgsz[0] = pgsz0; 927 hat->sfmmu_pgsz[1] = pgsz1; 928 /* otherwise, accept what the HAT chose for us */ 929 } 930 931 /* 932 * The HAT calls this function when an MMU context is allocated so that we 933 * can reprogram the large TLBs appropriately for the new process using 934 * the context. 935 * 936 * The caller must hold the HAT lock. 937 */ 938 void 939 mmu_set_ctx_page_sizes(struct hat *hat) 940 { 941 uint8_t pgsz0, pgsz1; 942 uint8_t new_cext; 943 944 ASSERT(sfmmu_hat_lock_held(hat)); 945 /* 946 * Don't program 2nd dtlb for kernel and ism hat 947 */ 948 if (hat->sfmmu_ismhat || hat == ksfmmup) 949 return; 950 951 /* 952 * If supported, reprogram the TLBs to a larger pagesize. 953 */ 954 pgsz0 = hat->sfmmu_pgsz[0]; 955 pgsz1 = hat->sfmmu_pgsz[1]; 956 ASSERT(pgsz0 < mmu_page_sizes); 957 ASSERT(pgsz1 < mmu_page_sizes); 958 new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0); 959 if (hat->sfmmu_cext != new_cext) { 960 hat->sfmmu_cext = new_cext; 961 } 962 ctx_pgsz_array[hat->sfmmu_cnum] = hat->sfmmu_cext; 963 /* 964 * sfmmu_setctx_sec() will take care of the 965 * rest of the dirty work for us. 966 */ 967 } 968 969 /* 970 * Return processor specific async error structure 971 * size used. 972 */ 973 int 974 cpu_aflt_size(void) 975 { 976 return (sizeof (opl_async_flt_t)); 977 } 978 979 /* 980 * The cpu_sync_log_err() function is called via the [uc]e_drain() function to 981 * post-process CPU events that are dequeued. 
As such, it can be invoked 982 * from softint context, from AST processing in the trap() flow, or from the 983 * panic flow. We decode the CPU-specific data, and take appropriate actions. 984 * Historically this entry point was used to log the actual cmn_err(9F) text; 985 * now with FMA it is used to prepare 'flt' to be converted into an ereport. 986 * With FMA this function now also returns a flag which indicates to the 987 * caller whether the ereport should be posted (1) or suppressed (0). 988 */ 989 /*ARGSUSED*/ 990 static int 991 cpu_sync_log_err(void *flt) 992 { 993 opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt; 994 struct async_flt *aflt = (struct async_flt *)flt; 995 996 /* 997 * No extra processing of urgent error events. 998 * Always generate ereports for these events. 999 */ 1000 if (aflt->flt_status == OPL_ECC_URGENT_TRAP) 1001 return (1); 1002 1003 /* 1004 * Additional processing for synchronous errors. 1005 */ 1006 switch (opl_flt->flt_type) { 1007 case OPL_CPU_INV_SFSR: 1008 return (1); 1009 1010 case OPL_CPU_SYNC_UE: 1011 /* 1012 * The validity: SFSR_MK_UE bit has been checked 1013 * in opl_cpu_sync_error() 1014 * No more check is required. 1015 * 1016 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W, 1017 * and they have been retrieved in cpu_queue_events() 1018 */ 1019 1020 if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) { 1021 ASSERT(aflt->flt_in_memory); 1022 /* 1023 * We want to skip logging only if ALL the following 1024 * conditions are true: 1025 * 1026 * 1. We are not panicing already. 1027 * 2. The error is a memory error. 1028 * 3. There is only one error. 1029 * 4. The error is on a retired page. 1030 * 5. 
The error occurred under on_trap 1031 * protection AFLT_PROT_EC 1032 */ 1033 if (!panicstr && aflt->flt_prot == AFLT_PROT_EC && 1034 page_retire_check(aflt->flt_addr, NULL) == 0) { 1035 /* 1036 * Do not log an error from 1037 * the retired page 1038 */ 1039 softcall(ecc_page_zero, (void *)aflt->flt_addr); 1040 return (0); 1041 } 1042 if (!panicstr) 1043 cpu_page_retire(opl_flt); 1044 } 1045 return (1); 1046 1047 case OPL_CPU_SYNC_OTHERS: 1048 /* 1049 * For the following error cases, the processor HW does 1050 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt 1051 * to assign appropriate values here to reflect what we 1052 * think is the most likely cause of the problem w.r.t to 1053 * the particular error event. For Buserr and timeout 1054 * error event, we will assign OPL_ERRID_CHANNEL as the 1055 * most likely reason. For TLB parity or multiple hit 1056 * error events, we will assign the reason as 1057 * OPL_ERRID_CPU (cpu related problem) and set the 1058 * flt_eid_sid to point to the cpuid. 1059 */ 1060 1061 if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) { 1062 /* 1063 * flt_eid_sid will not be used for this case. 1064 */ 1065 opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL; 1066 } 1067 if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) { 1068 opl_flt->flt_eid_mod = OPL_ERRID_CPU; 1069 opl_flt->flt_eid_sid = aflt->flt_inst; 1070 } 1071 1072 /* 1073 * In case of no effective error bit 1074 */ 1075 if ((opl_flt->flt_bit & SFSR_ERRS) == 0) { 1076 opl_flt->flt_eid_mod = OPL_ERRID_CPU; 1077 opl_flt->flt_eid_sid = aflt->flt_inst; 1078 } 1079 break; 1080 1081 default: 1082 return (1); 1083 } 1084 return (1); 1085 } 1086 1087 /* 1088 * Retire the bad page that may contain the flushed error. 
1089 */ 1090 void 1091 cpu_page_retire(opl_async_flt_t *opl_flt) 1092 { 1093 struct async_flt *aflt = (struct async_flt *)opl_flt; 1094 (void) page_retire(aflt->flt_addr, PR_UE); 1095 } 1096 1097 /* 1098 * Invoked by error_init() early in startup and therefore before 1099 * startup_errorq() is called to drain any error Q - 1100 * 1101 * startup() 1102 * startup_end() 1103 * error_init() 1104 * cpu_error_init() 1105 * errorq_init() 1106 * errorq_drain() 1107 * start_other_cpus() 1108 * 1109 * The purpose of this routine is to create error-related taskqs. Taskqs 1110 * are used for this purpose because cpu_lock can't be grabbed from interrupt 1111 * context. 1112 * 1113 */ 1114 /*ARGSUSED*/ 1115 void 1116 cpu_error_init(int items) 1117 { 1118 opl_err_log = (opl_errlog_t *) 1119 kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP); 1120 if ((uint64_t)opl_err_log & MMU_PAGEOFFSET) 1121 cmn_err(CE_PANIC, "The base address of the error log " 1122 "is not page aligned"); 1123 } 1124 1125 /* 1126 * We route all errors through a single switch statement. 1127 */ 1128 void 1129 cpu_ue_log_err(struct async_flt *aflt) 1130 { 1131 switch (aflt->flt_class) { 1132 case CPU_FAULT: 1133 if (cpu_sync_log_err(aflt)) 1134 cpu_ereport_post(aflt); 1135 break; 1136 1137 case BUS_FAULT: 1138 bus_async_log_err(aflt); 1139 break; 1140 1141 default: 1142 cmn_err(CE_WARN, "discarding async error %p with invalid " 1143 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 1144 return; 1145 } 1146 } 1147 1148 /* 1149 * Routine for panic hook callback from panic_idle(). 1150 * 1151 * Nothing to do here. 1152 */ 1153 void 1154 cpu_async_panic_callb(void) 1155 { 1156 } 1157 1158 /* 1159 * Routine to return a string identifying the physical name 1160 * associated with a memory/cache error. 
1161 */ 1162 /*ARGSUSED*/ 1163 int 1164 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, 1165 uint64_t flt_addr, int flt_bus_id, int flt_in_memory, 1166 ushort_t flt_status, char *buf, int buflen, int *lenp) 1167 { 1168 int synd_code; 1169 int ret; 1170 1171 /* 1172 * An AFSR of -1 defaults to a memory syndrome. 1173 */ 1174 synd_code = (int)flt_synd; 1175 1176 if (&plat_get_mem_unum) { 1177 if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id, 1178 flt_in_memory, flt_status, buf, buflen, lenp)) != 0) { 1179 buf[0] = '\0'; 1180 *lenp = 0; 1181 } 1182 return (ret); 1183 } 1184 buf[0] = '\0'; 1185 *lenp = 0; 1186 return (ENOTSUP); 1187 } 1188 1189 /* 1190 * Wrapper for cpu_get_mem_unum() routine that takes an 1191 * async_flt struct rather than explicit arguments. 1192 */ 1193 int 1194 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1195 char *buf, int buflen, int *lenp) 1196 { 1197 /* 1198 * We always pass -1 so that cpu_get_mem_unum will interpret this as a 1199 * memory error. 1200 */ 1201 return (cpu_get_mem_unum(synd_status, aflt->flt_synd, 1202 (uint64_t)-1, 1203 aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, 1204 aflt->flt_status, buf, buflen, lenp)); 1205 } 1206 1207 /* 1208 * This routine is a more generic interface to cpu_get_mem_unum() 1209 * that may be used by other modules (e.g. mm). 1210 */ 1211 /*ARGSUSED*/ 1212 int 1213 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1214 char *buf, int buflen, int *lenp) 1215 { 1216 int synd_status, flt_in_memory, ret; 1217 ushort_t flt_status = 0; 1218 char unum[UNUM_NAMLEN]; 1219 1220 /* 1221 * Check for an invalid address. 
1222 */ 1223 if (afar == (uint64_t)-1) 1224 return (ENXIO); 1225 1226 if (synd == (uint64_t)-1) 1227 synd_status = AFLT_STAT_INVALID; 1228 else 1229 synd_status = AFLT_STAT_VALID; 1230 1231 flt_in_memory = (*afsr & SFSR_MEMORY) && 1232 pf_is_memory(afar >> MMU_PAGESHIFT); 1233 1234 ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1235 CPU->cpu_id, flt_in_memory, flt_status, unum, 1236 UNUM_NAMLEN, lenp); 1237 if (ret != 0) 1238 return (ret); 1239 1240 if (*lenp >= buflen) 1241 return (ENAMETOOLONG); 1242 1243 (void) strncpy(buf, unum, buflen); 1244 1245 return (0); 1246 } 1247 1248 /* 1249 * Routine to return memory information associated 1250 * with a physical address and syndrome. 1251 */ 1252 /*ARGSUSED*/ 1253 int 1254 cpu_get_mem_info(uint64_t synd, uint64_t afar, 1255 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1256 int *segsp, int *banksp, int *mcidp) 1257 { 1258 int synd_code = (int)synd; 1259 1260 if (afar == (uint64_t)-1) 1261 return (ENXIO); 1262 1263 if (p2get_mem_info != NULL) 1264 return ((p2get_mem_info)(synd_code, afar, 1265 mem_sizep, seg_sizep, bank_sizep, 1266 segsp, banksp, mcidp)); 1267 else 1268 return (ENOTSUP); 1269 } 1270 1271 /* 1272 * Routine to return a string identifying the physical 1273 * name associated with a cpuid. 1274 */ 1275 int 1276 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 1277 { 1278 int ret; 1279 char unum[UNUM_NAMLEN]; 1280 1281 if (&plat_get_cpu_unum) { 1282 if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp)) 1283 != 0) 1284 return (ret); 1285 } else { 1286 return (ENOTSUP); 1287 } 1288 1289 if (*lenp >= buflen) 1290 return (ENAMETOOLONG); 1291 1292 (void) strncpy(buf, unum, *lenp); 1293 1294 return (0); 1295 } 1296 1297 /* 1298 * This routine exports the name buffer size. 
1299 */ 1300 size_t 1301 cpu_get_name_bufsize() 1302 { 1303 return (UNUM_NAMLEN); 1304 } 1305 1306 /* 1307 * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH 1308 */ 1309 void 1310 cpu_flush_ecache(void) 1311 { 1312 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size, 1313 cpunodes[CPU->cpu_id].ecache_linesize); 1314 } 1315 1316 static uint8_t 1317 flt_to_trap_type(struct async_flt *aflt) 1318 { 1319 if (aflt->flt_status & OPL_ECC_ISYNC_TRAP) 1320 return (TRAP_TYPE_ECC_I); 1321 if (aflt->flt_status & OPL_ECC_DSYNC_TRAP) 1322 return (TRAP_TYPE_ECC_D); 1323 if (aflt->flt_status & OPL_ECC_URGENT_TRAP) 1324 return (TRAP_TYPE_URGENT); 1325 return (-1); 1326 } 1327 1328 /* 1329 * Encode the data saved in the opl_async_flt_t struct into 1330 * the FM ereport payload. 1331 */ 1332 /* ARGSUSED */ 1333 static void 1334 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload, 1335 nvlist_t *resource) 1336 { 1337 opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt; 1338 char unum[UNUM_NAMLEN]; 1339 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1340 int len; 1341 1342 1343 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) { 1344 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR, 1345 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1346 } 1347 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) { 1348 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR, 1349 DATA_TYPE_UINT64, aflt->flt_addr, NULL); 1350 } 1351 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) { 1352 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR, 1353 DATA_TYPE_UINT64, aflt->flt_stat, NULL); 1354 } 1355 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) { 1356 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC, 1357 DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL); 1358 } 1359 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) { 1360 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL, 1361 DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL); 1362 } 1363 if (aflt->flt_payload & 
FM_EREPORT_PAYLOAD_FLAG_TT) { 1364 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT, 1365 DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL); 1366 } 1367 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) { 1368 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV, 1369 DATA_TYPE_BOOLEAN_VALUE, 1370 (aflt->flt_priv ? B_TRUE : B_FALSE), NULL); 1371 } 1372 if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) { 1373 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS, 1374 DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL); 1375 } 1376 1377 switch (opl_flt->flt_eid_mod) { 1378 case OPL_ERRID_CPU: 1379 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1380 (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id); 1381 (void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION, 1382 NULL, opl_flt->flt_eid_sid, 1383 (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version, 1384 sbuf); 1385 fm_payload_set(payload, 1386 FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1387 DATA_TYPE_NVLIST, resource, NULL); 1388 break; 1389 1390 case OPL_ERRID_CHANNEL: 1391 /* 1392 * No resource is created but the cpumem DE will find 1393 * the defective path by retreiving EID from SFSR which is 1394 * included in the payload. 1395 */ 1396 break; 1397 1398 case OPL_ERRID_MEM: 1399 (void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len); 1400 (void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, 1401 NULL, unum, NULL, (uint64_t)-1); 1402 fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 1403 DATA_TYPE_NVLIST, resource, NULL); 1404 break; 1405 1406 case OPL_ERRID_PATH: 1407 /* 1408 * No resource is created but the cpumem DE will find 1409 * the defective path by retreiving EID from SFSR which is 1410 * included in the payload. 1411 */ 1412 break; 1413 } 1414 } 1415 1416 /* 1417 * Returns whether fault address is valid for this error bit and 1418 * whether the address is "in memory" (i.e. pf_is_memory returns 1). 
1419 */ 1420 /*ARGSUSED*/ 1421 static int 1422 cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit) 1423 { 1424 struct async_flt *aflt = (struct async_flt *)opl_flt; 1425 1426 if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) { 1427 return ((t_afsr_bit & SFSR_MEMORY) && 1428 pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)); 1429 } 1430 return (0); 1431 } 1432 1433 /* 1434 * In OPL SCF does the stick synchronization. 1435 */ 1436 void 1437 sticksync_slave(void) 1438 { 1439 } 1440 1441 /* 1442 * In OPL SCF does the stick synchronization. 1443 */ 1444 void 1445 sticksync_master(void) 1446 { 1447 } 1448 1449 /* 1450 * Cpu private unitialization. OPL cpus do not use the private area. 1451 */ 1452 void 1453 cpu_uninit_private(struct cpu *cp) 1454 { 1455 cmp_delete_cpu(cp->cpu_id); 1456 } 1457 1458 /* 1459 * Always flush an entire cache. 1460 */ 1461 void 1462 cpu_error_ecache_flush(void) 1463 { 1464 cpu_flush_ecache(); 1465 } 1466 1467 void 1468 cpu_ereport_post(struct async_flt *aflt) 1469 { 1470 char *cpu_type, buf[FM_MAX_CLASS]; 1471 nv_alloc_t *nva = NULL; 1472 nvlist_t *ereport, *detector, *resource; 1473 errorq_elem_t *eqep; 1474 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 1475 1476 if (aflt->flt_panic || panicstr) { 1477 eqep = errorq_reserve(ereport_errorq); 1478 if (eqep == NULL) 1479 return; 1480 ereport = errorq_elem_nvl(ereport_errorq, eqep); 1481 nva = errorq_elem_nva(ereport_errorq, eqep); 1482 } else { 1483 ereport = fm_nvlist_create(nva); 1484 } 1485 1486 /* 1487 * Create the scheme "cpu" FMRI. 
1488 */ 1489 detector = fm_nvlist_create(nva); 1490 resource = fm_nvlist_create(nva); 1491 switch (cpunodes[aflt->flt_inst].implementation) { 1492 case OLYMPUS_C_IMPL: 1493 cpu_type = FM_EREPORT_CPU_SPARC64_VI; 1494 break; 1495 default: 1496 cpu_type = FM_EREPORT_CPU_UNSUPPORTED; 1497 break; 1498 } 1499 (void) snprintf(sbuf, sizeof (sbuf), "%llX", 1500 (u_longlong_t)cpunodes[aflt->flt_inst].device_id); 1501 (void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL, 1502 aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version, 1503 sbuf); 1504 1505 /* 1506 * Encode all the common data into the ereport. 1507 */ 1508 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", 1509 FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class); 1510 1511 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 1512 fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL); 1513 1514 /* 1515 * Encode the error specific data that was saved in 1516 * the async_flt structure into the ereport. 1517 */ 1518 cpu_payload_add_aflt(aflt, ereport, resource); 1519 1520 if (aflt->flt_panic || panicstr) { 1521 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 1522 } else { 1523 (void) fm_ereport_post(ereport, EVCH_TRYHARD); 1524 fm_nvlist_destroy(ereport, FM_NVA_FREE); 1525 fm_nvlist_destroy(detector, FM_NVA_FREE); 1526 fm_nvlist_destroy(resource, FM_NVA_FREE); 1527 } 1528 } 1529 1530 void 1531 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 1532 { 1533 int status; 1534 ddi_fm_error_t de; 1535 1536 bzero(&de, sizeof (ddi_fm_error_t)); 1537 1538 de.fme_version = DDI_FME_VERSION; 1539 de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1); 1540 de.fme_flag = expected; 1541 de.fme_bus_specific = (void *)aflt->flt_addr; 1542 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 1543 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 1544 aflt->flt_panic = 1; 1545 } 1546 1547 void 1548 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 1549 errorq_t 
*eqp, uint_t flag) 1550 { 1551 struct async_flt *aflt = (struct async_flt *)payload; 1552 1553 aflt->flt_erpt_class = error_class; 1554 errorq_dispatch(eqp, payload, payload_sz, flag); 1555 } 1556 1557 void 1558 adjust_hw_copy_limits(int ecache_size) 1559 { 1560 /* 1561 * Set hw copy limits. 1562 * 1563 * /etc/system will be parsed later and can override one or more 1564 * of these settings. 1565 * 1566 * At this time, ecache size seems only mildly relevant. 1567 * We seem to run into issues with the d-cache and stalls 1568 * we see on misses. 1569 * 1570 * Cycle measurement indicates that 2 byte aligned copies fare 1571 * little better than doing things with VIS at around 512 bytes. 1572 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte 1573 * aligned is faster whenever the source and destination data 1574 * in cache and the total size is less than 2 Kbytes. The 2K 1575 * limit seems to be driven by the 2K write cache. 1576 * When more than 2K of copies are done in non-VIS mode, stores 1577 * backup in the write cache. In VIS mode, the write cache is 1578 * bypassed, allowing faster cache-line writes aligned on cache 1579 * boundaries. 1580 * 1581 * In addition, in non-VIS mode, there is no prefetching, so 1582 * for larger copies, the advantage of prefetching to avoid even 1583 * occasional cache misses is enough to justify using the VIS code. 1584 * 1585 * During testing, it was discovered that netbench ran 3% slower 1586 * when hw_copy_limit_8 was 2K or larger. Apparently for server 1587 * applications, data is only used once (copied to the output 1588 * buffer, then copied by the network device off the system). Using 1589 * the VIS copy saves more L2 cache state. Network copies are 1590 * around 1.3K to 1.5K in size for historical reasons. 1591 * 1592 * Therefore, a limit of 1K bytes will be used for the 8 byte 1593 * aligned copy even for large caches and 8 MB ecache. 
The 1594 * infrastructure to allow different limits for different sized 1595 * caches is kept to allow further tuning in later releases. 1596 */ 1597 1598 if (min_ecache_size == 0 && use_hw_bcopy) { 1599 /* 1600 * First time through - should be before /etc/system 1601 * is read. 1602 * Could skip the checks for zero but this lets us 1603 * preserve any debugger rewrites. 1604 */ 1605 if (hw_copy_limit_1 == 0) { 1606 hw_copy_limit_1 = VIS_COPY_THRESHOLD; 1607 priv_hcl_1 = hw_copy_limit_1; 1608 } 1609 if (hw_copy_limit_2 == 0) { 1610 hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD; 1611 priv_hcl_2 = hw_copy_limit_2; 1612 } 1613 if (hw_copy_limit_4 == 0) { 1614 hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD; 1615 priv_hcl_4 = hw_copy_limit_4; 1616 } 1617 if (hw_copy_limit_8 == 0) { 1618 hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; 1619 priv_hcl_8 = hw_copy_limit_8; 1620 } 1621 min_ecache_size = ecache_size; 1622 } else { 1623 /* 1624 * MP initialization. Called *after* /etc/system has 1625 * been parsed. One CPU has already been initialized. 1626 * Need to cater for /etc/system having scragged one 1627 * of our values. 1628 */ 1629 if (ecache_size == min_ecache_size) { 1630 /* 1631 * Same size ecache. We do nothing unless we 1632 * have a pessimistic ecache setting. In that 1633 * case we become more optimistic (if the cache is 1634 * large enough). 1635 */ 1636 if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) { 1637 /* 1638 * Need to adjust hw_copy_limit* from our 1639 * pessimistic uniprocessor value to a more 1640 * optimistic UP value *iff* it hasn't been 1641 * reset. 
1642 */ 1643 if ((ecache_size > 1048576) && 1644 (priv_hcl_8 == hw_copy_limit_8)) { 1645 if (ecache_size <= 2097152) 1646 hw_copy_limit_8 = 4 * 1647 VIS_COPY_THRESHOLD; 1648 else if (ecache_size <= 4194304) 1649 hw_copy_limit_8 = 4 * 1650 VIS_COPY_THRESHOLD; 1651 else 1652 hw_copy_limit_8 = 4 * 1653 VIS_COPY_THRESHOLD; 1654 priv_hcl_8 = hw_copy_limit_8; 1655 } 1656 } 1657 } else if (ecache_size < min_ecache_size) { 1658 /* 1659 * A different ecache size. Can this even happen? 1660 */ 1661 if (priv_hcl_8 == hw_copy_limit_8) { 1662 /* 1663 * The previous value that we set 1664 * is unchanged (i.e., it hasn't been 1665 * scragged by /etc/system). Rewrite it. 1666 */ 1667 if (ecache_size <= 1048576) 1668 hw_copy_limit_8 = 8 * 1669 VIS_COPY_THRESHOLD; 1670 else if (ecache_size <= 2097152) 1671 hw_copy_limit_8 = 8 * 1672 VIS_COPY_THRESHOLD; 1673 else if (ecache_size <= 4194304) 1674 hw_copy_limit_8 = 8 * 1675 VIS_COPY_THRESHOLD; 1676 else 1677 hw_copy_limit_8 = 10 * 1678 VIS_COPY_THRESHOLD; 1679 priv_hcl_8 = hw_copy_limit_8; 1680 min_ecache_size = ecache_size; 1681 } 1682 } 1683 } 1684 } 1685 1686 #define VIS_BLOCKSIZE 64 1687 1688 int 1689 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 1690 { 1691 int ret, watched; 1692 1693 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1694 ret = dtrace_blksuword32(addr, data, 0); 1695 if (watched) 1696 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 1697 1698 return (ret); 1699 } 1700 1701 void 1702 opl_cpu_reg_init() 1703 { 1704 uint64_t this_cpu_log; 1705 1706 /* 1707 * We do not need to re-initialize cpu0 registers. 1708 */ 1709 if (cpu[getprocessorid()] == &cpu0) 1710 return; 1711 1712 /* 1713 * Initialize Error log Scratch register for error handling. 1714 */ 1715 1716 this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) + 1717 ERRLOG_BUFSZ * (getprocessorid()))); 1718 opl_error_setup(this_cpu_log); 1719 1720 /* 1721 * Enable MMU translating multiple page sizes for 1722 * sITLB and sDTLB. 
1723 */ 1724 opl_mpg_enable(); 1725 } 1726 1727 /* 1728 * Queue one event in ue_queue based on ecc_type_to_info entry. 1729 */ 1730 static void 1731 cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason, 1732 ecc_type_to_info_t *eccp) 1733 { 1734 struct async_flt *aflt = (struct async_flt *)opl_flt; 1735 1736 if (reason && 1737 strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { 1738 (void) strcat(reason, eccp->ec_reason); 1739 } 1740 1741 opl_flt->flt_bit = eccp->ec_afsr_bit; 1742 opl_flt->flt_type = eccp->ec_flt_type; 1743 aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit); 1744 aflt->flt_payload = eccp->ec_err_payload; 1745 1746 ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP)); 1747 cpu_errorq_dispatch(eccp->ec_err_class, 1748 (void *)opl_flt, sizeof (opl_async_flt_t), 1749 ue_queue, 1750 aflt->flt_panic); 1751 } 1752 1753 /* 1754 * Queue events on async event queue one event per error bit. 1755 * Return number of events queued. 1756 */ 1757 int 1758 cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs) 1759 { 1760 struct async_flt *aflt = (struct async_flt *)opl_flt; 1761 ecc_type_to_info_t *eccp; 1762 int nevents = 0; 1763 1764 /* 1765 * Queue expected errors, error bit and fault type must must match 1766 * in the ecc_type_to_info table. 1767 */ 1768 for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; 1769 eccp++) { 1770 if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && 1771 (eccp->ec_flags & aflt->flt_status) != 0) { 1772 /* 1773 * UE error event can be further 1774 * classified/breakdown into finer granularity 1775 * based on the flt_eid_mod value set by HW. We do 1776 * special handling here so that we can report UE 1777 * error in finer granularity as ue_mem, 1778 * ue_channel, ue_cpu or ue_path. 
1779 */ 1780 if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) { 1781 opl_flt->flt_eid_mod = 1782 (aflt->flt_stat & SFSR_EID_MOD) 1783 >> SFSR_EID_MOD_SHIFT; 1784 opl_flt->flt_eid_sid = 1785 (aflt->flt_stat & SFSR_EID_SID) 1786 >> SFSR_EID_SID_SHIFT; 1787 /* 1788 * Need to advance eccp pointer by flt_eid_mod 1789 * so that we get an appropriate ecc pointer 1790 * 1791 * EID # of advances 1792 * ---------------------------------- 1793 * OPL_ERRID_MEM 0 1794 * OPL_ERRID_CHANNEL 1 1795 * OPL_ERRID_CPU 2 1796 * OPL_ERRID_PATH 3 1797 */ 1798 eccp += opl_flt->flt_eid_mod; 1799 } 1800 cpu_queue_one_event(opl_flt, reason, eccp); 1801 t_afsr_errs &= ~eccp->ec_afsr_bit; 1802 nevents++; 1803 } 1804 } 1805 1806 return (nevents); 1807 } 1808 1809 /* 1810 * Sync. error wrapper functions. 1811 * We use these functions in order to transfer here from the 1812 * nucleus trap handler information about trap type (data or 1813 * instruction) and trap level (0 or above 0). This way we 1814 * get rid of using SFSR's reserved bits. 
1815 */ 1816 1817 #define OPL_SYNC_TL0 0 1818 #define OPL_SYNC_TL1 1 1819 #define OPL_ISYNC_ERR 0 1820 #define OPL_DSYNC_ERR 1 1821 1822 void 1823 opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1824 { 1825 uint64_t t_sfar = p_sfar; 1826 uint64_t t_sfsr = p_sfsr; 1827 1828 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1829 OPL_SYNC_TL0, OPL_ISYNC_ERR); 1830 } 1831 1832 void 1833 opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1834 { 1835 uint64_t t_sfar = p_sfar; 1836 uint64_t t_sfsr = p_sfsr; 1837 1838 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1839 OPL_SYNC_TL1, OPL_ISYNC_ERR); 1840 } 1841 1842 void 1843 opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1844 { 1845 uint64_t t_sfar = p_sfar; 1846 uint64_t t_sfsr = p_sfsr; 1847 1848 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1849 OPL_SYNC_TL0, OPL_DSYNC_ERR); 1850 } 1851 1852 void 1853 opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) 1854 { 1855 uint64_t t_sfar = p_sfar; 1856 uint64_t t_sfsr = p_sfsr; 1857 1858 opl_cpu_sync_error(rp, t_sfar, t_sfsr, 1859 OPL_SYNC_TL1, OPL_DSYNC_ERR); 1860 } 1861 1862 /* 1863 * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL 1864 * and TLB_PRT. 1865 * This function is designed based on cpu_deferred_error(). 1866 */ 1867 1868 static void 1869 opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr, 1870 uint_t tl, uint_t derr) 1871 { 1872 opl_async_flt_t opl_flt; 1873 struct async_flt *aflt; 1874 int trampolined = 0; 1875 char pr_reason[MAX_REASON_STRING]; 1876 uint64_t log_sfsr; 1877 int expected = DDI_FM_ERR_UNEXPECTED; 1878 ddi_acc_hdl_t *hp; 1879 1880 /* 1881 * We need to look at p_flag to determine if the thread detected an 1882 * error while dumping core. We can't grab p_lock here, but it's ok 1883 * because we just need a consistent snapshot and we know that everyone 1884 * else will store a consistent set of bits while holding p_lock. 
We 1885 * don't have to worry about a race because SDOCORE is set once prior 1886 * to doing i/o from the process's address space and is never cleared. 1887 */ 1888 uint_t pflag = ttoproc(curthread)->p_flag; 1889 1890 pr_reason[0] = '\0'; 1891 1892 /* 1893 * handle the specific error 1894 */ 1895 bzero(&opl_flt, sizeof (opl_async_flt_t)); 1896 aflt = (struct async_flt *)&opl_flt; 1897 aflt->flt_id = gethrtime_waitfree(); 1898 aflt->flt_bus_id = getprocessorid(); 1899 aflt->flt_inst = CPU->cpu_id; 1900 aflt->flt_stat = t_sfsr; 1901 aflt->flt_addr = t_sfar; 1902 aflt->flt_pc = (caddr_t)rp->r_pc; 1903 aflt->flt_prot = (uchar_t)AFLT_PROT_NONE; 1904 aflt->flt_class = (uchar_t)CPU_FAULT; 1905 aflt->flt_priv = (uchar_t) 1906 (tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ? 1 : 0)); 1907 aflt->flt_tl = (uchar_t)tl; 1908 aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 || 1909 (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0); 1910 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1911 aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP; 1912 1913 /* 1914 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain. 1915 * So, clear all error bits to avoid mis-handling and force the system 1916 * panicked. 1917 * We skip all the procedures below down to the panic message call. 1918 */ 1919 if (!(t_sfsr & SFSR_FV)) { 1920 opl_flt.flt_type = OPL_CPU_INV_SFSR; 1921 aflt->flt_panic = 1; 1922 aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC; 1923 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, 1924 (void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue, 1925 aflt->flt_panic); 1926 fm_panic("%sErrors(s)", "invalid SFSR"); 1927 } 1928 1929 /* 1930 * If either UE and MK bit is off, this is not valid UE error. 1931 * If it is not valid UE error, clear UE & MK_UE bits to prevent 1932 * mis-handling below. 1933 * aflt->flt_stat keeps the original bits as a reference. 
1934 */ 1935 if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) != 1936 (SFSR_MK_UE|SFSR_UE)) { 1937 t_sfsr &= ~(SFSR_MK_UE|SFSR_UE); 1938 } 1939 1940 /* 1941 * If the trap occurred in privileged mode at TL=0, we need to check to 1942 * see if we were executing in the kernel under on_trap() or t_lofault 1943 * protection. If so, modify the saved registers so that we return 1944 * from the trap to the appropriate trampoline routine. 1945 */ 1946 if (!aflt->flt_panic && aflt->flt_priv && tl == 0) { 1947 if (curthread->t_ontrap != NULL) { 1948 on_trap_data_t *otp = curthread->t_ontrap; 1949 1950 if (otp->ot_prot & OT_DATA_EC) { 1951 aflt->flt_prot = (uchar_t)AFLT_PROT_EC; 1952 otp->ot_trap |= (ushort_t)OT_DATA_EC; 1953 rp->r_pc = otp->ot_trampoline; 1954 rp->r_npc = rp->r_pc + 4; 1955 trampolined = 1; 1956 } 1957 1958 if ((t_sfsr & (SFSR_TO | SFSR_BERR)) && 1959 (otp->ot_prot & OT_DATA_ACCESS)) { 1960 aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS; 1961 otp->ot_trap |= (ushort_t)OT_DATA_ACCESS; 1962 rp->r_pc = otp->ot_trampoline; 1963 rp->r_npc = rp->r_pc + 4; 1964 trampolined = 1; 1965 /* 1966 * for peeks and caut_gets errors are expected 1967 */ 1968 hp = (ddi_acc_hdl_t *)otp->ot_handle; 1969 if (!hp) 1970 expected = DDI_FM_ERR_PEEK; 1971 else if (hp->ah_acc.devacc_attr_access == 1972 DDI_CAUTIOUS_ACC) 1973 expected = DDI_FM_ERR_EXPECTED; 1974 } 1975 1976 } else if (curthread->t_lofault) { 1977 aflt->flt_prot = AFLT_PROT_COPY; 1978 rp->r_g1 = EFAULT; 1979 rp->r_pc = curthread->t_lofault; 1980 rp->r_npc = rp->r_pc + 4; 1981 trampolined = 1; 1982 } 1983 } 1984 1985 /* 1986 * If we're in user mode or we're doing a protected copy, we either 1987 * want the ASTON code below to send a signal to the user process 1988 * or we want to panic if aft_panic is set. 1989 * 1990 * If we're in privileged mode and we're not doing a copy, then we 1991 * need to check if we've trampolined. If we haven't trampolined, 1992 * we should panic. 
1993 */ 1994 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 1995 if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO))) 1996 aflt->flt_panic |= aft_panic; 1997 } else if (!trampolined) { 1998 aflt->flt_panic = 1; 1999 } 2000 2001 /* 2002 * If we've trampolined due to a privileged TO or BERR, or if an 2003 * unprivileged TO or BERR occurred, we don't want to enqueue an 2004 * event for that TO or BERR. Queue all other events (if any) besides 2005 * the TO/BERR. 2006 */ 2007 log_sfsr = t_sfsr; 2008 if (trampolined) { 2009 log_sfsr &= ~(SFSR_TO | SFSR_BERR); 2010 } else if (!aflt->flt_priv) { 2011 /* 2012 * User mode, suppress messages if 2013 * cpu_berr_to_verbose is not set. 2014 */ 2015 if (!cpu_berr_to_verbose) 2016 log_sfsr &= ~(SFSR_TO | SFSR_BERR); 2017 } 2018 2019 if (((log_sfsr & SFSR_ERRS) && 2020 (cpu_queue_events(&opl_flt, pr_reason, t_sfsr) == 0)) || 2021 ((t_sfsr & SFSR_ERRS) == 0)) { 2022 opl_flt.flt_type = OPL_CPU_INV_SFSR; 2023 aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC; 2024 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, 2025 (void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue, 2026 aflt->flt_panic); 2027 } 2028 2029 if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) { 2030 cpu_run_bus_error_handlers(aflt, expected); 2031 } 2032 2033 /* 2034 * Panic here if aflt->flt_panic has been set. Enqueued errors will 2035 * be logged as part of the panic flow. 2036 */ 2037 if (aflt->flt_panic) { 2038 if (pr_reason[0] == 0) 2039 strcpy(pr_reason, "invalid SFSR "); 2040 2041 fm_panic("%sErrors(s)", pr_reason); 2042 } 2043 2044 /* 2045 * If we queued an error and we are going to return from the trap and 2046 * the error was in user mode or inside of a copy routine, set AST flag 2047 * so the queue will be drained before returning to user mode. The 2048 * AST processing will also act on our failure policy. 
2049 */ 2050 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 2051 int pcb_flag = 0; 2052 2053 if (t_sfsr & (SFSR_ERRS & 2054 ~(SFSR_BERR | SFSR_TO))) 2055 pcb_flag |= ASYNC_HWERR; 2056 2057 if (t_sfsr & SFSR_BERR) 2058 pcb_flag |= ASYNC_BERR; 2059 2060 if (t_sfsr & SFSR_TO) 2061 pcb_flag |= ASYNC_BTO; 2062 2063 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 2064 aston(curthread); 2065 } 2066 } 2067 2068 /*ARGSUSED*/ 2069 void 2070 opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl) 2071 { 2072 opl_async_flt_t opl_flt; 2073 struct async_flt *aflt; 2074 char pr_reason[MAX_REASON_STRING]; 2075 2076 /* normalize tl */ 2077 tl = (tl >= 2 ? 1 : 0); 2078 pr_reason[0] = '\0'; 2079 2080 bzero(&opl_flt, sizeof (opl_async_flt_t)); 2081 aflt = (struct async_flt *)&opl_flt; 2082 aflt->flt_id = gethrtime_waitfree(); 2083 aflt->flt_bus_id = getprocessorid(); 2084 aflt->flt_inst = CPU->cpu_id; 2085 aflt->flt_stat = p_ugesr; 2086 aflt->flt_pc = (caddr_t)rp->r_pc; 2087 aflt->flt_class = (uchar_t)CPU_FAULT; 2088 aflt->flt_tl = tl; 2089 aflt->flt_priv = (uchar_t) 2090 (tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ? 1 : 0)); 2091 aflt->flt_status = OPL_ECC_URGENT_TRAP; 2092 aflt->flt_panic = 1; 2093 /* 2094 * HW does not set mod/sid in case of urgent error. 2095 * So we have to set it here. 2096 */ 2097 opl_flt.flt_eid_mod = OPL_ERRID_CPU; 2098 opl_flt.flt_eid_sid = aflt->flt_inst; 2099 2100 if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) { 2101 opl_flt.flt_type = OPL_CPU_INV_UGESR; 2102 aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT; 2103 cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG, 2104 (void *)&opl_flt, sizeof (opl_async_flt_t), 2105 ue_queue, aflt->flt_panic); 2106 } 2107 2108 fm_panic("Urgent Error"); 2109 } 2110 2111 /* 2112 * Initialization error counters resetting. 
2113 */ 2114 /* ARGSUSED */ 2115 static void 2116 opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when) 2117 { 2118 hdlr->cyh_func = (cyc_func_t)ras_cntr_reset; 2119 hdlr->cyh_level = CY_LOW_LEVEL; 2120 hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id; 2121 2122 when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU); 2123 when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval; 2124 } 2125 2126 void 2127 cpu_mp_init(void) 2128 { 2129 cyc_omni_handler_t hdlr; 2130 2131 hdlr.cyo_online = opl_ras_online; 2132 hdlr.cyo_offline = NULL; 2133 hdlr.cyo_arg = NULL; 2134 mutex_enter(&cpu_lock); 2135 (void) cyclic_add_omni(&hdlr); 2136 mutex_exit(&cpu_lock); 2137 } 2138 2139 /*ARGSUSED*/ 2140 void 2141 mmu_init_kernel_pgsz(struct hat *hat) 2142 { 2143 } 2144 2145 size_t 2146 mmu_get_kernel_lpsize(size_t lpsize) 2147 { 2148 uint_t tte; 2149 2150 if (lpsize == 0) { 2151 /* no setting for segkmem_lpsize in /etc/system: use default */ 2152 return (MMU_PAGESIZE4M); 2153 } 2154 2155 for (tte = TTE8K; tte <= TTE4M; tte++) { 2156 if (lpsize == TTEBYTES(tte)) 2157 return (lpsize); 2158 } 2159 2160 return (TTEBYTES(TTE8K)); 2161 } 2162 2163 /* 2164 * The following are functions that are unused in 2165 * OPL cpu module. They are defined here to resolve 2166 * dependencies in the "unix" module. 2167 * Unused functions that should never be called in 2168 * OPL are coded with ASSERT(0). 
2169 */ 2170 2171 void 2172 cpu_disable_errors(void) 2173 {} 2174 2175 void 2176 cpu_enable_errors(void) 2177 { ASSERT(0); } 2178 2179 /*ARGSUSED*/ 2180 void 2181 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t) 2182 { ASSERT(0); } 2183 2184 /*ARGSUSED*/ 2185 void 2186 cpu_faulted_enter(struct cpu *cp) 2187 {} 2188 2189 /*ARGSUSED*/ 2190 void 2191 cpu_faulted_exit(struct cpu *cp) 2192 {} 2193 2194 /*ARGSUSED*/ 2195 void 2196 cpu_check_allcpus(struct async_flt *aflt) 2197 {} 2198 2199 /*ARGSUSED*/ 2200 void 2201 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t) 2202 { ASSERT(0); } 2203 2204 /*ARGSUSED*/ 2205 void 2206 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz) 2207 { ASSERT(0); } 2208 2209 /*ARGSUSED*/ 2210 void 2211 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 2212 { ASSERT(0); } 2213 2214 /*ARGSUSED*/ 2215 void 2216 cpu_busy_ecache_scrub(struct cpu *cp) 2217 {} 2218 2219 /*ARGSUSED*/ 2220 void 2221 cpu_idle_ecache_scrub(struct cpu *cp) 2222 {} 2223 2224 /* ARGSUSED */ 2225 void 2226 cpu_change_speed(uint64_t divisor, uint64_t arg2) 2227 { ASSERT(0); } 2228 2229 void 2230 cpu_init_cache_scrub(void) 2231 {} 2232 2233 /* ARGSUSED */ 2234 int 2235 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 2236 { 2237 return (ENOTSUP); 2238 } 2239 2240 /* ARGSUSED */ 2241 int 2242 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) 2243 { 2244 return (ENOTSUP); 2245 } 2246 2247 /* ARGSUSED */ 2248 int 2249 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) 2250 { 2251 return (ENOTSUP); 2252 } 2253 2254 /*ARGSUSED*/ 2255 void 2256 itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) 2257 { ASSERT(0); } 2258 2259 /*ARGSUSED*/ 2260 void 2261 dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) 2262 { ASSERT(0); } 2263