/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Internal functions. */ static int cpu_sync_log_err(void *flt); static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *); static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t); static int cpu_flt_in_memory(opl_async_flt_t *, uint64_t); /* * Error counters resetting interval. */ static int opl_async_check_interval = 60; /* 1 min */ /* * Maximum number of contexts for Olympus-C. */ #define MAX_NCTXS (1 << 13) /* Will be set !NULL for SPARC64-VI and derivatives. */ static uchar_t ctx_pgsz_arr[MAX_NCTXS]; uchar_t *ctx_pgsz_array = ctx_pgsz_arr; /* * PA[22:0] represent Displacement in Jupiter * configuration space. 
*/
uint_t	root_phys_addr_lo_mask = 0x7fffffu;

/*
 * Set in /etc/system to control logging of user BERR/TO's.
 */
int cpu_berr_to_verbose = 0;

static int min_ecache_size;
static uint_t priv_hcl_1;
static uint_t priv_hcl_2;
static uint_t priv_hcl_4;
static uint_t priv_hcl_8;

/*
 * Olympus error log
 */
static opl_errlog_t	*opl_err_log;

/*
 * Error classification table.
 *
 * UE is classified into four classes (MEM, CHANNEL, CPU, PATH).
 * No other entry may be inserted between the following four UE class
 * entries.
 *
 * The initializer is written flat: every entry consists of seven
 * consecutive values, and the table is terminated by an entry whose
 * values are all 0/NULL.
 * NOTE(review): the meaning of each of the seven fields comes from the
 * ecc_type_to_info_t declaration, which is not visible in this file --
 * confirm against the header before adding or reordering entries.
 */
ecc_type_to_info_t ecc_type_to_info[] = {
	/* Synchronous-trap entries: the four UE classes first. */
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_UE,
		"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_UE_MEM,
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_UE,
		"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_UE_CHANNEL,
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_UE,
		"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_UE_CPU,
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_UE,
		"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_UE_PATH,
	SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_OTHERS,
		"Bus Error",  FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_BERR,
	SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_OTHERS,
		"Bus Timeout",  FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_BTO,
	SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_OTHERS,
		"TLB MultiHit",  FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_MTLB,
	SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP),	OPL_CPU_SYNC_OTHERS,
		"TLB Parity",  FM_EREPORT_PAYLOAD_SYNC,
		FM_EREPORT_CPU_TLBP,

	/* Urgent-trap entries. */
	UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP,    OPL_CPU_URGENT,
		"IAUG CRE",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_CRE,
	UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT",
		OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
		"IAUG TSBCTXT",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_TSBCTX,
	UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP,    OPL_CPU_URGENT,
		"IUG TSBP",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_TSBP,
	UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"IUG PSTATE",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_PSTATE,
	UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
		"IUG TSTATE",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_TSTATE,
	UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
		"IUG FREG",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_IUG_F,
	UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
		"IUG RREG",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_IUG_R,
	UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
		"AUG SDC",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_SDC,
	UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
		"IUG WDT",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_WDT,
	UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP,    OPL_CPU_URGENT,
		"IUG DTLB",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_DTLB,
	UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP,    OPL_CPU_URGENT,
		"IUG ITLB",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_ITLB,
	UGESR_IUG_COREERR, "IUG_COREERR",
		OPL_ECC_URGENT_TRAP,	OPL_CPU_URGENT,
		"IUG COREERR",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_CORE,
	UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP,  OPL_CPU_URGENT,
		"MULTI DAE",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_DAE,
	UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP,  OPL_CPU_URGENT,
		"MULTI IAE",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_IAE,
	UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP,  OPL_CPU_URGENT,
		"MULTI UGE",  FM_EREPORT_PAYLOAD_URGENT,
		FM_EREPORT_CPU_UGE,

	/* Terminating entry. */
	0,		NULL,		0,		0,
		NULL,  0,	   0,
};

/*
 * Optional hook used by cpu_get_mem_info(); that routine returns ENOTSUP
 * while this pointer is NULL.
 */
int (*p2get_mem_info)(int synd_code, uint64_t paddr,
		uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
		int *segsp, int *banksp, int *mcidp);


/*
 * Setup trap handlers for 0xA, 0x32, 0x40 trap types.
*/ void cpu_init_trap(void) { OPL_SET_TRAP(tt0_iae, opl_serr_instr); OPL_SET_TRAP(tt1_iae, opl_serr_instr); OPL_SET_TRAP(tt0_dae, opl_serr_instr); OPL_SET_TRAP(tt1_dae, opl_serr_instr); OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr); OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr); } static int getintprop(pnode_t node, char *name, int deflt) { int value; switch (prom_getproplen(node, name)) { case sizeof (int): (void) prom_getprop(node, name, (caddr_t)&value); break; default: value = deflt; break; } return (value); } /* * Set the magic constants of the implementation. */ /*ARGSUSED*/ void cpu_fiximp(pnode_t dnode) { int i, a; extern int vac_size, vac_shift; extern uint_t vac_mask; static struct { char *name; int *var; int defval; } prop[] = { "l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE, "l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE, "l1-icache-size", &icache_size, OPL_ICACHE_SIZE, "l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE, "l2-cache-size", &ecache_size, OPL_ECACHE_SIZE, "l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE, "l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY }; for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) *prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval); ecache_setsize = ecache_size / ecache_associativity; vac_size = OPL_VAC_SIZE; vac_mask = MMU_PAGEMASK & (vac_size - 1); i = 0; a = vac_size; while (a >>= 1) ++i; vac_shift = i; shm_alignment = vac_size; vac = 1; } #ifdef OLYMPUS_C_REV_B_ERRATA_XCALL /* * Quick and dirty way to redefine locally in * OPL the value of IDSR_BN_SETS to 31 instead * of the standard 32 value. This is to workaround * REV_B of Olympus_c processor's problem in handling * more than 31 xcall broadcast. 
*/
#undef	IDSR_BN_SETS
#define	IDSR_BN_SETS    31
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */

/*
 * Send a mondo (cross call) to every CPU in 'set' and wait until the
 * dispatch status register read by getidsr() reports completion.
 *
 * Up to IDSR_BN_SETS targets (bn_sets when the REV_A errata code is
 * compiled in) are shipped in the first pass; cpuids[] records which CPU
 * was shipped in each slot.  When NCPU > IDSR_BN_SETS the remaining CPUs
 * are shipped from inside the polling loop as slots become free, walking
 * the set downward from "index".  NACKed slots are re-shipped after a
 * short spin.  If the tick deadline (xc_tick_limit) expires the routine
 * logs the outstanding slots and panics, unless panic_quiesce is set, in
 * which case it just returns.
 */
void
send_mondo_set(cpuset_t set)
{
	int lo, busy, nack, shipped = 0;
	uint16_t i, cpuids[IDSR_BN_SETS];
	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
	uint64_t starttick, endtick, tick, lasttick;
#if (NCPU > IDSR_BN_SETS)
	int index = 0;
	int ncpuids = 0;
#endif
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	int bn_sets = IDSR_BN_SETS;
	uint64_t ver;

	ASSERT(NCPU > bn_sets);
#endif

	ASSERT(!CPUSET_ISNULL(set));
	starttick = lasttick = gettick();

#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	/* On a rev A Olympus-C only one slot is used per pass. */
	ver = ultra_getver();
	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
		((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
		bn_sets = 1;
#endif

#if (NCPU <= IDSR_BN_SETS)
	/* Every possible CPU fits in one pass: ship to all of them now. */
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			shipit(i, shipped);
			nackmask |= IDSR_NACK_BIT(shipped);
			cpuids[shipped++] = i;
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
#else
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			ncpuids++;

			/*
			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
			 * find we have shipped to more than (IDSR_BN_SETS)
			 * CPUs, set "index" to the highest numbered CPU in
			 * the set so we can ship to other CPUs a bit later on.
			 */
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			if (shipped < bn_sets) {
#else
			if (shipped < IDSR_BN_SETS) {
#endif
				shipit(i, shipped);
				nackmask |= IDSR_NACK_BIT(shipped);
				cpuids[shipped++] = i;
				CPUSET_DEL(set, i);
				if (CPUSET_ISNULL(set))
					break;
			} else
				index = (int)i;
		}

	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
#endif

	busymask = IDSR_NACK_TO_BUSY(nackmask);
	busy = nack = 0;
	endtick = starttick + xc_tick_limit;
	for (;;) {
		idsr = getidsr();
#if (NCPU <= IDSR_BN_SETS)
		if (idsr == 0)
			break;
#else
		if (idsr == 0 && shipped == ncpuids)
			break;
#endif
		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			/* List every slot that is still NACKed or busy. */
			cmn_err(CE_CONT, "send mondo timeout "
				"[%d NACK %d BUSY]\nIDSR 0x%"
				"" PRIx64 " cpuids:", nack, busy, idsr);
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			for (i = 0; i < bn_sets; i++) {
#else
			for (i = 0; i < IDSR_BN_SETS; i++) {
#endif
				if (idsr & (IDSR_NACK_BIT(i) |
				    IDSR_BUSY_BIT(i))) {
					cmn_err(CE_CONT, " 0x%x",
						cpuids[i]);
				}
			}
			cmn_err(CE_CONT, "\n");
			cmn_err(CE_PANIC, "send_mondo_set: timeout");
		}
		curnack = idsr & nackmask;
		curbusy = idsr & busymask;

#ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
		/*
		 * Only proceed to send more xcalls if all the
		 * cpus in the previous IDSR_BN_SETS were completed.
		 */
		if (curbusy) {
			busy++;
			continue;
		}
#endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */

#if (NCPU > IDSR_BN_SETS)
		if (shipped < ncpuids) {
			uint64_t cpus_left;
			uint16_t next = (uint16_t)index;

			/* Slots that are neither NACKed nor busy can be reused. */
			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
			    busymask;

			if (cpus_left) {
				do {
					/*
					 * Sequence through and ship to the
					 * remainder of the CPUs in the system
					 * (e.g. other than the first
					 * (IDSR_BN_SETS)) in reverse order.
					 */
					lo = lowbit(cpus_left) - 1;
					i = IDSR_BUSY_IDX(lo);
					shipit(next, i);
					shipped++;
					cpuids[i] = next;

					/*
					 * If we've processed all the CPUs,
					 * exit the loop now and save
					 * instructions.
					 */
					if (shipped == ncpuids)
						break;

					/* Find the next lower CPU id in the set. */
					for ((index = ((int)next - 1));
					    index >= 0; index--)
						if (CPU_IN_SET(set, index)) {
							next = (uint16_t)index;
							break;
						}

					cpus_left &= ~(1ull << lo);
				} while (cpus_left);

				continue;
			}
		}
#endif
#ifndef	OLYMPUS_C_REV_B_ERRATA_XCALL
		if (curbusy) {
			busy++;
			continue;
		}
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
#ifdef SEND_MONDO_STATS
		{
			int n = gettick() - starttick;
			if (n < 8192)
				x_nack_stimes[n >> 7]++;
		}
#endif
		/* Spin for sys_clock_mhz ticks before re-shipping NACKed slots. */
		while (gettick() < (tick + sys_clock_mhz))
			;
		do {
			lo = lowbit(curnack) - 1;
			i = IDSR_NACK_IDX(lo);
			shipit(cpuids[i], i);
			curnack &= ~(1ull << lo);
		} while (curnack);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_set_stimes[n >> 7]++;
		else
			x_set_ltimes[(n >> 13) & 0xf]++;
	}
	x_set_cpus[shipped]++;
#endif
}

/*
 * Cpu private initialization.
 *
 * Panics if the CPU's implementation is not Olympus-C; otherwise adjusts
 * the hardware copy limits for this CPU's e$ size.
 */
void
cpu_init_private(struct cpu *cp)
{
	if (!(IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation))) {
		cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI is supported",
			cp->cpu_id, cpunodes[cp->cpu_id].implementation);
	}

	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
}

/*
 * One-time CPU-module setup: registers the error log, enables multiple
 * page size translation, installs the trap handlers and then fills in the
 * global tunables that describe this CPU implementation.
 */
void
cpu_setup(void)
{
	extern int at_flags;
	extern int disable_delay_tlb_flush, delay_tlb_flush;
	extern int cpc_has_overflow_intr;
	extern int disable_text_largepages;
	extern int use_text_pgsz4m;
	uint64_t cpu0_log;
	extern	 uint64_t opl_cpu0_err_log;

	/*
	 * Initialize Error log Scratch register for error handling.
	 */

	cpu0_log = va_to_pa(&opl_cpu0_err_log);
	opl_error_setup(cpu0_log);

	/*
	 * Enable MMU translating multiple page sizes for
	 * sITLB and sDTLB.
	 */
	opl_mpg_enable();

	/*
	 * Setup chip-specific trap handlers.
	 */
	cpu_init_trap();

	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;

	/*
	 * Use the maximum number of contexts available for SPARC64-VI
	 * unless it has been tuned for debugging.
	 * We are checking against 0 here since this value can be patched
	 * while booting.  It can not be patched via /etc/system since it
	 * will be patched too late and thus cause the system to panic.
	 */
	if (nctxs == 0)
		nctxs = MAX_NCTXS;

	/*
	 * Due to the number of entries in the fully-associative tlb
	 * this may have to be tuned lower than in spitfire.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores do not invalidate all pages of the d$, pagecopy
	 * et. al. need virtual translations with virtual coloring taken
	 * into consideration.  prefetch/ldd will pollute the d$ on the
	 * load side.
	 */
	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;

	if (use_page_coloring) {
		do_pg_coloring = 1;
		if (use_virtual_coloring)
			do_virtual_coloring = 1;
	}

	isa_list =
	    "sparcv9+vis2 sparcv9+vis sparcv9 "
	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;

	/*
	 * On SPARC64-VI, there's no hole in the virtual address space
	 */
	hole_start = hole_end = 0;

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */
	kpm_size_shift = 47;
	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
	kpm_smallpages = 1;

	/*
	 * The traptrace code uses either %tick or %stick for
	 * timestamping.  We have %stick so we can use it.
	 */
	traptrace_use_stick = 1;

	/*
	 * SPARC64-VI has a performance counter overflow interrupt
	 */
	cpc_has_overflow_intr = 1;

	/*
	 * Use SPARC64-VI flush-all support
	 */
	if (!disable_delay_tlb_flush)
		delay_tlb_flush = 1;

	/*
	 * Declare that this architecture/cpu combination does not support
	 * fpRAS.
	 */
	fpras_implemented = 0;

	/*
	 * Enable 4M pages to be used for mapping user text by default.  Don't
	 * use large pages for initialized data segments since we may not know
	 * at exec() time what should be the preferred large page size for DTLB
	 * programming.
	 */
	use_text_pgsz4m = 1;
	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
	    (1 << TTE32M) | (1 << TTE256M);
}

/*
 * Called by setcpudelay
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * For SPARC64-VI we want to use the system clock rate as
	 * the basis for low level timing, due to support of mixed
	 * speed CPUs and power management.
	 */
	if (system_clock_freq == 0)
		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");

	sys_tick_freq = system_clock_freq;
}

#ifdef SEND_MONDO_STATS
/* Histogram counters updated by send_one_mondo() and send_mondo_set(). */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif

/*
 * Send a mondo to a single CPU and wait for the dispatch to complete.
 * A NACKed dispatch is re-shipped after a one microsecond wait; if the
 * tick deadline expires the routine panics, unless panic_quiesce is set,
 * in which case it just returns.
 *
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be
 * included in the KDI-specific version.  See us3_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	int busy, nack;
	uint64_t idsr, starttick, endtick, tick, lasttick;
	uint64_t busymask;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	starttick = lasttick = gettick();
	shipit(cpuid, 0);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	busymask = IDSR_BUSY;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;

		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC, "send mondo timeout "
				"(target 0x%x) [%d NACK %d BUSY]",
					cpuid, nack, busy);
		}

		/* Still busy: keep polling without re-shipping. */
		if (idsr & busymask) {
			busy++;
			continue;
		}
		/* Not busy but not done either: wait briefly and re-ship. */
		drv_usecwait(1);
		shipit(cpuid, 0);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_one_stimes[n >> 7]++;
		else
			x_one_ltimes[(n >> 13) & 0xf]++;
	}
#endif
}

/*
 * init_mmu_page_sizes is set to one after the bootup time initialization
 * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a
 * valid value.
 *
 * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific
 * versions of disable_ism_large_pages and disable_large_pages, and feed back
 * into those two hat variables at hat initialization time.
 *
 */
int init_mmu_page_sizes = 0;
static int mmu_disable_ism_large_pages = ((1 << TTE64K) |
	(1 << TTE512K) | (1 << TTE256M));
static int mmu_disable_large_pages = 0;

/*
 * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
 * Called during very early bootup from check_cpus_set().
 * Can be called to verify that mmu_page_sizes are set up correctly.
 *
 * Set Olympus defaults. We do not use the function parameter.
*/ /*ARGSUSED*/ int mmu_init_mmu_page_sizes(int32_t not_used) { if (!init_mmu_page_sizes) { mmu_page_sizes = MMU_PAGE_SIZES; mmu_hashcnt = MAX_HASHCNT; mmu_ism_pagesize = MMU_PAGESIZE32M; mmu_exported_pagesize_mask = (1 << TTE8K) | (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M) | (1 << TTE256M); init_mmu_page_sizes = 1; return (0); } return (1); } /* SPARC64-VI worst case DTLB parameters */ #ifndef LOCKED_DTLB_ENTRIES #define LOCKED_DTLB_ENTRIES 5 /* 2 user TSBs, 2 nucleus, + OBP */ #endif #define TOTAL_DTLB_ENTRIES 32 #define AVAIL_32M_ENTRIES 0 #define AVAIL_256M_ENTRIES 0 #define AVAIL_DTLB_ENTRIES (TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES) static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = { AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES}; size_t mmu_map_pgsz(size_t pgsize) { struct proc *p = curproc; struct as *as = p->p_as; struct hat *hat = as->a_hat; uint_t pgsz0, pgsz1; size_t size0, size1; ASSERT(mmu_page_sizes == max_mmu_page_sizes); pgsz0 = hat->sfmmu_pgsz[0]; pgsz1 = hat->sfmmu_pgsz[1]; size0 = hw_page_array[pgsz0].hp_size; size1 = hw_page_array[pgsz1].hp_size; /* Allow use of a larger pagesize if neither TLB is reprogrammed. */ if ((pgsz0 == TTE8K) && (pgsz1 == TTE8K)) { return (pgsize); /* Allow use of requested pagesize if TLB is reprogrammed to it. */ } else if ((pgsize == size0) || (pgsize == size1)) { return (pgsize); /* Use larger reprogrammed TLB size if pgsize is atleast that big. */ } else if (pgsz1 > pgsz0) { if (pgsize >= size1) return (size1); /* Use smaller reprogrammed TLB size if pgsize is atleast that big. */ } else { if (pgsize >= size0) return (size0); } return (pgsize); } /* * The function returns the mmu-specific values for the * hat's disable_large_pages and disable_ism_large_pages variables. 
*/ int mmu_large_pages_disabled(uint_t flag) { int pages_disable = 0; if (flag == HAT_LOAD) { pages_disable = mmu_disable_large_pages; } else if (flag == HAT_LOAD_SHARE) { pages_disable = mmu_disable_ism_large_pages; } return (pages_disable); } /* * mmu_init_large_pages is called with the desired ism_pagesize parameter. * It may be called from set_platform_defaults, if some value other than 32M * is desired. mmu_ism_pagesize is the tunable. If it has a bad value, * then only warn, since it would be bad form to panic due to a user typo. * * The function re-initializes the mmu_disable_ism_large_pages variable. */ void mmu_init_large_pages(size_t ism_pagesize) { switch (ism_pagesize) { case MMU_PAGESIZE4M: mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); break; case MMU_PAGESIZE32M: mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE256M)); break; case MMU_PAGESIZE256M: mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M)); break; default: cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx", ism_pagesize); break; } } /*ARGSUSED*/ uint_t mmu_preferred_pgsz(struct hat *hat, caddr_t addr, size_t len) { sfmmu_t *sfmmup = (sfmmu_t *)hat; uint_t pgsz0, pgsz1; uint_t szc, maxszc = mmu_page_sizes - 1; size_t pgsz; extern int disable_large_pages; pgsz0 = (uint_t)sfmmup->sfmmu_pgsz[0]; pgsz1 = (uint_t)sfmmup->sfmmu_pgsz[1]; /* * If either of the TLBs are reprogrammed, choose * the largest mapping size as the preferred size, * if it fits the size and alignment constraints. * Else return the largest mapping size that fits, * if neither TLB is reprogrammed. 
*/ if (pgsz0 > TTE8K || pgsz1 > TTE8K) { if (pgsz1 > pgsz0) { /* First try pgsz1 */ pgsz = hw_page_array[pgsz1].hp_size; if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz)) return (pgsz1); } if (pgsz0 > TTE8K) { /* Then try pgsz0, if !TTE8K */ pgsz = hw_page_array[pgsz0].hp_size; if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz)) return (pgsz0); } } else { /* Otherwise pick best fit if neither TLB is reprogrammed. */ for (szc = maxszc; szc > TTE8K; szc--) { if (disable_large_pages & (1 << szc)) continue; pgsz = hw_page_array[szc].hp_size; if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz)) return (szc); } } return (TTE8K); } /* * Function to reprogram the TLBs when page sizes used * by a process change significantly. */ void mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt) { extern int page_szc(size_t); uint8_t pgsz0, pgsz1; /* * Don't program 2nd dtlb for kernel and ism hat */ if (hat->sfmmu_ismhat || hat == ksfmmup) return; /* * hat->sfmmu_pgsz[] is an array whose elements * contain a sorted order of page sizes. Element * 0 is the most commonly used page size, followed * by element 1, and so on. * * ttecnt[] is an array of per-page-size page counts * mapped into the process. * * If the HAT's choice for page sizes is unsuitable, * we can override it here. The new values written * to the array will be handed back to us later to * do the actual programming of the TLB hardware. * */ pgsz0 = (uint8_t)MIN(hat->sfmmu_pgsz[0], hat->sfmmu_pgsz[1]); pgsz1 = (uint8_t)MAX(hat->sfmmu_pgsz[0], hat->sfmmu_pgsz[1]); /* * This implements PAGESIZE programming of the sTLB * if large TTE counts don't exceed the thresholds. 
*/ if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0]) pgsz0 = page_szc(MMU_PAGESIZE); if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1]) pgsz1 = page_szc(MMU_PAGESIZE); hat->sfmmu_pgsz[0] = pgsz0; hat->sfmmu_pgsz[1] = pgsz1; /* otherwise, accept what the HAT chose for us */ } /* * The HAT calls this function when an MMU context is allocated so that we * can reprogram the large TLBs appropriately for the new process using * the context. * * The caller must hold the HAT lock. */ void mmu_set_ctx_page_sizes(struct hat *hat) { uint8_t pgsz0, pgsz1; uint8_t new_cext; ASSERT(sfmmu_hat_lock_held(hat)); /* * Don't program 2nd dtlb for kernel and ism hat */ if (hat->sfmmu_ismhat || hat == ksfmmup) return; /* * If supported, reprogram the TLBs to a larger pagesize. */ pgsz0 = hat->sfmmu_pgsz[0]; pgsz1 = hat->sfmmu_pgsz[1]; ASSERT(pgsz0 < mmu_page_sizes); ASSERT(pgsz1 < mmu_page_sizes); new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0); if (hat->sfmmu_cext != new_cext) { hat->sfmmu_cext = new_cext; } ctx_pgsz_array[hat->sfmmu_cnum] = hat->sfmmu_cext; /* * sfmmu_setctx_sec() will take care of the * rest of the dirty work for us. */ } /* * Return processor specific async error structure * size used. */ int cpu_aflt_size(void) { return (sizeof (opl_async_flt_t)); } /* * The cpu_sync_log_err() function is called via the [uc]e_drain() function to * post-process CPU events that are dequeued. As such, it can be invoked * from softint context, from AST processing in the trap() flow, or from the * panic flow. We decode the CPU-specific data, and take appropriate actions. * Historically this entry point was used to log the actual cmn_err(9F) text; * now with FMA it is used to prepare 'flt' to be converted into an ereport. * With FMA this function now also returns a flag which indicates to the * caller whether the ereport should be posted (1) or suppressed (0). 
*/ /*ARGSUSED*/ static int cpu_sync_log_err(void *flt) { opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt; struct async_flt *aflt = (struct async_flt *)flt; /* * No extra processing of urgent error events. * Always generate ereports for these events. */ if (aflt->flt_status == OPL_ECC_URGENT_TRAP) return (1); /* * Additional processing for synchronous errors. */ switch (opl_flt->flt_type) { case OPL_CPU_INV_SFSR: return (1); case OPL_CPU_SYNC_UE: /* * The validity: SFSR_MK_UE bit has been checked * in opl_cpu_sync_error() * No more check is required. * * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W, * and they have been retrieved in cpu_queue_events() */ if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) { ASSERT(aflt->flt_in_memory); /* * We want to skip logging only if ALL the following * conditions are true: * * 1. We are not panicing already. * 2. The error is a memory error. * 3. There is only one error. * 4. The error is on a retired page. * 5. The error occurred under on_trap * protection AFLT_PROT_EC */ if (!panicstr && aflt->flt_prot == AFLT_PROT_EC && page_retire_check(aflt->flt_addr, NULL) == 0) { /* * Do not log an error from * the retired page */ softcall(ecc_page_zero, (void *)aflt->flt_addr); return (0); } if (!panicstr) cpu_page_retire(opl_flt); } return (1); case OPL_CPU_SYNC_OTHERS: /* * For the following error cases, the processor HW does * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt * to assign appropriate values here to reflect what we * think is the most likely cause of the problem w.r.t to * the particular error event. For Buserr and timeout * error event, we will assign OPL_ERRID_CHANNEL as the * most likely reason. For TLB parity or multiple hit * error events, we will assign the reason as * OPL_ERRID_CPU (cpu related problem) and set the * flt_eid_sid to point to the cpuid. */ if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) { /* * flt_eid_sid will not be used for this case. 
*/ opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL; } if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) { opl_flt->flt_eid_mod = OPL_ERRID_CPU; opl_flt->flt_eid_sid = aflt->flt_inst; } /* * In case of no effective error bit */ if ((opl_flt->flt_bit & SFSR_ERRS) == 0) { opl_flt->flt_eid_mod = OPL_ERRID_CPU; opl_flt->flt_eid_sid = aflt->flt_inst; } break; default: return (1); } return (1); } /* * Retire the bad page that may contain the flushed error. */ void cpu_page_retire(opl_async_flt_t *opl_flt) { struct async_flt *aflt = (struct async_flt *)opl_flt; (void) page_retire(aflt->flt_addr, PR_UE); } /* * Invoked by error_init() early in startup and therefore before * startup_errorq() is called to drain any error Q - * * startup() * startup_end() * error_init() * cpu_error_init() * errorq_init() * errorq_drain() * start_other_cpus() * * The purpose of this routine is to create error-related taskqs. Taskqs * are used for this purpose because cpu_lock can't be grabbed from interrupt * context. * */ /*ARGSUSED*/ void cpu_error_init(int items) { opl_err_log = (opl_errlog_t *) kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP); if ((uint64_t)opl_err_log & MMU_PAGEOFFSET) cmn_err(CE_PANIC, "The base address of the error log " "is not page aligned"); } /* * We route all errors through a single switch statement. */ void cpu_ue_log_err(struct async_flt *aflt) { switch (aflt->flt_class) { case CPU_FAULT: if (cpu_sync_log_err(aflt)) cpu_ereport_post(aflt); break; case BUS_FAULT: bus_async_log_err(aflt); break; default: cmn_err(CE_WARN, "discarding async error %p with invalid " "fault class (0x%x)", (void *)aflt, aflt->flt_class); return; } } /* * Routine for panic hook callback from panic_idle(). * * Nothing to do here. */ void cpu_async_panic_callb(void) { } /* * Routine to return a string identifying the physical name * associated with a memory/cache error. 
*/ /*ARGSUSED*/ int cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, uint64_t flt_addr, int flt_bus_id, int flt_in_memory, ushort_t flt_status, char *buf, int buflen, int *lenp) { int synd_code; int ret; /* * An AFSR of -1 defaults to a memory syndrome. */ synd_code = (int)flt_synd; if (&plat_get_mem_unum) { if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id, flt_in_memory, flt_status, buf, buflen, lenp)) != 0) { buf[0] = '\0'; *lenp = 0; } return (ret); } buf[0] = '\0'; *lenp = 0; return (ENOTSUP); } /* * Wrapper for cpu_get_mem_unum() routine that takes an * async_flt struct rather than explicit arguments. */ int cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, char *buf, int buflen, int *lenp) { /* * We always pass -1 so that cpu_get_mem_unum will interpret this as a * memory error. */ return (cpu_get_mem_unum(synd_status, aflt->flt_synd, (uint64_t)-1, aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp)); } /* * This routine is a more generic interface to cpu_get_mem_unum() * that may be used by other modules (e.g. mm). */ /*ARGSUSED*/ int cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, char *buf, int buflen, int *lenp) { int synd_status, flt_in_memory, ret; ushort_t flt_status = 0; char unum[UNUM_NAMLEN]; /* * Check for an invalid address. */ if (afar == (uint64_t)-1) return (ENXIO); if (synd == (uint64_t)-1) synd_status = AFLT_STAT_INVALID; else synd_status = AFLT_STAT_VALID; flt_in_memory = (*afsr & SFSR_MEMORY) && pf_is_memory(afar >> MMU_PAGESHIFT); ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp); if (ret != 0) return (ret); if (*lenp >= buflen) return (ENAMETOOLONG); (void) strncpy(buf, unum, buflen); return (0); } /* * Routine to return memory information associated * with a physical address and syndrome. 
*/ /*ARGSUSED*/ int cpu_get_mem_info(uint64_t synd, uint64_t afar, uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, int *segsp, int *banksp, int *mcidp) { int synd_code = (int)synd; if (afar == (uint64_t)-1) return (ENXIO); if (p2get_mem_info != NULL) return ((p2get_mem_info)(synd_code, afar, mem_sizep, seg_sizep, bank_sizep, segsp, banksp, mcidp)); else return (ENOTSUP); } /* * Routine to return a string identifying the physical * name associated with a cpuid. */ int cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) { int ret; char unum[UNUM_NAMLEN]; if (&plat_get_cpu_unum) { if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp)) != 0) return (ret); } else { return (ENOTSUP); } if (*lenp >= buflen) return (ENAMETOOLONG); (void) strncpy(buf, unum, *lenp); return (0); } /* * This routine exports the name buffer size. */ size_t cpu_get_name_bufsize() { return (UNUM_NAMLEN); } /* * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH */ void cpu_flush_ecache(void) { flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size, cpunodes[CPU->cpu_id].ecache_linesize); } static uint8_t flt_to_trap_type(struct async_flt *aflt) { if (aflt->flt_status & OPL_ECC_ISYNC_TRAP) return (TRAP_TYPE_ECC_I); if (aflt->flt_status & OPL_ECC_DSYNC_TRAP) return (TRAP_TYPE_ECC_D); if (aflt->flt_status & OPL_ECC_URGENT_TRAP) return (TRAP_TYPE_URGENT); return (-1); } /* * Encode the data saved in the opl_async_flt_t struct into * the FM ereport payload. 
*/
/* ARGSUSED */
static void
cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
    nvlist_t *resource)
{
	opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt;
	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
	char unum[UNUM_NAMLEN];
	int len;

	/*
	 * Each payload member is added only when its flag is set in
	 * aflt->flt_payload.  SFSR and UGESR both report flt_stat.
	 */
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR,
		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR,
		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR,
		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
		    DATA_TYPE_BOOLEAN_VALUE,
		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS,
		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL);
	}

	/*
	 * Build the resource FMRI according to the error module id.
	 * Only the CPU and MEM classes carry a resource.
	 */
	switch (opl_flt->flt_eid_mod) {
	case OPL_ERRID_CPU:
		(void) snprintf(sbuf, sizeof (sbuf), "%llX",
		    (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id);
		(void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION,
		    NULL, opl_flt->flt_eid_sid,
		    (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version,
		    sbuf);
		break;

	case OPL_ERRID_MEM:
		(void) cpu_get_mem_unum_aflt(0, aflt, unum,
		    UNUM_NAMLEN, &len);
		(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
		    NULL, unum, NULL, (uint64_t)-1);
		break;

	case OPL_ERRID_CHANNEL:
	case OPL_ERRID_PATH:
		/*
		 * No resource is created but the cpumem DE will find
		 * the defective path by retrieving EID from SFSR which is
		 * included in the payload.
		 */
		return;

	default:
		return;
	}

	fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
	    DATA_TYPE_NVLIST, resource, NULL);
}

/*
 * Returns whether fault address is valid for this error bit and
 * whether the address is "in memory" (i.e. pf_is_memory returns 1).
 * Only synchronous traps can yield a non-zero result.
 */
/*ARGSUSED*/
static int
cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit)
{
	struct async_flt *aflt = (struct async_flt *)opl_flt;

	if (!(aflt->flt_status & (OPL_ECC_SYNC_TRAP)))
		return (0);

	if (!(t_afsr_bit & SFSR_MEMORY))
		return (0);

	return (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT) != 0);
}

/*
 * In OPL SCF does the stick synchronization.
 */
void
sticksync_slave(void)
{
}

/*
 * In OPL SCF does the stick synchronization.
 */
void
sticksync_master(void)
{
}

/*
 * Cpu private unitialization.  OPL cpus do not use the private area.
*/ void cpu_uninit_private(struct cpu *cp) { cmp_delete_cpu(cp->cpu_id); } /* * Always flush an entire cache. */ void cpu_error_ecache_flush(void) { cpu_flush_ecache(); } void cpu_ereport_post(struct async_flt *aflt) { char *cpu_type, buf[FM_MAX_CLASS]; nv_alloc_t *nva = NULL; nvlist_t *ereport, *detector, *resource; errorq_elem_t *eqep; char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ if (aflt->flt_panic || panicstr) { eqep = errorq_reserve(ereport_errorq); if (eqep == NULL) return; ereport = errorq_elem_nvl(ereport_errorq, eqep); nva = errorq_elem_nva(ereport_errorq, eqep); } else { ereport = fm_nvlist_create(nva); } /* * Create the scheme "cpu" FMRI. */ detector = fm_nvlist_create(nva); resource = fm_nvlist_create(nva); switch (cpunodes[aflt->flt_inst].implementation) { case OLYMPUS_C_IMPL: cpu_type = FM_EREPORT_CPU_SPARC64_VI; break; default: cpu_type = FM_EREPORT_CPU_UNSUPPORTED; break; } (void) snprintf(sbuf, sizeof (sbuf), "%llX", (u_longlong_t)cpunodes[aflt->flt_inst].device_id); (void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL, aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version, sbuf); /* * Encode all the common data into the ereport. */ (void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class); fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL); /* * Encode the error specific data that was saved in * the async_flt structure into the ereport. 
*/ cpu_payload_add_aflt(aflt, ereport, resource); if (aflt->flt_panic || panicstr) { errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); } else { (void) fm_ereport_post(ereport, EVCH_TRYHARD); fm_nvlist_destroy(ereport, FM_NVA_FREE); fm_nvlist_destroy(detector, FM_NVA_FREE); fm_nvlist_destroy(resource, FM_NVA_FREE); } } void cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) { int status; ddi_fm_error_t de; bzero(&de, sizeof (ddi_fm_error_t)); de.fme_version = DDI_FME_VERSION; de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1); de.fme_flag = expected; de.fme_bus_specific = (void *)aflt->flt_addr; status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) aflt->flt_panic = 1; } void cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, errorq_t *eqp, uint_t flag) { struct async_flt *aflt = (struct async_flt *)payload; aflt->flt_erpt_class = error_class; errorq_dispatch(eqp, payload, payload_sz, flag); } void adjust_hw_copy_limits(int ecache_size) { /* * Set hw copy limits. * * /etc/system will be parsed later and can override one or more * of these settings. * * At this time, ecache size seems only mildly relevant. * We seem to run into issues with the d-cache and stalls * we see on misses. * * Cycle measurement indicates that 2 byte aligned copies fare * little better than doing things with VIS at around 512 bytes. * 4 byte aligned shows promise until around 1024 bytes. 8 Byte * aligned is faster whenever the source and destination data * in cache and the total size is less than 2 Kbytes. The 2K * limit seems to be driven by the 2K write cache. * When more than 2K of copies are done in non-VIS mode, stores * backup in the write cache. In VIS mode, the write cache is * bypassed, allowing faster cache-line writes aligned on cache * boundaries. 
* * In addition, in non-VIS mode, there is no prefetching, so * for larger copies, the advantage of prefetching to avoid even * occasional cache misses is enough to justify using the VIS code. * * During testing, it was discovered that netbench ran 3% slower * when hw_copy_limit_8 was 2K or larger. Apparently for server * applications, data is only used once (copied to the output * buffer, then copied by the network device off the system). Using * the VIS copy saves more L2 cache state. Network copies are * around 1.3K to 1.5K in size for historical reasons. * * Therefore, a limit of 1K bytes will be used for the 8 byte * aligned copy even for large caches and 8 MB ecache. The * infrastructure to allow different limits for different sized * caches is kept to allow further tuning in later releases. */ if (min_ecache_size == 0 && use_hw_bcopy) { /* * First time through - should be before /etc/system * is read. * Could skip the checks for zero but this lets us * preserve any debugger rewrites. */ if (hw_copy_limit_1 == 0) { hw_copy_limit_1 = VIS_COPY_THRESHOLD; priv_hcl_1 = hw_copy_limit_1; } if (hw_copy_limit_2 == 0) { hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD; priv_hcl_2 = hw_copy_limit_2; } if (hw_copy_limit_4 == 0) { hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD; priv_hcl_4 = hw_copy_limit_4; } if (hw_copy_limit_8 == 0) { hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; priv_hcl_8 = hw_copy_limit_8; } min_ecache_size = ecache_size; } else { /* * MP initialization. Called *after* /etc/system has * been parsed. One CPU has already been initialized. * Need to cater for /etc/system having scragged one * of our values. */ if (ecache_size == min_ecache_size) { /* * Same size ecache. We do nothing unless we * have a pessimistic ecache setting. In that * case we become more optimistic (if the cache is * large enough). 
*/ if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) { /* * Need to adjust hw_copy_limit* from our * pessimistic uniprocessor value to a more * optimistic UP value *iff* it hasn't been * reset. */ if ((ecache_size > 1048576) && (priv_hcl_8 == hw_copy_limit_8)) { if (ecache_size <= 2097152) hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; else if (ecache_size <= 4194304) hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; else hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD; priv_hcl_8 = hw_copy_limit_8; } } } else if (ecache_size < min_ecache_size) { /* * A different ecache size. Can this even happen? */ if (priv_hcl_8 == hw_copy_limit_8) { /* * The previous value that we set * is unchanged (i.e., it hasn't been * scragged by /etc/system). Rewrite it. */ if (ecache_size <= 1048576) hw_copy_limit_8 = 8 * VIS_COPY_THRESHOLD; else if (ecache_size <= 2097152) hw_copy_limit_8 = 8 * VIS_COPY_THRESHOLD; else if (ecache_size <= 4194304) hw_copy_limit_8 = 8 * VIS_COPY_THRESHOLD; else hw_copy_limit_8 = 10 * VIS_COPY_THRESHOLD; priv_hcl_8 = hw_copy_limit_8; min_ecache_size = ecache_size; } } } } #define VIS_BLOCKSIZE 64 int dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) { int ret, watched; watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); ret = dtrace_blksuword32(addr, data, 0); if (watched) watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); return (ret); } void opl_cpu_reg_init() { uint64_t this_cpu_log; /* * We do not need to re-initialize cpu0 registers. */ if (cpu[getprocessorid()] == &cpu0) return; /* * Initialize Error log Scratch register for error handling. */ this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) + ERRLOG_BUFSZ * (getprocessorid()))); opl_error_setup(this_cpu_log); /* * Enable MMU translating multiple page sizes for * sITLB and sDTLB. */ opl_mpg_enable(); } /* * Queue one event in ue_queue based on ecc_type_to_info entry. 
*/ static void cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason, ecc_type_to_info_t *eccp) { struct async_flt *aflt = (struct async_flt *)opl_flt; if (reason && strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { (void) strcat(reason, eccp->ec_reason); } opl_flt->flt_bit = eccp->ec_afsr_bit; opl_flt->flt_type = eccp->ec_flt_type; aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit); aflt->flt_payload = eccp->ec_err_payload; ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP)); cpu_errorq_dispatch(eccp->ec_err_class, (void *)opl_flt, sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic); } /* * Queue events on async event queue one event per error bit. * Return number of events queued. */ int cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs) { struct async_flt *aflt = (struct async_flt *)opl_flt; ecc_type_to_info_t *eccp; int nevents = 0; /* * Queue expected errors, error bit and fault type must must match * in the ecc_type_to_info table. */ for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; eccp++) { if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && (eccp->ec_flags & aflt->flt_status) != 0) { /* * UE error event can be further * classified/breakdown into finer granularity * based on the flt_eid_mod value set by HW. We do * special handling here so that we can report UE * error in finer granularity as ue_mem, * ue_channel, ue_cpu or ue_path. 
*/ if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) { opl_flt->flt_eid_mod = (aflt->flt_stat & SFSR_EID_MOD) >> SFSR_EID_MOD_SHIFT; opl_flt->flt_eid_sid = (aflt->flt_stat & SFSR_EID_SID) >> SFSR_EID_SID_SHIFT; /* * Need to advance eccp pointer by flt_eid_mod * so that we get an appropriate ecc pointer * * EID # of advances * ---------------------------------- * OPL_ERRID_MEM 0 * OPL_ERRID_CHANNEL 1 * OPL_ERRID_CPU 2 * OPL_ERRID_PATH 3 */ eccp += opl_flt->flt_eid_mod; } cpu_queue_one_event(opl_flt, reason, eccp); t_afsr_errs &= ~eccp->ec_afsr_bit; nevents++; } } return (nevents); } /* * Sync. error wrapper functions. * We use these functions in order to transfer here from the * nucleus trap handler information about trap type (data or * instruction) and trap level (0 or above 0). This way we * get rid of using SFSR's reserved bits. */ #define OPL_SYNC_TL0 0 #define OPL_SYNC_TL1 1 #define OPL_ISYNC_ERR 0 #define OPL_DSYNC_ERR 1 void opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) { uint64_t t_sfar = p_sfar; uint64_t t_sfsr = p_sfsr; opl_cpu_sync_error(rp, t_sfar, t_sfsr, OPL_SYNC_TL0, OPL_ISYNC_ERR); } void opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) { uint64_t t_sfar = p_sfar; uint64_t t_sfsr = p_sfsr; opl_cpu_sync_error(rp, t_sfar, t_sfsr, OPL_SYNC_TL1, OPL_ISYNC_ERR); } void opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) { uint64_t t_sfar = p_sfar; uint64_t t_sfsr = p_sfsr; opl_cpu_sync_error(rp, t_sfar, t_sfsr, OPL_SYNC_TL0, OPL_DSYNC_ERR); } void opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr) { uint64_t t_sfar = p_sfar; uint64_t t_sfsr = p_sfsr; opl_cpu_sync_error(rp, t_sfar, t_sfsr, OPL_SYNC_TL1, OPL_DSYNC_ERR); } /* * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL * and TLB_PRT. * This function is designed based on cpu_deferred_error(). 
*/
static void
opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr,
    uint_t tl, uint_t derr)
{
	opl_async_flt_t opl_flt;
	struct async_flt *aflt;
	int trampolined = 0;
	char pr_reason[MAX_REASON_STRING];
	uint64_t log_sfsr;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but it's ok
	 * because we just need a consistent snapshot and we know that everyone
	 * else will store a consistent set of bits while holding p_lock.  We
	 * don't have to worry about a race because SDOCORE is set once prior
	 * to doing i/o from the process's address space and is never cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	pr_reason[0] = '\0';

	/*
	 * handle the specific error
	 */
	bzero(&opl_flt, sizeof (opl_async_flt_t));
	aflt = (struct async_flt *)&opl_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_stat = t_sfsr;
	aflt->flt_addr = t_sfar;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = (uchar_t)AFLT_PROT_NONE;
	aflt->flt_class = (uchar_t)CPU_FAULT;
	/* Anything taken at TL>0 is privileged by definition. */
	aflt->flt_priv = (uchar_t)
	    (tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ? 1 : 0));
	aflt->flt_tl = (uchar_t)tl;
	/* TL>0, the test knob, and TLB multi-hit/parity are always fatal. */
	aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 ||
	    (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0);
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
	aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP;

	/*
	 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain.
	 * So, clear all error bits to avoid mis-handling and force the system
	 * panicked.
	 * We skip all the procedures below down to the panic message call.
	 */
	if (!(t_sfsr & SFSR_FV)) {
		opl_flt.flt_type = OPL_CPU_INV_SFSR;
		aflt->flt_panic = 1;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR,
		    (void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue,
		    aflt->flt_panic);
		fm_panic("%sErrors(s)", "invalid SFSR");
	}

	/*
	 * If either UE and MK bit is off, this is not valid UE error.
	 * If it is not valid UE error, clear UE & MK_UE bits to prevent
	 * mis-handling below.
	 * aflt->flt_stat keeps the original bits as a reference.
	 */
	if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) !=
	    (SFSR_MK_UE|SFSR_UE)) {
		t_sfsr &= ~(SFSR_MK_UE|SFSR_UE);
	}

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check to
	 * see if we were executing in the kernel under on_trap() or t_lofault
	 * protection.  If so, modify the saved registers so that we return
	 * from the trap to the appropriate trampoline routine.
	 */
	if (!aflt->flt_panic && aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = (uchar_t)AFLT_PROT_EC;
				otp->ot_trap |= (ushort_t)OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
			}

			if ((t_sfsr & (SFSR_TO | SFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS;
				otp->ot_trap |= (ushort_t)OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
				/*
				 * for peeks and caut_gets errors are expected
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			trampolined = 1;
		}
	}

	/*
	 * If we're in user mode or we're doing a protected copy, we either
	 * want the ASTON code below to send a signal to the user process
	 * or we want to panic if aft_panic is set.
	 *
	 * If we're in privileged mode and we're not doing a copy, then we
	 * need to check if we've trampolined.  If we haven't trampolined,
	 * we should panic.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
			aflt->flt_panic |= aft_panic;
	} else if (!trampolined) {
		aflt->flt_panic = 1;
	}

	/*
	 * If we've trampolined due to a privileged TO or BERR, or if an
	 * unprivileged TO or BERR occurred, we don't want to enqueue an
	 * event for that TO or BERR.  Queue all other events (if any) besides
	 * the TO/BERR.
	 */
	log_sfsr = t_sfsr;
	if (trampolined) {
		log_sfsr &= ~(SFSR_TO | SFSR_BERR);
	} else if (!aflt->flt_priv) {
		/*
		 * User mode, suppress messages if
		 * cpu_berr_to_verbose is not set.
		 */
		if (!cpu_berr_to_verbose)
			log_sfsr &= ~(SFSR_TO | SFSR_BERR);
	}

	/*
	 * Queue events for the bits that survived the masking above.  The
	 * masked value (log_sfsr), not the raw t_sfsr, is what must be
	 * handed to cpu_queue_events(); otherwise a suppressed TO/BERR
	 * would still be queued whenever it accompanies another error.
	 * If nothing could be queued for the requested bits, or no error
	 * bit is set at all, report an invalid SFSR instead.
	 */
	if (((log_sfsr & SFSR_ERRS) &&
	    (cpu_queue_events(&opl_flt, pr_reason, log_sfsr) == 0)) ||
	    ((t_sfsr & SFSR_ERRS) == 0)) {
		opl_flt.flt_type = OPL_CPU_INV_SFSR;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR,
		    (void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue,
		    aflt->flt_panic);
	}

	if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic) {
		if (pr_reason[0] == 0)
			strcpy(pr_reason, "invalid SFSR ");

		fm_panic("%sErrors(s)", pr_reason);
	}

	/*
	 * If we queued an error and we are going to return from the trap and
	 * the error was in user mode or inside of a copy routine, set AST flag
	 * so the queue will be drained before returning to user mode.  The
	 * AST processing will also act on our failure policy.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		int pcb_flag = 0;

		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
			pcb_flag |= ASYNC_HWERR;

		if (t_sfsr & SFSR_BERR)
			pcb_flag |= ASYNC_BERR;

		if (t_sfsr & SFSR_TO)
			pcb_flag |= ASYNC_BTO;

		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
		aston(curthread);
	}
}

/*
 * Handler for urgent errors.  The fault is always treated as fatal: the
 * events matching the UGESR bits are queued (or an "invalid UGESR" event
 * if none match) and the system panics.
 */
/*ARGSUSED*/
void
opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl)
{
	opl_async_flt_t opl_flt;
	struct async_flt *aflt;
	char pr_reason[MAX_REASON_STRING];

	/* normalize tl */
	tl = (tl >= 2 ? 1 : 0);
	pr_reason[0] = '\0';

	bzero(&opl_flt, sizeof (opl_async_flt_t));
	aflt = (struct async_flt *)&opl_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_stat = p_ugesr;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_class = (uchar_t)CPU_FAULT;
	aflt->flt_tl = tl;
	aflt->flt_priv = (uchar_t)
	    (tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ? 1 : 0));
	aflt->flt_status = OPL_ECC_URGENT_TRAP;
	aflt->flt_panic = 1;
	/*
	 * HW does not set mod/sid in case of urgent error.
	 * So we have to set it here.
	 */
	opl_flt.flt_eid_mod = OPL_ERRID_CPU;
	opl_flt.flt_eid_sid = aflt->flt_inst;

	if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) {
		opl_flt.flt_type = OPL_CPU_INV_UGESR;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG,
		    (void *)&opl_flt, sizeof (opl_async_flt_t),
		    ue_queue, aflt->flt_panic);
	}

	fm_panic("Urgent Error");
}

/*
 * Initialization error counters resetting.
*/ /* ARGSUSED */ static void opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when) { hdlr->cyh_func = (cyc_func_t)ras_cntr_reset; hdlr->cyh_level = CY_LOW_LEVEL; hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id; when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU); when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval; } void cpu_mp_init(void) { cyc_omni_handler_t hdlr; hdlr.cyo_online = opl_ras_online; hdlr.cyo_offline = NULL; hdlr.cyo_arg = NULL; mutex_enter(&cpu_lock); (void) cyclic_add_omni(&hdlr); mutex_exit(&cpu_lock); } /*ARGSUSED*/ void mmu_init_kernel_pgsz(struct hat *hat) { } size_t mmu_get_kernel_lpsize(size_t lpsize) { uint_t tte; if (lpsize == 0) { /* no setting for segkmem_lpsize in /etc/system: use default */ return (MMU_PAGESIZE4M); } for (tte = TTE8K; tte <= TTE4M; tte++) { if (lpsize == TTEBYTES(tte)) return (lpsize); } return (TTEBYTES(TTE8K)); } /* * The following are functions that are unused in * OPL cpu module. They are defined here to resolve * dependencies in the "unix" module. * Unused functions that should never be called in * OPL are coded with ASSERT(0). 
*/ void cpu_disable_errors(void) {} void cpu_enable_errors(void) { ASSERT(0); } /*ARGSUSED*/ void cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t) { ASSERT(0); } /*ARGSUSED*/ void cpu_faulted_enter(struct cpu *cp) {} /*ARGSUSED*/ void cpu_faulted_exit(struct cpu *cp) {} /*ARGSUSED*/ void cpu_check_allcpus(struct async_flt *aflt) {} /*ARGSUSED*/ void cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t) { ASSERT(0); } /*ARGSUSED*/ void cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz) { ASSERT(0); } /*ARGSUSED*/ void cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) { ASSERT(0); } /*ARGSUSED*/ void cpu_busy_ecache_scrub(struct cpu *cp) {} /*ARGSUSED*/ void cpu_idle_ecache_scrub(struct cpu *cp) {} /* ARGSUSED */ void cpu_change_speed(uint64_t divisor, uint64_t arg2) { ASSERT(0); } void cpu_init_cache_scrub(void) {} /* ARGSUSED */ int cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) { if (&plat_get_mem_sid) { return (plat_get_mem_sid(unum, buf, buflen, lenp)); } else { return (ENOTSUP); } } /* ARGSUSED */ int cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) { if (&plat_get_mem_addr) { return (plat_get_mem_addr(unum, sid, offset, addrp)); } else { return (ENOTSUP); } } /* ARGSUSED */ int cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) { if (&plat_get_mem_offset) { return (plat_get_mem_offset(flt_addr, offp)); } else { return (ENOTSUP); } } /*ARGSUSED*/ void itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) { ASSERT(0); } /*ARGSUSED*/ void dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) { ASSERT(0); } /*ARGSUSED*/ void read_ecc_data(struct async_flt *aflt, short verbose, short ce_err) { ASSERT(0); } /*ARGSUSED*/ int ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp, errorq_elem_t *eqep, size_t afltoffset) { ASSERT(0); return (0); } /*ARGSUSED*/ char * flt_to_error_type(struct async_flt *aflt) { ASSERT(0); return (NULL); }