/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CHEETAHPLUS_ERRATUM_25 #include #endif /* CHEETAHPLUS_ERRATUM_25 */ /* * Note that 'Cheetah PRM' refers to: * SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III */ /* * Per CPU pointers to physical address of TL>0 logout data areas. * These pointers have to be in the kernel nucleus to avoid MMU * misses. */ uint64_t ch_err_tl1_paddrs[NCPU]; /* * One statically allocated structure to use during startup/DR * to prevent unnecessary panics. */ ch_err_tl1_data_t ch_err_tl1_data; /* * Per CPU pending error at TL>0, used by level15 softint handler */ uchar_t ch_err_tl1_pending[NCPU]; /* * For deferred CE re-enable after trap. */ taskq_t *ch_check_ce_tq; /* * Internal functions. 
*/ static int cpu_async_log_err(void *flt, errorq_elem_t *eqep); static void cpu_log_diag_info(ch_async_flt_t *ch_flt); static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason, ecc_type_to_info_t *eccp, ch_diag_data_t *cdp); static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit); static int clear_ecc(struct async_flt *ecc); #if defined(CPU_IMP_ECACHE_ASSOC) static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt); #endif static int cpu_ecache_set_size(struct cpu *cp); static int cpu_ectag_line_invalid(int cachesize, uint64_t tag); static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr); static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag); static int cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag); static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt); static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit); static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit); static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit); static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit); static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit); static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf); static void cpu_uninit_ecache_scrub_dr(struct cpu *cp); static void cpu_scrubphys(struct async_flt *aflt); static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *, int *, int *); static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *); static void cpu_ereport_init(struct async_flt *aflt); static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t); static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt); static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen, uint64_t nceen, ch_cpu_logout_t *clop); static int cpu_ce_delayed_ec_logout(uint64_t); static int cpu_matching_ecache_line(uint64_t, void *, int, int *); static int cpu_error_is_ecache_data(int, uint64_t); static void cpu_fmri_cpu_set(nvlist_t *, int); static int cpu_error_to_resource_type(struct async_flt *aflt); #ifdef CHEETAHPLUS_ERRATUM_25 static int mondo_recover_proc(uint16_t, int); static void cheetah_nudge_init(void); static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when); static void cheetah_nudge_buddy(void); #endif /* CHEETAHPLUS_ERRATUM_25 */ #if defined(CPU_IMP_L1_CACHE_PARITY) static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt); static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index); static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt, ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word); static void cpu_icache_parity_info(ch_async_flt_t *ch_flt); static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index); static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt); static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index); static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *); static void cpu_payload_add_icache(struct async_flt *, nvlist_t *); #endif /* CPU_IMP_L1_CACHE_PARITY */ int (*p2get_mem_info)(int synd_code, uint64_t paddr, uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, int *segsp, int *banksp, int *mcidp); /* * This table is used to determine which bit(s) is(are) bad when an ECC * error occurs. The array is indexed by an 9-bit syndrome. 
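 * (A lookup is simply ecc_syndrome_tab[synd], where synd is the 9-bit
 * ECC syndrome taken from the E_SYND field of the AFSR.)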
The entries * of this array have the following semantics: * * 00-127 The number of the bad bit, when only one bit is bad. * 128 ECC bit C0 is bad. * 129 ECC bit C1 is bad. * 130 ECC bit C2 is bad. * 131 ECC bit C3 is bad. * 132 ECC bit C4 is bad. * 133 ECC bit C5 is bad. * 134 ECC bit C6 is bad. * 135 ECC bit C7 is bad. * 136 ECC bit C8 is bad. * 137-143 reserved for Mtag Data and ECC. * 144(M2) Two bits are bad within a nibble. * 145(M3) Three bits are bad within a nibble. * 146(M3) Four bits are bad within a nibble. * 147(M) Multiple bits (5 or more) are bad. * 148 NO bits are bad. * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5. */ #define C0 128 #define C1 129 #define C2 130 #define C3 131 #define C4 132 #define C5 133 #define C6 134 #define C7 135 #define C8 136 #define MT0 137 /* Mtag Data bit 0 */ #define MT1 138 #define MT2 139 #define MTC0 140 /* Mtag Check bit 0 */ #define MTC1 141 #define MTC2 142 #define MTC3 143 #define M2 144 #define M3 145 #define M4 146 #define M 147 #define NA 148 #if defined(JALAPENO) || defined(SERRANO) #define S003 149 /* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */ #define S003MEM 150 /* Syndrome 0x003 => likely from WDU/WBP */ #define SLAST S003MEM /* last special syndrome */ #else /* JALAPENO || SERRANO */ #define S003 149 /* Syndrome 0x003 => likely from EDU:ST */ #define S071 150 /* Syndrome 0x071 => likely from WDU/CPU */ #define S11C 151 /* Syndrome 0x11c => likely from BERR/DBERR */ #define SLAST S11C /* last special syndrome */ #endif /* JALAPENO || SERRANO */ #if defined(JALAPENO) || defined(SERRANO) #define BPAR0 152 /* syndrom 152 through 167 for bus parity */ #define BPAR15 167 #endif /* JALAPENO || SERRANO */ static uint8_t ecc_syndrome_tab[] = { NA, C0, C1, S003, C2, M2, M3, 47, C3, M2, M2, 53, M2, 41, 29, M, C4, M, M, 50, M2, 38, 25, M2, M2, 33, 24, M2, 11, M, M2, 16, C5, M, M, 46, M2, 37, 19, M2, M, 31, 32, M, 7, M2, M2, 10, M2, 40, 13, M2, 59, M, M2, 66, M, M2, M2, 0, M2, 67, 71, M, C6, M, M, 43, M, 36, 18, M, M2, 49, 15, M, 63, M2, M2, 6, M2, 44, 28, M2, M, M2, M2, 52, 68, M2, M2, 62, M2, M3, M3, M4, M2, 26, 106, M2, 64, M, M2, 2, 120, M, M2, M3, M, M3, M3, M4, #if defined(JALAPENO) || defined(SERRANO) 116, M2, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3, #else /* JALAPENO || SERRANO */ 116, S071, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3, #endif /* JALAPENO || SERRANO */ C7, M2, M, 42, M, 35, 17, M2, M, 45, 14, M2, 21, M2, M2, 5, M, 27, M, M, 99, M, M, 3, 114, M2, M2, 20, M2, M3, M3, M, M2, 23, 113, M2, 112, M2, M, 51, 95, M, M2, M3, M2, M3, M3, M2, 103, M, M2, M3, M2, M3, M3, M4, M2, 48, M, M, 73, M2, M, M3, M2, 22, 110, M2, 109, M2, M, 9, 108, M2, M, M3, M2, M3, M3, M, 102, M2, M, M, M2, M3, M3, M, M2, M3, M3, M2, M, M4, M, M3, 98, M, M2, M3, M2, M, M3, M4, M2, M3, M3, M4, M3, M, M, M, M2, M3, M3, M, M3, M, M, M, 56, M4, M, M3, M4, M, M, M, C8, M, M2, 39, M, 34, 105, M2, M, 30, 104, M, 101, M, M, 4, #if defined(JALAPENO) || defined(SERRANO) M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, M2, M, M3, M, #else /* JALAPENO || SERRANO */ M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, S11C, M, M3, M, #endif /* JALAPENO || SERRANO */ M2, 97, 82, M2, 78, M2, M2, 1, 96, M, M, M, M, M, M3, M2, 94, M, M2, M3, M2, M, M3, M, M2, M, 79, M, 69, M, M4, M, M2, 93, 92, M, 91, M, M2, 8, 90, M2, M2, M, M, M, M, M4, 89, M, M, M3, M2, M3, M3, M, M, M, M3, M2, M3, M2, M, M3, 86, M, M2, M3, M2, M, M3, M, M2, M, M3, M, M3, M, M, M3, M, M, M3, M2, M3, M2, M4, M, 60, M, M2, M3, M4, M, M, M2, M2, 88, 85, M2, 84, 
M, M2, 55, 81, M2, M2, M3, M2, M3, M3, M4, 77, M, M, M, M2, M3, M, M, M2, M3, M3, M4, M3, M2, M, M, 74, M, M2, M3, M, M, M3, M, M, M, M3, M, M3, M, M4, M3, M2, 70, 107, M4, 65, M2, M2, M, 127, M, M, M, M2, M3, M3, M, 80, M2, M2, 72, M, 119, 118, M, M2, 126, 76, M, 125, M, M4, M3, M2, 115, 124, M, 75, M, M, M3, 61, M, M4, M, M4, M, M, M, M, 123, 122, M4, 121, M4, M, M3, 117, M2, M2, M3, M4, M3, M, M, 111, M, M, M, M4, M3, M3, M, M, M, M3, M, M3, M2, M, M }; #define ESYND_TBL_SIZE (sizeof (ecc_syndrome_tab) / sizeof (uint8_t)) #if !(defined(JALAPENO) || defined(SERRANO)) /* * This table is used to determine which bit(s) is(are) bad when a Mtag * error occurs. The array is indexed by an 4-bit ECC syndrome. The entries * of this array have the following semantics: * * -1 Invalid mtag syndrome. * 137 Mtag Data 0 is bad. * 138 Mtag Data 1 is bad. * 139 Mtag Data 2 is bad. * 140 Mtag ECC 0 is bad. * 141 Mtag ECC 1 is bad. * 142 Mtag ECC 2 is bad. * 143 Mtag ECC 3 is bad. * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6. */ short mtag_syndrome_tab[] = { NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2, MT1, M2, MT2, M2, M2 }; #define MSYND_TBL_SIZE (sizeof (mtag_syndrome_tab) / sizeof (short)) #else /* !(JALAPENO || SERRANO) */ #define BSYND_TBL_SIZE 16 #endif /* !(JALAPENO || SERRANO) */ /* * Types returned from cpu_error_to_resource_type() */ #define ERRTYPE_UNKNOWN 0 #define ERRTYPE_CPU 1 #define ERRTYPE_MEMORY 2 #define ERRTYPE_ECACHE_DATA 3 /* * CE initial classification and subsequent action lookup table */ static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE]; static int ce_disp_inited; /* * Set to disable leaky and partner check for memory correctables */ int ce_xdiag_off; /* * The following are not incremented atomically so are indicative only */ static int ce_xdiag_drops; static int ce_xdiag_lkydrops; static int ce_xdiag_ptnrdrops; static int ce_xdiag_bad; /* * CE leaky check callback structure */ typedef struct { struct async_flt *lkycb_aflt; errorq_t *lkycb_eqp; errorq_elem_t *lkycb_eqep; } ce_lkychk_cb_t; /* * defines for various ecache_flush_flag's */ #define ECACHE_FLUSH_LINE 1 #define ECACHE_FLUSH_ALL 2 /* * STICK sync */ #define STICK_ITERATION 10 #define MAX_TSKEW 1 #define EV_A_START 0 #define EV_A_END 1 #define EV_B_START 2 #define EV_B_END 3 #define EVENTS 4 static int64_t stick_iter = STICK_ITERATION; static int64_t stick_tsk = MAX_TSKEW; typedef enum { EVENT_NULL = 0, SLAVE_START, SLAVE_CONT, MASTER_START } event_cmd_t; static volatile event_cmd_t stick_sync_cmd = EVENT_NULL; static int64_t timestamp[EVENTS]; static volatile int slave_done; #ifdef DEBUG #define DSYNC_ATTEMPTS 64 typedef struct { int64_t skew_val[DSYNC_ATTEMPTS]; } ss_t; ss_t stick_sync_stats[NCPU]; #endif /* DEBUG */ uint_t cpu_impl_dual_pgsz = 0; #if defined(CPU_IMP_DUAL_PAGESIZE) uint_t disable_dual_pgsz = 0; #endif /* CPU_IMP_DUAL_PAGESIZE */ /* * Save the cache bootup state for use when internal * caches are to be re-enabled after an error occurs. */ uint64_t cache_boot_state; /* * PA[22:0] represent Displacement in Safari configuration space. 
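 * The mask below, 0x7fffff, is (1 << 23) - 1, i.e. it covers exactly
 * PA[22:0].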
*/ uint_t root_phys_addr_lo_mask = 0x7fffffu; bus_config_eclk_t bus_config_eclk[] = { #if defined(JALAPENO) || defined(SERRANO) {JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1}, {JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2}, {JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32}, #else /* JALAPENO || SERRANO */ {SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1}, {SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2}, {SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32}, #endif /* JALAPENO || SERRANO */ {0, 0} }; /* * Interval for deferred CEEN reenable */ int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS; /* * set in /etc/system to control logging of user BERR/TO's */ int cpu_berr_to_verbose = 0; /* * set to 0 in /etc/system to defer CEEN reenable for all CEs */ uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED; uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT; /* * Set of all offline cpus */ cpuset_t cpu_offline_set; static void cpu_delayed_check_ce_errors(void *); static void cpu_check_ce_errors(void *); void cpu_error_ecache_flush(ch_async_flt_t *); static int cpu_error_ecache_flush_required(ch_async_flt_t *); static void cpu_log_and_clear_ce(ch_async_flt_t *); void cpu_ce_detected(ch_cpu_errors_t *, int); /* * CE Leaky check timeout in microseconds. This is chosen to be twice the * memory refresh interval of current DIMMs (64ms). After initial fix that * gives at least one full refresh cycle in which the cell can leak * (whereafter further refreshes simply reinforce any incorrect bit value). */ clock_t cpu_ce_lkychk_timeout_usec = 128000; /* * CE partner check partner caching period in seconds */ int cpu_ce_ptnr_cachetime_sec = 60; /* * Sets trap table entry ttentry by overwriting eight instructions from ttlabel */ #define CH_SET_TRAP(ttentry, ttlabel) \ bcopy((const void *)&ttlabel, &ttentry, 32); \ flush_instr_mem((caddr_t)&ttentry, 32); static int min_ecache_size; static uint_t priv_hcl_1; static uint_t priv_hcl_2; static uint_t priv_hcl_4; static uint_t priv_hcl_8; void cpu_setup(void) { extern int at_flags; extern int cpc_has_overflow_intr; /* * Setup chip-specific trap handlers. */ cpu_init_trap(); cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3; /* * save the cache bootup state. */ cache_boot_state = get_dcu() & DCU_CACHE; /* * Due to the number of entries in the fully-associative tlb * this may have to be tuned lower than in spitfire. */ pp_slots = MIN(8, MAXPP_SLOTS); /* * Block stores do not invalidate all pages of the d$, pagecopy * et. al. need virtual translations with virtual coloring taken * into consideration. prefetch/ldd will pollute the d$ on the * load side. */ pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE; if (use_page_coloring) { do_pg_coloring = 1; } isa_list = "sparcv9+vis2 sparcv9+vis sparcv9 " "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus " "sparcv8 sparcv8-fsmuld sparcv7 sparc"; /* * On Panther-based machines, this should * also include AV_SPARC_POPC too */ cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2; /* * On cheetah, there's no hole in the virtual address space */ hole_start = hole_end = 0; /* * The kpm mapping window. * kpm_size: * The size of a single kpm range. * The overall size will be: kpm_size * vac_colors. * kpm_vbase: * The virtual start address of the kpm range within the kernel * virtual address space. kpm_vbase has to be kpm_size aligned. 
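 * Note that the 8TB kpm_size set below corresponds to
 * kpm_size_shift == 43, since (1ull << 43) == 8TB.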
*/ kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */ kpm_size_shift = 43; kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */ kpm_smallpages = 1; /* * The traptrace code uses either %tick or %stick for * timestamping. We have %stick so we can use it. */ traptrace_use_stick = 1; /* * Cheetah has a performance counter overflow interrupt */ cpc_has_overflow_intr = 1; #if defined(CPU_IMP_DUAL_PAGESIZE) /* * Use Cheetah+ and later dual page size support. */ if (!disable_dual_pgsz) { cpu_impl_dual_pgsz = 1; } #endif /* CPU_IMP_DUAL_PAGESIZE */ /* * Declare that this architecture/cpu combination does fpRAS. */ fpras_implemented = 1; /* * Setup CE lookup table */ CE_INITDISPTBL_POPULATE(ce_disp_table); ce_disp_inited = 1; } /* * Called by setcpudelay */ void cpu_init_tick_freq(void) { /* * For UltraSPARC III and beyond we want to use the * system clock rate as the basis for low level timing, * due to support of mixed speed CPUs and power managment. */ if (system_clock_freq == 0) cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq"); sys_tick_freq = system_clock_freq; } #ifdef CHEETAHPLUS_ERRATUM_25 /* * Tunables */ int cheetah_bpe_off = 0; int cheetah_sendmondo_recover = 1; int cheetah_sendmondo_fullscan = 0; int cheetah_sendmondo_recover_delay = 5; #define CHEETAH_LIVELOCK_MIN_DELAY 1 /* * Recovery Statistics */ typedef struct cheetah_livelock_entry { int cpuid; /* fallen cpu */ int buddy; /* cpu that ran recovery */ clock_t lbolt; /* when recovery started */ hrtime_t recovery_time; /* time spent in recovery */ } cheetah_livelock_entry_t; #define CHEETAH_LIVELOCK_NENTRY 32 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY]; int cheetah_livelock_entry_nxt; #define CHEETAH_LIVELOCK_ENTRY_NEXT(statp) { \ statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt; \ if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) { \ cheetah_livelock_entry_nxt = 0; \ } \ } #define CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val) statp->item = val struct { hrtime_t hrt; /* maximum recovery time */ int recovery; /* recovered */ int full_claimed; /* maximum pages claimed in full recovery */ int proc_entry; /* attempted to claim TSB */ int proc_tsb_scan; /* tsb scanned */ int proc_tsb_partscan; /* tsb partially scanned */ int proc_tsb_fullscan; /* whole tsb scanned */ int proc_claimed; /* maximum pages claimed in tsb scan */ int proc_user; /* user thread */ int proc_kernel; /* kernel thread */ int proc_onflt; /* bad stack */ int proc_cpu; /* null cpu */ int proc_thread; /* null thread */ int proc_proc; /* null proc */ int proc_as; /* null as */ int proc_hat; /* null hat */ int proc_hat_inval; /* hat contents don't make sense */ int proc_hat_busy; /* hat is changing TSBs */ int proc_tsb_reloc; /* TSB skipped because being relocated */ int proc_cnum_bad; /* cnum out of range */ int proc_cnum; /* last cnum processed */ tte_t proc_tte; /* last tte processed */ } cheetah_livelock_stat; #define CHEETAH_LIVELOCK_STAT(item) cheetah_livelock_stat.item++ #define CHEETAH_LIVELOCK_STATSET(item, value) \ cheetah_livelock_stat.item = value #define CHEETAH_LIVELOCK_MAXSTAT(item, value) { \ if (value > cheetah_livelock_stat.item) \ cheetah_livelock_stat.item = value; \ } /* * Attempt to recover a cpu by claiming every cache line as saved * in the TSB that the non-responsive cpu is using. Since we can't * grab any adaptive lock, this is at best an attempt to do so. Because * we don't grab any locks, we must operate under the protection of * on_fault(). 
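 *
 * Roughly, the pattern used below is:
 *
 *	if (on_fault(&ljb)) {
 *		(a stale pointer faulted, give up)
 *	}
 *	(chase cpu/thread/proc/hat/TSB pointers and claim their lines)
 *	no_fault();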
 *
 * Return 1 if cpuid could be recovered, 0 if failed.
 */
int
mondo_recover_proc(uint16_t cpuid, int bn)
{
	label_t ljb;
	cpu_t *cp;
	kthread_t *t;
	proc_t *p;
	struct as *as;
	struct hat *hat;
	uint_t cnum;
	struct tsb_info *tsbinfop;
	struct tsbe *tsbep;
	caddr_t tsbp;
	caddr_t end_tsbp;
	uint64_t paddr;
	uint64_t idsr;
	u_longlong_t pahi, palo;
	int pages_claimed = 0;
	tte_t tsbe_tte;
	int tried_kernel_tsb = 0;
	mmu_ctx_t *mmu_ctxp;

	CHEETAH_LIVELOCK_STAT(proc_entry);

	if (on_fault(&ljb)) {
		CHEETAH_LIVELOCK_STAT(proc_onflt);
		goto badstruct;
	}

	if ((cp = cpu[cpuid]) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_cpu);
		goto badstruct;
	}

	if ((t = cp->cpu_thread) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_thread);
		goto badstruct;
	}

	if ((p = ttoproc(t)) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_proc);
		goto badstruct;
	}

	if ((as = p->p_as) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_as);
		goto badstruct;
	}

	if ((hat = as->a_hat) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_hat);
		goto badstruct;
	}

	if (hat != ksfmmup) {
		CHEETAH_LIVELOCK_STAT(proc_user);
		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
			goto badstruct;
		}
		tsbinfop = hat->sfmmu_tsb;
		if (tsbinfop == NULL) {
			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
			goto badstruct;
		}
		tsbp = tsbinfop->tsb_va;
		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
	} else {
		CHEETAH_LIVELOCK_STAT(proc_kernel);
		tsbinfop = NULL;
		tsbp = ktsb_base;
		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
	}

	/* Verify as */
	if (hat->sfmmu_as != as) {
		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
		goto badstruct;
	}

	mmu_ctxp = CPU_MMU_CTXP(cp);
	ASSERT(mmu_ctxp);
	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);

	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
	    (cnum >= mmu_ctxp->mmu_nctxs)) {
		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
		goto badstruct;
	}

	do {
		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);

		/*
		 * Skip TSBs being relocated.  This is important because
		 * we want to avoid the following deadlock scenario:
		 *
		 * 1) when we came in we set ourselves to "in recover" state.
		 * 2) when we try to touch TSB being relocated the mapping
		 *    will be in the suspended state so we'll spin waiting
		 *    for it to be unlocked.
		 * 3) when the CPU that holds the TSB mapping locked tries to
		 *    unlock it, it will send an xtrap which will fail to
		 *    xcall us or the CPU we're trying to recover, and will
		 *    in turn enter the mondo code.
		 * 4) since we are still spinning on the locked mapping
		 *    no further progress will be made and the system will
		 *    inevitably hard hang.
		 *
		 * A TSB not being relocated can't begin being relocated
		 * while we're accessing it because we check
		 * sendmondo_in_recover before relocating TSBs.
		 */
		if (hat != ksfmmup &&
		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
			goto next_tsbinfo;
		}

		for (tsbep = (struct tsbe *)tsbp;
		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
			tsbe_tte = tsbep->tte_data;

			if (tsbe_tte.tte_val == 0) {
				/*
				 * Invalid tte
				 */
				continue;
			}
			if (tsbe_tte.tte_se) {
				/*
				 * Don't want device registers
				 */
				continue;
			}
			if (tsbe_tte.tte_cp == 0) {
				/*
				 * Must be cached in E$
				 */
				continue;
			}
			if (tsbep->tte_tag.tag_invalid != 0) {
				/*
				 * Invalid tag, ignore this entry.
*/ continue; } CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte); idsr = getidsr(); if ((idsr & (IDSR_NACK_BIT(bn) | IDSR_BUSY_BIT(bn))) == 0) { CHEETAH_LIVELOCK_STAT(proc_tsb_partscan); goto done; } pahi = tsbe_tte.tte_pahi; palo = tsbe_tte.tte_palo; paddr = (uint64_t)((pahi << 32) | (palo << MMU_PAGESHIFT)); claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)), CH_ECACHE_SUBBLK_SIZE); if ((idsr & IDSR_BUSY_BIT(bn)) == 0) { shipit(cpuid, bn); } pages_claimed++; } next_tsbinfo: if (tsbinfop != NULL) tsbinfop = tsbinfop->tsb_next; if (tsbinfop != NULL) { tsbp = tsbinfop->tsb_va; end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc); } else if (tsbp == ktsb_base) { tried_kernel_tsb = 1; } else if (!tried_kernel_tsb) { tsbp = ktsb_base; end_tsbp = tsbp + TSB_BYTES(ktsb_sz); hat = ksfmmup; tsbinfop = NULL; } } while (tsbinfop != NULL || ((tsbp == ktsb_base) && !tried_kernel_tsb)); CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan); CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed); no_fault(); idsr = getidsr(); if ((idsr & (IDSR_NACK_BIT(bn) | IDSR_BUSY_BIT(bn))) == 0) { return (1); } else { return (0); } done: no_fault(); CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed); return (1); badstruct: no_fault(); return (0); } /* * Attempt to claim ownership, temporarily, of every cache line that a * non-responsive cpu might be using. This might kick that cpu out of * this state. * * The return value indicates to the caller if we have exhausted all recovery * techniques. If 1 is returned, it is useless to call this function again * even for a different target CPU. */ int mondo_recover(uint16_t cpuid, int bn) { struct memseg *seg; uint64_t begin_pa, end_pa, cur_pa; hrtime_t begin_hrt, end_hrt; int retval = 0; int pages_claimed = 0; cheetah_livelock_entry_t *histp; uint64_t idsr; if (cas32(&sendmondo_in_recover, 0, 1) != 0) { /* * Wait while recovery takes place */ while (sendmondo_in_recover) { drv_usecwait(1); } /* * Assume we didn't claim the whole memory. If * the target of this caller is not recovered, * it will come back. */ return (retval); } CHEETAH_LIVELOCK_ENTRY_NEXT(histp) CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt); CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid); CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id); begin_hrt = gethrtime_waitfree(); /* * First try to claim the lines in the TSB the target * may have been using. */ if (mondo_recover_proc(cpuid, bn) == 1) { /* * Didn't claim the whole memory */ goto done; } /* * We tried using the TSB. The target is still * not recovered. Check if complete memory scan is * enabled. */ if (cheetah_sendmondo_fullscan == 0) { /* * Full memory scan is disabled. */ retval = 1; goto done; } /* * Try claiming the whole memory. */ for (seg = memsegs; seg; seg = seg->next) { begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT; end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT; for (cur_pa = begin_pa; cur_pa < end_pa; cur_pa += MMU_PAGESIZE) { idsr = getidsr(); if ((idsr & (IDSR_NACK_BIT(bn) | IDSR_BUSY_BIT(bn))) == 0) { /* * Didn't claim all memory */ goto done; } claimlines(cur_pa, MMU_PAGESIZE, CH_ECACHE_SUBBLK_SIZE); if ((idsr & IDSR_BUSY_BIT(bn)) == 0) { shipit(cpuid, bn); } pages_claimed++; } } /* * We did all we could. 
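 * Returning 1 below tells the caller that every recovery technique
 * has been exhausted, so there is no point in calling mondo_recover()
 * again.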
*/ retval = 1; done: /* * Update statistics */ end_hrt = gethrtime_waitfree(); CHEETAH_LIVELOCK_STAT(recovery); CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt)); CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed); CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \ (end_hrt - begin_hrt)); while (cas32(&sendmondo_in_recover, 1, 0) != 1) ; return (retval); } /* * This is called by the cyclic framework when this CPU becomes online */ /*ARGSUSED*/ static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) { hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy; hdlr->cyh_level = CY_LOW_LEVEL; hdlr->cyh_arg = NULL; /* * Stagger the start time */ when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU); if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) { cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY; } when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC; } /* * Create a low level cyclic to send a xtrap to the next cpu online. * However, there's no need to have this running on a uniprocessor system. */ static void cheetah_nudge_init(void) { cyc_omni_handler_t hdlr; if (max_ncpus == 1) { return; } hdlr.cyo_online = cheetah_nudge_onln; hdlr.cyo_offline = NULL; hdlr.cyo_arg = NULL; mutex_enter(&cpu_lock); (void) cyclic_add_omni(&hdlr); mutex_exit(&cpu_lock); } /* * Cyclic handler to wake up buddy */ void cheetah_nudge_buddy(void) { /* * Disable kernel preemption to protect the cpu list */ kpreempt_disable(); if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) { xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1, 0, 0); } kpreempt_enable(); } #endif /* CHEETAHPLUS_ERRATUM_25 */ #ifdef SEND_MONDO_STATS uint32_t x_one_stimes[64]; uint32_t x_one_ltimes[16]; uint32_t x_set_stimes[64]; uint32_t x_set_ltimes[16]; uint32_t x_set_cpus[NCPU]; uint32_t x_nack_stimes[64]; #endif /* * Note: A version of this function is used by the debugger via the KDI, * and must be kept in sync with this version. Any changes made to this * function to support new chips or to accomodate errata must also be included * in the KDI-specific version. See us3_kdi.c. */ void send_one_mondo(int cpuid) { int busy, nack; uint64_t idsr, starttick, endtick, tick, lasttick; uint64_t busymask; #ifdef CHEETAHPLUS_ERRATUM_25 int recovered = 0; #endif CPU_STATS_ADDQ(CPU, sys, xcalls, 1); starttick = lasttick = gettick(); shipit(cpuid, 0); endtick = starttick + xc_tick_limit; busy = nack = 0; #if defined(JALAPENO) || defined(SERRANO) /* * Lower 2 bits of the agent ID determine which BUSY/NACK pair * will be used for dispatching interrupt. For now, assume * there are no more than IDSR_BN_SETS CPUs, hence no aliasing * issues with respect to BUSY/NACK pair usage. */ busymask = IDSR_BUSY_BIT(cpuid); #else /* JALAPENO || SERRANO */ busymask = IDSR_BUSY; #endif /* JALAPENO || SERRANO */ for (;;) { idsr = getidsr(); if (idsr == 0) break; tick = gettick(); /* * If there is a big jump between the current tick * count and lasttick, we have probably hit a break * point. Adjust endtick accordingly to avoid panic. */ if (tick > (lasttick + xc_tick_jump_limit)) endtick += (tick - lasttick); lasttick = tick; if (tick > endtick) { if (panic_quiesce) return; #ifdef CHEETAHPLUS_ERRATUM_25 if (cheetah_sendmondo_recover && recovered == 0) { if (mondo_recover(cpuid, 0)) { /* * We claimed the whole memory or * full scan is disabled. 
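 * Either way, remember that recovery was attempted so that we
 * do not try it again for this mondo.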
*/ recovered++; } tick = gettick(); endtick = tick + xc_tick_limit; lasttick = tick; /* * Recheck idsr */ continue; } else #endif /* CHEETAHPLUS_ERRATUM_25 */ { cmn_err(CE_PANIC, "send mondo timeout " "(target 0x%x) [%d NACK %d BUSY]", cpuid, nack, busy); } } if (idsr & busymask) { busy++; continue; } drv_usecwait(1); shipit(cpuid, 0); nack++; busy = 0; } #ifdef SEND_MONDO_STATS { int n = gettick() - starttick; if (n < 8192) x_one_stimes[n >> 7]++; else x_one_ltimes[(n >> 13) & 0xf]++; } #endif } void syncfpu(void) { } /* * Return processor specific async error structure * size used. */ int cpu_aflt_size(void) { return (sizeof (ch_async_flt_t)); } /* * Tunable to disable the checking of other cpu logout areas during panic for * potential syndrome 71 generating errors. */ int enable_check_other_cpus_logout = 1; /* * Check other cpus logout area for potential synd 71 generating * errors. */ static void cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type, ch_cpu_logout_t *clop) { struct async_flt *aflt; ch_async_flt_t ch_flt; uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) { return; } bzero(&ch_flt, sizeof (ch_async_flt_t)); t_afar = clop->clo_data.chd_afar; t_afsr = clop->clo_data.chd_afsr; t_afsr_ext = clop->clo_data.chd_afsr_ext; #if defined(SERRANO) ch_flt.afar2 = clop->clo_data.chd_afar2; #endif /* SERRANO */ /* * In order to simplify code, we maintain this afsr_errs * variable which holds the aggregate of AFSR and AFSR_EXT * sticky bits. */ t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | (t_afsr & C_AFSR_ALL_ERRS); /* Setup the async fault structure */ aflt = (struct async_flt *)&ch_flt; aflt->flt_id = gethrtime_waitfree(); ch_flt.afsr_ext = t_afsr_ext; ch_flt.afsr_errs = t_afsr_errs; aflt->flt_stat = t_afsr; aflt->flt_addr = t_afar; aflt->flt_bus_id = cpuid; aflt->flt_inst = cpuid; aflt->flt_pc = tpc; aflt->flt_prot = AFLT_PROT_NONE; aflt->flt_class = CPU_FAULT; aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0); aflt->flt_tl = tl; aflt->flt_status = ecc_type; aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs); /* * Queue events on the async event queue, one event per error bit. * If no events are queued, queue an event to complain. */ if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) { ch_flt.flt_type = CPU_INV_AFSR; cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); } /* * Zero out + invalidate CPU logout. */ bzero(clop, sizeof (ch_cpu_logout_t)); clop->clo_data.chd_afar = LOGOUT_INVALID; } /* * Check the logout areas of all other cpus for unlogged errors. 
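 * This is done in the panic flow; it can be disabled with the
 * enable_check_other_cpus_logout tunable above.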
*/ static void cpu_check_other_cpus_logout(void) { int i, j; processorid_t myid; struct cpu *cp; ch_err_tl1_data_t *cl1p; myid = CPU->cpu_id; for (i = 0; i < NCPU; i++) { cp = cpu[i]; if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) || (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) { continue; } /* * Check each of the tl>0 logout areas */ cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]); for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) { if (cl1p->ch_err_tl1_flags == 0) continue; cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc, 1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout); } /* * Check each of the remaining logout areas */ cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP, CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout)); cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP, CPU_PRIVATE_PTR(cp, chpr_cecc_logout)); cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP, CPU_PRIVATE_PTR(cp, chpr_async_logout)); } } /* * The fast_ecc_err handler transfers control here for UCU, UCC events. * Note that we flush Ecache twice, once in the fast_ecc_err handler to * flush the error that caused the UCU/UCC, then again here at the end to * flush the TL=1 trap handler code out of the Ecache, so we can minimize * the probability of getting a TL>1 Fast ECC trap when we're fielding * another Fast ECC trap. * * Cheetah+ also handles: TSCE: No additional processing required. * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT. * * Note that the p_clo_flags input is only valid in cases where the * cpu_private struct is not yet initialized (since that is the only * time that information cannot be obtained from the logout struct.) */ /*ARGSUSED*/ void cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags) { ch_cpu_logout_t *clop; uint64_t ceen, nceen; /* * Get the CPU log out info. If we can't find our CPU private * pointer, then we will have to make due without any detailed * logout information. */ if (CPU_PRIVATE(CPU) == NULL) { clop = NULL; ceen = p_clo_flags & EN_REG_CEEN; nceen = p_clo_flags & EN_REG_NCEEN; } else { clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout); ceen = clop->clo_flags & EN_REG_CEEN; nceen = clop->clo_flags & EN_REG_NCEEN; } cpu_log_fast_ecc_error((caddr_t)rp->r_pc, (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop); } /* * Log fast ecc error, called from either Fast ECC at TL=0 or Fast * ECC at TL>0. Need to supply either a error register pointer or a * cpu logout structure pointer. */ static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen, uint64_t nceen, ch_cpu_logout_t *clop) { struct async_flt *aflt; ch_async_flt_t ch_flt; uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; char pr_reason[MAX_REASON_STRING]; ch_cpu_errors_t cpu_error_regs; bzero(&ch_flt, sizeof (ch_async_flt_t)); /* * If no cpu logout data, then we will have to make due without * any detailed logout information. */ if (clop == NULL) { ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; get_cpu_error_state(&cpu_error_regs); set_cpu_error_state(&cpu_error_regs); t_afar = cpu_error_regs.afar; t_afsr = cpu_error_regs.afsr; t_afsr_ext = cpu_error_regs.afsr_ext; #if defined(SERRANO) ch_flt.afar2 = cpu_error_regs.afar2; #endif /* SERRANO */ } else { t_afar = clop->clo_data.chd_afar; t_afsr = clop->clo_data.chd_afsr; t_afsr_ext = clop->clo_data.chd_afsr_ext; #if defined(SERRANO) ch_flt.afar2 = clop->clo_data.chd_afar2; #endif /* SERRANO */ } /* * In order to simplify code, we maintain this afsr_errs * variable which holds the aggregate of AFSR and AFSR_EXT * sticky bits. 
*/ t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | (t_afsr & C_AFSR_ALL_ERRS); pr_reason[0] = '\0'; /* Setup the async fault structure */ aflt = (struct async_flt *)&ch_flt; aflt->flt_id = gethrtime_waitfree(); ch_flt.afsr_ext = t_afsr_ext; ch_flt.afsr_errs = t_afsr_errs; aflt->flt_stat = t_afsr; aflt->flt_addr = t_afar; aflt->flt_bus_id = getprocessorid(); aflt->flt_inst = CPU->cpu_id; aflt->flt_pc = tpc; aflt->flt_prot = AFLT_PROT_NONE; aflt->flt_class = CPU_FAULT; aflt->flt_priv = priv; aflt->flt_tl = tl; aflt->flt_status = ECC_F_TRAP; aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs); /* * XXXX - Phenomenal hack to get around Solaris not getting all the * cmn_err messages out to the console. The situation is a UCU (in * priv mode) which causes a WDU which causes a UE (on the retry). * The messages for the UCU and WDU are enqueued and then pulled off * the async queue via softint and syslogd starts to process them * but doesn't get them to the console. The UE causes a panic, but * since the UCU/WDU messages are already in transit, those aren't * on the async queue. The hack is to check if we have a matching * WDU event for the UCU, and if it matches, we're more than likely * going to panic with a UE, unless we're under protection. So, we * check to see if we got a matching WDU event and if we're under * protection. * * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about * looks like this: * UCU->WDU->UE * For Panther, it could look like either of these: * UCU---->WDU->L3_WDU->UE * L3_UCU->WDU->L3_WDU->UE */ if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) && aflt->flt_panic == 0 && aflt->flt_priv != 0 && curthread->t_ontrap == NULL && curthread->t_lofault == NULL) { get_cpu_error_state(&cpu_error_regs); aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) && (cpu_error_regs.afar == t_afar)); aflt->flt_panic |= ((clop == NULL) && (t_afsr_errs & C_AFSR_WDU)); } /* * Queue events on the async event queue, one event per error bit. * If no events are queued or no Fast ECC events are on in the AFSR, * queue an event to complain. */ if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 || ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) { ch_flt.flt_type = CPU_INV_AFSR; cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); } /* * Zero out + invalidate CPU logout. */ if (clop) { bzero(clop, sizeof (ch_cpu_logout_t)); clop->clo_data.chd_afar = LOGOUT_INVALID; } /* * We carefully re-enable NCEEN and CEEN and then check if any deferred * or disrupting errors have happened. We do this because if a * deferred or disrupting error had occurred with NCEEN/CEEN off, the * trap will not be taken when NCEEN/CEEN is re-enabled. Note that * CEEN works differently on Cheetah than on Spitfire. Also, we enable * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a * deferred or disrupting error happening between checking the AFSR and * enabling NCEEN/CEEN. * * Note: CEEN and NCEEN are only reenabled if they were on when trap * taken. */ set_error_enable(get_error_enable() | (nceen | ceen)); if (clear_errors(&ch_flt)) { aflt->flt_panic |= ((ch_flt.afsr_errs & (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0); (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, NULL); } /* * Panic here if aflt->flt_panic has been set. Enqueued errors will * be logged as part of the panic flow. 
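 * (The panic code drains the pending error queues via errorq_panic(),
 * so the events queued above are not lost.)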
*/ if (aflt->flt_panic) fm_panic("%sError(s)", pr_reason); /* * Flushing the Ecache here gets the part of the trap handler that * is run at TL=1 out of the Ecache. */ cpu_flush_ecache(); } /* * This is called via sys_trap from pil15_interrupt code if the * corresponding entry in ch_err_tl1_pending is set. Checks the * various ch_err_tl1_data structures for valid entries based on the bit * settings in the ch_err_tl1_flags entry of the structure. */ /*ARGSUSED*/ void cpu_tl1_error(struct regs *rp, int panic) { ch_err_tl1_data_t *cl1p, cl1; int i, ncl1ps; uint64_t me_flags; uint64_t ceen, nceen; if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) { cl1p = &ch_err_tl1_data; ncl1ps = 1; } else if (CPU_PRIVATE(CPU) != NULL) { cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]); ncl1ps = CH_ERR_TL1_TLMAX; } else { ncl1ps = 0; } for (i = 0; i < ncl1ps; i++, cl1p++) { if (cl1p->ch_err_tl1_flags == 0) continue; /* * Grab a copy of the logout data and invalidate * the logout area. */ cl1 = *cl1p; bzero(cl1p, sizeof (ch_err_tl1_data_t)); cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID; me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags); /* * Log "first error" in ch_err_tl1_data. */ if (cl1.ch_err_tl1_flags & CH_ERR_FECC) { ceen = get_error_enable() & EN_REG_CEEN; nceen = get_error_enable() & EN_REG_NCEEN; cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1, 1, ceen, nceen, &cl1.ch_err_tl1_logout); } #if defined(CPU_IMP_L1_CACHE_PARITY) if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) { cpu_parity_error(rp, cl1.ch_err_tl1_flags, (caddr_t)cl1.ch_err_tl1_tpc); } #endif /* CPU_IMP_L1_CACHE_PARITY */ /* * Log "multiple events" in ch_err_tl1_data. Note that * we don't read and clear the AFSR/AFAR in the TL>0 code * if the structure is busy, we just do the cache flushing * we have to do and then do the retry. So the AFSR/AFAR * at this point *should* have some relevant info. If there * are no valid errors in the AFSR, we'll assume they've * already been picked up and logged. For I$/D$ parity, * we just log an event with an "Unknown" (NULL) TPC. */ if (me_flags & CH_ERR_FECC) { ch_cpu_errors_t cpu_error_regs; uint64_t t_afsr_errs; /* * Get the error registers and see if there's * a pending error. If not, don't bother * generating an "Invalid AFSR" error event. */ get_cpu_error_state(&cpu_error_regs); t_afsr_errs = (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS) | (cpu_error_regs.afsr & C_AFSR_ALL_ERRS); if (t_afsr_errs != 0) { ceen = get_error_enable() & EN_REG_CEEN; nceen = get_error_enable() & EN_REG_NCEEN; cpu_log_fast_ecc_error((caddr_t)NULL, 1, 1, ceen, nceen, NULL); } } #if defined(CPU_IMP_L1_CACHE_PARITY) if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) { cpu_parity_error(rp, me_flags, (caddr_t)NULL); } #endif /* CPU_IMP_L1_CACHE_PARITY */ } } /* * Called from Fast ECC TL>0 handler in case of fatal error. * cpu_tl1_error should always find an associated ch_err_tl1_data structure, * but if we don't, we'll panic with something reasonable. */ /*ARGSUSED*/ void cpu_tl1_err_panic(struct regs *rp, ulong_t flags) { cpu_tl1_error(rp, 1); /* * Should never return, but just in case. */ fm_panic("Unsurvivable ECC Error at TL>0"); } /* * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST, * EDC, WDU, WDC, CPU, CPC, IVU, IVC events. 
 * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
 * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
 *
 * Cheetah+ also handles (No additional processing required):
 *    DUE, DTO, DBERR	(NCEEN controlled)
 *    THCE		(CEEN and ET_ECC_en controlled)
 *    TUE		(ET_ECC_en controlled)
 *
 * Panther further adds:
 *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
 *    TUE_SH, TUE	(NCEEN and L2_tag_ECC_en controlled)
 *    L3_TUE, L3_TUE_SH	(NCEEN and ET_ECC_en controlled)
 *    THCE		(CEEN and L2_tag_ECC_en controlled)
 *    L3_THCE		(CEEN and ET_ECC_en controlled)
 *
 * Note that the p_clo_flags input is only valid in cases where the
 * cpu_private struct is not yet initialized (since that is the only
 * time that information cannot be obtained from the logout struct.)
 */
/*ARGSUSED*/
void
cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
{
	struct async_flt *aflt;
	ch_async_flt_t ch_flt;
	char pr_reason[MAX_REASON_STRING];
	ch_cpu_logout_t *clop;
	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
	ch_cpu_errors_t cpu_error_regs;

	bzero(&ch_flt, sizeof (ch_async_flt_t));
	/*
	 * Get the CPU log out info. If we can't find our CPU private
	 * pointer, then we will have to make do without any detailed
	 * logout information.
	 */
	if (CPU_PRIVATE(CPU) == NULL) {
		clop = NULL;
		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
		get_cpu_error_state(&cpu_error_regs);
		set_cpu_error_state(&cpu_error_regs);
		t_afar = cpu_error_regs.afar;
		t_afsr = cpu_error_regs.afsr;
		t_afsr_ext = cpu_error_regs.afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
	} else {
		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
		t_afar = clop->clo_data.chd_afar;
		t_afsr = clop->clo_data.chd_afsr;
		t_afsr_ext = clop->clo_data.chd_afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = clop->clo_data.chd_afar2;
#endif	/* SERRANO */
	}

	/*
	 * In order to simplify code, we maintain this afsr_errs
	 * variable which holds the aggregate of AFSR and AFSR_EXT
	 * sticky bits.
	 */
	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
	    (t_afsr & C_AFSR_ALL_ERRS);

	pr_reason[0] = '\0';
	/* Setup the async fault structure */
	aflt = (struct async_flt *)&ch_flt;
	ch_flt.afsr_ext = t_afsr_ext;
	ch_flt.afsr_errs = t_afsr_errs;
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
	aflt->flt_tl = 0;
	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);

	/*
	 * If this trap is a result of one of the errors not masked
	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
	 * indicate that a timeout is to be set later.
	 */
	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
	    !aflt->flt_panic)
		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
	else
		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;

	/*
	 * log the CE and clean up
	 */
	cpu_log_and_clear_ce(&ch_flt);

	/*
	 * We re-enable CEEN (if required) and check if any disrupting errors
	 * have happened. We do this because if a disrupting error had occurred
	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
	 * Note that CEEN works differently on Cheetah than on Spitfire. Also,
	 * we enable CEEN *before* checking the AFSR to avoid the small window
	 * of an error happening between checking the AFSR and enabling CEEN.
*/ if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER) set_error_enable(get_error_enable() | EN_REG_CEEN); if (clear_errors(&ch_flt)) { (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, NULL); } /* * Panic here if aflt->flt_panic has been set. Enqueued errors will * be logged as part of the panic flow. */ if (aflt->flt_panic) fm_panic("%sError(s)", pr_reason); } /* * The async_err handler transfers control here for UE, EMU, EDU:BLD, * L3_EDU:BLD, TO, and BERR events. * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR * * Cheetah+: No additional errors handled. * * Note that the p_clo_flags input is only valid in cases where the * cpu_private struct is not yet initialized (since that is the only * time that information cannot be obtained from the logout struct.) */ /*ARGSUSED*/ void cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags) { ushort_t ttype, tl; ch_async_flt_t ch_flt; struct async_flt *aflt; int trampolined = 0; char pr_reason[MAX_REASON_STRING]; ch_cpu_logout_t *clop; uint64_t ceen, clo_flags; uint64_t log_afsr; uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs; ch_cpu_errors_t cpu_error_regs; int expected = DDI_FM_ERR_UNEXPECTED; ddi_acc_hdl_t *hp; /* * We need to look at p_flag to determine if the thread detected an * error while dumping core. We can't grab p_lock here, but it's ok * because we just need a consistent snapshot and we know that everyone * else will store a consistent set of bits while holding p_lock. We * don't have to worry about a race because SDOCORE is set once prior * to doing i/o from the process's address space and is never cleared. */ uint_t pflag = ttoproc(curthread)->p_flag; bzero(&ch_flt, sizeof (ch_async_flt_t)); /* * Get the CPU log out info. If we can't find our CPU private * pointer then we will have to make due without any detailed * logout information. */ if (CPU_PRIVATE(CPU) == NULL) { clop = NULL; ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID; get_cpu_error_state(&cpu_error_regs); set_cpu_error_state(&cpu_error_regs); t_afar = cpu_error_regs.afar; t_afsr = cpu_error_regs.afsr; t_afsr_ext = cpu_error_regs.afsr_ext; #if defined(SERRANO) ch_flt.afar2 = cpu_error_regs.afar2; #endif /* SERRANO */ clo_flags = p_clo_flags; } else { clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout); t_afar = clop->clo_data.chd_afar; t_afsr = clop->clo_data.chd_afsr; t_afsr_ext = clop->clo_data.chd_afsr_ext; #if defined(SERRANO) ch_flt.afar2 = clop->clo_data.chd_afar2; #endif /* SERRANO */ clo_flags = clop->clo_flags; } /* * In order to simplify code, we maintain this afsr_errs * variable which holds the aggregate of AFSR and AFSR_EXT * sticky bits. */ t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) | (t_afsr & C_AFSR_ALL_ERRS); pr_reason[0] = '\0'; /* * Grab information encoded into our clo_flags field. */ ceen = clo_flags & EN_REG_CEEN; tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT; ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT; /* * handle the specific error */ aflt = (struct async_flt *)&ch_flt; aflt->flt_id = gethrtime_waitfree(); aflt->flt_bus_id = getprocessorid(); aflt->flt_inst = CPU->cpu_id; ch_flt.afsr_ext = t_afsr_ext; ch_flt.afsr_errs = t_afsr_errs; aflt->flt_stat = t_afsr; aflt->flt_addr = t_afar; aflt->flt_pc = (caddr_t)rp->r_pc; aflt->flt_prot = AFLT_PROT_NONE; aflt->flt_class = CPU_FAULT; aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; aflt->flt_tl = (uchar_t)tl; aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) || C_AFSR_PANIC(t_afsr_errs)); aflt->flt_core = (pflag & SDOCORE) ? 
1 : 0; aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP); /* * If the trap occurred in privileged mode at TL=0, we need to check to * see if we were executing in the kernel under on_trap() or t_lofault * protection. If so, modify the saved registers so that we return * from the trap to the appropriate trampoline routine. */ if (aflt->flt_priv && tl == 0) { if (curthread->t_ontrap != NULL) { on_trap_data_t *otp = curthread->t_ontrap; if (otp->ot_prot & OT_DATA_EC) { aflt->flt_prot = AFLT_PROT_EC; otp->ot_trap |= OT_DATA_EC; rp->r_pc = otp->ot_trampoline; rp->r_npc = rp->r_pc + 4; trampolined = 1; } if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) && (otp->ot_prot & OT_DATA_ACCESS)) { aflt->flt_prot = AFLT_PROT_ACCESS; otp->ot_trap |= OT_DATA_ACCESS; rp->r_pc = otp->ot_trampoline; rp->r_npc = rp->r_pc + 4; trampolined = 1; /* * for peeks and caut_gets errors are expected */ hp = (ddi_acc_hdl_t *)otp->ot_handle; if (!hp) expected = DDI_FM_ERR_PEEK; else if (hp->ah_acc.devacc_attr_access == DDI_CAUTIOUS_ACC) expected = DDI_FM_ERR_EXPECTED; } } else if (curthread->t_lofault) { aflt->flt_prot = AFLT_PROT_COPY; rp->r_g1 = EFAULT; rp->r_pc = curthread->t_lofault; rp->r_npc = rp->r_pc + 4; trampolined = 1; } } /* * If we're in user mode or we're doing a protected copy, we either * want the ASTON code below to send a signal to the user process * or we want to panic if aft_panic is set. * * If we're in privileged mode and we're not doing a copy, then we * need to check if we've trampolined. If we haven't trampolined, * we should panic. */ if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { if (t_afsr_errs & ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) & ~(C_AFSR_BERR | C_AFSR_TO))) aflt->flt_panic |= aft_panic; } else if (!trampolined) { aflt->flt_panic = 1; } /* * If we've trampolined due to a privileged TO or BERR, or if an * unprivileged TO or BERR occurred, we don't want to enqueue an * event for that TO or BERR. Queue all other events (if any) besides * the TO/BERR. Since we may not be enqueing any events, we need to * ignore the number of events queued. If we haven't trampolined due * to a TO or BERR, just enqueue events normally. */ log_afsr = t_afsr_errs; if (trampolined) { log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR); } else if (!aflt->flt_priv) { /* * User mode, suppress messages if * cpu_berr_to_verbose is not set. */ if (!cpu_berr_to_verbose) log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR); } /* * Log any errors that occurred */ if (((log_afsr & ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) && cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) || (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) { ch_flt.flt_type = CPU_INV_AFSR; cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); } /* * Zero out + invalidate CPU logout. */ if (clop) { bzero(clop, sizeof (ch_cpu_logout_t)); clop->clo_data.chd_afar = LOGOUT_INVALID; } #if defined(JALAPENO) || defined(SERRANO) /* * UE/RUE/BERR/TO: Call our bus nexus friends to check for * IO errors that may have resulted in this trap. */ if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) { cpu_run_bus_error_handlers(aflt, expected); } /* * UE/RUE: If UE or RUE is in memory, we need to flush the bad * line from the Ecache. We also need to query the bus nexus for * fatal errors. Attempts to do diagnostic read on caches may * introduce more errors (especially when the module is bad). 
*/ if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) { /* * Ask our bus nexus friends if they have any fatal errors. If * so, they will log appropriate error messages. */ if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL) aflt->flt_panic = 1; /* * We got a UE or RUE and are panicking, save the fault PA in * a known location so that the platform specific panic code * can check for copyback errors. */ if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) { panic_aflt = *aflt; } } /* * Flush Ecache line or entire Ecache */ if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR)) cpu_error_ecache_flush(&ch_flt); #else /* JALAPENO || SERRANO */ /* * UE/BERR/TO: Call our bus nexus friends to check for * IO errors that may have resulted in this trap. */ if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) { cpu_run_bus_error_handlers(aflt, expected); } /* * UE: If the UE is in memory, we need to flush the bad * line from the Ecache. We also need to query the bus nexus for * fatal errors. Attempts to do diagnostic read on caches may * introduce more errors (especially when the module is bad). */ if (t_afsr & C_AFSR_UE) { /* * Ask our legacy bus nexus friends if they have any fatal * errors. If so, they will log appropriate error messages. */ if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL) aflt->flt_panic = 1; /* * We got a UE and are panicking, save the fault PA in a known * location so that the platform specific panic code can check * for copyback errors. */ if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) { panic_aflt = *aflt; } } /* * Flush Ecache line or entire Ecache */ if (t_afsr_errs & (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU)) cpu_error_ecache_flush(&ch_flt); #endif /* JALAPENO || SERRANO */ /* * We carefully re-enable NCEEN and CEEN and then check if any deferred * or disrupting errors have happened. We do this because if a * deferred or disrupting error had occurred with NCEEN/CEEN off, the * trap will not be taken when NCEEN/CEEN is re-enabled. Note that * CEEN works differently on Cheetah than on Spitfire. Also, we enable * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a * deferred or disrupting error happening between checking the AFSR and * enabling NCEEN/CEEN. * * Note: CEEN reenabled only if it was on when trap taken. */ set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen)); if (clear_errors(&ch_flt)) { /* * Check for secondary errors, and avoid panicking if we * have them */ if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs, t_afar) == 0) { aflt->flt_panic |= ((ch_flt.afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0); } (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, NULL); } /* * Panic here if aflt->flt_panic has been set. Enqueued errors will * be logged as part of the panic flow. */ if (aflt->flt_panic) fm_panic("%sError(s)", pr_reason); /* * If we queued an error and we are going to return from the trap and * the error was in user mode or inside of a copy routine, set AST flag * so the queue will be drained before returning to user mode. The * AST processing will also act on our failure policy. 
*/ if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { int pcb_flag = 0; if (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS & ~(C_AFSR_BERR | C_AFSR_TO))) pcb_flag |= ASYNC_HWERR; if (t_afsr & C_AFSR_BERR) pcb_flag |= ASYNC_BERR; if (t_afsr & C_AFSR_TO) pcb_flag |= ASYNC_BTO; ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; aston(curthread); } } #if defined(CPU_IMP_L1_CACHE_PARITY) /* * Handling of data and instruction parity errors (traps 0x71, 0x72). * * For Panther, P$ data parity errors during floating point load hits * are also detected (reported as TT 0x71) and handled by this trap * handler. * * AFSR/AFAR are not set for parity errors, only TPC (a virtual address) * is available. */ /*ARGSUSED*/ void cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc) { ch_async_flt_t ch_flt; struct async_flt *aflt; uchar_t tl = ((flags & CH_ERR_TL) != 0); uchar_t iparity = ((flags & CH_ERR_IPE) != 0); uchar_t panic = ((flags & CH_ERR_PANIC) != 0); char *error_class; /* * Log the error. * For icache parity errors the fault address is the trap PC. * For dcache/pcache parity errors the instruction would have to * be decoded to determine the address and that isn't possible * at high PIL. */ bzero(&ch_flt, sizeof (ch_async_flt_t)); aflt = (struct async_flt *)&ch_flt; aflt->flt_id = gethrtime_waitfree(); aflt->flt_bus_id = getprocessorid(); aflt->flt_inst = CPU->cpu_id; aflt->flt_pc = tpc; aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR; aflt->flt_prot = AFLT_PROT_NONE; aflt->flt_class = CPU_FAULT; aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ? 1 : 0; aflt->flt_tl = tl; aflt->flt_panic = panic; aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP; ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY; if (iparity) { cpu_icache_parity_info(&ch_flt); if (ch_flt.parity_data.ipe.cpl_off != -1) error_class = FM_EREPORT_CPU_USIII_IDSPE; else if (ch_flt.parity_data.ipe.cpl_way != -1) error_class = FM_EREPORT_CPU_USIII_ITSPE; else error_class = FM_EREPORT_CPU_USIII_IPE; aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE; } else { cpu_dcache_parity_info(&ch_flt); if (ch_flt.parity_data.dpe.cpl_off != -1) error_class = FM_EREPORT_CPU_USIII_DDSPE; else if (ch_flt.parity_data.dpe.cpl_way != -1) error_class = FM_EREPORT_CPU_USIII_DTSPE; else error_class = FM_EREPORT_CPU_USIII_DPE; aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE; /* * For panther we also need to check the P$ for parity errors. */ if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { cpu_pcache_parity_info(&ch_flt); if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) { error_class = FM_EREPORT_CPU_USIII_PDSPE; aflt->flt_payload = FM_EREPORT_PAYLOAD_PCACHE_PE; } } } cpu_errorq_dispatch(error_class, (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); if (iparity) { /* * Invalidate entire I$. * This is required due to the use of diagnostic ASI * accesses that may result in a loss of I$ coherency. */ if (cache_boot_state & DCU_IC) { flush_icache(); } /* * According to section P.3.1 of the Panther PRM, we * need to do a little more for recovery on those * CPUs after encountering an I$ parity error. */ if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { flush_ipb(); correct_dcache_parity(dcache_size, dcache_linesize); flush_pcache(); } } else { /* * Since the valid bit is ignored when checking parity the * D$ data and tag must also be corrected. Set D$ data bits * to zero and set utag to 0, 1, 2, 3. 
*/ correct_dcache_parity(dcache_size, dcache_linesize); /* * According to section P.3.3 of the Panther PRM, we * need to do a little more for recovery on those * CPUs after encountering a D$ or P$ parity error. * * As far as clearing P$ parity errors, it is enough to * simply invalidate all entries in the P$ since P$ parity * error traps are only generated for floating point load * hits. */ if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { flush_icache(); flush_ipb(); flush_pcache(); } } /* * Invalidate entire D$ if it was enabled. * This is done to avoid stale data in the D$ which might * occur with the D$ disabled and the trap handler doing * stores affecting lines already in the D$. */ if (cache_boot_state & DCU_DC) { flush_dcache(); } /* * Restore caches to their bootup state. */ set_dcu(get_dcu() | cache_boot_state); /* * Panic here if aflt->flt_panic has been set. Enqueued errors will * be logged as part of the panic flow. */ if (aflt->flt_panic) fm_panic("%sError(s)", iparity ? "IPE " : "DPE "); /* * If this error occurred at TL>0 then flush the E$ here to reduce * the chance of getting an unrecoverable Fast ECC error. This * flush will evict the part of the parity trap handler that is run * at TL>1. */ if (tl) { cpu_flush_ecache(); } } /* * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t * to indicate which portions of the captured data should be in the ereport. */ void cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt) { int way = ch_flt->parity_data.ipe.cpl_way; int offset = ch_flt->parity_data.ipe.cpl_off; int tag_index; struct async_flt *aflt = (struct async_flt *)ch_flt; if ((offset != -1) || (way != -1)) { /* * Parity error in I$ tag or data */ tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx; if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) ch_flt->parity_data.ipe.cpl_ic[way].ic_way = PN_ICIDX_TO_WAY(tag_index); else ch_flt->parity_data.ipe.cpl_ic[way].ic_way = CH_ICIDX_TO_WAY(tag_index); ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag = IC_LOGFLAG_MAGIC; } else { /* * Parity error was not identified. * Log tags and data for all ways. */ for (way = 0; way < CH_ICACHE_NWAY; way++) { tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx; if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) ch_flt->parity_data.ipe.cpl_ic[way].ic_way = PN_ICIDX_TO_WAY(tag_index); else ch_flt->parity_data.ipe.cpl_ic[way].ic_way = CH_ICIDX_TO_WAY(tag_index); ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag = IC_LOGFLAG_MAGIC; } } } /* * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t * to indicate which portions of the captured data should be in the ereport. */ void cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt) { int way = ch_flt->parity_data.dpe.cpl_way; int offset = ch_flt->parity_data.dpe.cpl_off; int tag_index; if (offset != -1) { /* * Parity error in D$ or P$ data array. * * First check to see whether the parity error is in D$ or P$ * since P$ data parity errors are reported in Panther using * the same trap. 
*/ if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) { tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx; ch_flt->parity_data.dpe.cpl_pc[way].pc_way = CH_PCIDX_TO_WAY(tag_index); ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag = PC_LOGFLAG_MAGIC; } else { tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx; ch_flt->parity_data.dpe.cpl_dc[way].dc_way = CH_DCIDX_TO_WAY(tag_index); ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag = DC_LOGFLAG_MAGIC; } } else if (way != -1) { /* * Parity error in D$ tag. */ tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx; ch_flt->parity_data.dpe.cpl_dc[way].dc_way = CH_DCIDX_TO_WAY(tag_index); ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag = DC_LOGFLAG_MAGIC; } } #endif /* CPU_IMP_L1_CACHE_PARITY */ /* * The cpu_async_log_err() function is called via the [uc]e_drain() function to * post-process CPU events that are dequeued. As such, it can be invoked * from softint context, from AST processing in the trap() flow, or from the * panic flow. We decode the CPU-specific data, and take appropriate actions. * Historically this entry point was used to log the actual cmn_err(9F) text; * now with FMA it is used to prepare 'flt' to be converted into an ereport. * With FMA this function now also returns a flag which indicates to the * caller whether the ereport should be posted (1) or suppressed (0). */ static int cpu_async_log_err(void *flt, errorq_elem_t *eqep) { ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt; struct async_flt *aflt = (struct async_flt *)flt; uint64_t errors; extern void memscrub_induced_error(void); switch (ch_flt->flt_type) { case CPU_INV_AFSR: /* * If it is a disrupting trap and the AFSR is zero, then * the event has probably already been noted. Do not post * an ereport. */ if ((aflt->flt_status & ECC_C_TRAP) && (!(aflt->flt_stat & C_AFSR_MASK))) return (0); else return (1); case CPU_TO: case CPU_BERR: case CPU_FATAL: case CPU_FPUERR: return (1); case CPU_UE_ECACHE_RETIRE: cpu_log_err(aflt); cpu_page_retire(ch_flt); return (1); /* * Cases where we may want to suppress logging or perform * extended diagnostics. */ case CPU_CE: case CPU_EMC: /* * We want to skip logging and further classification * only if ALL the following conditions are true: * * 1. There is only one error * 2. That error is a correctable memory error * 3. The error is caused by the memory scrubber (in * which case the error will have occurred under * on_trap protection) * 4. The error is on a retired page * * Note: AFLT_PROT_EC is used places other than the memory * scrubber. However, none of those errors should occur * on a retired page. */ if ((ch_flt->afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE && aflt->flt_prot == AFLT_PROT_EC) { if (page_retire_check(aflt->flt_addr, NULL) == 0) { if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) { /* * Since we're skipping logging, we'll need * to schedule the re-enabling of CEEN */ (void) timeout(cpu_delayed_check_ce_errors, (void *)(uintptr_t)aflt->flt_inst, drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); } /* * Inform memscrubber - scrubbing induced * CE on a retired page. */ memscrub_induced_error(); return (0); } } /* * Perform/schedule further classification actions, but * only if the page is healthy (we don't want bad * pages inducing too much diagnostic activity). If we could * not find a page pointer then we also skip this. If * ce_scrub_xdiag_recirc returns nonzero then it has chosen * to copy and recirculate the event (for further diagnostics) * and we should not proceed to log it here. 
* * This must be the last step here before the cpu_log_err() * below - if an event recirculates cpu_ce_log_err() will * not call the current function but just proceed directly * to cpu_ereport_post after the cpu_log_err() avoided below. * * Note: Check cpu_impl_async_log_err if changing this */ if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) { CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOPP); } else { if (errors != PR_OK) { CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_PAGEDET); } else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep, offsetof(ch_async_flt_t, cmn_asyncflt))) { return (0); } } /*FALLTHRU*/ /* * Cases where we just want to report the error and continue. */ case CPU_CE_ECACHE: case CPU_UE_ECACHE: case CPU_IV: case CPU_ORPH: cpu_log_err(aflt); return (1); /* * Cases where we want to fall through to handle panicking. */ case CPU_UE: /* * We want to skip logging in the same conditions as the * CE case. In addition, we want to make sure we're not * panicking. */ if (!panicstr && (ch_flt->afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE && aflt->flt_prot == AFLT_PROT_EC) { if (page_retire_check(aflt->flt_addr, NULL) == 0) { /* Zero the address to clear the error */ softcall(ecc_page_zero, (void *)aflt->flt_addr); /* * Inform memscrubber - scrubbing induced * UE on a retired page. */ memscrub_induced_error(); return (0); } } cpu_log_err(aflt); break; default: /* * If the us3_common.c code doesn't know the flt_type, it may * be an implementation-specific code. Call into the impldep * backend to find out what to do: if it tells us to continue, * break and handle as if falling through from a UE; if not, * the impldep backend has handled the error and we're done. */ switch (cpu_impl_async_log_err(flt, eqep)) { case CH_ASYNC_LOG_DONE: return (1); case CH_ASYNC_LOG_RECIRC: return (0); case CH_ASYNC_LOG_CONTINUE: break; /* continue on to handle UE-like error */ default: cmn_err(CE_WARN, "discarding error 0x%p with " "invalid fault type (0x%x)", (void *)aflt, ch_flt->flt_type); return (0); } } /* ... fall through from the UE case */ if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) { if (!panicstr) { cpu_page_retire(ch_flt); } else { /* * Clear UEs on panic so that we don't * get haunted by them during panic or * after reboot */ cpu_clearphys(aflt); (void) clear_errors(NULL); } } return (1); } /* * Retire the bad page that may contain the flushed error. */ void cpu_page_retire(ch_async_flt_t *ch_flt) { struct async_flt *aflt = (struct async_flt *)ch_flt; (void) page_retire(aflt->flt_addr, PR_UE); } /* * Return true if the error specified in the AFSR indicates * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$ * for Panther, none for Jalapeno/Serrano). */ /* ARGSUSED */ static int cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr) { #if defined(JALAPENO) || defined(SERRANO) return (0); #elif defined(CHEETAH_PLUS) if (IS_PANTHER(cpunodes[cpuid].implementation)) return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0); return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0); #else /* CHEETAH_PLUS */ return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0); #endif } /* * The cpu_log_err() function is called by cpu_async_log_err() to perform the * generic event post-processing for correctable and uncorrectable memory, * E$, and MTag errors. Historically this entry point was used to log bits of * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be * converted into an ereport. 
In addition, it transmits the error to any * platform-specific service-processor FRU logging routines, if available. */ void cpu_log_err(struct async_flt *aflt) { char unum[UNUM_NAMLEN]; int synd_status, synd_code, afar_status; ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit)) aflt->flt_status |= ECC_ECACHE; else aflt->flt_status &= ~ECC_ECACHE; /* * Determine syndrome status. */ synd_status = afsr_to_synd_status(aflt->flt_inst, ch_flt->afsr_errs, ch_flt->flt_bit); /* * Determine afar status. */ if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)) afar_status = afsr_to_afar_status(ch_flt->afsr_errs, ch_flt->flt_bit); else afar_status = AFLT_STAT_INVALID; synd_code = synd_to_synd_code(synd_status, aflt->flt_synd, ch_flt->flt_bit); /* * If afar status is not invalid do a unum lookup. */ if (afar_status != AFLT_STAT_INVALID) { (void) cpu_get_mem_unum_synd(synd_code, aflt, unum); } else { unum[0] = '\0'; } /* * Do not send the fruid message (plat_ecc_error_data_t) * to the SC if it can handle the enhanced error information * (plat_ecc_error2_data_t) or when the tunable * ecc_log_fruid_enable is set to 0. */ if (&plat_ecc_capability_sc_get && plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) { if (&plat_log_fruid_error) plat_log_fruid_error(synd_code, aflt, unum, ch_flt->flt_bit); } if (aflt->flt_func != NULL) aflt->flt_func(aflt, unum); if (afar_status != AFLT_STAT_INVALID) cpu_log_diag_info(ch_flt); /* * If we have a CEEN error , we do not reenable CEEN until after * we exit the trap handler. Otherwise, another error may * occur causing the handler to be entered recursively. * We set a timeout to trigger in cpu_ceen_delay_secs seconds, * to try and ensure that the CPU makes progress in the face * of a CE storm. */ if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) { (void) timeout(cpu_delayed_check_ce_errors, (void *)(uintptr_t)aflt->flt_inst, drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); } } /* * Invoked by error_init() early in startup and therefore before * startup_errorq() is called to drain any error Q - * * startup() * startup_end() * error_init() * cpu_error_init() * errorq_init() * errorq_drain() * start_other_cpus() * * The purpose of this routine is to create error-related taskqs. Taskqs * are used for this purpose because cpu_lock can't be grabbed from interrupt * context. */ void cpu_error_init(int items) { /* * Create taskq(s) to reenable CE */ ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri, items, items, TASKQ_PREPOPULATE); } void cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep) { char unum[UNUM_NAMLEN]; int len; switch (aflt->flt_class) { case CPU_FAULT: cpu_ereport_init(aflt); if (cpu_async_log_err(aflt, eqep)) cpu_ereport_post(aflt); break; case BUS_FAULT: if (aflt->flt_func != NULL) { (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum, UNUM_NAMLEN, &len); aflt->flt_func(aflt, unum); } break; case RECIRC_CPU_FAULT: aflt->flt_class = CPU_FAULT; cpu_log_err(aflt); cpu_ereport_post(aflt); break; case RECIRC_BUS_FAULT: ASSERT(aflt->flt_class != RECIRC_BUS_FAULT); /*FALLTHRU*/ default: cmn_err(CE_WARN, "discarding CE error 0x%p with invalid " "fault class (0x%x)", (void *)aflt, aflt->flt_class); return; } } /* * Scrub and classify a CE. This function must not modify the * fault structure passed to it but instead should return the classification * information. 
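 *
 * Stripped of the CEEN and on_trap bookkeeping, the classification that
 * follows is a two-step scrub-and-reread, accumulating datapoints into a
 * disposition byte. A condensed sketch (capture_line_state(),
 * scrub_line(), reread_line(), ce_seen() and line_state_bits() are
 * hypothetical stand-ins for the routines used below):
 *
 *    static uchar_t
 *    classify_ce(uint64_t paddr)
 *    {
 *        uchar_t disp = CE_XDIAG_EXTALG;
 *
 *        capture_line_state(paddr);
 *        scrub_line(paddr);
 *        if (ce_seen(paddr))
 *            disp |= CE_XDIAG_CE1;
 *        reread_line(paddr);
 *        if (ce_seen(paddr))
 *            disp |= CE_XDIAG_CE2;
 *        disp |= line_state_bits(paddr);
 *        return (disp);
 *    }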
*/ static uchar_t cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried) { uchar_t disp = CE_XDIAG_EXTALG; on_trap_data_t otd; uint64_t orig_err; ch_cpu_logout_t *clop; /* * Clear CEEN. CPU CE TL > 0 trap handling will already have done * this, but our other callers have not. Disable preemption to * avoid CPU migration so that we restore CEEN on the correct * cpu later. * * CEEN is cleared so that further CEs that our instruction and * data footprint induce do not cause use to either creep down * kernel stack to the point of overflow, or do so much CE * notification as to make little real forward progress. * * NCEEN must not be cleared. However it is possible that * our accesses to the flt_addr may provoke a bus error or timeout * if the offending address has just been unconfigured as part of * a DR action. So we must operate under on_trap protection. */ kpreempt_disable(); orig_err = get_error_enable(); if (orig_err & EN_REG_CEEN) set_error_enable(orig_err & ~EN_REG_CEEN); /* * Our classification algorithm includes the line state before * the scrub; we'd like this captured after the detection and * before the algorithm below - the earlier the better. * * If we've come from a cpu CE trap then this info already exists * in the cpu logout area. * * For a CE detected by memscrub for which there was no trap * (running with CEEN off) cpu_log_and_clear_ce has called * cpu_ce_delayed_ec_logout to capture some cache data, and * marked the fault structure as incomplete as a flag to later * logging code. * * If called directly from an IO detected CE there has been * no line data capture. In this case we logout to the cpu logout * area - that's appropriate since it's the cpu cache data we need * for classification. We thus borrow the cpu logout area for a * short time, and cpu_ce_delayed_ec_logout will mark it as busy in * this time (we will invalidate it again below). * * If called from the partner check xcall handler then this cpu * (the partner) has not necessarily experienced a CE at this * address. But we want to capture line state before its scrub * attempt since we use that in our classification. */ if (logout_tried == B_FALSE) { if (!cpu_ce_delayed_ec_logout(ecc->flt_addr)) disp |= CE_XDIAG_NOLOGOUT; } /* * Scrub memory, then check AFSR for errors. The AFAR we scrub may * no longer be valid (if DR'd since the initial event) so we * perform this scrub under on_trap protection. If this access is * ok then further accesses below will also be ok - DR cannot * proceed while this thread is active (preemption is disabled); * to be safe we'll nonetheless use on_trap again below. */ if (!on_trap(&otd, OT_DATA_ACCESS)) { cpu_scrubphys(ecc); } else { no_trap(); if (orig_err & EN_REG_CEEN) set_error_enable(orig_err); kpreempt_enable(); return (disp); } no_trap(); /* * Did the casx read of the scrub log a CE that matches the AFAR? * Note that it's quite possible that the read sourced the data from * another cpu. */ if (clear_ecc(ecc)) disp |= CE_XDIAG_CE1; /* * Read the data again. This time the read is very likely to * come from memory since the scrub induced a writeback to memory. */ if (!on_trap(&otd, OT_DATA_ACCESS)) { (void) lddphys(P2ALIGN(ecc->flt_addr, 8)); } else { no_trap(); if (orig_err & EN_REG_CEEN) set_error_enable(orig_err); kpreempt_enable(); return (disp); } no_trap(); /* Did that read induce a CE that matches the AFAR? */ if (clear_ecc(ecc)) disp |= CE_XDIAG_CE2; /* * Look at the logout information and record whether we found the * line in l2/l3 cache. 
For Panther we are interested in whether * we found it in either cache (it won't reside in both but * it is possible to read it that way given the moving target). */ clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL; if (!(disp & CE_XDIAG_NOLOGOUT) && clop && clop->clo_data.chd_afar != LOGOUT_INVALID) { int hit, level; int state; int totalsize; ch_ec_data_t *ecp; /* * If hit is nonzero then a match was found and hit will * be one greater than the index which hit. For Panther we * also need to pay attention to level to see which of l2$ or * l3$ it hit in. */ hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data, 0, &level); if (hit) { --hit; disp |= CE_XDIAG_AFARMATCH; if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { if (level == 2) ecp = &clop->clo_data.chd_l2_data[hit]; else ecp = &clop->clo_data.chd_ec_data[hit]; } else { ASSERT(level == 2); ecp = &clop->clo_data.chd_ec_data[hit]; } totalsize = cpunodes[CPU->cpu_id].ecache_size; state = cpu_ectag_pa_to_subblk_state(totalsize, ecc->flt_addr, ecp->ec_tag); /* * Cheetah variants use different state encodings - * the CH_ECSTATE_* defines vary depending on the * module we're compiled for. Translate into our * one true version. Conflate Owner-Shared state * of SSM mode with Owner as victimisation of such * lines may cause a writeback. */ switch (state) { case CH_ECSTATE_MOD: disp |= EC_STATE_M; break; case CH_ECSTATE_OWN: case CH_ECSTATE_OWS: disp |= EC_STATE_O; break; case CH_ECSTATE_EXL: disp |= EC_STATE_E; break; case CH_ECSTATE_SHR: disp |= EC_STATE_S; break; default: disp |= EC_STATE_I; break; } } /* * If we initiated the delayed logout then we are responsible * for invalidating the logout area. */ if (logout_tried == B_FALSE) { bzero(clop, sizeof (ch_cpu_logout_t)); clop->clo_data.chd_afar = LOGOUT_INVALID; } } /* * Re-enable CEEN if we turned it off. */ if (orig_err & EN_REG_CEEN) set_error_enable(orig_err); kpreempt_enable(); return (disp); } /* * Scrub a correctable memory error and collect data for classification * of CE type. This function is called in the detection path, ie tl0 handling * of a correctable error trap (cpus) or interrupt (IO) at high PIL. */ void cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried) { /* * Cheetah CE classification does not set any bits in flt_status. * Instead we will record classification datapoints in flt_disp. */ ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY); /* * To check if the error detected by IO is persistent, sticky or * intermittent. This is noticed by clear_ecc(). */ if (ecc->flt_status & ECC_IOBUS) ecc->flt_stat = C_AFSR_MEMORY; /* * Record information from this first part of the algorithm in * flt_disp. */ ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried); } /* * Select a partner to perform a further CE classification check from. * Must be called with kernel preemption disabled (to stop the cpu list * from changing). The detecting cpu we are partnering has cpuid * aflt->flt_inst; we might not be running on the detecting cpu. * * Restrict choice to active cpus in the same cpu partition as ourselves in * an effort to stop bad cpus in one partition causing other partitions to * perform excessive diagnostic activity. Actually since the errorq drain * is run from a softint most of the time and that is a global mechanism * this isolation is only partial. Return NULL if we fail to find a * suitable partner. 
* * We prefer a partner that is in a different latency group to ourselves as * we will share fewer datapaths. If such a partner is unavailable then * choose one in the same lgroup but prefer a different chip and only allow * a sibling core if flags includes PTNR_SIBLINGOK. If all else fails and * flags includes PTNR_SELFOK then permit selection of the original detector. * * We keep a cache of the last partner selected for a cpu, and we'll try to * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds * have passed since that selection was made. This provides the benefit * of the point-of-view of different partners over time but without * requiring frequent cpu list traversals. */ #define PTNR_SIBLINGOK 0x1 /* Allow selection of sibling core */ #define PTNR_SELFOK 0x2 /* Allow selection of cpu to "partner" itself */ static cpu_t * ce_ptnr_select(struct async_flt *aflt, int flags, int *typep) { cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr; hrtime_t lasttime, thistime; ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL); dtcr = cpu[aflt->flt_inst]; /* * Short-circuit for the following cases: * . the dtcr is not flagged active * . there is just one cpu present * . the detector has disappeared * . we were given a bad flt_inst cpuid; this should not happen * (eg PCI code now fills flt_inst) but if it does it is no * reason to panic. * . there is just one cpu left online in the cpu partition * * If we return NULL after this point then we do not update the * chpr_ceptnr_seltime which will cause us to perform a full lookup * again next time; this is the case where the only other cpu online * in the detector's partition is on the same chip as the detector * and since CEEN re-enable is throttled even that case should not * hurt performance. */ if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) { return (NULL); } if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) { if (flags & PTNR_SELFOK) { *typep = CE_XDIAG_PTNR_SELF; return (dtcr); } else { return (NULL); } } thistime = gethrtime(); lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime); /* * Select a starting point. */ if (!lasttime) { /* * We've never selected a partner for this detector before. * Start the scan at the next online cpu in the same cpu * partition. */ sp = dtcr->cpu_next_part; } else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) { /* * Our last selection has not aged yet. If this partner: * . is still a valid cpu, * . is still in the same partition as the detector * . is still marked active * . satisfies the 'flags' argument criteria * then select it again without updating the timestamp. */ sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)]; if (sp == NULL || sp->cpu_part != dtcr->cpu_part || !cpu_flagged_active(sp->cpu_flags) || (sp == dtcr && !(flags & PTNR_SELFOK)) || (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) && !(flags & PTNR_SIBLINGOK))) { sp = dtcr->cpu_next_part; } else { if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) { *typep = CE_XDIAG_PTNR_REMOTE; } else if (sp == dtcr) { *typep = CE_XDIAG_PTNR_SELF; } else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) { *typep = CE_XDIAG_PTNR_SIBLING; } else { *typep = CE_XDIAG_PTNR_LOCAL; } return (sp); } } else { /* * Our last selection has aged. If it is nonetheless still a * valid cpu then start the scan at the next cpu in the * partition after our last partner. If the last selection * is no longer a valid cpu then go with our default. In * this way we slowly cycle through possible partners to * obtain multiple viewpoints over time. 
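 *
 * For reference, the scan that this starting point feeds (the do/while
 * loop further below) reduces to a single pass around the partition's
 * circular cpu_next_part list with tiered fallbacks. A skeleton of that
 * walk, where usable(), remote_lgroup() and sibling_core() are
 * hypothetical predicates standing in for the cpu_flagged_active(),
 * lpl_lgrp and pg_plat_cpus_share() tests:
 *
 *    cpu_t *p = start, *loc = NULL, *sib = NULL;
 *
 *    do {
 *        if (p == dtcr || !usable(p))
 *            continue;
 *        if (remote_lgroup(p, dtcr))
 *            return (p);
 *        if (sibling_core(p, dtcr)) {
 *            if (sib == NULL)
 *                sib = p;
 *        } else if (loc == NULL) {
 *            loc = p;
 *        }
 *    } while ((p = p->cpu_next_part) != start);
 *
 * with loc preferred over sib, and the detector itself used only when
 * PTNR_SELFOK allows it.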
*/ sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)]; if (sp == NULL) { sp = dtcr->cpu_next_part; } else { sp = sp->cpu_next_part; /* may be dtcr */ if (sp->cpu_part != dtcr->cpu_part) sp = dtcr; } } /* * We have a proposed starting point for our search, but if this * cpu is offline then its cpu_next_part will point to itself * so we can't use that to iterate over cpus in this partition in * the loop below. We still want to avoid iterating over cpus not * in our partition, so in the case that our starting point is offline * we will repoint it to be the detector itself; and if the detector * happens to be offline we'll return NULL from the following loop. */ if (!cpu_flagged_active(sp->cpu_flags)) { sp = dtcr; } ptnr = sp; locptnr = NULL; sibptnr = NULL; do { if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags)) continue; if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) { CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id; CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; *typep = CE_XDIAG_PTNR_REMOTE; return (ptnr); } if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) { if (sibptnr == NULL) sibptnr = ptnr; continue; } if (locptnr == NULL) locptnr = ptnr; } while ((ptnr = ptnr->cpu_next_part) != sp); /* * A foreign partner has already been returned if one was available. * * If locptnr is not NULL it is a cpu in the same lgroup as the * detector, is active, and is not a sibling of the detector. * * If sibptnr is not NULL it is a sibling of the detector, and is * active. * * If we have to resort to using the detector itself we have already * checked that it is active. */ if (locptnr) { CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id; CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; *typep = CE_XDIAG_PTNR_LOCAL; return (locptnr); } else if (sibptnr && flags & PTNR_SIBLINGOK) { CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id; CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; *typep = CE_XDIAG_PTNR_SIBLING; return (sibptnr); } else if (flags & PTNR_SELFOK) { CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id; CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime; *typep = CE_XDIAG_PTNR_SELF; return (dtcr); } return (NULL); } /* * Cross call handler that is requested to run on the designated partner of * a cpu that experienced a possibly sticky or possibly persistent CE. */ static void ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp) { *dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE); } /* * The associated errorqs are never destroyed so we do not need to deal with * them disappearing before this timeout fires. If the affected memory * has been DR'd out since the original event the scrub algorithm will catch * any errors and return null disposition info. If the original detecting * cpu has been DR'd out then ereport detector info will not be able to * lookup CPU type; with a small timeout this is unlikely.
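 *
 * Note that the disposition from the partner check is returned through a
 * pointer to a local variable in this function's stack frame; that is
 * only safe because xc_one() is a synchronous cross call and does not
 * return until ce_ptnrchk_xc() has finished running on the target cpu.
 * The calling pattern, in isolation:
 *
 *    uchar_t disp;
 *
 *    xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
 *        (uint64_t)aflt, (uint64_t)&disp);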
*/ static void ce_lkychk_cb(ce_lkychk_cb_t *cbarg) { struct async_flt *aflt = cbarg->lkycb_aflt; uchar_t disp; cpu_t *cp; int ptnrtype; kpreempt_disable(); if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK, &ptnrtype)) { xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt, (uint64_t)&disp); CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp); CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id); CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype); } else { ce_xdiag_lkydrops++; if (ncpus > 1) CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOPTNR); } kpreempt_enable(); errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC); kmem_free(cbarg, sizeof (ce_lkychk_cb_t)); } /* * Called from errorq drain code when processing a CE error, both from * CPU and PCI drain functions. Decide what further classification actions, * if any, we will perform. Perform immediate actions now, and schedule * delayed actions as required. Note that we are no longer necessarily running * on the detecting cpu, and that the async_flt structure will not persist on * return from this function. * * Calls to this function should aim to be self-throtlling in some way. With * the delayed re-enable of CEEN the absolute rate of calls should not * be excessive. Callers should also avoid performing in-depth classification * for events in pages that are already known to be suspect. * * We return nonzero to indicate that the event has been copied and * recirculated for further testing. The caller should not log the event * in this case - it will be logged when further test results are available. * * Our possible contexts are that of errorq_drain: below lock level or from * panic context. We can assume that the cpu we are running on is online. */ #ifdef DEBUG static int ce_xdiag_forceaction; #endif int ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp, errorq_elem_t *eqep, size_t afltoffset) { ce_dispact_t dispact, action; cpu_t *cp; uchar_t dtcrinfo, disp; int ptnrtype; if (!ce_disp_inited || panicstr || ce_xdiag_off) { ce_xdiag_drops++; return (0); } else if (!aflt->flt_in_memory) { ce_xdiag_drops++; CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM); return (0); } dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp); /* * Some correctable events are not scrubbed/classified, such as those * noticed at the tail of cpu_deferred_error. So if there is no * initial detector classification go no further. */ if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) { ce_xdiag_drops++; CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB); return (0); } dispact = CE_DISPACT(ce_disp_table, CE_XDIAG_AFARMATCHED(dtcrinfo), CE_XDIAG_STATE(dtcrinfo), CE_XDIAG_CE1SEEN(dtcrinfo), CE_XDIAG_CE2SEEN(dtcrinfo)); action = CE_ACT(dispact); /* bad lookup caught below */ #ifdef DEBUG if (ce_xdiag_forceaction != 0) action = ce_xdiag_forceaction; #endif switch (action) { case CE_ACT_LKYCHK: { caddr_t ndata; errorq_elem_t *neqep; struct async_flt *ecc; ce_lkychk_cb_t *cbargp; if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) { ce_xdiag_lkydrops++; CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_DUPFAIL); break; } ecc = (struct async_flt *)(ndata + afltoffset); ASSERT(ecc->flt_class == CPU_FAULT || ecc->flt_class == BUS_FAULT); ecc->flt_class = (ecc->flt_class == CPU_FAULT) ? 
RECIRC_CPU_FAULT : RECIRC_BUS_FAULT; cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP); cbargp->lkycb_aflt = ecc; cbargp->lkycb_eqp = eqp; cbargp->lkycb_eqep = neqep; (void) timeout((void (*)(void *))ce_lkychk_cb, (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec)); return (1); } case CE_ACT_PTNRCHK: kpreempt_disable(); /* stop cpu list changing */ if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) { xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt, (uint64_t)&disp); CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp); CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id); CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype); } else if (ncpus > 1) { ce_xdiag_ptnrdrops++; CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOPTNR); } else { ce_xdiag_ptnrdrops++; CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_UNIPROC); } kpreempt_enable(); break; case CE_ACT_DONE: break; case CE_ACT(CE_DISP_BAD): default: #ifdef DEBUG cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action); #endif ce_xdiag_bad++; CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD); break; } return (0); } /* * We route all errors through a single switch statement. */ void cpu_ue_log_err(struct async_flt *aflt) { switch (aflt->flt_class) { case CPU_FAULT: cpu_ereport_init(aflt); if (cpu_async_log_err(aflt, NULL)) cpu_ereport_post(aflt); break; case BUS_FAULT: bus_async_log_err(aflt); break; default: cmn_err(CE_WARN, "discarding async error %p with invalid " "fault class (0x%x)", (void *)aflt, aflt->flt_class); return; } } /* * Routine for panic hook callback from panic_idle(). */ void cpu_async_panic_callb(void) { ch_async_flt_t ch_flt; struct async_flt *aflt; ch_cpu_errors_t cpu_error_regs; uint64_t afsr_errs; get_cpu_error_state(&cpu_error_regs); afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS); if (afsr_errs) { bzero(&ch_flt, sizeof (ch_async_flt_t)); aflt = (struct async_flt *)&ch_flt; aflt->flt_id = gethrtime_waitfree(); aflt->flt_bus_id = getprocessorid(); aflt->flt_inst = CPU->cpu_id; aflt->flt_stat = cpu_error_regs.afsr; aflt->flt_addr = cpu_error_regs.afar; aflt->flt_prot = AFLT_PROT_NONE; aflt->flt_class = CPU_FAULT; aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0); aflt->flt_panic = 1; ch_flt.afsr_ext = cpu_error_regs.afsr_ext; ch_flt.afsr_errs = afsr_errs; #if defined(SERRANO) ch_flt.afar2 = cpu_error_regs.afar2; #endif /* SERRANO */ (void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL); } } /* * Routine to convert a syndrome into a syndrome code. */ static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit) { if (synd_status == AFLT_STAT_INVALID) return (-1); /* * Use the syndrome to index the appropriate syndrome table, * to get the code indicating which bit(s) is(are) bad. 
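 *
 * In the common (E$ syndrome) case this is just a bounds-checked table
 * lookup: a syndrome of zero (no error captured) and anything at or
 * beyond the table size both map to -1. As an isolated sketch of that
 * case (esynd_to_code() is a hypothetical wrapper):
 *
 *    static int
 *    esynd_to_code(ushort_t synd)
 *    {
 *        if (synd == 0 || synd >= ESYND_TBL_SIZE)
 *            return (-1);
 *        return (ecc_syndrome_tab[synd]);
 *    }
 *
 * The MTag path is identical in shape, just against mtag_syndrome_tab
 * (or a BPAR0-based computation on Jalapeno/Serrano).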
*/ if (afsr_bit & (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { if (afsr_bit & C_AFSR_MSYND_ERRS) { #if defined(JALAPENO) || defined(SERRANO) if ((synd == 0) || (synd >= BSYND_TBL_SIZE)) return (-1); else return (BPAR0 + synd); #else /* JALAPENO || SERRANO */ if ((synd == 0) || (synd >= MSYND_TBL_SIZE)) return (-1); else return (mtag_syndrome_tab[synd]); #endif /* JALAPENO || SERRANO */ } else { if ((synd == 0) || (synd >= ESYND_TBL_SIZE)) return (-1); else return (ecc_syndrome_tab[synd]); } } else { return (-1); } } int cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) { if (&plat_get_mem_sid) return (plat_get_mem_sid(unum, buf, buflen, lenp)); else return (ENOTSUP); } int cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) { if (&plat_get_mem_offset) return (plat_get_mem_offset(flt_addr, offp)); else return (ENOTSUP); } int cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) { if (&plat_get_mem_addr) return (plat_get_mem_addr(unum, sid, offset, addrp)); else return (ENOTSUP); } /* * Routine to return a string identifying the physical name * associated with a memory/cache error. */ int cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat, uint64_t flt_addr, int flt_bus_id, int flt_in_memory, ushort_t flt_status, char *buf, int buflen, int *lenp) { int synd_code; int ret; /* * An AFSR of -1 defaults to a memory syndrome. */ if (flt_stat == (uint64_t)-1) flt_stat = C_AFSR_CE; synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat); /* * Syndrome code must be either a single-bit error code * (0...143) or -1 for unum lookup. */ if (synd_code < 0 || synd_code >= M2) synd_code = -1; if (&plat_get_mem_unum) { if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id, flt_in_memory, flt_status, buf, buflen, lenp)) != 0) { buf[0] = '\0'; *lenp = 0; } return (ret); } return (ENOTSUP); } /* * Wrapper for cpu_get_mem_unum() routine that takes an * async_flt struct rather than explicit arguments. */ int cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, char *buf, int buflen, int *lenp) { /* * If we come thru here for an IO bus error aflt->flt_stat will * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum() * so it will interpret this as a memory error. */ return (cpu_get_mem_unum(synd_status, aflt->flt_synd, (aflt->flt_class == BUS_FAULT) ? (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit, aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp)); } /* * Return unum string given synd_code and async_flt into * the buf with size UNUM_NAMLEN */ static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf) { int ret, len; /* * Syndrome code must be either a single-bit error code * (0...143) or -1 for unum lookup. */ if (synd_code < 0 || synd_code >= M2) synd_code = -1; if (&plat_get_mem_unum) { if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory, aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) { buf[0] = '\0'; } return (ret); } buf[0] = '\0'; return (ENOTSUP); } /* * This routine is a more generic interface to cpu_get_mem_unum() * that may be used by other modules (e.g. the 'mm' driver, through * the 'MEM_NAME' ioctl, which is used by fmd to resolve unum's * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events). 
*/ int cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, char *buf, int buflen, int *lenp) { int synd_status, flt_in_memory, ret; ushort_t flt_status = 0; char unum[UNUM_NAMLEN]; uint64_t t_afsr_errs; /* * Check for an invalid address. */ if (afar == (uint64_t)-1) return (ENXIO); if (synd == (uint64_t)-1) synd_status = AFLT_STAT_INVALID; else synd_status = AFLT_STAT_VALID; flt_in_memory = (*afsr & C_AFSR_MEMORY) && pf_is_memory(afar >> MMU_PAGESHIFT); /* * Get aggregate AFSR for call to cpu_error_is_ecache_data. */ if (*afsr == (uint64_t)-1) t_afsr_errs = C_AFSR_CE; else { t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS); #if defined(CHEETAH_PLUS) if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS); #endif /* CHEETAH_PLUS */ } /* * Turn on ECC_ECACHE if error type is E$ Data. */ if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs)) flt_status |= ECC_ECACHE; ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar, CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp); if (ret != 0) return (ret); if (*lenp >= buflen) return (ENAMETOOLONG); (void) strncpy(buf, unum, buflen); return (0); } /* * Routine to return memory information associated * with a physical address and syndrome. */ int cpu_get_mem_info(uint64_t synd, uint64_t afar, uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, int *segsp, int *banksp, int *mcidp) { int synd_status, synd_code; if (afar == (uint64_t)-1) return (ENXIO); if (synd == (uint64_t)-1) synd_status = AFLT_STAT_INVALID; else synd_status = AFLT_STAT_VALID; synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE); if (p2get_mem_info != NULL) return ((p2get_mem_info)(synd_code, afar, mem_sizep, seg_sizep, bank_sizep, segsp, banksp, mcidp)); else return (ENOTSUP); } /* * Routine to return a string identifying the physical * name associated with a cpuid. */ int cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) { int ret; char unum[UNUM_NAMLEN]; if (&plat_get_cpu_unum) { if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp)) != 0) return (ret); } else { return (ENOTSUP); } if (*lenp >= buflen) return (ENAMETOOLONG); (void) strncpy(buf, unum, buflen); return (0); } /* * This routine exports the name buffer size. */ size_t cpu_get_name_bufsize() { return (UNUM_NAMLEN); } /* * Historical function, apparantly not used. */ /* ARGSUSED */ void cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err) {} /* * Historical function only called for SBus errors in debugging. */ /*ARGSUSED*/ void read_ecc_data(struct async_flt *aflt, short verbose, short ce_err) {} /* * Clear the AFSR sticky bits. The routine returns a non-zero value if * any of the AFSR's sticky errors are detected. If a non-null pointer to * an async fault structure argument is passed in, the captured error state * (AFSR, AFAR) info will be returned in the structure. 
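 *
 * The snapshot-then-write-back sequence below (and in clear_ecc()
 * further down) is the usual idiom for sticky write-one-to-clear status
 * registers: by writing back exactly the value that was read, only the
 * bits already observed are cleared and any error that lands in between
 * remains set for the next check. In isolated form, assuming a
 * hypothetical reg_read()/reg_write() pair in place of
 * get_cpu_error_state()/set_cpu_error_state():
 *
 *    static int
 *    snapshot_and_clear(uint64_t *afsr, uint64_t *afar)
 *    {
 *        *afsr = reg_read(REG_AFSR);
 *        *afar = reg_read(REG_AFAR);
 *        reg_write(REG_AFSR, *afsr);
 *        return ((*afsr & C_AFSR_ALL_ERRS) != 0);
 *    }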
*/ int clear_errors(ch_async_flt_t *ch_flt) { struct async_flt *aflt = (struct async_flt *)ch_flt; ch_cpu_errors_t cpu_error_regs; get_cpu_error_state(&cpu_error_regs); if (ch_flt != NULL) { aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK; aflt->flt_addr = cpu_error_regs.afar; ch_flt->afsr_ext = cpu_error_regs.afsr_ext; ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS); #if defined(SERRANO) ch_flt->afar2 = cpu_error_regs.afar2; #endif /* SERRANO */ } set_cpu_error_state(&cpu_error_regs); return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) | (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0); } /* * Clear any AFSR error bits, and check for persistence. * * It would be desirable to also insist that syndrome match. PCI handling * has already filled flt_synd. For errors trapped by CPU we only fill * flt_synd when we queue the event, so we do not have a valid flt_synd * during initial classification (it is valid if we're called as part of * subsequent low-pil additional classification attempts). We could try * to determine which syndrome to use: we know we're only called for * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use * would be esynd/none and esynd/msynd, respectively. If that is * implemented then what do we do in the case that we do experience an * error on the same afar but with different syndrome? At the very least * we should count such occurences. Anyway, for now, we'll leave it as * it has been for ages. */ static int clear_ecc(struct async_flt *aflt) { ch_cpu_errors_t cpu_error_regs; /* * Snapshot the AFSR and AFAR and clear any errors */ get_cpu_error_state(&cpu_error_regs); set_cpu_error_state(&cpu_error_regs); /* * If any of the same memory access error bits are still on and * the AFAR matches, return that the error is persistent. */ return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 && cpu_error_regs.afar == aflt->flt_addr); } /* * Turn off all cpu error detection, normally only used for panics. */ void cpu_disable_errors(void) { xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE); /* * With error detection now turned off, check the other cpus * logout areas for any unlogged errors. */ if (enable_check_other_cpus_logout) { cpu_check_other_cpus_logout(); /* * Make a second pass over the logout areas, in case * there is a failing CPU in an error-trap loop which * will write to the logout area once it is emptied. */ cpu_check_other_cpus_logout(); } } /* * Enable errors. */ void cpu_enable_errors(void) { xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE); } /* * Flush the entire ecache using displacement flush by reading through a * physical address range twice as large as the Ecache. */ void cpu_flush_ecache(void) { flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size, cpunodes[CPU->cpu_id].ecache_linesize); } /* * Return CPU E$ set size - E$ size divided by the associativity. * We use this function in places where the CPU_PRIVATE ptr may not be * initialized yet. Note that for send_mondo and in the Ecache scrubber, * we're guaranteed that CPU_PRIVATE is initialized. Also, cpunodes is set * up before the kernel switches from OBP's to the kernel's trap table, so * we don't have to worry about cpunodes being unitialized. */ int cpu_ecache_set_size(struct cpu *cp) { if (CPU_PRIVATE(cp)) return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size)); return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway()); } /* * Flush Ecache line. 
* Uses ASI_EC_DIAG for Cheetah+ and Jalapeno. * Uses normal displacement flush for Cheetah. */ static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt) { struct async_flt *aflt = (struct async_flt *)ch_flt; int ec_set_size = cpu_ecache_set_size(CPU); ecache_flush_line(aflt->flt_addr, ec_set_size); } /* * Scrub physical address. * Scrub code is different depending upon whether this a Cheetah+ with 2-way * Ecache or direct-mapped Ecache. */ static void cpu_scrubphys(struct async_flt *aflt) { int ec_set_size = cpu_ecache_set_size(CPU); scrubphys(aflt->flt_addr, ec_set_size); } /* * Clear physical address. * Scrub code is different depending upon whether this a Cheetah+ with 2-way * Ecache or direct-mapped Ecache. */ void cpu_clearphys(struct async_flt *aflt) { int lsize = cpunodes[CPU->cpu_id].ecache_linesize; int ec_set_size = cpu_ecache_set_size(CPU); clearphys(aflt->flt_addr, ec_set_size, lsize); } #if defined(CPU_IMP_ECACHE_ASSOC) /* * Check for a matching valid line in all the sets. * If found, return set# + 1. Otherwise return 0. */ static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt) { struct async_flt *aflt = (struct async_flt *)ch_flt; int totalsize = cpunodes[CPU->cpu_id].ecache_size; int ec_set_size = cpu_ecache_set_size(CPU); ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; int nway = cpu_ecache_nway(); int i; for (i = 0; i < nway; i++, ecp++) { if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) && (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) == cpu_ectag_to_pa(ec_set_size, ecp->ec_tag)) return (i+1); } return (0); } #endif /* CPU_IMP_ECACHE_ASSOC */ /* * Check whether a line in the given logout info matches the specified * fault address. If reqval is set then the line must not be Invalid. * Returns 0 on failure; on success (way + 1) is returned an *level is * set to 2 for l2$ or 3 for l3$. */ static int cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level) { ch_diag_data_t *cdp = data; ch_ec_data_t *ecp; int totalsize, ec_set_size; int i, ways; int match = 0; int tagvalid; uint64_t addr, tagpa; int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation); /* * Check the l2$ logout data */ if (ispanther) { ecp = &cdp->chd_l2_data[0]; ec_set_size = PN_L2_SET_SIZE; ways = PN_L2_NWAYS; } else { ecp = &cdp->chd_ec_data[0]; ec_set_size = cpu_ecache_set_size(CPU); ways = cpu_ecache_nway(); totalsize = cpunodes[CPU->cpu_id].ecache_size; } /* remove low order PA bits from fault address not used in PA tag */ addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size); for (i = 0; i < ways; i++, ecp++) { if (ispanther) { tagpa = PN_L2TAG_TO_PA(ecp->ec_tag); tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag); } else { tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag); tagvalid = !cpu_ectag_line_invalid(totalsize, ecp->ec_tag); } if (tagpa == addr && (!reqval || tagvalid)) { match = i + 1; *level = 2; break; } } if (match || !ispanther) return (match); /* For Panther we also check the l3$ */ ecp = &cdp->chd_ec_data[0]; ec_set_size = PN_L3_SET_SIZE; ways = PN_L3_NWAYS; addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size); for (i = 0; i < ways; i++, ecp++) { if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval || !PN_L3_LINE_INVALID(ecp->ec_tag))) { match = i + 1; *level = 3; break; } } return (match); } #if defined(CPU_IMP_L1_CACHE_PARITY) /* * Record information related to the source of an Dcache Parity Error. 
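 *
 * The tag and data checks in the routines below are variations on one
 * idiom: take the population count of the protected bits together with
 * their parity bit; for an even-parity field the total must be even, so
 * an odd count flags corruption. In isolation (even_parity_bad() is a
 * hypothetical wrapper around the popc64() call used below):
 *
 *    static int
 *    even_parity_bad(uint64_t bits_including_parity, uint64_t mask)
 *    {
 *        return ((popc64(bits_including_parity & mask) & 1) != 0);
 *    }
 *
 * which is exactly the form of the dc_tag check below, with
 * CHP_DCTAG_PARMASK as the mask.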
*/ static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt) { int dc_set_size = dcache_size / CH_DCACHE_NWAY; int index; /* * Since instruction decode cannot be done at high PIL * just examine the entire Dcache to locate the error. */ if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { ch_flt->parity_data.dpe.cpl_way = -1; ch_flt->parity_data.dpe.cpl_off = -1; } for (index = 0; index < dc_set_size; index += dcache_linesize) cpu_dcache_parity_check(ch_flt, index); } /* * Check all ways of the Dcache at a specified index for good parity. */ static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index) { int dc_set_size = dcache_size / CH_DCACHE_NWAY; uint64_t parity_bits, pbits, data_word; static int parity_bits_popc[] = { 0, 1, 1, 0 }; int way, word, data_byte; ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0]; ch_dc_data_t tmp_dcp; for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) { /* * Perform diagnostic read. */ get_dcache_dtag(index + way * dc_set_size, (uint64_t *)&tmp_dcp); /* * Check tag for even parity. * Sum of 1 bits (including parity bit) should be even. */ if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) { /* * If this is the first error log detailed information * about it and check the snoop tag. Otherwise just * record the fact that we found another error. */ if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { ch_flt->parity_data.dpe.cpl_way = way; ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY; ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG; if (popc64(tmp_dcp.dc_sntag & CHP_DCSNTAG_PARMASK) & 1) { ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_SNTAG; ch_flt->parity_data.dpe.cpl_lcnt++; } bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t)); } ch_flt->parity_data.dpe.cpl_lcnt++; } if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { /* * Panther has more parity bits than the other * processors for covering dcache data and so each * byte of data in each word has its own parity bit. */ parity_bits = tmp_dcp.dc_pn_data_parity; for (word = 0; word < 4; word++) { data_word = tmp_dcp.dc_data[word]; pbits = parity_bits & PN_DC_DATA_PARITY_MASK; for (data_byte = 0; data_byte < 8; data_byte++) { if (((popc64(data_word & PN_DC_DATA_PARITY_MASK)) & 1) ^ (pbits & 1)) { cpu_record_dc_data_parity( ch_flt, dcp, &tmp_dcp, way, word); } pbits >>= 1; data_word >>= 8; } parity_bits >>= 8; } } else { /* * Check data array for even parity. * The 8 parity bits are grouped into 4 pairs each * of which covers a 64-bit word. The endianness is * reversed -- the low-order parity bits cover the * high-order data words. */ parity_bits = tmp_dcp.dc_utag >> 8; for (word = 0; word < 4; word++) { pbits = (parity_bits >> (6 - word * 2)) & 3; if ((popc64(tmp_dcp.dc_data[word]) + parity_bits_popc[pbits]) & 1) { cpu_record_dc_data_parity(ch_flt, dcp, &tmp_dcp, way, word); } } } } } static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt, ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word) { /* * If this is the first error log detailed information about it. * Otherwise just record the fact that we found another error. */ if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { ch_flt->parity_data.dpe.cpl_way = way; ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY; ch_flt->parity_data.dpe.cpl_off = word * 8; bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t)); } ch_flt->parity_data.dpe.cpl_lcnt++; } /* * Record information related to the source of an Icache Parity Error. * * Called with the Icache disabled so any diagnostic accesses are safe. 
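 *
 * Although the entire I$ is swept, detailed way/offset information is
 * only recorded when the error is found at the index implied by the
 * trap PC. That index is the fault address reduced modulo the per-way
 * set size and aligned down to a line boundary (expected_index() is a
 * hypothetical helper; the computation matches the flt_index
 * calculation below):
 *
 *    static int
 *    expected_index(uint64_t flt_addr, int set_size, int linesize)
 *    {
 *        return ((int)P2ALIGN(flt_addr % set_size, linesize));
 *    }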
*/ static void cpu_icache_parity_info(ch_async_flt_t *ch_flt) { int ic_set_size; int ic_linesize; int index; if (CPU_PRIVATE(CPU)) { ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) / CH_ICACHE_NWAY; ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize); } else { ic_set_size = icache_size / CH_ICACHE_NWAY; ic_linesize = icache_linesize; } ch_flt->parity_data.ipe.cpl_way = -1; ch_flt->parity_data.ipe.cpl_off = -1; for (index = 0; index < ic_set_size; index += ic_linesize) cpu_icache_parity_check(ch_flt, index); } /* * Check all ways of the Icache at a specified index for good parity. */ static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index) { uint64_t parmask, pn_inst_parity; int ic_set_size; int ic_linesize; int flt_index, way, instr, num_instr; struct async_flt *aflt = (struct async_flt *)ch_flt; ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0]; ch_ic_data_t tmp_icp; if (CPU_PRIVATE(CPU)) { ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) / CH_ICACHE_NWAY; ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize); } else { ic_set_size = icache_size / CH_ICACHE_NWAY; ic_linesize = icache_linesize; } /* * Panther has twice as many instructions per icache line and the * instruction parity bit is in a different location. */ if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) { num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t); pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK; } else { num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t); pn_inst_parity = 0; } /* * Index at which we expect to find the parity error. */ flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize); for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) { /* * Diagnostic reads expect address argument in ASI format. */ get_icache_dtag(2 * (index + way * ic_set_size), (uint64_t *)&tmp_icp); /* * If this is the index in which we expect to find the * error log detailed information about each of the ways. * This information will be displayed later if we can't * determine the exact way in which the error is located. */ if (flt_index == index) bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t)); /* * Check tag for even parity. * Sum of 1 bits (including parity bit) should be even. */ if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) { /* * If this way is the one in which we expected * to find the error record the way and check the * snoop tag. Otherwise just record the fact we * found another error. */ if (flt_index == index) { ch_flt->parity_data.ipe.cpl_way = way; ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG; if (popc64(tmp_icp.ic_sntag & CHP_ICSNTAG_PARMASK) & 1) { ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_SNTAG; ch_flt->parity_data.ipe.cpl_lcnt++; } } ch_flt->parity_data.ipe.cpl_lcnt++; continue; } /* * Check instruction data for even parity. * Bits participating in parity differ for PC-relative * versus non-PC-relative instructions. */ for (instr = 0; instr < num_instr; instr++) { parmask = (tmp_icp.ic_data[instr] & CH_ICDATA_PRED_ISPCREL) ? (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) : (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity); if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) { /* * If this way is the one in which we expected * to find the error record the way and offset. * Otherwise just log the fact we found another * error. */ if (flt_index == index) { ch_flt->parity_data.ipe.cpl_way = way; ch_flt->parity_data.ipe.cpl_off = instr * 4; } ch_flt->parity_data.ipe.cpl_lcnt++; continue; } } } } /* * Record information related to the source of an Pcache Parity Error. 
*/ static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt) { int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY; int index; /* * Since instruction decode cannot be done at high PIL just * examine the entire Pcache to check for any parity errors. */ if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { ch_flt->parity_data.dpe.cpl_way = -1; ch_flt->parity_data.dpe.cpl_off = -1; } for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE) cpu_pcache_parity_check(ch_flt, index); } /* * Check all ways of the Pcache at a specified index for good parity. */ static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index) { int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY; int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t); int way, word, pbit, parity_bits; ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0]; ch_pc_data_t tmp_pcp; for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) { /* * Perform diagnostic read. */ get_pcache_dtag(index + way * pc_set_size, (uint64_t *)&tmp_pcp); /* * Check data array for odd parity. There are 8 parity * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each * of those bits covers exactly 8 bytes of the data * array: * * parity bit P$ data bytes covered * ---------- --------------------- * 50 63:56 * 51 55:48 * 52 47:40 * 53 39:32 * 54 31:24 * 55 23:16 * 56 15:8 * 57 7:0 */ parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status); for (word = 0; word < pc_data_words; word++) { pbit = (parity_bits >> (pc_data_words - word - 1)) & 1; if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) { /* * If this is the first error log detailed * information about it. Otherwise just record * the fact that we found another error. */ if (ch_flt->parity_data.dpe.cpl_lcnt == 0) { ch_flt->parity_data.dpe.cpl_way = way; ch_flt->parity_data.dpe.cpl_cache = CPU_PC_PARITY; ch_flt->parity_data.dpe.cpl_off = word * sizeof (uint64_t); bcopy(&tmp_pcp, pcp, sizeof (ch_pc_data_t)); } ch_flt->parity_data.dpe.cpl_lcnt++; } } } } /* * Add L1 Data cache data to the ereport payload. */ static void cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl) { ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; ch_dc_data_t *dcp; ch_dc_data_t dcdata[CH_DCACHE_NWAY]; uint_t nelem; int i, ways_to_check, ways_logged = 0; /* * If this is an D$ fault then there may be multiple * ways captured in the ch_parity_log_t structure. * Otherwise, there will be at most one way captured * in the ch_diag_data_t struct. * Check each way to see if it should be encoded. */ if (ch_flt->flt_type == CPU_DC_PARITY) ways_to_check = CH_DCACHE_NWAY; else ways_to_check = 1; for (i = 0; i < ways_to_check; i++) { if (ch_flt->flt_type == CPU_DC_PARITY) dcp = &ch_flt->parity_data.dpe.cpl_dc[i]; else dcp = &ch_flt->flt_diag_data.chd_dc_data; if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) { bcopy(dcp, &dcdata[ways_logged], sizeof (ch_dc_data_t)); ways_logged++; } } /* * Add the dcache data to the payload. */ fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS, DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); if (ways_logged != 0) { nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged; fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA, DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL); } } /* * Add L1 Instruction cache data to the ereport payload. 
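 *
 * This (like the dcache and ecache variants) follows one pattern: gather
 * only the ways whose logflag carries the magic value into a contiguous
 * scratch array, then encode a way count plus the gathered structures
 * flattened into a uint64 array. Schematically, for the I$ parity case
 * (icp pointing at the logged ways):
 *
 *    ways_logged = 0;
 *    for (i = 0; i < CH_ICACHE_NWAY; i++) {
 *        if (icp[i].ic_logflag == IC_LOGFLAG_MAGIC)
 *            icdata[ways_logged++] = icp[i];
 *    }
 *    nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
 *
 * so a consumer of the ereport can recover the per-way structures by
 * walking the array in chunks of sizeof (ch_ic_data_t).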
*/ static void cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl) { ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; ch_ic_data_t *icp; ch_ic_data_t icdata[CH_ICACHE_NWAY]; uint_t nelem; int i, ways_to_check, ways_logged = 0; /* * If this is an I$ fault then there may be multiple * ways captured in the ch_parity_log_t structure. * Otherwise, there will be at most one way captured * in the ch_diag_data_t struct. * Check each way to see if it should be encoded. */ if (ch_flt->flt_type == CPU_IC_PARITY) ways_to_check = CH_ICACHE_NWAY; else ways_to_check = 1; for (i = 0; i < ways_to_check; i++) { if (ch_flt->flt_type == CPU_IC_PARITY) icp = &ch_flt->parity_data.ipe.cpl_ic[i]; else icp = &ch_flt->flt_diag_data.chd_ic_data; if (icp->ic_logflag == IC_LOGFLAG_MAGIC) { bcopy(icp, &icdata[ways_logged], sizeof (ch_ic_data_t)); ways_logged++; } } /* * Add the icache data to the payload. */ fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS, DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); if (ways_logged != 0) { nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged; fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA, DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL); } } #endif /* CPU_IMP_L1_CACHE_PARITY */ /* * Add ecache data to payload. */ static void cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl) { ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; ch_ec_data_t *ecp; ch_ec_data_t ecdata[CHD_EC_DATA_SETS]; uint_t nelem; int i, ways_logged = 0; /* * Check each way to see if it should be encoded * and concatinate it into a temporary buffer. */ for (i = 0; i < CHD_EC_DATA_SETS; i++) { ecp = &ch_flt->flt_diag_data.chd_ec_data[i]; if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) { bcopy(ecp, &ecdata[ways_logged], sizeof (ch_ec_data_t)); ways_logged++; } } /* * Panther CPUs have an additional level of cache and so * what we just collected was the L3 (ecache) and not the * L2 cache. */ if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { /* * Add the L3 (ecache) data to the payload. */ fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS, DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); if (ways_logged != 0) { nelem = sizeof (ch_ec_data_t) / sizeof (uint64_t) * ways_logged; fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA, DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)ecdata, NULL); } /* * Now collect the L2 cache. */ ways_logged = 0; for (i = 0; i < PN_L2_NWAYS; i++) { ecp = &ch_flt->flt_diag_data.chd_l2_data[i]; if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) { bcopy(ecp, &ecdata[ways_logged], sizeof (ch_ec_data_t)); ways_logged++; } } } /* * Add the L2 cache data to the payload. */ fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS, DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL); if (ways_logged != 0) { nelem = sizeof (ch_ec_data_t) / sizeof (uint64_t) * ways_logged; fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA, DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)ecdata, NULL); } } /* * Initialize cpu scheme for specified cpu. */ static void cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid) { char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ uint8_t mask; mask = cpunodes[cpuid].version; (void) snprintf(sbuf, sizeof (sbuf), "%llX", (u_longlong_t)cpunodes[cpuid].device_id); (void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL, cpuid, &mask, (const char *)sbuf); } /* * Returns ereport resource type. 
*/ static int cpu_error_to_resource_type(struct async_flt *aflt) { ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; switch (ch_flt->flt_type) { case CPU_CE_ECACHE: case CPU_UE_ECACHE: case CPU_UE_ECACHE_RETIRE: case CPU_ORPH: /* * If AFSR error bit indicates L2$ Data for Cheetah, * Cheetah+ or Jaguar, or L3$ Data for Panther, return * E$ Data type, otherwise, return CPU type. */ if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit)) return (ERRTYPE_ECACHE_DATA); return (ERRTYPE_CPU); case CPU_CE: case CPU_UE: case CPU_EMC: case CPU_DUE: case CPU_RCE: case CPU_RUE: case CPU_FRC: case CPU_FRU: return (ERRTYPE_MEMORY); case CPU_IC_PARITY: case CPU_DC_PARITY: case CPU_FPUERR: case CPU_PC_PARITY: case CPU_ITLB_PARITY: case CPU_DTLB_PARITY: return (ERRTYPE_CPU); } return (ERRTYPE_UNKNOWN); } /* * Encode the data saved in the ch_async_flt_t struct into * the FM ereport payload. */ static void cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload, nvlist_t *resource, int *afar_status, int *synd_status) { ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; *synd_status = AFLT_STAT_INVALID; *afar_status = AFLT_STAT_INVALID; if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR, DATA_TYPE_UINT64, aflt->flt_stat, NULL); } if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) && IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT, DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) { *afar_status = afsr_to_afar_status(ch_flt->afsr_errs, ch_flt->flt_bit); fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS, DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR, DATA_TYPE_UINT64, aflt->flt_addr, NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC, DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL, DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT, DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV, DATA_TYPE_BOOLEAN_VALUE, (aflt->flt_priv ? B_TRUE : B_FALSE), NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME, DATA_TYPE_BOOLEAN_VALUE, (aflt->flt_stat & C_AFSR_ME) ? 
B_TRUE : B_FALSE, NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) { *synd_status = afsr_to_synd_status(aflt->flt_inst, ch_flt->afsr_errs, ch_flt->flt_bit); fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS, DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND, DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, flt_to_error_type(aflt), NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP, DATA_TYPE_UINT64, aflt->flt_disp, NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2) cpu_payload_add_ecache(aflt, payload); if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION, DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED, DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL); } if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) { fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK, DATA_TYPE_UINT32_ARRAY, 16, (uint32_t *)&ch_flt->flt_fpdata, NULL); } #if defined(CPU_IMP_L1_CACHE_PARITY) if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D) cpu_payload_add_dcache(aflt, payload); if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I) cpu_payload_add_icache(aflt, payload); #endif /* CPU_IMP_L1_CACHE_PARITY */ #if defined(CHEETAH_PLUS) if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P) cpu_payload_add_pcache(aflt, payload); if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB) cpu_payload_add_tlb(aflt, payload); #endif /* CHEETAH_PLUS */ /* * Create the FMRI that goes into the payload * and contains the unum info if necessary. */ if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) { char unum[UNUM_NAMLEN] = ""; char sid[DIMM_SERIAL_ID_LEN] = ""; int len, ret, rtype, synd_code; uint64_t offset = (uint64_t)-1; rtype = cpu_error_to_resource_type(aflt); switch (rtype) { case ERRTYPE_MEMORY: case ERRTYPE_ECACHE_DATA: /* * Memory errors, do unum lookup */ if (*afar_status == AFLT_STAT_INVALID) break; if (rtype == ERRTYPE_ECACHE_DATA) aflt->flt_status |= ECC_ECACHE; else aflt->flt_status &= ~ECC_ECACHE; synd_code = synd_to_synd_code(*synd_status, aflt->flt_synd, ch_flt->flt_bit); if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0) break; ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN, &len); if (ret == 0) { (void) cpu_get_mem_offset(aflt->flt_addr, &offset); } fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL, unum, (ret == 0) ? sid : NULL, offset); fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE, DATA_TYPE_NVLIST, resource, NULL); break; case ERRTYPE_CPU: /* * On-board processor array error, add cpu resource. */ cpu_fmri_cpu_set(resource, aflt->flt_inst); fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE, DATA_TYPE_NVLIST, resource, NULL); break; } } } /* * Initialize the way info if necessary. */ void cpu_ereport_init(struct async_flt *aflt) { ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt; ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0]; int i; /* * Initialize the info in the CPU logout structure. 
* The I$/D$ way information is not initialized here * since it is captured in the logout assembly code. */ for (i = 0; i < CHD_EC_DATA_SETS; i++) (ecp + i)->ec_way = i; for (i = 0; i < PN_L2_NWAYS; i++) (l2p + i)->ec_way = i; } /* * Returns whether fault address is valid for this error bit and * whether the address is "in memory" (i.e. pf_is_memory returns 1). */ int cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit) { struct async_flt *aflt = (struct async_flt *)ch_flt; return ((t_afsr_bit & C_AFSR_MEMORY) && afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) == AFLT_STAT_VALID && pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)); } /* * Returns whether fault address is valid based on the error bit for the * one event being queued and whether the address is "in memory". */ static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit) { struct async_flt *aflt = (struct async_flt *)ch_flt; int afar_status; uint64_t afsr_errs, afsr_ow, *ow_bits; if (!(t_afsr_bit & C_AFSR_MEMORY) || !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT)) return (0); afsr_errs = ch_flt->afsr_errs; afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit); switch (afar_status) { case AFLT_STAT_VALID: return (1); case AFLT_STAT_AMBIGUOUS: /* * Status is ambiguous since another error bit (or bits) * of equal priority to the specified bit on in the afsr, * so check those bits. Return 1 only if the bits on in the * same class as the t_afsr_bit are also C_AFSR_MEMORY bits. * Otherwise not all the equal priority bits are for memory * errors, so return 0. */ ow_bits = afar_overwrite; while ((afsr_ow = *ow_bits++) != 0) { /* * Get other bits that are on in t_afsr_bit's priority * class to check for Memory Error bits only. */ if (afsr_ow & t_afsr_bit) { if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY) return (0); else return (1); } } /*FALLTHRU*/ default: return (0); } } static void cpu_log_diag_info(ch_async_flt_t *ch_flt) { struct async_flt *aflt = (struct async_flt *)ch_flt; ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data; ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data; ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0]; #if defined(CPU_IMP_ECACHE_ASSOC) int i, nway; #endif /* CPU_IMP_ECACHE_ASSOC */ /* * Check if the CPU log out captured was valid. */ if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID || ch_flt->flt_data_incomplete) return; #if defined(CPU_IMP_ECACHE_ASSOC) nway = cpu_ecache_nway(); i = cpu_ecache_line_valid(ch_flt); if (i == 0 || i > nway) { for (i = 0; i < nway; i++) ecp[i].ec_logflag = EC_LOGFLAG_MAGIC; } else ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC; #else /* CPU_IMP_ECACHE_ASSOC */ ecp->ec_logflag = EC_LOGFLAG_MAGIC; #endif /* CPU_IMP_ECACHE_ASSOC */ #if defined(CHEETAH_PLUS) pn_cpu_log_diag_l2_info(ch_flt); #endif /* CHEETAH_PLUS */ if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) { dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx); dcp->dc_logflag = DC_LOGFLAG_MAGIC; } if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) { if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx); else icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx); icp->ic_logflag = IC_LOGFLAG_MAGIC; } } /* * Cheetah ECC calculation. * * We only need to do the calculation on the data bits and can ignore check * bit and Mtag bit terms in the calculation. 
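 * Each row of ch_ecc_table below is a 128-bit mask (low/high 64-bit halves)
 * selecting the data bits that feed one of the nine check bits;
 * us3_gen_ecc() computes each syndrome bit as popc64(data & mask) & 1,
 * i.e. the parity of the selected bits.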
*/ static uint64_t ch_ecc_table[9][2] = { /* * low order 64-bits high-order 64-bits */ { 0x46bffffeccd1177f, 0x488800022100014c }, { 0x42fccc81331ff77f, 0x14424f1010249184 }, { 0x8898827c222f1ffe, 0x22c1222808184aaf }, { 0xf7632203e131ccf1, 0xe1241121848292b8 }, { 0x7f5511421b113809, 0x901c88d84288aafe }, { 0x1d49412184882487, 0x8f338c87c044c6ef }, { 0xf552181014448344, 0x7ff8f4443e411911 }, { 0x2189240808f24228, 0xfeeff8cc81333f42 }, { 0x3280008440001112, 0xfee88b337ffffd62 }, }; /* * 64-bit population count, use well-known popcnt trick. * We could use the UltraSPARC V9 POPC instruction, but some * CPUs including Cheetahplus and Jaguar do not support that * instruction. */ int popc64(uint64_t val) { int cnt; for (cnt = 0; val != 0; val &= val - 1) cnt++; return (cnt); } /* * Generate the 9 ECC bits for the 128-bit chunk based on the table above. * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number * of 1 bits == 0, so we can just use the least significant bit of the popcnt * instead of doing all the xor's. */ uint32_t us3_gen_ecc(uint64_t data_low, uint64_t data_high) { int bitno, s; int synd = 0; for (bitno = 0; bitno < 9; bitno++) { s = (popc64(data_low & ch_ecc_table[bitno][0]) + popc64(data_high & ch_ecc_table[bitno][1])) & 1; synd |= (s << bitno); } return (synd); } /* * Queue one event based on ecc_type_to_info entry. If the event has an AFT1 * tag associated with it or is a fatal event (aflt_panic set), it is sent to * the UE event queue. Otherwise it is dispatched to the CE event queue. */ static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason, ecc_type_to_info_t *eccp, ch_diag_data_t *cdp) { struct async_flt *aflt = (struct async_flt *)ch_flt; if (reason && strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) { (void) strcat(reason, eccp->ec_reason); } ch_flt->flt_bit = eccp->ec_afsr_bit; ch_flt->flt_type = eccp->ec_flt_type; if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID) ch_flt->flt_diag_data = *cdp; else ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID; aflt->flt_in_memory = cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit); if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS) aflt->flt_synd = GET_M_SYND(aflt->flt_stat); else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) aflt->flt_synd = GET_E_SYND(aflt->flt_stat); else aflt->flt_synd = 0; aflt->flt_payload = eccp->ec_err_payload; if (aflt->flt_panic || (eccp->ec_afsr_bit & (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1))) cpu_errorq_dispatch(eccp->ec_err_class, (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); else cpu_errorq_dispatch(eccp->ec_err_class, (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue, aflt->flt_panic); } /* * Queue events on async event queue one event per error bit. First we * queue the events that we "expect" for the given trap, then we queue events * that we may not expect. Return number of events queued. */ int cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs, ch_cpu_logout_t *clop) { struct async_flt *aflt = (struct async_flt *)ch_flt; ecc_type_to_info_t *eccp; int nevents = 0; uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat; #if defined(CHEETAH_PLUS) uint64_t orig_t_afsr_errs; #endif uint64_t primary_afsr_ext = ch_flt->afsr_ext; uint64_t primary_afsr_errs = ch_flt->afsr_errs; ch_diag_data_t *cdp = NULL; t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS); #if defined(CHEETAH_PLUS) orig_t_afsr_errs = t_afsr_errs; /* * For Cheetah+, log the shadow AFSR/AFAR bits first. 
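 * The shadow registers are queued first and, if they differ from the
 * primary AFSR/AFAR, tagged as the first fault; flt_addr/flt_stat are
 * restored to the primary values before the remaining events are queued.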
*/ if (clop != NULL) { /* * Set the AFSR and AFAR fields to the shadow registers. The * flt_addr and flt_stat fields will be reset to the primaries * below, but the sdw_addr and sdw_stat will stay as the * secondaries. */ cdp = &clop->clo_sdw_data; aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar; aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr; ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext; ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) | (cdp->chd_afsr & C_AFSR_ALL_ERRS); /* * If the primary and shadow AFSR differ, tag the shadow as * the first fault. */ if ((primary_afar != cdp->chd_afar) || (primary_afsr_errs != ch_flt->afsr_errs)) { aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT); } /* * Check AFSR bits as well as AFSR_EXT bits in order of * the AFAR overwrite priority. Our stored AFSR_EXT value * is expected to be zero for those CPUs which do not have * an AFSR_EXT register. */ for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) { if ((eccp->ec_afsr_bit & (ch_flt->afsr_errs & t_afsr_errs)) && ((eccp->ec_flags & aflt->flt_status) != 0)) { cpu_queue_one_event(ch_flt, reason, eccp, cdp); cdp = NULL; t_afsr_errs &= ~eccp->ec_afsr_bit; nevents++; } } /* * If the ME bit is on in the primary AFSR turn all the * error bits on again that may set the ME bit to make * sure we see the ME AFSR error logs. */ if ((primary_afsr & C_AFSR_ME) != 0) t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS); } #endif /* CHEETAH_PLUS */ if (clop != NULL) cdp = &clop->clo_data; /* * Queue expected errors, error bit and fault type must match * in the ecc_type_to_info table. */ for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; eccp++) { if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 && (eccp->ec_flags & aflt->flt_status) != 0) { #if defined(SERRANO) /* * For FRC/FRU errors on Serrano the afar2 captures * the address and the associated data is * in the shadow logout area. */ if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) { if (clop != NULL) cdp = &clop->clo_sdw_data; aflt->flt_addr = ch_flt->afar2; } else { if (clop != NULL) cdp = &clop->clo_data; aflt->flt_addr = primary_afar; } #else /* SERRANO */ aflt->flt_addr = primary_afar; #endif /* SERRANO */ aflt->flt_stat = primary_afsr; ch_flt->afsr_ext = primary_afsr_ext; ch_flt->afsr_errs = primary_afsr_errs; cpu_queue_one_event(ch_flt, reason, eccp, cdp); cdp = NULL; t_afsr_errs &= ~eccp->ec_afsr_bit; nevents++; } } /* * Queue unexpected errors, error bit only match. */ for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL; eccp++) { if (eccp->ec_afsr_bit & t_afsr_errs) { #if defined(SERRANO) /* * For FRC/FRU errors on Serrano the afar2 captures * the address and the associated data is * in the shadow logout area. */ if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) { if (clop != NULL) cdp = &clop->clo_sdw_data; aflt->flt_addr = ch_flt->afar2; } else { if (clop != NULL) cdp = &clop->clo_data; aflt->flt_addr = primary_afar; } #else /* SERRANO */ aflt->flt_addr = primary_afar; #endif /* SERRANO */ aflt->flt_stat = primary_afsr; ch_flt->afsr_ext = primary_afsr_ext; ch_flt->afsr_errs = primary_afsr_errs; cpu_queue_one_event(ch_flt, reason, eccp, cdp); cdp = NULL; t_afsr_errs &= ~eccp->ec_afsr_bit; nevents++; } } return (nevents); } /* * Return trap type number. 
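 * The ECC_*_TRAP bits in flt_status map one-for-one onto the TRAP_TYPE_*
 * values below; TRAP_TYPE_UNKNOWN is returned when none of them is set.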
*/ uint8_t flt_to_trap_type(struct async_flt *aflt) { if (aflt->flt_status & ECC_I_TRAP) return (TRAP_TYPE_ECC_I); if (aflt->flt_status & ECC_D_TRAP) return (TRAP_TYPE_ECC_D); if (aflt->flt_status & ECC_F_TRAP) return (TRAP_TYPE_ECC_F); if (aflt->flt_status & ECC_C_TRAP) return (TRAP_TYPE_ECC_C); if (aflt->flt_status & ECC_DP_TRAP) return (TRAP_TYPE_ECC_DP); if (aflt->flt_status & ECC_IP_TRAP) return (TRAP_TYPE_ECC_IP); if (aflt->flt_status & ECC_ITLB_TRAP) return (TRAP_TYPE_ECC_ITLB); if (aflt->flt_status & ECC_DTLB_TRAP) return (TRAP_TYPE_ECC_DTLB); return (TRAP_TYPE_UNKNOWN); } /* * Decide an error type based on detector and leaky/partner tests. * The following array is used for quick translation - it must * stay in sync with ce_dispact_t. */ static char *cetypes[] = { CE_DISP_DESC_U, CE_DISP_DESC_I, CE_DISP_DESC_PP, CE_DISP_DESC_P, CE_DISP_DESC_L, CE_DISP_DESC_PS, CE_DISP_DESC_S }; char * flt_to_error_type(struct async_flt *aflt) { ce_dispact_t dispact, disp; uchar_t dtcrinfo, ptnrinfo, lkyinfo; /* * The memory payload bundle is shared by some events that do * not perform any classification. For those flt_disp will be * 0 and we will return "unknown". */ if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0) return (cetypes[CE_DISP_UNKNOWN]); dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp); /* * It is also possible that no scrub/classification was performed * by the detector, for instance where a disrupting error logged * in the AFSR while CEEN was off in cpu_deferred_error. */ if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) return (cetypes[CE_DISP_UNKNOWN]); /* * Lookup type in initial classification/action table */ dispact = CE_DISPACT(ce_disp_table, CE_XDIAG_AFARMATCHED(dtcrinfo), CE_XDIAG_STATE(dtcrinfo), CE_XDIAG_CE1SEEN(dtcrinfo), CE_XDIAG_CE2SEEN(dtcrinfo)); /* * A bad lookup is not something to panic production systems for. */ ASSERT(dispact != CE_DISP_BAD); if (dispact == CE_DISP_BAD) return (cetypes[CE_DISP_UNKNOWN]); disp = CE_DISP(dispact); switch (disp) { case CE_DISP_UNKNOWN: case CE_DISP_INTERMITTENT: break; case CE_DISP_POSS_PERS: /* * "Possible persistent" errors to which we have applied a valid * leaky test can be separated into "persistent" or "leaky". */ lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp); if (CE_XDIAG_TESTVALID(lkyinfo)) { if (CE_XDIAG_CE1SEEN(lkyinfo) || CE_XDIAG_CE2SEEN(lkyinfo)) disp = CE_DISP_LEAKY; else disp = CE_DISP_PERS; } break; case CE_DISP_POSS_STICKY: /* * Promote "possible sticky" results that have been * confirmed by a partner test to "sticky". Unconfirmed * "possible sticky" events are left at that status - we do not * guess at any bad reader/writer etc status here. */ ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp); if (CE_XDIAG_TESTVALID(ptnrinfo) && CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo)) disp = CE_DISP_STICKY; /* * Promote "possible sticky" results on a uniprocessor * to "sticky" */ if (disp == CE_DISP_POSS_STICKY && CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC) disp = CE_DISP_STICKY; break; default: disp = CE_DISP_UNKNOWN; break; } return (cetypes[disp]); } /* * Given the entire afsr, the specific bit to check and a prioritized list of * error bits, determine the validity of the various overwrite priority * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have * different overwrite priorities. * * Given a specific afsr error bit and the entire afsr, there are three cases: * INVALID: The specified bit is lower overwrite priority than some other * error bit which is on in the afsr (or IVU/IVC). 
* VALID: The specified bit is higher priority than all other error bits * which are on in the afsr. * AMBIGUOUS: Another error bit (or bits) of equal priority to the specified * bit is on in the afsr. */ int afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits) { uint64_t afsr_ow; while ((afsr_ow = *ow_bits++) != 0) { /* * If bit is in the priority class, check to see if another * bit in the same class is on => ambiguous. Otherwise, * the value is valid. If the bit is not on at this priority * class, but a higher priority bit is on, then the value is * invalid. */ if (afsr_ow & afsr_bit) { /* * If equal pri bit is on, ambiguous. */ if (afsr & (afsr_ow & ~afsr_bit)) return (AFLT_STAT_AMBIGUOUS); return (AFLT_STAT_VALID); } else if (afsr & afsr_ow) break; } /* * We didn't find a match or a higher priority bit was on. Not * finding a match handles the case of invalid AFAR for IVC, IVU. */ return (AFLT_STAT_INVALID); } static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit) { #if defined(SERRANO) if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite)); else #endif /* SERRANO */ return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite)); } static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit) { return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite)); } static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit) { return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite)); } static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit) { #ifdef lint cpuid = cpuid; #endif #if defined(CHEETAH_PLUS) /* * The M_SYND overwrite policy is combined with the E_SYND overwrite * policy for Cheetah+ and separate for Panther CPUs. */ if (afsr_bit & C_AFSR_MSYND_ERRS) { if (IS_PANTHER(cpunodes[cpuid].implementation)) return (afsr_to_msynd_status(afsr, afsr_bit)); else return (afsr_to_esynd_status(afsr, afsr_bit)); } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { if (IS_PANTHER(cpunodes[cpuid].implementation)) return (afsr_to_pn_esynd_status(afsr, afsr_bit)); else return (afsr_to_esynd_status(afsr, afsr_bit)); #else /* CHEETAH_PLUS */ if (afsr_bit & C_AFSR_MSYND_ERRS) { return (afsr_to_msynd_status(afsr, afsr_bit)); } else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) { return (afsr_to_esynd_status(afsr, afsr_bit)); #endif /* CHEETAH_PLUS */ } else { return (AFLT_STAT_INVALID); } } /* * Slave CPU stick synchronization. */ void sticksync_slave(void) { int i; int tries = 0; int64_t tskew; int64_t av_tskew; kpreempt_disable(); /* wait for the master side */ while (stick_sync_cmd != SLAVE_START) ; /* * Synchronization should only take a few tries at most. But in the * odd case where the cpu isn't cooperating we'll keep trying. A cpu * without its stick synchronized wouldn't be a good citizen. */ while (slave_done == 0) { /* * Time skew calculation. */ av_tskew = tskew = 0; for (i = 0; i < stick_iter; i++) { /* make location hot */ timestamp[EV_A_START] = 0; stick_timestamp(&timestamp[EV_A_START]); /* tell the master we're ready */ stick_sync_cmd = MASTER_START; /* and wait */ while (stick_sync_cmd != SLAVE_CONT) ; /* Event B end */ stick_timestamp(&timestamp[EV_B_END]); /* calculate time skew */ tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START]) - (timestamp[EV_A_END] - timestamp[EV_A_START])) / 2; /* keep running count */ av_tskew += tskew; } /* for */ /* * Adjust stick for time skew if not within the max allowed; * otherwise we're all done.
*/ if (stick_iter != 0) av_tskew = av_tskew/stick_iter; if (ABS(av_tskew) > stick_tsk) { /* * If the skew is 1 (the slave's STICK register * is 1 STICK ahead of the master's), stick_adj * could fail to adjust the slave's STICK register * if the STICK read on the slave happens to * align with the increment of the STICK. * Therefore, we increment the skew to 2. */ if (av_tskew == 1) av_tskew++; stick_adj(-av_tskew); } else slave_done = 1; #ifdef DEBUG if (tries < DSYNC_ATTEMPTS) stick_sync_stats[CPU->cpu_id].skew_val[tries] = av_tskew; ++tries; #endif /* DEBUG */ #ifdef lint tries = tries; #endif } /* while */ /* allow the master to finish */ stick_sync_cmd = EVENT_NULL; kpreempt_enable(); } /* * Master CPU side of stick synchronization. * - timestamp end of Event A * - timestamp beginning of Event B */ void sticksync_master(void) { int i; kpreempt_disable(); /* tell the slave we've started */ slave_done = 0; stick_sync_cmd = SLAVE_START; while (slave_done == 0) { for (i = 0; i < stick_iter; i++) { /* wait for the slave */ while (stick_sync_cmd != MASTER_START) ; /* Event A end */ stick_timestamp(&timestamp[EV_A_END]); /* make location hot */ timestamp[EV_B_START] = 0; stick_timestamp(&timestamp[EV_B_START]); /* tell the slave to continue */ stick_sync_cmd = SLAVE_CONT; } /* for */ /* wait while slave calculates time skew */ while (stick_sync_cmd == SLAVE_CONT) ; } /* while */ kpreempt_enable(); } /* * Cheetah/Cheetah+ have a disrupting error for copybacks, so we don't need to * do the Spitfire hack of xcall'ing all the cpus to ask to check for them. Also, * in cpu_async_panic_callb, each cpu checks for CPU events on its way to * panic idle. */ /*ARGSUSED*/ void cpu_check_allcpus(struct async_flt *aflt) {} struct kmem_cache *ch_private_cache; /* * Cpu private uninitialization. Uninitialize the Ecache scrubber and * deallocate the scrubber data structures and cpu_private data structure. */ void cpu_uninit_private(struct cpu *cp) { cheetah_private_t *chprp = CPU_PRIVATE(cp); ASSERT(chprp); cpu_uninit_ecache_scrub_dr(cp); CPU_PRIVATE(cp) = NULL; ch_err_tl1_paddrs[cp->cpu_id] = NULL; kmem_cache_free(ch_private_cache, chprp); cmp_delete_cpu(cp->cpu_id); } /* * Cheetah Cache Scrubbing * * The primary purpose of Cheetah cache scrubbing is to reduce the exposure * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not * protected by either parity or ECC. * * We currently default the E$ and D$ scan rate to 100 (scan 10% of the * cache per second). Due to the specifics of how the I$ control * logic works with respect to the ASI used to scrub I$ lines, the entire * I$ is scanned at once. */ /* * Tuneables to enable and disable the scrubbing of the caches, and to tune * scrubbing behavior. These may be changed via /etc/system or using mdb * on a running system. */ int dcache_scrub_enable = 1; /* D$ scrubbing is on by default */ /* * The following are the PIL levels that the softints/cross traps will fire at. */ uint_t ecache_scrub_pil = PIL_9; /* E$ scrub PIL for cross traps */ uint_t dcache_scrub_pil = PIL_9; /* D$ scrub PIL for cross traps */ uint_t icache_scrub_pil = PIL_9; /* I$ scrub PIL for cross traps */ #if defined(JALAPENO) /* * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber * on Jalapeno.
*/ int ecache_scrub_enable = 0; #else /* JALAPENO */ /* * With all other cpu types, E$ scrubbing is on by default */ int ecache_scrub_enable = 1; #endif /* JALAPENO */ #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO) /* * The I$ scrubber tends to cause latency problems for real-time SW, so it * is disabled by default on non-Cheetah systems */ int icache_scrub_enable = 0; /* * Tuneables specifying the scrub calls per second and the scan rate * for each cache * * The cyclic times are set during boot based on the following values. * Changing these values in mdb after this time will have no effect. If * a different value is desired, it must be set in /etc/system before a * reboot. */ int ecache_calls_a_sec = 1; int dcache_calls_a_sec = 2; int icache_calls_a_sec = 2; int ecache_scan_rate_idle = 1; int ecache_scan_rate_busy = 1; int dcache_scan_rate_idle = 1; int dcache_scan_rate_busy = 1; int icache_scan_rate_idle = 1; int icache_scan_rate_busy = 1; #else /* CHEETAH_PLUS || JALAPENO || SERRANO */ int icache_scrub_enable = 1; /* I$ scrubbing is on by default */ int ecache_calls_a_sec = 100; /* E$ scrub calls per seconds */ int dcache_calls_a_sec = 100; /* D$ scrub calls per seconds */ int icache_calls_a_sec = 100; /* I$ scrub calls per seconds */ int ecache_scan_rate_idle = 100; /* E$ scan rate (in tenths of a %) */ int ecache_scan_rate_busy = 100; /* E$ scan rate (in tenths of a %) */ int dcache_scan_rate_idle = 100; /* D$ scan rate (in tenths of a %) */ int dcache_scan_rate_busy = 100; /* D$ scan rate (in tenths of a %) */ int icache_scan_rate_idle = 100; /* I$ scan rate (in tenths of a %) */ int icache_scan_rate_busy = 100; /* I$ scan rate (in tenths of a %) */ #endif /* CHEETAH_PLUS || JALAPENO || SERRANO */ /* * In order to scrub on offline cpus, a cross trap is sent. The handler will * increment the outstanding request counter and schedule a softint to run * the scrubber. */ extern xcfunc_t cache_scrubreq_tl1; /* * These are the softint functions for each cache scrubber */ static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2); static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2); static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2); /* * The cache scrub info table contains cache specific information * and allows for some of the scrub code to be table driven, reducing * duplication of cache similar code. * * This table keeps a copy of the value in the calls per second variable * (?cache_calls_a_sec). This makes it much more difficult for someone * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in * mdb in a misguided attempt to disable the scrubber). */ struct scrub_info { int *csi_enable; /* scrubber enable flag */ int csi_freq; /* scrubber calls per second */ int csi_index; /* index to chsm_outstanding[] */ uint64_t csi_inum; /* scrubber interrupt number */ cyclic_id_t csi_omni_cyc_id; /* omni cyclic ID */ cyclic_id_t csi_offline_cyc_id; /* offline cyclic ID */ char csi_name[3]; /* cache name for this scrub entry */ } cache_scrub_info[] = { { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"}, { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"}, { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"} }; /* * If scrubbing is enabled, increment the outstanding request counter. If it * is 1 (meaning there were no previous requests outstanding), call * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing * a self trap. 
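 * The matching softint handler decrements the counter again once it has
 * run; each handler invocation scrubs (nlines * scan_rate) /
 * (1000 * calls_a_sec) lines, so e.g. a scan rate of 100 (10%) at 100
 * calls a second works through 10% of the cache every second.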
*/ static void do_scrub(struct scrub_info *csi) { ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); int index = csi->csi_index; uint32_t *outstanding = &csmp->chsm_outstanding[index]; if (*(csi->csi_enable) && (csmp->chsm_enable[index])) { if (atomic_add_32_nv(outstanding, 1) == 1) { xt_one_unchecked(CPU->cpu_id, setsoftint_tl1, csi->csi_inum, 0); } } } /* * Omni cyclics don't fire on offline cpus, so we use another cyclic to * cross-trap the offline cpus. */ static void do_scrub_offline(struct scrub_info *csi) { ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); if (CPUSET_ISNULL(cpu_offline_set)) { /* * No offline cpus - nothing to do */ return; } if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) { xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum, csi->csi_index); } } /* * This is the initial setup for the scrubber cyclics - it sets the * interrupt level, frequency, and function to call. */ /*ARGSUSED*/ static void cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) { struct scrub_info *csi = (struct scrub_info *)arg; ASSERT(csi != NULL); hdlr->cyh_func = (cyc_func_t)do_scrub; hdlr->cyh_level = CY_LOW_LEVEL; hdlr->cyh_arg = arg; when->cyt_when = 0; /* Start immediately */ when->cyt_interval = NANOSEC / csi->csi_freq; } /* * Initialization for cache scrubbing. * This routine is called AFTER all cpus have had cpu_init_private called * to initialize their private data areas. */ void cpu_init_cache_scrub(void) { int i; struct scrub_info *csi; cyc_omni_handler_t omni_hdlr; cyc_handler_t offline_hdlr; cyc_time_t when; /* * save away the maximum number of lines for the D$ */ dcache_nlines = dcache_size / dcache_linesize; /* * register the softints for the cache scrubbing */ cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum = add_softintr(ecache_scrub_pil, scrub_ecache_line_intr, (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT); cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec; cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum = add_softintr(dcache_scrub_pil, scrub_dcache_line_intr, (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT); cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec; cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum = add_softintr(icache_scrub_pil, scrub_icache_line_intr, (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT); cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec; /* * start the scrubbing for all the caches */ mutex_enter(&cpu_lock); for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) { csi = &cache_scrub_info[i]; if (!(*csi->csi_enable)) continue; /* * force the following to be true: * 1 <= calls_a_sec <= hz */ if (csi->csi_freq > hz) { cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high " "(%d); resetting to hz (%d)", csi->csi_name, csi->csi_freq, hz); csi->csi_freq = hz; } else if (csi->csi_freq < 1) { cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low " "(%d); resetting to 1", csi->csi_name, csi->csi_freq); csi->csi_freq = 1; } omni_hdlr.cyo_online = cpu_scrub_cyclic_setup; omni_hdlr.cyo_offline = NULL; omni_hdlr.cyo_arg = (void *)csi; offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline; offline_hdlr.cyh_arg = (void *)csi; offline_hdlr.cyh_level = CY_LOW_LEVEL; when.cyt_when = 0; /* Start immediately */ when.cyt_interval = NANOSEC / csi->csi_freq; csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr); csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when); } 
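/*
 * Register the cpu setup callback (cpu_scrub_cpu_setup) so that scrubber
 * bookkeeping can follow CPUs through online/offline transitions (see the
 * chsm_outstanding note above scrub_ecache_line_intr() below).
 */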
register_cpu_setup_func(cpu_scrub_cpu_setup, NULL); mutex_exit(&cpu_lock); } /* * Indicate that the specified cpu is idle. */ void cpu_idle_ecache_scrub(struct cpu *cp) { if (CPU_PRIVATE(cp) != NULL) { ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); csmp->chsm_ecache_busy = ECACHE_CPU_IDLE; } } /* * Indicate that the specified cpu is busy. */ void cpu_busy_ecache_scrub(struct cpu *cp) { if (CPU_PRIVATE(cp) != NULL) { ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); csmp->chsm_ecache_busy = ECACHE_CPU_BUSY; } } /* * Initialization for cache scrubbing for the specified cpu. */ void cpu_init_ecache_scrub_dr(struct cpu *cp) { ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); int cpuid = cp->cpu_id; /* initialize the number of lines in the caches */ csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize; csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) / CPU_PRIVATE_VAL(cp, chpr_icache_linesize); /* * do_scrub() and do_scrub_offline() check both the global * ?cache_scrub_enable and this per-cpu enable variable. All scrubbers * check this value before scrubbing. Currently, we use it to * disable the E$ scrubber on multi-core cpus or while running at * slowed speed. For now, just turn everything on and allow * cpu_init_private() to change it if necessary. */ csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1; csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1; csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1; cpu_busy_ecache_scrub(cp); } /* * Un-initialization for cache scrubbing for the specified cpu. */ static void cpu_uninit_ecache_scrub_dr(struct cpu *cp) { ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc); /* * un-initialize bookkeeping for cache scrubbing */ bzero(csmp, sizeof (ch_scrub_misc_t)); cpu_idle_ecache_scrub(cp); } /* * Called periodically on each CPU to scrub the D$. */ static void scrub_dcache(int how_many) { int i; ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D]; /* * scrub the desired number of lines */ for (i = 0; i < how_many; i++) { /* * scrub a D$ line */ dcache_inval_line(index); /* * calculate the next D$ line to scrub, assumes * that dcache_nlines is a power of 2 */ index = (index + 1) & (dcache_nlines - 1); } /* * set the scrub index for the next visit */ csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index; } /* * Handler for D$ scrub inum softint. Call scrub_dcache until * we decrement the outstanding request count to zero. */ /*ARGSUSED*/ static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2) { int i; int how_many; int outstanding; ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D]; struct scrub_info *csi = (struct scrub_info *)arg1; int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? dcache_scan_rate_idle : dcache_scan_rate_busy; /* * The scan rates are expressed in units of tenths of a * percent. A scan rate of 1000 (100%) means the whole * cache is scanned every second. */ how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq); do { outstanding = *countp; for (i = 0; i < outstanding; i++) { scrub_dcache(how_many); } } while (atomic_add_32_nv(countp, -outstanding)); return (DDI_INTR_CLAIMED); } /* * Called periodically on each CPU to scrub the I$. The I$ is scrubbed * by invalidating lines. 
Due to the characteristics of the ASI which * is used to invalidate an I$ line, the entire I$ must be invalidated * vs. an individual I$ line. */ static void scrub_icache(int how_many) { int i; ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I]; int icache_nlines = csmp->chsm_icache_nlines; /* * scrub the desired number of lines */ for (i = 0; i < how_many; i++) { /* * since the entire I$ must be scrubbed at once, * wait until the index wraps to zero to invalidate * the entire I$ */ if (index == 0) { icache_inval_all(); } /* * calculate the next I$ line to scrub, assumes * that chsm_icache_nlines is a power of 2 */ index = (index + 1) & (icache_nlines - 1); } /* * set the scrub index for the next visit */ csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index; } /* * Handler for I$ scrub inum softint. Call scrub_icache until * we decrement the outstanding request count to zero. */ /*ARGSUSED*/ static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2) { int i; int how_many; int outstanding; ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I]; struct scrub_info *csi = (struct scrub_info *)arg1; int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? icache_scan_rate_idle : icache_scan_rate_busy; int icache_nlines = csmp->chsm_icache_nlines; /* * The scan rates are expressed in units of tenths of a * percent. A scan rate of 1000 (100%) means the whole * cache is scanned every second. */ how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq); do { outstanding = *countp; for (i = 0; i < outstanding; i++) { scrub_icache(how_many); } } while (atomic_add_32_nv(countp, -outstanding)); return (DDI_INTR_CLAIMED); } /* * Called periodically on each CPU to scrub the E$. */ static void scrub_ecache(int how_many) { ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); int i; int cpuid = CPU->cpu_id; int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E]; int nlines = csmp->chsm_ecache_nlines; int linesize = cpunodes[cpuid].ecache_linesize; int ec_set_size = cpu_ecache_set_size(CPU); /* * scrub the desired number of lines */ for (i = 0; i < how_many; i++) { /* * scrub the E$ line */ ecache_flush_line(ecache_flushaddr + (index * linesize), ec_set_size); /* * calculate the next E$ line to scrub based on twice * the number of E$ lines (to displace lines containing * flush area data), assumes that the number of lines * is a power of 2 */ index = (index + 1) & ((nlines << 1) - 1); } /* * set the ecache scrub index for the next visit */ csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index; } /* * Handler for E$ scrub inum softint. Call the E$ scrubber until * we decrement the outstanding request count to zero. * * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may * become negative after the atomic_add_32_nv(). This is not a problem, as * the next trip around the loop won't scrub anything, and the next add will * reset the count back to zero. */ /*ARGSUSED*/ static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) { int i; int how_many; int outstanding; ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc); uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E]; struct scrub_info *csi = (struct scrub_info *)arg1; int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ? 
ecache_scan_rate_idle : ecache_scan_rate_busy; int ecache_nlines = csmp->chsm_ecache_nlines; /* * The scan rates are expressed in units of tenths of a * percent. A scan rate of 1000 (100%) means the whole * cache is scanned every second. */ how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq); do { outstanding = *countp; for (i = 0; i < outstanding; i++) { scrub_ecache(how_many); } } while (atomic_add_32_nv(countp, -outstanding)); return (DDI_INTR_CLAIMED); } /* * Timeout function to reenable CE */ static void cpu_delayed_check_ce_errors(void *arg) { if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg, TQ_NOSLEEP)) { (void) timeout(cpu_delayed_check_ce_errors, arg, drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); } } /* * CE Deferred Re-enable after trap. * * When the CPU gets a disrupting trap for any of the errors * controlled by the CEEN bit, CEEN is disabled in the trap handler * immediately. To eliminate the possibility of multiple CEs causing * recursive stack overflow in the trap handler, we cannot * reenable CEEN while still running in the trap handler. Instead, * after a CE is logged on a CPU, we schedule a timeout function, * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs * seconds. This function will check whether any further CEs * have occurred on that CPU, and if none have, will reenable CEEN. * * If further CEs have occurred while CEEN is disabled, another * timeout will be scheduled. This is to ensure that the CPU can * make progress in the face of CE 'storms', and that it does not * spend all its time logging CE errors. */ static void cpu_check_ce_errors(void *arg) { int cpuid = (int)(uintptr_t)arg; cpu_t *cp; /* * We acquire cpu_lock. */ ASSERT(curthread->t_pil == 0); /* * verify that the cpu is still around, DR * could have got there first ... */ mutex_enter(&cpu_lock); cp = cpu_get(cpuid); if (cp == NULL) { mutex_exit(&cpu_lock); return; } /* * make sure we don't migrate across CPUs * while checking our CE status. */ kpreempt_disable(); /* * If we are running on the CPU that got the * CE, we can do the checks directly. */ if (cp->cpu_id == CPU->cpu_id) { mutex_exit(&cpu_lock); cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0); kpreempt_enable(); return; } kpreempt_enable(); /* * send an x-call to get the CPU that originally * got the CE to do the necessary checks. If we can't * send the x-call, reschedule the timeout, otherwise we * lose CEEN forever on that CPU. */ if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) { xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce, TIMEOUT_CEEN_CHECK, 0); mutex_exit(&cpu_lock); } else { /* * When the CPU is not accepting xcalls, or * the processor is offlined, we don't want to * incur the extra overhead of trying to schedule the * CE timeout indefinitely. However, we don't want to lose * CE checking forever. * * Keep rescheduling the timeout, accepting the additional * overhead as the cost of correctness in the case where we get * a CE, disable CEEN, offline the CPU during the * the timeout interval, and then online it at some * point in the future. This is unlikely given the short * cpu_ceen_delay_secs. */ mutex_exit(&cpu_lock); (void) timeout(cpu_delayed_check_ce_errors, (void *)(uintptr_t)cp->cpu_id, drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC)); } } /* * This routine will check whether CEs have occurred while * CEEN is disabled. Any CEs detected will be logged and, if * possible, scrubbed. 
* * The memscrubber will also use this routine to clear any errors * caused by its scrubbing with CEEN disabled. * * flag == SCRUBBER_CEEN_CHECK * called from memscrubber, just check/scrub, no reset * paddr physical addr. for start of scrub pages * vaddr virtual addr. for scrub area * psz page size of area to be scrubbed * * flag == TIMEOUT_CEEN_CHECK * timeout function has triggered, reset timeout or CEEN * * Note: We must not migrate cpus during this function. This can be * achieved by one of: * - invoking as target of an x-call in which case we're at XCALL_PIL * The flag value must be first xcall argument. * - disabling kernel preemption. This should be done for very short * periods so is not suitable for SCRUBBER_CEEN_CHECK where we might * scrub an extended area with cpu_check_block. The call for * TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept * brief for this case. * - binding to a cpu, eg with thread_affinity_set(). This is used * in the SCRUBBER_CEEN_CHECK case, but is not practical for * the TIMEOUT_CEEN_CHECK because both need cpu_lock. */ void cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz) { ch_cpu_errors_t cpu_error_regs; uint64_t ec_err_enable; uint64_t page_offset; /* Read AFSR */ get_cpu_error_state(&cpu_error_regs); /* * If no CEEN errors have occurred during the timeout * interval, it is safe to re-enable CEEN and exit. */ if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) | (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) { if (flag == TIMEOUT_CEEN_CHECK && !((ec_err_enable = get_error_enable()) & EN_REG_CEEN)) set_error_enable(ec_err_enable | EN_REG_CEEN); return; } /* * Ensure that CEEN was not reenabled (maybe by DR) before * we log/clear the error. */ if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN) set_error_enable(ec_err_enable & ~EN_REG_CEEN); /* * log/clear the CE. If CE_CEEN_DEFER is passed, the * timeout will be rescheduled when the error is logged. */ if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) | (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext))) cpu_ce_detected(&cpu_error_regs, CE_CEEN_DEFER | CE_CEEN_TIMEOUT); else cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT); /* * If the memory scrubber runs while CEEN is * disabled, (or if CEEN is disabled during the * scrub as a result of a CE being triggered by * it), the range being scrubbed will not be * completely cleaned. If there are multiple CEs * in the range at most two of these will be dealt * with, (one by the trap handler and one by the * timeout). It is also possible that none are dealt * with, (CEEN disabled and another CE occurs before * the timeout triggers). So to ensure that the * memory is actually scrubbed, we have to access each * memory location in the range and then check whether * that access causes a CE. */ if (flag == SCRUBBER_CEEN_CHECK && va) { if ((cpu_error_regs.afar >= pa) && (cpu_error_regs.afar < (pa + psz))) { /* * Force a load from physical memory for each * 64-byte block, then check AFSR to determine * whether this access caused an error. * * This is a slow way to do a scrub, but as it will * only be invoked when the memory scrubber actually * triggered a CE, it should not happen too * frequently. * * cut down what we need to check as the scrubber * has verified up to AFAR, so get it's offset * into the page and start there. 
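 * For example, with an 8K page and an AFAR 0x1234 bytes into it, the
 * re-check below starts at the 64-byte aligned page offset 0x1200 and
 * covers the remaining 0xe00 bytes.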
*/ page_offset = (uint64_t)(cpu_error_regs.afar & (psz - 1)); va = (caddr_t)(va + (P2ALIGN(page_offset, 64))); psz -= (uint_t)(P2ALIGN(page_offset, 64)); cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)), psz); } } /* * Reset error enable if this CE is not masked. */ if ((flag == TIMEOUT_CEEN_CHECK) && (cpu_error_regs.afsr & cpu_ce_not_deferred)) set_error_enable(ec_err_enable | EN_REG_CEEN); } /* * Attempt a cpu logout for an error that we did not trap for, such * as a CE noticed with CEEN off. It is assumed that we are still running * on the cpu that took the error and that we cannot migrate. Returns * 0 on success, otherwise nonzero. */ static int cpu_ce_delayed_ec_logout(uint64_t afar) { ch_cpu_logout_t *clop; if (CPU_PRIVATE(CPU) == NULL) return (0); clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) != LOGOUT_INVALID) return (0); cpu_delayed_logout(afar, clop); return (1); } /* * We got an error while CEEN was disabled. We * need to clean up after it and log whatever * information we have on the CE. */ void cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag) { ch_async_flt_t ch_flt; struct async_flt *aflt; char pr_reason[MAX_REASON_STRING]; bzero(&ch_flt, sizeof (ch_async_flt_t)); ch_flt.flt_trapped_ce = flag; aflt = (struct async_flt *)&ch_flt; aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK; ch_flt.afsr_ext = cpu_error_regs->afsr_ext; ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) | (cpu_error_regs->afsr & C_AFSR_ALL_ERRS); aflt->flt_addr = cpu_error_regs->afar; #if defined(SERRANO) ch_flt.afar2 = cpu_error_regs->afar2; #endif /* SERRANO */ aflt->flt_pc = NULL; aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0); aflt->flt_tl = 0; aflt->flt_panic = 0; cpu_log_and_clear_ce(&ch_flt); /* * check if we caused any errors during cleanup */ if (clear_errors(&ch_flt)) { pr_reason[0] = '\0'; (void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs, NULL); } } /* * Log/clear CEEN-controlled disrupting errors */ static void cpu_log_and_clear_ce(ch_async_flt_t *ch_flt) { struct async_flt *aflt; uint64_t afsr, afsr_errs; ch_cpu_logout_t *clop; char pr_reason[MAX_REASON_STRING]; on_trap_data_t *otp = curthread->t_ontrap; aflt = (struct async_flt *)ch_flt; afsr = aflt->flt_stat; afsr_errs = ch_flt->afsr_errs; aflt->flt_id = gethrtime_waitfree(); aflt->flt_bus_id = getprocessorid(); aflt->flt_inst = CPU->cpu_id; aflt->flt_prot = AFLT_PROT_NONE; aflt->flt_class = CPU_FAULT; aflt->flt_status = ECC_C_TRAP; pr_reason[0] = '\0'; /* * Get the CPU log out info for Disrupting Trap. */ if (CPU_PRIVATE(CPU) == NULL) { clop = NULL; ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID; } else { clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout); } if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) { ch_cpu_errors_t cpu_error_regs; get_cpu_error_state(&cpu_error_regs); (void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar); clop->clo_data.chd_afsr = cpu_error_regs.afsr; clop->clo_data.chd_afar = cpu_error_regs.afar; clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext; clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr; clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar; clop->clo_sdw_data.chd_afsr_ext = cpu_error_regs.shadow_afsr_ext; #if defined(SERRANO) clop->clo_data.chd_afar2 = cpu_error_regs.afar2; #endif /* SERRANO */ ch_flt->flt_data_incomplete = 1; /* * The logging/clear code expects AFSR/AFAR to be cleared. * The trap handler does it for CEEN enabled errors * so we need to do it here. 
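 * set_cpu_error_state() below performs that clear using the register
 * values captured above.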
*/ set_cpu_error_state(&cpu_error_regs); } #if defined(JALAPENO) || defined(SERRANO) /* * FRC: Can't scrub memory as we don't have AFAR for Jalapeno. * For Serrano, even thou we do have the AFAR, we still do the * scrub on the RCE side since that's where the error type can * be properly classified as intermittent, persistent, etc. * * CE/RCE: If error is in memory and AFAR is valid, scrub the memory. * Must scrub memory before cpu_queue_events, as scrubbing memory sets * the flt_status bits. */ if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) && (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) || cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) { cpu_ce_scrub_mem_err(aflt, B_TRUE); } #else /* JALAPENO || SERRANO */ /* * CE/EMC: If error is in memory and AFAR is valid, scrub the memory. * Must scrub memory before cpu_queue_events, as scrubbing memory sets * the flt_status bits. */ if (afsr & (C_AFSR_CE|C_AFSR_EMC)) { if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) || cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) { cpu_ce_scrub_mem_err(aflt, B_TRUE); } } #endif /* JALAPENO || SERRANO */ /* * Update flt_prot if this error occurred under on_trap protection. */ if (otp != NULL && (otp->ot_prot & OT_DATA_EC)) aflt->flt_prot = AFLT_PROT_EC; /* * Queue events on the async event queue, one event per error bit. */ if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 || (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) { ch_flt->flt_type = CPU_INV_AFSR; cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR, (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic); } /* * Zero out + invalidate CPU logout. */ if (clop) { bzero(clop, sizeof (ch_cpu_logout_t)); clop->clo_data.chd_afar = LOGOUT_INVALID; } /* * If either a CPC, WDC or EDC error has occurred while CEEN * was disabled, we need to flush either the entire * E$ or an E$ line. */ #if defined(JALAPENO) || defined(SERRANO) if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC)) #else /* JALAPENO || SERRANO */ if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC | C_AFSR_L3_CPC | C_AFSR_L3_WDC)) #endif /* JALAPENO || SERRANO */ cpu_error_ecache_flush(ch_flt); } /* * depending on the error type, we determine whether we * need to flush the entire ecache or just a line. */ static int cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt) { struct async_flt *aflt; uint64_t afsr; uint64_t afsr_errs = ch_flt->afsr_errs; aflt = (struct async_flt *)ch_flt; afsr = aflt->flt_stat; /* * If we got multiple errors, no point in trying * the individual cases, just flush the whole cache */ if (afsr & C_AFSR_ME) { return (ECACHE_FLUSH_ALL); } /* * If either a CPC, WDC or EDC error has occurred while CEEN * was disabled, we need to flush entire E$. We can't just * flush the cache line affected as the ME bit * is not set when multiple correctable errors of the same * type occur, so we might have multiple CPC or EDC errors, * with only the first recorded. */ #if defined(JALAPENO) || defined(SERRANO) if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) { #else /* JALAPENO || SERRANO */ if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC | C_AFSR_L3_EDC | C_AFSR_L3_WDC)) { #endif /* JALAPENO || SERRANO */ return (ECACHE_FLUSH_ALL); } #if defined(JALAPENO) || defined(SERRANO) /* * If only UE or RUE is set, flush the Ecache line, otherwise * flush the entire Ecache. 
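 * (The non-Jalapeno/Serrano case below makes the same line-versus-full
 * flush decision using C_AFSR_UE alone against afsr_errs.)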
*/ if (afsr & (C_AFSR_UE|C_AFSR_RUE)) { if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE || (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) { return (ECACHE_FLUSH_LINE); } else { return (ECACHE_FLUSH_ALL); } } #else /* JALAPENO || SERRANO */ /* * If UE only is set, flush the Ecache line, otherwise * flush the entire Ecache. */ if (afsr_errs & C_AFSR_UE) { if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE) { return (ECACHE_FLUSH_LINE); } else { return (ECACHE_FLUSH_ALL); } } #endif /* JALAPENO || SERRANO */ /* * EDU: If EDU only is set, flush the ecache line, otherwise * flush the entire Ecache. */ if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) { if (((afsr_errs & ~C_AFSR_EDU) == 0) || ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) { return (ECACHE_FLUSH_LINE); } else { return (ECACHE_FLUSH_ALL); } } /* * BERR: If BERR only is set, flush the Ecache line, otherwise * flush the entire Ecache. */ if (afsr_errs & C_AFSR_BERR) { if ((afsr_errs & ~C_AFSR_BERR) == 0) { return (ECACHE_FLUSH_LINE); } else { return (ECACHE_FLUSH_ALL); } } return (0); } void cpu_error_ecache_flush(ch_async_flt_t *ch_flt) { int ecache_flush_flag = cpu_error_ecache_flush_required(ch_flt); /* * Flush Ecache line or entire Ecache based on above checks. */ if (ecache_flush_flag == ECACHE_FLUSH_ALL) cpu_flush_ecache(); else if (ecache_flush_flag == ECACHE_FLUSH_LINE) { cpu_flush_ecache_line(ch_flt); } } /* * Extract the PA portion from the E$ tag. */ uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag) { if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) return (JG_ECTAG_TO_PA(setsize, tag)); else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) return (PN_L3TAG_TO_PA(tag)); else return (CH_ECTAG_TO_PA(setsize, tag)); } /* * Convert the E$ tag PA into an E$ subblock index. */ static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr) { if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr)); else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) /* Panther has only one subblock per line */ return (0); else return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr)); } /* * All subblocks in an E$ line must be invalid for * the line to be invalid. */ int cpu_ectag_line_invalid(int cachesize, uint64_t tag) { if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) return (JG_ECTAG_LINE_INVALID(cachesize, tag)); else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) return (PN_L3_LINE_INVALID(tag)); else return (CH_ECTAG_LINE_INVALID(cachesize, tag)); } /* * Extract state bits for a subblock given the tag. Note that for Panther * this works on both l2 and l3 tags. */ static int cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag) { if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation)) return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag)); else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) return (tag & CH_ECSTATE_MASK); else return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag)); } /* * Cpu specific initialization. 
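 * At present this only arms the CHEETAHPLUS_ERRATUM_25 send-mondo
 * recovery nudge when cheetah_sendmondo_recover is set.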
/*
 * Cpu specific initialization.
 */
void
cpu_mp_init(void)
{
#ifdef	CHEETAHPLUS_ERRATUM_25
	if (cheetah_sendmondo_recover) {
		cheetah_nudge_init();
	}
#endif
}

void
cpu_ereport_post(struct async_flt *aflt)
{
	char *cpu_type, buf[FM_MAX_CLASS];
	nv_alloc_t *nva = NULL;
	nvlist_t *ereport, *detector, *resource;
	errorq_elem_t *eqep;
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	char unum[UNUM_NAMLEN];
	int synd_code;
	uint8_t msg_type;
	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;

	if (aflt->flt_panic || panicstr) {
		eqep = errorq_reserve(ereport_errorq);
		if (eqep == NULL)
			return;
		ereport = errorq_elem_nvl(ereport_errorq, eqep);
		nva = errorq_elem_nva(ereport_errorq, eqep);
	} else {
		ereport = fm_nvlist_create(nva);
	}

	/*
	 * Create the scheme "cpu" FMRI.
	 */
	detector = fm_nvlist_create(nva);
	resource = fm_nvlist_create(nva);
	switch (cpunodes[aflt->flt_inst].implementation) {
	case CHEETAH_IMPL:
		cpu_type = FM_EREPORT_CPU_USIII;
		break;
	case CHEETAH_PLUS_IMPL:
		cpu_type = FM_EREPORT_CPU_USIIIplus;
		break;
	case JALAPENO_IMPL:
		cpu_type = FM_EREPORT_CPU_USIIIi;
		break;
	case SERRANO_IMPL:
		cpu_type = FM_EREPORT_CPU_USIIIiplus;
		break;
	case JAGUAR_IMPL:
		cpu_type = FM_EREPORT_CPU_USIV;
		break;
	case PANTHER_IMPL:
		cpu_type = FM_EREPORT_CPU_USIVplus;
		break;
	default:
		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
		break;
	}

	cpu_fmri_cpu_set(detector, aflt->flt_inst);

	/*
	 * Encode all the common data into the ereport.
	 */
	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);

	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
	    detector, NULL);

	/*
	 * Encode the error specific data that was saved in
	 * the async_flt structure into the ereport.
	 */
	cpu_payload_add_aflt(aflt, ereport, resource,
	    &plat_ecc_ch_flt.ecaf_afar_status,
	    &plat_ecc_ch_flt.ecaf_synd_status);

	if (aflt->flt_panic || panicstr) {
		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
	} else {
		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
		fm_nvlist_destroy(ereport, FM_NVA_FREE);
		fm_nvlist_destroy(detector, FM_NVA_FREE);
		fm_nvlist_destroy(resource, FM_NVA_FREE);
	}

	/*
	 * Send the enhanced error information (plat_ecc_error2_data_t)
	 * to the SC only if it can process it.
	 */
	if (&plat_ecc_capability_sc_get &&
	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
		msg_type = cpu_flt_bit_to_plat_error(aflt);
		if (msg_type != PLAT_ECC_ERROR2_NONE) {
			/*
			 * If afar status is not invalid do a unum lookup.
			 */
			if (plat_ecc_ch_flt.ecaf_afar_status !=
			    AFLT_STAT_INVALID) {
				synd_code = synd_to_synd_code(
				    plat_ecc_ch_flt.ecaf_synd_status,
				    aflt->flt_synd, ch_flt->flt_bit);
				(void) cpu_get_mem_unum_synd(synd_code,
				    aflt, unum);
			} else {
				unum[0] = '\0';
			}
			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
			    ch_flt->flt_sdw_afsr_ext;

			if (&plat_log_fruid_error2)
				plat_log_fruid_error2(msg_type, unum, aflt,
				    &plat_ecc_ch_flt);
		}
	}
}

void
cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
{
	int status;
	ddi_fm_error_t de;

	bzero(&de, sizeof (ddi_fm_error_t));

	de.fme_version = DDI_FME_VERSION;
	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
	    FM_ENA_FMT1);
	de.fme_flag = expected;
	de.fme_bus_specific = (void *)aflt->flt_addr;
	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
		aflt->flt_panic = 1;
}

void
cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
    errorq_t *eqp, uint_t flag)
{
	struct async_flt *aflt = (struct async_flt *)payload;

	aflt->flt_erpt_class = error_class;
	errorq_dispatch(eqp, payload, payload_sz, flag);
}

/*
 * This routine may be called by the IO module, but does not do
 * anything in this cpu module.  The SERD algorithm is handled by
 * cpumem-diagnosis engine instead.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{}
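
/*
 * cpu_run_bus_error_handlers() above escalates to a panic only when two
 * conditions hold at once: the faulting access carried no fault protection
 * and the dispatched handlers reported a fatal result.  The guarded block
 * below is a minimal standalone sketch of that decision; the x_* names and
 * constants are hypothetical simplifications, not the DDI definitions used
 * above.
 */
#if 0	/* illustrative sketch only, not compiled as part of this module */
#define	X_PROT_NONE	0	/* no on_trap/lofault protection in effect */
#define	X_FM_FATAL	(-1)	/* handlers judged the error fatal */

static int
x_should_panic(int prot, int handler_status)
{
	/* Panic only for an unprotected access that a handler calls fatal. */
	return (prot == X_PROT_NONE && handler_status == X_FM_FATAL);
}
#endif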

void
adjust_hw_copy_limits(int ecache_size)
{
	/*
	 * Set hw copy limits.
	 *
	 * /etc/system will be parsed later and can override one or more
	 * of these settings.
	 *
	 * At this time, ecache size seems only mildly relevant.
	 * We seem to run into issues with the d-cache and stalls
	 * we see on misses.
	 *
	 * Cycle measurement indicates that 2 byte aligned copies fare
	 * little better than doing things with VIS at around 512 bytes.
	 * 4 byte aligned shows promise until around 1024 bytes.  8 byte
	 * aligned is faster whenever the source and destination data
	 * are in cache and the total size is less than 2 Kbytes.  The 2K
	 * limit seems to be driven by the 2K write cache.
	 * When more than 2K of copies are done in non-VIS mode, stores
	 * back up in the write cache.  In VIS mode, the write cache is
	 * bypassed, allowing faster cache-line writes aligned on cache
	 * boundaries.
	 *
	 * In addition, in non-VIS mode, there is no prefetching, so
	 * for larger copies, the advantage of prefetching to avoid even
	 * occasional cache misses is enough to justify using the VIS code.
	 *
	 * During testing, it was discovered that netbench ran 3% slower
	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
	 * applications, data is only used once (copied to the output
	 * buffer, then copied by the network device off the system).  Using
	 * the VIS copy saves more L2 cache state.  Network copies are
	 * around 1.3K to 1.5K in size for historical reasons.
	 *
	 * Therefore, a limit of 1K bytes will be used for the 8 byte
	 * aligned copy even for large caches and 8 MB ecache.  The
	 * infrastructure to allow different limits for different sized
	 * caches is kept to allow further tuning in later releases.
	 */

	if (min_ecache_size == 0 && use_hw_bcopy) {
		/*
		 * First time through - should be before /etc/system
		 * is read.
		 * Could skip the checks for zero but this lets us
		 * preserve any debugger rewrites.
		 */
		if (hw_copy_limit_1 == 0) {
			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
			priv_hcl_1 = hw_copy_limit_1;
		}
		if (hw_copy_limit_2 == 0) {
			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
			priv_hcl_2 = hw_copy_limit_2;
		}
		if (hw_copy_limit_4 == 0) {
			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
			priv_hcl_4 = hw_copy_limit_4;
		}
		if (hw_copy_limit_8 == 0) {
			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
			priv_hcl_8 = hw_copy_limit_8;
		}
		min_ecache_size = ecache_size;
	} else {
		/*
		 * MP initialization.  Called *after* /etc/system has
		 * been parsed.  One CPU has already been initialized.
		 * Need to cater for /etc/system having scragged one
		 * of our values.
		 */
		if (ecache_size == min_ecache_size) {
			/*
			 * Same size ecache.  We do nothing unless we
			 * have a pessimistic ecache setting.  In that
			 * case we become more optimistic (if the cache is
			 * large enough).
			 */
			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
				/*
				 * Need to adjust hw_copy_limit* from our
				 * pessimistic uniprocessor value to a more
				 * optimistic UP value *iff* it hasn't been
				 * reset.
				 */
				if ((ecache_size > 1048576) &&
				    (priv_hcl_8 == hw_copy_limit_8)) {
					if (ecache_size <= 2097152)
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					else if (ecache_size <= 4194304)
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					else
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					priv_hcl_8 = hw_copy_limit_8;
				}
			}
		} else if (ecache_size < min_ecache_size) {
			/*
			 * A different ecache size.  Can this even happen?
			 */
			if (priv_hcl_8 == hw_copy_limit_8) {
				/*
				 * The previous value that we set
				 * is unchanged (i.e., it hasn't been
				 * scragged by /etc/system).  Rewrite it.
				 */
				if (ecache_size <= 1048576)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else if (ecache_size <= 2097152)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else if (ecache_size <= 4194304)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else
					hw_copy_limit_8 = 10 *
					    VIS_COPY_THRESHOLD;
				priv_hcl_8 = hw_copy_limit_8;
				min_ecache_size = ecache_size;
			}
		}
	}
}

/*
 * Called from illegal instruction trap handler to see if we can attribute
 * the trap to a fpras check.
 */
int
fpras_chktrap(struct regs *rp)
{
	int op;
	struct fpras_chkfngrp *cgp;
	uintptr_t tpc = (uintptr_t)rp->r_pc;

	if (fpras_chkfngrps == NULL)
		return (0);

	cgp = &fpras_chkfngrps[CPU->cpu_id];
	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
			break;
	}
	if (op == FPRAS_NCOPYOPS)
		return (0);

	/*
	 * This is an fpRAS failure caught through an illegal
	 * instruction - trampoline.
	 */
	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
	rp->r_npc = rp->r_pc + 4;
	return (1);
}
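
/*
 * fpras_chktrap() above attributes an illegal-instruction trap to an fpras
 * check block by testing whether the trap PC lies inside the per-CPU copy
 * of one of the check functions, then redirecting execution to that block's
 * trampoline.  The guarded block below is a standalone sketch of the range
 * test only, using a hypothetical descriptor rather than struct fpras_chkfn.
 */
#if 0	/* illustrative sketch only, not compiled as part of this module */
#include <stdint.h>
#include <stddef.h>

struct x_range {
	uintptr_t	xr_start;	/* first byte of the check block */
	uintptr_t	xr_end;		/* first byte past the check block */
};

/* Return the index of the range containing pc, or -1 if none matches. */
static int
x_match_pc(const struct x_range *r, size_t nranges, uintptr_t pc)
{
	size_t i;

	for (i = 0; i < nranges; i++) {
		if (pc >= r[i].xr_start && pc < r[i].xr_end)
			return ((int)i);
	}
	return (-1);
}
#endif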

/*
 * fpras_failure is called when a fpras check detects a bad calculation
 * result or an illegal instruction trap is attributed to an fpras
 * check.  In all cases we are still bound to CPU.
 */
int
fpras_failure(int op, int how)
{
	int use_hw_bcopy_orig, use_hw_bzero_orig;
	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
	ch_async_flt_t ch_flt;
	struct async_flt *aflt = (struct async_flt *)&ch_flt;
	struct fpras_chkfn *sfp, *cfp;
	uint32_t *sip, *cip;
	int i;

	/*
	 * We're running on a sick CPU.  Avoid further FPU use at least for
	 * the time in which we dispatch an ereport and (if applicable) panic.
	 */
	use_hw_bcopy_orig = use_hw_bcopy;
	use_hw_bzero_orig = use_hw_bzero;
	hcl1_orig = hw_copy_limit_1;
	hcl2_orig = hw_copy_limit_2;
	hcl4_orig = hw_copy_limit_4;
	hcl8_orig = hw_copy_limit_8;
	use_hw_bcopy = use_hw_bzero = 0;
	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
	    hw_copy_limit_8 = 0;

	bzero(&ch_flt, sizeof (ch_async_flt_t));
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_class = CPU_FAULT;
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_status = (how << 8) | op;
	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
	ch_flt.flt_type = CPU_FPUERR;

	/*
	 * We must panic if the copy operation had no lofault protection -
	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
	 */
	aflt->flt_panic = (curthread->t_lofault == NULL);

	/*
	 * XOR the source instruction block with the copied instruction
	 * block - this will show us which bit(s) are corrupted.
	 */
	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
		sip = &sfp->fpras_blk0[0];
		cip = &cfp->fpras_blk0[0];
	} else {
		sip = &sfp->fpras_blk1[0];
		cip = &cfp->fpras_blk1[0];
	}
	for (i = 0; i < 16; ++i, ++sip, ++cip)
		ch_flt.flt_fpdata[i] = *sip ^ *cip;

	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);

	if (aflt->flt_panic)
		fm_panic("FPU failure on CPU %d", CPU->cpu_id);

	/*
	 * We get here for copyin/copyout and kcopy or bcopy where the
	 * caller has used on_fault.  We will flag the error so that
	 * the process may be killed.  The trap_async_hwerr mechanism will
	 * take appropriate further action (such as a reboot, contract
	 * notification etc).  Since we may be continuing we will
	 * restore the global hardware copy acceleration switches.
	 *
	 * When we return from this function to the copy function we want to
	 * avoid potentially bad data being used, ie we want the affected
	 * copy function to return an error.  The caller should therefore
	 * invoke its lofault handler (which always exists for these functions)
	 * which will return the appropriate error.
	 */
	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
	aston(curthread);
	use_hw_bcopy = use_hw_bcopy_orig;
	use_hw_bzero = use_hw_bzero_orig;
	hw_copy_limit_1 = hcl1_orig;
	hw_copy_limit_2 = hcl2_orig;
	hw_copy_limit_4 = hcl4_orig;
	hw_copy_limit_8 = hcl8_orig;

	return (1);
}

#define	VIS_BLOCKSIZE		64

int
dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
{
	int ret, watched;

	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
	ret = dtrace_blksuword32(addr, data, 0);
	if (watched)
		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);

	return (ret);
}

/*
 * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
 * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
 * CEEN from the EER to disable traps for further disrupting error types
 * on that cpu.  We could cross-call instead, but that has a larger
 * instruction and data footprint than cross-trapping, and the cpu is known
 * to be faulted.
 */
void
cpu_faulted_enter(struct cpu *cp)
{
	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
}
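
/*
 * fpras_failure() above XORs the pristine check-function image against the
 * per-CPU copy so the ereport payload records exactly which bits were
 * corrupted by the failed copy.  The guarded block below is a minimal
 * standalone sketch of that comparison over a 16-word block; x_diff_block()
 * and its parameters are hypothetical, and the 16-word length mirrors the
 * loop above.
 */
#if 0	/* illustrative sketch only, not compiled as part of this module */
#include <stdint.h>

#define	X_BLKWORDS	16

/* Store src ^ copy per word; a nonzero word marks corrupted bit positions. */
static void
x_diff_block(const uint32_t *src, const uint32_t *copy, uint32_t *diff)
{
	int i;

	for (i = 0; i < X_BLKWORDS; i++)
		diff[i] = src[i] ^ copy[i];
}
#endif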

/*
 * Called when a cpu leaves the CPU_FAULTED state to return to one of
 * offline, spare, or online (by the cpu requesting this state change).
 * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
 * disrupting error bits that have accumulated without trapping, then
 * we cross-trap to re-enable CEEN controlled traps.
 */
void
cpu_faulted_exit(struct cpu *cp)
{
	ch_cpu_errors_t cpu_error_regs;

	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
	    (uint64_t)&cpu_error_regs, 0);

	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
}

/*
 * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
 * the errors in the original AFSR, 0 otherwise.
 *
 * For all procs if the initial error was a BERR or TO, then it is possible
 * that we may have caused a secondary BERR or TO in the process of logging the
 * initial error via cpu_run_bus_error_handlers().  If this is the case then
 * if the request was protected then a panic is still not necessary, if not
 * protected then aft_panic is already set - so either way there's no need
 * to set aft_panic for the secondary error.
 *
 * For Cheetah and Jalapeno, if the original error was a UE which occurred on
 * a store merge, then the error handling code will call cpu_deferred_error().
 * When clear_errors() is called, it will determine that secondary errors have
 * occurred - in particular, the store merge also caused an EDU and WDU that
 * weren't discovered until this point.
 *
 * We do three checks to verify that we are in this case.  If we pass all three
 * checks, we return 1 to indicate that we should not panic.  If any unexpected
 * errors occur, we return 0.
 *
 * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
 * handled in cpu_disrupting_errors().  Since this function is not even called
 * in the case we are interested in, we just return 0 for these processors.
 */
/*ARGSUSED*/
static int
cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
    uint64_t t_afar)
{
#if defined(CHEETAH_PLUS)
#else	/* CHEETAH_PLUS */
	struct async_flt *aflt = (struct async_flt *)ch_flt;
#endif	/* CHEETAH_PLUS */

	/*
	 * Was the original error a BERR or TO and only a BERR or TO
	 * (multiple errors are also OK)
	 */
	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
		/*
		 * Is the new error a BERR or TO and only a BERR or TO
		 * (multiple errors are also OK)
		 */
		if ((ch_flt->afsr_errs &
		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
			return (1);
	}

#if defined(CHEETAH_PLUS)
	return (0);
#else	/* CHEETAH_PLUS */
	/*
	 * Now look for secondary effects of a UE on cheetah/jalapeno
	 *
	 * Check the original error was a UE, and only a UE.  Note that
	 * the ME bit will cause us to fail this check.
	 */
	if (t_afsr_errs != C_AFSR_UE)
		return (0);

	/*
	 * Check the secondary errors were exclusively an EDU and/or WDU.
	 */
	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
		return (0);

	/*
	 * Check the AFAR of the original error and secondary errors
	 * match to the 64-byte boundary
	 */
	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
		return (0);

	/*
	 * We've passed all the checks, so it's a secondary error!
	 */
	return (1);
#endif	/* CHEETAH_PLUS */
}
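
/*
 * The last test in cpu_check_secondary_errors() above treats two AFARs as
 * belonging to the same event only if they fall within the same 64-byte
 * block.  The guarded block below is a standalone sketch of that comparison;
 * x_same_line() is a hypothetical helper and the 64-byte boundary mirrors
 * the P2ALIGN(..., 64) usage above.
 */
#if 0	/* illustrative sketch only, not compiled as part of this module */
#include <stdint.h>

/* Round both addresses down to a 64-byte boundary and compare. */
static int
x_same_line(uint64_t addr1, uint64_t addr2)
{
	const uint64_t mask = ~(uint64_t)(64 - 1);

	return ((addr1 & mask) == (addr2 & mask));
}
#endif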

/*
 * Translate the flt_bit or flt_type into an error type.  First, flt_bit
 * is checked for any valid errors.  If found, the error type is
 * returned.  If not found, the flt_type is checked for L1$ parity errors.
 */
/*ARGSUSED*/
static uint8_t
cpu_flt_bit_to_plat_error(struct async_flt *aflt)
{
#if defined(JALAPENO)
	/*
	 * Currently, logging errors to the SC is not supported on Jalapeno
	 */
	return (PLAT_ECC_ERROR2_NONE);
#else
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;

	switch (ch_flt->flt_bit) {
	case C_AFSR_CE:
		return (PLAT_ECC_ERROR2_CE);
	case C_AFSR_UCC:
	case C_AFSR_EDC:
	case C_AFSR_WDC:
	case C_AFSR_CPC:
		return (PLAT_ECC_ERROR2_L2_CE);
	case C_AFSR_EMC:
		return (PLAT_ECC_ERROR2_EMC);
	case C_AFSR_IVC:
		return (PLAT_ECC_ERROR2_IVC);
	case C_AFSR_UE:
		return (PLAT_ECC_ERROR2_UE);
	case C_AFSR_UCU:
	case C_AFSR_EDU:
	case C_AFSR_WDU:
	case C_AFSR_CPU:
		return (PLAT_ECC_ERROR2_L2_UE);
	case C_AFSR_IVU:
		return (PLAT_ECC_ERROR2_IVU);
	case C_AFSR_TO:
		return (PLAT_ECC_ERROR2_TO);
	case C_AFSR_BERR:
		return (PLAT_ECC_ERROR2_BERR);
#if defined(CHEETAH_PLUS)
	case C_AFSR_L3_EDC:
	case C_AFSR_L3_UCC:
	case C_AFSR_L3_CPC:
	case C_AFSR_L3_WDC:
		return (PLAT_ECC_ERROR2_L3_CE);
	case C_AFSR_IMC:
		return (PLAT_ECC_ERROR2_IMC);
	case C_AFSR_TSCE:
		return (PLAT_ECC_ERROR2_L2_TSCE);
	case C_AFSR_THCE:
		return (PLAT_ECC_ERROR2_L2_THCE);
	case C_AFSR_L3_MECC:
		return (PLAT_ECC_ERROR2_L3_MECC);
	case C_AFSR_L3_THCE:
		return (PLAT_ECC_ERROR2_L3_THCE);
	case C_AFSR_L3_CPU:
	case C_AFSR_L3_EDU:
	case C_AFSR_L3_UCU:
	case C_AFSR_L3_WDU:
		return (PLAT_ECC_ERROR2_L3_UE);
	case C_AFSR_DUE:
		return (PLAT_ECC_ERROR2_DUE);
	case C_AFSR_DTO:
		return (PLAT_ECC_ERROR2_DTO);
	case C_AFSR_DBERR:
		return (PLAT_ECC_ERROR2_DBERR);
#endif	/* CHEETAH_PLUS */
	default:
		switch (ch_flt->flt_type) {
#if defined(CPU_IMP_L1_CACHE_PARITY)
		case CPU_IC_PARITY:
			return (PLAT_ECC_ERROR2_IPE);
		case CPU_DC_PARITY:
			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
				if (ch_flt->parity_data.dpe.cpl_cache ==
				    CPU_PC_PARITY) {
					return (PLAT_ECC_ERROR2_PCACHE);
				}
			}
			return (PLAT_ECC_ERROR2_DPE);
#endif	/* CPU_IMP_L1_CACHE_PARITY */
		case CPU_ITLB_PARITY:
			return (PLAT_ECC_ERROR2_ITLB);
		case CPU_DTLB_PARITY:
			return (PLAT_ECC_ERROR2_DTLB);
		default:
			return (PLAT_ECC_ERROR2_NONE);
		}
	}
#endif	/* JALAPENO */
}
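
/*
 * cpu_flt_bit_to_plat_error() above is a direct switch from a single AFSR
 * bit (or, failing that, an L1$/TLB parity fault type) to a platform error2
 * category.  The same shape of mapping can also be expressed as a table
 * walk; the guarded block below illustrates that alternative with
 * hypothetical x_* bit and category values rather than the real C_AFSR_*
 * and PLAT_ECC_ERROR2_* definitions.
 */
#if 0	/* illustrative sketch only, not compiled as part of this module */
#include <stdint.h>
#include <stddef.h>

struct x_bit_to_cat {
	uint64_t	xb_bit;		/* single error bit */
	uint8_t		xb_cat;		/* corresponding category */
};

static const struct x_bit_to_cat x_map[] = {
	{ 0x1ULL, 1 },			/* e.g. memory CE -> category 1 */
	{ 0x2ULL, 2 },			/* e.g. L2 CE -> category 2 */
};

static uint8_t
x_bit_to_cat(uint64_t bit)
{
	size_t i;

	for (i = 0; i < sizeof (x_map) / sizeof (x_map[0]); i++) {
		if (x_map[i].xb_bit == bit)
			return (x_map[i].xb_cat);
	}
	return (0);			/* 0 == no category, like _NONE */
}
#endif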