1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*7c478bd9Sstevel@tonic-gate 29*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 30*7c478bd9Sstevel@tonic-gate #include <sys/systm.h> 31*7c478bd9Sstevel@tonic-gate #include <sys/archsystm.h> 32*7c478bd9Sstevel@tonic-gate #include <sys/machparam.h> 33*7c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> 34*7c478bd9Sstevel@tonic-gate #include <sys/cpu.h> 35*7c478bd9Sstevel@tonic-gate #include <sys/elf_SPARC.h> 36*7c478bd9Sstevel@tonic-gate #include <vm/hat_sfmmu.h> 37*7c478bd9Sstevel@tonic-gate #include <vm/page.h> 38*7c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 39*7c478bd9Sstevel@tonic-gate #include <sys/spitregs.h> 40*7c478bd9Sstevel@tonic-gate #include <sys/async.h> 41*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 42*7c478bd9Sstevel@tonic-gate #include <sys/debug.h> 43*7c478bd9Sstevel@tonic-gate #include <sys/dditypes.h> 44*7c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 45*7c478bd9Sstevel@tonic-gate #include <sys/cpu_module.h> 46*7c478bd9Sstevel@tonic-gate #include <sys/prom_debug.h> 47*7c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h> 48*7c478bd9Sstevel@tonic-gate #include <sys/prom_plat.h> 49*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 50*7c478bd9Sstevel@tonic-gate #include <sys/intreg.h> 51*7c478bd9Sstevel@tonic-gate #include <sys/machtrap.h> 52*7c478bd9Sstevel@tonic-gate #include <sys/ontrap.h> 53*7c478bd9Sstevel@tonic-gate #include <sys/ivintr.h> 54*7c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 55*7c478bd9Sstevel@tonic-gate #include <sys/panic.h> 56*7c478bd9Sstevel@tonic-gate #include <sys/ndifm.h> 57*7c478bd9Sstevel@tonic-gate #include <sys/fm/protocol.h> 58*7c478bd9Sstevel@tonic-gate #include <sys/fm/util.h> 59*7c478bd9Sstevel@tonic-gate #include <sys/fm/cpu/UltraSPARC-II.h> 60*7c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 61*7c478bd9Sstevel@tonic-gate #include <sys/ecc_kstat.h> 
62*7c478bd9Sstevel@tonic-gate #include <sys/watchpoint.h> 63*7c478bd9Sstevel@tonic-gate #include <sys/dtrace.h> 64*7c478bd9Sstevel@tonic-gate #include <sys/errclassify.h> 65*7c478bd9Sstevel@tonic-gate 66*7c478bd9Sstevel@tonic-gate uchar_t *ctx_pgsz_array = NULL; 67*7c478bd9Sstevel@tonic-gate 68*7c478bd9Sstevel@tonic-gate /* 69*7c478bd9Sstevel@tonic-gate * Structure for the 8 byte ecache data dump and the associated AFSR state. 70*7c478bd9Sstevel@tonic-gate * There will be 8 of these structures used to dump an ecache line (64 bytes). 71*7c478bd9Sstevel@tonic-gate */ 72*7c478bd9Sstevel@tonic-gate typedef struct sf_ec_data_elm { 73*7c478bd9Sstevel@tonic-gate uint64_t ec_d8; 74*7c478bd9Sstevel@tonic-gate uint64_t ec_afsr; 75*7c478bd9Sstevel@tonic-gate } ec_data_t; 76*7c478bd9Sstevel@tonic-gate 77*7c478bd9Sstevel@tonic-gate /* 78*7c478bd9Sstevel@tonic-gate * Define spitfire (Ultra I/II) specific asynchronous error structure 79*7c478bd9Sstevel@tonic-gate */ 80*7c478bd9Sstevel@tonic-gate typedef struct spitfire_async_flt { 81*7c478bd9Sstevel@tonic-gate struct async_flt cmn_asyncflt; /* common - see sun4u/sys/async.h */ 82*7c478bd9Sstevel@tonic-gate ushort_t flt_type; /* types of faults - cpu specific */ 83*7c478bd9Sstevel@tonic-gate ec_data_t flt_ec_data[8]; /* for E$ or mem dump/state */ 84*7c478bd9Sstevel@tonic-gate uint64_t flt_ec_tag; /* E$ tag info */ 85*7c478bd9Sstevel@tonic-gate int flt_ec_lcnt; /* number of bad E$ lines */ 86*7c478bd9Sstevel@tonic-gate ushort_t flt_sdbh; /* UDBH reg */ 87*7c478bd9Sstevel@tonic-gate ushort_t flt_sdbl; /* UDBL reg */ 88*7c478bd9Sstevel@tonic-gate } spitf_async_flt; 89*7c478bd9Sstevel@tonic-gate 90*7c478bd9Sstevel@tonic-gate /* 91*7c478bd9Sstevel@tonic-gate * Prototypes for support routines in spitfire_asm.s: 92*7c478bd9Sstevel@tonic-gate */ 93*7c478bd9Sstevel@tonic-gate extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize); 94*7c478bd9Sstevel@tonic-gate extern uint64_t get_lsu(void); 95*7c478bd9Sstevel@tonic-gate 
extern void set_lsu(uint64_t ncc); 96*7c478bd9Sstevel@tonic-gate extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag, 97*7c478bd9Sstevel@tonic-gate uint64_t *oafsr, uint64_t *acc_afsr); 98*7c478bd9Sstevel@tonic-gate extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr); 99*7c478bd9Sstevel@tonic-gate extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr, 100*7c478bd9Sstevel@tonic-gate uint64_t *acc_afsr); 101*7c478bd9Sstevel@tonic-gate extern uint64_t read_and_clear_afsr(); 102*7c478bd9Sstevel@tonic-gate extern void write_ec_tag_parity(uint32_t id); 103*7c478bd9Sstevel@tonic-gate extern void write_hb_ec_tag_parity(uint32_t id); 104*7c478bd9Sstevel@tonic-gate 105*7c478bd9Sstevel@tonic-gate /* 106*7c478bd9Sstevel@tonic-gate * Spitfire module routines: 107*7c478bd9Sstevel@tonic-gate */ 108*7c478bd9Sstevel@tonic-gate static void cpu_async_log_err(void *flt); 109*7c478bd9Sstevel@tonic-gate /*PRINTFLIKE6*/ 110*7c478bd9Sstevel@tonic-gate static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, 111*7c478bd9Sstevel@tonic-gate uint_t logflags, const char *endstr, const char *fmt, ...); 112*7c478bd9Sstevel@tonic-gate 113*7c478bd9Sstevel@tonic-gate static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err); 114*7c478bd9Sstevel@tonic-gate static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum); 115*7c478bd9Sstevel@tonic-gate static void cpu_log_ecmem_info(spitf_async_flt *spf_flt); 116*7c478bd9Sstevel@tonic-gate 117*7c478bd9Sstevel@tonic-gate static void log_ce_err(struct async_flt *aflt, char *unum); 118*7c478bd9Sstevel@tonic-gate static void log_ue_err(struct async_flt *aflt, char *unum); 119*7c478bd9Sstevel@tonic-gate static void check_misc_err(spitf_async_flt *spf_flt); 120*7c478bd9Sstevel@tonic-gate static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes); 121*7c478bd9Sstevel@tonic-gate static int check_ecc(struct async_flt *aflt); 122*7c478bd9Sstevel@tonic-gate static uint_t 
get_cpu_status(uint64_t arg); 123*7c478bd9Sstevel@tonic-gate static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr); 124*7c478bd9Sstevel@tonic-gate static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag, 125*7c478bd9Sstevel@tonic-gate int *m, uint64_t *afsr); 126*7c478bd9Sstevel@tonic-gate static void ecache_kstat_init(struct cpu *cp); 127*7c478bd9Sstevel@tonic-gate static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, 128*7c478bd9Sstevel@tonic-gate uint64_t paddr, int mpb, uint64_t); 129*7c478bd9Sstevel@tonic-gate static uint64_t ecache_scrub_misc_err(int, uint64_t); 130*7c478bd9Sstevel@tonic-gate static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t); 131*7c478bd9Sstevel@tonic-gate static void ecache_page_retire(void *); 132*7c478bd9Sstevel@tonic-gate static int ecc_kstat_update(kstat_t *ksp, int rw); 133*7c478bd9Sstevel@tonic-gate static int ce_count_unum(int status, int len, char *unum); 134*7c478bd9Sstevel@tonic-gate static void add_leaky_bucket_timeout(void); 135*7c478bd9Sstevel@tonic-gate static int synd_to_synd_code(int synd_status, ushort_t synd); 136*7c478bd9Sstevel@tonic-gate 137*7c478bd9Sstevel@tonic-gate extern uint_t read_all_memscrub; 138*7c478bd9Sstevel@tonic-gate extern void memscrub_run(void); 139*7c478bd9Sstevel@tonic-gate 140*7c478bd9Sstevel@tonic-gate static uchar_t isus2i; /* set if sabre */ 141*7c478bd9Sstevel@tonic-gate static uchar_t isus2e; /* set if hummingbird */ 142*7c478bd9Sstevel@tonic-gate 143*7c478bd9Sstevel@tonic-gate /* 144*7c478bd9Sstevel@tonic-gate * Default ecache mask and shift settings for Spitfire. If we detect a 145*7c478bd9Sstevel@tonic-gate * different CPU implementation, we will modify these values at boot time. 
146*7c478bd9Sstevel@tonic-gate */ 147*7c478bd9Sstevel@tonic-gate static uint64_t cpu_ec_tag_mask = S_ECTAG_MASK; 148*7c478bd9Sstevel@tonic-gate static uint64_t cpu_ec_state_mask = S_ECSTATE_MASK; 149*7c478bd9Sstevel@tonic-gate static uint64_t cpu_ec_par_mask = S_ECPAR_MASK; 150*7c478bd9Sstevel@tonic-gate static int cpu_ec_par_shift = S_ECPAR_SHIFT; 151*7c478bd9Sstevel@tonic-gate static int cpu_ec_tag_shift = S_ECTAG_SHIFT; 152*7c478bd9Sstevel@tonic-gate static int cpu_ec_state_shift = S_ECSTATE_SHIFT; 153*7c478bd9Sstevel@tonic-gate static uchar_t cpu_ec_state_exl = S_ECSTATE_EXL; 154*7c478bd9Sstevel@tonic-gate static uchar_t cpu_ec_state_mod = S_ECSTATE_MOD; 155*7c478bd9Sstevel@tonic-gate static uchar_t cpu_ec_state_shr = S_ECSTATE_SHR; 156*7c478bd9Sstevel@tonic-gate static uchar_t cpu_ec_state_own = S_ECSTATE_OWN; 157*7c478bd9Sstevel@tonic-gate 158*7c478bd9Sstevel@tonic-gate /* 159*7c478bd9Sstevel@tonic-gate * Default ecache state bits for Spitfire. These individual bits indicate if 160*7c478bd9Sstevel@tonic-gate * the given line is in any of the valid or modified states, respectively. 161*7c478bd9Sstevel@tonic-gate * Again, we modify these at boot if we detect a different CPU. 162*7c478bd9Sstevel@tonic-gate */ 163*7c478bd9Sstevel@tonic-gate static uchar_t cpu_ec_state_valid = S_ECSTATE_VALID; 164*7c478bd9Sstevel@tonic-gate static uchar_t cpu_ec_state_dirty = S_ECSTATE_DIRTY; 165*7c478bd9Sstevel@tonic-gate static uchar_t cpu_ec_parity = S_EC_PARITY; 166*7c478bd9Sstevel@tonic-gate static uchar_t cpu_ec_state_parity = S_ECSTATE_PARITY; 167*7c478bd9Sstevel@tonic-gate 168*7c478bd9Sstevel@tonic-gate /* 169*7c478bd9Sstevel@tonic-gate * This table is used to determine which bit(s) is(are) bad when an ECC 170*7c478bd9Sstevel@tonic-gate * error occurrs. The array is indexed an 8-bit syndrome. 
The entries 171*7c478bd9Sstevel@tonic-gate * of this array have the following semantics: 172*7c478bd9Sstevel@tonic-gate * 173*7c478bd9Sstevel@tonic-gate * 00-63 The number of the bad bit, when only one bit is bad. 174*7c478bd9Sstevel@tonic-gate * 64 ECC bit C0 is bad. 175*7c478bd9Sstevel@tonic-gate * 65 ECC bit C1 is bad. 176*7c478bd9Sstevel@tonic-gate * 66 ECC bit C2 is bad. 177*7c478bd9Sstevel@tonic-gate * 67 ECC bit C3 is bad. 178*7c478bd9Sstevel@tonic-gate * 68 ECC bit C4 is bad. 179*7c478bd9Sstevel@tonic-gate * 69 ECC bit C5 is bad. 180*7c478bd9Sstevel@tonic-gate * 70 ECC bit C6 is bad. 181*7c478bd9Sstevel@tonic-gate * 71 ECC bit C7 is bad. 182*7c478bd9Sstevel@tonic-gate * 72 Two bits are bad. 183*7c478bd9Sstevel@tonic-gate * 73 Three bits are bad. 184*7c478bd9Sstevel@tonic-gate * 74 Four bits are bad. 185*7c478bd9Sstevel@tonic-gate * 75 More than Four bits are bad. 186*7c478bd9Sstevel@tonic-gate * 76 NO bits are bad. 187*7c478bd9Sstevel@tonic-gate * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28. 
188*7c478bd9Sstevel@tonic-gate */ 189*7c478bd9Sstevel@tonic-gate 190*7c478bd9Sstevel@tonic-gate #define C0 64 191*7c478bd9Sstevel@tonic-gate #define C1 65 192*7c478bd9Sstevel@tonic-gate #define C2 66 193*7c478bd9Sstevel@tonic-gate #define C3 67 194*7c478bd9Sstevel@tonic-gate #define C4 68 195*7c478bd9Sstevel@tonic-gate #define C5 69 196*7c478bd9Sstevel@tonic-gate #define C6 70 197*7c478bd9Sstevel@tonic-gate #define C7 71 198*7c478bd9Sstevel@tonic-gate #define M2 72 199*7c478bd9Sstevel@tonic-gate #define M3 73 200*7c478bd9Sstevel@tonic-gate #define M4 74 201*7c478bd9Sstevel@tonic-gate #define MX 75 202*7c478bd9Sstevel@tonic-gate #define NA 76 203*7c478bd9Sstevel@tonic-gate 204*7c478bd9Sstevel@tonic-gate #define SYND_IS_SINGLE_BIT_DATA(synd_code) ((synd_code >= 0) && \ 205*7c478bd9Sstevel@tonic-gate (synd_code < C0)) 206*7c478bd9Sstevel@tonic-gate #define SYND_IS_SINGLE_BIT_CHK(synd_code) ((synd_code >= C0) && \ 207*7c478bd9Sstevel@tonic-gate (synd_code <= C7)) 208*7c478bd9Sstevel@tonic-gate 209*7c478bd9Sstevel@tonic-gate static char ecc_syndrome_tab[] = 210*7c478bd9Sstevel@tonic-gate { 211*7c478bd9Sstevel@tonic-gate NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4, 212*7c478bd9Sstevel@tonic-gate C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44, 213*7c478bd9Sstevel@tonic-gate C5, M2, M2, 33, M2, 61, 4, M2, M2, MX, 53, M2, 45, M2, M2, 41, 214*7c478bd9Sstevel@tonic-gate M2, 0, 1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2, 215*7c478bd9Sstevel@tonic-gate C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46, 216*7c478bd9Sstevel@tonic-gate M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2, 217*7c478bd9Sstevel@tonic-gate M2, MX, 36, M2, 7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2, 218*7c478bd9Sstevel@tonic-gate M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX, 219*7c478bd9Sstevel@tonic-gate C7, M2, M2, 47, M2, 63, MX, M2, M2, 6, 55, M2, 35, M2, M2, 43, 220*7c478bd9Sstevel@tonic-gate M2, 5, MX, M2, 
MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2, 221*7c478bd9Sstevel@tonic-gate M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2, 222*7c478bd9Sstevel@tonic-gate M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX, 223*7c478bd9Sstevel@tonic-gate M2, 8, 13, M2, 2, M2, M2, M3, 3, M2, M2, M3, M2, MX, MX, M2, 224*7c478bd9Sstevel@tonic-gate M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX, 225*7c478bd9Sstevel@tonic-gate M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX, 226*7c478bd9Sstevel@tonic-gate M4, 12, 9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4 227*7c478bd9Sstevel@tonic-gate }; 228*7c478bd9Sstevel@tonic-gate 229*7c478bd9Sstevel@tonic-gate #define SYND_TBL_SIZE 256 230*7c478bd9Sstevel@tonic-gate 231*7c478bd9Sstevel@tonic-gate /* 232*7c478bd9Sstevel@tonic-gate * Hack for determining UDBH/UDBL, for later cpu-specific error reporting. 233*7c478bd9Sstevel@tonic-gate * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird. 234*7c478bd9Sstevel@tonic-gate */ 235*7c478bd9Sstevel@tonic-gate #define UDBL_REG 0x8000 236*7c478bd9Sstevel@tonic-gate #define UDBL(synd) ((synd & UDBL_REG) >> 15) 237*7c478bd9Sstevel@tonic-gate #define SYND(synd) (synd & 0x7FFF) 238*7c478bd9Sstevel@tonic-gate 239*7c478bd9Sstevel@tonic-gate /* 240*7c478bd9Sstevel@tonic-gate * These error types are specific to Spitfire and are used internally for the 241*7c478bd9Sstevel@tonic-gate * spitfire fault structure flt_type field. 
242*7c478bd9Sstevel@tonic-gate */ 243*7c478bd9Sstevel@tonic-gate #define CPU_UE_ERR 0 /* uncorrectable errors - UEs */ 244*7c478bd9Sstevel@tonic-gate #define CPU_EDP_LDP_ERR 1 /* LDP or EDP parity error */ 245*7c478bd9Sstevel@tonic-gate #define CPU_WP_ERR 2 /* WP parity error */ 246*7c478bd9Sstevel@tonic-gate #define CPU_BTO_BERR_ERR 3 /* bus timeout errors */ 247*7c478bd9Sstevel@tonic-gate #define CPU_PANIC_CP_ERR 4 /* cp error from panic polling */ 248*7c478bd9Sstevel@tonic-gate #define CPU_TRAPPING_CP_ERR 5 /* for sabre/hbird only, cp error */ 249*7c478bd9Sstevel@tonic-gate #define CPU_BADLINE_CI_ERR 6 /* E$ clean_bad line when idle */ 250*7c478bd9Sstevel@tonic-gate #define CPU_BADLINE_CB_ERR 7 /* E$ clean_bad line when busy */ 251*7c478bd9Sstevel@tonic-gate #define CPU_BADLINE_DI_ERR 8 /* E$ dirty_bad line when idle */ 252*7c478bd9Sstevel@tonic-gate #define CPU_BADLINE_DB_ERR 9 /* E$ dirty_bad line when busy */ 253*7c478bd9Sstevel@tonic-gate #define CPU_ORPHAN_CP_ERR 10 /* Orphan CP error */ 254*7c478bd9Sstevel@tonic-gate #define CPU_ECACHE_ADDR_PAR_ERR 11 /* Ecache Address parity error */ 255*7c478bd9Sstevel@tonic-gate #define CPU_ECACHE_STATE_ERR 12 /* Ecache state error */ 256*7c478bd9Sstevel@tonic-gate #define CPU_ECACHE_ETP_ETS_ERR 13 /* ETP set but ETS is zero */ 257*7c478bd9Sstevel@tonic-gate #define CPU_ECACHE_TAG_ERR 14 /* Scrub the E$ tag, if state clean */ 258*7c478bd9Sstevel@tonic-gate #define CPU_ADDITIONAL_ERR 15 /* Additional errors occurred */ 259*7c478bd9Sstevel@tonic-gate 260*7c478bd9Sstevel@tonic-gate /* 261*7c478bd9Sstevel@tonic-gate * Macro to access the "Spitfire cpu private" data structure. 
262*7c478bd9Sstevel@tonic-gate */ 263*7c478bd9Sstevel@tonic-gate #define CPU_PRIVATE_PTR(cp, x) (&(((spitfire_private_t *)CPU_PRIVATE(cp))->x)) 264*7c478bd9Sstevel@tonic-gate 265*7c478bd9Sstevel@tonic-gate /* 266*7c478bd9Sstevel@tonic-gate * set to 0 to disable automatic retiring of pages on 267*7c478bd9Sstevel@tonic-gate * DIMMs that have excessive soft errors 268*7c478bd9Sstevel@tonic-gate */ 269*7c478bd9Sstevel@tonic-gate int automatic_page_removal = 1; 270*7c478bd9Sstevel@tonic-gate 271*7c478bd9Sstevel@tonic-gate /* 272*7c478bd9Sstevel@tonic-gate * Heuristic for figuring out which module to replace. 273*7c478bd9Sstevel@tonic-gate * Relative likelihood that this P_SYND indicates that this module is bad. 274*7c478bd9Sstevel@tonic-gate * We call it a "score", though, not a relative likelihood. 275*7c478bd9Sstevel@tonic-gate * 276*7c478bd9Sstevel@tonic-gate * Step 1. 277*7c478bd9Sstevel@tonic-gate * Assign a score to each byte of P_SYND according to the following rules: 278*7c478bd9Sstevel@tonic-gate * If no bits on (0x00) or all bits on (0xFF), then give it a 5. 279*7c478bd9Sstevel@tonic-gate * If one bit on, give it a 95. 280*7c478bd9Sstevel@tonic-gate * If seven bits on, give it a 10. 
281*7c478bd9Sstevel@tonic-gate * If two bits on: 282*7c478bd9Sstevel@tonic-gate * in different nybbles, a 90 283*7c478bd9Sstevel@tonic-gate * in same nybble, but unaligned, 85 284*7c478bd9Sstevel@tonic-gate * in same nybble and as an aligned pair, 80 285*7c478bd9Sstevel@tonic-gate * If six bits on, look at the bits that are off: 286*7c478bd9Sstevel@tonic-gate * in same nybble and as an aligned pair, 15 287*7c478bd9Sstevel@tonic-gate * in same nybble, but unaligned, 20 288*7c478bd9Sstevel@tonic-gate * in different nybbles, a 25 289*7c478bd9Sstevel@tonic-gate * If three bits on: 290*7c478bd9Sstevel@tonic-gate * in diferent nybbles, no aligned pairs, 75 291*7c478bd9Sstevel@tonic-gate * in diferent nybbles, one aligned pair, 70 292*7c478bd9Sstevel@tonic-gate * in the same nybble, 65 293*7c478bd9Sstevel@tonic-gate * If five bits on, look at the bits that are off: 294*7c478bd9Sstevel@tonic-gate * in the same nybble, 30 295*7c478bd9Sstevel@tonic-gate * in diferent nybbles, one aligned pair, 35 296*7c478bd9Sstevel@tonic-gate * in diferent nybbles, no aligned pairs, 40 297*7c478bd9Sstevel@tonic-gate * If four bits on: 298*7c478bd9Sstevel@tonic-gate * all in one nybble, 45 299*7c478bd9Sstevel@tonic-gate * as two aligned pairs, 50 300*7c478bd9Sstevel@tonic-gate * one aligned pair, 55 301*7c478bd9Sstevel@tonic-gate * no aligned pairs, 60 302*7c478bd9Sstevel@tonic-gate * 303*7c478bd9Sstevel@tonic-gate * Step 2: 304*7c478bd9Sstevel@tonic-gate * Take the higher of the two scores (one for each byte) as the score 305*7c478bd9Sstevel@tonic-gate * for the module. 306*7c478bd9Sstevel@tonic-gate * 307*7c478bd9Sstevel@tonic-gate * Print the score for each module, and field service should replace the 308*7c478bd9Sstevel@tonic-gate * module with the highest score. 
309*7c478bd9Sstevel@tonic-gate */ 310*7c478bd9Sstevel@tonic-gate 311*7c478bd9Sstevel@tonic-gate /* 312*7c478bd9Sstevel@tonic-gate * In the table below, the first row/column comment indicates the 313*7c478bd9Sstevel@tonic-gate * number of bits on in that nybble; the second row/column comment is 314*7c478bd9Sstevel@tonic-gate * the hex digit. 315*7c478bd9Sstevel@tonic-gate */ 316*7c478bd9Sstevel@tonic-gate 317*7c478bd9Sstevel@tonic-gate static int 318*7c478bd9Sstevel@tonic-gate p_synd_score_table[256] = { 319*7c478bd9Sstevel@tonic-gate /* 0 1 1 2 1 2 2 3 1 2 2 3 2 3 3 4 */ 320*7c478bd9Sstevel@tonic-gate /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F */ 321*7c478bd9Sstevel@tonic-gate /* 0 0 */ 5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45, 322*7c478bd9Sstevel@tonic-gate /* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 323*7c478bd9Sstevel@tonic-gate /* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 324*7c478bd9Sstevel@tonic-gate /* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15, 325*7c478bd9Sstevel@tonic-gate /* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 326*7c478bd9Sstevel@tonic-gate /* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 327*7c478bd9Sstevel@tonic-gate /* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 328*7c478bd9Sstevel@tonic-gate /* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 329*7c478bd9Sstevel@tonic-gate /* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 330*7c478bd9Sstevel@tonic-gate /* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 331*7c478bd9Sstevel@tonic-gate /* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 332*7c478bd9Sstevel@tonic-gate /* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 333*7c478bd9Sstevel@tonic-gate /* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 
55, 55, 35, 50, 35, 35, 15, 334*7c478bd9Sstevel@tonic-gate /* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 335*7c478bd9Sstevel@tonic-gate /* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 336*7c478bd9Sstevel@tonic-gate /* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10, 5, 337*7c478bd9Sstevel@tonic-gate }; 338*7c478bd9Sstevel@tonic-gate 339*7c478bd9Sstevel@tonic-gate int 340*7c478bd9Sstevel@tonic-gate ecc_psynd_score(ushort_t p_synd) 341*7c478bd9Sstevel@tonic-gate { 342*7c478bd9Sstevel@tonic-gate int i, j, a, b; 343*7c478bd9Sstevel@tonic-gate 344*7c478bd9Sstevel@tonic-gate i = p_synd & 0xFF; 345*7c478bd9Sstevel@tonic-gate j = (p_synd >> 8) & 0xFF; 346*7c478bd9Sstevel@tonic-gate 347*7c478bd9Sstevel@tonic-gate a = p_synd_score_table[i]; 348*7c478bd9Sstevel@tonic-gate b = p_synd_score_table[j]; 349*7c478bd9Sstevel@tonic-gate 350*7c478bd9Sstevel@tonic-gate return (a > b ? a : b); 351*7c478bd9Sstevel@tonic-gate } 352*7c478bd9Sstevel@tonic-gate 353*7c478bd9Sstevel@tonic-gate /* 354*7c478bd9Sstevel@tonic-gate * Async Fault Logging 355*7c478bd9Sstevel@tonic-gate * 356*7c478bd9Sstevel@tonic-gate * To ease identifying, reading, and filtering async fault log messages, the 357*7c478bd9Sstevel@tonic-gate * label [AFT#] is now prepended to each async fault message. These messages 358*7c478bd9Sstevel@tonic-gate * and the logging rules are implemented by cpu_aflt_log(), below. 359*7c478bd9Sstevel@tonic-gate * 360*7c478bd9Sstevel@tonic-gate * [AFT0] - Tag for log messages that are associated with corrected ECC errors. 361*7c478bd9Sstevel@tonic-gate * This includes both corrected ECC memory and ecache faults. 362*7c478bd9Sstevel@tonic-gate * 363*7c478bd9Sstevel@tonic-gate * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything 364*7c478bd9Sstevel@tonic-gate * else except CE errors) with a priority of 1 (highest). 
This tag 365*7c478bd9Sstevel@tonic-gate * is also used for panic messages that result from an async fault. 366*7c478bd9Sstevel@tonic-gate * 367*7c478bd9Sstevel@tonic-gate * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC 368*7c478bd9Sstevel@tonic-gate * [AFT3] or parity errors. For example, AFT2 is used for the actual dump 369*7c478bd9Sstevel@tonic-gate * of the E-$ data and tags. 370*7c478bd9Sstevel@tonic-gate * 371*7c478bd9Sstevel@tonic-gate * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not 372*7c478bd9Sstevel@tonic-gate * printed on the console. To send all AFT logs to both the log and the 373*7c478bd9Sstevel@tonic-gate * console, set aft_verbose = 1. 374*7c478bd9Sstevel@tonic-gate */ 375*7c478bd9Sstevel@tonic-gate 376*7c478bd9Sstevel@tonic-gate #define CPU_FLTCPU 0x0001 /* print flt_inst as a CPU id */ 377*7c478bd9Sstevel@tonic-gate #define CPU_SPACE 0x0002 /* print flt_status (data or instr) */ 378*7c478bd9Sstevel@tonic-gate #define CPU_ERRID 0x0004 /* print flt_id */ 379*7c478bd9Sstevel@tonic-gate #define CPU_TL 0x0008 /* print flt_tl */ 380*7c478bd9Sstevel@tonic-gate #define CPU_ERRID_FIRST 0x0010 /* print flt_id first in message */ 381*7c478bd9Sstevel@tonic-gate #define CPU_AFSR 0x0020 /* print flt_stat as decoded %afsr */ 382*7c478bd9Sstevel@tonic-gate #define CPU_AFAR 0x0040 /* print flt_addr as %afar */ 383*7c478bd9Sstevel@tonic-gate #define CPU_AF_PSYND 0x0080 /* print flt_stat %afsr.PSYND */ 384*7c478bd9Sstevel@tonic-gate #define CPU_AF_ETS 0x0100 /* print flt_stat %afsr.ETS */ 385*7c478bd9Sstevel@tonic-gate #define CPU_UDBH 0x0200 /* print flt_sdbh and syndrome */ 386*7c478bd9Sstevel@tonic-gate #define CPU_UDBL 0x0400 /* print flt_sdbl and syndrome */ 387*7c478bd9Sstevel@tonic-gate #define CPU_FAULTPC 0x0800 /* print flt_pc */ 388*7c478bd9Sstevel@tonic-gate #define CPU_SYND 0x1000 /* print flt_synd and unum */ 389*7c478bd9Sstevel@tonic-gate 390*7c478bd9Sstevel@tonic-gate #define CMN_LFLAGS 
(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL | \ 391*7c478bd9Sstevel@tonic-gate CPU_AFSR | CPU_AFAR | CPU_AF_PSYND | \ 392*7c478bd9Sstevel@tonic-gate CPU_AF_ETS | CPU_UDBH | CPU_UDBL | \ 393*7c478bd9Sstevel@tonic-gate CPU_FAULTPC) 394*7c478bd9Sstevel@tonic-gate #define UE_LFLAGS (CMN_LFLAGS | CPU_SYND) 395*7c478bd9Sstevel@tonic-gate #define CE_LFLAGS (UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL & \ 396*7c478bd9Sstevel@tonic-gate ~CPU_SPACE) 397*7c478bd9Sstevel@tonic-gate #define PARERR_LFLAGS (CMN_LFLAGS) 398*7c478bd9Sstevel@tonic-gate #define WP_LFLAGS (CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL) 399*7c478bd9Sstevel@tonic-gate #define CP_LFLAGS (CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL & \ 400*7c478bd9Sstevel@tonic-gate ~CPU_FLTCPU & ~CPU_FAULTPC) 401*7c478bd9Sstevel@tonic-gate #define BERRTO_LFLAGS (CMN_LFLAGS) 402*7c478bd9Sstevel@tonic-gate #define NO_LFLAGS (0) 403*7c478bd9Sstevel@tonic-gate 404*7c478bd9Sstevel@tonic-gate #define AFSR_FMTSTR0 "\020\1ME" 405*7c478bd9Sstevel@tonic-gate #define AFSR_FMTSTR1 "\020\040PRIV\037ISAP\036ETP\035IVUE\034TO" \ 406*7c478bd9Sstevel@tonic-gate "\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE" 407*7c478bd9Sstevel@tonic-gate #define UDB_FMTSTR "\020\012UE\011CE" 408*7c478bd9Sstevel@tonic-gate 409*7c478bd9Sstevel@tonic-gate /* 410*7c478bd9Sstevel@tonic-gate * Maximum number of contexts for Spitfire. 411*7c478bd9Sstevel@tonic-gate */ 412*7c478bd9Sstevel@tonic-gate #define MAX_NCTXS (1 << 13) 413*7c478bd9Sstevel@tonic-gate 414*7c478bd9Sstevel@tonic-gate /* 415*7c478bd9Sstevel@tonic-gate * Save the cache bootup state for use when internal 416*7c478bd9Sstevel@tonic-gate * caches are to be re-enabled after an error occurs. 417*7c478bd9Sstevel@tonic-gate */ 418*7c478bd9Sstevel@tonic-gate uint64_t cache_boot_state = 0; 419*7c478bd9Sstevel@tonic-gate 420*7c478bd9Sstevel@tonic-gate /* 421*7c478bd9Sstevel@tonic-gate * PA[31:0] represent Displacement in UPA configuration space. 
422*7c478bd9Sstevel@tonic-gate */ 423*7c478bd9Sstevel@tonic-gate uint_t root_phys_addr_lo_mask = 0xffffffff; 424*7c478bd9Sstevel@tonic-gate 425*7c478bd9Sstevel@tonic-gate /* 426*7c478bd9Sstevel@tonic-gate * Spitfire legacy globals 427*7c478bd9Sstevel@tonic-gate */ 428*7c478bd9Sstevel@tonic-gate int itlb_entries; 429*7c478bd9Sstevel@tonic-gate int dtlb_entries; 430*7c478bd9Sstevel@tonic-gate 431*7c478bd9Sstevel@tonic-gate void 432*7c478bd9Sstevel@tonic-gate cpu_setup(void) 433*7c478bd9Sstevel@tonic-gate { 434*7c478bd9Sstevel@tonic-gate extern int page_retire_messages; 435*7c478bd9Sstevel@tonic-gate extern int at_flags; 436*7c478bd9Sstevel@tonic-gate #if defined(SF_ERRATA_57) 437*7c478bd9Sstevel@tonic-gate extern caddr_t errata57_limit; 438*7c478bd9Sstevel@tonic-gate #endif 439*7c478bd9Sstevel@tonic-gate extern int disable_text_largepages; 440*7c478bd9Sstevel@tonic-gate extern int disable_initdata_largepages; 441*7c478bd9Sstevel@tonic-gate 442*7c478bd9Sstevel@tonic-gate cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); 443*7c478bd9Sstevel@tonic-gate 444*7c478bd9Sstevel@tonic-gate at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1; 445*7c478bd9Sstevel@tonic-gate 446*7c478bd9Sstevel@tonic-gate /* 447*7c478bd9Sstevel@tonic-gate * Spitfire isn't currently FMA-aware, so we have to enable the 448*7c478bd9Sstevel@tonic-gate * page retirement messages. 449*7c478bd9Sstevel@tonic-gate */ 450*7c478bd9Sstevel@tonic-gate page_retire_messages = 1; 451*7c478bd9Sstevel@tonic-gate 452*7c478bd9Sstevel@tonic-gate /* 453*7c478bd9Sstevel@tonic-gate * save the cache bootup state. 454*7c478bd9Sstevel@tonic-gate */ 455*7c478bd9Sstevel@tonic-gate cache_boot_state = get_lsu() & (LSU_IC | LSU_DC); 456*7c478bd9Sstevel@tonic-gate 457*7c478bd9Sstevel@tonic-gate /* 458*7c478bd9Sstevel@tonic-gate * Use the maximum number of contexts available for Spitfire unless 459*7c478bd9Sstevel@tonic-gate * it has been tuned for debugging. 
460*7c478bd9Sstevel@tonic-gate * We are checking against 0 here since this value can be patched 461*7c478bd9Sstevel@tonic-gate * while booting. It can not be patched via /etc/system since it 462*7c478bd9Sstevel@tonic-gate * will be patched too late and thus cause the system to panic. 463*7c478bd9Sstevel@tonic-gate */ 464*7c478bd9Sstevel@tonic-gate if (nctxs == 0) 465*7c478bd9Sstevel@tonic-gate nctxs = MAX_NCTXS; 466*7c478bd9Sstevel@tonic-gate 467*7c478bd9Sstevel@tonic-gate if (use_page_coloring) { 468*7c478bd9Sstevel@tonic-gate do_pg_coloring = 1; 469*7c478bd9Sstevel@tonic-gate if (use_virtual_coloring) 470*7c478bd9Sstevel@tonic-gate do_virtual_coloring = 1; 471*7c478bd9Sstevel@tonic-gate } 472*7c478bd9Sstevel@tonic-gate 473*7c478bd9Sstevel@tonic-gate /* 474*7c478bd9Sstevel@tonic-gate * Tune pp_slots to use up to 1/8th of the tlb entries. 475*7c478bd9Sstevel@tonic-gate */ 476*7c478bd9Sstevel@tonic-gate pp_slots = MIN(8, MAXPP_SLOTS); 477*7c478bd9Sstevel@tonic-gate 478*7c478bd9Sstevel@tonic-gate /* 479*7c478bd9Sstevel@tonic-gate * Block stores invalidate all pages of the d$ so pagecopy 480*7c478bd9Sstevel@tonic-gate * et. al. do not need virtual translations with virtual 481*7c478bd9Sstevel@tonic-gate * coloring taken into consideration. 482*7c478bd9Sstevel@tonic-gate */ 483*7c478bd9Sstevel@tonic-gate pp_consistent_coloring = 0; 484*7c478bd9Sstevel@tonic-gate 485*7c478bd9Sstevel@tonic-gate isa_list = 486*7c478bd9Sstevel@tonic-gate "sparcv9+vis sparcv9 " 487*7c478bd9Sstevel@tonic-gate "sparcv8plus+vis sparcv8plus " 488*7c478bd9Sstevel@tonic-gate "sparcv8 sparcv8-fsmuld sparcv7 sparc"; 489*7c478bd9Sstevel@tonic-gate 490*7c478bd9Sstevel@tonic-gate cpu_hwcap_flags = AV_SPARC_VIS; 491*7c478bd9Sstevel@tonic-gate 492*7c478bd9Sstevel@tonic-gate /* 493*7c478bd9Sstevel@tonic-gate * On Spitfire, there's a hole in the address space 494*7c478bd9Sstevel@tonic-gate * that we must never map (the hardware only support 44-bits of 495*7c478bd9Sstevel@tonic-gate * virtual address). 
Later CPUs are expected to have wider 496*7c478bd9Sstevel@tonic-gate * supported address ranges. 497*7c478bd9Sstevel@tonic-gate * 498*7c478bd9Sstevel@tonic-gate * See address map on p23 of the UltraSPARC 1 user's manual. 499*7c478bd9Sstevel@tonic-gate */ 500*7c478bd9Sstevel@tonic-gate hole_start = (caddr_t)0x80000000000ull; 501*7c478bd9Sstevel@tonic-gate hole_end = (caddr_t)0xfffff80000000000ull; 502*7c478bd9Sstevel@tonic-gate 503*7c478bd9Sstevel@tonic-gate /* 504*7c478bd9Sstevel@tonic-gate * A spitfire call bug requires us to be a further 4Gbytes of 505*7c478bd9Sstevel@tonic-gate * firewall from the spec. 506*7c478bd9Sstevel@tonic-gate * 507*7c478bd9Sstevel@tonic-gate * See Spitfire Errata #21 508*7c478bd9Sstevel@tonic-gate */ 509*7c478bd9Sstevel@tonic-gate hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32)); 510*7c478bd9Sstevel@tonic-gate hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32)); 511*7c478bd9Sstevel@tonic-gate 512*7c478bd9Sstevel@tonic-gate /* 513*7c478bd9Sstevel@tonic-gate * The kpm mapping window. 514*7c478bd9Sstevel@tonic-gate * kpm_size: 515*7c478bd9Sstevel@tonic-gate * The size of a single kpm range. 516*7c478bd9Sstevel@tonic-gate * The overall size will be: kpm_size * vac_colors. 517*7c478bd9Sstevel@tonic-gate * kpm_vbase: 518*7c478bd9Sstevel@tonic-gate * The virtual start address of the kpm range within the kernel 519*7c478bd9Sstevel@tonic-gate * virtual address space. kpm_vbase has to be kpm_size aligned. 
520*7c478bd9Sstevel@tonic-gate */ 521*7c478bd9Sstevel@tonic-gate kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */ 522*7c478bd9Sstevel@tonic-gate kpm_size_shift = 41; 523*7c478bd9Sstevel@tonic-gate kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */ 524*7c478bd9Sstevel@tonic-gate 525*7c478bd9Sstevel@tonic-gate #if defined(SF_ERRATA_57) 526*7c478bd9Sstevel@tonic-gate errata57_limit = (caddr_t)0x80000000ul; 527*7c478bd9Sstevel@tonic-gate #endif 528*7c478bd9Sstevel@tonic-gate 529*7c478bd9Sstevel@tonic-gate /* 530*7c478bd9Sstevel@tonic-gate * Allow only 8K, 64K and 4M pages for text by default. 531*7c478bd9Sstevel@tonic-gate * Allow only 8K and 64K page for initialized data segments by 532*7c478bd9Sstevel@tonic-gate * default. 533*7c478bd9Sstevel@tonic-gate */ 534*7c478bd9Sstevel@tonic-gate disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) | 535*7c478bd9Sstevel@tonic-gate (1 << TTE256M); 536*7c478bd9Sstevel@tonic-gate disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) | 537*7c478bd9Sstevel@tonic-gate (1 << TTE32M) | (1 << TTE256M); 538*7c478bd9Sstevel@tonic-gate } 539*7c478bd9Sstevel@tonic-gate 540*7c478bd9Sstevel@tonic-gate static int 541*7c478bd9Sstevel@tonic-gate getintprop(dnode_t node, char *name, int deflt) 542*7c478bd9Sstevel@tonic-gate { 543*7c478bd9Sstevel@tonic-gate int value; 544*7c478bd9Sstevel@tonic-gate 545*7c478bd9Sstevel@tonic-gate switch (prom_getproplen(node, name)) { 546*7c478bd9Sstevel@tonic-gate case 0: 547*7c478bd9Sstevel@tonic-gate value = 1; /* boolean properties */ 548*7c478bd9Sstevel@tonic-gate break; 549*7c478bd9Sstevel@tonic-gate 550*7c478bd9Sstevel@tonic-gate case sizeof (int): 551*7c478bd9Sstevel@tonic-gate (void) prom_getprop(node, name, (caddr_t)&value); 552*7c478bd9Sstevel@tonic-gate break; 553*7c478bd9Sstevel@tonic-gate 554*7c478bd9Sstevel@tonic-gate default: 555*7c478bd9Sstevel@tonic-gate value = deflt; 556*7c478bd9Sstevel@tonic-gate break; 557*7c478bd9Sstevel@tonic-gate } 
558*7c478bd9Sstevel@tonic-gate 559*7c478bd9Sstevel@tonic-gate return (value); 560*7c478bd9Sstevel@tonic-gate } 561*7c478bd9Sstevel@tonic-gate 562*7c478bd9Sstevel@tonic-gate /* 563*7c478bd9Sstevel@tonic-gate * Set the magic constants of the implementation. 564*7c478bd9Sstevel@tonic-gate */ 565*7c478bd9Sstevel@tonic-gate void 566*7c478bd9Sstevel@tonic-gate cpu_fiximp(dnode_t dnode) 567*7c478bd9Sstevel@tonic-gate { 568*7c478bd9Sstevel@tonic-gate extern int vac_size, vac_shift; 569*7c478bd9Sstevel@tonic-gate extern uint_t vac_mask; 570*7c478bd9Sstevel@tonic-gate extern int dcache_line_mask; 571*7c478bd9Sstevel@tonic-gate int i, a; 572*7c478bd9Sstevel@tonic-gate static struct { 573*7c478bd9Sstevel@tonic-gate char *name; 574*7c478bd9Sstevel@tonic-gate int *var; 575*7c478bd9Sstevel@tonic-gate } prop[] = { 576*7c478bd9Sstevel@tonic-gate "dcache-size", &dcache_size, 577*7c478bd9Sstevel@tonic-gate "dcache-line-size", &dcache_linesize, 578*7c478bd9Sstevel@tonic-gate "icache-size", &icache_size, 579*7c478bd9Sstevel@tonic-gate "icache-line-size", &icache_linesize, 580*7c478bd9Sstevel@tonic-gate "ecache-size", &ecache_size, 581*7c478bd9Sstevel@tonic-gate "ecache-line-size", &ecache_alignsize, 582*7c478bd9Sstevel@tonic-gate "ecache-associativity", &ecache_associativity, 583*7c478bd9Sstevel@tonic-gate "#itlb-entries", &itlb_entries, 584*7c478bd9Sstevel@tonic-gate "#dtlb-entries", &dtlb_entries, 585*7c478bd9Sstevel@tonic-gate }; 586*7c478bd9Sstevel@tonic-gate 587*7c478bd9Sstevel@tonic-gate for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) { 588*7c478bd9Sstevel@tonic-gate if ((a = getintprop(dnode, prop[i].name, -1)) != -1) { 589*7c478bd9Sstevel@tonic-gate *prop[i].var = a; 590*7c478bd9Sstevel@tonic-gate } 591*7c478bd9Sstevel@tonic-gate } 592*7c478bd9Sstevel@tonic-gate 593*7c478bd9Sstevel@tonic-gate ecache_setsize = ecache_size / ecache_associativity; 594*7c478bd9Sstevel@tonic-gate 595*7c478bd9Sstevel@tonic-gate vac_size = S_VAC_SIZE; 596*7c478bd9Sstevel@tonic-gate vac_mask = 
MMU_PAGEMASK & (vac_size - 1); 597*7c478bd9Sstevel@tonic-gate i = 0; a = vac_size; 598*7c478bd9Sstevel@tonic-gate while (a >>= 1) 599*7c478bd9Sstevel@tonic-gate ++i; 600*7c478bd9Sstevel@tonic-gate vac_shift = i; 601*7c478bd9Sstevel@tonic-gate shm_alignment = vac_size; 602*7c478bd9Sstevel@tonic-gate vac = 1; 603*7c478bd9Sstevel@tonic-gate 604*7c478bd9Sstevel@tonic-gate dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1); 605*7c478bd9Sstevel@tonic-gate 606*7c478bd9Sstevel@tonic-gate /* 607*7c478bd9Sstevel@tonic-gate * UltraSPARC I & II have ecache sizes running 608*7c478bd9Sstevel@tonic-gate * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB 609*7c478bd9Sstevel@tonic-gate * and 8 MB. Adjust the copyin/copyout limits 610*7c478bd9Sstevel@tonic-gate * according to the cache size. The magic number 611*7c478bd9Sstevel@tonic-gate * of VIS_COPY_THRESHOLD comes from the copyin/copyout code 612*7c478bd9Sstevel@tonic-gate * and its floor of VIS_COPY_THRESHOLD bytes before it will use 613*7c478bd9Sstevel@tonic-gate * VIS instructions. 614*7c478bd9Sstevel@tonic-gate * 615*7c478bd9Sstevel@tonic-gate * We assume that all CPUs on the system have the same size 616*7c478bd9Sstevel@tonic-gate * ecache. We're also called very early in the game. 617*7c478bd9Sstevel@tonic-gate * /etc/system will be parsed *after* we're called so 618*7c478bd9Sstevel@tonic-gate * these values can be overwritten. 
619*7c478bd9Sstevel@tonic-gate */ 620*7c478bd9Sstevel@tonic-gate 621*7c478bd9Sstevel@tonic-gate hw_copy_limit_1 = VIS_COPY_THRESHOLD; 622*7c478bd9Sstevel@tonic-gate if (ecache_size <= 524288) { 623*7c478bd9Sstevel@tonic-gate hw_copy_limit_2 = VIS_COPY_THRESHOLD; 624*7c478bd9Sstevel@tonic-gate hw_copy_limit_4 = VIS_COPY_THRESHOLD; 625*7c478bd9Sstevel@tonic-gate hw_copy_limit_8 = VIS_COPY_THRESHOLD; 626*7c478bd9Sstevel@tonic-gate } else if (ecache_size == 1048576) { 627*7c478bd9Sstevel@tonic-gate hw_copy_limit_2 = 1024; 628*7c478bd9Sstevel@tonic-gate hw_copy_limit_4 = 1280; 629*7c478bd9Sstevel@tonic-gate hw_copy_limit_8 = 1536; 630*7c478bd9Sstevel@tonic-gate } else if (ecache_size == 2097152) { 631*7c478bd9Sstevel@tonic-gate hw_copy_limit_2 = 1536; 632*7c478bd9Sstevel@tonic-gate hw_copy_limit_4 = 2048; 633*7c478bd9Sstevel@tonic-gate hw_copy_limit_8 = 2560; 634*7c478bd9Sstevel@tonic-gate } else if (ecache_size == 4194304) { 635*7c478bd9Sstevel@tonic-gate hw_copy_limit_2 = 2048; 636*7c478bd9Sstevel@tonic-gate hw_copy_limit_4 = 2560; 637*7c478bd9Sstevel@tonic-gate hw_copy_limit_8 = 3072; 638*7c478bd9Sstevel@tonic-gate } else { 639*7c478bd9Sstevel@tonic-gate hw_copy_limit_2 = 2560; 640*7c478bd9Sstevel@tonic-gate hw_copy_limit_4 = 3072; 641*7c478bd9Sstevel@tonic-gate hw_copy_limit_8 = 3584; 642*7c478bd9Sstevel@tonic-gate } 643*7c478bd9Sstevel@tonic-gate } 644*7c478bd9Sstevel@tonic-gate 645*7c478bd9Sstevel@tonic-gate /* 646*7c478bd9Sstevel@tonic-gate * Called by setcpudelay 647*7c478bd9Sstevel@tonic-gate */ 648*7c478bd9Sstevel@tonic-gate void 649*7c478bd9Sstevel@tonic-gate cpu_init_tick_freq(void) 650*7c478bd9Sstevel@tonic-gate { 651*7c478bd9Sstevel@tonic-gate /* 652*7c478bd9Sstevel@tonic-gate * Determine the cpu frequency by calling 653*7c478bd9Sstevel@tonic-gate * tod_get_cpufrequency. Use an approximate freqency 654*7c478bd9Sstevel@tonic-gate * value computed by the prom if the tod module 655*7c478bd9Sstevel@tonic-gate * is not initialized and loaded yet. 
656*7c478bd9Sstevel@tonic-gate */ 657*7c478bd9Sstevel@tonic-gate if (tod_ops.tod_get_cpufrequency != NULL) { 658*7c478bd9Sstevel@tonic-gate mutex_enter(&tod_lock); 659*7c478bd9Sstevel@tonic-gate sys_tick_freq = tod_ops.tod_get_cpufrequency(); 660*7c478bd9Sstevel@tonic-gate mutex_exit(&tod_lock); 661*7c478bd9Sstevel@tonic-gate } else { 662*7c478bd9Sstevel@tonic-gate #if defined(HUMMINGBIRD) 663*7c478bd9Sstevel@tonic-gate /* 664*7c478bd9Sstevel@tonic-gate * the hummingbird version of %stick is used as the basis for 665*7c478bd9Sstevel@tonic-gate * low level timing; this provides an independent constant-rate 666*7c478bd9Sstevel@tonic-gate * clock for general system use, and frees power mgmt to set 667*7c478bd9Sstevel@tonic-gate * various cpu clock speeds. 668*7c478bd9Sstevel@tonic-gate */ 669*7c478bd9Sstevel@tonic-gate if (system_clock_freq == 0) 670*7c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx", 671*7c478bd9Sstevel@tonic-gate system_clock_freq); 672*7c478bd9Sstevel@tonic-gate sys_tick_freq = system_clock_freq; 673*7c478bd9Sstevel@tonic-gate #else /* SPITFIRE */ 674*7c478bd9Sstevel@tonic-gate sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq; 675*7c478bd9Sstevel@tonic-gate #endif 676*7c478bd9Sstevel@tonic-gate } 677*7c478bd9Sstevel@tonic-gate } 678*7c478bd9Sstevel@tonic-gate 679*7c478bd9Sstevel@tonic-gate 680*7c478bd9Sstevel@tonic-gate void shipit(int upaid); 681*7c478bd9Sstevel@tonic-gate extern uint64_t xc_tick_limit; 682*7c478bd9Sstevel@tonic-gate extern uint64_t xc_tick_jump_limit; 683*7c478bd9Sstevel@tonic-gate 684*7c478bd9Sstevel@tonic-gate #ifdef SEND_MONDO_STATS 685*7c478bd9Sstevel@tonic-gate uint64_t x_early[NCPU][64]; 686*7c478bd9Sstevel@tonic-gate #endif 687*7c478bd9Sstevel@tonic-gate 688*7c478bd9Sstevel@tonic-gate /* 689*7c478bd9Sstevel@tonic-gate * Note: A version of this function is used by the debugger via the KDI, 690*7c478bd9Sstevel@tonic-gate * and must be kept in sync with this version. 
Any changes made to this 691*7c478bd9Sstevel@tonic-gate * function to support new chips or to accomodate errata must also be included 692*7c478bd9Sstevel@tonic-gate * in the KDI-specific version. See spitfire_kdi.c. 693*7c478bd9Sstevel@tonic-gate */ 694*7c478bd9Sstevel@tonic-gate void 695*7c478bd9Sstevel@tonic-gate send_one_mondo(int cpuid) 696*7c478bd9Sstevel@tonic-gate { 697*7c478bd9Sstevel@tonic-gate uint64_t idsr, starttick, endtick; 698*7c478bd9Sstevel@tonic-gate int upaid, busy, nack; 699*7c478bd9Sstevel@tonic-gate uint64_t tick, tick_prev; 700*7c478bd9Sstevel@tonic-gate ulong_t ticks; 701*7c478bd9Sstevel@tonic-gate 702*7c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, sys, xcalls, 1); 703*7c478bd9Sstevel@tonic-gate upaid = CPUID_TO_UPAID(cpuid); 704*7c478bd9Sstevel@tonic-gate tick = starttick = gettick(); 705*7c478bd9Sstevel@tonic-gate shipit(upaid); 706*7c478bd9Sstevel@tonic-gate endtick = starttick + xc_tick_limit; 707*7c478bd9Sstevel@tonic-gate busy = nack = 0; 708*7c478bd9Sstevel@tonic-gate for (;;) { 709*7c478bd9Sstevel@tonic-gate idsr = getidsr(); 710*7c478bd9Sstevel@tonic-gate if (idsr == 0) 711*7c478bd9Sstevel@tonic-gate break; 712*7c478bd9Sstevel@tonic-gate /* 713*7c478bd9Sstevel@tonic-gate * When we detect an irregular tick jump, we adjust 714*7c478bd9Sstevel@tonic-gate * the timer window to the current tick value. 
715*7c478bd9Sstevel@tonic-gate */ 716*7c478bd9Sstevel@tonic-gate tick_prev = tick; 717*7c478bd9Sstevel@tonic-gate tick = gettick(); 718*7c478bd9Sstevel@tonic-gate ticks = tick - tick_prev; 719*7c478bd9Sstevel@tonic-gate if (ticks > xc_tick_jump_limit) { 720*7c478bd9Sstevel@tonic-gate endtick = tick + xc_tick_limit; 721*7c478bd9Sstevel@tonic-gate } else if (tick > endtick) { 722*7c478bd9Sstevel@tonic-gate if (panic_quiesce) 723*7c478bd9Sstevel@tonic-gate return; 724*7c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, 725*7c478bd9Sstevel@tonic-gate "send mondo timeout (target 0x%x) [%d NACK %d BUSY]", 726*7c478bd9Sstevel@tonic-gate upaid, nack, busy); 727*7c478bd9Sstevel@tonic-gate } 728*7c478bd9Sstevel@tonic-gate if (idsr & IDSR_BUSY) { 729*7c478bd9Sstevel@tonic-gate busy++; 730*7c478bd9Sstevel@tonic-gate continue; 731*7c478bd9Sstevel@tonic-gate } 732*7c478bd9Sstevel@tonic-gate drv_usecwait(1); 733*7c478bd9Sstevel@tonic-gate shipit(upaid); 734*7c478bd9Sstevel@tonic-gate nack++; 735*7c478bd9Sstevel@tonic-gate busy = 0; 736*7c478bd9Sstevel@tonic-gate } 737*7c478bd9Sstevel@tonic-gate #ifdef SEND_MONDO_STATS 738*7c478bd9Sstevel@tonic-gate x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++; 739*7c478bd9Sstevel@tonic-gate #endif 740*7c478bd9Sstevel@tonic-gate } 741*7c478bd9Sstevel@tonic-gate 742*7c478bd9Sstevel@tonic-gate void 743*7c478bd9Sstevel@tonic-gate send_mondo_set(cpuset_t set) 744*7c478bd9Sstevel@tonic-gate { 745*7c478bd9Sstevel@tonic-gate int i; 746*7c478bd9Sstevel@tonic-gate 747*7c478bd9Sstevel@tonic-gate for (i = 0; i < NCPU; i++) 748*7c478bd9Sstevel@tonic-gate if (CPU_IN_SET(set, i)) { 749*7c478bd9Sstevel@tonic-gate send_one_mondo(i); 750*7c478bd9Sstevel@tonic-gate CPUSET_DEL(set, i); 751*7c478bd9Sstevel@tonic-gate if (CPUSET_ISNULL(set)) 752*7c478bd9Sstevel@tonic-gate break; 753*7c478bd9Sstevel@tonic-gate } 754*7c478bd9Sstevel@tonic-gate } 755*7c478bd9Sstevel@tonic-gate 756*7c478bd9Sstevel@tonic-gate void 757*7c478bd9Sstevel@tonic-gate syncfpu(void) 
758*7c478bd9Sstevel@tonic-gate { 759*7c478bd9Sstevel@tonic-gate } 760*7c478bd9Sstevel@tonic-gate 761*7c478bd9Sstevel@tonic-gate /* 762*7c478bd9Sstevel@tonic-gate * Determine the size of the CPU module's error structure in bytes. This is 763*7c478bd9Sstevel@tonic-gate * called once during boot to initialize the error queues. 764*7c478bd9Sstevel@tonic-gate */ 765*7c478bd9Sstevel@tonic-gate int 766*7c478bd9Sstevel@tonic-gate cpu_aflt_size(void) 767*7c478bd9Sstevel@tonic-gate { 768*7c478bd9Sstevel@tonic-gate /* 769*7c478bd9Sstevel@tonic-gate * We need to determine whether this is a sabre, Hummingbird or a 770*7c478bd9Sstevel@tonic-gate * Spitfire/Blackbird impl and set the appropriate state variables for 771*7c478bd9Sstevel@tonic-gate * ecache tag manipulation. We can't do this in cpu_setup() as it is 772*7c478bd9Sstevel@tonic-gate * too early in the boot flow and the cpunodes are not initialized. 773*7c478bd9Sstevel@tonic-gate * This routine will be called once after cpunodes[] is ready, so do 774*7c478bd9Sstevel@tonic-gate * it here. 
775*7c478bd9Sstevel@tonic-gate */ 776*7c478bd9Sstevel@tonic-gate if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) { 777*7c478bd9Sstevel@tonic-gate isus2i = 1; 778*7c478bd9Sstevel@tonic-gate cpu_ec_tag_mask = SB_ECTAG_MASK; 779*7c478bd9Sstevel@tonic-gate cpu_ec_state_mask = SB_ECSTATE_MASK; 780*7c478bd9Sstevel@tonic-gate cpu_ec_par_mask = SB_ECPAR_MASK; 781*7c478bd9Sstevel@tonic-gate cpu_ec_par_shift = SB_ECPAR_SHIFT; 782*7c478bd9Sstevel@tonic-gate cpu_ec_tag_shift = SB_ECTAG_SHIFT; 783*7c478bd9Sstevel@tonic-gate cpu_ec_state_shift = SB_ECSTATE_SHIFT; 784*7c478bd9Sstevel@tonic-gate cpu_ec_state_exl = SB_ECSTATE_EXL; 785*7c478bd9Sstevel@tonic-gate cpu_ec_state_mod = SB_ECSTATE_MOD; 786*7c478bd9Sstevel@tonic-gate 787*7c478bd9Sstevel@tonic-gate /* These states do not exist in sabre - set to 0xFF */ 788*7c478bd9Sstevel@tonic-gate cpu_ec_state_shr = 0xFF; 789*7c478bd9Sstevel@tonic-gate cpu_ec_state_own = 0xFF; 790*7c478bd9Sstevel@tonic-gate 791*7c478bd9Sstevel@tonic-gate cpu_ec_state_valid = SB_ECSTATE_VALID; 792*7c478bd9Sstevel@tonic-gate cpu_ec_state_dirty = SB_ECSTATE_DIRTY; 793*7c478bd9Sstevel@tonic-gate cpu_ec_state_parity = SB_ECSTATE_PARITY; 794*7c478bd9Sstevel@tonic-gate cpu_ec_parity = SB_EC_PARITY; 795*7c478bd9Sstevel@tonic-gate } else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) { 796*7c478bd9Sstevel@tonic-gate isus2e = 1; 797*7c478bd9Sstevel@tonic-gate cpu_ec_tag_mask = HB_ECTAG_MASK; 798*7c478bd9Sstevel@tonic-gate cpu_ec_state_mask = HB_ECSTATE_MASK; 799*7c478bd9Sstevel@tonic-gate cpu_ec_par_mask = HB_ECPAR_MASK; 800*7c478bd9Sstevel@tonic-gate cpu_ec_par_shift = HB_ECPAR_SHIFT; 801*7c478bd9Sstevel@tonic-gate cpu_ec_tag_shift = HB_ECTAG_SHIFT; 802*7c478bd9Sstevel@tonic-gate cpu_ec_state_shift = HB_ECSTATE_SHIFT; 803*7c478bd9Sstevel@tonic-gate cpu_ec_state_exl = HB_ECSTATE_EXL; 804*7c478bd9Sstevel@tonic-gate cpu_ec_state_mod = HB_ECSTATE_MOD; 805*7c478bd9Sstevel@tonic-gate 806*7c478bd9Sstevel@tonic-gate /* These states do not exist in 
hummingbird - set to 0xFF */ 807*7c478bd9Sstevel@tonic-gate cpu_ec_state_shr = 0xFF; 808*7c478bd9Sstevel@tonic-gate cpu_ec_state_own = 0xFF; 809*7c478bd9Sstevel@tonic-gate 810*7c478bd9Sstevel@tonic-gate cpu_ec_state_valid = HB_ECSTATE_VALID; 811*7c478bd9Sstevel@tonic-gate cpu_ec_state_dirty = HB_ECSTATE_DIRTY; 812*7c478bd9Sstevel@tonic-gate cpu_ec_state_parity = HB_ECSTATE_PARITY; 813*7c478bd9Sstevel@tonic-gate cpu_ec_parity = HB_EC_PARITY; 814*7c478bd9Sstevel@tonic-gate } 815*7c478bd9Sstevel@tonic-gate 816*7c478bd9Sstevel@tonic-gate return (sizeof (spitf_async_flt)); 817*7c478bd9Sstevel@tonic-gate } 818*7c478bd9Sstevel@tonic-gate 819*7c478bd9Sstevel@tonic-gate 820*7c478bd9Sstevel@tonic-gate /* 821*7c478bd9Sstevel@tonic-gate * Correctable ecc error trap handler 822*7c478bd9Sstevel@tonic-gate */ 823*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 824*7c478bd9Sstevel@tonic-gate void 825*7c478bd9Sstevel@tonic-gate cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr, 826*7c478bd9Sstevel@tonic-gate uint_t p_afsr_high, uint_t p_afar_high) 827*7c478bd9Sstevel@tonic-gate { 828*7c478bd9Sstevel@tonic-gate ushort_t sdbh, sdbl; 829*7c478bd9Sstevel@tonic-gate ushort_t e_syndh, e_syndl; 830*7c478bd9Sstevel@tonic-gate spitf_async_flt spf_flt; 831*7c478bd9Sstevel@tonic-gate struct async_flt *ecc; 832*7c478bd9Sstevel@tonic-gate int queue = 1; 833*7c478bd9Sstevel@tonic-gate 834*7c478bd9Sstevel@tonic-gate uint64_t t_afar = p_afar; 835*7c478bd9Sstevel@tonic-gate uint64_t t_afsr = p_afsr; 836*7c478bd9Sstevel@tonic-gate 837*7c478bd9Sstevel@tonic-gate /* 838*7c478bd9Sstevel@tonic-gate * Note: the Spitfire data buffer error registers 839*7c478bd9Sstevel@tonic-gate * (upper and lower halves) are or'ed into the upper 840*7c478bd9Sstevel@tonic-gate * word of the afsr by ce_err(). 
841*7c478bd9Sstevel@tonic-gate */ 842*7c478bd9Sstevel@tonic-gate sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF); 843*7c478bd9Sstevel@tonic-gate sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF); 844*7c478bd9Sstevel@tonic-gate 845*7c478bd9Sstevel@tonic-gate e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND); 846*7c478bd9Sstevel@tonic-gate e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND); 847*7c478bd9Sstevel@tonic-gate 848*7c478bd9Sstevel@tonic-gate t_afsr &= S_AFSR_MASK; 849*7c478bd9Sstevel@tonic-gate t_afar &= SABRE_AFAR_PA; /* must use Sabre AFAR mask */ 850*7c478bd9Sstevel@tonic-gate 851*7c478bd9Sstevel@tonic-gate /* Setup the async fault structure */ 852*7c478bd9Sstevel@tonic-gate bzero(&spf_flt, sizeof (spitf_async_flt)); 853*7c478bd9Sstevel@tonic-gate ecc = (struct async_flt *)&spf_flt; 854*7c478bd9Sstevel@tonic-gate ecc->flt_id = gethrtime_waitfree(); 855*7c478bd9Sstevel@tonic-gate ecc->flt_stat = t_afsr; 856*7c478bd9Sstevel@tonic-gate ecc->flt_addr = t_afar; 857*7c478bd9Sstevel@tonic-gate ecc->flt_status = ECC_C_TRAP; 858*7c478bd9Sstevel@tonic-gate ecc->flt_bus_id = getprocessorid(); 859*7c478bd9Sstevel@tonic-gate ecc->flt_inst = CPU->cpu_id; 860*7c478bd9Sstevel@tonic-gate ecc->flt_pc = (caddr_t)rp->r_pc; 861*7c478bd9Sstevel@tonic-gate ecc->flt_func = log_ce_err; 862*7c478bd9Sstevel@tonic-gate ecc->flt_in_memory = 863*7c478bd9Sstevel@tonic-gate (pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0; 864*7c478bd9Sstevel@tonic-gate spf_flt.flt_sdbh = sdbh; 865*7c478bd9Sstevel@tonic-gate spf_flt.flt_sdbl = sdbl; 866*7c478bd9Sstevel@tonic-gate 867*7c478bd9Sstevel@tonic-gate /* 868*7c478bd9Sstevel@tonic-gate * Check for fatal conditions. 
869*7c478bd9Sstevel@tonic-gate */ 870*7c478bd9Sstevel@tonic-gate check_misc_err(&spf_flt); 871*7c478bd9Sstevel@tonic-gate 872*7c478bd9Sstevel@tonic-gate /* 873*7c478bd9Sstevel@tonic-gate * Pananoid checks for valid AFSR and UDBs 874*7c478bd9Sstevel@tonic-gate */ 875*7c478bd9Sstevel@tonic-gate if ((t_afsr & P_AFSR_CE) == 0) { 876*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS, 877*7c478bd9Sstevel@tonic-gate "** Panic due to CE bit not set in the AFSR", 878*7c478bd9Sstevel@tonic-gate " Corrected Memory Error on"); 879*7c478bd9Sstevel@tonic-gate } 880*7c478bd9Sstevel@tonic-gate 881*7c478bd9Sstevel@tonic-gate /* 882*7c478bd9Sstevel@tonic-gate * We want to skip logging only if ALL the following 883*7c478bd9Sstevel@tonic-gate * conditions are true: 884*7c478bd9Sstevel@tonic-gate * 885*7c478bd9Sstevel@tonic-gate * 1. There is only one error 886*7c478bd9Sstevel@tonic-gate * 2. That error is a correctable memory error 887*7c478bd9Sstevel@tonic-gate * 3. The error is caused by the memory scrubber (in which case 888*7c478bd9Sstevel@tonic-gate * the error will have occurred under on_trap protection) 889*7c478bd9Sstevel@tonic-gate * 4. The error is on a retired page 890*7c478bd9Sstevel@tonic-gate * 891*7c478bd9Sstevel@tonic-gate * Note: OT_DATA_EC is used places other than the memory scrubber. 892*7c478bd9Sstevel@tonic-gate * However, none of those errors should occur on a retired page. 
893*7c478bd9Sstevel@tonic-gate */ 894*7c478bd9Sstevel@tonic-gate if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE && 895*7c478bd9Sstevel@tonic-gate curthread->t_ontrap != NULL) { 896*7c478bd9Sstevel@tonic-gate 897*7c478bd9Sstevel@tonic-gate if (curthread->t_ontrap->ot_prot & OT_DATA_EC) { 898*7c478bd9Sstevel@tonic-gate page_t *pp = page_numtopp_nolock((pfn_t) 899*7c478bd9Sstevel@tonic-gate (ecc->flt_addr >> MMU_PAGESHIFT)); 900*7c478bd9Sstevel@tonic-gate 901*7c478bd9Sstevel@tonic-gate if (pp != NULL && page_isretired(pp)) { 902*7c478bd9Sstevel@tonic-gate queue = 0; 903*7c478bd9Sstevel@tonic-gate } 904*7c478bd9Sstevel@tonic-gate } 905*7c478bd9Sstevel@tonic-gate } 906*7c478bd9Sstevel@tonic-gate 907*7c478bd9Sstevel@tonic-gate if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) { 908*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS, 909*7c478bd9Sstevel@tonic-gate "** Panic due to CE bits not set in the UDBs", 910*7c478bd9Sstevel@tonic-gate " Corrected Memory Error on"); 911*7c478bd9Sstevel@tonic-gate } 912*7c478bd9Sstevel@tonic-gate 913*7c478bd9Sstevel@tonic-gate if ((sdbh >> 8) & 1) { 914*7c478bd9Sstevel@tonic-gate ecc->flt_synd = e_syndh; 915*7c478bd9Sstevel@tonic-gate ce_scrub(ecc); 916*7c478bd9Sstevel@tonic-gate if (queue) { 917*7c478bd9Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc, 918*7c478bd9Sstevel@tonic-gate sizeof (*ecc), ce_queue, ERRORQ_ASYNC); 919*7c478bd9Sstevel@tonic-gate } 920*7c478bd9Sstevel@tonic-gate } 921*7c478bd9Sstevel@tonic-gate 922*7c478bd9Sstevel@tonic-gate if ((sdbl >> 8) & 1) { 923*7c478bd9Sstevel@tonic-gate ecc->flt_addr = t_afar | 0x8; /* Sabres do not have a UDBL */ 924*7c478bd9Sstevel@tonic-gate ecc->flt_synd = e_syndl | UDBL_REG; 925*7c478bd9Sstevel@tonic-gate ce_scrub(ecc); 926*7c478bd9Sstevel@tonic-gate if (queue) { 927*7c478bd9Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc, 928*7c478bd9Sstevel@tonic-gate sizeof (*ecc), ce_queue, ERRORQ_ASYNC); 
929*7c478bd9Sstevel@tonic-gate } 930*7c478bd9Sstevel@tonic-gate } 931*7c478bd9Sstevel@tonic-gate 932*7c478bd9Sstevel@tonic-gate /* 933*7c478bd9Sstevel@tonic-gate * Re-enable all error trapping (CEEN currently cleared). 934*7c478bd9Sstevel@tonic-gate */ 935*7c478bd9Sstevel@tonic-gate clr_datapath(); 936*7c478bd9Sstevel@tonic-gate set_asyncflt(P_AFSR_CE); 937*7c478bd9Sstevel@tonic-gate set_error_enable(EER_ENABLE); 938*7c478bd9Sstevel@tonic-gate } 939*7c478bd9Sstevel@tonic-gate 940*7c478bd9Sstevel@tonic-gate /* 941*7c478bd9Sstevel@tonic-gate * Cpu specific CE logging routine 942*7c478bd9Sstevel@tonic-gate */ 943*7c478bd9Sstevel@tonic-gate static void 944*7c478bd9Sstevel@tonic-gate log_ce_err(struct async_flt *aflt, char *unum) 945*7c478bd9Sstevel@tonic-gate { 946*7c478bd9Sstevel@tonic-gate spitf_async_flt spf_flt; 947*7c478bd9Sstevel@tonic-gate 948*7c478bd9Sstevel@tonic-gate if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) { 949*7c478bd9Sstevel@tonic-gate return; 950*7c478bd9Sstevel@tonic-gate } 951*7c478bd9Sstevel@tonic-gate 952*7c478bd9Sstevel@tonic-gate spf_flt.cmn_asyncflt = *aflt; 953*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum, 954*7c478bd9Sstevel@tonic-gate " Corrected Memory Error detected by"); 955*7c478bd9Sstevel@tonic-gate } 956*7c478bd9Sstevel@tonic-gate 957*7c478bd9Sstevel@tonic-gate /* 958*7c478bd9Sstevel@tonic-gate * Spitfire does not perform any further CE classification refinement 959*7c478bd9Sstevel@tonic-gate */ 960*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 961*7c478bd9Sstevel@tonic-gate int 962*7c478bd9Sstevel@tonic-gate ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep, 963*7c478bd9Sstevel@tonic-gate size_t afltoffset) 964*7c478bd9Sstevel@tonic-gate { 965*7c478bd9Sstevel@tonic-gate return (0); 966*7c478bd9Sstevel@tonic-gate } 967*7c478bd9Sstevel@tonic-gate 968*7c478bd9Sstevel@tonic-gate char * 969*7c478bd9Sstevel@tonic-gate flt_to_error_type(struct async_flt *aflt) 
970*7c478bd9Sstevel@tonic-gate { 971*7c478bd9Sstevel@tonic-gate if (aflt->flt_status & ECC_INTERMITTENT) 972*7c478bd9Sstevel@tonic-gate return (ERR_TYPE_DESC_INTERMITTENT); 973*7c478bd9Sstevel@tonic-gate if (aflt->flt_status & ECC_PERSISTENT) 974*7c478bd9Sstevel@tonic-gate return (ERR_TYPE_DESC_PERSISTENT); 975*7c478bd9Sstevel@tonic-gate if (aflt->flt_status & ECC_STICKY) 976*7c478bd9Sstevel@tonic-gate return (ERR_TYPE_DESC_STICKY); 977*7c478bd9Sstevel@tonic-gate return (ERR_TYPE_DESC_UNKNOWN); 978*7c478bd9Sstevel@tonic-gate } 979*7c478bd9Sstevel@tonic-gate 980*7c478bd9Sstevel@tonic-gate /* 981*7c478bd9Sstevel@tonic-gate * Called by correctable ecc error logging code to print out 982*7c478bd9Sstevel@tonic-gate * the stick/persistent/intermittent status of the error. 983*7c478bd9Sstevel@tonic-gate */ 984*7c478bd9Sstevel@tonic-gate static void 985*7c478bd9Sstevel@tonic-gate cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum) 986*7c478bd9Sstevel@tonic-gate { 987*7c478bd9Sstevel@tonic-gate ushort_t status; 988*7c478bd9Sstevel@tonic-gate char *status1_str = "Memory"; 989*7c478bd9Sstevel@tonic-gate char *status2_str = "Intermittent"; 990*7c478bd9Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)spf_flt; 991*7c478bd9Sstevel@tonic-gate 992*7c478bd9Sstevel@tonic-gate status = aflt->flt_status; 993*7c478bd9Sstevel@tonic-gate 994*7c478bd9Sstevel@tonic-gate if (status & ECC_ECACHE) 995*7c478bd9Sstevel@tonic-gate status1_str = "Ecache"; 996*7c478bd9Sstevel@tonic-gate 997*7c478bd9Sstevel@tonic-gate if (status & ECC_STICKY) 998*7c478bd9Sstevel@tonic-gate status2_str = "Sticky"; 999*7c478bd9Sstevel@tonic-gate else if (status & ECC_PERSISTENT) 1000*7c478bd9Sstevel@tonic-gate status2_str = "Persistent"; 1001*7c478bd9Sstevel@tonic-gate 1002*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST, 1003*7c478bd9Sstevel@tonic-gate NULL, " Corrected %s Error on %s is %s", 1004*7c478bd9Sstevel@tonic-gate status1_str, unum, status2_str); 
1005*7c478bd9Sstevel@tonic-gate } 1006*7c478bd9Sstevel@tonic-gate 1007*7c478bd9Sstevel@tonic-gate /* 1008*7c478bd9Sstevel@tonic-gate * check for a valid ce syndrome, then call the 1009*7c478bd9Sstevel@tonic-gate * displacement flush scrubbing code, and then check the afsr to see if 1010*7c478bd9Sstevel@tonic-gate * the error was persistent or intermittent. Reread the afar/afsr to see 1011*7c478bd9Sstevel@tonic-gate * if the error was not scrubbed successfully, and is therefore sticky. 1012*7c478bd9Sstevel@tonic-gate */ 1013*7c478bd9Sstevel@tonic-gate /*ARGSUSED1*/ 1014*7c478bd9Sstevel@tonic-gate void 1015*7c478bd9Sstevel@tonic-gate cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout) 1016*7c478bd9Sstevel@tonic-gate { 1017*7c478bd9Sstevel@tonic-gate uint64_t eer, afsr; 1018*7c478bd9Sstevel@tonic-gate ushort_t status; 1019*7c478bd9Sstevel@tonic-gate 1020*7c478bd9Sstevel@tonic-gate ASSERT(getpil() > LOCK_LEVEL); 1021*7c478bd9Sstevel@tonic-gate 1022*7c478bd9Sstevel@tonic-gate /* 1023*7c478bd9Sstevel@tonic-gate * It is possible that the flt_addr is not a valid 1024*7c478bd9Sstevel@tonic-gate * physical address. To deal with this, we disable 1025*7c478bd9Sstevel@tonic-gate * NCEEN while we scrub that address. If this causes 1026*7c478bd9Sstevel@tonic-gate * a TIMEOUT/BERR, we know this is an invalid 1027*7c478bd9Sstevel@tonic-gate * memory location. 1028*7c478bd9Sstevel@tonic-gate */ 1029*7c478bd9Sstevel@tonic-gate kpreempt_disable(); 1030*7c478bd9Sstevel@tonic-gate eer = get_error_enable(); 1031*7c478bd9Sstevel@tonic-gate if (eer & (EER_CEEN | EER_NCEEN)) 1032*7c478bd9Sstevel@tonic-gate set_error_enable(eer & ~(EER_CEEN | EER_NCEEN)); 1033*7c478bd9Sstevel@tonic-gate 1034*7c478bd9Sstevel@tonic-gate /* 1035*7c478bd9Sstevel@tonic-gate * To check if the error detected by IO is persistent, sticky or 1036*7c478bd9Sstevel@tonic-gate * intermittent. 
1037*7c478bd9Sstevel@tonic-gate */ 1038*7c478bd9Sstevel@tonic-gate if (ecc->flt_status & ECC_IOBUS) { 1039*7c478bd9Sstevel@tonic-gate ecc->flt_stat = P_AFSR_CE; 1040*7c478bd9Sstevel@tonic-gate } 1041*7c478bd9Sstevel@tonic-gate 1042*7c478bd9Sstevel@tonic-gate scrubphys(P2ALIGN(ecc->flt_addr, 64), 1043*7c478bd9Sstevel@tonic-gate cpunodes[CPU->cpu_id].ecache_size); 1044*7c478bd9Sstevel@tonic-gate 1045*7c478bd9Sstevel@tonic-gate get_asyncflt(&afsr); 1046*7c478bd9Sstevel@tonic-gate if (afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1047*7c478bd9Sstevel@tonic-gate /* 1048*7c478bd9Sstevel@tonic-gate * Must ensure that we don't get the TIMEOUT/BERR 1049*7c478bd9Sstevel@tonic-gate * when we reenable NCEEN, so we clear the AFSR. 1050*7c478bd9Sstevel@tonic-gate */ 1051*7c478bd9Sstevel@tonic-gate set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR)); 1052*7c478bd9Sstevel@tonic-gate if (eer & (EER_CEEN | EER_NCEEN)) 1053*7c478bd9Sstevel@tonic-gate set_error_enable(eer); 1054*7c478bd9Sstevel@tonic-gate kpreempt_enable(); 1055*7c478bd9Sstevel@tonic-gate return; 1056*7c478bd9Sstevel@tonic-gate } 1057*7c478bd9Sstevel@tonic-gate 1058*7c478bd9Sstevel@tonic-gate if (eer & EER_NCEEN) 1059*7c478bd9Sstevel@tonic-gate set_error_enable(eer & ~EER_CEEN); 1060*7c478bd9Sstevel@tonic-gate 1061*7c478bd9Sstevel@tonic-gate /* 1062*7c478bd9Sstevel@tonic-gate * Check and clear any ECC errors from the scrub. If the scrub did 1063*7c478bd9Sstevel@tonic-gate * not trip over the error, mark it intermittent. If the scrub did 1064*7c478bd9Sstevel@tonic-gate * trip the error again and it did not scrub away, mark it sticky. 1065*7c478bd9Sstevel@tonic-gate * Otherwise mark it persistent. 
1066*7c478bd9Sstevel@tonic-gate */ 1067*7c478bd9Sstevel@tonic-gate if (check_ecc(ecc) != 0) { 1068*7c478bd9Sstevel@tonic-gate cpu_read_paddr(ecc, 0, 1); 1069*7c478bd9Sstevel@tonic-gate 1070*7c478bd9Sstevel@tonic-gate if (check_ecc(ecc) != 0) 1071*7c478bd9Sstevel@tonic-gate status = ECC_STICKY; 1072*7c478bd9Sstevel@tonic-gate else 1073*7c478bd9Sstevel@tonic-gate status = ECC_PERSISTENT; 1074*7c478bd9Sstevel@tonic-gate } else 1075*7c478bd9Sstevel@tonic-gate status = ECC_INTERMITTENT; 1076*7c478bd9Sstevel@tonic-gate 1077*7c478bd9Sstevel@tonic-gate if (eer & (EER_CEEN | EER_NCEEN)) 1078*7c478bd9Sstevel@tonic-gate set_error_enable(eer); 1079*7c478bd9Sstevel@tonic-gate kpreempt_enable(); 1080*7c478bd9Sstevel@tonic-gate 1081*7c478bd9Sstevel@tonic-gate ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY); 1082*7c478bd9Sstevel@tonic-gate ecc->flt_status |= status; 1083*7c478bd9Sstevel@tonic-gate } 1084*7c478bd9Sstevel@tonic-gate 1085*7c478bd9Sstevel@tonic-gate /* 1086*7c478bd9Sstevel@tonic-gate * get the syndrome and unum, and then call the routines 1087*7c478bd9Sstevel@tonic-gate * to check the other cpus and iobuses, and then do the error logging. 
1088*7c478bd9Sstevel@tonic-gate */ 1089*7c478bd9Sstevel@tonic-gate /*ARGSUSED1*/ 1090*7c478bd9Sstevel@tonic-gate void 1091*7c478bd9Sstevel@tonic-gate cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep) 1092*7c478bd9Sstevel@tonic-gate { 1093*7c478bd9Sstevel@tonic-gate char unum[UNUM_NAMLEN]; 1094*7c478bd9Sstevel@tonic-gate int len = 0; 1095*7c478bd9Sstevel@tonic-gate int ce_verbose = 0; 1096*7c478bd9Sstevel@tonic-gate 1097*7c478bd9Sstevel@tonic-gate ASSERT(ecc->flt_func != NULL); 1098*7c478bd9Sstevel@tonic-gate 1099*7c478bd9Sstevel@tonic-gate /* Get the unum string for logging purposes */ 1100*7c478bd9Sstevel@tonic-gate (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum, 1101*7c478bd9Sstevel@tonic-gate UNUM_NAMLEN, &len); 1102*7c478bd9Sstevel@tonic-gate 1103*7c478bd9Sstevel@tonic-gate /* Call specific error logging routine */ 1104*7c478bd9Sstevel@tonic-gate (void) (*ecc->flt_func)(ecc, unum); 1105*7c478bd9Sstevel@tonic-gate 1106*7c478bd9Sstevel@tonic-gate /* 1107*7c478bd9Sstevel@tonic-gate * Count errors per unum. 1108*7c478bd9Sstevel@tonic-gate * Non-memory errors are all counted via a special unum string. 
1109*7c478bd9Sstevel@tonic-gate */ 1110*7c478bd9Sstevel@tonic-gate if (ce_count_unum(ecc->flt_status, len, unum) == PAGE_IS_FAILING && 1111*7c478bd9Sstevel@tonic-gate automatic_page_removal) { 1112*7c478bd9Sstevel@tonic-gate page_t *pp = page_numtopp_nolock((pfn_t) 1113*7c478bd9Sstevel@tonic-gate (ecc->flt_addr >> MMU_PAGESHIFT)); 1114*7c478bd9Sstevel@tonic-gate 1115*7c478bd9Sstevel@tonic-gate if (pp) { 1116*7c478bd9Sstevel@tonic-gate page_settoxic(pp, PAGE_IS_FAULTY); 1117*7c478bd9Sstevel@tonic-gate (void) page_retire(pp, PAGE_IS_FAILING); 1118*7c478bd9Sstevel@tonic-gate } 1119*7c478bd9Sstevel@tonic-gate } 1120*7c478bd9Sstevel@tonic-gate 1121*7c478bd9Sstevel@tonic-gate if (ecc->flt_panic) { 1122*7c478bd9Sstevel@tonic-gate ce_verbose = 1; 1123*7c478bd9Sstevel@tonic-gate } else if ((ecc->flt_class == BUS_FAULT) || 1124*7c478bd9Sstevel@tonic-gate (ecc->flt_stat & P_AFSR_CE)) { 1125*7c478bd9Sstevel@tonic-gate ce_verbose = (ce_verbose_memory > 0); 1126*7c478bd9Sstevel@tonic-gate } else { 1127*7c478bd9Sstevel@tonic-gate ce_verbose = 1; 1128*7c478bd9Sstevel@tonic-gate } 1129*7c478bd9Sstevel@tonic-gate 1130*7c478bd9Sstevel@tonic-gate if (ce_verbose) { 1131*7c478bd9Sstevel@tonic-gate spitf_async_flt sflt; 1132*7c478bd9Sstevel@tonic-gate int synd_code; 1133*7c478bd9Sstevel@tonic-gate 1134*7c478bd9Sstevel@tonic-gate sflt.cmn_asyncflt = *ecc; /* for cpu_aflt_log() */ 1135*7c478bd9Sstevel@tonic-gate 1136*7c478bd9Sstevel@tonic-gate cpu_ce_log_status(&sflt, unum); 1137*7c478bd9Sstevel@tonic-gate 1138*7c478bd9Sstevel@tonic-gate synd_code = synd_to_synd_code(AFLT_STAT_VALID, 1139*7c478bd9Sstevel@tonic-gate SYND(ecc->flt_synd)); 1140*7c478bd9Sstevel@tonic-gate 1141*7c478bd9Sstevel@tonic-gate if (SYND_IS_SINGLE_BIT_DATA(synd_code)) { 1142*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST, 1143*7c478bd9Sstevel@tonic-gate NULL, " ECC Data Bit %2d was in error " 1144*7c478bd9Sstevel@tonic-gate "and corrected", synd_code); 1145*7c478bd9Sstevel@tonic-gate } else 
if (SYND_IS_SINGLE_BIT_CHK(synd_code)) { 1146*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST, 1147*7c478bd9Sstevel@tonic-gate NULL, " ECC Check Bit %2d was in error " 1148*7c478bd9Sstevel@tonic-gate "and corrected", synd_code - C0); 1149*7c478bd9Sstevel@tonic-gate } else { 1150*7c478bd9Sstevel@tonic-gate /* 1151*7c478bd9Sstevel@tonic-gate * These are UE errors - we shouldn't be getting CE 1152*7c478bd9Sstevel@tonic-gate * traps for these; handle them in case of bad h/w. 1153*7c478bd9Sstevel@tonic-gate */ 1154*7c478bd9Sstevel@tonic-gate switch (synd_code) { 1155*7c478bd9Sstevel@tonic-gate case M2: 1156*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 1157*7c478bd9Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 1158*7c478bd9Sstevel@tonic-gate " Two ECC Bits were in error"); 1159*7c478bd9Sstevel@tonic-gate break; 1160*7c478bd9Sstevel@tonic-gate case M3: 1161*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 1162*7c478bd9Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 1163*7c478bd9Sstevel@tonic-gate " Three ECC Bits were in error"); 1164*7c478bd9Sstevel@tonic-gate break; 1165*7c478bd9Sstevel@tonic-gate case M4: 1166*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 1167*7c478bd9Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 1168*7c478bd9Sstevel@tonic-gate " Four ECC Bits were in error"); 1169*7c478bd9Sstevel@tonic-gate break; 1170*7c478bd9Sstevel@tonic-gate case MX: 1171*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 1172*7c478bd9Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 1173*7c478bd9Sstevel@tonic-gate " More than Four ECC bits were " 1174*7c478bd9Sstevel@tonic-gate "in error"); 1175*7c478bd9Sstevel@tonic-gate break; 1176*7c478bd9Sstevel@tonic-gate default: 1177*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &sflt, 1178*7c478bd9Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 1179*7c478bd9Sstevel@tonic-gate " Unknown fault syndrome %d", 1180*7c478bd9Sstevel@tonic-gate synd_code); 1181*7c478bd9Sstevel@tonic-gate break; 
1182*7c478bd9Sstevel@tonic-gate } 1183*7c478bd9Sstevel@tonic-gate } 1184*7c478bd9Sstevel@tonic-gate } 1185*7c478bd9Sstevel@tonic-gate 1186*7c478bd9Sstevel@tonic-gate /* Display entire cache line, if valid address */ 1187*7c478bd9Sstevel@tonic-gate if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR) 1188*7c478bd9Sstevel@tonic-gate read_ecc_data(ecc, 1, 1); 1189*7c478bd9Sstevel@tonic-gate } 1190*7c478bd9Sstevel@tonic-gate 1191*7c478bd9Sstevel@tonic-gate /* 1192*7c478bd9Sstevel@tonic-gate * We route all errors through a single switch statement. 1193*7c478bd9Sstevel@tonic-gate */ 1194*7c478bd9Sstevel@tonic-gate void 1195*7c478bd9Sstevel@tonic-gate cpu_ue_log_err(struct async_flt *aflt) 1196*7c478bd9Sstevel@tonic-gate { 1197*7c478bd9Sstevel@tonic-gate 1198*7c478bd9Sstevel@tonic-gate switch (aflt->flt_class) { 1199*7c478bd9Sstevel@tonic-gate case CPU_FAULT: 1200*7c478bd9Sstevel@tonic-gate cpu_async_log_err(aflt); 1201*7c478bd9Sstevel@tonic-gate break; 1202*7c478bd9Sstevel@tonic-gate 1203*7c478bd9Sstevel@tonic-gate case BUS_FAULT: 1204*7c478bd9Sstevel@tonic-gate bus_async_log_err(aflt); 1205*7c478bd9Sstevel@tonic-gate break; 1206*7c478bd9Sstevel@tonic-gate 1207*7c478bd9Sstevel@tonic-gate default: 1208*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "discarding async error 0x%p with invalid " 1209*7c478bd9Sstevel@tonic-gate "fault class (0x%x)", (void *)aflt, aflt->flt_class); 1210*7c478bd9Sstevel@tonic-gate break; 1211*7c478bd9Sstevel@tonic-gate } 1212*7c478bd9Sstevel@tonic-gate } 1213*7c478bd9Sstevel@tonic-gate 1214*7c478bd9Sstevel@tonic-gate /* Values for action variable in cpu_async_error() */ 1215*7c478bd9Sstevel@tonic-gate #define ACTION_NONE 0 1216*7c478bd9Sstevel@tonic-gate #define ACTION_TRAMPOLINE 1 1217*7c478bd9Sstevel@tonic-gate #define ACTION_AST_FLAGS 2 1218*7c478bd9Sstevel@tonic-gate 1219*7c478bd9Sstevel@tonic-gate /* 1220*7c478bd9Sstevel@tonic-gate * Access error trap handler for asynchronous cpu errors. 
This routine is 1221*7c478bd9Sstevel@tonic-gate * called to handle a data or instruction access error. All fatal errors are 1222*7c478bd9Sstevel@tonic-gate * completely handled by this routine (by panicking). Non fatal error logging 1223*7c478bd9Sstevel@tonic-gate * is queued for later processing either via AST or softint at a lower PIL. 1224*7c478bd9Sstevel@tonic-gate * In case of panic, the error log queue will also be processed as part of the 1225*7c478bd9Sstevel@tonic-gate * panic flow to ensure all errors are logged. This routine is called with all 1226*7c478bd9Sstevel@tonic-gate * errors disabled at PIL15. The AFSR bits are cleared and the UDBL and UDBH 1227*7c478bd9Sstevel@tonic-gate * error bits are also cleared. The hardware has also disabled the I and 1228*7c478bd9Sstevel@tonic-gate * D-caches for us, so we must re-enable them before returning. 1229*7c478bd9Sstevel@tonic-gate * 1230*7c478bd9Sstevel@tonic-gate * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP: 1231*7c478bd9Sstevel@tonic-gate * 1232*7c478bd9Sstevel@tonic-gate * _______________________________________________________________ 1233*7c478bd9Sstevel@tonic-gate * | Privileged tl0 | Unprivileged | 1234*7c478bd9Sstevel@tonic-gate * | Protected | Unprotected | Protected | Unprotected | 1235*7c478bd9Sstevel@tonic-gate * |on_trap|lofault| | | | 1236*7c478bd9Sstevel@tonic-gate * -------------|-------|-------+---------------+---------------+-------------| 1237*7c478bd9Sstevel@tonic-gate * | | | | | | 1238*7c478bd9Sstevel@tonic-gate * UE/LDP/EDP | L,T,p | L,R,p | L,P | n/a | L,R,p | 1239*7c478bd9Sstevel@tonic-gate * | | | | | | 1240*7c478bd9Sstevel@tonic-gate * TO/BERR | T | S | L,P | n/a | S | 1241*7c478bd9Sstevel@tonic-gate * | | | | | | 1242*7c478bd9Sstevel@tonic-gate * WP | L,M,p | L,M,p | L,M,p | n/a | L,M,p | 1243*7c478bd9Sstevel@tonic-gate * | | | | | | 1244*7c478bd9Sstevel@tonic-gate * CP (IIi/IIe) | L,P | L,P | L,P | n/a | L,P | 1245*7c478bd9Sstevel@tonic-gate * 
____________________________________________________________________________ 1246*7c478bd9Sstevel@tonic-gate * 1247*7c478bd9Sstevel@tonic-gate * 1248*7c478bd9Sstevel@tonic-gate * Action codes: 1249*7c478bd9Sstevel@tonic-gate * 1250*7c478bd9Sstevel@tonic-gate * L - log 1251*7c478bd9Sstevel@tonic-gate * M - kick off memscrubber if flt_in_memory 1252*7c478bd9Sstevel@tonic-gate * P - panic 1253*7c478bd9Sstevel@tonic-gate * p - panic if US-IIi or US-IIe (Sabre); overrides R and M 1254*7c478bd9Sstevel@tonic-gate * R - i) if aft_panic is set, panic 1255*7c478bd9Sstevel@tonic-gate * ii) otherwise, send hwerr event to contract and SIGKILL to process 1256*7c478bd9Sstevel@tonic-gate * S - send SIGBUS to process 1257*7c478bd9Sstevel@tonic-gate * T - trampoline 1258*7c478bd9Sstevel@tonic-gate * 1259*7c478bd9Sstevel@tonic-gate * Special cases: 1260*7c478bd9Sstevel@tonic-gate * 1261*7c478bd9Sstevel@tonic-gate * 1) if aft_testfatal is set, all faults result in a panic regardless 1262*7c478bd9Sstevel@tonic-gate * of type (even WP), protection (even on_trap), or privilege. 
1263*7c478bd9Sstevel@tonic-gate */ 1264*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1265*7c478bd9Sstevel@tonic-gate void 1266*7c478bd9Sstevel@tonic-gate cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr, 1267*7c478bd9Sstevel@tonic-gate uint_t p_afsr_high, uint_t p_afar_high) 1268*7c478bd9Sstevel@tonic-gate { 1269*7c478bd9Sstevel@tonic-gate ushort_t sdbh, sdbl, ttype, tl; 1270*7c478bd9Sstevel@tonic-gate spitf_async_flt spf_flt; 1271*7c478bd9Sstevel@tonic-gate struct async_flt *aflt; 1272*7c478bd9Sstevel@tonic-gate char pr_reason[28]; 1273*7c478bd9Sstevel@tonic-gate uint64_t oafsr; 1274*7c478bd9Sstevel@tonic-gate uint64_t acc_afsr = 0; /* accumulated afsr */ 1275*7c478bd9Sstevel@tonic-gate int action = ACTION_NONE; 1276*7c478bd9Sstevel@tonic-gate uint64_t t_afar = p_afar; 1277*7c478bd9Sstevel@tonic-gate uint64_t t_afsr = p_afsr; 1278*7c478bd9Sstevel@tonic-gate int expected = DDI_FM_ERR_UNEXPECTED; 1279*7c478bd9Sstevel@tonic-gate ddi_acc_hdl_t *hp; 1280*7c478bd9Sstevel@tonic-gate 1281*7c478bd9Sstevel@tonic-gate /* 1282*7c478bd9Sstevel@tonic-gate * We need to look at p_flag to determine if the thread detected an 1283*7c478bd9Sstevel@tonic-gate * error while dumping core. We can't grab p_lock here, but it's ok 1284*7c478bd9Sstevel@tonic-gate * because we just need a consistent snapshot and we know that everyone 1285*7c478bd9Sstevel@tonic-gate * else will store a consistent set of bits while holding p_lock. We 1286*7c478bd9Sstevel@tonic-gate * don't have to worry about a race because SDOCORE is set once prior 1287*7c478bd9Sstevel@tonic-gate * to doing i/o from the process's address space and is never cleared. 
1288*7c478bd9Sstevel@tonic-gate */ 1289*7c478bd9Sstevel@tonic-gate uint_t pflag = ttoproc(curthread)->p_flag; 1290*7c478bd9Sstevel@tonic-gate 1291*7c478bd9Sstevel@tonic-gate pr_reason[0] = '\0'; 1292*7c478bd9Sstevel@tonic-gate 1293*7c478bd9Sstevel@tonic-gate /* 1294*7c478bd9Sstevel@tonic-gate * Note: the Spitfire data buffer error registers 1295*7c478bd9Sstevel@tonic-gate * (upper and lower halves) are or'ed into the upper 1296*7c478bd9Sstevel@tonic-gate * word of the afsr by async_err() if P_AFSR_UE is set. 1297*7c478bd9Sstevel@tonic-gate */ 1298*7c478bd9Sstevel@tonic-gate sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF); 1299*7c478bd9Sstevel@tonic-gate sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF); 1300*7c478bd9Sstevel@tonic-gate 1301*7c478bd9Sstevel@tonic-gate /* 1302*7c478bd9Sstevel@tonic-gate * Grab the ttype encoded in <63:53> of the saved 1303*7c478bd9Sstevel@tonic-gate * afsr passed from async_err() 1304*7c478bd9Sstevel@tonic-gate */ 1305*7c478bd9Sstevel@tonic-gate ttype = (ushort_t)((t_afsr >> 53) & 0x1FF); 1306*7c478bd9Sstevel@tonic-gate tl = (ushort_t)(t_afsr >> 62); 1307*7c478bd9Sstevel@tonic-gate 1308*7c478bd9Sstevel@tonic-gate t_afsr &= S_AFSR_MASK; 1309*7c478bd9Sstevel@tonic-gate t_afar &= SABRE_AFAR_PA; /* must use Sabre AFAR mask */ 1310*7c478bd9Sstevel@tonic-gate 1311*7c478bd9Sstevel@tonic-gate /* 1312*7c478bd9Sstevel@tonic-gate * Initialize most of the common and CPU-specific structure. We derive 1313*7c478bd9Sstevel@tonic-gate * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit. The 1314*7c478bd9Sstevel@tonic-gate * initial setting of aflt->flt_panic is based on TL: we must panic if 1315*7c478bd9Sstevel@tonic-gate * the error occurred at TL > 0. We also set flt_panic if the test/demo 1316*7c478bd9Sstevel@tonic-gate * tuneable aft_testfatal is set (not the default). 
1317*7c478bd9Sstevel@tonic-gate */ 1318*7c478bd9Sstevel@tonic-gate bzero(&spf_flt, sizeof (spitf_async_flt)); 1319*7c478bd9Sstevel@tonic-gate aflt = (struct async_flt *)&spf_flt; 1320*7c478bd9Sstevel@tonic-gate aflt->flt_id = gethrtime_waitfree(); 1321*7c478bd9Sstevel@tonic-gate aflt->flt_stat = t_afsr; 1322*7c478bd9Sstevel@tonic-gate aflt->flt_addr = t_afar; 1323*7c478bd9Sstevel@tonic-gate aflt->flt_bus_id = getprocessorid(); 1324*7c478bd9Sstevel@tonic-gate aflt->flt_inst = CPU->cpu_id; 1325*7c478bd9Sstevel@tonic-gate aflt->flt_pc = (caddr_t)rp->r_pc; 1326*7c478bd9Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_NONE; 1327*7c478bd9Sstevel@tonic-gate aflt->flt_class = CPU_FAULT; 1328*7c478bd9Sstevel@tonic-gate aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1329*7c478bd9Sstevel@tonic-gate aflt->flt_tl = (uchar_t)tl; 1330*7c478bd9Sstevel@tonic-gate aflt->flt_panic = (tl != 0 || aft_testfatal != 0); 1331*7c478bd9Sstevel@tonic-gate aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1332*7c478bd9Sstevel@tonic-gate 1333*7c478bd9Sstevel@tonic-gate /* 1334*7c478bd9Sstevel@tonic-gate * Set flt_status based on the trap type. If we end up here as the 1335*7c478bd9Sstevel@tonic-gate * result of a UE detected by the CE handling code, leave status 0. 1336*7c478bd9Sstevel@tonic-gate */ 1337*7c478bd9Sstevel@tonic-gate switch (ttype) { 1338*7c478bd9Sstevel@tonic-gate case T_DATA_ERROR: 1339*7c478bd9Sstevel@tonic-gate aflt->flt_status = ECC_D_TRAP; 1340*7c478bd9Sstevel@tonic-gate break; 1341*7c478bd9Sstevel@tonic-gate case T_INSTR_ERROR: 1342*7c478bd9Sstevel@tonic-gate aflt->flt_status = ECC_I_TRAP; 1343*7c478bd9Sstevel@tonic-gate break; 1344*7c478bd9Sstevel@tonic-gate } 1345*7c478bd9Sstevel@tonic-gate 1346*7c478bd9Sstevel@tonic-gate spf_flt.flt_sdbh = sdbh; 1347*7c478bd9Sstevel@tonic-gate spf_flt.flt_sdbl = sdbl; 1348*7c478bd9Sstevel@tonic-gate 1349*7c478bd9Sstevel@tonic-gate /* 1350*7c478bd9Sstevel@tonic-gate * Check for fatal async errors. 
1351*7c478bd9Sstevel@tonic-gate */ 1352*7c478bd9Sstevel@tonic-gate check_misc_err(&spf_flt); 1353*7c478bd9Sstevel@tonic-gate 1354*7c478bd9Sstevel@tonic-gate /* 1355*7c478bd9Sstevel@tonic-gate * If the trap occurred in privileged mode at TL=0, we need to check to 1356*7c478bd9Sstevel@tonic-gate * see if we were executing in the kernel under on_trap() or t_lofault 1357*7c478bd9Sstevel@tonic-gate * protection. If so, modify the saved registers so that we return 1358*7c478bd9Sstevel@tonic-gate * from the trap to the appropriate trampoline routine. 1359*7c478bd9Sstevel@tonic-gate */ 1360*7c478bd9Sstevel@tonic-gate if (aflt->flt_priv && tl == 0) { 1361*7c478bd9Sstevel@tonic-gate if (curthread->t_ontrap != NULL) { 1362*7c478bd9Sstevel@tonic-gate on_trap_data_t *otp = curthread->t_ontrap; 1363*7c478bd9Sstevel@tonic-gate 1364*7c478bd9Sstevel@tonic-gate if (otp->ot_prot & OT_DATA_EC) { 1365*7c478bd9Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_EC; 1366*7c478bd9Sstevel@tonic-gate otp->ot_trap |= OT_DATA_EC; 1367*7c478bd9Sstevel@tonic-gate rp->r_pc = otp->ot_trampoline; 1368*7c478bd9Sstevel@tonic-gate rp->r_npc = rp->r_pc + 4; 1369*7c478bd9Sstevel@tonic-gate action = ACTION_TRAMPOLINE; 1370*7c478bd9Sstevel@tonic-gate } 1371*7c478bd9Sstevel@tonic-gate 1372*7c478bd9Sstevel@tonic-gate if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) && 1373*7c478bd9Sstevel@tonic-gate (otp->ot_prot & OT_DATA_ACCESS)) { 1374*7c478bd9Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_ACCESS; 1375*7c478bd9Sstevel@tonic-gate otp->ot_trap |= OT_DATA_ACCESS; 1376*7c478bd9Sstevel@tonic-gate rp->r_pc = otp->ot_trampoline; 1377*7c478bd9Sstevel@tonic-gate rp->r_npc = rp->r_pc + 4; 1378*7c478bd9Sstevel@tonic-gate action = ACTION_TRAMPOLINE; 1379*7c478bd9Sstevel@tonic-gate /* 1380*7c478bd9Sstevel@tonic-gate * for peeks and caut_gets errors are expected 1381*7c478bd9Sstevel@tonic-gate */ 1382*7c478bd9Sstevel@tonic-gate hp = (ddi_acc_hdl_t *)otp->ot_handle; 1383*7c478bd9Sstevel@tonic-gate if (!hp) 
1384*7c478bd9Sstevel@tonic-gate expected = DDI_FM_ERR_PEEK; 1385*7c478bd9Sstevel@tonic-gate else if (hp->ah_acc.devacc_attr_access == 1386*7c478bd9Sstevel@tonic-gate DDI_CAUTIOUS_ACC) 1387*7c478bd9Sstevel@tonic-gate expected = DDI_FM_ERR_EXPECTED; 1388*7c478bd9Sstevel@tonic-gate } 1389*7c478bd9Sstevel@tonic-gate 1390*7c478bd9Sstevel@tonic-gate } else if (curthread->t_lofault) { 1391*7c478bd9Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_COPY; 1392*7c478bd9Sstevel@tonic-gate rp->r_g1 = EFAULT; 1393*7c478bd9Sstevel@tonic-gate rp->r_pc = curthread->t_lofault; 1394*7c478bd9Sstevel@tonic-gate rp->r_npc = rp->r_pc + 4; 1395*7c478bd9Sstevel@tonic-gate action = ACTION_TRAMPOLINE; 1396*7c478bd9Sstevel@tonic-gate } 1397*7c478bd9Sstevel@tonic-gate } 1398*7c478bd9Sstevel@tonic-gate 1399*7c478bd9Sstevel@tonic-gate /* 1400*7c478bd9Sstevel@tonic-gate * Determine if this error needs to be treated as fatal. Note that 1401*7c478bd9Sstevel@tonic-gate * multiple errors detected upon entry to this trap handler does not 1402*7c478bd9Sstevel@tonic-gate * necessarily warrant a panic. We only want to panic if the trap 1403*7c478bd9Sstevel@tonic-gate * happened in privileged mode and not under t_ontrap or t_lofault 1404*7c478bd9Sstevel@tonic-gate * protection. The exception is WP: if we *only* get WP, it is not 1405*7c478bd9Sstevel@tonic-gate * fatal even if the trap occurred in privileged mode, except on Sabre. 1406*7c478bd9Sstevel@tonic-gate * 1407*7c478bd9Sstevel@tonic-gate * aft_panic, if set, effectively makes us treat usermode 1408*7c478bd9Sstevel@tonic-gate * UE/EDP/LDP faults as if they were privileged - so we we will 1409*7c478bd9Sstevel@tonic-gate * panic instead of sending a contract event. A lofault-protected 1410*7c478bd9Sstevel@tonic-gate * fault will normally follow the contract event; if aft_panic is 1411*7c478bd9Sstevel@tonic-gate * set this will be changed to a panic. 
1412*7c478bd9Sstevel@tonic-gate * 1413*7c478bd9Sstevel@tonic-gate * For usermode BERR/BTO errors, eg from processes performing device 1414*7c478bd9Sstevel@tonic-gate * control through mapped device memory, we need only deliver 1415*7c478bd9Sstevel@tonic-gate * a SIGBUS to the offending process. 1416*7c478bd9Sstevel@tonic-gate * 1417*7c478bd9Sstevel@tonic-gate * Some additional flt_panic reasons (eg, WP on Sabre) will be 1418*7c478bd9Sstevel@tonic-gate * checked later; for now we implement the common reasons. 1419*7c478bd9Sstevel@tonic-gate */ 1420*7c478bd9Sstevel@tonic-gate if (aflt->flt_prot == AFLT_PROT_NONE) { 1421*7c478bd9Sstevel@tonic-gate /* 1422*7c478bd9Sstevel@tonic-gate * Beware - multiple bits may be set in AFSR 1423*7c478bd9Sstevel@tonic-gate */ 1424*7c478bd9Sstevel@tonic-gate if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) { 1425*7c478bd9Sstevel@tonic-gate if (aflt->flt_priv || aft_panic) 1426*7c478bd9Sstevel@tonic-gate aflt->flt_panic = 1; 1427*7c478bd9Sstevel@tonic-gate } 1428*7c478bd9Sstevel@tonic-gate 1429*7c478bd9Sstevel@tonic-gate if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1430*7c478bd9Sstevel@tonic-gate if (aflt->flt_priv) 1431*7c478bd9Sstevel@tonic-gate aflt->flt_panic = 1; 1432*7c478bd9Sstevel@tonic-gate } 1433*7c478bd9Sstevel@tonic-gate } else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) { 1434*7c478bd9Sstevel@tonic-gate aflt->flt_panic = 1; 1435*7c478bd9Sstevel@tonic-gate } 1436*7c478bd9Sstevel@tonic-gate 1437*7c478bd9Sstevel@tonic-gate /* 1438*7c478bd9Sstevel@tonic-gate * UE/BERR/TO: Call our bus nexus friends to check for 1439*7c478bd9Sstevel@tonic-gate * IO errors that may have resulted in this trap. 
1440*7c478bd9Sstevel@tonic-gate */ 1441*7c478bd9Sstevel@tonic-gate if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) { 1442*7c478bd9Sstevel@tonic-gate cpu_run_bus_error_handlers(aflt, expected); 1443*7c478bd9Sstevel@tonic-gate } 1444*7c478bd9Sstevel@tonic-gate 1445*7c478bd9Sstevel@tonic-gate /* 1446*7c478bd9Sstevel@tonic-gate * Handle UE: If the UE is in memory, we need to flush the bad line from 1447*7c478bd9Sstevel@tonic-gate * the E-cache. We also need to query the bus nexus for fatal errors. 1448*7c478bd9Sstevel@tonic-gate * For sabre, we will panic on UEs. Attempts to do diagnostic read on 1449*7c478bd9Sstevel@tonic-gate * caches may introduce more parity errors (especially when the module 1450*7c478bd9Sstevel@tonic-gate * is bad) and in sabre there is no guarantee that such errors 1451*7c478bd9Sstevel@tonic-gate * (if introduced) are written back as poisoned data. 1452*7c478bd9Sstevel@tonic-gate */ 1453*7c478bd9Sstevel@tonic-gate if (t_afsr & P_AFSR_UE) { 1454*7c478bd9Sstevel@tonic-gate int i; 1455*7c478bd9Sstevel@tonic-gate 1456*7c478bd9Sstevel@tonic-gate (void) strcat(pr_reason, "UE "); 1457*7c478bd9Sstevel@tonic-gate 1458*7c478bd9Sstevel@tonic-gate spf_flt.flt_type = CPU_UE_ERR; 1459*7c478bd9Sstevel@tonic-gate aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 1460*7c478bd9Sstevel@tonic-gate MMU_PAGESHIFT)) ? 1: 0; 1461*7c478bd9Sstevel@tonic-gate 1462*7c478bd9Sstevel@tonic-gate /* 1463*7c478bd9Sstevel@tonic-gate * With UE, we have the PA of the fault. 1464*7c478bd9Sstevel@tonic-gate * Let do a diagnostic read to get the ecache 1465*7c478bd9Sstevel@tonic-gate * data and tag info of the bad line for logging. 
1466*7c478bd9Sstevel@tonic-gate */ 1467*7c478bd9Sstevel@tonic-gate if (aflt->flt_in_memory) { 1468*7c478bd9Sstevel@tonic-gate uint32_t ec_set_size; 1469*7c478bd9Sstevel@tonic-gate uchar_t state; 1470*7c478bd9Sstevel@tonic-gate uint32_t ecache_idx; 1471*7c478bd9Sstevel@tonic-gate uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64); 1472*7c478bd9Sstevel@tonic-gate 1473*7c478bd9Sstevel@tonic-gate /* touch the line to put it in ecache */ 1474*7c478bd9Sstevel@tonic-gate acc_afsr |= read_and_clear_afsr(); 1475*7c478bd9Sstevel@tonic-gate (void) lddphys(faultpa); 1476*7c478bd9Sstevel@tonic-gate acc_afsr |= (read_and_clear_afsr() & 1477*7c478bd9Sstevel@tonic-gate ~(P_AFSR_EDP | P_AFSR_UE)); 1478*7c478bd9Sstevel@tonic-gate 1479*7c478bd9Sstevel@tonic-gate ec_set_size = cpunodes[CPU->cpu_id].ecache_size / 1480*7c478bd9Sstevel@tonic-gate ecache_associativity; 1481*7c478bd9Sstevel@tonic-gate 1482*7c478bd9Sstevel@tonic-gate for (i = 0; i < ecache_associativity; i++) { 1483*7c478bd9Sstevel@tonic-gate ecache_idx = i * ec_set_size + 1484*7c478bd9Sstevel@tonic-gate (aflt->flt_addr % ec_set_size); 1485*7c478bd9Sstevel@tonic-gate get_ecache_dtag(P2ALIGN(ecache_idx, 64), 1486*7c478bd9Sstevel@tonic-gate (uint64_t *)&spf_flt.flt_ec_data[0], 1487*7c478bd9Sstevel@tonic-gate &spf_flt.flt_ec_tag, &oafsr, &acc_afsr); 1488*7c478bd9Sstevel@tonic-gate acc_afsr |= oafsr; 1489*7c478bd9Sstevel@tonic-gate 1490*7c478bd9Sstevel@tonic-gate state = (uchar_t)((spf_flt.flt_ec_tag & 1491*7c478bd9Sstevel@tonic-gate cpu_ec_state_mask) >> cpu_ec_state_shift); 1492*7c478bd9Sstevel@tonic-gate 1493*7c478bd9Sstevel@tonic-gate if ((state & cpu_ec_state_valid) && 1494*7c478bd9Sstevel@tonic-gate ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) == 1495*7c478bd9Sstevel@tonic-gate ((uint64_t)aflt->flt_addr >> 1496*7c478bd9Sstevel@tonic-gate cpu_ec_tag_shift))) 1497*7c478bd9Sstevel@tonic-gate break; 1498*7c478bd9Sstevel@tonic-gate } 1499*7c478bd9Sstevel@tonic-gate 1500*7c478bd9Sstevel@tonic-gate /* 1501*7c478bd9Sstevel@tonic-gate * 
Check to see if the ecache tag is valid for the 1502*7c478bd9Sstevel@tonic-gate * fault PA. In the very unlikely event where the 1503*7c478bd9Sstevel@tonic-gate * line could be victimized, no ecache info will be 1504*7c478bd9Sstevel@tonic-gate * available. If this is the case, capture the line 1505*7c478bd9Sstevel@tonic-gate * from memory instead. 1506*7c478bd9Sstevel@tonic-gate */ 1507*7c478bd9Sstevel@tonic-gate if ((state & cpu_ec_state_valid) == 0 || 1508*7c478bd9Sstevel@tonic-gate (spf_flt.flt_ec_tag & cpu_ec_tag_mask) != 1509*7c478bd9Sstevel@tonic-gate ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) { 1510*7c478bd9Sstevel@tonic-gate for (i = 0; i < 8; i++, faultpa += 8) { 1511*7c478bd9Sstevel@tonic-gate ec_data_t *ecdptr; 1512*7c478bd9Sstevel@tonic-gate 1513*7c478bd9Sstevel@tonic-gate ecdptr = &spf_flt.flt_ec_data[i]; 1514*7c478bd9Sstevel@tonic-gate acc_afsr |= read_and_clear_afsr(); 1515*7c478bd9Sstevel@tonic-gate ecdptr->ec_d8 = lddphys(faultpa); 1516*7c478bd9Sstevel@tonic-gate acc_afsr |= (read_and_clear_afsr() & 1517*7c478bd9Sstevel@tonic-gate ~(P_AFSR_EDP | P_AFSR_UE)); 1518*7c478bd9Sstevel@tonic-gate ecdptr->ec_afsr = 0; 1519*7c478bd9Sstevel@tonic-gate /* null afsr value */ 1520*7c478bd9Sstevel@tonic-gate } 1521*7c478bd9Sstevel@tonic-gate 1522*7c478bd9Sstevel@tonic-gate /* 1523*7c478bd9Sstevel@tonic-gate * Mark tag invalid to indicate mem dump 1524*7c478bd9Sstevel@tonic-gate * when we print out the info. 
1525*7c478bd9Sstevel@tonic-gate */ 1526*7c478bd9Sstevel@tonic-gate spf_flt.flt_ec_tag = AFLT_INV_ADDR; 1527*7c478bd9Sstevel@tonic-gate } 1528*7c478bd9Sstevel@tonic-gate spf_flt.flt_ec_lcnt = 1; 1529*7c478bd9Sstevel@tonic-gate 1530*7c478bd9Sstevel@tonic-gate /* 1531*7c478bd9Sstevel@tonic-gate * Flush out the bad line 1532*7c478bd9Sstevel@tonic-gate */ 1533*7c478bd9Sstevel@tonic-gate flushecacheline(P2ALIGN(aflt->flt_addr, 64), 1534*7c478bd9Sstevel@tonic-gate cpunodes[CPU->cpu_id].ecache_size); 1535*7c478bd9Sstevel@tonic-gate 1536*7c478bd9Sstevel@tonic-gate acc_afsr |= clear_errors(NULL, NULL); 1537*7c478bd9Sstevel@tonic-gate } 1538*7c478bd9Sstevel@tonic-gate 1539*7c478bd9Sstevel@tonic-gate /* 1540*7c478bd9Sstevel@tonic-gate * Ask our bus nexus friends if they have any fatal errors. If 1541*7c478bd9Sstevel@tonic-gate * so, they will log appropriate error messages and panic as a 1542*7c478bd9Sstevel@tonic-gate * result. We then queue an event for each UDB that reports a 1543*7c478bd9Sstevel@tonic-gate * UE. Each UE reported in a UDB will have its own log message. 1544*7c478bd9Sstevel@tonic-gate * 1545*7c478bd9Sstevel@tonic-gate * Note from kbn: In the case where there are multiple UEs 1546*7c478bd9Sstevel@tonic-gate * (ME bit is set) - the AFAR address is only accurate to 1547*7c478bd9Sstevel@tonic-gate * the 16-byte granularity. One cannot tell whether the AFAR 1548*7c478bd9Sstevel@tonic-gate * belongs to the UDBH or UDBL syndromes. In this case, we 1549*7c478bd9Sstevel@tonic-gate * always report the AFAR address to be 16-byte aligned. 1550*7c478bd9Sstevel@tonic-gate * 1551*7c478bd9Sstevel@tonic-gate * If we're on a Sabre, there is no SDBL, but it will always 1552*7c478bd9Sstevel@tonic-gate * read as zero, so the sdbl test below will safely fail. 
1553*7c478bd9Sstevel@tonic-gate */ 1554*7c478bd9Sstevel@tonic-gate if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e) 1555*7c478bd9Sstevel@tonic-gate aflt->flt_panic = 1; 1556*7c478bd9Sstevel@tonic-gate 1557*7c478bd9Sstevel@tonic-gate if (sdbh & P_DER_UE) { 1558*7c478bd9Sstevel@tonic-gate aflt->flt_synd = sdbh & P_DER_E_SYND; 1559*7c478bd9Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE, 1560*7c478bd9Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1561*7c478bd9Sstevel@tonic-gate aflt->flt_panic); 1562*7c478bd9Sstevel@tonic-gate } 1563*7c478bd9Sstevel@tonic-gate if (sdbl & P_DER_UE) { 1564*7c478bd9Sstevel@tonic-gate aflt->flt_synd = sdbl & P_DER_E_SYND; 1565*7c478bd9Sstevel@tonic-gate aflt->flt_synd |= UDBL_REG; /* indicates UDBL */ 1566*7c478bd9Sstevel@tonic-gate if (!(aflt->flt_stat & P_AFSR_ME)) 1567*7c478bd9Sstevel@tonic-gate aflt->flt_addr |= 0x8; 1568*7c478bd9Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE, 1569*7c478bd9Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1570*7c478bd9Sstevel@tonic-gate aflt->flt_panic); 1571*7c478bd9Sstevel@tonic-gate } 1572*7c478bd9Sstevel@tonic-gate 1573*7c478bd9Sstevel@tonic-gate /* 1574*7c478bd9Sstevel@tonic-gate * We got a UE and are panicking, save the fault PA in a known 1575*7c478bd9Sstevel@tonic-gate * location so that the platform specific panic code can check 1576*7c478bd9Sstevel@tonic-gate * for copyback errors. 1577*7c478bd9Sstevel@tonic-gate */ 1578*7c478bd9Sstevel@tonic-gate if (aflt->flt_panic && aflt->flt_in_memory) { 1579*7c478bd9Sstevel@tonic-gate panic_aflt = *aflt; 1580*7c478bd9Sstevel@tonic-gate } 1581*7c478bd9Sstevel@tonic-gate } 1582*7c478bd9Sstevel@tonic-gate 1583*7c478bd9Sstevel@tonic-gate /* 1584*7c478bd9Sstevel@tonic-gate * Handle EDP and LDP: Locate the line with bad parity and enqueue an 1585*7c478bd9Sstevel@tonic-gate * async error for logging. For Sabre, we panic on EDP or LDP. 
1586*7c478bd9Sstevel@tonic-gate */ 1587*7c478bd9Sstevel@tonic-gate if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) { 1588*7c478bd9Sstevel@tonic-gate spf_flt.flt_type = CPU_EDP_LDP_ERR; 1589*7c478bd9Sstevel@tonic-gate 1590*7c478bd9Sstevel@tonic-gate if (t_afsr & P_AFSR_EDP) 1591*7c478bd9Sstevel@tonic-gate (void) strcat(pr_reason, "EDP "); 1592*7c478bd9Sstevel@tonic-gate 1593*7c478bd9Sstevel@tonic-gate if (t_afsr & P_AFSR_LDP) 1594*7c478bd9Sstevel@tonic-gate (void) strcat(pr_reason, "LDP "); 1595*7c478bd9Sstevel@tonic-gate 1596*7c478bd9Sstevel@tonic-gate /* 1597*7c478bd9Sstevel@tonic-gate * Here we have no PA to work with. 1598*7c478bd9Sstevel@tonic-gate * Scan each line in the ecache to look for 1599*7c478bd9Sstevel@tonic-gate * the one with bad parity. 1600*7c478bd9Sstevel@tonic-gate */ 1601*7c478bd9Sstevel@tonic-gate aflt->flt_addr = AFLT_INV_ADDR; 1602*7c478bd9Sstevel@tonic-gate scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 1603*7c478bd9Sstevel@tonic-gate &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr); 1604*7c478bd9Sstevel@tonic-gate acc_afsr |= (oafsr & ~P_AFSR_WP); 1605*7c478bd9Sstevel@tonic-gate 1606*7c478bd9Sstevel@tonic-gate /* 1607*7c478bd9Sstevel@tonic-gate * If we found a bad PA, update the state to indicate if it is 1608*7c478bd9Sstevel@tonic-gate * memory or I/O space. This code will be important if we ever 1609*7c478bd9Sstevel@tonic-gate * support cacheable frame buffers. 1610*7c478bd9Sstevel@tonic-gate */ 1611*7c478bd9Sstevel@tonic-gate if (aflt->flt_addr != AFLT_INV_ADDR) { 1612*7c478bd9Sstevel@tonic-gate aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 1613*7c478bd9Sstevel@tonic-gate MMU_PAGESHIFT)) ? 1 : 0; 1614*7c478bd9Sstevel@tonic-gate } 1615*7c478bd9Sstevel@tonic-gate 1616*7c478bd9Sstevel@tonic-gate if (isus2i || isus2e) 1617*7c478bd9Sstevel@tonic-gate aflt->flt_panic = 1; 1618*7c478bd9Sstevel@tonic-gate 1619*7c478bd9Sstevel@tonic-gate cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ? 
1620*7c478bd9Sstevel@tonic-gate FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP, 1621*7c478bd9Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1622*7c478bd9Sstevel@tonic-gate aflt->flt_panic); 1623*7c478bd9Sstevel@tonic-gate } 1624*7c478bd9Sstevel@tonic-gate 1625*7c478bd9Sstevel@tonic-gate /* 1626*7c478bd9Sstevel@tonic-gate * Timeout and bus error handling. There are two cases to consider: 1627*7c478bd9Sstevel@tonic-gate * 1628*7c478bd9Sstevel@tonic-gate * (1) If we are in the kernel protected by ddi_peek or ddi_poke,we 1629*7c478bd9Sstevel@tonic-gate * have already modified the saved registers so that we will return 1630*7c478bd9Sstevel@tonic-gate * from the trap to the appropriate trampoline routine; otherwise panic. 1631*7c478bd9Sstevel@tonic-gate * 1632*7c478bd9Sstevel@tonic-gate * (2) In user mode, we can simply use our AST mechanism to deliver 1633*7c478bd9Sstevel@tonic-gate * a SIGBUS. We do not log the occurence - processes performing 1634*7c478bd9Sstevel@tonic-gate * device control would generate lots of uninteresting messages. 1635*7c478bd9Sstevel@tonic-gate */ 1636*7c478bd9Sstevel@tonic-gate if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1637*7c478bd9Sstevel@tonic-gate if (t_afsr & P_AFSR_TO) 1638*7c478bd9Sstevel@tonic-gate (void) strcat(pr_reason, "BTO "); 1639*7c478bd9Sstevel@tonic-gate 1640*7c478bd9Sstevel@tonic-gate if (t_afsr & P_AFSR_BERR) 1641*7c478bd9Sstevel@tonic-gate (void) strcat(pr_reason, "BERR "); 1642*7c478bd9Sstevel@tonic-gate 1643*7c478bd9Sstevel@tonic-gate spf_flt.flt_type = CPU_BTO_BERR_ERR; 1644*7c478bd9Sstevel@tonic-gate if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) { 1645*7c478bd9Sstevel@tonic-gate cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ? 
1646*7c478bd9Sstevel@tonic-gate FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR, 1647*7c478bd9Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1648*7c478bd9Sstevel@tonic-gate aflt->flt_panic); 1649*7c478bd9Sstevel@tonic-gate } 1650*7c478bd9Sstevel@tonic-gate } 1651*7c478bd9Sstevel@tonic-gate 1652*7c478bd9Sstevel@tonic-gate /* 1653*7c478bd9Sstevel@tonic-gate * Handle WP: WP happens when the ecache is victimized and a parity 1654*7c478bd9Sstevel@tonic-gate * error was detected on a writeback. The data in question will be 1655*7c478bd9Sstevel@tonic-gate * poisoned as a UE will be written back. The PA is not logged and 1656*7c478bd9Sstevel@tonic-gate * it is possible that it doesn't belong to the trapped thread. The 1657*7c478bd9Sstevel@tonic-gate * WP trap is not fatal, but it could be fatal to someone that 1658*7c478bd9Sstevel@tonic-gate * subsequently accesses the toxic page. We set read_all_memscrub 1659*7c478bd9Sstevel@tonic-gate * to force the memscrubber to read all of memory when it awakens. 1660*7c478bd9Sstevel@tonic-gate * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a 1661*7c478bd9Sstevel@tonic-gate * UE back to poison the data. 
1662*7c478bd9Sstevel@tonic-gate */ 1663*7c478bd9Sstevel@tonic-gate if (t_afsr & P_AFSR_WP) { 1664*7c478bd9Sstevel@tonic-gate (void) strcat(pr_reason, "WP "); 1665*7c478bd9Sstevel@tonic-gate if (isus2i || isus2e) { 1666*7c478bd9Sstevel@tonic-gate aflt->flt_panic = 1; 1667*7c478bd9Sstevel@tonic-gate } else { 1668*7c478bd9Sstevel@tonic-gate read_all_memscrub = 1; 1669*7c478bd9Sstevel@tonic-gate } 1670*7c478bd9Sstevel@tonic-gate spf_flt.flt_type = CPU_WP_ERR; 1671*7c478bd9Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP, 1672*7c478bd9Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1673*7c478bd9Sstevel@tonic-gate aflt->flt_panic); 1674*7c478bd9Sstevel@tonic-gate } 1675*7c478bd9Sstevel@tonic-gate 1676*7c478bd9Sstevel@tonic-gate /* 1677*7c478bd9Sstevel@tonic-gate * Handle trapping CP error: In Sabre/Hummingbird, parity error in 1678*7c478bd9Sstevel@tonic-gate * the ecache on a copyout due to a PCI DMA read is signaled as a CP. 1679*7c478bd9Sstevel@tonic-gate * This is fatal. 1680*7c478bd9Sstevel@tonic-gate */ 1681*7c478bd9Sstevel@tonic-gate 1682*7c478bd9Sstevel@tonic-gate if (t_afsr & P_AFSR_CP) { 1683*7c478bd9Sstevel@tonic-gate if (isus2i || isus2e) { 1684*7c478bd9Sstevel@tonic-gate (void) strcat(pr_reason, "CP "); 1685*7c478bd9Sstevel@tonic-gate aflt->flt_panic = 1; 1686*7c478bd9Sstevel@tonic-gate spf_flt.flt_type = CPU_TRAPPING_CP_ERR; 1687*7c478bd9Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 1688*7c478bd9Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1689*7c478bd9Sstevel@tonic-gate aflt->flt_panic); 1690*7c478bd9Sstevel@tonic-gate } else { 1691*7c478bd9Sstevel@tonic-gate /* 1692*7c478bd9Sstevel@tonic-gate * Orphan CP: Happens due to signal integrity problem 1693*7c478bd9Sstevel@tonic-gate * on a CPU, where a CP is reported, without reporting 1694*7c478bd9Sstevel@tonic-gate * its associated UE. 
This is handled by locating the 1695*7c478bd9Sstevel@tonic-gate * bad parity line and would kick off the memscrubber 1696*7c478bd9Sstevel@tonic-gate * to find the UE if in memory or in another's cache. 1697*7c478bd9Sstevel@tonic-gate */ 1698*7c478bd9Sstevel@tonic-gate spf_flt.flt_type = CPU_ORPHAN_CP_ERR; 1699*7c478bd9Sstevel@tonic-gate (void) strcat(pr_reason, "ORPHAN_CP "); 1700*7c478bd9Sstevel@tonic-gate 1701*7c478bd9Sstevel@tonic-gate /* 1702*7c478bd9Sstevel@tonic-gate * Here we have no PA to work with. 1703*7c478bd9Sstevel@tonic-gate * Scan each line in the ecache to look for 1704*7c478bd9Sstevel@tonic-gate * the one with bad parity. 1705*7c478bd9Sstevel@tonic-gate */ 1706*7c478bd9Sstevel@tonic-gate aflt->flt_addr = AFLT_INV_ADDR; 1707*7c478bd9Sstevel@tonic-gate scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 1708*7c478bd9Sstevel@tonic-gate &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, 1709*7c478bd9Sstevel@tonic-gate &oafsr); 1710*7c478bd9Sstevel@tonic-gate acc_afsr |= oafsr; 1711*7c478bd9Sstevel@tonic-gate 1712*7c478bd9Sstevel@tonic-gate /* 1713*7c478bd9Sstevel@tonic-gate * If we found a bad PA, update the state to indicate 1714*7c478bd9Sstevel@tonic-gate * if it is memory or I/O space. 1715*7c478bd9Sstevel@tonic-gate */ 1716*7c478bd9Sstevel@tonic-gate if (aflt->flt_addr != AFLT_INV_ADDR) { 1717*7c478bd9Sstevel@tonic-gate aflt->flt_in_memory = 1718*7c478bd9Sstevel@tonic-gate (pf_is_memory(aflt->flt_addr >> 1719*7c478bd9Sstevel@tonic-gate MMU_PAGESHIFT)) ? 
1 : 0; 1720*7c478bd9Sstevel@tonic-gate } 1721*7c478bd9Sstevel@tonic-gate read_all_memscrub = 1; 1722*7c478bd9Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 1723*7c478bd9Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1724*7c478bd9Sstevel@tonic-gate aflt->flt_panic); 1725*7c478bd9Sstevel@tonic-gate 1726*7c478bd9Sstevel@tonic-gate } 1727*7c478bd9Sstevel@tonic-gate } 1728*7c478bd9Sstevel@tonic-gate 1729*7c478bd9Sstevel@tonic-gate /* 1730*7c478bd9Sstevel@tonic-gate * If we queued an error other than WP or CP and we are going to return 1731*7c478bd9Sstevel@tonic-gate * from the trap and the error was in user mode or inside of a 1732*7c478bd9Sstevel@tonic-gate * copy routine, set AST flag so the queue will be drained before 1733*7c478bd9Sstevel@tonic-gate * returning to user mode. 1734*7c478bd9Sstevel@tonic-gate * 1735*7c478bd9Sstevel@tonic-gate * For UE/LDP/EDP, the AST processing will SIGKILL the process 1736*7c478bd9Sstevel@tonic-gate * and send an event to its process contract. 1737*7c478bd9Sstevel@tonic-gate * 1738*7c478bd9Sstevel@tonic-gate * For BERR/BTO, the AST processing will SIGBUS the process. There 1739*7c478bd9Sstevel@tonic-gate * will have been no error queued in this case. 
1740*7c478bd9Sstevel@tonic-gate */ 1741*7c478bd9Sstevel@tonic-gate if ((t_afsr & 1742*7c478bd9Sstevel@tonic-gate (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) && 1743*7c478bd9Sstevel@tonic-gate (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) { 1744*7c478bd9Sstevel@tonic-gate int pcb_flag = 0; 1745*7c478bd9Sstevel@tonic-gate 1746*7c478bd9Sstevel@tonic-gate if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) 1747*7c478bd9Sstevel@tonic-gate pcb_flag |= ASYNC_HWERR; 1748*7c478bd9Sstevel@tonic-gate 1749*7c478bd9Sstevel@tonic-gate if (t_afsr & P_AFSR_BERR) 1750*7c478bd9Sstevel@tonic-gate pcb_flag |= ASYNC_BERR; 1751*7c478bd9Sstevel@tonic-gate 1752*7c478bd9Sstevel@tonic-gate if (t_afsr & P_AFSR_TO) 1753*7c478bd9Sstevel@tonic-gate pcb_flag |= ASYNC_BTO; 1754*7c478bd9Sstevel@tonic-gate 1755*7c478bd9Sstevel@tonic-gate ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 1756*7c478bd9Sstevel@tonic-gate aston(curthread); 1757*7c478bd9Sstevel@tonic-gate action = ACTION_AST_FLAGS; 1758*7c478bd9Sstevel@tonic-gate } 1759*7c478bd9Sstevel@tonic-gate 1760*7c478bd9Sstevel@tonic-gate /* 1761*7c478bd9Sstevel@tonic-gate * In response to a deferred error, we must do one of three things: 1762*7c478bd9Sstevel@tonic-gate * (1) set the AST flags, (2) trampoline, or (3) panic. action is 1763*7c478bd9Sstevel@tonic-gate * set in cases (1) and (2) - check that either action is set or 1764*7c478bd9Sstevel@tonic-gate * (3) is true. 1765*7c478bd9Sstevel@tonic-gate * 1766*7c478bd9Sstevel@tonic-gate * On II, the WP writes poisoned data back to memory, which will 1767*7c478bd9Sstevel@tonic-gate * cause a UE and a panic or reboot when read. In this case, we 1768*7c478bd9Sstevel@tonic-gate * don't need to panic at this time. On IIi and IIe, 1769*7c478bd9Sstevel@tonic-gate * aflt->flt_panic is already set above. 
1770*7c478bd9Sstevel@tonic-gate */ 1771*7c478bd9Sstevel@tonic-gate ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) || 1772*7c478bd9Sstevel@tonic-gate (t_afsr & P_AFSR_WP)); 1773*7c478bd9Sstevel@tonic-gate 1774*7c478bd9Sstevel@tonic-gate /* 1775*7c478bd9Sstevel@tonic-gate * Make a final sanity check to make sure we did not get any more async 1776*7c478bd9Sstevel@tonic-gate * errors and accumulate the afsr. 1777*7c478bd9Sstevel@tonic-gate */ 1778*7c478bd9Sstevel@tonic-gate flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 1779*7c478bd9Sstevel@tonic-gate cpunodes[CPU->cpu_id].ecache_linesize); 1780*7c478bd9Sstevel@tonic-gate (void) clear_errors(&spf_flt, NULL); 1781*7c478bd9Sstevel@tonic-gate 1782*7c478bd9Sstevel@tonic-gate /* 1783*7c478bd9Sstevel@tonic-gate * Take care of a special case: If there is a UE in the ecache flush 1784*7c478bd9Sstevel@tonic-gate * area, we'll see it in flush_ecache(). This will trigger the 1785*7c478bd9Sstevel@tonic-gate * CPU_ADDITIONAL_ERRORS case below. 1786*7c478bd9Sstevel@tonic-gate * 1787*7c478bd9Sstevel@tonic-gate * This could occur if the original error was a UE in the flush area, 1788*7c478bd9Sstevel@tonic-gate * or if the original error was an E$ error that was flushed out of 1789*7c478bd9Sstevel@tonic-gate * the E$ in scan_ecache(). 1790*7c478bd9Sstevel@tonic-gate * 1791*7c478bd9Sstevel@tonic-gate * If it's at the same address that we're already logging, then it's 1792*7c478bd9Sstevel@tonic-gate * probably one of these cases. Clear the bit so we don't trip over 1793*7c478bd9Sstevel@tonic-gate * it on the additional errors case, which could cause an unnecessary 1794*7c478bd9Sstevel@tonic-gate * panic. 
1795*7c478bd9Sstevel@tonic-gate */ 1796*7c478bd9Sstevel@tonic-gate if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar) 1797*7c478bd9Sstevel@tonic-gate acc_afsr |= aflt->flt_stat & ~P_AFSR_UE; 1798*7c478bd9Sstevel@tonic-gate else 1799*7c478bd9Sstevel@tonic-gate acc_afsr |= aflt->flt_stat; 1800*7c478bd9Sstevel@tonic-gate 1801*7c478bd9Sstevel@tonic-gate /* 1802*7c478bd9Sstevel@tonic-gate * Check the acumulated afsr for the important bits. 1803*7c478bd9Sstevel@tonic-gate * Make sure the spf_flt.flt_type value is set, and 1804*7c478bd9Sstevel@tonic-gate * enque an error. 1805*7c478bd9Sstevel@tonic-gate */ 1806*7c478bd9Sstevel@tonic-gate if (acc_afsr & 1807*7c478bd9Sstevel@tonic-gate (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) { 1808*7c478bd9Sstevel@tonic-gate if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP | 1809*7c478bd9Sstevel@tonic-gate P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP | 1810*7c478bd9Sstevel@tonic-gate P_AFSR_ISAP)) 1811*7c478bd9Sstevel@tonic-gate aflt->flt_panic = 1; 1812*7c478bd9Sstevel@tonic-gate 1813*7c478bd9Sstevel@tonic-gate spf_flt.flt_type = CPU_ADDITIONAL_ERR; 1814*7c478bd9Sstevel@tonic-gate aflt->flt_stat = acc_afsr; 1815*7c478bd9Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN, 1816*7c478bd9Sstevel@tonic-gate (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1817*7c478bd9Sstevel@tonic-gate aflt->flt_panic); 1818*7c478bd9Sstevel@tonic-gate } 1819*7c478bd9Sstevel@tonic-gate 1820*7c478bd9Sstevel@tonic-gate /* 1821*7c478bd9Sstevel@tonic-gate * If aflt->flt_panic is set at this point, we need to panic as the 1822*7c478bd9Sstevel@tonic-gate * result of a trap at TL > 0, or an error we determined to be fatal. 1823*7c478bd9Sstevel@tonic-gate * We've already enqueued the error in one of the if-clauses above, 1824*7c478bd9Sstevel@tonic-gate * and it will be dequeued and logged as part of the panic flow. 
 */
	if (aflt->flt_panic) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
		    "See previous message(s) for details", " %sError(s)",
		    pr_reason);
	}

	/*
	 * Before returning, we must re-enable errors, and
	 * reset the caches to their boot-up state.
	 */
	set_lsu(get_lsu() | cache_boot_state);
	set_error_enable(EER_ENABLE);
}

/*
 * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
 * This routine is shared by the CE and UE handling code.
 *
 * Only the first matching condition (ISAP, then ETP, then IVUE) is
 * reported; all three are fatal, so one panic message is sufficient.
 * NOTE: cpu_aflt_log() at CE_PANIC severity is expected not to return.
 */
static void
check_misc_err(spitf_async_flt *spf_flt)
{
	struct async_flt *aflt = (struct async_flt *)spf_flt;
	char *fatal_str = NULL;

	/*
	 * The ISAP and ETP errors are supposed to cause a POR
	 * from the system, so in theory we never, ever see these messages.
	 * ISAP, ETP and IVUE are considered to be fatal.
	 */
	if (aflt->flt_stat & P_AFSR_ISAP)
		fatal_str = " System Address Parity Error on";
	else if (aflt->flt_stat & P_AFSR_ETP)
		fatal_str = " Ecache Tag Parity Error on";
	else if (aflt->flt_stat & P_AFSR_IVUE)
		fatal_str = " Interrupt Vector Uncorrectable Error on";
	if (fatal_str != NULL) {
		cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS,
		    NULL, fatal_str);
	}
}

/*
 * Routine to convert a syndrome into a syndrome code.
 *
 * Returns -1 when the syndrome status is not AFLT_STAT_VALID or when the
 * syndrome itself is zero or out of table range; otherwise returns the
 * ecc_syndrome_tab[] code identifying which bit(s) is(are) bad.
 */
static int
synd_to_synd_code(int synd_status, ushort_t synd)
{
	if (synd_status != AFLT_STAT_VALID)
		return (-1);

	/*
	 * Use the 8-bit syndrome to index the ecc_syndrome_tab
	 * to get the code indicating which bit(s) is(are) bad.
	 */
	if ((synd == 0) || (synd >= SYND_TBL_SIZE))
		return (-1);
	else
		return (ecc_syndrome_tab[synd]);
}

/*
 * Routine to return a string identifying the physical name
 * associated with a memory/cache error.
 *
 * Returns 0 on success with buf/lenp filled in; EINVAL for an unusable
 * syndrome or a degenerate (<= 1 char) unum; EIO if the PROM lookup
 * fails; ENOTSUP when the fault is not in memory. On any failure buf is
 * set to the empty string and *lenp to 0.
 */
/* ARGSUSED */
int
cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
    uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
    char *buf, int buflen, int *lenp)
{
	short synd_code;
	int ret;

	if (flt_in_memory) {
		synd_code = synd_to_synd_code(synd_status, synd);
		if (synd_code == -1) {
			ret = EINVAL;
		} else if (prom_get_unum(synd_code, P2ALIGN(afar, 8),
		    buf, buflen, lenp) != 0) {
			/* PROM could not translate the (aligned) PA */
			ret = EIO;
		} else if (*lenp <= 1) {
			ret = EINVAL;
		} else {
			ret = 0;
		}
	} else {
		ret = ENOTSUP;
	}

	if (ret != 0) {
		buf[0] = '\0';
		*lenp = 0;
	}

	return (ret);
}

/*
 * Wrapper for cpu_get_mem_unum() routine that takes an
 * async_flt struct rather than explicit arguments.
1926*7c478bd9Sstevel@tonic-gate */ 1927*7c478bd9Sstevel@tonic-gate int 1928*7c478bd9Sstevel@tonic-gate cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1929*7c478bd9Sstevel@tonic-gate char *buf, int buflen, int *lenp) 1930*7c478bd9Sstevel@tonic-gate { 1931*7c478bd9Sstevel@tonic-gate return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd), 1932*7c478bd9Sstevel@tonic-gate aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id, 1933*7c478bd9Sstevel@tonic-gate aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp)); 1934*7c478bd9Sstevel@tonic-gate } 1935*7c478bd9Sstevel@tonic-gate 1936*7c478bd9Sstevel@tonic-gate /* 1937*7c478bd9Sstevel@tonic-gate * This routine is a more generic interface to cpu_get_mem_unum(), 1938*7c478bd9Sstevel@tonic-gate * that may be used by other modules (e.g. mm). 1939*7c478bd9Sstevel@tonic-gate */ 1940*7c478bd9Sstevel@tonic-gate int 1941*7c478bd9Sstevel@tonic-gate cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1942*7c478bd9Sstevel@tonic-gate char *buf, int buflen, int *lenp) 1943*7c478bd9Sstevel@tonic-gate { 1944*7c478bd9Sstevel@tonic-gate int synd_status, flt_in_memory, ret; 1945*7c478bd9Sstevel@tonic-gate char unum[UNUM_NAMLEN]; 1946*7c478bd9Sstevel@tonic-gate 1947*7c478bd9Sstevel@tonic-gate /* 1948*7c478bd9Sstevel@tonic-gate * Check for an invalid address. 1949*7c478bd9Sstevel@tonic-gate */ 1950*7c478bd9Sstevel@tonic-gate if (afar == (uint64_t)-1) 1951*7c478bd9Sstevel@tonic-gate return (ENXIO); 1952*7c478bd9Sstevel@tonic-gate 1953*7c478bd9Sstevel@tonic-gate if (synd == (uint64_t)-1) 1954*7c478bd9Sstevel@tonic-gate synd_status = AFLT_STAT_INVALID; 1955*7c478bd9Sstevel@tonic-gate else 1956*7c478bd9Sstevel@tonic-gate synd_status = AFLT_STAT_VALID; 1957*7c478bd9Sstevel@tonic-gate 1958*7c478bd9Sstevel@tonic-gate flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 
1 : 0; 1959*7c478bd9Sstevel@tonic-gate 1960*7c478bd9Sstevel@tonic-gate if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1961*7c478bd9Sstevel@tonic-gate CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp)) 1962*7c478bd9Sstevel@tonic-gate != 0) 1963*7c478bd9Sstevel@tonic-gate return (ret); 1964*7c478bd9Sstevel@tonic-gate 1965*7c478bd9Sstevel@tonic-gate if (*lenp >= buflen) 1966*7c478bd9Sstevel@tonic-gate return (ENAMETOOLONG); 1967*7c478bd9Sstevel@tonic-gate 1968*7c478bd9Sstevel@tonic-gate (void) strncpy(buf, unum, buflen); 1969*7c478bd9Sstevel@tonic-gate 1970*7c478bd9Sstevel@tonic-gate return (0); 1971*7c478bd9Sstevel@tonic-gate } 1972*7c478bd9Sstevel@tonic-gate 1973*7c478bd9Sstevel@tonic-gate /* 1974*7c478bd9Sstevel@tonic-gate * Routine to return memory information associated 1975*7c478bd9Sstevel@tonic-gate * with a physical address and syndrome. 1976*7c478bd9Sstevel@tonic-gate */ 1977*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 1978*7c478bd9Sstevel@tonic-gate int 1979*7c478bd9Sstevel@tonic-gate cpu_get_mem_info(uint64_t synd, uint64_t afar, 1980*7c478bd9Sstevel@tonic-gate uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1981*7c478bd9Sstevel@tonic-gate int *segsp, int *banksp, int *mcidp) 1982*7c478bd9Sstevel@tonic-gate { 1983*7c478bd9Sstevel@tonic-gate return (ENOTSUP); 1984*7c478bd9Sstevel@tonic-gate } 1985*7c478bd9Sstevel@tonic-gate 1986*7c478bd9Sstevel@tonic-gate /* 1987*7c478bd9Sstevel@tonic-gate * Routine to return a string identifying the physical 1988*7c478bd9Sstevel@tonic-gate * name associated with a cpuid. 
 */
/* ARGSUSED */
int
cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	/* Not implemented for this cpu module. */
	return (ENOTSUP);
}

/*
 * This routine returns the size of the kernel's FRU name buffer.
 */
size_t
cpu_get_name_bufsize()
{
	return (UNUM_NAMLEN);
}

/*
 * Cpu specific log func for UEs.
 *
 * unum must point to a caller-supplied buffer of at least UNUM_NAMLEN
 * bytes; it is filled in here via cpu_get_mem_unum_aflt() and then used
 * in the log message.
 */
static void
log_ue_err(struct async_flt *aflt, char *unum)
{
	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
	int len = 0;

#ifdef DEBUG
	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;

	/*
	 * Paranoid Check for priv mismatch
	 * Only applicable for UEs
	 */
	if (afsr_priv != aflt->flt_priv) {
		/*
		 * The priv bits in %tstate and %afsr did not match; we expect
		 * this to be very rare, so flag it with a message.
		 */
		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    ": PRIV bit in TSTATE and AFSR mismatched; "
		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);

		/* update saved afsr to reflect the correct priv */
		aflt->flt_stat &= ~P_AFSR_PRIV;
		if (aflt->flt_priv)
			aflt->flt_stat |= P_AFSR_PRIV;
	}
#endif /* DEBUG */

	/* Result deliberately ignored: unum is "" on lookup failure. */
	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
	    UNUM_NAMLEN, &len);

	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
	    " Uncorrectable Memory Error on");

	/*
	 * NOTE(review): syndrome 0x3 is singled out as likely not a DIMM
	 * fault -- presumably a datapath signature on this cpu; confirm
	 * against the UltraSPARC-II error documentation.
	 */
	if (SYND(aflt->flt_synd) == 0x3) {
		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
		    " Syndrome 0x3 indicates that this may not be a "
		    "memory module problem");
	}

	if (aflt->flt_in_memory)
		cpu_log_ecmem_info(spf_flt);
}


/*
 * The cpu_async_log_err() function is called via the ue_drain() function to
 * handle logging for CPU events that are dequeued.
As such, it can be invoked 2058*7c478bd9Sstevel@tonic-gate * from softint context, from AST processing in the trap() flow, or from the 2059*7c478bd9Sstevel@tonic-gate * panic flow. We decode the CPU-specific data, and log appropriate messages. 2060*7c478bd9Sstevel@tonic-gate */ 2061*7c478bd9Sstevel@tonic-gate static void 2062*7c478bd9Sstevel@tonic-gate cpu_async_log_err(void *flt) 2063*7c478bd9Sstevel@tonic-gate { 2064*7c478bd9Sstevel@tonic-gate spitf_async_flt *spf_flt = (spitf_async_flt *)flt; 2065*7c478bd9Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)flt; 2066*7c478bd9Sstevel@tonic-gate char unum[UNUM_NAMLEN]; 2067*7c478bd9Sstevel@tonic-gate char *space; 2068*7c478bd9Sstevel@tonic-gate char *ecache_scrub_logstr = NULL; 2069*7c478bd9Sstevel@tonic-gate 2070*7c478bd9Sstevel@tonic-gate switch (spf_flt->flt_type) { 2071*7c478bd9Sstevel@tonic-gate case CPU_UE_ERR: 2072*7c478bd9Sstevel@tonic-gate /* 2073*7c478bd9Sstevel@tonic-gate * We want to skip logging only if ALL the following 2074*7c478bd9Sstevel@tonic-gate * conditions are true: 2075*7c478bd9Sstevel@tonic-gate * 2076*7c478bd9Sstevel@tonic-gate * 1. We are not panicking 2077*7c478bd9Sstevel@tonic-gate * 2. There is only one error 2078*7c478bd9Sstevel@tonic-gate * 3. That error is a memory error 2079*7c478bd9Sstevel@tonic-gate * 4. The error is caused by the memory scrubber (in 2080*7c478bd9Sstevel@tonic-gate * which case the error will have occurred under 2081*7c478bd9Sstevel@tonic-gate * on_trap protection) 2082*7c478bd9Sstevel@tonic-gate * 5. The error is on a retired page 2083*7c478bd9Sstevel@tonic-gate * 2084*7c478bd9Sstevel@tonic-gate * Note 1: AFLT_PROT_EC is used places other than the memory 2085*7c478bd9Sstevel@tonic-gate * scrubber. However, none of those errors should occur 2086*7c478bd9Sstevel@tonic-gate * on a retired page. 
2087*7c478bd9Sstevel@tonic-gate * 2088*7c478bd9Sstevel@tonic-gate * Note 2: In the CE case, these errors are discarded before 2089*7c478bd9Sstevel@tonic-gate * the errorq. In the UE case, we must wait until now -- 2090*7c478bd9Sstevel@tonic-gate * softcall() grabs a mutex, which we can't do at a high PIL. 2091*7c478bd9Sstevel@tonic-gate */ 2092*7c478bd9Sstevel@tonic-gate if (!panicstr && 2093*7c478bd9Sstevel@tonic-gate (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE && 2094*7c478bd9Sstevel@tonic-gate aflt->flt_prot == AFLT_PROT_EC) { 2095*7c478bd9Sstevel@tonic-gate page_t *pp = page_numtopp_nolock((pfn_t) 2096*7c478bd9Sstevel@tonic-gate (aflt->flt_addr >> MMU_PAGESHIFT)); 2097*7c478bd9Sstevel@tonic-gate 2098*7c478bd9Sstevel@tonic-gate if (pp != NULL && page_isretired(pp)) { 2099*7c478bd9Sstevel@tonic-gate 2100*7c478bd9Sstevel@tonic-gate /* Zero the address to clear the error */ 2101*7c478bd9Sstevel@tonic-gate softcall(ecc_page_zero, (void *)aflt->flt_addr); 2102*7c478bd9Sstevel@tonic-gate return; 2103*7c478bd9Sstevel@tonic-gate } 2104*7c478bd9Sstevel@tonic-gate } 2105*7c478bd9Sstevel@tonic-gate 2106*7c478bd9Sstevel@tonic-gate /* 2107*7c478bd9Sstevel@tonic-gate * Log the UE and check for causes of this UE error that 2108*7c478bd9Sstevel@tonic-gate * don't cause a trap (Copyback error). cpu_async_error() 2109*7c478bd9Sstevel@tonic-gate * has already checked the i/o buses for us. 
2110*7c478bd9Sstevel@tonic-gate */ 2111*7c478bd9Sstevel@tonic-gate log_ue_err(aflt, unum); 2112*7c478bd9Sstevel@tonic-gate if (aflt->flt_in_memory) 2113*7c478bd9Sstevel@tonic-gate cpu_check_allcpus(aflt); 2114*7c478bd9Sstevel@tonic-gate break; 2115*7c478bd9Sstevel@tonic-gate 2116*7c478bd9Sstevel@tonic-gate case CPU_EDP_LDP_ERR: 2117*7c478bd9Sstevel@tonic-gate if (aflt->flt_stat & P_AFSR_EDP) 2118*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2119*7c478bd9Sstevel@tonic-gate NULL, " EDP event on"); 2120*7c478bd9Sstevel@tonic-gate 2121*7c478bd9Sstevel@tonic-gate if (aflt->flt_stat & P_AFSR_LDP) 2122*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2123*7c478bd9Sstevel@tonic-gate NULL, " LDP event on"); 2124*7c478bd9Sstevel@tonic-gate 2125*7c478bd9Sstevel@tonic-gate /* Log ecache info if exist */ 2126*7c478bd9Sstevel@tonic-gate if (spf_flt->flt_ec_lcnt > 0) { 2127*7c478bd9Sstevel@tonic-gate cpu_log_ecmem_info(spf_flt); 2128*7c478bd9Sstevel@tonic-gate 2129*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2130*7c478bd9Sstevel@tonic-gate NULL, " AFAR was derived from E$Tag"); 2131*7c478bd9Sstevel@tonic-gate } else { 2132*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2133*7c478bd9Sstevel@tonic-gate NULL, " No error found in ecache (No fault " 2134*7c478bd9Sstevel@tonic-gate "PA available)"); 2135*7c478bd9Sstevel@tonic-gate } 2136*7c478bd9Sstevel@tonic-gate break; 2137*7c478bd9Sstevel@tonic-gate 2138*7c478bd9Sstevel@tonic-gate case CPU_WP_ERR: 2139*7c478bd9Sstevel@tonic-gate /* 2140*7c478bd9Sstevel@tonic-gate * If the memscrub thread hasn't yet read 2141*7c478bd9Sstevel@tonic-gate * all of memory, as we requested in the 2142*7c478bd9Sstevel@tonic-gate * trap handler, then give it a kick to 2143*7c478bd9Sstevel@tonic-gate * make sure it does. 
2144*7c478bd9Sstevel@tonic-gate */ 2145*7c478bd9Sstevel@tonic-gate if (!isus2i && !isus2e && read_all_memscrub) 2146*7c478bd9Sstevel@tonic-gate memscrub_run(); 2147*7c478bd9Sstevel@tonic-gate 2148*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL, 2149*7c478bd9Sstevel@tonic-gate " WP event on"); 2150*7c478bd9Sstevel@tonic-gate return; 2151*7c478bd9Sstevel@tonic-gate 2152*7c478bd9Sstevel@tonic-gate case CPU_BTO_BERR_ERR: 2153*7c478bd9Sstevel@tonic-gate /* 2154*7c478bd9Sstevel@tonic-gate * A bus timeout or error occurred that was in user mode or not 2155*7c478bd9Sstevel@tonic-gate * in a protected kernel code region. 2156*7c478bd9Sstevel@tonic-gate */ 2157*7c478bd9Sstevel@tonic-gate if (aflt->flt_stat & P_AFSR_BERR) { 2158*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2159*7c478bd9Sstevel@tonic-gate spf_flt, BERRTO_LFLAGS, NULL, 2160*7c478bd9Sstevel@tonic-gate " Bus Error on System Bus in %s mode from", 2161*7c478bd9Sstevel@tonic-gate aflt->flt_priv ? "privileged" : "user"); 2162*7c478bd9Sstevel@tonic-gate } 2163*7c478bd9Sstevel@tonic-gate 2164*7c478bd9Sstevel@tonic-gate if (aflt->flt_stat & P_AFSR_TO) { 2165*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2166*7c478bd9Sstevel@tonic-gate spf_flt, BERRTO_LFLAGS, NULL, 2167*7c478bd9Sstevel@tonic-gate " Timeout on System Bus in %s mode from", 2168*7c478bd9Sstevel@tonic-gate aflt->flt_priv ? "privileged" : "user"); 2169*7c478bd9Sstevel@tonic-gate } 2170*7c478bd9Sstevel@tonic-gate 2171*7c478bd9Sstevel@tonic-gate return; 2172*7c478bd9Sstevel@tonic-gate 2173*7c478bd9Sstevel@tonic-gate case CPU_PANIC_CP_ERR: 2174*7c478bd9Sstevel@tonic-gate /* 2175*7c478bd9Sstevel@tonic-gate * Process the Copyback (CP) error info (if any) obtained from 2176*7c478bd9Sstevel@tonic-gate * polling all the cpus in the panic flow. This case is only 2177*7c478bd9Sstevel@tonic-gate * entered if we are panicking. 
2178*7c478bd9Sstevel@tonic-gate */ 2179*7c478bd9Sstevel@tonic-gate ASSERT(panicstr != NULL); 2180*7c478bd9Sstevel@tonic-gate ASSERT(aflt->flt_id == panic_aflt.flt_id); 2181*7c478bd9Sstevel@tonic-gate 2182*7c478bd9Sstevel@tonic-gate /* See which space - this info may not exist */ 2183*7c478bd9Sstevel@tonic-gate if (panic_aflt.flt_status & ECC_D_TRAP) 2184*7c478bd9Sstevel@tonic-gate space = "Data "; 2185*7c478bd9Sstevel@tonic-gate else if (panic_aflt.flt_status & ECC_I_TRAP) 2186*7c478bd9Sstevel@tonic-gate space = "Instruction "; 2187*7c478bd9Sstevel@tonic-gate else 2188*7c478bd9Sstevel@tonic-gate space = ""; 2189*7c478bd9Sstevel@tonic-gate 2190*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2191*7c478bd9Sstevel@tonic-gate " AFAR was derived from UE report," 2192*7c478bd9Sstevel@tonic-gate " CP event on CPU%d (caused %saccess error on %s%d)", 2193*7c478bd9Sstevel@tonic-gate aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ? 2194*7c478bd9Sstevel@tonic-gate "IOBUS" : "CPU", panic_aflt.flt_bus_id); 2195*7c478bd9Sstevel@tonic-gate 2196*7c478bd9Sstevel@tonic-gate if (spf_flt->flt_ec_lcnt > 0) 2197*7c478bd9Sstevel@tonic-gate cpu_log_ecmem_info(spf_flt); 2198*7c478bd9Sstevel@tonic-gate else 2199*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, 2200*7c478bd9Sstevel@tonic-gate NULL, " No cache dump available"); 2201*7c478bd9Sstevel@tonic-gate 2202*7c478bd9Sstevel@tonic-gate return; 2203*7c478bd9Sstevel@tonic-gate 2204*7c478bd9Sstevel@tonic-gate case CPU_TRAPPING_CP_ERR: 2205*7c478bd9Sstevel@tonic-gate /* 2206*7c478bd9Sstevel@tonic-gate * For sabre only. This is a copyback ecache parity error due 2207*7c478bd9Sstevel@tonic-gate * to a PCI DMA read. We should be panicking if we get here. 
2208*7c478bd9Sstevel@tonic-gate */ 2209*7c478bd9Sstevel@tonic-gate ASSERT(panicstr != NULL); 2210*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2211*7c478bd9Sstevel@tonic-gate " AFAR was derived from UE report," 2212*7c478bd9Sstevel@tonic-gate " CP event on CPU%d (caused Data access error " 2213*7c478bd9Sstevel@tonic-gate "on PCIBus)", aflt->flt_inst); 2214*7c478bd9Sstevel@tonic-gate return; 2215*7c478bd9Sstevel@tonic-gate 2216*7c478bd9Sstevel@tonic-gate /* 2217*7c478bd9Sstevel@tonic-gate * We log the ecache lines of the following states, 2218*7c478bd9Sstevel@tonic-gate * clean_bad_idle, clean_bad_busy, dirty_bad_idle and 2219*7c478bd9Sstevel@tonic-gate * dirty_bad_busy if ecache_scrub_verbose is set and panic 2220*7c478bd9Sstevel@tonic-gate * in addition to logging if ecache_scrub_panic is set. 2221*7c478bd9Sstevel@tonic-gate */ 2222*7c478bd9Sstevel@tonic-gate case CPU_BADLINE_CI_ERR: 2223*7c478bd9Sstevel@tonic-gate ecache_scrub_logstr = "CBI"; 2224*7c478bd9Sstevel@tonic-gate /* FALLTHRU */ 2225*7c478bd9Sstevel@tonic-gate 2226*7c478bd9Sstevel@tonic-gate case CPU_BADLINE_CB_ERR: 2227*7c478bd9Sstevel@tonic-gate if (ecache_scrub_logstr == NULL) 2228*7c478bd9Sstevel@tonic-gate ecache_scrub_logstr = "CBB"; 2229*7c478bd9Sstevel@tonic-gate /* FALLTHRU */ 2230*7c478bd9Sstevel@tonic-gate 2231*7c478bd9Sstevel@tonic-gate case CPU_BADLINE_DI_ERR: 2232*7c478bd9Sstevel@tonic-gate if (ecache_scrub_logstr == NULL) 2233*7c478bd9Sstevel@tonic-gate ecache_scrub_logstr = "DBI"; 2234*7c478bd9Sstevel@tonic-gate /* FALLTHRU */ 2235*7c478bd9Sstevel@tonic-gate 2236*7c478bd9Sstevel@tonic-gate case CPU_BADLINE_DB_ERR: 2237*7c478bd9Sstevel@tonic-gate if (ecache_scrub_logstr == NULL) 2238*7c478bd9Sstevel@tonic-gate ecache_scrub_logstr = "DBB"; 2239*7c478bd9Sstevel@tonic-gate 2240*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_NOTE, 2, spf_flt, 2241*7c478bd9Sstevel@tonic-gate (CPU_ERRID_FIRST | CPU_FLTCPU), NULL, 2242*7c478bd9Sstevel@tonic-gate " %s event on", 
ecache_scrub_logstr); 2243*7c478bd9Sstevel@tonic-gate cpu_log_ecmem_info(spf_flt); 2244*7c478bd9Sstevel@tonic-gate 2245*7c478bd9Sstevel@tonic-gate return; 2246*7c478bd9Sstevel@tonic-gate 2247*7c478bd9Sstevel@tonic-gate case CPU_ORPHAN_CP_ERR: 2248*7c478bd9Sstevel@tonic-gate /* 2249*7c478bd9Sstevel@tonic-gate * Orphan CPs, where the CP bit is set, but when a CPU 2250*7c478bd9Sstevel@tonic-gate * doesn't report a UE. 2251*7c478bd9Sstevel@tonic-gate */ 2252*7c478bd9Sstevel@tonic-gate if (read_all_memscrub) 2253*7c478bd9Sstevel@tonic-gate memscrub_run(); 2254*7c478bd9Sstevel@tonic-gate 2255*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU), 2256*7c478bd9Sstevel@tonic-gate NULL, " Orphan CP event on"); 2257*7c478bd9Sstevel@tonic-gate 2258*7c478bd9Sstevel@tonic-gate /* Log ecache info if exist */ 2259*7c478bd9Sstevel@tonic-gate if (spf_flt->flt_ec_lcnt > 0) 2260*7c478bd9Sstevel@tonic-gate cpu_log_ecmem_info(spf_flt); 2261*7c478bd9Sstevel@tonic-gate else 2262*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_NOTE, 2, spf_flt, 2263*7c478bd9Sstevel@tonic-gate (CP_LFLAGS | CPU_FLTCPU), NULL, 2264*7c478bd9Sstevel@tonic-gate " No error found in ecache (No fault " 2265*7c478bd9Sstevel@tonic-gate "PA available"); 2266*7c478bd9Sstevel@tonic-gate return; 2267*7c478bd9Sstevel@tonic-gate 2268*7c478bd9Sstevel@tonic-gate case CPU_ECACHE_ADDR_PAR_ERR: 2269*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2270*7c478bd9Sstevel@tonic-gate " E$ Tag Address Parity error on"); 2271*7c478bd9Sstevel@tonic-gate cpu_log_ecmem_info(spf_flt); 2272*7c478bd9Sstevel@tonic-gate return; 2273*7c478bd9Sstevel@tonic-gate 2274*7c478bd9Sstevel@tonic-gate case CPU_ECACHE_STATE_ERR: 2275*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2276*7c478bd9Sstevel@tonic-gate " E$ Tag State Parity error on"); 2277*7c478bd9Sstevel@tonic-gate cpu_log_ecmem_info(spf_flt); 2278*7c478bd9Sstevel@tonic-gate return; 
2279*7c478bd9Sstevel@tonic-gate 2280*7c478bd9Sstevel@tonic-gate case CPU_ECACHE_TAG_ERR: 2281*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2282*7c478bd9Sstevel@tonic-gate " E$ Tag scrub event on"); 2283*7c478bd9Sstevel@tonic-gate cpu_log_ecmem_info(spf_flt); 2284*7c478bd9Sstevel@tonic-gate return; 2285*7c478bd9Sstevel@tonic-gate 2286*7c478bd9Sstevel@tonic-gate case CPU_ECACHE_ETP_ETS_ERR: 2287*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2288*7c478bd9Sstevel@tonic-gate " AFSR.ETP is set and AFSR.ETS is zero on"); 2289*7c478bd9Sstevel@tonic-gate cpu_log_ecmem_info(spf_flt); 2290*7c478bd9Sstevel@tonic-gate return; 2291*7c478bd9Sstevel@tonic-gate 2292*7c478bd9Sstevel@tonic-gate 2293*7c478bd9Sstevel@tonic-gate case CPU_ADDITIONAL_ERR: 2294*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL, 2295*7c478bd9Sstevel@tonic-gate " Additional errors detected during error processing on"); 2296*7c478bd9Sstevel@tonic-gate return; 2297*7c478bd9Sstevel@tonic-gate 2298*7c478bd9Sstevel@tonic-gate default: 2299*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown " 2300*7c478bd9Sstevel@tonic-gate "fault type %x", (void *)spf_flt, spf_flt->flt_type); 2301*7c478bd9Sstevel@tonic-gate return; 2302*7c478bd9Sstevel@tonic-gate } 2303*7c478bd9Sstevel@tonic-gate 2304*7c478bd9Sstevel@tonic-gate /* ... 
fall through from the UE, EDP, or LDP cases */ 2305*7c478bd9Sstevel@tonic-gate 2306*7c478bd9Sstevel@tonic-gate if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) { 2307*7c478bd9Sstevel@tonic-gate if (!panicstr) { 2308*7c478bd9Sstevel@tonic-gate /* 2309*7c478bd9Sstevel@tonic-gate * Retire the bad page that caused the error 2310*7c478bd9Sstevel@tonic-gate */ 2311*7c478bd9Sstevel@tonic-gate page_t *pp = page_numtopp_nolock((pfn_t) 2312*7c478bd9Sstevel@tonic-gate (aflt->flt_addr >> MMU_PAGESHIFT)); 2313*7c478bd9Sstevel@tonic-gate 2314*7c478bd9Sstevel@tonic-gate if (pp != NULL) { 2315*7c478bd9Sstevel@tonic-gate page_settoxic(pp, PAGE_IS_FAULTY); 2316*7c478bd9Sstevel@tonic-gate (void) page_retire(pp, PAGE_IS_TOXIC); 2317*7c478bd9Sstevel@tonic-gate } else { 2318*7c478bd9Sstevel@tonic-gate uint64_t pa = 2319*7c478bd9Sstevel@tonic-gate P2ALIGN(aflt->flt_addr, MMU_PAGESIZE); 2320*7c478bd9Sstevel@tonic-gate 2321*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 3, spf_flt, 2322*7c478bd9Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 2323*7c478bd9Sstevel@tonic-gate ": cannot schedule clearing of error on " 2324*7c478bd9Sstevel@tonic-gate "page 0x%08x.%08x; page not in VM system", 2325*7c478bd9Sstevel@tonic-gate (uint32_t)(pa >> 32), (uint32_t)pa); 2326*7c478bd9Sstevel@tonic-gate } 2327*7c478bd9Sstevel@tonic-gate } else { 2328*7c478bd9Sstevel@tonic-gate /* 2329*7c478bd9Sstevel@tonic-gate * Clear UEs on panic so that we don't 2330*7c478bd9Sstevel@tonic-gate * get haunted by them during panic or 2331*7c478bd9Sstevel@tonic-gate * after reboot 2332*7c478bd9Sstevel@tonic-gate */ 2333*7c478bd9Sstevel@tonic-gate clearphys(P2ALIGN(aflt->flt_addr, 64), 2334*7c478bd9Sstevel@tonic-gate cpunodes[CPU->cpu_id].ecache_size, 2335*7c478bd9Sstevel@tonic-gate cpunodes[CPU->cpu_id].ecache_linesize); 2336*7c478bd9Sstevel@tonic-gate 2337*7c478bd9Sstevel@tonic-gate (void) clear_errors(NULL, NULL); 2338*7c478bd9Sstevel@tonic-gate } 2339*7c478bd9Sstevel@tonic-gate } 2340*7c478bd9Sstevel@tonic-gate 
2341*7c478bd9Sstevel@tonic-gate /* 2342*7c478bd9Sstevel@tonic-gate * Log final recover message 2343*7c478bd9Sstevel@tonic-gate */ 2344*7c478bd9Sstevel@tonic-gate if (!panicstr) { 2345*7c478bd9Sstevel@tonic-gate if (!aflt->flt_priv) { 2346*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2347*7c478bd9Sstevel@tonic-gate NULL, " Above Error is in User Mode" 2348*7c478bd9Sstevel@tonic-gate "\n and is fatal: " 2349*7c478bd9Sstevel@tonic-gate "will SIGKILL process and notify contract"); 2350*7c478bd9Sstevel@tonic-gate } else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) { 2351*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2352*7c478bd9Sstevel@tonic-gate NULL, " Above Error detected while dumping core;" 2353*7c478bd9Sstevel@tonic-gate "\n core file will be truncated"); 2354*7c478bd9Sstevel@tonic-gate } else if (aflt->flt_prot == AFLT_PROT_COPY) { 2355*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2356*7c478bd9Sstevel@tonic-gate NULL, " Above Error is due to Kernel access" 2357*7c478bd9Sstevel@tonic-gate "\n to User space and is fatal: " 2358*7c478bd9Sstevel@tonic-gate "will SIGKILL process and notify contract"); 2359*7c478bd9Sstevel@tonic-gate } else if (aflt->flt_prot == AFLT_PROT_EC) { 2360*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL, 2361*7c478bd9Sstevel@tonic-gate " Above Error detected by protected Kernel code" 2362*7c478bd9Sstevel@tonic-gate "\n that will try to clear error from system"); 2363*7c478bd9Sstevel@tonic-gate } 2364*7c478bd9Sstevel@tonic-gate } 2365*7c478bd9Sstevel@tonic-gate } 2366*7c478bd9Sstevel@tonic-gate 2367*7c478bd9Sstevel@tonic-gate 2368*7c478bd9Sstevel@tonic-gate /* 2369*7c478bd9Sstevel@tonic-gate * Check all cpus for non-trapping UE-causing errors 2370*7c478bd9Sstevel@tonic-gate * In Ultra I/II, we look for copyback errors (CPs) 2371*7c478bd9Sstevel@tonic-gate */ 2372*7c478bd9Sstevel@tonic-gate void 
2373*7c478bd9Sstevel@tonic-gate cpu_check_allcpus(struct async_flt *aflt) 2374*7c478bd9Sstevel@tonic-gate { 2375*7c478bd9Sstevel@tonic-gate spitf_async_flt cp; 2376*7c478bd9Sstevel@tonic-gate spitf_async_flt *spf_cpflt = &cp; 2377*7c478bd9Sstevel@tonic-gate struct async_flt *cpflt = (struct async_flt *)&cp; 2378*7c478bd9Sstevel@tonic-gate int pix; 2379*7c478bd9Sstevel@tonic-gate 2380*7c478bd9Sstevel@tonic-gate cpflt->flt_id = aflt->flt_id; 2381*7c478bd9Sstevel@tonic-gate cpflt->flt_addr = aflt->flt_addr; 2382*7c478bd9Sstevel@tonic-gate 2383*7c478bd9Sstevel@tonic-gate for (pix = 0; pix < NCPU; pix++) { 2384*7c478bd9Sstevel@tonic-gate if (CPU_XCALL_READY(pix)) { 2385*7c478bd9Sstevel@tonic-gate xc_one(pix, (xcfunc_t *)get_cpu_status, 2386*7c478bd9Sstevel@tonic-gate (uint64_t)cpflt, 0); 2387*7c478bd9Sstevel@tonic-gate 2388*7c478bd9Sstevel@tonic-gate if (cpflt->flt_stat & P_AFSR_CP) { 2389*7c478bd9Sstevel@tonic-gate char *space; 2390*7c478bd9Sstevel@tonic-gate 2391*7c478bd9Sstevel@tonic-gate /* See which space - this info may not exist */ 2392*7c478bd9Sstevel@tonic-gate if (aflt->flt_status & ECC_D_TRAP) 2393*7c478bd9Sstevel@tonic-gate space = "Data "; 2394*7c478bd9Sstevel@tonic-gate else if (aflt->flt_status & ECC_I_TRAP) 2395*7c478bd9Sstevel@tonic-gate space = "Instruction "; 2396*7c478bd9Sstevel@tonic-gate else 2397*7c478bd9Sstevel@tonic-gate space = ""; 2398*7c478bd9Sstevel@tonic-gate 2399*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS, 2400*7c478bd9Sstevel@tonic-gate NULL, " AFAR was derived from UE report," 2401*7c478bd9Sstevel@tonic-gate " CP event on CPU%d (caused %saccess " 2402*7c478bd9Sstevel@tonic-gate "error on %s%d)", pix, space, 2403*7c478bd9Sstevel@tonic-gate (aflt->flt_status & ECC_IOBUS) ? 
2404*7c478bd9Sstevel@tonic-gate "IOBUS" : "CPU", aflt->flt_bus_id); 2405*7c478bd9Sstevel@tonic-gate 2406*7c478bd9Sstevel@tonic-gate if (spf_cpflt->flt_ec_lcnt > 0) 2407*7c478bd9Sstevel@tonic-gate cpu_log_ecmem_info(spf_cpflt); 2408*7c478bd9Sstevel@tonic-gate else 2409*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_WARN, 2, spf_cpflt, 2410*7c478bd9Sstevel@tonic-gate CPU_ERRID_FIRST, NULL, 2411*7c478bd9Sstevel@tonic-gate " No cache dump available"); 2412*7c478bd9Sstevel@tonic-gate } 2413*7c478bd9Sstevel@tonic-gate } 2414*7c478bd9Sstevel@tonic-gate } 2415*7c478bd9Sstevel@tonic-gate } 2416*7c478bd9Sstevel@tonic-gate 2417*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 2418*7c478bd9Sstevel@tonic-gate int test_mp_cp = 0; 2419*7c478bd9Sstevel@tonic-gate #endif 2420*7c478bd9Sstevel@tonic-gate 2421*7c478bd9Sstevel@tonic-gate /* 2422*7c478bd9Sstevel@tonic-gate * Cross-call callback routine to tell a CPU to read its own %afsr to check 2423*7c478bd9Sstevel@tonic-gate * for copyback errors and capture relevant information. 
2424*7c478bd9Sstevel@tonic-gate */ 2425*7c478bd9Sstevel@tonic-gate static uint_t 2426*7c478bd9Sstevel@tonic-gate get_cpu_status(uint64_t arg) 2427*7c478bd9Sstevel@tonic-gate { 2428*7c478bd9Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)arg; 2429*7c478bd9Sstevel@tonic-gate spitf_async_flt *spf_flt = (spitf_async_flt *)arg; 2430*7c478bd9Sstevel@tonic-gate uint64_t afsr; 2431*7c478bd9Sstevel@tonic-gate uint32_t ec_idx; 2432*7c478bd9Sstevel@tonic-gate uint64_t sdbh, sdbl; 2433*7c478bd9Sstevel@tonic-gate int i; 2434*7c478bd9Sstevel@tonic-gate uint32_t ec_set_size; 2435*7c478bd9Sstevel@tonic-gate uchar_t valid; 2436*7c478bd9Sstevel@tonic-gate ec_data_t ec_data[8]; 2437*7c478bd9Sstevel@tonic-gate uint64_t ec_tag, flt_addr_tag, oafsr; 2438*7c478bd9Sstevel@tonic-gate uint64_t *acc_afsr = NULL; 2439*7c478bd9Sstevel@tonic-gate 2440*7c478bd9Sstevel@tonic-gate get_asyncflt(&afsr); 2441*7c478bd9Sstevel@tonic-gate if (CPU_PRIVATE(CPU) != NULL) { 2442*7c478bd9Sstevel@tonic-gate acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2443*7c478bd9Sstevel@tonic-gate afsr |= *acc_afsr; 2444*7c478bd9Sstevel@tonic-gate *acc_afsr = 0; 2445*7c478bd9Sstevel@tonic-gate } 2446*7c478bd9Sstevel@tonic-gate 2447*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 2448*7c478bd9Sstevel@tonic-gate if (test_mp_cp) 2449*7c478bd9Sstevel@tonic-gate afsr |= P_AFSR_CP; 2450*7c478bd9Sstevel@tonic-gate #endif 2451*7c478bd9Sstevel@tonic-gate aflt->flt_stat = afsr; 2452*7c478bd9Sstevel@tonic-gate 2453*7c478bd9Sstevel@tonic-gate if (afsr & P_AFSR_CP) { 2454*7c478bd9Sstevel@tonic-gate /* 2455*7c478bd9Sstevel@tonic-gate * Capture the UDBs 2456*7c478bd9Sstevel@tonic-gate */ 2457*7c478bd9Sstevel@tonic-gate get_udb_errors(&sdbh, &sdbl); 2458*7c478bd9Sstevel@tonic-gate spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF); 2459*7c478bd9Sstevel@tonic-gate spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF); 2460*7c478bd9Sstevel@tonic-gate 2461*7c478bd9Sstevel@tonic-gate /* 2462*7c478bd9Sstevel@tonic-gate * Clear CP bit before 
capturing ecache data 2463*7c478bd9Sstevel@tonic-gate * and AFSR info. 2464*7c478bd9Sstevel@tonic-gate */ 2465*7c478bd9Sstevel@tonic-gate set_asyncflt(P_AFSR_CP); 2466*7c478bd9Sstevel@tonic-gate 2467*7c478bd9Sstevel@tonic-gate /* 2468*7c478bd9Sstevel@tonic-gate * See if we can capture the ecache line for the 2469*7c478bd9Sstevel@tonic-gate * fault PA. 2470*7c478bd9Sstevel@tonic-gate * 2471*7c478bd9Sstevel@tonic-gate * Return a valid matching ecache line, if any. 2472*7c478bd9Sstevel@tonic-gate * Otherwise, return the first matching ecache 2473*7c478bd9Sstevel@tonic-gate * line marked invalid. 2474*7c478bd9Sstevel@tonic-gate */ 2475*7c478bd9Sstevel@tonic-gate flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift; 2476*7c478bd9Sstevel@tonic-gate ec_set_size = cpunodes[CPU->cpu_id].ecache_size / 2477*7c478bd9Sstevel@tonic-gate ecache_associativity; 2478*7c478bd9Sstevel@tonic-gate spf_flt->flt_ec_lcnt = 0; 2479*7c478bd9Sstevel@tonic-gate 2480*7c478bd9Sstevel@tonic-gate for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size); 2481*7c478bd9Sstevel@tonic-gate i < ecache_associativity; i++, ec_idx += ec_set_size) { 2482*7c478bd9Sstevel@tonic-gate get_ecache_dtag(P2ALIGN(ec_idx, 64), 2483*7c478bd9Sstevel@tonic-gate (uint64_t *)&ec_data[0], &ec_tag, &oafsr, 2484*7c478bd9Sstevel@tonic-gate acc_afsr); 2485*7c478bd9Sstevel@tonic-gate 2486*7c478bd9Sstevel@tonic-gate if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag) 2487*7c478bd9Sstevel@tonic-gate continue; 2488*7c478bd9Sstevel@tonic-gate 2489*7c478bd9Sstevel@tonic-gate valid = cpu_ec_state_valid & 2490*7c478bd9Sstevel@tonic-gate (uchar_t)((ec_tag & cpu_ec_state_mask) >> 2491*7c478bd9Sstevel@tonic-gate cpu_ec_state_shift); 2492*7c478bd9Sstevel@tonic-gate 2493*7c478bd9Sstevel@tonic-gate if (valid || spf_flt->flt_ec_lcnt == 0) { 2494*7c478bd9Sstevel@tonic-gate spf_flt->flt_ec_tag = ec_tag; 2495*7c478bd9Sstevel@tonic-gate bcopy(&ec_data, &spf_flt->flt_ec_data, 2496*7c478bd9Sstevel@tonic-gate sizeof (ec_data)); 2497*7c478bd9Sstevel@tonic-gate 
spf_flt->flt_ec_lcnt = 1; 2498*7c478bd9Sstevel@tonic-gate 2499*7c478bd9Sstevel@tonic-gate if (valid) 2500*7c478bd9Sstevel@tonic-gate break; 2501*7c478bd9Sstevel@tonic-gate } 2502*7c478bd9Sstevel@tonic-gate } 2503*7c478bd9Sstevel@tonic-gate } 2504*7c478bd9Sstevel@tonic-gate return (0); 2505*7c478bd9Sstevel@tonic-gate } 2506*7c478bd9Sstevel@tonic-gate 2507*7c478bd9Sstevel@tonic-gate /* 2508*7c478bd9Sstevel@tonic-gate * CPU-module callback for the non-panicking CPUs. This routine is invoked 2509*7c478bd9Sstevel@tonic-gate * from panic_idle() as part of the other CPUs stopping themselves when a 2510*7c478bd9Sstevel@tonic-gate * panic occurs. We need to be VERY careful what we do here, since panicstr 2511*7c478bd9Sstevel@tonic-gate * is NOT set yet and we cannot blow through locks. If panic_aflt is set 2512*7c478bd9Sstevel@tonic-gate * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for 2513*7c478bd9Sstevel@tonic-gate * CP error information. 2514*7c478bd9Sstevel@tonic-gate */ 2515*7c478bd9Sstevel@tonic-gate void 2516*7c478bd9Sstevel@tonic-gate cpu_async_panic_callb(void) 2517*7c478bd9Sstevel@tonic-gate { 2518*7c478bd9Sstevel@tonic-gate spitf_async_flt cp; 2519*7c478bd9Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)&cp; 2520*7c478bd9Sstevel@tonic-gate uint64_t *scrub_afsr; 2521*7c478bd9Sstevel@tonic-gate 2522*7c478bd9Sstevel@tonic-gate if (panic_aflt.flt_id != 0) { 2523*7c478bd9Sstevel@tonic-gate aflt->flt_addr = panic_aflt.flt_addr; 2524*7c478bd9Sstevel@tonic-gate (void) get_cpu_status((uint64_t)aflt); 2525*7c478bd9Sstevel@tonic-gate 2526*7c478bd9Sstevel@tonic-gate if (CPU_PRIVATE(CPU) != NULL) { 2527*7c478bd9Sstevel@tonic-gate scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2528*7c478bd9Sstevel@tonic-gate if (*scrub_afsr & P_AFSR_CP) { 2529*7c478bd9Sstevel@tonic-gate aflt->flt_stat |= *scrub_afsr; 2530*7c478bd9Sstevel@tonic-gate *scrub_afsr = 0; 2531*7c478bd9Sstevel@tonic-gate } 2532*7c478bd9Sstevel@tonic-gate } 
2533*7c478bd9Sstevel@tonic-gate if (aflt->flt_stat & P_AFSR_CP) { 2534*7c478bd9Sstevel@tonic-gate aflt->flt_id = panic_aflt.flt_id; 2535*7c478bd9Sstevel@tonic-gate aflt->flt_panic = 1; 2536*7c478bd9Sstevel@tonic-gate aflt->flt_inst = CPU->cpu_id; 2537*7c478bd9Sstevel@tonic-gate aflt->flt_class = CPU_FAULT; 2538*7c478bd9Sstevel@tonic-gate cp.flt_type = CPU_PANIC_CP_ERR; 2539*7c478bd9Sstevel@tonic-gate cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 2540*7c478bd9Sstevel@tonic-gate (void *)&cp, sizeof (cp), ue_queue, 2541*7c478bd9Sstevel@tonic-gate aflt->flt_panic); 2542*7c478bd9Sstevel@tonic-gate } 2543*7c478bd9Sstevel@tonic-gate } 2544*7c478bd9Sstevel@tonic-gate } 2545*7c478bd9Sstevel@tonic-gate 2546*7c478bd9Sstevel@tonic-gate /* 2547*7c478bd9Sstevel@tonic-gate * Turn off all cpu error detection, normally only used for panics. 2548*7c478bd9Sstevel@tonic-gate */ 2549*7c478bd9Sstevel@tonic-gate void 2550*7c478bd9Sstevel@tonic-gate cpu_disable_errors(void) 2551*7c478bd9Sstevel@tonic-gate { 2552*7c478bd9Sstevel@tonic-gate xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE); 2553*7c478bd9Sstevel@tonic-gate } 2554*7c478bd9Sstevel@tonic-gate 2555*7c478bd9Sstevel@tonic-gate /* 2556*7c478bd9Sstevel@tonic-gate * Enable errors. 
2557*7c478bd9Sstevel@tonic-gate */ 2558*7c478bd9Sstevel@tonic-gate void 2559*7c478bd9Sstevel@tonic-gate cpu_enable_errors(void) 2560*7c478bd9Sstevel@tonic-gate { 2561*7c478bd9Sstevel@tonic-gate xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE); 2562*7c478bd9Sstevel@tonic-gate } 2563*7c478bd9Sstevel@tonic-gate 2564*7c478bd9Sstevel@tonic-gate static void 2565*7c478bd9Sstevel@tonic-gate cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err) 2566*7c478bd9Sstevel@tonic-gate { 2567*7c478bd9Sstevel@tonic-gate uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8); 2568*7c478bd9Sstevel@tonic-gate int i, loop = 1; 2569*7c478bd9Sstevel@tonic-gate ushort_t ecc_0; 2570*7c478bd9Sstevel@tonic-gate uint64_t paddr; 2571*7c478bd9Sstevel@tonic-gate uint64_t data; 2572*7c478bd9Sstevel@tonic-gate 2573*7c478bd9Sstevel@tonic-gate if (verbose) 2574*7c478bd9Sstevel@tonic-gate loop = 8; 2575*7c478bd9Sstevel@tonic-gate for (i = 0; i < loop; i++) { 2576*7c478bd9Sstevel@tonic-gate paddr = aligned_addr + (i * 8); 2577*7c478bd9Sstevel@tonic-gate data = lddphys(paddr); 2578*7c478bd9Sstevel@tonic-gate if (verbose) { 2579*7c478bd9Sstevel@tonic-gate if (ce_err) { 2580*7c478bd9Sstevel@tonic-gate ecc_0 = ecc_gen((uint32_t)(data>>32), 2581*7c478bd9Sstevel@tonic-gate (uint32_t)data); 2582*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2583*7c478bd9Sstevel@tonic-gate NULL, " Paddr 0x%" PRIx64 ", " 2584*7c478bd9Sstevel@tonic-gate "Data 0x%08x.%08x, ECC 0x%x", paddr, 2585*7c478bd9Sstevel@tonic-gate (uint32_t)(data>>32), (uint32_t)data, ecc_0); 2586*7c478bd9Sstevel@tonic-gate } else { 2587*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2588*7c478bd9Sstevel@tonic-gate NULL, " Paddr 0x%" PRIx64 ", " 2589*7c478bd9Sstevel@tonic-gate "Data 0x%08x.%08x", paddr, 2590*7c478bd9Sstevel@tonic-gate (uint32_t)(data>>32), (uint32_t)data); 2591*7c478bd9Sstevel@tonic-gate } 2592*7c478bd9Sstevel@tonic-gate } 2593*7c478bd9Sstevel@tonic-gate } 
2594*7c478bd9Sstevel@tonic-gate } 2595*7c478bd9Sstevel@tonic-gate 2596*7c478bd9Sstevel@tonic-gate static struct { /* sec-ded-s4ed ecc code */ 2597*7c478bd9Sstevel@tonic-gate uint_t hi, lo; 2598*7c478bd9Sstevel@tonic-gate } ecc_code[8] = { 2599*7c478bd9Sstevel@tonic-gate { 0xee55de23U, 0x16161161U }, 2600*7c478bd9Sstevel@tonic-gate { 0x55eede93U, 0x61612212U }, 2601*7c478bd9Sstevel@tonic-gate { 0xbb557b8cU, 0x49494494U }, 2602*7c478bd9Sstevel@tonic-gate { 0x55bb7b6cU, 0x94948848U }, 2603*7c478bd9Sstevel@tonic-gate { 0x16161161U, 0xee55de23U }, 2604*7c478bd9Sstevel@tonic-gate { 0x61612212U, 0x55eede93U }, 2605*7c478bd9Sstevel@tonic-gate { 0x49494494U, 0xbb557b8cU }, 2606*7c478bd9Sstevel@tonic-gate { 0x94948848U, 0x55bb7b6cU } 2607*7c478bd9Sstevel@tonic-gate }; 2608*7c478bd9Sstevel@tonic-gate 2609*7c478bd9Sstevel@tonic-gate static ushort_t 2610*7c478bd9Sstevel@tonic-gate ecc_gen(uint_t high_bytes, uint_t low_bytes) 2611*7c478bd9Sstevel@tonic-gate { 2612*7c478bd9Sstevel@tonic-gate int i, j; 2613*7c478bd9Sstevel@tonic-gate uchar_t checker, bit_mask; 2614*7c478bd9Sstevel@tonic-gate struct { 2615*7c478bd9Sstevel@tonic-gate uint_t hi, lo; 2616*7c478bd9Sstevel@tonic-gate } hex_data, masked_data[8]; 2617*7c478bd9Sstevel@tonic-gate 2618*7c478bd9Sstevel@tonic-gate hex_data.hi = high_bytes; 2619*7c478bd9Sstevel@tonic-gate hex_data.lo = low_bytes; 2620*7c478bd9Sstevel@tonic-gate 2621*7c478bd9Sstevel@tonic-gate /* mask out bits according to sec-ded-s4ed ecc code */ 2622*7c478bd9Sstevel@tonic-gate for (i = 0; i < 8; i++) { 2623*7c478bd9Sstevel@tonic-gate masked_data[i].hi = hex_data.hi & ecc_code[i].hi; 2624*7c478bd9Sstevel@tonic-gate masked_data[i].lo = hex_data.lo & ecc_code[i].lo; 2625*7c478bd9Sstevel@tonic-gate } 2626*7c478bd9Sstevel@tonic-gate 2627*7c478bd9Sstevel@tonic-gate /* 2628*7c478bd9Sstevel@tonic-gate * xor all bits in masked_data[i] to get bit_i of checker, 2629*7c478bd9Sstevel@tonic-gate * where i = 0 to 7 2630*7c478bd9Sstevel@tonic-gate */ 
2631*7c478bd9Sstevel@tonic-gate checker = 0; 2632*7c478bd9Sstevel@tonic-gate for (i = 0; i < 8; i++) { 2633*7c478bd9Sstevel@tonic-gate bit_mask = 1 << i; 2634*7c478bd9Sstevel@tonic-gate for (j = 0; j < 32; j++) { 2635*7c478bd9Sstevel@tonic-gate if (masked_data[i].lo & 1) checker ^= bit_mask; 2636*7c478bd9Sstevel@tonic-gate if (masked_data[i].hi & 1) checker ^= bit_mask; 2637*7c478bd9Sstevel@tonic-gate masked_data[i].hi >>= 1; 2638*7c478bd9Sstevel@tonic-gate masked_data[i].lo >>= 1; 2639*7c478bd9Sstevel@tonic-gate } 2640*7c478bd9Sstevel@tonic-gate } 2641*7c478bd9Sstevel@tonic-gate return (checker); 2642*7c478bd9Sstevel@tonic-gate } 2643*7c478bd9Sstevel@tonic-gate 2644*7c478bd9Sstevel@tonic-gate /* 2645*7c478bd9Sstevel@tonic-gate * Flush the entire ecache using displacement flush by reading through a 2646*7c478bd9Sstevel@tonic-gate * physical address range as large as the ecache. 2647*7c478bd9Sstevel@tonic-gate */ 2648*7c478bd9Sstevel@tonic-gate void 2649*7c478bd9Sstevel@tonic-gate cpu_flush_ecache(void) 2650*7c478bd9Sstevel@tonic-gate { 2651*7c478bd9Sstevel@tonic-gate flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2652*7c478bd9Sstevel@tonic-gate cpunodes[CPU->cpu_id].ecache_linesize); 2653*7c478bd9Sstevel@tonic-gate } 2654*7c478bd9Sstevel@tonic-gate 2655*7c478bd9Sstevel@tonic-gate /* 2656*7c478bd9Sstevel@tonic-gate * read and display the data in the cache line where the 2657*7c478bd9Sstevel@tonic-gate * original ce error occurred. 2658*7c478bd9Sstevel@tonic-gate * This routine is mainly used for debugging new hardware. 
2659*7c478bd9Sstevel@tonic-gate */ 2660*7c478bd9Sstevel@tonic-gate void 2661*7c478bd9Sstevel@tonic-gate read_ecc_data(struct async_flt *ecc, short verbose, short ce_err) 2662*7c478bd9Sstevel@tonic-gate { 2663*7c478bd9Sstevel@tonic-gate kpreempt_disable(); 2664*7c478bd9Sstevel@tonic-gate /* disable ECC error traps */ 2665*7c478bd9Sstevel@tonic-gate set_error_enable(EER_ECC_DISABLE); 2666*7c478bd9Sstevel@tonic-gate 2667*7c478bd9Sstevel@tonic-gate /* 2668*7c478bd9Sstevel@tonic-gate * flush the ecache 2669*7c478bd9Sstevel@tonic-gate * read the data 2670*7c478bd9Sstevel@tonic-gate * check to see if an ECC error occured 2671*7c478bd9Sstevel@tonic-gate */ 2672*7c478bd9Sstevel@tonic-gate flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2673*7c478bd9Sstevel@tonic-gate cpunodes[CPU->cpu_id].ecache_linesize); 2674*7c478bd9Sstevel@tonic-gate set_lsu(get_lsu() | cache_boot_state); 2675*7c478bd9Sstevel@tonic-gate cpu_read_paddr(ecc, verbose, ce_err); 2676*7c478bd9Sstevel@tonic-gate (void) check_ecc(ecc); 2677*7c478bd9Sstevel@tonic-gate 2678*7c478bd9Sstevel@tonic-gate /* enable ECC error traps */ 2679*7c478bd9Sstevel@tonic-gate set_error_enable(EER_ENABLE); 2680*7c478bd9Sstevel@tonic-gate kpreempt_enable(); 2681*7c478bd9Sstevel@tonic-gate } 2682*7c478bd9Sstevel@tonic-gate 2683*7c478bd9Sstevel@tonic-gate /* 2684*7c478bd9Sstevel@tonic-gate * Check the AFSR bits for UE/CE persistence. 2685*7c478bd9Sstevel@tonic-gate * If UE or CE errors are detected, the routine will 2686*7c478bd9Sstevel@tonic-gate * clears all the AFSR sticky bits (except CP for 2687*7c478bd9Sstevel@tonic-gate * spitfire/blackbird) and the UDBs. 2688*7c478bd9Sstevel@tonic-gate * if ce_debug or ue_debug is set, log any ue/ce errors detected. 
2689*7c478bd9Sstevel@tonic-gate */ 2690*7c478bd9Sstevel@tonic-gate static int 2691*7c478bd9Sstevel@tonic-gate check_ecc(struct async_flt *ecc) 2692*7c478bd9Sstevel@tonic-gate { 2693*7c478bd9Sstevel@tonic-gate uint64_t t_afsr; 2694*7c478bd9Sstevel@tonic-gate uint64_t t_afar; 2695*7c478bd9Sstevel@tonic-gate uint64_t udbh; 2696*7c478bd9Sstevel@tonic-gate uint64_t udbl; 2697*7c478bd9Sstevel@tonic-gate ushort_t udb; 2698*7c478bd9Sstevel@tonic-gate int persistent = 0; 2699*7c478bd9Sstevel@tonic-gate 2700*7c478bd9Sstevel@tonic-gate /* 2701*7c478bd9Sstevel@tonic-gate * Capture the AFSR, AFAR and UDBs info 2702*7c478bd9Sstevel@tonic-gate */ 2703*7c478bd9Sstevel@tonic-gate get_asyncflt(&t_afsr); 2704*7c478bd9Sstevel@tonic-gate get_asyncaddr(&t_afar); 2705*7c478bd9Sstevel@tonic-gate t_afar &= SABRE_AFAR_PA; 2706*7c478bd9Sstevel@tonic-gate get_udb_errors(&udbh, &udbl); 2707*7c478bd9Sstevel@tonic-gate 2708*7c478bd9Sstevel@tonic-gate if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) { 2709*7c478bd9Sstevel@tonic-gate /* 2710*7c478bd9Sstevel@tonic-gate * Clear the errors 2711*7c478bd9Sstevel@tonic-gate */ 2712*7c478bd9Sstevel@tonic-gate clr_datapath(); 2713*7c478bd9Sstevel@tonic-gate 2714*7c478bd9Sstevel@tonic-gate if (isus2i || isus2e) 2715*7c478bd9Sstevel@tonic-gate set_asyncflt(t_afsr); 2716*7c478bd9Sstevel@tonic-gate else 2717*7c478bd9Sstevel@tonic-gate set_asyncflt(t_afsr & ~P_AFSR_CP); 2718*7c478bd9Sstevel@tonic-gate 2719*7c478bd9Sstevel@tonic-gate /* 2720*7c478bd9Sstevel@tonic-gate * determine whether to check UDBH or UDBL for persistence 2721*7c478bd9Sstevel@tonic-gate */ 2722*7c478bd9Sstevel@tonic-gate if (ecc->flt_synd & UDBL_REG) { 2723*7c478bd9Sstevel@tonic-gate udb = (ushort_t)udbl; 2724*7c478bd9Sstevel@tonic-gate t_afar |= 0x8; 2725*7c478bd9Sstevel@tonic-gate } else { 2726*7c478bd9Sstevel@tonic-gate udb = (ushort_t)udbh; 2727*7c478bd9Sstevel@tonic-gate } 2728*7c478bd9Sstevel@tonic-gate 2729*7c478bd9Sstevel@tonic-gate if (ce_debug || ue_debug) { 
2730*7c478bd9Sstevel@tonic-gate spitf_async_flt spf_flt; /* for logging */ 2731*7c478bd9Sstevel@tonic-gate struct async_flt *aflt = 2732*7c478bd9Sstevel@tonic-gate (struct async_flt *)&spf_flt; 2733*7c478bd9Sstevel@tonic-gate 2734*7c478bd9Sstevel@tonic-gate /* Package the info nicely in the spf_flt struct */ 2735*7c478bd9Sstevel@tonic-gate bzero(&spf_flt, sizeof (spitf_async_flt)); 2736*7c478bd9Sstevel@tonic-gate aflt->flt_stat = t_afsr; 2737*7c478bd9Sstevel@tonic-gate aflt->flt_addr = t_afar; 2738*7c478bd9Sstevel@tonic-gate spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF); 2739*7c478bd9Sstevel@tonic-gate spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF); 2740*7c478bd9Sstevel@tonic-gate 2741*7c478bd9Sstevel@tonic-gate cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR | 2742*7c478bd9Sstevel@tonic-gate CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL, 2743*7c478bd9Sstevel@tonic-gate " check_ecc: Dumping captured error states ..."); 2744*7c478bd9Sstevel@tonic-gate } 2745*7c478bd9Sstevel@tonic-gate 2746*7c478bd9Sstevel@tonic-gate /* 2747*7c478bd9Sstevel@tonic-gate * if the fault addresses don't match, not persistent 2748*7c478bd9Sstevel@tonic-gate */ 2749*7c478bd9Sstevel@tonic-gate if (t_afar != ecc->flt_addr) { 2750*7c478bd9Sstevel@tonic-gate return (persistent); 2751*7c478bd9Sstevel@tonic-gate } 2752*7c478bd9Sstevel@tonic-gate 2753*7c478bd9Sstevel@tonic-gate /* 2754*7c478bd9Sstevel@tonic-gate * check for UE persistence 2755*7c478bd9Sstevel@tonic-gate * since all DIMMs in the bank are identified for a UE, 2756*7c478bd9Sstevel@tonic-gate * there's no reason to check the syndrome 2757*7c478bd9Sstevel@tonic-gate */ 2758*7c478bd9Sstevel@tonic-gate if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) { 2759*7c478bd9Sstevel@tonic-gate persistent = 1; 2760*7c478bd9Sstevel@tonic-gate } 2761*7c478bd9Sstevel@tonic-gate 2762*7c478bd9Sstevel@tonic-gate /* 2763*7c478bd9Sstevel@tonic-gate * check for CE persistence 2764*7c478bd9Sstevel@tonic-gate */ 2765*7c478bd9Sstevel@tonic-gate if ((ecc->flt_stat & 
P_AFSR_CE) && (t_afsr & P_AFSR_CE)) { 2766*7c478bd9Sstevel@tonic-gate if ((udb & P_DER_E_SYND) == 2767*7c478bd9Sstevel@tonic-gate (ecc->flt_synd & P_DER_E_SYND)) { 2768*7c478bd9Sstevel@tonic-gate persistent = 1; 2769*7c478bd9Sstevel@tonic-gate } 2770*7c478bd9Sstevel@tonic-gate } 2771*7c478bd9Sstevel@tonic-gate } 2772*7c478bd9Sstevel@tonic-gate return (persistent); 2773*7c478bd9Sstevel@tonic-gate } 2774*7c478bd9Sstevel@tonic-gate 2775*7c478bd9Sstevel@tonic-gate #ifdef HUMMINGBIRD 2776*7c478bd9Sstevel@tonic-gate #define HB_FULL_DIV 1 2777*7c478bd9Sstevel@tonic-gate #define HB_HALF_DIV 2 2778*7c478bd9Sstevel@tonic-gate #define HB_LOWEST_DIV 8 2779*7c478bd9Sstevel@tonic-gate #define HB_ECLK_INVALID 0xdeadbad 2780*7c478bd9Sstevel@tonic-gate static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = { 2781*7c478bd9Sstevel@tonic-gate HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID, 2782*7c478bd9Sstevel@tonic-gate HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID, 2783*7c478bd9Sstevel@tonic-gate HB_ECLK_8 }; 2784*7c478bd9Sstevel@tonic-gate 2785*7c478bd9Sstevel@tonic-gate #define HB_SLOW_DOWN 0 2786*7c478bd9Sstevel@tonic-gate #define HB_SPEED_UP 1 2787*7c478bd9Sstevel@tonic-gate 2788*7c478bd9Sstevel@tonic-gate #define SET_ESTAR_MODE(mode) \ 2789*7c478bd9Sstevel@tonic-gate stdphysio(HB_ESTAR_MODE, (mode)); \ 2790*7c478bd9Sstevel@tonic-gate /* \ 2791*7c478bd9Sstevel@tonic-gate * PLL logic requires minimum of 16 clock \ 2792*7c478bd9Sstevel@tonic-gate * cycles to lock to the new clock speed. \ 2793*7c478bd9Sstevel@tonic-gate * Wait 1 usec to satisfy this requirement. 
\
 */							\
	drv_usecwait(1);

/*
 * Reprogram the memory refresh count field of HB_MEM_CNTRL0 for a new
 * cpu clock divisor.  The new count is derived from the cpu clock
 * frequency so that the DRAM refresh interval (HB_REFRESH_INTERVAL) is
 * preserved at the new speed.  When slowing down and Memory Self
 * Refresh is disabled, we must also spin until the old count drains
 * (see the comment in the body below).
 */
#define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div) \
{									\
	volatile uint64_t data;						\
	uint64_t count, new_count;					\
	clock_t delay;							\
	data = lddphysio(HB_MEM_CNTRL0);				\
	count = (data & HB_REFRESH_COUNT_MASK) >>			\
	    HB_REFRESH_COUNT_SHIFT;					\
	new_count = (HB_REFRESH_INTERVAL *				\
	    cpunodes[CPU->cpu_id].clock_freq) /				\
	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);	\
	data = (data & ~HB_REFRESH_COUNT_MASK) |			\
	    (new_count << HB_REFRESH_COUNT_SHIFT);			\
	stdphysio(HB_MEM_CNTRL0, data);					\
	data = lddphysio(HB_MEM_CNTRL0);				\
	/*								\
	 * If we are slowing down the cpu and Memory			\
	 * Self Refresh is not enabled, it is required			\
	 * to wait for old refresh count to count-down and		\
	 * new refresh count to go into effect (let new value		\
	 * counts down once).						\
	 */								\
	if ((direction) == HB_SLOW_DOWN &&				\
	    (data & HB_SELF_REFRESH_MASK) == 0) {			\
		/*							\
		 * Each count takes 64 cpu clock cycles			\
		 * to decrement.  Wait for current refresh		\
		 * count plus new refresh count at current		\
		 * cpu speed to count down to zero.  Round		\
		 * up the delay time.					\
		 */							\
		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *			\
		    (count + new_count) * MICROSEC * (cur_div)) /	\
		    cpunodes[CPU->cpu_id].clock_freq) + 1;		\
		drv_usecwait(delay);					\
	}								\
}

/*
 * Set or clear the Memory Self Refresh enable bit in HB_MEM_CNTRL0.
 * NOTE(review): the trailing read-back presumably pushes the write out
 * to the memory controller before the caller proceeds -- confirm.
 */
#define	SET_SELF_REFRESH(bit)						\
{									\
	volatile uint64_t data;						\
	data = lddphysio(HB_MEM_CNTRL0);				\
	data = (data & ~HB_SELF_REFRESH_MASK) |				\
	    ((bit) << HB_SELF_REFRESH_SHIFT);				\
	stdphysio(HB_MEM_CNTRL0, data);					\
	data = lddphysio(HB_MEM_CNTRL0);				\
}
#endif	/* HUMMINGBIRD */

/*
 * Change the cpu clock rate by programming the Hummingbird E* (energy
 * star) mode register.
 *
 *	new_divisor:	requested clock divisor; must be in
 *			[HB_FULL_DIV, HB_LOWEST_DIV] and map to a valid
 *			hb_eclk[] entry (3, 5 and 7 are invalid).
 *	arg2:		unused (ARGSUSED).
 *
 * Ordering as implemented below: when slowing down, the refresh count
 * is reprogrammed *before* the clock is changed; when speeding up, it
 * is reprogrammed *after*.  Memory Self Refresh is enabled whenever the
 * cpu runs below full speed and disabled again on the way back up.
 * Transitions between full speed and divisors > 2 are staged through
 * half speed.  No-op on non-HUMMINGBIRD builds.
 */
/* ARGSUSED */
void
cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
{
#ifdef HUMMINGBIRD
	uint64_t cur_mask, cur_divisor = 0;
	volatile uint64_t reg;
	int index;

	/* Reject out-of-range and unsupported (HB_ECLK_INVALID) divisors. */
	if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
	    (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
		    new_divisor);
		return;
	}

	/* Derive the current divisor from the E* mode register. */
	reg = lddphysio(HB_ESTAR_MODE);
	cur_mask = reg & HB_ECLK_MASK;
	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
		if (hb_eclk[index] == cur_mask) {
			cur_divisor = index;
			break;
		}
	}

	if (cur_divisor == 0)
		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
		    "can't be determined!");

	/*
	 * If we are already at the requested divisor speed, just
	 * return.
	 */
	if (cur_divisor == new_divisor)
		return;

	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);

	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * lower speed.
		 */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * full speed.
		 */
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);

		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);

	} else if (cur_divisor < new_divisor) {
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > new_divisor) {
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
	}
	/* Record the new divisor in per-cpu state for later queries. */
	CPU->cpu_m.divisor = (uchar_t)new_divisor;
#endif
}

/*
 * Clear the AFSR sticky bits and the UDBs.  For Sabre/Spitfire/Blackbird,
 * we clear all the sticky bits.  If a non-null pointer to a async fault
 * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
 * info will be returned in the structure. 
If a non-null pointer to a 2937*7c478bd9Sstevel@tonic-gate * uint64_t is passed in, this will be updated if the CP bit is set in the 2938*7c478bd9Sstevel@tonic-gate * AFSR. The afsr will be returned. 2939*7c478bd9Sstevel@tonic-gate */ 2940*7c478bd9Sstevel@tonic-gate static uint64_t 2941*7c478bd9Sstevel@tonic-gate clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr) 2942*7c478bd9Sstevel@tonic-gate { 2943*7c478bd9Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)spf_flt; 2944*7c478bd9Sstevel@tonic-gate uint64_t afsr; 2945*7c478bd9Sstevel@tonic-gate uint64_t udbh, udbl; 2946*7c478bd9Sstevel@tonic-gate 2947*7c478bd9Sstevel@tonic-gate get_asyncflt(&afsr); 2948*7c478bd9Sstevel@tonic-gate 2949*7c478bd9Sstevel@tonic-gate if ((acc_afsr != NULL) && (afsr & P_AFSR_CP)) 2950*7c478bd9Sstevel@tonic-gate *acc_afsr |= afsr; 2951*7c478bd9Sstevel@tonic-gate 2952*7c478bd9Sstevel@tonic-gate if (spf_flt != NULL) { 2953*7c478bd9Sstevel@tonic-gate aflt->flt_stat = afsr; 2954*7c478bd9Sstevel@tonic-gate get_asyncaddr(&aflt->flt_addr); 2955*7c478bd9Sstevel@tonic-gate aflt->flt_addr &= SABRE_AFAR_PA; 2956*7c478bd9Sstevel@tonic-gate 2957*7c478bd9Sstevel@tonic-gate get_udb_errors(&udbh, &udbl); 2958*7c478bd9Sstevel@tonic-gate spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF); 2959*7c478bd9Sstevel@tonic-gate spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF); 2960*7c478bd9Sstevel@tonic-gate } 2961*7c478bd9Sstevel@tonic-gate 2962*7c478bd9Sstevel@tonic-gate set_asyncflt(afsr); /* clear afsr */ 2963*7c478bd9Sstevel@tonic-gate clr_datapath(); /* clear udbs */ 2964*7c478bd9Sstevel@tonic-gate return (afsr); 2965*7c478bd9Sstevel@tonic-gate } 2966*7c478bd9Sstevel@tonic-gate 2967*7c478bd9Sstevel@tonic-gate /* 2968*7c478bd9Sstevel@tonic-gate * Scan the ecache to look for bad lines. If found, the afsr, afar, e$ data 2969*7c478bd9Sstevel@tonic-gate * tag of the first bad line will be returned. We also return the old-afsr 2970*7c478bd9Sstevel@tonic-gate * (before clearing the sticky bits). 
The linecnt data will be updated to
 * indicate the number of bad lines detected.
 */
static void
scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
	uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
{
	ec_data_t t_ecdata[8];		/* one 64-byte line, 8 8-byte chunks */
	uint64_t t_etag, oafsr;
	uint64_t pa = AFLT_INV_ADDR;	/* PA of first bad line, if any */
	uint32_t i, j, ecache_sz;
	uint64_t acc_afsr = 0;		/* accumulated old-afsr bits */
	uint64_t *cpu_afsr = NULL;

	/* Per-cpu scrub accumulator, when per-cpu private data exists. */
	if (CPU_PRIVATE(CPU) != NULL)
		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);

	*linecnt = 0;
	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;

	/* Walk the whole E$ one 64-byte line at a time. */
	for (i = 0; i < ecache_sz; i += 64) {
		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
		    cpu_afsr);
		acc_afsr |= oafsr;

		/*
		 * Scan through the whole 64 bytes line in 8 8-byte chunks
		 * looking for the first occurrence of an EDP error.  The AFSR
		 * info is captured for each 8-byte chunk.  Note that for
		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
		 * 16-byte chunk granularity (i.e. the AFSR will be the same
		 * for the high and low 8-byte words within the 16-byte chunk).
		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
		 * granularity and only PSYND bits [7:0] are used.
		 */
		for (j = 0; j < 8; j++) {
			ec_data_t *ecdptr = &t_ecdata[j];

			if (ecdptr->ec_afsr & P_AFSR_EDP) {
				uint64_t errpa;
				ushort_t psynd;
				uint32_t ec_set_size = ecache_sz /
				    ecache_associativity;

				/*
				 * For Spitfire/Blackbird, we need to look at
				 * the PSYND to make sure that this 8-byte chunk
				 * is the right one.  PSYND bits [15:8] belong
				 * to the upper 8-byte (even) chunk.  Bits
				 * [7:0] belong to the lower 8-byte chunk (odd).
				 */
				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
				if (!isus2i && !isus2e) {
					if (j & 0x1)
						psynd = psynd & 0xFF;
					else
						psynd = psynd >> 8;

					if (!psynd)
						continue; /* wrong chunk */
				}

				/* Construct the PA from tag + set offset */
				errpa = ((t_etag & cpu_ec_tag_mask) <<
				    cpu_ec_tag_shift) | ((i | (j << 3)) %
				    ec_set_size);

				/* clean up the cache line */
				flushecacheline(P2ALIGN(errpa, 64),
				    cpunodes[CPU->cpu_id].ecache_size);

				/* the flush itself may latch new AFSR bits */
				oafsr = clear_errors(NULL, cpu_afsr);
				acc_afsr |= oafsr;

				(*linecnt)++;

				/*
				 * Capture the PA for the first bad line found.
				 * Return the ecache dump and tag info.
				 */
				if (pa == AFLT_INV_ADDR) {
					int k;

					pa = errpa;
					for (k = 0; k < 8; k++)
						ecache_data[k] = t_ecdata[k];
					*ecache_tag = t_etag;
				}
				break;
			}
		}
	}
	*t_afar = pa;		/* AFLT_INV_ADDR if no bad line was found */
	*t_afsr = acc_afsr;
}

/*
 * Log the E$ (or, for a victimized line, memory) dump captured in the
 * fault record: the tag state/parity header followed by all 8 8-byte
 * data chunks, flagging the chunk(s) whose parity syndrome is bad.
 */
static void
cpu_log_ecmem_info(spitf_async_flt *spf_flt)
{
	struct async_flt *aflt = (struct async_flt *)spf_flt;
	uint64_t ecache_tag = spf_flt->flt_ec_tag;
	char linestr[30];
	char *state_str;
	int i;

	/*
	 * Check the ecache tag to make sure it
	 * is valid.  If invalid, a memory dump was
	 * captured instead of a ecache dump.
	 */
	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
		uchar_t eparity = (uchar_t)
		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);

		uchar_t estate = (uchar_t)
		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);

		/* Decode the MOESI line state for the log message. */
		if (estate == cpu_ec_state_shr)
			state_str = "Shared";
		else if (estate == cpu_ec_state_exl)
			state_str = "Exclusive";
		else if (estate == cpu_ec_state_own)
			state_str = "Owner";
		else if (estate == cpu_ec_state_mod)
			state_str = "Modified";
		else
			state_str = "Invalid";

		if (spf_flt->flt_ec_lcnt > 1) {
			(void) snprintf(linestr, sizeof (linestr),
			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
		} else {
			linestr[0] = '\0';
		}

		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    " PA=0x%08x.%08x\n E$tag 0x%08x.%08x E$State: %s "
		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
		    (uint32_t)ecache_tag, state_str,
		    (uint32_t)eparity, linestr);
	} else {
		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    " E$tag != PA from AFAR; E$line was victimized"
		    "\n dumping memory from PA 0x%08x.%08x instead",
		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
	}

	/*
	 * Dump out all 8 8-byte ecache data captured
	 * For each 8-byte data captured, we check the
	 * captured afsr's parity syndrome to find out
	 * which 8-byte chunk is bad.  For memory dump, the
	 * AFSR values were initialized to 0.
	 */
	for (i = 0; i < 8; i++) {
		ec_data_t *ecdptr;
		uint_t offset;
		ushort_t psynd;
		ushort_t bad;
		uint64_t edp;

		offset = i << 3;	/* multiply by 8 */
		ecdptr = &spf_flt->flt_ec_data[i];
		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
		edp = ecdptr->ec_afsr & P_AFSR_EDP;

		/*
		 * For Sabre/Hummingbird, parity synd is captured only
		 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
		 * For spitfire/blackbird, AFSR.PSYND is captured
		 * in 16-byte granularity.  [15:8] represent
		 * the upper 8 byte and [7:0] the lower 8 byte.
		 */
		if (isus2i || isus2e || (i & 0x1))
			bad = (psynd & 0xFF);		/* check bits [7:0] */
		else
			bad = (psynd & 0xFF00);		/* check bits [15:8] */

		if (bad && edp) {
			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
			    " E$Data (0x%02x): 0x%08x.%08x "
			    "*Bad* PSYND=0x%04x", offset,
			    (uint32_t)(ecdptr->ec_d8 >> 32),
			    (uint32_t)ecdptr->ec_d8, psynd);
		} else {
			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
			    " E$Data (0x%02x): 0x%08x.%08x", offset,
			    (uint32_t)(ecdptr->ec_d8 >> 32),
			    (uint32_t)ecdptr->ec_d8);
		}
	}
}

/*
 * Common logging function for all cpu async errors.  This function allows the
 * caller to generate a single cmn_err() call that logs the appropriate items
 * from the fault structure, and implements our rules for AFT logging levels.
 *
 *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
 *	tagnum: 0, 1, 2, .. 
generate the [AFT#] tag 3172*7c478bd9Sstevel@tonic-gate * spflt: pointer to spitfire async fault structure 3173*7c478bd9Sstevel@tonic-gate * logflags: bitflags indicating what to output 3174*7c478bd9Sstevel@tonic-gate * endstr: a end string to appear at the end of this log 3175*7c478bd9Sstevel@tonic-gate * fmt: a format string to appear at the beginning of the log 3176*7c478bd9Sstevel@tonic-gate * 3177*7c478bd9Sstevel@tonic-gate * The logflags allows the construction of predetermined output from the spflt 3178*7c478bd9Sstevel@tonic-gate * structure. The individual data items always appear in a consistent order. 3179*7c478bd9Sstevel@tonic-gate * Note that either or both of the spflt structure pointer and logflags may be 3180*7c478bd9Sstevel@tonic-gate * NULL or zero respectively, indicating that the predetermined output 3181*7c478bd9Sstevel@tonic-gate * substrings are not requested in this log. The output looks like this: 3182*7c478bd9Sstevel@tonic-gate * 3183*7c478bd9Sstevel@tonic-gate * [AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU> 3184*7c478bd9Sstevel@tonic-gate * <CPU_SPACE><CPU_ERRID> 3185*7c478bd9Sstevel@tonic-gate * newline+4spaces<CPU_AFSR><CPU_AFAR> 3186*7c478bd9Sstevel@tonic-gate * newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC> 3187*7c478bd9Sstevel@tonic-gate * newline+4spaces<CPU_UDBH><CPU_UDBL> 3188*7c478bd9Sstevel@tonic-gate * newline+4spaces<CPU_SYND> 3189*7c478bd9Sstevel@tonic-gate * newline+4spaces<endstr> 3190*7c478bd9Sstevel@tonic-gate * 3191*7c478bd9Sstevel@tonic-gate * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>; 3192*7c478bd9Sstevel@tonic-gate * it is assumed that <endstr> will be the unum string in this case. The size 3193*7c478bd9Sstevel@tonic-gate * of our intermediate formatting buf[] is based on the worst case of all flags 3194*7c478bd9Sstevel@tonic-gate * being enabled. 
We pass the caller's varargs directly to vcmn_err() for
 * formatting so we don't need additional stack space to format them here.
 */
/*PRINTFLIKE6*/
static void
cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
	const char *endstr, const char *fmt, ...)
{
	struct async_flt *aflt = (struct async_flt *)spflt;
	char buf[400], *p, *q; /* see comments about buf[] size above */
	va_list ap;
	int console_log_flag;

	/*
	 * Decide whether this message goes to the console ([AFT#]) or is
	 * buffer-only (![AFT#]).  Level-1 CPU faults and panics use the
	 * tagnum/aft_verbose rule; otherwise the ce_verbose_* tunables
	 * control both whether we log at all and whether it hits console.
	 */
	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
	    (aflt->flt_stat & P_AFSR_LEVEL1)) ||
	    (aflt->flt_panic)) {
		console_log_flag = (tagnum < 2) || aft_verbose;
	} else {
		int verbose = ((aflt->flt_class == BUS_FAULT) ||
		    (aflt->flt_stat & P_AFSR_CE)) ?
		    ce_verbose_memory : ce_verbose_other;

		if (!verbose)
			return;

		console_log_flag = (verbose > 1);
	}

	/* A leading '!' suppresses console output in cmn_err(). */
	if (console_log_flag)
		(void) sprintf(buf, "[AFT%d]", tagnum);
	else
		(void) sprintf(buf, "![AFT%d]", tagnum);

	p = buf + strlen(buf);	/* current buffer position */
	q = buf + sizeof (buf);	/* pointer past end of buffer */

	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
		p += strlen(p);
	}

	/*
	 * Copy the caller's format string verbatim into buf[].  It will be
	 * formatted by the call to vcmn_err() at the end of this function.
	 */
	if (fmt != NULL && p < q) {
		(void) strncpy(p, fmt, (size_t)(q - p - 1));
		buf[sizeof (buf) - 1] = '\0';	/* strncpy may not terminate */
		p += strlen(p);
	}

	/* Append each requested substring in a fixed, documented order. */
	if (spflt != NULL) {
		if (logflags & CPU_FLTCPU) {
			(void) snprintf(p, (size_t)(q - p), " CPU%d",
			    aflt->flt_inst);
			p += strlen(p);
		}

		if (logflags & CPU_SPACE) {
			if (aflt->flt_status & ECC_D_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Data access");
			else if (aflt->flt_status & ECC_I_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Instruction access");
			p += strlen(p);
		}

		if (logflags & CPU_TL) {
			(void) snprintf(p, (size_t)(q - p), " at TL%s",
			    aflt->flt_tl ? ">0" : "=0");
			p += strlen(p);
		}

		if (logflags & CPU_ERRID) {
			(void) snprintf(p, (size_t)(q - p),
			    ", errID 0x%08x.%08x",
			    (uint32_t)(aflt->flt_id >> 32),
			    (uint32_t)aflt->flt_id);
			p += strlen(p);
		}

		if (logflags & CPU_AFSR) {
			/* %b is the Solaris cmn_err bit-decode specifier */
			(void) snprintf(p, (size_t)(q - p),
			    "\n AFSR 0x%08b.%08b",
			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
			p += strlen(p);
		}

		if (logflags & CPU_AFAR) {
			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
			    (uint32_t)(aflt->flt_addr >> 32),
			    (uint32_t)aflt->flt_addr);
			p += strlen(p);
		}

		if (logflags & CPU_AF_PSYND) {
			ushort_t psynd = (ushort_t)
			    (aflt->flt_stat & P_AFSR_P_SYND);

			(void) snprintf(p, (size_t)(q - p),
			    "\n AFSR.PSYND 0x%04x(Score %02d)",
			    psynd, ecc_psynd_score(psynd));
			p += strlen(p);
		}

		if (logflags & CPU_AF_ETS) {
			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
			p += strlen(p);
		}

		if (logflags & CPU_FAULTPC) {
			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
			    (void *)aflt->flt_pc);
			p += strlen(p);
		}

		if (logflags & CPU_UDBH) {
			(void) snprintf(p, (size_t)(q - p),
			    "\n UDBH 0x%04b UDBH.ESYND 0x%02x",
			    spflt->flt_sdbh, UDB_FMTSTR,
			    spflt->flt_sdbh & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_UDBL) {
			(void) snprintf(p, (size_t)(q - p),
			    " UDBL 0x%04b UDBL.ESYND 0x%02x",
			    spflt->flt_sdbl, UDB_FMTSTR,
			    spflt->flt_sdbl & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_SYND) {
			ushort_t synd = SYND(aflt->flt_synd);

			/*
			 * Ends without a newline so that <endstr> (the
			 * memory-module unum) lands on the same line.
			 */
			(void) snprintf(p, (size_t)(q - p),
			    "\n %s Syndrome 0x%x Memory Module ",
			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
			p += strlen(p);
		}
	}

	if (endstr != NULL) {
		if (!(logflags & CPU_SYND))
			(void) snprintf(p, (size_t)(q - p), "\n %s", endstr);
		else
			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
		p += strlen(p);
	}

	if (ce_code == CE_CONT && (p < q - 1))
		(void) strcpy(p, "\n");	/* add final \n if needed */

	va_start(ap, fmt);
	vcmn_err(ce_code, buf, ap);
	va_end(ap);
}

/*
 * Ecache Scrubbing
 *
 * The basic idea is to prevent lines from sitting in the ecache long enough
 * to build up soft errors which can lead to ecache parity errors.
 *
 * The following rules are observed when flushing the ecache:
 *
 * 1. When the system is busy, flush bad clean lines
 * 2. When the system is idle, flush all clean lines
 * 3. When the system is idle, flush good dirty lines
 * 4. Never flush bad dirty lines. 
3368*7c478bd9Sstevel@tonic-gate * 3369*7c478bd9Sstevel@tonic-gate * modify parity busy idle 3370*7c478bd9Sstevel@tonic-gate * ---------------------------- 3371*7c478bd9Sstevel@tonic-gate * clean good X 3372*7c478bd9Sstevel@tonic-gate * clean bad X X 3373*7c478bd9Sstevel@tonic-gate * dirty good X 3374*7c478bd9Sstevel@tonic-gate * dirty bad 3375*7c478bd9Sstevel@tonic-gate * 3376*7c478bd9Sstevel@tonic-gate * Bad or good refers to whether a line has an E$ parity error or not. 3377*7c478bd9Sstevel@tonic-gate * Clean or dirty refers to the state of the modified bit. We currently 3378*7c478bd9Sstevel@tonic-gate * default the scan rate to 100 (scan 10% of the cache per second). 3379*7c478bd9Sstevel@tonic-gate * 3380*7c478bd9Sstevel@tonic-gate * The following are E$ states and actions. 3381*7c478bd9Sstevel@tonic-gate * 3382*7c478bd9Sstevel@tonic-gate * We encode our state as a 3-bit number, consisting of: 3383*7c478bd9Sstevel@tonic-gate * ECACHE_STATE_MODIFIED (0=clean, 1=dirty) 3384*7c478bd9Sstevel@tonic-gate * ECACHE_STATE_PARITY (0=good, 1=bad) 3385*7c478bd9Sstevel@tonic-gate * ECACHE_STATE_BUSY (0=idle, 1=busy) 3386*7c478bd9Sstevel@tonic-gate * 3387*7c478bd9Sstevel@tonic-gate * We associate a flushing and a logging action with each state. 3388*7c478bd9Sstevel@tonic-gate * 3389*7c478bd9Sstevel@tonic-gate * E$ actions are different for Spitfire and Sabre/Hummingbird modules. 3390*7c478bd9Sstevel@tonic-gate * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored 3391*7c478bd9Sstevel@tonic-gate * E$ only, in addition to value being set by ec_flush. 
3392*7c478bd9Sstevel@tonic-gate */ 3393*7c478bd9Sstevel@tonic-gate 3394*7c478bd9Sstevel@tonic-gate #define ALWAYS_FLUSH 0x1 /* flush E$ line on all E$ types */ 3395*7c478bd9Sstevel@tonic-gate #define NEVER_FLUSH 0x0 /* never the flush the E$ line */ 3396*7c478bd9Sstevel@tonic-gate #define MIRROR_FLUSH 0xF /* flush E$ line on mirrored E$ only */ 3397*7c478bd9Sstevel@tonic-gate 3398*7c478bd9Sstevel@tonic-gate struct { 3399*7c478bd9Sstevel@tonic-gate char ec_flush; /* whether to flush or not */ 3400*7c478bd9Sstevel@tonic-gate char ec_log; /* ecache logging */ 3401*7c478bd9Sstevel@tonic-gate char ec_log_type; /* log type info */ 3402*7c478bd9Sstevel@tonic-gate } ec_action[] = { /* states of the E$ line in M P B */ 3403*7c478bd9Sstevel@tonic-gate { ALWAYS_FLUSH, 0, 0 }, /* 0 0 0 clean_good_idle */ 3404*7c478bd9Sstevel@tonic-gate { MIRROR_FLUSH, 0, 0 }, /* 0 0 1 clean_good_busy */ 3405*7c478bd9Sstevel@tonic-gate { ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */ 3406*7c478bd9Sstevel@tonic-gate { ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */ 3407*7c478bd9Sstevel@tonic-gate { ALWAYS_FLUSH, 0, 0 }, /* 1 0 0 dirty_good_idle */ 3408*7c478bd9Sstevel@tonic-gate { MIRROR_FLUSH, 0, 0 }, /* 1 0 1 dirty_good_busy */ 3409*7c478bd9Sstevel@tonic-gate { NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR }, /* 1 1 0 dirty_bad_idle */ 3410*7c478bd9Sstevel@tonic-gate { NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR } /* 1 1 1 dirty_bad_busy */ 3411*7c478bd9Sstevel@tonic-gate }; 3412*7c478bd9Sstevel@tonic-gate 3413*7c478bd9Sstevel@tonic-gate /* 3414*7c478bd9Sstevel@tonic-gate * Offsets into the ec_action[] that determines clean_good_busy and 3415*7c478bd9Sstevel@tonic-gate * dirty_good_busy lines. 
3416*7c478bd9Sstevel@tonic-gate */ 3417*7c478bd9Sstevel@tonic-gate #define ECACHE_CGB_LINE 1 /* E$ clean_good_busy line */ 3418*7c478bd9Sstevel@tonic-gate #define ECACHE_DGB_LINE 5 /* E$ dirty_good_busy line */ 3419*7c478bd9Sstevel@tonic-gate 3420*7c478bd9Sstevel@tonic-gate /* 3421*7c478bd9Sstevel@tonic-gate * We are flushing lines which are Clean_Good_Busy and also the lines 3422*7c478bd9Sstevel@tonic-gate * Dirty_Good_Busy. And we only follow it for non-mirrored E$. 3423*7c478bd9Sstevel@tonic-gate */ 3424*7c478bd9Sstevel@tonic-gate #define CGB(x, m) (((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR)) 3425*7c478bd9Sstevel@tonic-gate #define DGB(x, m) (((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR)) 3426*7c478bd9Sstevel@tonic-gate 3427*7c478bd9Sstevel@tonic-gate #define ECACHE_STATE_MODIFIED 0x4 3428*7c478bd9Sstevel@tonic-gate #define ECACHE_STATE_PARITY 0x2 3429*7c478bd9Sstevel@tonic-gate #define ECACHE_STATE_BUSY 0x1 3430*7c478bd9Sstevel@tonic-gate 3431*7c478bd9Sstevel@tonic-gate /* 3432*7c478bd9Sstevel@tonic-gate * If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced. 
 */
int ecache_calls_a_sec_mirrored = 1;
int ecache_lines_per_call_mirrored = 1;

int ecache_scrub_enable = 1;	/* ecache scrubbing is on by default */
int ecache_scrub_verbose = 1;	/* prints clean and dirty lines */
int ecache_scrub_panic = 0;	/* panics on a clean and dirty line */
int ecache_calls_a_sec = 100;	/* scrubber calls per sec */
int ecache_scan_rate = 100;	/* scan rate (in tenths of a percent) */
int ecache_idle_factor = 1;	/* increase the scan rate when idle */
int ecache_flush_clean_good_busy = 50;	/* flush rate (in percent) */
int ecache_flush_dirty_good_busy = 100;	/* flush rate (in percent) */

volatile int ec_timeout_calls = 1;	/* timeout calls */

/*
 * Interrupt number and pil for ecache scrubber cross-trap calls.
 */
static uint_t ecache_scrub_inum;
uint_t ecache_scrub_pil = PIL_9;

/*
 * Kstats for the E$ scrubber.
 *
 * NOTE: the field order must match the 3-bit M/P/B state encoding, since
 * scrub_ecache_line() indexes this structure as an array of kstat_named_t
 * to bump the counter for the current line state.
 */
typedef struct ecache_kstat {
	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
	kstat_named_t clean_good_busy;		/* # of lines skipped */
	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
	kstat_named_t dirty_good_busy;		/* # of lines skipped */
	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
	kstat_named_t invalid_lines;		/* # of invalid lines */
	kstat_named_t clean_good_busy_flush;	/* # of lines scrubbed */
	kstat_named_t dirty_good_busy_flush;	/* # of lines scrubbed */
	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
} ecache_kstat_t;

static ecache_kstat_t ec_kstat_template = {
	{ "clean_good_idle", KSTAT_DATA_ULONG },
	{ "clean_good_busy", KSTAT_DATA_ULONG },
	{ "clean_bad_idle", KSTAT_DATA_ULONG },
	{ "clean_bad_busy", KSTAT_DATA_ULONG },
	{ "dirty_good_idle", KSTAT_DATA_ULONG },
	{ "dirty_good_busy", KSTAT_DATA_ULONG },
	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
	{ "invalid_lines", KSTAT_DATA_ULONG },
	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
};

struct kmem_cache *sf_private_cache;

/*
 * Called periodically on each CPU to scan the ecache once a sec.
 * adjusting the ecache line index appropriately
 */
void
scrub_ecache_line()
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	int cpuid = CPU->cpu_id;
	uint32_t index = ssmp->ecache_flush_index;
	uint64_t ec_size = cpunodes[cpuid].ecache_size;
	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
	int nlines = ssmp->ecache_nlines;
	uint32_t ec_set_size = ec_size / ecache_associativity;
	int ec_mirror = ssmp->ecache_mirror;
	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;

	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
	int mpb;		/* encode Modified, Parity, Busy for action */
	uchar_t state;
	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
	ec_data_t ec_data[8];
	kstat_named_t *ec_knp;

	switch (ec_mirror) {
	default:
	case ECACHE_CPU_NON_MIRROR:
		/*
		 * The E$ scan rate is expressed in units of tenths of
		 * a percent.  ecache_scan_rate = 1000 (100%) means the
		 * whole cache is scanned every second.
		 */
		scan_lines = (nlines * ecache_scan_rate) /
		    (1000 * ecache_calls_a_sec);
		if (!(ssmp->ecache_busy)) {
			/* idle CPU: scan more aggressively */
			if (ecache_idle_factor > 0) {
				scan_lines *= ecache_idle_factor;
			}
		} else {
			/*
			 * Busy CPU: budget how many clean-good and
			 * dirty-good lines we may flush this pass.
			 */
			flush_clean_busy = (scan_lines *
			    ecache_flush_clean_good_busy) / 100;
			flush_dirty_busy = (scan_lines *
			    ecache_flush_dirty_good_busy) / 100;
		}

		/* guard against a zero tunable; used as a divisor later */
		ec_timeout_calls = (ecache_calls_a_sec ?
		    ecache_calls_a_sec : 1);
		break;

	case ECACHE_CPU_MIRROR:
		scan_lines = ecache_lines_per_call_mirrored;
		ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
		    ecache_calls_a_sec_mirrored : 1);
		break;
	}

	/*
	 * The ecache scrubber algorithm operates by reading and
	 * decoding the E$ tag to determine whether the corresponding E$ line
	 * can be scrubbed. There is a implicit assumption in the scrubber
	 * logic that the E$ tag is valid. Unfortunately, this assertion is
	 * flawed since the E$ tag may also be corrupted and have parity errors
	 * The scrubber logic is enhanced to check the validity of the E$ tag
	 * before scrubbing. When a parity error is detected in the E$ tag,
	 * it is possible to recover and scrub the tag under certain conditions
	 * so that a ETP error condition can be avoided.
	 */

	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
		/*
		 * We get the old-AFSR before clearing the AFSR sticky bits
		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
		 */
		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
		    cpu_ec_state_shift);

		/*
		 * ETP is set try to scrub the ecache tag.
		 */
		if (nafsr & P_AFSR_ETP) {
			ecache_scrub_tag_err(nafsr, state, index);
		} else if (state & cpu_ec_state_valid) {
			/*
			 * ETP is not set, E$ tag is valid.
			 * Proceed with the E$ scrubbing.
			 */
			if (state & cpu_ec_state_dirty)
				mpb |= ECACHE_STATE_MODIFIED;

			tafsr = check_ecache_line(index, acc_afsr);

			if (tafsr & P_AFSR_EDP) {
				mpb |= ECACHE_STATE_PARITY;

				/* capture dtag data for later logging */
				if (ecache_scrub_verbose ||
				    ecache_scrub_panic) {
					get_ecache_dtag(P2ALIGN(index, 64),
					    (uint64_t *)&ec_data[0],
					    &ec_tag, &oafsr, acc_afsr);
				}
			}

			if (ssmp->ecache_busy)
				mpb |= ECACHE_STATE_BUSY;

			/*
			 * mpb indexes the ecache_kstat_t counters viewed
			 * as a kstat_named_t array (field order matters).
			 */
			ec_knp = (kstat_named_t *)ec_ksp + mpb;
			ec_knp->value.ul++;

			paddr = ((ec_tag & cpu_ec_tag_mask) <<
			    cpu_ec_tag_shift) | (index % ec_set_size);

			/*
			 * We flush the E$ lines depending on the ec_flush,
			 * we additionally flush clean_good_busy and
			 * dirty_good_busy lines for mirrored E$.
			 */
			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
				flushecacheline(paddr, ec_size);
			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
			    (ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
				flushecacheline(paddr, ec_size);
			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
				/*
				 * dirty+bad line: never flush; retire the
				 * page instead (deferred via softcall).
				 */
				softcall(ecache_page_retire, (void *)paddr);
			}

			/*
			 * Conditionally flush both the clean_good and
			 * dirty_good lines when busy.
			 */
			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
				flush_clean_busy--;
				flushecacheline(paddr, ec_size);
				ec_ksp->clean_good_busy_flush.value.ul++;
			} else if (DGB(mpb, ec_mirror) &&
			    (flush_dirty_busy > 0)) {
				flush_dirty_busy--;
				flushecacheline(paddr, ec_size);
				ec_ksp->dirty_good_busy_flush.value.ul++;
			}

			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
			    ecache_scrub_panic)) {
				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
				    tafsr);
			}

		} else {
			ec_ksp->invalid_lines.value.ul++;
		}

		/* advance one line, wrapping at the end of the E$ */
		if ((index += ec_linesize) >= ec_size)
			index = 0;

	}

	/*
	 * set the ecache scrub index for the next time around
	 */
	ssmp->ecache_flush_index = index;

	if (*acc_afsr & P_AFSR_CP) {
		uint64_t ret_afsr;

		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
		if ((ret_afsr & P_AFSR_CP) == 0)
			*acc_afsr = 0;
	}
}

/*
 * Handler for ecache_scrub_inum softint.  Call scrub_ecache_line until
 * we decrement the outstanding request count to zero.
3664*7c478bd9Sstevel@tonic-gate */ 3665*7c478bd9Sstevel@tonic-gate 3666*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 3667*7c478bd9Sstevel@tonic-gate uint_t 3668*7c478bd9Sstevel@tonic-gate scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) 3669*7c478bd9Sstevel@tonic-gate { 3670*7c478bd9Sstevel@tonic-gate int i; 3671*7c478bd9Sstevel@tonic-gate int outstanding; 3672*7c478bd9Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3673*7c478bd9Sstevel@tonic-gate uint32_t *countp = &ssmp->ec_scrub_outstanding; 3674*7c478bd9Sstevel@tonic-gate 3675*7c478bd9Sstevel@tonic-gate do { 3676*7c478bd9Sstevel@tonic-gate outstanding = *countp; 3677*7c478bd9Sstevel@tonic-gate ASSERT(outstanding > 0); 3678*7c478bd9Sstevel@tonic-gate for (i = 0; i < outstanding; i++) 3679*7c478bd9Sstevel@tonic-gate scrub_ecache_line(); 3680*7c478bd9Sstevel@tonic-gate } while (atomic_add_32_nv(countp, -outstanding)); 3681*7c478bd9Sstevel@tonic-gate 3682*7c478bd9Sstevel@tonic-gate return (DDI_INTR_CLAIMED); 3683*7c478bd9Sstevel@tonic-gate } 3684*7c478bd9Sstevel@tonic-gate 3685*7c478bd9Sstevel@tonic-gate /* 3686*7c478bd9Sstevel@tonic-gate * force each cpu to perform an ecache scrub, called from a timeout 3687*7c478bd9Sstevel@tonic-gate */ 3688*7c478bd9Sstevel@tonic-gate extern xcfunc_t ecache_scrubreq_tl1; 3689*7c478bd9Sstevel@tonic-gate 3690*7c478bd9Sstevel@tonic-gate void 3691*7c478bd9Sstevel@tonic-gate do_scrub_ecache_line(void) 3692*7c478bd9Sstevel@tonic-gate { 3693*7c478bd9Sstevel@tonic-gate long delta; 3694*7c478bd9Sstevel@tonic-gate 3695*7c478bd9Sstevel@tonic-gate if (ecache_calls_a_sec > hz) 3696*7c478bd9Sstevel@tonic-gate ecache_calls_a_sec = hz; 3697*7c478bd9Sstevel@tonic-gate else if (ecache_calls_a_sec <= 0) 3698*7c478bd9Sstevel@tonic-gate ecache_calls_a_sec = 1; 3699*7c478bd9Sstevel@tonic-gate 3700*7c478bd9Sstevel@tonic-gate if (ecache_calls_a_sec_mirrored > hz) 3701*7c478bd9Sstevel@tonic-gate ecache_calls_a_sec_mirrored = hz; 3702*7c478bd9Sstevel@tonic-gate else if 
(ecache_calls_a_sec_mirrored <= 0) 3703*7c478bd9Sstevel@tonic-gate ecache_calls_a_sec_mirrored = 1; 3704*7c478bd9Sstevel@tonic-gate 3705*7c478bd9Sstevel@tonic-gate if (ecache_scrub_enable) { 3706*7c478bd9Sstevel@tonic-gate xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0); 3707*7c478bd9Sstevel@tonic-gate delta = hz / ec_timeout_calls; 3708*7c478bd9Sstevel@tonic-gate } else { 3709*7c478bd9Sstevel@tonic-gate delta = hz; 3710*7c478bd9Sstevel@tonic-gate } 3711*7c478bd9Sstevel@tonic-gate 3712*7c478bd9Sstevel@tonic-gate (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3713*7c478bd9Sstevel@tonic-gate delta); 3714*7c478bd9Sstevel@tonic-gate } 3715*7c478bd9Sstevel@tonic-gate 3716*7c478bd9Sstevel@tonic-gate /* 3717*7c478bd9Sstevel@tonic-gate * initialization for ecache scrubbing 3718*7c478bd9Sstevel@tonic-gate * This routine is called AFTER all cpus have had cpu_init_private called 3719*7c478bd9Sstevel@tonic-gate * to initialize their private data areas. 3720*7c478bd9Sstevel@tonic-gate */ 3721*7c478bd9Sstevel@tonic-gate void 3722*7c478bd9Sstevel@tonic-gate cpu_init_cache_scrub(void) 3723*7c478bd9Sstevel@tonic-gate { 3724*7c478bd9Sstevel@tonic-gate if (ecache_calls_a_sec > hz) { 3725*7c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); " 3726*7c478bd9Sstevel@tonic-gate "resetting to hz (%d)", ecache_calls_a_sec, hz); 3727*7c478bd9Sstevel@tonic-gate ecache_calls_a_sec = hz; 3728*7c478bd9Sstevel@tonic-gate } 3729*7c478bd9Sstevel@tonic-gate 3730*7c478bd9Sstevel@tonic-gate /* 3731*7c478bd9Sstevel@tonic-gate * Register softint for ecache scrubbing. 
3732*7c478bd9Sstevel@tonic-gate */ 3733*7c478bd9Sstevel@tonic-gate ecache_scrub_inum = add_softintr(ecache_scrub_pil, 3734*7c478bd9Sstevel@tonic-gate scrub_ecache_line_intr, NULL); 3735*7c478bd9Sstevel@tonic-gate 3736*7c478bd9Sstevel@tonic-gate /* 3737*7c478bd9Sstevel@tonic-gate * kick off the scrubbing using realtime timeout 3738*7c478bd9Sstevel@tonic-gate */ 3739*7c478bd9Sstevel@tonic-gate (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3740*7c478bd9Sstevel@tonic-gate hz / ecache_calls_a_sec); 3741*7c478bd9Sstevel@tonic-gate } 3742*7c478bd9Sstevel@tonic-gate 3743*7c478bd9Sstevel@tonic-gate /* 3744*7c478bd9Sstevel@tonic-gate * Unset the busy flag for this cpu. 3745*7c478bd9Sstevel@tonic-gate */ 3746*7c478bd9Sstevel@tonic-gate void 3747*7c478bd9Sstevel@tonic-gate cpu_idle_ecache_scrub(struct cpu *cp) 3748*7c478bd9Sstevel@tonic-gate { 3749*7c478bd9Sstevel@tonic-gate if (CPU_PRIVATE(cp) != NULL) { 3750*7c478bd9Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, 3751*7c478bd9Sstevel@tonic-gate sfpr_scrub_misc); 3752*7c478bd9Sstevel@tonic-gate ssmp->ecache_busy = ECACHE_CPU_IDLE; 3753*7c478bd9Sstevel@tonic-gate } 3754*7c478bd9Sstevel@tonic-gate } 3755*7c478bd9Sstevel@tonic-gate 3756*7c478bd9Sstevel@tonic-gate /* 3757*7c478bd9Sstevel@tonic-gate * Set the busy flag for this cpu. 
3758*7c478bd9Sstevel@tonic-gate */ 3759*7c478bd9Sstevel@tonic-gate void 3760*7c478bd9Sstevel@tonic-gate cpu_busy_ecache_scrub(struct cpu *cp) 3761*7c478bd9Sstevel@tonic-gate { 3762*7c478bd9Sstevel@tonic-gate if (CPU_PRIVATE(cp) != NULL) { 3763*7c478bd9Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, 3764*7c478bd9Sstevel@tonic-gate sfpr_scrub_misc); 3765*7c478bd9Sstevel@tonic-gate ssmp->ecache_busy = ECACHE_CPU_BUSY; 3766*7c478bd9Sstevel@tonic-gate } 3767*7c478bd9Sstevel@tonic-gate } 3768*7c478bd9Sstevel@tonic-gate 3769*7c478bd9Sstevel@tonic-gate /* 3770*7c478bd9Sstevel@tonic-gate * initialize the ecache scrubber data structures 3771*7c478bd9Sstevel@tonic-gate * The global entry point cpu_init_private replaces this entry point. 3772*7c478bd9Sstevel@tonic-gate * 3773*7c478bd9Sstevel@tonic-gate */ 3774*7c478bd9Sstevel@tonic-gate static void 3775*7c478bd9Sstevel@tonic-gate cpu_init_ecache_scrub_dr(struct cpu *cp) 3776*7c478bd9Sstevel@tonic-gate { 3777*7c478bd9Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3778*7c478bd9Sstevel@tonic-gate int cpuid = cp->cpu_id; 3779*7c478bd9Sstevel@tonic-gate 3780*7c478bd9Sstevel@tonic-gate /* 3781*7c478bd9Sstevel@tonic-gate * intialize bookkeeping for cache scrubbing 3782*7c478bd9Sstevel@tonic-gate */ 3783*7c478bd9Sstevel@tonic-gate bzero(ssmp, sizeof (spitfire_scrub_misc_t)); 3784*7c478bd9Sstevel@tonic-gate 3785*7c478bd9Sstevel@tonic-gate ssmp->ecache_flush_index = 0; 3786*7c478bd9Sstevel@tonic-gate 3787*7c478bd9Sstevel@tonic-gate ssmp->ecache_nlines = 3788*7c478bd9Sstevel@tonic-gate cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize; 3789*7c478bd9Sstevel@tonic-gate 3790*7c478bd9Sstevel@tonic-gate /* 3791*7c478bd9Sstevel@tonic-gate * Determine whether we are running on mirrored SRAM 3792*7c478bd9Sstevel@tonic-gate */ 3793*7c478bd9Sstevel@tonic-gate 3794*7c478bd9Sstevel@tonic-gate if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR) 3795*7c478bd9Sstevel@tonic-gate 
ssmp->ecache_mirror = ECACHE_CPU_MIRROR; 3796*7c478bd9Sstevel@tonic-gate else 3797*7c478bd9Sstevel@tonic-gate ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR; 3798*7c478bd9Sstevel@tonic-gate 3799*7c478bd9Sstevel@tonic-gate cpu_busy_ecache_scrub(cp); 3800*7c478bd9Sstevel@tonic-gate 3801*7c478bd9Sstevel@tonic-gate /* 3802*7c478bd9Sstevel@tonic-gate * initialize the kstats 3803*7c478bd9Sstevel@tonic-gate */ 3804*7c478bd9Sstevel@tonic-gate ecache_kstat_init(cp); 3805*7c478bd9Sstevel@tonic-gate } 3806*7c478bd9Sstevel@tonic-gate 3807*7c478bd9Sstevel@tonic-gate /* 3808*7c478bd9Sstevel@tonic-gate * uninitialize the ecache scrubber data structures 3809*7c478bd9Sstevel@tonic-gate * The global entry point cpu_uninit_private replaces this entry point. 3810*7c478bd9Sstevel@tonic-gate */ 3811*7c478bd9Sstevel@tonic-gate static void 3812*7c478bd9Sstevel@tonic-gate cpu_uninit_ecache_scrub_dr(struct cpu *cp) 3813*7c478bd9Sstevel@tonic-gate { 3814*7c478bd9Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3815*7c478bd9Sstevel@tonic-gate 3816*7c478bd9Sstevel@tonic-gate if (ssmp->ecache_ksp != NULL) { 3817*7c478bd9Sstevel@tonic-gate kstat_delete(ssmp->ecache_ksp); 3818*7c478bd9Sstevel@tonic-gate ssmp->ecache_ksp = NULL; 3819*7c478bd9Sstevel@tonic-gate } 3820*7c478bd9Sstevel@tonic-gate 3821*7c478bd9Sstevel@tonic-gate /* 3822*7c478bd9Sstevel@tonic-gate * un-initialize bookkeeping for cache scrubbing 3823*7c478bd9Sstevel@tonic-gate */ 3824*7c478bd9Sstevel@tonic-gate bzero(ssmp, sizeof (spitfire_scrub_misc_t)); 3825*7c478bd9Sstevel@tonic-gate 3826*7c478bd9Sstevel@tonic-gate cpu_idle_ecache_scrub(cp); 3827*7c478bd9Sstevel@tonic-gate } 3828*7c478bd9Sstevel@tonic-gate 3829*7c478bd9Sstevel@tonic-gate struct kmem_cache *sf_private_cache; 3830*7c478bd9Sstevel@tonic-gate 3831*7c478bd9Sstevel@tonic-gate /* 3832*7c478bd9Sstevel@tonic-gate * Cpu private initialization. 
This includes allocating the cpu_private 3833*7c478bd9Sstevel@tonic-gate * data structure, initializing it, and initializing the scrubber for this 3834*7c478bd9Sstevel@tonic-gate * cpu. This is called once for EVERY cpu, including CPU 0. This function 3835*7c478bd9Sstevel@tonic-gate * calls cpu_init_ecache_scrub_dr to init the scrubber. 3836*7c478bd9Sstevel@tonic-gate * We use kmem_cache_create for the spitfire private data structure because it 3837*7c478bd9Sstevel@tonic-gate * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary. 3838*7c478bd9Sstevel@tonic-gate */ 3839*7c478bd9Sstevel@tonic-gate void 3840*7c478bd9Sstevel@tonic-gate cpu_init_private(struct cpu *cp) 3841*7c478bd9Sstevel@tonic-gate { 3842*7c478bd9Sstevel@tonic-gate spitfire_private_t *sfprp; 3843*7c478bd9Sstevel@tonic-gate 3844*7c478bd9Sstevel@tonic-gate ASSERT(CPU_PRIVATE(cp) == NULL); 3845*7c478bd9Sstevel@tonic-gate 3846*7c478bd9Sstevel@tonic-gate /* 3847*7c478bd9Sstevel@tonic-gate * If the sf_private_cache has not been created, create it. 3848*7c478bd9Sstevel@tonic-gate */ 3849*7c478bd9Sstevel@tonic-gate if (sf_private_cache == NULL) { 3850*7c478bd9Sstevel@tonic-gate sf_private_cache = kmem_cache_create("sf_private_cache", 3851*7c478bd9Sstevel@tonic-gate sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL, 3852*7c478bd9Sstevel@tonic-gate NULL, NULL, NULL, NULL, 0); 3853*7c478bd9Sstevel@tonic-gate ASSERT(sf_private_cache); 3854*7c478bd9Sstevel@tonic-gate } 3855*7c478bd9Sstevel@tonic-gate 3856*7c478bd9Sstevel@tonic-gate sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP); 3857*7c478bd9Sstevel@tonic-gate 3858*7c478bd9Sstevel@tonic-gate bzero(sfprp, sizeof (spitfire_private_t)); 3859*7c478bd9Sstevel@tonic-gate 3860*7c478bd9Sstevel@tonic-gate cpu_init_ecache_scrub_dr(cp); 3861*7c478bd9Sstevel@tonic-gate } 3862*7c478bd9Sstevel@tonic-gate 3863*7c478bd9Sstevel@tonic-gate /* 3864*7c478bd9Sstevel@tonic-gate * Cpu private unitialization. 
Uninitialize the Ecache scrubber and 3865*7c478bd9Sstevel@tonic-gate * deallocate the scrubber data structures and cpu_private data structure. 3866*7c478bd9Sstevel@tonic-gate * For now, this function just calls cpu_unint_ecache_scrub_dr to uninit 3867*7c478bd9Sstevel@tonic-gate * the scrubber for the specified cpu. 3868*7c478bd9Sstevel@tonic-gate */ 3869*7c478bd9Sstevel@tonic-gate void 3870*7c478bd9Sstevel@tonic-gate cpu_uninit_private(struct cpu *cp) 3871*7c478bd9Sstevel@tonic-gate { 3872*7c478bd9Sstevel@tonic-gate ASSERT(CPU_PRIVATE(cp)); 3873*7c478bd9Sstevel@tonic-gate 3874*7c478bd9Sstevel@tonic-gate cpu_uninit_ecache_scrub_dr(cp); 3875*7c478bd9Sstevel@tonic-gate kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp)); 3876*7c478bd9Sstevel@tonic-gate CPU_PRIVATE(cp) = NULL; 3877*7c478bd9Sstevel@tonic-gate } 3878*7c478bd9Sstevel@tonic-gate 3879*7c478bd9Sstevel@tonic-gate /* 3880*7c478bd9Sstevel@tonic-gate * initialize the ecache kstats for each cpu 3881*7c478bd9Sstevel@tonic-gate */ 3882*7c478bd9Sstevel@tonic-gate static void 3883*7c478bd9Sstevel@tonic-gate ecache_kstat_init(struct cpu *cp) 3884*7c478bd9Sstevel@tonic-gate { 3885*7c478bd9Sstevel@tonic-gate struct kstat *ksp; 3886*7c478bd9Sstevel@tonic-gate spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3887*7c478bd9Sstevel@tonic-gate 3888*7c478bd9Sstevel@tonic-gate ASSERT(ssmp != NULL); 3889*7c478bd9Sstevel@tonic-gate 3890*7c478bd9Sstevel@tonic-gate if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc", 3891*7c478bd9Sstevel@tonic-gate KSTAT_TYPE_NAMED, 3892*7c478bd9Sstevel@tonic-gate sizeof (ecache_kstat_t) / sizeof (kstat_named_t), 3893*7c478bd9Sstevel@tonic-gate KSTAT_FLAG_WRITABLE)) == NULL) { 3894*7c478bd9Sstevel@tonic-gate ssmp->ecache_ksp = NULL; 3895*7c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id); 3896*7c478bd9Sstevel@tonic-gate return; 3897*7c478bd9Sstevel@tonic-gate } 3898*7c478bd9Sstevel@tonic-gate 3899*7c478bd9Sstevel@tonic-gate 
ssmp->ecache_ksp = ksp; 3900*7c478bd9Sstevel@tonic-gate bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t)); 3901*7c478bd9Sstevel@tonic-gate kstat_install(ksp); 3902*7c478bd9Sstevel@tonic-gate } 3903*7c478bd9Sstevel@tonic-gate 3904*7c478bd9Sstevel@tonic-gate /* 3905*7c478bd9Sstevel@tonic-gate * log the bad ecache information 3906*7c478bd9Sstevel@tonic-gate */ 3907*7c478bd9Sstevel@tonic-gate static void 3908*7c478bd9Sstevel@tonic-gate ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb, 3909*7c478bd9Sstevel@tonic-gate uint64_t afsr) 3910*7c478bd9Sstevel@tonic-gate { 3911*7c478bd9Sstevel@tonic-gate spitf_async_flt spf_flt; 3912*7c478bd9Sstevel@tonic-gate struct async_flt *aflt; 3913*7c478bd9Sstevel@tonic-gate int i; 3914*7c478bd9Sstevel@tonic-gate char *class; 3915*7c478bd9Sstevel@tonic-gate 3916*7c478bd9Sstevel@tonic-gate bzero(&spf_flt, sizeof (spitf_async_flt)); 3917*7c478bd9Sstevel@tonic-gate aflt = &spf_flt.cmn_asyncflt; 3918*7c478bd9Sstevel@tonic-gate 3919*7c478bd9Sstevel@tonic-gate for (i = 0; i < 8; i++) { 3920*7c478bd9Sstevel@tonic-gate spf_flt.flt_ec_data[i] = ec_data[i]; 3921*7c478bd9Sstevel@tonic-gate } 3922*7c478bd9Sstevel@tonic-gate 3923*7c478bd9Sstevel@tonic-gate spf_flt.flt_ec_tag = ec_tag; 3924*7c478bd9Sstevel@tonic-gate 3925*7c478bd9Sstevel@tonic-gate if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) { 3926*7c478bd9Sstevel@tonic-gate spf_flt.flt_type = ec_action[mpb].ec_log_type; 3927*7c478bd9Sstevel@tonic-gate } else spf_flt.flt_type = (ushort_t)mpb; 3928*7c478bd9Sstevel@tonic-gate 3929*7c478bd9Sstevel@tonic-gate aflt->flt_inst = CPU->cpu_id; 3930*7c478bd9Sstevel@tonic-gate aflt->flt_class = CPU_FAULT; 3931*7c478bd9Sstevel@tonic-gate aflt->flt_id = gethrtime_waitfree(); 3932*7c478bd9Sstevel@tonic-gate aflt->flt_addr = paddr; 3933*7c478bd9Sstevel@tonic-gate aflt->flt_stat = afsr; 3934*7c478bd9Sstevel@tonic-gate aflt->flt_panic = (uchar_t)ecache_scrub_panic; 3935*7c478bd9Sstevel@tonic-gate 
3936*7c478bd9Sstevel@tonic-gate switch (mpb) { 3937*7c478bd9Sstevel@tonic-gate case CPU_ECACHE_TAG_ERR: 3938*7c478bd9Sstevel@tonic-gate case CPU_ECACHE_ADDR_PAR_ERR: 3939*7c478bd9Sstevel@tonic-gate case CPU_ECACHE_ETP_ETS_ERR: 3940*7c478bd9Sstevel@tonic-gate case CPU_ECACHE_STATE_ERR: 3941*7c478bd9Sstevel@tonic-gate class = FM_EREPORT_CPU_USII_ESCRUB_TAG; 3942*7c478bd9Sstevel@tonic-gate break; 3943*7c478bd9Sstevel@tonic-gate default: 3944*7c478bd9Sstevel@tonic-gate class = FM_EREPORT_CPU_USII_ESCRUB_DATA; 3945*7c478bd9Sstevel@tonic-gate break; 3946*7c478bd9Sstevel@tonic-gate } 3947*7c478bd9Sstevel@tonic-gate 3948*7c478bd9Sstevel@tonic-gate cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt), 3949*7c478bd9Sstevel@tonic-gate ue_queue, aflt->flt_panic); 3950*7c478bd9Sstevel@tonic-gate 3951*7c478bd9Sstevel@tonic-gate if (aflt->flt_panic) 3952*7c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$" 3953*7c478bd9Sstevel@tonic-gate "line detected"); 3954*7c478bd9Sstevel@tonic-gate } 3955*7c478bd9Sstevel@tonic-gate 3956*7c478bd9Sstevel@tonic-gate /* 3957*7c478bd9Sstevel@tonic-gate * Process an ecache error that occured during the E$ scrubbing. 
 * We do the ecache scan to find the bad line, flush the bad line
 * and start the memscrubber to find any UE (in memory or in another cache)
 */
static uint64_t
ecache_scrub_misc_err(int type, uint64_t afsr)
{
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	uint64_t oafsr;

	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = &spf_flt.cmn_asyncflt;

	/*
	 * Scan each line in the cache to look for the one
	 * with bad parity
	 */
	aflt->flt_addr = AFLT_INV_ADDR;
	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
	    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);

	/* Accumulate any CP error noticed during the scan into the
	 * per-cpu scrub AFSR. */
	if (oafsr & P_AFSR_CP) {
		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
		*cp_afsr |= oafsr;
	}

	/*
	 * If we found a bad PA, update the state to indicate if it is
	 * memory or I/O space.
	 */
	if (aflt->flt_addr != AFLT_INV_ADDR) {
		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
		    MMU_PAGESHIFT)) ? 1 : 0;
	}

	spf_flt.flt_type = (ushort_t)type;

	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_id = gethrtime_waitfree();
	/*
	 * NOTE(review): the AFSR is stored in flt_status here, whereas
	 * ecache_scrub_log() stores it in flt_stat -- confirm which field
	 * the ereport consumers expect before changing either.
	 */
	aflt->flt_status = afsr;
	aflt->flt_panic = (uchar_t)ecache_scrub_panic;

	/*
	 * We have the bad line, flush that line and start
	 * the memscrubber.
	 */
	if (spf_flt.flt_ec_lcnt > 0) {
		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
		    cpunodes[CPU->cpu_id].ecache_size);
		read_all_memscrub = 1;
		memscrub_run();
	}

	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);

	return (oafsr);
}

/*
 * Handle an E$ tag error found by the scrubber at E$ index 'index'.
 * Reads back the line's data/tag, then classifies via the AFSR.ETS
 * syndrome bits: a clean line with a tag-address parity error is
 * invalidated and logged; every other tag-error flavor is logged and
 * then panics (dirty line with bad address parity, ETP set with ETS
 * zero, or bad state-bit parity).
 */
static void
ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
{
	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
	uint64_t ec_tag, paddr, oafsr;
	ec_data_t ec_data[8];
	int cpuid = CPU->cpu_id;
	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
	    ecache_associativity;
	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);

	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
	    &oafsr, cpu_afsr);
	/* Reconstruct the physical address from the tag and set index. */
	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
	    (index % ec_set_size);

	/*
	 * E$ tag state has good parity
	 */
	if ((afsr_ets & cpu_ec_state_parity) == 0) {
		if (afsr_ets & cpu_ec_parity) {
			/*
			 * E$ tag state bits indicate the line is clean,
			 * invalidate the E$ tag and continue.
			 */
			if (!(state & cpu_ec_state_dirty)) {
				/*
				 * Zero the tag and mark the state invalid
				 * with good parity for the tag.
				 */
				if (isus2i || isus2e)
					write_hb_ec_tag_parity(index);
				else
					write_ec_tag_parity(index);

				/* Sync with the dual tag */
				flushecacheline(0,
				    cpunodes[CPU->cpu_id].ecache_size);
				ec_ksp->tags_cleared.value.ul++;
				ecache_scrub_log(ec_data, ec_tag, paddr,
				    CPU_ECACHE_TAG_ERR, afsr);
				return;
			} else {
				ecache_scrub_log(ec_data, ec_tag, paddr,
				    CPU_ECACHE_ADDR_PAR_ERR, afsr);
				cmn_err(CE_PANIC, " E$ tag address has bad"
				    " parity");
			}
		} else if ((afsr_ets & cpu_ec_parity) == 0) {
			/*
			 * ETS is zero but ETP is set
			 */
			ecache_scrub_log(ec_data, ec_tag, paddr,
			    CPU_ECACHE_ETP_ETS_ERR, afsr);
			cmn_err(CE_PANIC, "AFSR.ETP is set and"
			    " AFSR.ETS is zero");
		}
	} else {
		/*
		 * E$ tag state bit has a bad parity
		 */
		ecache_scrub_log(ec_data, ec_tag, paddr,
		    CPU_ECACHE_STATE_ERR, afsr);
		cmn_err(CE_PANIC, "E$ tag state has bad parity");
	}
}

/*
 * Softint handler: mark the page containing 'arg' (a physical address)
 * toxic and retire it.  Silently does nothing if the pfn has no page_t.
 */
static void
ecache_page_retire(void *arg)
{
	uint64_t paddr = (uint64_t)arg;
	page_t *pp = page_numtopp_nolock((pfn_t)(paddr >> MMU_PAGESHIFT));

	if (pp) {
		page_settoxic(pp, PAGE_IS_FAULTY);
		(void) page_retire(pp, PAGE_IS_TOXIC);
	}
}

/* No %stick register on US-I/II; these CPU-module hooks are no-ops. */
void
sticksync_slave(void)
{}

void
sticksync_master(void)
{}

/* CE checking hook: not implemented for this CPU module. */
/*ARGSUSED*/
void
cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
{}

/*
 * Dispatch a bus error to any registered DDI FM handlers; if no handler
 * claims it (DDI_FM_FATAL) and the fault is unprotected, force a panic.
 */
void
cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
{
	int status;
	ddi_fm_error_t de;

	bzero(&de, sizeof (ddi_fm_error_t));

	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
	    FM_ENA_FMT1);
	de.fme_flag = expected;
	de.fme_bus_specific = (void *)aflt->flt_addr;
	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);

	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
		aflt->flt_panic = 1;
}

/*
 * Tag the fault payload with its ereport class and queue it on eqp.
 */
/*ARGSUSED*/
void
cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
    errorq_t *eqp, uint_t flag)
{
	struct async_flt *aflt = (struct async_flt *)payload;

	aflt->flt_erpt_class = error_class;
	errorq_dispatch(eqp, payload, payload_sz, flag);
}

/* Maximum memory modules tracked per CPU in the CE hash below. */
#define	MAX_SIMM	8

/* Per-unum CE accounting entry; see the legacy CE hash comment below. */
struct ce_info {
	char    name[UNUM_NAMLEN];	/* unum string; "" means unused */
	uint64_t intermittent_total;
	uint64_t persistent_total;
	uint64_t sticky_total;
	unsigned short leaky_bucket_cnt; /* recent persistent CE count */
};

/*
 * Separately-defined structure for use in reporting the ce_info
 * to SunVTS without exposing the internal layout and implementation
 * of struct ce_info.
 */
static struct ecc_error_info ecc_error_info_data = {
	{ "version", KSTAT_DATA_UINT32 },
	{ "maxcount", KSTAT_DATA_UINT32 },
	{ "count", KSTAT_DATA_UINT32 }
};

static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
    sizeof (struct kstat_named);

#if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
#error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
#endif

/* Legacy per-unum CE table; sized and allocated in cpu_mp_init(). */
struct ce_info  *mem_ce_simm = NULL;
size_t mem_ce_simm_size = 0;

/*
 * Default values for the number of CE's allowed per interval.
 * Interval is defined in minutes
 * SOFTERR_MIN_TIMEOUT is defined in microseconds
 */
#define	SOFTERR_LIMIT_DEFAULT		2
#define	SOFTERR_INTERVAL_DEFAULT	1440		/* This is 24 hours */
#define	SOFTERR_MIN_TIMEOUT		(60 * MICROSEC)	/* This is 1 minute */
#define	TIMEOUT_NONE			((timeout_id_t)0)
#define	TIMEOUT_SET			((timeout_id_t)1)

/*
 * timeout identifer for leaky_bucket; TIMEOUT_SET is a placeholder used
 * by ce_count_unum()'s casptr to claim the one-time arming of the timeout.
 */
static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;

/*
 * Tunables for maximum number of allowed CE's in a given time
 */
int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;

/*
 * MP startup: allocate the CE accounting table and publish the
 * "ecc-info" kstats (one summary kstat plus one "mm" kstat per table
 * slot for SunVTS consumption).
 */
void
cpu_mp_init(void)
{
	/*
	 * NOTE(review): this initializer is immediately overwritten
	 * below; cpu_aflt_size() appears to be called only for its
	 * side effects, if any -- confirm.
	 */
	size_t size = cpu_aflt_size();
	size_t i;
	kstat_t *ksp;

	/*
	 * Initialize the CE error handling buffers.
	 */
	mem_ce_simm_size = MAX_SIMM * max_ncpus;
	size = sizeof (struct ce_info) * mem_ce_simm_size;
	mem_ce_simm = kmem_zalloc(size, KM_SLEEP);

	ksp = kstat_create("unix", 0, "ecc-info", "misc",
	    KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
		ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
		ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
		ecc_error_info_data.count.value.ui32 = 0;
		kstat_install(ksp);
	}

	for (i = 0; i < mem_ce_simm_size; i++) {
		struct kstat_ecc_mm_info *kceip;

		kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
		    KM_SLEEP);
		ksp = kstat_create("mm", i, "ecc-info", "misc",
		    KSTAT_TYPE_NAMED,
		    sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL);
		if (ksp != NULL) {
			/*
			 * Re-declare ks_data_size to include room for the
			 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
			 * set.
			 */
			ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
			    KSTAT_CE_UNUM_NAMLEN;
			ksp->ks_data = kceip;
			kstat_named_init(&kceip->name,
			    "name", KSTAT_DATA_STRING);
			kstat_named_init(&kceip->intermittent_total,
			    "intermittent_total", KSTAT_DATA_UINT64);
			kstat_named_init(&kceip->persistent_total,
			    "persistent_total", KSTAT_DATA_UINT64);
			kstat_named_init(&kceip->sticky_total,
			    "sticky_total", KSTAT_DATA_UINT64);
			/*
			 * Use the default snapshot routine as it knows how to
			 * deal with named kstats with long strings.
			 */
			ksp->ks_update = ecc_kstat_update;
			kstat_install(ksp);
		} else {
			/* kstat creation failed; don't leak the buffer. */
			kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
		}
	}
}

/*
 * Leaky bucket drain: periodically decrement each unum's recent-CE
 * count by one, then re-arm the timeout.
 */
/*ARGSUSED*/
static void
leaky_bucket_timeout(void *arg)
{
	int i;
	struct ce_info *psimm = mem_ce_simm;

	for (i = 0; i < mem_ce_simm_size; i++) {
		if (psimm[i].leaky_bucket_cnt > 0)
			atomic_add_16(&psimm[i].leaky_bucket_cnt, -1);
	}
	add_leaky_bucket_timeout();
}

/*
 * Arm the next leaky-bucket drain timeout, sanitizing the tunables and
 * clamping the interval to SOFTERR_MIN_TIMEOUT.
 */
static void
add_leaky_bucket_timeout(void)
{
	long timeout_in_microsecs;

	/*
	 * create timeout for next leak.
	 *
	 * The timeout interval is calculated as follows
	 *
	 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
	 *
	 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
	 * in a minute), then multiply this by MICROSEC to get the interval
	 * in microseconds.  Divide this total by ecc_softerr_limit so that
	 * the timeout interval is accurate to within a few microseconds.
	 */

	if (ecc_softerr_limit <= 0)
		ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
	if (ecc_softerr_interval <= 0)
		ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;

	timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
	    ecc_softerr_limit;

	if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
		timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;

	leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
	    (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
}

/*
 * Legacy Correctable ECC Error Hash
 *
 * All of the code below this comment is used to implement a legacy array
 * which counted intermittent, persistent, and sticky CE errors by unum,
 * and then was later extended to publish the data as a kstat for SunVTS.
 * All of this code is replaced by FMA, and remains here until such time
 * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
 *
 * Errors are saved in three buckets per-unum:
 * (1) sticky - scrub was unsuccessful, cannot be scrubbed
 *     This could represent a problem, and is immediately printed out.
 * (2) persistent - was successfully scrubbed
 *     These errors use the leaky bucket algorithm to determine
 *     if there is a serious problem.
 * (3) intermittent - may have originated from the cpu or upa/safari bus,
 *     and does not necessarily indicate any problem with the dimm itself,
 *     is critical information for debugging new hardware.
 *     Because we do not know if it came from the dimm, it would be
 *     inappropriate to include these in the leaky bucket counts.
 *
 * If the E$ line was modified before the scrub operation began, then the
 * displacement flush at the beginning of scrubphys() will cause the modified
 * line to be written out, which will clean up the CE.  Then, any subsequent
 * read will not cause an error, which will cause persistent errors to be
 * identified as intermittent.
 *
 * If a DIMM is going bad, it will produce true persistents as well as
 * false intermittents, so these intermittents can be safely ignored.
 *
 * If the error count is excessive for a DIMM, this function will return
 * PAGE_IS_FAILING, and the CPU module may then decide to remove that page
 * from use.
 */
static int
ce_count_unum(int status, int len, char *unum)
{
	int i;
	struct ce_info *psimm = mem_ce_simm;
	int page_status = PAGE_IS_OK;

	ASSERT(psimm != NULL);

	if (len <= 0 ||
	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
		return (page_status);

	/*
	 * Initialize the leaky_bucket timeout exactly once; casptr
	 * ensures only the first caller arms it.
	 */
	if (casptr(&leaky_bucket_timeout_id,
	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
		add_leaky_bucket_timeout();

	for (i = 0; i < mem_ce_simm_size; i++) {
		if (psimm[i].name[0] == '\0') {
			/*
			 * Hit the end of the valid entries, add
			 * a new one.
			 */
			(void) strncpy(psimm[i].name, unum, len);
			if (status & ECC_STICKY) {
				/*
				 * Sticky - the leaky bucket is used to track
				 * soft errors.  Since a sticky error is a
				 * hard error and likely to be retired soon,
				 * we do not count it in the leaky bucket.
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 1;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PAGE_IS_FAILING;
			} else if (status & ECC_PERSISTENT) {
				psimm[i].leaky_bucket_cnt = 1;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 1;
				psimm[i].sticky_total = 0;
			} else {
				/*
				 * Intermittent - Because the scrub operation
				 * cannot find the error in the DIMM, we will
				 * not count these in the leaky bucket
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 1;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 0;
			}
			ecc_error_info_data.count.value.ui32++;
			break;
		} else if (strncmp(unum, psimm[i].name, len) == 0) {
			/*
			 * Found an existing entry for the current
			 * memory module, adjust the counts.
			 */
			if (status & ECC_STICKY) {
				psimm[i].sticky_total++;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PAGE_IS_FAILING;
			} else if (status & ECC_PERSISTENT) {
				int new_value;

				new_value = atomic_add_16_nv(
				    &psimm[i].leaky_bucket_cnt, 1);
				psimm[i].persistent_total++;
				if (new_value > ecc_softerr_limit) {
					cmn_err(CE_WARN, "[AFT0] Most recent %d"
					    " soft errors from Memory Module"
					    " %s exceed threshold (N=%d,"
					    " T=%dh:%02dm) triggering page"
					    " retire", new_value, unum,
					    ecc_softerr_limit,
					    ecc_softerr_interval / 60,
					    ecc_softerr_interval % 60);
					atomic_add_16(
					    &psimm[i].leaky_bucket_cnt, -1);
					page_status = PAGE_IS_FAILING;
				}
			} else { /* Intermittent */
				psimm[i].intermittent_total++;
			}
			break;
		}
	}

	if (i >= mem_ce_simm_size)
		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
		    "space.\n");

	return (page_status);
}

/*
 * Function to support counting of IO detected CEs.  If the bucket for
 * the unum crosses the threshold and automatic page removal is enabled,
 * retire the faulting page.
 */
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
	if (ce_count_unum(ecc->flt_status, len, unum) == PAGE_IS_FAILING &&
	    automatic_page_removal) {
		page_t *pp = page_numtopp_nolock((pfn_t)
		    (ecc->flt_addr >> MMU_PAGESHIFT));

		if (pp) {
			page_settoxic(pp, PAGE_IS_FAULTY);
			(void) page_retire(pp, PAGE_IS_FAILING);
		}
	}
}

/*
 * kstat update routine for the per-slot "mm"/"ecc-info" kstats: copy the
 * ce_info slot matching ks_instance into the named-kstat buffer.
 * Read-only; returns EACCES on KSTAT_WRITE.
 */
static int
ecc_kstat_update(kstat_t *ksp, int rw)
{
	struct kstat_ecc_mm_info *kceip = ksp->ks_data;
	struct ce_info *ceip = mem_ce_simm;
	int i = ksp->ks_instance;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ASSERT(ksp->ks_data != NULL);
	ASSERT(i < mem_ce_simm_size && i >= 0);

	/*
	 * Since we're not using locks, make sure that we don't get partial
	 * data.  The name is always copied before the counters are incremented
	 * so only do this update routine if at least one of the counters is
	 * non-zero, which ensures that ce_count_unum() is done, and the
	 * string is fully copied.
	 */
	if (ceip[i].intermittent_total == 0 &&
	    ceip[i].persistent_total == 0 &&
	    ceip[i].sticky_total == 0) {
		/*
		 * Uninitialized or partially initialized. Ignore.
		 * The ks_data buffer was allocated via kmem_zalloc,
		 * so no need to bzero it.
		 */
		return (0);
	}

	kstat_named_setstr(&kceip->name, ceip[i].name);
	kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
	kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
	kceip->sticky_total.value.ui64 = ceip[i].sticky_total;

	return (0);
}

/* VIS block-store transfer size, used by the dtrace fasttrap path. */
#define	VIS_BLOCKSIZE	64

/*
 * Slow-path fallback for dtrace_blksuword32(): temporarily disable any
 * watchpoint covering the destination block, retry the store without
 * the fast path, then restore the watchpoint.
 */
int
dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
{
	int ret, watched;

	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
	ret = dtrace_blksuword32(addr, data, 0);
	if (watched)
		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);

	return (ret);
}

/* No CPU-specific work needed when a CPU enters the faulted state. */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
}

/* No CPU-specific work needed when a CPU leaves the faulted state. */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
}

/* Page sizes US-II cannot use for ISM and regular mappings. */
static int mmu_disable_ism_large_pages = ((1 << TTE512K) |
	(1 << TTE32M) | (1 << TTE256M));
static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));

/*
 * The function returns the US_II mmu-specific values for the
 * hat's disable_large_pages and disable_ism_large_pages variables.
4532*7c478bd9Sstevel@tonic-gate */ 4533*7c478bd9Sstevel@tonic-gate int 4534*7c478bd9Sstevel@tonic-gate mmu_large_pages_disabled(uint_t flag) 4535*7c478bd9Sstevel@tonic-gate { 4536*7c478bd9Sstevel@tonic-gate int pages_disable = 0; 4537*7c478bd9Sstevel@tonic-gate 4538*7c478bd9Sstevel@tonic-gate if (flag == HAT_LOAD) { 4539*7c478bd9Sstevel@tonic-gate pages_disable = mmu_disable_large_pages; 4540*7c478bd9Sstevel@tonic-gate } else if (flag == HAT_LOAD_SHARE) { 4541*7c478bd9Sstevel@tonic-gate pages_disable = mmu_disable_ism_large_pages; 4542*7c478bd9Sstevel@tonic-gate } 4543*7c478bd9Sstevel@tonic-gate return (pages_disable); 4544*7c478bd9Sstevel@tonic-gate } 4545*7c478bd9Sstevel@tonic-gate 4546*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 4547*7c478bd9Sstevel@tonic-gate void 4548*7c478bd9Sstevel@tonic-gate mmu_init_kernel_pgsz(struct hat *hat) 4549*7c478bd9Sstevel@tonic-gate { 4550*7c478bd9Sstevel@tonic-gate } 4551*7c478bd9Sstevel@tonic-gate 4552*7c478bd9Sstevel@tonic-gate size_t 4553*7c478bd9Sstevel@tonic-gate mmu_get_kernel_lpsize(size_t lpsize) 4554*7c478bd9Sstevel@tonic-gate { 4555*7c478bd9Sstevel@tonic-gate uint_t tte; 4556*7c478bd9Sstevel@tonic-gate 4557*7c478bd9Sstevel@tonic-gate if (lpsize == 0) { 4558*7c478bd9Sstevel@tonic-gate /* no setting for segkmem_lpsize in /etc/system: use default */ 4559*7c478bd9Sstevel@tonic-gate return (MMU_PAGESIZE4M); 4560*7c478bd9Sstevel@tonic-gate } 4561*7c478bd9Sstevel@tonic-gate 4562*7c478bd9Sstevel@tonic-gate for (tte = TTE8K; tte <= TTE4M; tte++) { 4563*7c478bd9Sstevel@tonic-gate if (lpsize == TTEBYTES(tte)) 4564*7c478bd9Sstevel@tonic-gate return (lpsize); 4565*7c478bd9Sstevel@tonic-gate } 4566*7c478bd9Sstevel@tonic-gate 4567*7c478bd9Sstevel@tonic-gate return (TTEBYTES(TTE8K)); 4568*7c478bd9Sstevel@tonic-gate } 4569